From 7a9c48f6b856fc6f2743178eb6d3e3bfbbe275a0 Mon Sep 17 00:00:00 2001
From: Devavret Makkar <dmakkar@nvidia.com>
Date: Sat, 18 Apr 2020 01:02:45 +0530
Subject: [PATCH 01/90] table_view pack: first commit.

---
 cpp/CMakeLists.txt           |  1 +
 cpp/include/cudf/copying.hpp | 19 ++++++++++++
 cpp/src/copying/pack.cpp     | 58 ++++++++++++++++++++++++++++++++++++
 3 files changed, 78 insertions(+)
 create mode 100644 cpp/src/copying/pack.cpp

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index ba1e83d742b..c66d9d94c5a 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -566,6 +566,7 @@ add_library(cudf
             src/copying/slice.cpp
             src/copying/split.cpp
             src/copying/contiguous_split.cu
+            src/copying/pack.cpp
             src/copying/legacy/copy.cpp
             src/copying/legacy/gather.cu
             src/copying/legacy/scatter.cu
diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp
index ac6c9f24546..27feed42122 100644
--- a/cpp/include/cudf/copying.hpp
+++ b/cpp/include/cudf/copying.hpp
@@ -444,6 +444,25 @@ std::vector<contiguous_split_result> contiguous_split(cudf::table_view const& in
                                                       std::vector<size_type> const& splits,
                                                       rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
 
+struct packed_table {
+    struct serialized_column {
+        cudf::data_type _type;
+        cudf::size_type _size;
+        size_t _data_offset;
+        size_t _null_mask_offset;
+        cudf::size_type _num_children;
+    };
+
+    std::vector<serialized_column> table_metadata;
+    std::unique_ptr<rmm::device_buffer> table_data;
+};
+
+packed_table pack(cudf::table_view const& input,
+                  rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
+
+contiguous_split_result unpack(packed_table input,
+                               rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
+
 /**
  * @brief   Returns a new column, where each element is selected from either @p lhs or 
  *          @p rhs based on the value of the corresponding element in @p boolean_mask
diff --git a/cpp/src/copying/pack.cpp b/cpp/src/copying/pack.cpp
new file mode 100644
index 00000000000..6126efa0dd4
--- /dev/null
+++ b/cpp/src/copying/pack.cpp
@@ -0,0 +1,58 @@
+#include <cudf/copying.hpp>
+#include <cudf/detail/nvtx/ranges.hpp>
+#include <jit/type.h>
+
+namespace cudf {
+namespace experimental {
+namespace detail {
+
+packed_table::serialized_column serialize_column(column_view const& col,
+                                                 rmm::device_buffer const& table_data)
+{
+  auto all_data_buffer_ptr = static_cast<uint8_t const*>(table_data.data());
+  size_t data_offset = col.data<uint8_t>() - all_data_buffer_ptr;
+  size_t null_mask_offset = reinterpret_cast<uint8_t const*>(col.null_mask()) - all_data_buffer_ptr;
+  return packed_table::serialized_column{col.type(), col.size(), data_offset, null_mask_offset, col.num_children()};
+}
+
+void add_columns(std::vector<column_view> const& cols,
+                 rmm::device_buffer const& table_data,
+                 std::vector<packed_table::serialized_column> * table_metadata)
+{
+  for (auto &&col : cols) {
+    table_metadata->emplace_back(serialize_column(col, table_data));
+    std::vector<column_view> children;
+    for (size_t i = 0; i < col.num_children(); i++) {
+      children.push_back(col.child(i));
+    }
+    
+    add_columns(children, table_data, table_metadata);
+  }
+}
+
+packed_table pack(cudf::table_view const& input,
+                  cudaStream_t stream,
+                  rmm::mr::device_memory_resource* mr)
+{
+  contiguous_split_result contiguous_data = std::move(contiguous_split(input, {0})[0]);
+
+  packed_table result{{}, std::move(contiguous_data.all_data)};
+  
+  std::vector<column_view> table_columns(input.begin(), input.end());
+
+  add_columns(table_columns, *contiguous_data.all_data, &result.table_metadata);
+
+  return result;
+}
+
+} // namespace detail
+
+packed_table pack(cudf::table_view const& input,
+                  rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE()
+  return detail::pack(input, 0, mr);
+}
+
+} // namespace experimental  
+} // namespace cudf

From 3a884e78a8790d4cf762b37754389037dc1fc8da Mon Sep 17 00:00:00 2001
From: Devavret Makkar <dmakkar@nvidia.com>
Date: Sat, 18 Apr 2020 03:56:01 +0530
Subject: [PATCH 02/90] Add cudf::unpack

---
 cpp/include/cudf/copying.hpp |  2 +-
 cpp/src/copying/pack.cpp     | 77 +++++++++++++++++++++++++++++++++++-
 2 files changed, 77 insertions(+), 2 deletions(-)

diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp
index 27feed42122..ad42bbf96db 100644
--- a/cpp/include/cudf/copying.hpp
+++ b/cpp/include/cudf/copying.hpp
@@ -460,7 +460,7 @@ struct packed_table {
 packed_table pack(cudf::table_view const& input,
                   rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
 
-contiguous_split_result unpack(packed_table input,
+contiguous_split_result unpack(packed_table & input,
                                rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
 
 /**
diff --git a/cpp/src/copying/pack.cpp b/cpp/src/copying/pack.cpp
index 6126efa0dd4..001ebafe9e7 100644
--- a/cpp/src/copying/pack.cpp
+++ b/cpp/src/copying/pack.cpp
@@ -6,11 +6,14 @@ namespace cudf {
 namespace experimental {
 namespace detail {
 
+namespace {
+
 packed_table::serialized_column serialize_column(column_view const& col,
                                                  rmm::device_buffer const& table_data)
 {
   auto all_data_buffer_ptr = static_cast<uint8_t const*>(table_data.data());
   size_t data_offset = col.data<uint8_t>() - all_data_buffer_ptr;
+  // TODO: fix for non-nullable
   size_t null_mask_offset = reinterpret_cast<uint8_t const*>(col.null_mask()) - all_data_buffer_ptr;
   return packed_table::serialized_column{col.type(), col.size(), data_offset, null_mask_offset, col.num_children()};
 }
@@ -30,6 +33,8 @@ void add_columns(std::vector<column_view> const& cols,
   }
 }
 
+} // namespace anonymous
+
 packed_table pack(cudf::table_view const& input,
                   cudaStream_t stream,
                   rmm::mr::device_memory_resource* mr)
@@ -45,14 +50,84 @@ packed_table pack(cudf::table_view const& input,
   return result;
 }
 
+namespace {
+
+column_view deserialize_column(packed_table::serialized_column serial_column,
+                               std::vector<column_view> const& children,
+                               rmm::device_buffer const& table_data)
+{
+  auto all_data_buffer_ptr = static_cast<uint8_t const*>(table_data.data());
+
+  auto data_ptr = all_data_buffer_ptr + serial_column._data_offset;
+
+  auto null_mask_ptr = reinterpret_cast<bitmask_type const*>(
+    all_data_buffer_ptr + serial_column._null_mask_offset);
+
+  return column_view(
+    serial_column._type,
+    serial_column._size,
+    data_ptr,
+    null_mask_ptr,
+    UNKNOWN_NULL_COUNT,
+    0,
+    children);
+}
+
+std::vector<column_view> get_columns(cudf::size_type num_columns,
+                                     std::vector<packed_table::serialized_column> const& serialized_columns,
+                                     rmm::device_buffer const& table_data,
+                                     size_t * current_index)
+{
+  std::vector<column_view> cols;
+  for (size_t i = 0; i < num_columns; i++)
+  {
+    auto serial_column = serialized_columns[*current_index];
+    (*current_index)++;
+
+    std::vector<column_view> children = get_columns(
+      serial_column._num_children,
+      serialized_columns,
+      table_data,
+      current_index);
+
+    cols.emplace_back(deserialize_column(serial_column, children, table_data));
+  }
+  
+  return cols;
+}
+
+} // namespace anonymous
+
+contiguous_split_result unpack(packed_table & input,
+                               cudaStream_t stream,
+                               rmm::mr::device_memory_resource* mr)
+{
+  cudf::size_type num_columns = input.table_metadata[0]._num_children;
+  size_t current_index = 1;
+
+  std::vector<column_view> table_columns = get_columns(num_columns,
+                                                       input.table_metadata,
+                                                       *input.table_data,
+                                                       &current_index);
+
+  return contiguous_split_result{table_view(table_columns), std::move(input.table_data)};
+}
+
 } // namespace detail
 
 packed_table pack(cudf::table_view const& input,
                   rmm::mr::device_memory_resource* mr)
 {
-  CUDF_FUNC_RANGE()
+  CUDF_FUNC_RANGE();
   return detail::pack(input, 0, mr);
 }
 
+contiguous_split_result unpack(packed_table & input,
+                               rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+  return detail::unpack(input, 0, mr);
+}
+
 } // namespace experimental  
 } // namespace cudf

From 6e3f23a1180e08ae8f7ddd6bba3fe8a973d842d7 Mon Sep 17 00:00:00 2001
From: Devavret Makkar <dmakkar@nvidia.com>
Date: Sat, 18 Apr 2020 05:53:10 +0530
Subject: [PATCH 03/90] Add pack/unpack tests for fixed-width nullable columns.
 Fix pack: it was using moved-from data and did not add the first metadata
 element, which stands in for the table

---
 cpp/src/copying/pack.cpp        |  8 +++--
 cpp/tests/CMakeLists.txt        |  3 +-
 cpp/tests/copying/pack_tests.cu | 57 +++++++++++++++++++++++++++++++++
 3 files changed, 64 insertions(+), 4 deletions(-)
 create mode 100644 cpp/tests/copying/pack_tests.cu

diff --git a/cpp/src/copying/pack.cpp b/cpp/src/copying/pack.cpp
index 001ebafe9e7..7257b37e2f2 100644
--- a/cpp/src/copying/pack.cpp
+++ b/cpp/src/copying/pack.cpp
@@ -39,13 +39,15 @@ packed_table pack(cudf::table_view const& input,
                   cudaStream_t stream,
                   rmm::mr::device_memory_resource* mr)
 {
-  contiguous_split_result contiguous_data = std::move(contiguous_split(input, {0})[0]);
+  contiguous_split_result contiguous_data = std::move(contiguous_split(input, {})[0]);
 
-  packed_table result{{}, std::move(contiguous_data.all_data)};
+  packed_table::serialized_column table_element = {{}, 0, 0, 0, contiguous_data.table.num_columns()};
+
+  packed_table result{{table_element}, std::move(contiguous_data.all_data)};
   
   std::vector<column_view> table_columns(input.begin(), input.end());
 
-  add_columns(table_columns, *contiguous_data.all_data, &result.table_metadata);
+  add_columns(table_columns, *result.table_data, &result.table_metadata);
 
   return result;
 }
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index 54fd162fd26..0979eae9dfa 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -577,7 +577,8 @@ set(COPYING_TEST_SRC
     "${CMAKE_CURRENT_SOURCE_DIR}/copying/split_tests.cu"
     "${CMAKE_CURRENT_SOURCE_DIR}/copying/copy_tests.cu"
     "${CMAKE_CURRENT_SOURCE_DIR}/copying/shift_tests.cu"
-    "${CMAKE_CURRENT_SOURCE_DIR}/copying/concatenate_tests.cu")
+    "${CMAKE_CURRENT_SOURCE_DIR}/copying/concatenate_tests.cu"
+    "${CMAKE_CURRENT_SOURCE_DIR}/copying/pack_tests.cu")
 
 ConfigureTest(COPYING_TEST "${COPYING_TEST_SRC}")
 
diff --git a/cpp/tests/copying/pack_tests.cu b/cpp/tests/copying/pack_tests.cu
new file mode 100644
index 00000000000..03689ad54af
--- /dev/null
+++ b/cpp/tests/copying/pack_tests.cu
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2019, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cudf/copying.hpp>
+#include <tests/utilities/base_fixture.hpp>
+#include <tests/utilities/column_wrapper.hpp>
+#include <tests/utilities/table_utilities.hpp>
+
+namespace cudf {
+namespace test {
+
+
+struct PackUnpackTest : public BaseFixture {};
+
+TEST_F(PackUnpackTest, SingleColumnFixedWidth)
+{
+  fixed_width_column_wrapper<int64_t> col1 ({ 1, 2, 3, 4, 5, 6, 7},
+                                            { 1, 1, 1, 0, 1, 0, 1});
+  table_view t({col1});
+
+  experimental::packed_table packed = experimental::pack(t);
+  experimental::contiguous_split_result unpacked = experimental::unpack(packed);
+
+  expect_tables_equal(t, unpacked.table);
+}
+
+TEST_F(PackUnpackTest, MultiColumnFixedWidth)
+{
+  fixed_width_column_wrapper<int16_t> col1 ({ 1, 2, 3, 4, 5, 6, 7},
+                                            { 1, 1, 1, 0, 1, 0, 1});
+  fixed_width_column_wrapper<float>   col2 ({ 7, 8, 6, 5, 4, 3, 2},
+                                            { 1, 0, 1, 1, 1, 1, 1});
+  fixed_width_column_wrapper<double>  col3 ({ 8, 4, 2, 0, 7, 1, 9},
+                                            { 0, 1, 1, 1, 1, 1, 1});
+  table_view t({col1, col2, col3});
+
+  experimental::packed_table packed = experimental::pack(t);
+  experimental::contiguous_split_result unpacked = experimental::unpack(packed);
+
+  expect_tables_equal(t, unpacked.table);
+}
+
+} // namespace test
+} // namespace cudf

From d43f28779d732c503ed30efe379e653744568474 Mon Sep 17 00:00:00 2001
From: Devavret Makkar <dmakkar@nvidia.com>
Date: Tue, 21 Apr 2020 04:03:33 +0530
Subject: [PATCH 04/90] Fix pack still using the input table view after
 contiguous split

---
 cpp/src/copying/pack.cpp        | 2 +-
 cpp/tests/copying/pack_tests.cu | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/cpp/src/copying/pack.cpp b/cpp/src/copying/pack.cpp
index 7257b37e2f2..10f754730d5 100644
--- a/cpp/src/copying/pack.cpp
+++ b/cpp/src/copying/pack.cpp
@@ -45,7 +45,7 @@ packed_table pack(cudf::table_view const& input,
 
   packed_table result{{table_element}, std::move(contiguous_data.all_data)};
   
-  std::vector<column_view> table_columns(input.begin(), input.end());
+  std::vector<column_view> table_columns(contiguous_data.table.begin(), contiguous_data.table.end());
 
   add_columns(table_columns, *result.table_data, &result.table_metadata);
 
diff --git a/cpp/tests/copying/pack_tests.cu b/cpp/tests/copying/pack_tests.cu
index 03689ad54af..a2add793163 100644
--- a/cpp/tests/copying/pack_tests.cu
+++ b/cpp/tests/copying/pack_tests.cu
@@ -32,7 +32,9 @@ TEST_F(PackUnpackTest, SingleColumnFixedWidth)
   table_view t({col1});
 
   experimental::packed_table packed = experimental::pack(t);
-  experimental::contiguous_split_result unpacked = experimental::unpack(packed);
+  experimental::packed_table packed2{
+    packed.table_metadata, std::make_unique<rmm::device_buffer>(*packed.table_data)};
+  experimental::contiguous_split_result unpacked = experimental::unpack(packed2);
 
   expect_tables_equal(t, unpacked.table);
 }

From 1862f0b8d7af1e1c55050a3c67109f6df90ec5ec Mon Sep 17 00:00:00 2001
From: Devavret Makkar <dmakkar@nvidia.com>
Date: Tue, 21 Apr 2020 05:13:12 +0530
Subject: [PATCH 05/90] Fix the case where columns are non-nullable (no null mask).

---
 cpp/src/copying/pack.cpp        | 20 ++++++++++++++++----
 cpp/tests/copying/pack_tests.cu | 17 ++++++++++++++++-
 2 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/cpp/src/copying/pack.cpp b/cpp/src/copying/pack.cpp
index 10f754730d5..8e90ad46af2 100644
--- a/cpp/src/copying/pack.cpp
+++ b/cpp/src/copying/pack.cpp
@@ -13,8 +13,9 @@ packed_table::serialized_column serialize_column(column_view const& col,
 {
   auto all_data_buffer_ptr = static_cast<uint8_t const*>(table_data.data());
   size_t data_offset = col.data<uint8_t>() - all_data_buffer_ptr;
-  // TODO: fix for non-nullable
-  size_t null_mask_offset = reinterpret_cast<uint8_t const*>(col.null_mask()) - all_data_buffer_ptr;
+  size_t null_mask_offset = col.nullable()
+                             ? reinterpret_cast<uint8_t const*>(col.null_mask()) - all_data_buffer_ptr
+                             : -1;
   return packed_table::serialized_column{col.type(), col.size(), data_offset, null_mask_offset, col.num_children()};
 }
 
@@ -62,8 +63,19 @@ column_view deserialize_column(packed_table::serialized_column serial_column,
 
   auto data_ptr = all_data_buffer_ptr + serial_column._data_offset;
 
-  auto null_mask_ptr = reinterpret_cast<bitmask_type const*>(
-    all_data_buffer_ptr + serial_column._null_mask_offset);
+  // size_t is an unsigned int so -1 is the max value of size_t. If the offset
+  // is UINT64_MAX then just assume there's no null mask instead of thinking
+  // what if there IS a null mask but the buffer is just -1u sized. This translates
+  // to 16 EB of memory. No GPU has that amount of memory and it'll be a while
+  // before anyone does. By that time, we'll have bigger problems because all code
+  // that exists will need to be re-written to consider memory > 16 EB. It'll be
+  // bigger than Y2K; and I'll be prepared with a cottage in Waknaghat and a lifetime
+  // supply of soylent and shotgun ammo.
+  // TODO: Replace above with better reasoning
+  auto null_mask_ptr = serial_column._null_mask_offset != -1
+                        ? reinterpret_cast<bitmask_type const*>(
+                            all_data_buffer_ptr + serial_column._null_mask_offset)
+                        : 0;
 
   return column_view(
     serial_column._type,
diff --git a/cpp/tests/copying/pack_tests.cu b/cpp/tests/copying/pack_tests.cu
index a2add793163..265f692ca28 100644
--- a/cpp/tests/copying/pack_tests.cu
+++ b/cpp/tests/copying/pack_tests.cu
@@ -39,6 +39,19 @@ TEST_F(PackUnpackTest, SingleColumnFixedWidth)
   expect_tables_equal(t, unpacked.table);
 }
 
+TEST_F(PackUnpackTest, SingleColumnFixedWidthNonNullable)
+{
+  fixed_width_column_wrapper<int64_t> col1 ({ 1, 2, 3, 4, 5, 6, 7});
+  table_view t({col1});
+
+  experimental::packed_table packed = experimental::pack(t);
+  experimental::packed_table packed2{
+    packed.table_metadata, std::make_unique<rmm::device_buffer>(*packed.table_data)};
+  experimental::contiguous_split_result unpacked = experimental::unpack(packed2);
+
+  expect_tables_equal(t, unpacked.table);
+}
+
 TEST_F(PackUnpackTest, MultiColumnFixedWidth)
 {
   fixed_width_column_wrapper<int16_t> col1 ({ 1, 2, 3, 4, 5, 6, 7},
@@ -50,7 +63,9 @@ TEST_F(PackUnpackTest, MultiColumnFixedWidth)
   table_view t({col1, col2, col3});
 
   experimental::packed_table packed = experimental::pack(t);
-  experimental::contiguous_split_result unpacked = experimental::unpack(packed);
+  experimental::packed_table packed2{
+    packed.table_metadata, std::make_unique<rmm::device_buffer>(*packed.table_data)};
+  experimental::contiguous_split_result unpacked = experimental::unpack(packed2);
 
   expect_tables_equal(t, unpacked.table);
 }

From a19b5efc6eb10f693dfc212c8ca3fc77961725a7 Mon Sep 17 00:00:00 2001
From: Devavret Makkar <dmakkar@nvidia.com>
Date: Tue, 21 Apr 2020 06:08:58 +0530
Subject: [PATCH 06/90] Tests and fix for string columns

---
 cpp/src/copying/pack.cpp        | 10 ++++++++--
 cpp/tests/copying/pack_tests.cu | 17 +++++++++++++++++
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/cpp/src/copying/pack.cpp b/cpp/src/copying/pack.cpp
index 8e90ad46af2..0a768b65a79 100644
--- a/cpp/src/copying/pack.cpp
+++ b/cpp/src/copying/pack.cpp
@@ -12,7 +12,11 @@ packed_table::serialized_column serialize_column(column_view const& col,
                                                  rmm::device_buffer const& table_data)
 {
   auto all_data_buffer_ptr = static_cast<uint8_t const*>(table_data.data());
-  size_t data_offset = col.data<uint8_t>() - all_data_buffer_ptr;
+  
+  // There are columns types that don't have data in parent e.g. strings
+  size_t data_offset = col.data<uint8_t>()
+                        ? col.data<uint8_t>() - all_data_buffer_ptr
+                        : -1;
   size_t null_mask_offset = col.nullable()
                              ? reinterpret_cast<uint8_t const*>(col.null_mask()) - all_data_buffer_ptr
                              : -1;
@@ -61,7 +65,9 @@ column_view deserialize_column(packed_table::serialized_column serial_column,
 {
   auto all_data_buffer_ptr = static_cast<uint8_t const*>(table_data.data());
 
-  auto data_ptr = all_data_buffer_ptr + serial_column._data_offset;
+  auto data_ptr = serial_column._data_offset != -1
+                  ? all_data_buffer_ptr + serial_column._data_offset
+                  : 0;
 
   // size_t is an unsigned int so -1 is the max value of size_t. If the offset
   // is UINT64_MAX then just assume there's no null mask instead of thinking
diff --git a/cpp/tests/copying/pack_tests.cu b/cpp/tests/copying/pack_tests.cu
index 265f692ca28..cabc608bb35 100644
--- a/cpp/tests/copying/pack_tests.cu
+++ b/cpp/tests/copying/pack_tests.cu
@@ -70,5 +70,22 @@ TEST_F(PackUnpackTest, MultiColumnFixedWidth)
   expect_tables_equal(t, unpacked.table);
 }
 
+TEST_F(PackUnpackTest, MultiColumnWithStrings)
+{
+  fixed_width_column_wrapper<int16_t> col1 ({ 1, 2, 3, 4, 5, 6, 7},
+                                            { 1, 1, 1, 0, 1, 0, 1});
+  strings_column_wrapper              col2 ({"Lorem", "ipsum", "dolor", "sit", "amet", "consectetur", "adipiscing"},
+                                            {      1,       0,       1,     1,      1,             1,            1});
+  strings_column_wrapper              col3 ({"", "this", "is", "a", "column", "of", "strings"});
+  table_view t({col1, col2, col3});
+
+  experimental::packed_table packed = experimental::pack(t);
+  experimental::packed_table packed2{
+    packed.table_metadata, std::make_unique<rmm::device_buffer>(*packed.table_data)};
+  experimental::contiguous_split_result unpacked = experimental::unpack(packed2);
+
+  expect_tables_equal(t, unpacked.table);
+}
+
 } // namespace test
 } // namespace cudf

From 055fada3c5021e7d9ac4ff3fcc027a9242dfd52b Mon Sep 17 00:00:00 2001
From: Devavret Makkar <dmakkar@nvidia.com>
Date: Tue, 21 Apr 2020 06:15:01 +0530
Subject: [PATCH 07/90] changelog

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 724ddfa23f8..0728c29c808 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,7 @@
 - PR #4789 Disallow timestamp sum and diffs via binary ops
 - PR #4815 Add JNI total memory allocated API
 - PR #4906 Add Java bindings for interleave_columns
+- PR #4941 Add `cudf::pack` and `cudf::unpack`
 
 ## Improvements
 

From 0ac61f2f6559127a4a1a56ee8cb02b81f23d910d Mon Sep 17 00:00:00 2001
From: Devavret Makkar <dmakkar@nvidia.com>
Date: Tue, 21 Apr 2020 14:24:34 +0530
Subject: [PATCH 08/90] Use stream and mr in pack. Remove both from unpack

---
 cpp/include/cudf/copying.hpp     |  3 +--
 cpp/include/cudf/detail/copy.hpp |  4 ++++
 cpp/src/copying/pack.cpp         | 14 ++++++--------
 3 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp
index ad42bbf96db..a55162c4dc0 100644
--- a/cpp/include/cudf/copying.hpp
+++ b/cpp/include/cudf/copying.hpp
@@ -460,8 +460,7 @@ struct packed_table {
 packed_table pack(cudf::table_view const& input,
                   rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
 
-contiguous_split_result unpack(packed_table & input,
-                               rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
+contiguous_split_result unpack(packed_table & input);
 
 /**
  * @brief   Returns a new column, where each element is selected from either @p lhs or 
diff --git a/cpp/include/cudf/detail/copy.hpp b/cpp/include/cudf/detail/copy.hpp
index 25e3fd37356..9e2c061e939 100644
--- a/cpp/include/cudf/detail/copy.hpp
+++ b/cpp/include/cudf/detail/copy.hpp
@@ -82,6 +82,10 @@ std::vector<contiguous_split_result> contiguous_split(cudf::table_view const& in
                                                       rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource(),
                                                       cudaStream_t stream = 0);
 
+packed_table pack(cudf::table_view const& input,
+                  cudaStream_t stream = 0,
+                  rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
+
 /**
  * @brief Creates an uninitialized new column of the specified size and same type as the `input`.
  * Supports only fixed-width types.
diff --git a/cpp/src/copying/pack.cpp b/cpp/src/copying/pack.cpp
index 0a768b65a79..0124a035683 100644
--- a/cpp/src/copying/pack.cpp
+++ b/cpp/src/copying/pack.cpp
@@ -1,4 +1,4 @@
-#include <cudf/copying.hpp>
+#include <cudf/detail/copy.hpp>
 #include <cudf/detail/nvtx/ranges.hpp>
 #include <jit/type.h>
 
@@ -44,7 +44,8 @@ packed_table pack(cudf::table_view const& input,
                   cudaStream_t stream,
                   rmm::mr::device_memory_resource* mr)
 {
-  contiguous_split_result contiguous_data = std::move(contiguous_split(input, {})[0]);
+  contiguous_split_result contiguous_data = 
+    std::move(detail::contiguous_split(input, {}, mr, stream).front());
 
   packed_table::serialized_column table_element = {{}, 0, 0, 0, contiguous_data.table.num_columns()};
 
@@ -118,9 +119,7 @@ std::vector<column_view> get_columns(cudf::size_type num_columns,
 
 } // namespace anonymous
 
-contiguous_split_result unpack(packed_table & input,
-                               cudaStream_t stream,
-                               rmm::mr::device_memory_resource* mr)
+contiguous_split_result unpack(packed_table & input)
 {
   cudf::size_type num_columns = input.table_metadata[0]._num_children;
   size_t current_index = 1;
@@ -142,11 +141,10 @@ packed_table pack(cudf::table_view const& input,
   return detail::pack(input, 0, mr);
 }
 
-contiguous_split_result unpack(packed_table & input,
-                               rmm::mr::device_memory_resource* mr)
+contiguous_split_result unpack(packed_table & input)
 {
   CUDF_FUNC_RANGE();
-  return detail::unpack(input, 0, mr);
+  return detail::unpack(input);
 }
 
 } // namespace experimental  

From b3cd355ca5f34386fa7269b93e38f0d27f0a594e Mon Sep 17 00:00:00 2001
From: Devavret Makkar <dmakkar@nvidia.com>
Date: Tue, 21 Apr 2020 14:48:30 +0530
Subject: [PATCH 09/90] Change table_data ref to base_ptr as per review

---
 cpp/src/copying/pack.cpp | 37 ++++++++++++++++++-------------------
 1 file changed, 18 insertions(+), 19 deletions(-)

diff --git a/cpp/src/copying/pack.cpp b/cpp/src/copying/pack.cpp
index 0124a035683..6760f8693e9 100644
--- a/cpp/src/copying/pack.cpp
+++ b/cpp/src/copying/pack.cpp
@@ -9,32 +9,31 @@ namespace detail {
 namespace {
 
 packed_table::serialized_column serialize_column(column_view const& col,
-                                                 rmm::device_buffer const& table_data)
+                                                 uint8_t const* base_ptr)
 {
-  auto all_data_buffer_ptr = static_cast<uint8_t const*>(table_data.data());
-  
   // There are columns types that don't have data in parent e.g. strings
   size_t data_offset = col.data<uint8_t>()
-                        ? col.data<uint8_t>() - all_data_buffer_ptr
+                        ? col.data<uint8_t>() - base_ptr
                         : -1;
   size_t null_mask_offset = col.nullable()
-                             ? reinterpret_cast<uint8_t const*>(col.null_mask()) - all_data_buffer_ptr
+                             ? reinterpret_cast<uint8_t const*>(col.null_mask()) - base_ptr
                              : -1;
+
   return packed_table::serialized_column{col.type(), col.size(), data_offset, null_mask_offset, col.num_children()};
 }
 
 void add_columns(std::vector<column_view> const& cols,
-                 rmm::device_buffer const& table_data,
+                 uint8_t const* base_ptr,
                  std::vector<packed_table::serialized_column> * table_metadata)
 {
   for (auto &&col : cols) {
-    table_metadata->emplace_back(serialize_column(col, table_data));
+    table_metadata->emplace_back(serialize_column(col, base_ptr));
     std::vector<column_view> children;
     for (size_t i = 0; i < col.num_children(); i++) {
       children.push_back(col.child(i));
     }
     
-    add_columns(children, table_data, table_metadata);
+    add_columns(children, base_ptr, table_metadata);
   }
 }
 
@@ -53,7 +52,9 @@ packed_table pack(cudf::table_view const& input,
   
   std::vector<column_view> table_columns(contiguous_data.table.begin(), contiguous_data.table.end());
 
-  add_columns(table_columns, *result.table_data, &result.table_metadata);
+  add_columns(table_columns,
+              static_cast<uint8_t const*>(result.table_data->data()),
+              &result.table_metadata);
 
   return result;
 }
@@ -62,12 +63,10 @@ namespace {
 
 column_view deserialize_column(packed_table::serialized_column serial_column,
                                std::vector<column_view> const& children,
-                               rmm::device_buffer const& table_data)
+                               uint8_t const* base_ptr)
 {
-  auto all_data_buffer_ptr = static_cast<uint8_t const*>(table_data.data());
-
   auto data_ptr = serial_column._data_offset != -1
-                  ? all_data_buffer_ptr + serial_column._data_offset
+                  ? base_ptr + serial_column._data_offset
                   : 0;
 
   // size_t is an unsigned int so -1 is the max value of size_t. If the offset
@@ -81,7 +80,7 @@ column_view deserialize_column(packed_table::serialized_column serial_column,
   // TODO: Replace above with better reasoning
   auto null_mask_ptr = serial_column._null_mask_offset != -1
                         ? reinterpret_cast<bitmask_type const*>(
-                            all_data_buffer_ptr + serial_column._null_mask_offset)
+                            base_ptr + serial_column._null_mask_offset)
                         : 0;
 
   return column_view(
@@ -96,7 +95,7 @@ column_view deserialize_column(packed_table::serialized_column serial_column,
 
 std::vector<column_view> get_columns(cudf::size_type num_columns,
                                      std::vector<packed_table::serialized_column> const& serialized_columns,
-                                     rmm::device_buffer const& table_data,
+                                     uint8_t const* base_ptr,
                                      size_t * current_index)
 {
   std::vector<column_view> cols;
@@ -108,10 +107,10 @@ std::vector<column_view> get_columns(cudf::size_type num_columns,
     std::vector<column_view> children = get_columns(
       serial_column._num_children,
       serialized_columns,
-      table_data,
+      base_ptr,
       current_index);
 
-    cols.emplace_back(deserialize_column(serial_column, children, table_data));
+    cols.emplace_back(deserialize_column(serial_column, children, base_ptr));
   }
   
   return cols;
@@ -126,14 +125,14 @@ contiguous_split_result unpack(packed_table & input)
 
   std::vector<column_view> table_columns = get_columns(num_columns,
                                                        input.table_metadata,
-                                                       *input.table_data,
+                                                       static_cast<uint8_t const*>(input.table_data->data()),
                                                        &current_index);
 
   return contiguous_split_result{table_view(table_columns), std::move(input.table_data)};
 }
 
 } // namespace detail
-
+ 
 packed_table pack(cudf::table_view const& input,
                   rmm::mr::device_memory_resource* mr)
 {

From d0d20f5179159220ae27c7123107a15dc3a0d216 Mon Sep 17 00:00:00 2001
From: Devavret Makkar <dmakkar@nvidia.com>
Date: Wed, 22 Apr 2020 05:42:52 +0530
Subject: [PATCH 10/90] Change packed_table to hide serialized_column type

---
 cpp/include/cudf/copying.hpp | 13 ++++---------
 cpp/src/copying/pack.cpp     | 37 ++++++++++++++++++++++++++----------
 2 files changed, 31 insertions(+), 19 deletions(-)

diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp
index a55162c4dc0..dfabe8480a2 100644
--- a/cpp/include/cudf/copying.hpp
+++ b/cpp/include/cudf/copying.hpp
@@ -445,15 +445,10 @@ std::vector<contiguous_split_result> contiguous_split(cudf::table_view const& in
                                                       rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
 
 struct packed_table {
-    struct serialized_column {
-        cudf::data_type _type;
-        cudf::size_type _size;
-        size_t _data_offset;
-        size_t _null_mask_offset;
-        cudf::size_type _num_children;
-    };
-
-    std::vector<serialized_column> table_metadata;
+    packed_table(std::vector<uint8_t>&& table_metadata,
+                 std::unique_ptr<rmm::device_buffer>&& table_data)
+        : table_metadata(table_metadata), table_data(std::move(table_data)) {};
+    std::vector<uint8_t> table_metadata;
     std::unique_ptr<rmm::device_buffer> table_data;
 };
 
diff --git a/cpp/src/copying/pack.cpp b/cpp/src/copying/pack.cpp
index 6760f8693e9..4dca43eb9a8 100644
--- a/cpp/src/copying/pack.cpp
+++ b/cpp/src/copying/pack.cpp
@@ -8,7 +8,22 @@ namespace detail {
 
 namespace {
 
-packed_table::serialized_column serialize_column(column_view const& col,
+struct serialized_column {
+  cudf::data_type _type;
+  cudf::size_type _size;
+  size_t _data_offset;
+  size_t _null_mask_offset;
+  cudf::size_type _num_children;
+};
+
+inline void add_column_to_vector(serialized_column const& column,
+                                 std::vector<uint8_t> * table_metadata)
+{
+  auto bytes = reinterpret_cast<uint8_t const*>(&column);
+  std::copy(bytes, bytes + sizeof(serialized_column), std::back_inserter(*table_metadata));
+}
+
+serialized_column serialize_column(column_view const& col,
                                                  uint8_t const* base_ptr)
 {
   // There are columns types that don't have data in parent e.g. strings
@@ -19,15 +34,15 @@ packed_table::serialized_column serialize_column(column_view const& col,
                              ? reinterpret_cast<uint8_t const*>(col.null_mask()) - base_ptr
                              : -1;
 
-  return packed_table::serialized_column{col.type(), col.size(), data_offset, null_mask_offset, col.num_children()};
+  return serialized_column{col.type(), col.size(), data_offset, null_mask_offset, col.num_children()};
 }
 
 void add_columns(std::vector<column_view> const& cols,
                  uint8_t const* base_ptr,
-                 std::vector<packed_table::serialized_column> * table_metadata)
+                 std::vector<uint8_t> * table_metadata)
 {
   for (auto &&col : cols) {
-    table_metadata->emplace_back(serialize_column(col, base_ptr));
+    add_column_to_vector(serialize_column(col, base_ptr), table_metadata);
     std::vector<column_view> children;
     for (size_t i = 0; i < col.num_children(); i++) {
       children.push_back(col.child(i));
@@ -46,9 +61,10 @@ packed_table pack(cudf::table_view const& input,
   contiguous_split_result contiguous_data = 
     std::move(detail::contiguous_split(input, {}, mr, stream).front());
 
-  packed_table::serialized_column table_element = {{}, 0, 0, 0, contiguous_data.table.num_columns()};
+  serialized_column table_element = {{}, 0, 0, 0, contiguous_data.table.num_columns()};
 
-  packed_table result{{table_element}, std::move(contiguous_data.all_data)};
+  auto result = packed_table({}, std::move(contiguous_data.all_data));
+  add_column_to_vector(table_element, &result.table_metadata);
   
   std::vector<column_view> table_columns(contiguous_data.table.begin(), contiguous_data.table.end());
 
@@ -61,7 +77,7 @@ packed_table pack(cudf::table_view const& input,
 
 namespace {
 
-column_view deserialize_column(packed_table::serialized_column serial_column,
+column_view deserialize_column(serialized_column serial_column,
                                std::vector<column_view> const& children,
                                uint8_t const* base_ptr)
 {
@@ -94,7 +110,7 @@ column_view deserialize_column(packed_table::serialized_column serial_column,
 }
 
 std::vector<column_view> get_columns(cudf::size_type num_columns,
-                                     std::vector<packed_table::serialized_column> const& serialized_columns,
+                                     serialized_column const* serialized_columns,
                                      uint8_t const* base_ptr,
                                      size_t * current_index)
 {
@@ -120,11 +136,12 @@ std::vector<column_view> get_columns(cudf::size_type num_columns,
 
 contiguous_split_result unpack(packed_table & input)
 {
-  cudf::size_type num_columns = input.table_metadata[0]._num_children;
+  auto serialized_columns = reinterpret_cast<serialized_column const*>(input.table_metadata.data());
+  cudf::size_type num_columns = serialized_columns[0]._num_children;
   size_t current_index = 1;
 
   std::vector<column_view> table_columns = get_columns(num_columns,
-                                                       input.table_metadata,
+    serialized_columns,
                                                        static_cast<uint8_t const*>(input.table_data->data()),
                                                        &current_index);
 

From f211a85db6e4b8360b7d587789d41ce559e439e3 Mon Sep 17 00:00:00 2001
From: Devavret Makkar <dmakkar@nvidia.com>
Date: Wed, 22 Apr 2020 05:45:50 +0530
Subject: [PATCH 11/90] Change API to take ownership of packed_table in unpack

---
 cpp/include/cudf/copying.hpp    |  2 +-
 cpp/src/copying/pack.cpp        | 16 ++++++------
 cpp/tests/copying/pack_tests.cu | 43 ++++++++++++++-------------------
 3 files changed, 27 insertions(+), 34 deletions(-)

diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp
index dfabe8480a2..337ebf51f5c 100644
--- a/cpp/include/cudf/copying.hpp
+++ b/cpp/include/cudf/copying.hpp
@@ -455,7 +455,7 @@ struct packed_table {
 packed_table pack(cudf::table_view const& input,
                   rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
 
-contiguous_split_result unpack(packed_table & input);
+contiguous_split_result unpack(std::unique_ptr<packed_table> input);
 
 /**
  * @brief   Returns a new column, where each element is selected from either @p lhs or 
diff --git a/cpp/src/copying/pack.cpp b/cpp/src/copying/pack.cpp
index 4dca43eb9a8..20fcbff81cf 100644
--- a/cpp/src/copying/pack.cpp
+++ b/cpp/src/copying/pack.cpp
@@ -24,7 +24,7 @@ inline void add_column_to_vector(serialized_column const& column,
 }
 
 serialized_column serialize_column(column_view const& col,
-                                                 uint8_t const* base_ptr)
+                                   uint8_t const* base_ptr)
 {
   // There are columns types that don't have data in parent e.g. strings
   size_t data_offset = col.data<uint8_t>()
@@ -134,18 +134,18 @@ std::vector<column_view> get_columns(cudf::size_type num_columns,
 
 } // namespace anonymous
 
-contiguous_split_result unpack(packed_table & input)
+contiguous_split_result unpack(std::unique_ptr<packed_table> input)
 {
-  auto serialized_columns = reinterpret_cast<serialized_column const*>(input.table_metadata.data());
+  auto serialized_columns = reinterpret_cast<serialized_column const*>(input->table_metadata.data());
   cudf::size_type num_columns = serialized_columns[0]._num_children;
   size_t current_index = 1;
 
   std::vector<column_view> table_columns = get_columns(num_columns,
     serialized_columns,
-                                                       static_cast<uint8_t const*>(input.table_data->data()),
-                                                       &current_index);
+    static_cast<uint8_t const*>(input->table_data->data()),
+    &current_index);
 
-  return contiguous_split_result{table_view(table_columns), std::move(input.table_data)};
+  return contiguous_split_result{table_view(table_columns), std::move(input->table_data)};
 }
 
 } // namespace detail
@@ -157,10 +157,10 @@ packed_table pack(cudf::table_view const& input,
   return detail::pack(input, 0, mr);
 }
 
-contiguous_split_result unpack(packed_table & input)
+contiguous_split_result unpack(std::unique_ptr<packed_table> input)
 {
   CUDF_FUNC_RANGE();
-  return detail::unpack(input);
+  return detail::unpack(std::move(input));
 }
 
 } // namespace experimental  
diff --git a/cpp/tests/copying/pack_tests.cu b/cpp/tests/copying/pack_tests.cu
index cabc608bb35..20ab620c3a0 100644
--- a/cpp/tests/copying/pack_tests.cu
+++ b/cpp/tests/copying/pack_tests.cu
@@ -23,7 +23,20 @@ namespace cudf {
 namespace test {
 
 
-struct PackUnpackTest : public BaseFixture {};
+struct PackUnpackTest : public BaseFixture {
+
+  void run_test(table_view const& t) {
+    auto packed = experimental::pack(t);
+    auto packed2 = std::make_unique<experimental::packed_table>(
+      std::vector<uint8_t>(packed.table_metadata),
+      std::make_unique<rmm::device_buffer>(*packed.table_data));
+
+    experimental::contiguous_split_result unpacked = experimental::unpack(std::move(packed2));
+
+    expect_tables_equal(t, unpacked.table);
+  }
+
+};
 
 TEST_F(PackUnpackTest, SingleColumnFixedWidth)
 {
@@ -31,12 +44,7 @@ TEST_F(PackUnpackTest, SingleColumnFixedWidth)
                                             { 1, 1, 1, 0, 1, 0, 1});
   table_view t({col1});
 
-  experimental::packed_table packed = experimental::pack(t);
-  experimental::packed_table packed2{
-    packed.table_metadata, std::make_unique<rmm::device_buffer>(*packed.table_data)};
-  experimental::contiguous_split_result unpacked = experimental::unpack(packed2);
-
-  expect_tables_equal(t, unpacked.table);
+  this->run_test(t);
 }
 
 TEST_F(PackUnpackTest, SingleColumnFixedWidthNonNullable)
@@ -44,12 +52,7 @@ TEST_F(PackUnpackTest, SingleColumnFixedWidthNonNullable)
   fixed_width_column_wrapper<int64_t> col1 ({ 1, 2, 3, 4, 5, 6, 7});
   table_view t({col1});
 
-  experimental::packed_table packed = experimental::pack(t);
-  experimental::packed_table packed2{
-    packed.table_metadata, std::make_unique<rmm::device_buffer>(*packed.table_data)};
-  experimental::contiguous_split_result unpacked = experimental::unpack(packed2);
-
-  expect_tables_equal(t, unpacked.table);
+  this->run_test(t);
 }
 
 TEST_F(PackUnpackTest, MultiColumnFixedWidth)
@@ -62,12 +65,7 @@ TEST_F(PackUnpackTest, MultiColumnFixedWidth)
                                             { 0, 1, 1, 1, 1, 1, 1});
   table_view t({col1, col2, col3});
 
-  experimental::packed_table packed = experimental::pack(t);
-  experimental::packed_table packed2{
-    packed.table_metadata, std::make_unique<rmm::device_buffer>(*packed.table_data)};
-  experimental::contiguous_split_result unpacked = experimental::unpack(packed2);
-
-  expect_tables_equal(t, unpacked.table);
+  this->run_test(t);
 }
 
 TEST_F(PackUnpackTest, MultiColumnWithStrings)
@@ -79,12 +77,7 @@ TEST_F(PackUnpackTest, MultiColumnWithStrings)
   strings_column_wrapper              col3 ({"", "this", "is", "a", "column", "of", "strings"});
   table_view t({col1, col2, col3});
 
-  experimental::packed_table packed = experimental::pack(t);
-  experimental::packed_table packed2{
-    packed.table_metadata, std::make_unique<rmm::device_buffer>(*packed.table_data)};
-  experimental::contiguous_split_result unpacked = experimental::unpack(packed2);
-
-  expect_tables_equal(t, unpacked.table);
+  this->run_test(t);
 }
 
 } // namespace test

From 9fd698541c0d4b6ef107b1943f77a9ffc94203ea Mon Sep 17 00:00:00 2001
From: Devavret Makkar <dmakkar@nvidia.com>
Date: Wed, 22 Apr 2020 23:53:11 +0530
Subject: [PATCH 12/90] clang format of code in this PR

---
 cpp/include/cudf/copying.hpp     |  10 +--
 cpp/include/cudf/detail/copy.hpp |   2 +-
 cpp/src/copying/pack.cpp         | 143 ++++++++++++++-----------------
 3 files changed, 68 insertions(+), 87 deletions(-)

diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp
index a5fe7dc5f76..b317e3896dd 100644
--- a/cpp/include/cudf/copying.hpp
+++ b/cpp/include/cudf/copying.hpp
@@ -451,11 +451,11 @@ std::vector<contiguous_split_result> contiguous_split(
   rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
 
 struct packed_table {
-    packed_table(std::vector<uint8_t>&& table_metadata,
-                 std::unique_ptr<rmm::device_buffer>&& table_data)
-        : table_metadata(table_metadata), table_data(std::move(table_data)) {};
-    std::vector<uint8_t> table_metadata;
-    std::unique_ptr<rmm::device_buffer> table_data;
+  packed_table(std::vector<uint8_t>&& table_metadata,
+               std::unique_ptr<rmm::device_buffer>&& table_data)
+    : table_metadata(table_metadata), table_data(std::move(table_data)){};
+  std::vector<uint8_t> table_metadata;
+  std::unique_ptr<rmm::device_buffer> table_data;
 };
 
 packed_table pack(cudf::table_view const& input,
diff --git a/cpp/include/cudf/detail/copy.hpp b/cpp/include/cudf/detail/copy.hpp
index 97fad472bb9..518d526e8e6 100644
--- a/cpp/include/cudf/detail/copy.hpp
+++ b/cpp/include/cudf/detail/copy.hpp
@@ -85,7 +85,7 @@ std::vector<contiguous_split_result> contiguous_split(
   cudaStream_t stream                 = 0);
 
 packed_table pack(cudf::table_view const& input,
-                  cudaStream_t stream = 0,
+                  cudaStream_t stream                 = 0,
                   rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
 
 /**
diff --git a/cpp/src/copying/pack.cpp b/cpp/src/copying/pack.cpp
index 20fcbff81cf..9d7c19cf02e 100644
--- a/cpp/src/copying/pack.cpp
+++ b/cpp/src/copying/pack.cpp
@@ -1,5 +1,6 @@
 #include <cudf/detail/copy.hpp>
 #include <cudf/detail/nvtx/ranges.hpp>
+
 #include <jit/type.h>
 
 namespace cudf {
@@ -9,68 +10,59 @@ namespace detail {
 namespace {
 
 struct serialized_column {
-  cudf::data_type _type;
-  cudf::size_type _size;
+  data_type _type;
+  size_type _size;
   size_t _data_offset;
   size_t _null_mask_offset;
-  cudf::size_type _num_children;
+  size_type _num_children;
 };
 
 inline void add_column_to_vector(serialized_column const& column,
-                                 std::vector<uint8_t> * table_metadata)
-{
+                                 std::vector<uint8_t>* table_metadata) {
   auto bytes = reinterpret_cast<uint8_t const*>(&column);
   std::copy(bytes, bytes + sizeof(serialized_column), std::back_inserter(*table_metadata));
 }
 
-serialized_column serialize_column(column_view const& col,
-                                   uint8_t const* base_ptr)
-{
+serialized_column serialize_column(column_view const& col, uint8_t const* base_ptr) {
   // There are columns types that don't have data in parent e.g. strings
-  size_t data_offset = col.data<uint8_t>()
-                        ? col.data<uint8_t>() - base_ptr
-                        : -1;
-  size_t null_mask_offset = col.nullable()
-                             ? reinterpret_cast<uint8_t const*>(col.null_mask()) - base_ptr
-                             : -1;
-
-  return serialized_column{col.type(), col.size(), data_offset, null_mask_offset, col.num_children()};
+  size_t data_offset = col.data<uint8_t>() ? col.data<uint8_t>() - base_ptr : -1;
+  size_t null_mask_offset =
+    col.nullable() ? reinterpret_cast<uint8_t const*>(col.null_mask()) - base_ptr : -1;
+
+  return serialized_column{
+    col.type(), col.size(), data_offset, null_mask_offset, col.num_children()};
 }
 
 void add_columns(std::vector<column_view> const& cols,
                  uint8_t const* base_ptr,
-                 std::vector<uint8_t> * table_metadata)
-{
-  for (auto &&col : cols) {
+                 std::vector<uint8_t>* table_metadata) {
+  for (auto&& col : cols) {
     add_column_to_vector(serialize_column(col, base_ptr), table_metadata);
     std::vector<column_view> children;
-    for (size_t i = 0; i < col.num_children(); i++) {
-      children.push_back(col.child(i));
-    }
-    
+    for (size_t i = 0; i < col.num_children(); i++) { children.push_back(col.child(i)); }
+
     add_columns(children, base_ptr, table_metadata);
   }
 }
 
-} // namespace anonymous
+}  // namespace
 
-packed_table pack(cudf::table_view const& input,
+packed_table pack(table_view const& input,
                   cudaStream_t stream,
-                  rmm::mr::device_memory_resource* mr)
-{
-  contiguous_split_result contiguous_data = 
+                  rmm::mr::device_memory_resource* mr) {
+  contiguous_split_result contiguous_data =
     std::move(detail::contiguous_split(input, {}, mr, stream).front());
 
   serialized_column table_element = {{}, 0, 0, 0, contiguous_data.table.num_columns()};
 
   auto result = packed_table({}, std::move(contiguous_data.all_data));
   add_column_to_vector(table_element, &result.table_metadata);
-  
-  std::vector<column_view> table_columns(contiguous_data.table.begin(), contiguous_data.table.end());
 
-  add_columns(table_columns,
-              static_cast<uint8_t const*>(result.table_data->data()),
-              &result.table_metadata);
+  std::vector<column_view> table_columns(contiguous_data.table.begin(),
+                                         contiguous_data.table.end());
+
+  add_columns(
+    table_columns, static_cast<uint8_t const*>(result.table_data->data()), &result.table_metadata);
 
   return result;
 }
@@ -79,11 +71,8 @@ namespace {
 
 column_view deserialize_column(serialized_column serial_column,
                                std::vector<column_view> const& children,
-                               uint8_t const* base_ptr)
-{
-  auto data_ptr = serial_column._data_offset != -1
-                  ? base_ptr + serial_column._data_offset
-                  : 0;
+                               uint8_t const* base_ptr) {
+  auto data_ptr = serial_column._data_offset != -1 ? base_ptr + serial_column._data_offset : 0;
 
   // size_t is an unsigned int so -1 is the max value of size_t. If the offset
   // is UINT64_MAX then just assume there's no null mask instead of thinking
@@ -94,74 +83,66 @@ column_view deserialize_column(serialized_column serial_column,
   // bigger than Y2K; and I'll be prepared with a cottage in Waknaghat and a lifetime
   // supply of soylent and shotgun ammo.
   // TODO: Replace above with better reasoning
-  auto null_mask_ptr = serial_column._null_mask_offset != -1
-                        ? reinterpret_cast<bitmask_type const*>(
-                            base_ptr + serial_column._null_mask_offset)
-                        : 0;
-
-  return column_view(
-    serial_column._type,
-    serial_column._size,
-    data_ptr,
-    null_mask_ptr,
-    UNKNOWN_NULL_COUNT,
-    0,
-    children);
+  auto null_mask_ptr =
+    serial_column._null_mask_offset != -1
+      ? reinterpret_cast<bitmask_type const*>(base_ptr + serial_column._null_mask_offset)
+      : 0;
+
+  return column_view(serial_column._type,
+                     serial_column._size,
+                     data_ptr,
+                     null_mask_ptr,
+                     UNKNOWN_NULL_COUNT,
+                     0,
+                     children);
 }
 
-std::vector<column_view> get_columns(cudf::size_type num_columns,
+std::vector<column_view> get_columns(size_type num_columns,
                                      serialized_column const* serialized_columns,
                                      uint8_t const* base_ptr,
-                                     size_t * current_index)
-{
+                                     size_t* current_index) {
   std::vector<column_view> cols;
-  for (size_t i = 0; i < num_columns; i++)
-  {
+  for (size_t i = 0; i < num_columns; i++) {
     auto serial_column = serialized_columns[*current_index];
     (*current_index)++;
 
-    std::vector<column_view> children = get_columns(
-      serial_column._num_children,
-      serialized_columns,
-      base_ptr,
-      current_index);
+    std::vector<column_view> children =
+      get_columns(serial_column._num_children, serialized_columns, base_ptr, current_index);
 
     cols.emplace_back(deserialize_column(serial_column, children, base_ptr));
   }
-  
+
   return cols;
 }
 
-} // namespace anonymous
+}  // namespace
 
-contiguous_split_result unpack(std::unique_ptr<packed_table> input)
-{
-  auto serialized_columns = reinterpret_cast<serialized_column const*>(input->table_metadata.data());
-  cudf::size_type num_columns = serialized_columns[0]._num_children;
-  size_t current_index = 1;
+contiguous_split_result unpack(std::unique_ptr<packed_table> input) {
+  auto serialized_columns =
+    reinterpret_cast<serialized_column const*>(input->table_metadata.data());
+  size_type num_columns = serialized_columns[0]._num_children;
+  size_t current_index  = 1;
 
-  std::vector<column_view> table_columns = get_columns(num_columns,
-    serialized_columns,
-    static_cast<uint8_t const*>(input->table_data->data()),
-    &current_index);
+  std::vector<column_view> table_columns =
+    get_columns(num_columns,
+                serialized_columns,
+                static_cast<uint8_t const*>(input->table_data->data()),
+                &current_index);
 
   return contiguous_split_result{table_view(table_columns), std::move(input->table_data)};
 }
 
-} // namespace detail
- 
-packed_table pack(cudf::table_view const& input,
-                  rmm::mr::device_memory_resource* mr)
-{
+}  // namespace detail
+
+packed_table pack(table_view const& input, rmm::mr::device_memory_resource* mr) {
   CUDF_FUNC_RANGE();
   return detail::pack(input, 0, mr);
 }
 
-contiguous_split_result unpack(std::unique_ptr<packed_table> input)
-{
+contiguous_split_result unpack(std::unique_ptr<packed_table> input) {
   CUDF_FUNC_RANGE();
   return detail::unpack(std::move(input));
 }
 
-} // namespace experimental  
-} // namespace cudf
+}  // namespace experimental
+}  // namespace cudf

From 4428907df7442facd72eb76a43bd536bbaaf2168 Mon Sep 17 00:00:00 2001
From: Devavret Makkar <dmakkar@nvidia.com>
Date: Thu, 23 Apr 2020 00:37:24 +0530
Subject: [PATCH 13/90] Change packed_table.table_metadata to unique_ptr
 because the metadata is also expensive to copy, so taking ownership must be enforced

---
 cpp/include/cudf/copying.hpp    |  8 ++++----
 cpp/src/copying/pack.cpp        | 12 +++++++-----
 cpp/tests/copying/pack_tests.cu | 13 ++++++-------
 3 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp
index b317e3896dd..5d87ae47573 100644
--- a/cpp/include/cudf/copying.hpp
+++ b/cpp/include/cudf/copying.hpp
@@ -451,10 +451,10 @@ std::vector<contiguous_split_result> contiguous_split(
   rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
 
 struct packed_table {
-  packed_table(std::vector<uint8_t>&& table_metadata,
-               std::unique_ptr<rmm::device_buffer>&& table_data)
-    : table_metadata(table_metadata), table_data(std::move(table_data)){};
-  std::vector<uint8_t> table_metadata;
+  packed_table(std::unique_ptr<std::vector<uint8_t>> table_metadata,
+               std::unique_ptr<rmm::device_buffer> table_data)
+    : table_metadata(std::move(table_metadata)), table_data(std::move(table_data)){};
+  std::unique_ptr<std::vector<uint8_t>> table_metadata;
   std::unique_ptr<rmm::device_buffer> table_data;
 };
 
diff --git a/cpp/src/copying/pack.cpp b/cpp/src/copying/pack.cpp
index 9d7c19cf02e..82641cf777d 100644
--- a/cpp/src/copying/pack.cpp
+++ b/cpp/src/copying/pack.cpp
@@ -55,14 +55,16 @@ packed_table pack(table_view const& input,
 
   serialized_column table_element = {{}, 0, 0, 0, contiguous_data.table.num_columns()};
 
-  auto result = packed_table({}, std::move(contiguous_data.all_data));
-  add_column_to_vector(table_element, &result.table_metadata);
+  auto result =
+    packed_table(std::make_unique<std::vector<uint8_t>>(), std::move(contiguous_data.all_data));
+  add_column_to_vector(table_element, result.table_metadata.get());
 
   std::vector<column_view> table_columns(contiguous_data.table.begin(),
                                          contiguous_data.table.end());
 
-  add_columns(
-    table_columns, static_cast<uint8_t const*>(result.table_data->data()), &result.table_metadata);
+  add_columns(table_columns,
+              static_cast<uint8_t const*>(result.table_data->data()),
+              result.table_metadata.get());
 
   return result;
 }
@@ -119,7 +121,7 @@ std::vector<column_view> get_columns(size_type num_columns,
 
 contiguous_split_result unpack(std::unique_ptr<packed_table> input) {
   auto serialized_columns =
-    reinterpret_cast<serialized_column const*>(input->table_metadata.data());
+    reinterpret_cast<serialized_column const*>(input->table_metadata->data());
   size_type num_columns = serialized_columns[0]._num_children;
   size_t current_index  = 1;
 
diff --git a/cpp/tests/copying/pack_tests.cu b/cpp/tests/copying/pack_tests.cu
index 20ab620c3a0..f05af999f33 100644
--- a/cpp/tests/copying/pack_tests.cu
+++ b/cpp/tests/copying/pack_tests.cu
@@ -22,22 +22,20 @@
 namespace cudf {
 namespace test {
 
-
 struct PackUnpackTest : public BaseFixture {
-
   void run_test(table_view const& t) {
-    auto packed = experimental::pack(t);
+    auto packed  = experimental::pack(t);
     auto packed2 = std::make_unique<experimental::packed_table>(
-      std::vector<uint8_t>(packed.table_metadata),
+      std::make_unique<std::vector<uint8_t>>(*packed.table_metadata),
       std::make_unique<rmm::device_buffer>(*packed.table_data));
 
     experimental::contiguous_split_result unpacked = experimental::unpack(std::move(packed2));
 
     expect_tables_equal(t, unpacked.table);
   }
-
 };
 
+// clang-format off
 TEST_F(PackUnpackTest, SingleColumnFixedWidth)
 {
   fixed_width_column_wrapper<int64_t> col1 ({ 1, 2, 3, 4, 5, 6, 7},
@@ -79,6 +77,7 @@ TEST_F(PackUnpackTest, MultiColumnWithStrings)
 
   this->run_test(t);
 }
+// clang-format on
 
-} // namespace test
-} // namespace cudf
+}  // namespace test
+}  // namespace cudf

From aac9b146a77e2ff67deb0cbbec6cad86e893f8b9 Mon Sep 17 00:00:00 2001
From: Devavret Makkar <dmakkar@nvidia.com>
Date: Thu, 23 Apr 2020 22:36:15 +0530
Subject: [PATCH 14/90] Initial pack support in Cython

---
 cpp/include/cudf/copying.hpp          |   1 +
 python/cudf/cudf/_lib/copying.pyx     | 107 ++++++++++++++++++++++++++
 python/cudf/cudf/_lib/cpp/copying.pxd |  10 ++-
 python/cudf/cudf/_lib/move.pxd        |   2 +
 4 files changed, 119 insertions(+), 1 deletion(-)

diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp
index 5d87ae47573..4ad705c59b4 100644
--- a/cpp/include/cudf/copying.hpp
+++ b/cpp/include/cudf/copying.hpp
@@ -451,6 +451,7 @@ std::vector<contiguous_split_result> contiguous_split(
   rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
 
 struct packed_table {
+  packed_table() = default;
   packed_table(std::unique_ptr<std::vector<uint8_t>> table_metadata,
                std::unique_ptr<rmm::device_buffer> table_data)
     : table_metadata(std::move(table_metadata)), table_data(std::move(table_data)){};
diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx
index 43d2ec0950c..e43b0748d71 100644
--- a/python/cudf/cudf/_lib/copying.pyx
+++ b/python/cudf/cudf/_lib/copying.pyx
@@ -7,6 +7,8 @@ from libcpp.memory cimport make_unique, unique_ptr
 from libcpp.vector cimport vector
 from libc.stdint cimport int32_t
 
+from rmm._lib.device_buffer cimport DeviceBuffer
+
 from cudf._lib.column cimport Column
 from cudf._lib.scalar cimport Scalar
 from cudf._lib.table cimport Table
@@ -445,6 +447,111 @@ def table_split(Table input_table, object splits, bool keep_index=True):
 
     return result
 
+import cudf
+import errno
+import os
+import pyarrow as pa
+import json
+
+from cython.operator import dereference
+import numpy as np
+
+from cudf.utils.dtypes import np_to_pa_dtype, is_categorical_dtype
+from libc.stdlib cimport free
+from libc.stdint cimport uint8_t
+from libcpp.memory cimport shared_ptr, unique_ptr, make_unique
+from libcpp.string cimport string
+from libcpp.map cimport map
+from libcpp.vector cimport vector
+
+from cudf._lib.cpp.types cimport size_type
+from cudf._lib.table cimport Table
+from cudf._lib.cpp.table.table cimport table
+from cudf._lib.cpp.table.table_view cimport (
+    table_view
+)
+from cudf._lib.move cimport move
+from cudf._lib.cpp.io.functions cimport (
+    write_parquet_args,
+    write_parquet as parquet_writer,
+    merge_rowgroup_metadata as parquet_merge_metadata,
+    read_parquet_args,
+    read_parquet as parquet_reader,
+    write_parquet_chunked_args,
+    write_parquet_chunked_begin,
+    write_parquet_chunked,
+    write_parquet_chunked_end,
+    pq_chunked_state
+)
+from cudf._lib.io.utils cimport (
+    make_source_info
+)
+
+cimport cudf._lib.cpp.types as cudf_types
+cimport cudf._lib.cpp.io.types as cudf_io_types
+
+cdef class BufferArrayFromVector:
+    cdef Py_ssize_t length
+    cdef unique_ptr[vector[uint8_t]] in_vec
+
+    # these two things declare part of the buffer interface
+    cdef Py_ssize_t shape[1]
+    cdef Py_ssize_t strides[1]
+
+    @staticmethod
+    cdef BufferArrayFromVector from_unique_ptr(
+        unique_ptr[vector[uint8_t]] in_vec
+    ):
+        cdef BufferArrayFromVector buf = BufferArrayFromVector()
+        buf.in_vec = move(in_vec)
+        buf.length = dereference(buf.in_vec).size()
+        return buf
+
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        cdef Py_ssize_t itemsize = sizeof(uint8_t)
+
+        self.shape[0] = self.length
+        self.strides[0] = 1
+
+        buffer.buf = dereference(self.in_vec).data()
+
+        buffer.format = NULL  # byte
+        buffer.internal = NULL
+        buffer.itemsize = itemsize
+        buffer.len = self.length * itemsize   # product(shape) * itemsize
+        buffer.ndim = 1
+        buffer.obj = self
+        buffer.readonly = 0
+        buffer.shape = self.shape
+        buffer.strides = self.strides
+        buffer.suboffsets = NULL
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass
+
+
+def pack(Table input_table):
+
+    cdef table_view input_table_view = input_table.view()
+
+    cdef cpp_copying.packed_table c_result
+
+    with nogil:
+        c_result = move(
+            cpp_copying.pack(
+                input_table_view
+            )
+        )
+    
+    # Convert to python object and return
+    # python objects needed: wrap vector of bytes, wrap rmm device vector
+    data = DeviceBuffer.c_from_unique_ptr(move(c_result.table_data))
+    metadata_py = BufferArrayFromVector.from_unique_ptr(
+        move(c_result.table_metadata)
+    )
+    metadata = np.asarray(metadata_py)
+
+    return (metadata, data)
 
 def _copy_if_else_column_column(Column lhs, Column rhs, Column boolean_mask):
 
diff --git a/python/cudf/cudf/_lib/cpp/copying.pxd b/python/cudf/cudf/_lib/cpp/copying.pxd
index c3a7a5c502c..e35827ea2b3 100644
--- a/python/cudf/cudf/_lib/cpp/copying.pxd
+++ b/python/cudf/cudf/_lib/cpp/copying.pxd
@@ -3,7 +3,7 @@
 from rmm._lib.device_buffer cimport device_buffer
 
 from libcpp cimport bool
-from libc.stdint cimport int32_t
+from libc.stdint cimport int32_t, uint8_t
 from libcpp.memory cimport unique_ptr
 from libcpp.vector cimport vector
 
@@ -114,6 +114,14 @@ cdef extern from "cudf/copying.hpp" namespace "cudf::experimental" nogil:
         vector[size_type] splits
     ) except +
 
+    cdef struct packed_table:
+        unique_ptr[vector[uint8_t]] table_metadata
+        unique_ptr[device_buffer] table_data
+
+    cdef packed_table pack (
+        table_view input_table
+    ) except +
+
     cdef unique_ptr[column] copy_if_else (
         column_view lhs,
         column_view rhs,
diff --git a/python/cudf/cudf/_lib/move.pxd b/python/cudf/cudf/_lib/move.pxd
index 556063e8df5..3e2166af1de 100644
--- a/python/cudf/cudf/_lib/move.pxd
+++ b/python/cudf/cudf/_lib/move.pxd
@@ -10,6 +10,7 @@ from cudf._lib.cpp.types cimport (
 )
 from cudf._lib.cpp.aggregation cimport aggregation
 from cudf._lib.cpp.scalar.scalar cimport scalar
+from cudf._lib.cpp.copying cimport packed_table
 from cudf._lib.cpp.column.column cimport column, column_contents
 from cudf._lib.cpp.column.column_view cimport column_view
 from cudf._lib.cpp.table.table cimport table
@@ -61,6 +62,7 @@ cdef extern from "<utility>" namespace "std" nogil:
     cdef device_buffer move(device_buffer)
     cdef unique_ptr[device_buffer] move(unique_ptr[device_buffer])
     cdef unique_ptr[scalar] move(unique_ptr[scalar])
+    cdef packed_table move(packed_table)
     cdef pair[unique_ptr[device_buffer], size_type] move(
         pair[unique_ptr[device_buffer], size_type]
     )

From 38a6dff803f5ca691faa7b523220dd88397c2e2c Mon Sep 17 00:00:00 2001
From: Devavret Makkar <dmakkar@nvidia.com>
Date: Fri, 24 Apr 2020 00:32:54 +0530
Subject: [PATCH 15/90] Cython bindings for unpack

---
 python/cudf/cudf/_lib/copying.pyx     | 30 ++++++++++++++++++++++++++-
 python/cudf/cudf/_lib/cpp/copying.pxd |  6 +++++-
 python/cudf/cudf/_lib/move.pxd        |  4 +++-
 3 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx
index e43b0748d71..fed748f2b25 100644
--- a/python/cudf/cudf/_lib/copying.pyx
+++ b/python/cudf/cudf/_lib/copying.pyx
@@ -7,7 +7,7 @@ from libcpp.memory cimport make_unique, unique_ptr
 from libcpp.vector cimport vector
 from libc.stdint cimport int32_t
 
-from rmm._lib.device_buffer cimport DeviceBuffer
+from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer
 
 from cudf._lib.column cimport Column
 from cudf._lib.scalar cimport Scalar
@@ -553,6 +553,34 @@ def pack(Table input_table):
 
     return (metadata, data)
 
+
+def unpack(vector[uint8_t] input_packed_table_metadata,
+           DeviceBuffer input_packed_table_data):
+    
+    cdef unique_ptr[vector[uint8_t]] c_metadata = move(
+        make_unique[vector[uint8_t]](move(input_packed_table_metadata))
+    )
+
+    cdef unique_ptr[device_buffer] c_data = move(input_packed_table_data.c_obj)
+    cdef unique_ptr[cpp_copying.packed_table] c_packed_table = move(
+        make_unique[cpp_copying.packed_table](move(c_metadata), move(c_data))
+    )
+
+    cdef cpp_copying.contiguous_split_result c_result
+
+    with nogil:
+        c_result = move(
+            cpp_copying.unpack(move(c_packed_table))
+        )
+    
+    table_data_owner = DeviceBuffer.c_from_unique_ptr(move(c_result.all_data))
+    return Table.from_table_view(
+        c_result.table,
+        owner=table_data_owner,
+        column_names=range(c_result.table.num_columns())
+    )
+
+
 def _copy_if_else_column_column(Column lhs, Column rhs, Column boolean_mask):
 
     cdef column_view lhs_view = lhs.view()
diff --git a/python/cudf/cudf/_lib/cpp/copying.pxd b/python/cudf/cudf/_lib/cpp/copying.pxd
index e35827ea2b3..21a3e083538 100644
--- a/python/cudf/cudf/_lib/cpp/copying.pxd
+++ b/python/cudf/cudf/_lib/cpp/copying.pxd
@@ -107,7 +107,7 @@ cdef extern from "cudf/copying.hpp" namespace "cudf::experimental" nogil:
 
     cdef struct contiguous_split_result:
         table_view table
-        vector[device_buffer] all_data
+        unique_ptr[device_buffer] all_data
 
     cdef vector[contiguous_split_result] contiguous_split (
         table_view input_table,
@@ -122,6 +122,10 @@ cdef extern from "cudf/copying.hpp" namespace "cudf::experimental" nogil:
         table_view input_table
     ) except +
 
+    cdef contiguous_split_result unpack (
+        unique_ptr[packed_table] input_packed_table
+    ) except +
+
     cdef unique_ptr[column] copy_if_else (
         column_view lhs,
         column_view rhs,
diff --git a/python/cudf/cudf/_lib/move.pxd b/python/cudf/cudf/_lib/move.pxd
index 3e2166af1de..4b975343e3e 100644
--- a/python/cudf/cudf/_lib/move.pxd
+++ b/python/cudf/cudf/_lib/move.pxd
@@ -10,7 +10,7 @@ from cudf._lib.cpp.types cimport (
 )
 from cudf._lib.cpp.aggregation cimport aggregation
 from cudf._lib.cpp.scalar.scalar cimport scalar
-from cudf._lib.cpp.copying cimport packed_table
+from cudf._lib.cpp.copying cimport packed_table, contiguous_split_result
 from cudf._lib.cpp.column.column cimport column, column_contents
 from cudf._lib.cpp.column.column_view cimport column_view
 from cudf._lib.cpp.table.table cimport table
@@ -63,6 +63,8 @@ cdef extern from "<utility>" namespace "std" nogil:
     cdef unique_ptr[device_buffer] move(unique_ptr[device_buffer])
     cdef unique_ptr[scalar] move(unique_ptr[scalar])
     cdef packed_table move(packed_table)
+    cdef unique_ptr[packed_table] move(unique_ptr[packed_table])
+    cdef contiguous_split_result move(contiguous_split_result)
     cdef pair[unique_ptr[device_buffer], size_type] move(
         pair[unique_ptr[device_buffer], size_type]
     )

From 7e3fb844d85da9ace1cb54e49189041bab507237 Mon Sep 17 00:00:00 2001
From: Devavret Makkar <dmakkar@nvidia.com>
Date: Fri, 24 Apr 2020 02:28:42 +0530
Subject: [PATCH 16/90] Move BufferArrayFromVector from parquet to utils

---
 python/cudf/cudf/_lib/copying.pyx | 86 ++-----------------------------
 python/cudf/cudf/_lib/parquet.pyx | 39 +-------------
 python/cudf/cudf/_lib/utils.pxd   | 15 ++++++
 python/cudf/cudf/_lib/utils.pyx   | 36 +++++++++++++
 4 files changed, 55 insertions(+), 121 deletions(-)

diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx
index fed748f2b25..3d9273a07eb 100644
--- a/python/cudf/cudf/_lib/copying.pyx
+++ b/python/cudf/cudf/_lib/copying.pyx
@@ -1,11 +1,12 @@
 # Copyright (c) 2020, NVIDIA CORPORATION.
 
+import numpy as np
 import pandas as pd
 
 from libcpp cimport bool
 from libcpp.memory cimport make_unique, unique_ptr
 from libcpp.vector cimport vector
-from libc.stdint cimport int32_t
+from libc.stdint cimport int32_t, uint8_t
 
 from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer
 
@@ -15,6 +16,7 @@ from cudf._lib.table cimport Table
 from cudf._lib.move cimport move
 from cudf._lib.scalar cimport Scalar
 from cudf._lib.table cimport Table
+from cudf._lib.utils cimport BufferArrayFromVector
 
 from cudf._lib.cpp.column.column cimport column
 from cudf._lib.cpp.column.column_view cimport (
@@ -447,88 +449,6 @@ def table_split(Table input_table, object splits, bool keep_index=True):
 
     return result
 
-import cudf
-import errno
-import os
-import pyarrow as pa
-import json
-
-from cython.operator import dereference
-import numpy as np
-
-from cudf.utils.dtypes import np_to_pa_dtype, is_categorical_dtype
-from libc.stdlib cimport free
-from libc.stdint cimport uint8_t
-from libcpp.memory cimport shared_ptr, unique_ptr, make_unique
-from libcpp.string cimport string
-from libcpp.map cimport map
-from libcpp.vector cimport vector
-
-from cudf._lib.cpp.types cimport size_type
-from cudf._lib.table cimport Table
-from cudf._lib.cpp.table.table cimport table
-from cudf._lib.cpp.table.table_view cimport (
-    table_view
-)
-from cudf._lib.move cimport move
-from cudf._lib.cpp.io.functions cimport (
-    write_parquet_args,
-    write_parquet as parquet_writer,
-    merge_rowgroup_metadata as parquet_merge_metadata,
-    read_parquet_args,
-    read_parquet as parquet_reader,
-    write_parquet_chunked_args,
-    write_parquet_chunked_begin,
-    write_parquet_chunked,
-    write_parquet_chunked_end,
-    pq_chunked_state
-)
-from cudf._lib.io.utils cimport (
-    make_source_info
-)
-
-cimport cudf._lib.cpp.types as cudf_types
-cimport cudf._lib.cpp.io.types as cudf_io_types
-
-cdef class BufferArrayFromVector:
-    cdef Py_ssize_t length
-    cdef unique_ptr[vector[uint8_t]] in_vec
-
-    # these two things declare part of the buffer interface
-    cdef Py_ssize_t shape[1]
-    cdef Py_ssize_t strides[1]
-
-    @staticmethod
-    cdef BufferArrayFromVector from_unique_ptr(
-        unique_ptr[vector[uint8_t]] in_vec
-    ):
-        cdef BufferArrayFromVector buf = BufferArrayFromVector()
-        buf.in_vec = move(in_vec)
-        buf.length = dereference(buf.in_vec).size()
-        return buf
-
-    def __getbuffer__(self, Py_buffer *buffer, int flags):
-        cdef Py_ssize_t itemsize = sizeof(uint8_t)
-
-        self.shape[0] = self.length
-        self.strides[0] = 1
-
-        buffer.buf = dereference(self.in_vec).data()
-
-        buffer.format = NULL  # byte
-        buffer.internal = NULL
-        buffer.itemsize = itemsize
-        buffer.len = self.length * itemsize   # product(shape) * itemsize
-        buffer.ndim = 1
-        buffer.obj = self
-        buffer.readonly = 0
-        buffer.shape = self.shape
-        buffer.strides = self.strides
-        buffer.suboffsets = NULL
-
-    def __releasebuffer__(self, Py_buffer *buffer):
-        pass
-
 
 def pack(Table input_table):
 
diff --git a/python/cudf/cudf/_lib/parquet.pyx b/python/cudf/cudf/_lib/parquet.pyx
index 80ab5f01c5f..5a141b90d2c 100644
--- a/python/cudf/cudf/_lib/parquet.pyx
+++ b/python/cudf/cudf/_lib/parquet.pyx
@@ -26,6 +26,7 @@ from cudf._lib.cpp.table.table_view cimport (
     table_view
 )
 from cudf._lib.move cimport move
+from cudf._lib.utils cimport BufferArrayFromVector
 from cudf._lib.cpp.io.functions cimport (
     write_parquet_args,
     write_parquet as parquet_writer,
@@ -45,44 +46,6 @@ from cudf._lib.io.utils cimport (
 cimport cudf._lib.cpp.types as cudf_types
 cimport cudf._lib.cpp.io.types as cudf_io_types
 
-cdef class BufferArrayFromVector:
-    cdef Py_ssize_t length
-    cdef unique_ptr[vector[uint8_t]] in_vec
-
-    # these two things declare part of the buffer interface
-    cdef Py_ssize_t shape[1]
-    cdef Py_ssize_t strides[1]
-
-    @staticmethod
-    cdef BufferArrayFromVector from_unique_ptr(
-        unique_ptr[vector[uint8_t]] in_vec
-    ):
-        cdef BufferArrayFromVector buf = BufferArrayFromVector()
-        buf.in_vec = move(in_vec)
-        buf.length = dereference(buf.in_vec).size()
-        return buf
-
-    def __getbuffer__(self, Py_buffer *buffer, int flags):
-        cdef Py_ssize_t itemsize = sizeof(uint8_t)
-
-        self.shape[0] = self.length
-        self.strides[0] = 1
-
-        buffer.buf = dereference(self.in_vec).data()
-
-        buffer.format = NULL  # byte
-        buffer.internal = NULL
-        buffer.itemsize = itemsize
-        buffer.len = self.length * itemsize   # product(shape) * itemsize
-        buffer.ndim = 1
-        buffer.obj = self
-        buffer.readonly = 0
-        buffer.shape = self.shape
-        buffer.strides = self.strides
-        buffer.suboffsets = NULL
-
-    def __releasebuffer__(self, Py_buffer *buffer):
-        pass
 
 cpdef generate_pandas_metadata(Table table, index):
     col_names = []
diff --git a/python/cudf/cudf/_lib/utils.pxd b/python/cudf/cudf/_lib/utils.pxd
index 6731f7298c2..d17d5b42444 100644
--- a/python/cudf/cudf/_lib/utils.pxd
+++ b/python/cudf/cudf/_lib/utils.pxd
@@ -1,5 +1,7 @@
 # Copyright (c) 2020, NVIDIA CORPORATION.
 
+from libc.stdint cimport uint8_t
+from libcpp.memory cimport unique_ptr
 from libcpp.vector cimport vector
 from cudf._lib.cpp.column.column cimport column_view
 from cudf._lib.cpp.table.table cimport table_view
@@ -7,3 +9,16 @@ from cudf._lib.cpp.table.table cimport table_view
 cdef vector[column_view] make_column_views(object columns) except*
 cdef vector[table_view] make_table_views(object tables) except*
 cdef vector[table_view] make_table_data_views(object tables) except*
+
+cdef class BufferArrayFromVector:
+    cdef Py_ssize_t length
+    cdef unique_ptr[vector[uint8_t]] in_vec
+
+    # these two things declare part of the buffer interface
+    cdef Py_ssize_t shape[1]
+    cdef Py_ssize_t strides[1]
+
+    @staticmethod
+    cdef BufferArrayFromVector from_unique_ptr(
+        unique_ptr[vector[uint8_t]] in_vec
+    )
diff --git a/python/cudf/cudf/_lib/utils.pyx b/python/cudf/cudf/_lib/utils.pyx
index 6a7bf4d25bc..5fd19b6ff93 100644
--- a/python/cudf/cudf/_lib/utils.pyx
+++ b/python/cudf/cudf/_lib/utils.pyx
@@ -1,11 +1,14 @@
 # Copyright (c) 2020, NVIDIA CORPORATION.
 
+from cython.operator import dereference
+
 from libcpp.vector cimport vector
 
 from cudf._lib.column cimport Column
 from cudf._lib.table cimport Table
 from cudf._lib.cpp.column.column cimport column_view
 from cudf._lib.cpp.table.table cimport table_view
+from cudf._lib.move cimport move
 
 
 cdef vector[column_view] make_column_views(object columns):
@@ -30,3 +33,36 @@ cdef vector[table_view] make_table_data_views(object tables):
     for tbl in tables:
         views.push_back((<Table> tbl).data_view())
     return views
+
+
+cdef class BufferArrayFromVector:
+    @staticmethod
+    cdef BufferArrayFromVector from_unique_ptr(
+        unique_ptr[vector[uint8_t]] in_vec
+    ):
+        cdef BufferArrayFromVector buf = BufferArrayFromVector()
+        buf.in_vec = move(in_vec)
+        buf.length = dereference(buf.in_vec).size()
+        return buf
+
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        cdef Py_ssize_t itemsize = sizeof(uint8_t)
+
+        self.shape[0] = self.length
+        self.strides[0] = 1
+
+        buffer.buf = dereference(self.in_vec).data()
+
+        buffer.format = NULL  # byte
+        buffer.internal = NULL
+        buffer.itemsize = itemsize
+        buffer.len = self.length * itemsize   # product(shape) * itemsize
+        buffer.ndim = 1
+        buffer.obj = self
+        buffer.readonly = 0
+        buffer.shape = self.shape
+        buffer.strides = self.strides
+        buffer.suboffsets = NULL
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass

From 2adc6b1084b084dff023cf2c33d1726e0fdaa20f Mon Sep 17 00:00:00 2001
From: Devavret Makkar <dmakkar@nvidia.com>
Date: Fri, 24 Apr 2020 04:05:22 +0530
Subject: [PATCH 17/90] Documentation for pack/unpack

---
 cpp/include/cudf/copying.hpp     | 33 ++++++++++++++++++++++++++++++++
 cpp/include/cudf/detail/copy.hpp |  5 +++++
 2 files changed, 38 insertions(+)

diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp
index 4ad705c59b4..eddcfb43a39 100644
--- a/cpp/include/cudf/copying.hpp
+++ b/cpp/include/cudf/copying.hpp
@@ -450,6 +450,12 @@ std::vector<contiguous_split_result> contiguous_split(
   std::vector<size_type> const& splits,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
 
+/**
+ * @brief Table data in a serialized format
+ * 
+ * Contains data from a table in two contiguous buffers: one on host, which contains table metadata
+ * and one on device which contains the table data.
+ */
 struct packed_table {
   packed_table() = default;
   packed_table(std::unique_ptr<std::vector<uint8_t>> table_metadata,
@@ -459,9 +465,36 @@ struct packed_table {
   std::unique_ptr<rmm::device_buffer> table_data;
 };
 
+/**
+ * @brief Deep-copy a `table_view` into a serialized contiguous memory format
+ * 
+ * The metadata from the `table_view` is copied into a host vector of bytes and the data from the
+ * `table_view` is copied into a `device_buffer`. Pass the output of this function into
+ * `cudf::experimental::unpack` to deserialize.
+ * 
+ * @param input View of the table to pack
+ * @param[in] mr Optional, The resource to use for all returned device allocations
+ * @return packed_table A struct containing the serialized metadata and data in contiguous host
+ *         and device memory respectively
+ */
 packed_table pack(cudf::table_view const& input,
                   rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
 
+/**
+ * @brief Deserialize the result of `cudf::experimental::pack`
+ * 
+ * Converts a serialized table back into a `table_view` whose columns point into the contiguous
+ * device buffer `output.all_data`. That buffer is not copied: ownership is moved into
+ * `output.all_data` from the input `packed_table`'s member `table_data`.
+ * 
+ * It is the caller's responsibility to ensure that the `table_view` in the output does not outlive 
+ * the device_buffer `all_data` in the output.
+ * 
+ * No new device memory is allocated in this function.
+ * 
+ * @param input The packed table to unpack
+ * @return contiguous_split_result The unpacked `table_view` and corresponding device data buffer
+ */
 contiguous_split_result unpack(std::unique_ptr<packed_table> input);
 
 /**
diff --git a/cpp/include/cudf/detail/copy.hpp b/cpp/include/cudf/detail/copy.hpp
index 518d526e8e6..488174a5ff5 100644
--- a/cpp/include/cudf/detail/copy.hpp
+++ b/cpp/include/cudf/detail/copy.hpp
@@ -84,6 +84,11 @@ std::vector<contiguous_split_result> contiguous_split(
   rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource(),
   cudaStream_t stream                 = 0);
 
+/**
+ * @copydoc cudf::experimental::pack
+ *
+ * @param stream Optional CUDA stream on which to execute kernels
+ **/
 packed_table pack(cudf::table_view const& input,
                   cudaStream_t stream                 = 0,
                   rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());

From a52d18ba26d1c824fe802618d5fe82262e47700b Mon Sep 17 00:00:00 2001
From: Devavret Makkar <dmakkar@nvidia.com>
Date: Fri, 24 Apr 2020 04:06:29 +0530
Subject: [PATCH 18/90] Fixing style CI

---
 python/cudf/cudf/_lib/copying.pyx | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx
index 3d9273a07eb..33b44edc178 100644
--- a/python/cudf/cudf/_lib/copying.pyx
+++ b/python/cudf/cudf/_lib/copying.pyx
@@ -462,7 +462,7 @@ def pack(Table input_table):
                 input_table_view
             )
         )
-    
+
     # Convert to python object and return
     # python objects needed: wrap vector of bytes, wrap rmm device vector
     data = DeviceBuffer.c_from_unique_ptr(move(c_result.table_data))
@@ -476,7 +476,7 @@ def pack(Table input_table):
 
 def unpack(vector[uint8_t] input_packed_table_metadata,
            DeviceBuffer input_packed_table_data):
-    
+
     cdef unique_ptr[vector[uint8_t]] c_metadata = move(
         make_unique[vector[uint8_t]](move(input_packed_table_metadata))
     )
@@ -492,7 +492,7 @@ def unpack(vector[uint8_t] input_packed_table_metadata,
         c_result = move(
             cpp_copying.unpack(move(c_packed_table))
         )
-    
+
     table_data_owner = DeviceBuffer.c_from_unique_ptr(move(c_result.all_data))
     return Table.from_table_view(
         c_result.table,

From 7f0bef8b26687f73043e7d2015d4410f5ebae04f Mon Sep 17 00:00:00 2001
From: Devavret Makkar <dmakkar@nvidia.com>
Date: Fri, 24 Apr 2020 04:37:12 +0530
Subject: [PATCH 19/90] Refactor unpack to avoid passing counter around

---
 cpp/src/copying/pack.cpp | 63 ++++++++++++++++++----------------------
 1 file changed, 28 insertions(+), 35 deletions(-)

diff --git a/cpp/src/copying/pack.cpp b/cpp/src/copying/pack.cpp
index 82641cf777d..d7aa3425915 100644
--- a/cpp/src/copying/pack.cpp
+++ b/cpp/src/copying/pack.cpp
@@ -23,7 +23,7 @@ inline void add_column_to_vector(serialized_column const& column,
   std::copy(bytes, bytes + sizeof(serialized_column), std::back_inserter(*table_metadata));
 }
 
-serialized_column serialize_column(column_view const& col, uint8_t const* base_ptr) {
+serialized_column serialize(column_view const& col, uint8_t const* base_ptr) {
   // There are columns types that don't have data in parent e.g. strings
   size_t data_offset = col.data<uint8_t>() ? col.data<uint8_t>() - base_ptr : -1;
   size_t null_mask_offset =
@@ -33,15 +33,15 @@ serialized_column serialize_column(column_view const& col, uint8_t const* base_p
     col.type(), col.size(), data_offset, null_mask_offset, col.num_children()};
 }
 
-void add_columns(std::vector<column_view> const& cols,
-                 uint8_t const* base_ptr,
-                 std::vector<uint8_t>* table_metadata) {
+void serialize_columns(std::vector<column_view> const& cols,
+                       uint8_t const* base_ptr,
+                       std::vector<uint8_t>* table_metadata) {
   for (auto&& col : cols) {
-    add_column_to_vector(serialize_column(col, base_ptr), table_metadata);
+    add_column_to_vector(serialize(col, base_ptr), table_metadata);
     std::vector<column_view> children;
     for (size_t i = 0; i < col.num_children(); i++) { children.push_back(col.child(i)); }
 
-    add_columns(children, base_ptr, table_metadata);
+    serialize_columns(children, base_ptr, table_metadata);
   }
 }
 
@@ -62,9 +62,9 @@ packed_table pack(table_view const& input,
   std::vector<column_view> table_columns(contiguous_data.table.begin(),
                                          contiguous_data.table.end());
 
-  add_columns(table_columns,
-              static_cast<uint8_t const*>(result.table_data->data()),
-              result.table_metadata.get());
+  serialize_columns(table_columns,
+                    static_cast<uint8_t const*>(result.table_data->data()),
+                    result.table_metadata.get());
 
   return result;
 }
@@ -99,37 +99,30 @@ column_view deserialize_column(serialized_column serial_column,
                      children);
 }
 
-std::vector<column_view> get_columns(size_type num_columns,
-                                     serialized_column const* serialized_columns,
-                                     uint8_t const* base_ptr,
-                                     size_t* current_index) {
-  std::vector<column_view> cols;
-  for (size_t i = 0; i < num_columns; i++) {
-    auto serial_column = serialized_columns[*current_index];
-    (*current_index)++;
-
-    std::vector<column_view> children =
-      get_columns(serial_column._num_children, serialized_columns, base_ptr, current_index);
-
-    cols.emplace_back(deserialize_column(serial_column, children, base_ptr));
-  }
-
-  return cols;
-}
-
 }  // namespace
 
 contiguous_split_result unpack(std::unique_ptr<packed_table> input) {
   auto serialized_columns =
     reinterpret_cast<serialized_column const*>(input->table_metadata->data());
-  size_type num_columns = serialized_columns[0]._num_children;
-  size_t current_index  = 1;
-
-  std::vector<column_view> table_columns =
-    get_columns(num_columns,
-                serialized_columns,
-                static_cast<uint8_t const*>(input->table_data->data()),
-                &current_index);
+  uint8_t const* base_ptr = static_cast<uint8_t const*>(input->table_data->data());
+  size_t current_index    = 1;
+
+  std::function<std::vector<column_view>(size_type)> get_columns;
+  get_columns = [&serialized_columns, &current_index, base_ptr, &get_columns](size_t num_columns) {
+    std::vector<column_view> cols;
+    for (size_t i = 0; i < num_columns; i++) {
+      auto serial_column = serialized_columns[current_index];
+      current_index++;
+
+      std::vector<column_view> children = get_columns(serial_column._num_children);
+
+      cols.emplace_back(deserialize_column(serial_column, children, base_ptr));
+    }
+
+    return cols;
+  };
+
+  std::vector<column_view> table_columns = get_columns(serialized_columns[0]._num_children);
 
   return contiguous_split_result{table_view(table_columns), std::move(input->table_data)};
 }

From 44acd2e8de78878c513ab8e6d88c87b4f03c361b Mon Sep 17 00:00:00 2001
From: Devavret Makkar <dmakkar@nvidia.com>
Date: Sat, 25 Apr 2020 03:13:10 +0530
Subject: [PATCH 20/90] Modified pack to work with vector of columns

---
 cpp/include/cudf/copying.hpp        | 27 +++++++-----
 cpp/include/cudf/detail/copy.hpp    | 23 ++++++++--
 cpp/src/copying/contiguous_split.cu | 66 ++++++++++-------------------
 cpp/src/copying/pack.cpp            | 49 ++++++++++-----------
 cpp/tests/copying/pack_tests.cu     | 34 +++++++--------
 5 files changed, 96 insertions(+), 103 deletions(-)

diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp
index eddcfb43a39..be309a8980f 100644
--- a/cpp/include/cudf/copying.hpp
+++ b/cpp/include/cudf/copying.hpp
@@ -456,13 +456,13 @@ std::vector<contiguous_split_result> contiguous_split(
  * Contains data from a table in two contiguous buffers: one on host, which contains table metadata
  * and one on device which contains the table data.
  */
-struct packed_table {
-  packed_table() = default;
-  packed_table(std::unique_ptr<std::vector<uint8_t>> table_metadata,
-               std::unique_ptr<rmm::device_buffer> table_data)
-    : table_metadata(std::move(table_metadata)), table_data(std::move(table_data)){};
-  std::unique_ptr<std::vector<uint8_t>> table_metadata;
-  std::unique_ptr<rmm::device_buffer> table_data;
+struct packed_columns {
+  packed_columns() = default;
+  packed_columns(std::unique_ptr<std::vector<uint8_t>> metadata,
+                 std::unique_ptr<rmm::device_buffer> data)
+    : metadata(std::move(metadata)), data(std::move(data)){};
+  std::unique_ptr<std::vector<uint8_t>> metadata;
+  std::unique_ptr<rmm::device_buffer> data;
 };
 
 /**
@@ -474,11 +474,16 @@ struct packed_table {
  * 
  * @param input View of the table to pack
  * @param[in] mr Optional, The resource to use for all returned device allocations
- * @return packed_table A struct containing the serialized metadata and data in contiguous host
+ * @return packed_columns A struct containing the serialized metadata and data in contiguous host
  *         and device memory respectively
  */
-packed_table pack(cudf::table_view const& input,
-                  rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
+packed_columns pack(std::vector<column_view> const& input,
+                    rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
+
+struct unpack_result {
+  std::vector<column_view> columns;
+  std::unique_ptr<rmm::device_buffer> all_data;
+};
 
 /**
  * @brief Deserialize the result of `cudf::experimental::pack`
@@ -495,7 +500,7 @@ packed_table pack(cudf::table_view const& input,
  * @param input The packed table to unpack
  * @return contiguous_split_result The unpacked `table_view` and corresponding device data buffer
  */
-contiguous_split_result unpack(std::unique_ptr<packed_table> input);
+unpack_result unpack(std::unique_ptr<packed_columns> input);
 
 /**
  * @brief   Returns a new column, where each element is selected from either @p lhs or 
diff --git a/cpp/include/cudf/detail/copy.hpp b/cpp/include/cudf/detail/copy.hpp
index 488174a5ff5..45e0b8ac5a5 100644
--- a/cpp/include/cudf/detail/copy.hpp
+++ b/cpp/include/cudf/detail/copy.hpp
@@ -73,6 +73,23 @@ std::vector<column_view> slice(column_view const& input,
                                std::vector<size_type> const& indices,
                                cudaStream_t stream = 0);
 
+/**
+ * @brief Information about the split for a given column. Bundled together
+ *        into a struct because tuples were getting pretty unreadable. 
+ */
+struct column_split_info {
+  size_t data_buf_size;      // size of the data (including padding)
+  size_t validity_buf_size;  // validity vector size (including padding)
+
+  size_t offsets_buf_size;  // (strings only) size of offset column (including padding)
+  size_type num_chars;      // (strings only) number of chars in the column
+  size_type chars_offset;   // (strings only) offset from head of chars data
+};
+
+unpack_result alloc_and_copy(std::vector<column_view> const& t,
+                             rmm::mr::device_memory_resource* mr,
+                             cudaStream_t stream);
+
 /**
  * @copydoc cudf::experimental::contiguous_split
  *
@@ -89,9 +106,9 @@ std::vector<contiguous_split_result> contiguous_split(
  *
  * @param stream Optional CUDA stream on which to execute kernels
  **/
-packed_table pack(cudf::table_view const& input,
-                  cudaStream_t stream                 = 0,
-                  rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
+packed_columns pack(cudf::table_view const& input,
+                    cudaStream_t stream                 = 0,
+                    rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
 
 /**
  * @brief Creates an uninitialized new column of the specified size and same type as the `input`.
diff --git a/cpp/src/copying/contiguous_split.cu b/cpp/src/copying/contiguous_split.cu
index cd55d59065a..729c6908306 100644
--- a/cpp/src/copying/contiguous_split.cu
+++ b/cpp/src/copying/contiguous_split.cu
@@ -18,6 +18,7 @@
 #include <cudf/column/column_device_view.cuh>
 #include <cudf/column/column_view.hpp>
 #include <cudf/copying.hpp>
+#include <cudf/detail/copy.hpp>
 #include <cudf/detail/null_mask.hpp>
 #include <cudf/detail/nvtx/ranges.hpp>
 #include <cudf/detail/utilities/cuda.cuh>
@@ -172,19 +173,6 @@ __launch_bounds__(block_size) __global__
 // start at that alignment.
 static constexpr size_t split_align = 64;
 
-/**
- * @brief Information about the split for a given column. Bundled together
- *        into a struct because tuples were getting pretty unreadable. 
- */
-struct column_split_info {
-  size_t data_buf_size;      // size of the data (including padding)
-  size_t validity_buf_size;  // validity vector size (including padding)
-
-  size_t offsets_buf_size;  // (strings only) size of offset column (including padding)
-  size_type num_chars;      // (strings only) number of chars in the column
-  size_type chars_offset;   // (strings only) offset from head of chars data
-};
-
 /**
  * @brief Functor called by the `type_dispatcher` to incrementally compute total
  * memory buffer size needed to allocate a contiguous copy of all columns within
@@ -348,12 +336,10 @@ struct column_preprocess_info {
  * - creating column_device_views on the base string_column_view itself as that causes gpu memory allocation.
  */
 thrust::host_vector<column_split_info> preprocess_string_column_info(
-  cudf::table_view const& t,
-  rmm::device_vector<column_split_info>& device_split_info,
-  cudaStream_t stream) {
+  std::vector<column_view> const& t, cudaStream_t stream) {
   // build a list of all the offset columns and their indices for all input string columns and put them on the gpu
   thrust::host_vector<column_preprocess_info> offset_columns;
-  offset_columns.reserve(t.num_columns());  // worst case
+  offset_columns.reserve(t.size());  // worst case
 
   // collect only string columns
   size_type column_index = 0;
@@ -368,7 +354,7 @@ thrust::host_vector<column_split_info> preprocess_string_column_info(
   rmm::device_vector<column_preprocess_info> device_offset_columns = offset_columns;
 
   // compute column split information
-  rmm::device_vector<thrust::pair<size_type, size_type>> device_offsets(t.num_columns());
+  rmm::device_vector<thrust::pair<size_type, size_type>> device_offsets(t.size());
   auto* offsets_p = device_offsets.data().get();
   thrust::for_each(rmm::exec_policy(stream)->on(stream),
                    device_offset_columns.begin(),
@@ -379,7 +365,7 @@ thrust::host_vector<column_split_info> preprocess_string_column_info(
                                          cpi.offsets.head<int32_t>()[cpi.offset + cpi.size]);
                    });
   thrust::host_vector<thrust::pair<size_type, size_type>> host_offsets(device_offsets);
-  thrust::host_vector<column_split_info> split_info(t.num_columns());
+  thrust::host_vector<column_split_info> split_info(t.size());
   std::for_each(offset_columns.begin(),
                 offset_columns.end(),
                 [&split_info, &host_offsets](column_preprocess_info const& cpi) {
@@ -398,6 +384,8 @@ thrust::host_vector<column_split_info> preprocess_string_column_info(
   return split_info;
 }
 
+};  // anonymous namespace
+
 /**
  * @brief Creates a contiguous_split_result object which contains a deep-copy of the input
  * table_view into a single contiguous block of memory. 
@@ -406,13 +394,11 @@ thrust::host_vector<column_split_info> preprocess_string_column_info(
  * call with the input table.  The memory referenced by the table_view and its internal column_views
  * is entirely contained in single block of memory.
  */
-contiguous_split_result alloc_and_copy(cudf::table_view const& t,
-                                       rmm::device_vector<column_split_info>& device_split_info,
-                                       rmm::mr::device_memory_resource* mr,
-                                       cudaStream_t stream) {
+unpack_result alloc_and_copy(std::vector<column_view> const& t,
+                             rmm::mr::device_memory_resource* mr,
+                             cudaStream_t stream) {
   // preprocess column split information for string columns.
-  thrust::host_vector<column_split_info> split_info =
-    preprocess_string_column_info(t, device_split_info, stream);
+  thrust::host_vector<column_split_info> split_info = preprocess_string_column_info(t, stream);
 
   // compute the rest of the column sizes (non-string columns, and total buffer size)
   size_t total_size      = 0;
@@ -430,7 +416,7 @@ contiguous_split_result alloc_and_copy(cudf::table_view const& t,
 
   // copy (this would be cleaner with a std::transform, but there's an nvcc compiler issue in the way)
   std::vector<column_view> out_cols;
-  out_cols.reserve(t.num_columns());
+  out_cols.reserve(t.size());
 
   column_index = 0;
   std::for_each(
@@ -440,33 +426,25 @@ contiguous_split_result alloc_and_copy(cudf::table_view const& t,
       column_index++;
     });
 
-  return contiguous_split_result{cudf::table_view{out_cols}, std::move(device_buf)};
+  return unpack_result{out_cols, std::move(device_buf)};
 }
 
-};  // anonymous namespace
-
 std::vector<contiguous_split_result> contiguous_split(cudf::table_view const& input,
                                                       std::vector<size_type> const& splits,
                                                       rmm::mr::device_memory_resource* mr,
                                                       cudaStream_t stream) {
   auto subtables = cudf::experimental::split(input, splits);
 
-  // optimization : for large numbers of splits this allocation can dominate total time
-  //                spent if done inside alloc_and_copy().  so we'll allocate it once
-  //                and reuse it.
-  //
-  //                benchmark:        1 GB data, 10 columns, 256 splits.
-  //                no optimization:  106 ms (8 GB/s)
-  //                optimization:     20 ms (48 GB/s)
-  rmm::device_vector<column_split_info> device_split_info(input.num_columns());
-
   std::vector<contiguous_split_result> result;
-  std::transform(subtables.begin(),
-                 subtables.end(),
-                 std::back_inserter(result),
-                 [mr, stream, &device_split_info](table_view const& t) {
-                   return alloc_and_copy(t, device_split_info, mr, stream);
-                 });
+  std::transform(
+    subtables.begin(),
+    subtables.end(),
+    std::back_inserter(result),
+    [mr, stream](table_view const& t) {
+      std::vector<column_view> table_columns(t.begin(), t.end());
+      unpack_result result(alloc_and_copy(table_columns, mr, stream));
+      return contiguous_split_result{table_view(result.columns), std::move(result.all_data)};
+    });
 
   return result;
 }
diff --git a/cpp/src/copying/pack.cpp b/cpp/src/copying/pack.cpp
index d7aa3425915..3383cfe5775 100644
--- a/cpp/src/copying/pack.cpp
+++ b/cpp/src/copying/pack.cpp
@@ -17,10 +17,9 @@ struct serialized_column {
   size_type _num_children;
 };
 
-inline void add_column_to_vector(serialized_column const& column,
-                                 std::vector<uint8_t>* table_metadata) {
+inline void add_column_to_vector(serialized_column const& column, std::vector<uint8_t>* metadata) {
   auto bytes = reinterpret_cast<uint8_t const*>(&column);
-  std::copy(bytes, bytes + sizeof(serialized_column), std::back_inserter(*table_metadata));
+  std::copy(bytes, bytes + sizeof(serialized_column), std::back_inserter(*metadata));
 }
 
 serialized_column serialize(column_view const& col, uint8_t const* base_ptr) {
@@ -35,36 +34,35 @@ serialized_column serialize(column_view const& col, uint8_t const* base_ptr) {
 
 void serialize_columns(std::vector<column_view> const& cols,
                        uint8_t const* base_ptr,
-                       std::vector<uint8_t>* table_metadata) {
+                       std::vector<uint8_t>* metadata) {
   for (auto&& col : cols) {
-    add_column_to_vector(serialize(col, base_ptr), table_metadata);
+    add_column_to_vector(serialize(col, base_ptr), metadata);
     std::vector<column_view> children;
     for (size_t i = 0; i < col.num_children(); i++) { children.push_back(col.child(i)); }
 
-    serialize_columns(children, base_ptr, table_metadata);
+    serialize_columns(children, base_ptr, metadata);
   }
 }
 
 }  // namespace
 
-packed_table pack(table_view const& input,
-                  cudaStream_t stream,
-                  rmm::mr::device_memory_resource* mr) {
-  contiguous_split_result contiguous_data =
-    std::move(detail::contiguous_split(input, {}, mr, stream).front());
+packed_columns pack(std::vector<column_view> const& input,
+                    cudaStream_t stream,
+                    rmm::mr::device_memory_resource* mr) {
+  unpack_result contiguous_data = std::move(detail::alloc_and_copy(input, mr, stream));
 
-  serialized_column table_element = {{}, 0, 0, 0, contiguous_data.table.num_columns()};
+  serialized_column first_element = {
+    {}, 0, 0, 0, static_cast<size_type>(contiguous_data.columns.size())};
 
   auto result =
-    packed_table(std::make_unique<std::vector<uint8_t>>(), std::move(contiguous_data.all_data));
-  add_column_to_vector(table_element, result.table_metadata.get());
+    packed_columns(std::make_unique<std::vector<uint8_t>>(), std::move(contiguous_data.all_data));
+  add_column_to_vector(first_element, result.metadata.get());
 
-  std::vector<column_view> table_columns(contiguous_data.table.begin(),
-                                         contiguous_data.table.end());
+  std::vector<column_view> table_columns(contiguous_data.columns.begin(),
+                                         contiguous_data.columns.end());
 
-  serialize_columns(table_columns,
-                    static_cast<uint8_t const*>(result.table_data->data()),
-                    result.table_metadata.get());
+  serialize_columns(
+    table_columns, static_cast<uint8_t const*>(result.data->data()), result.metadata.get());
 
   return result;
 }
@@ -101,10 +99,9 @@ column_view deserialize_column(serialized_column serial_column,
 
 }  // namespace
 
-contiguous_split_result unpack(std::unique_ptr<packed_table> input) {
-  auto serialized_columns =
-    reinterpret_cast<serialized_column const*>(input->table_metadata->data());
-  uint8_t const* base_ptr = static_cast<uint8_t const*>(input->table_data->data());
+unpack_result unpack(std::unique_ptr<packed_columns> input) {
+  auto serialized_columns = reinterpret_cast<serialized_column const*>(input->metadata->data());
+  uint8_t const* base_ptr = static_cast<uint8_t const*>(input->data->data());
   size_t current_index    = 1;
 
   std::function<std::vector<column_view>(size_type)> get_columns;
@@ -124,17 +121,17 @@ contiguous_split_result unpack(std::unique_ptr<packed_table> input) {
 
   std::vector<column_view> table_columns = get_columns(serialized_columns[0]._num_children);
 
-  return contiguous_split_result{table_view(table_columns), std::move(input->table_data)};
+  return unpack_result{table_columns, std::move(input->data)};
 }
 
 }  // namespace detail
 
-packed_table pack(table_view const& input, rmm::mr::device_memory_resource* mr) {
+packed_columns pack(std::vector<column_view> const& input, rmm::mr::device_memory_resource* mr) {
   CUDF_FUNC_RANGE();
   return detail::pack(input, 0, mr);
 }
 
-contiguous_split_result unpack(std::unique_ptr<packed_table> input) {
+unpack_result unpack(std::unique_ptr<packed_columns> input) {
   CUDF_FUNC_RANGE();
   return detail::unpack(std::move(input));
 }
diff --git a/cpp/tests/copying/pack_tests.cu b/cpp/tests/copying/pack_tests.cu
index f05af999f33..f758c9de456 100644
--- a/cpp/tests/copying/pack_tests.cu
+++ b/cpp/tests/copying/pack_tests.cu
@@ -16,22 +16,22 @@
 
 #include <cudf/copying.hpp>
 #include <tests/utilities/base_fixture.hpp>
+#include <tests/utilities/column_utilities.hpp>
 #include <tests/utilities/column_wrapper.hpp>
-#include <tests/utilities/table_utilities.hpp>
 
 namespace cudf {
 namespace test {
 
 struct PackUnpackTest : public BaseFixture {
-  void run_test(table_view const& t) {
+  void run_test(std::vector<column_view> const& t) {
     auto packed  = experimental::pack(t);
-    auto packed2 = std::make_unique<experimental::packed_table>(
-      std::make_unique<std::vector<uint8_t>>(*packed.table_metadata),
-      std::make_unique<rmm::device_buffer>(*packed.table_data));
+    auto packed2 = std::make_unique<experimental::packed_columns>(
+      std::make_unique<std::vector<uint8_t>>(*packed.metadata),
+      std::make_unique<rmm::device_buffer>(*packed.data));
 
-    experimental::contiguous_split_result unpacked = experimental::unpack(std::move(packed2));
+    experimental::unpack_result unpacked = experimental::unpack(std::move(packed2));
 
-    expect_tables_equal(t, unpacked.table);
+    for (size_t i = 0; i < t.size(); i++) { expect_columns_equal(t[i], unpacked.columns[i]); }
   }
 };
 
@@ -40,17 +40,15 @@ TEST_F(PackUnpackTest, SingleColumnFixedWidth)
 {
   fixed_width_column_wrapper<int64_t> col1 ({ 1, 2, 3, 4, 5, 6, 7},
                                             { 1, 1, 1, 0, 1, 0, 1});
-  table_view t({col1});
 
-  this->run_test(t);
+  this->run_test({col1});
 }
 
 TEST_F(PackUnpackTest, SingleColumnFixedWidthNonNullable)
 {
   fixed_width_column_wrapper<int64_t> col1 ({ 1, 2, 3, 4, 5, 6, 7});
-  table_view t({col1});
 
-  this->run_test(t);
+  this->run_test({col1});
 }
 
 TEST_F(PackUnpackTest, MultiColumnFixedWidth)
@@ -59,23 +57,21 @@ TEST_F(PackUnpackTest, MultiColumnFixedWidth)
                                             { 1, 1, 1, 0, 1, 0, 1});
   fixed_width_column_wrapper<float>   col2 ({ 7, 8, 6, 5, 4, 3, 2},
                                             { 1, 0, 1, 1, 1, 1, 1});
-  fixed_width_column_wrapper<double>  col3 ({ 8, 4, 2, 0, 7, 1, 9},
-                                            { 0, 1, 1, 1, 1, 1, 1});
-  table_view t({col1, col2, col3});
+  fixed_width_column_wrapper<double>  col3 ({ 8, 4, 2, 0, 7, 1, 9, 3},
+                                            { 0, 1, 1, 1, 1, 1, 1, 1});
 
-  this->run_test(t);
+  this->run_test({col1, col2, col3});
 }
 
 TEST_F(PackUnpackTest, MultiColumnWithStrings)
 {
   fixed_width_column_wrapper<int16_t> col1 ({ 1, 2, 3, 4, 5, 6, 7},
                                             { 1, 1, 1, 0, 1, 0, 1});
-  strings_column_wrapper              col2 ({"Lorem", "ipsum", "dolor", "sit", "amet", "consectetur", "adipiscing"},
-                                            {      1,       0,       1,     1,      1,             1,            1});
+  strings_column_wrapper              col2 ({"Lorem", "ipsum", "dolor", "sit", "amet"},
+                                            {      1,       0,       1,     1,      1});
   strings_column_wrapper              col3 ({"", "this", "is", "a", "column", "of", "strings"});
-  table_view t({col1, col2, col3});
 
-  this->run_test(t);
+  this->run_test({col1, col2, col3});
 }
 // clang-format on
 

From 66eaeee96a13821a5e337f2613a614017197b511 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Mon, 27 Apr 2020 13:58:28 -0400
Subject: [PATCH 21/90] Add pack/unpack based serialization

---
 cpp/src/copying/contiguous_split.cu      |  2 +-
 python/cudf/cudf/_lib/copying.pyx        | 63 +++++++++--------
 python/cudf/cudf/_lib/cpp/copying.pxd    | 18 +++--
 python/cudf/cudf/_lib/move.pxd           |  8 +--
 python/cudf/cudf/_lib/nvtx/nvtx.py       |  6 ++
 python/cudf/cudf/comm/serialize.py       | 16 +++--
 python/cudf/cudf/core/dataframe.py       | 34 ---------
 python/cudf/cudf/core/frame.py           | 87 ++++++++++++++++++++++++
 python/cudf/cudf/core/series.py          | 31 ---------
 python/cudf/cudf/tests/test_serialize.py | 12 ++--
 10 files changed, 158 insertions(+), 119 deletions(-)

diff --git a/cpp/src/copying/contiguous_split.cu b/cpp/src/copying/contiguous_split.cu
index 5c2900768d7..3d820167e4b 100644
--- a/cpp/src/copying/contiguous_split.cu
+++ b/cpp/src/copying/contiguous_split.cu
@@ -171,7 +171,7 @@ __launch_bounds__(block_size) __global__
 // start at that alignment.
 static constexpr size_t split_align = 64;
 
- * @brief Functor called by the `type_dispatcher` to incrementally compute total
+/** @brief Functor called by the `type_dispatcher` to incrementally compute total
  * memory buffer size needed to allocate a contiguous copy of all columns within
  * a source table.
  */
diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx
index 33b44edc178..49585b2772b 100644
--- a/python/cudf/cudf/_lib/copying.pyx
+++ b/python/cudf/cudf/_lib/copying.pyx
@@ -450,55 +450,58 @@ def table_split(Table input_table, object splits, bool keep_index=True):
     return result
 
 
-def pack(Table input_table):
-
-    cdef table_view input_table_view = input_table.view()
+def pack(columns):
+    """
+    Pack the given list of columns into a single contiguous buffer
+    and associated metadata.
+    """
+    cdef vector[column_view] c_columns
+    cdef Column col
+    for col in columns:
+        c_columns.push_back(col.view())
 
-    cdef cpp_copying.packed_table c_result
+    cdef cpp_copying.packed_columns c_result
 
     with nogil:
         c_result = move(
             cpp_copying.pack(
-                input_table_view
+                c_columns
             )
         )
 
-    # Convert to python object and return
-    # python objects needed: wrap vector of bytes, wrap rmm device vector
-    data = DeviceBuffer.c_from_unique_ptr(move(c_result.table_data))
-    metadata_py = BufferArrayFromVector.from_unique_ptr(
-        move(c_result.table_metadata)
+    # Convert to python objects:
+    data = DeviceBuffer.c_from_unique_ptr(move(c_result.data))
+    metadata = BufferArrayFromVector.from_unique_ptr(
+        move(c_result.metadata)
     )
-    metadata = np.asarray(metadata_py)
+    metadata = np.asarray(metadata)
 
     return (metadata, data)
 
 
-def unpack(vector[uint8_t] input_packed_table_metadata,
-           DeviceBuffer input_packed_table_data):
-
-    cdef unique_ptr[vector[uint8_t]] c_metadata = move(
-        make_unique[vector[uint8_t]](move(input_packed_table_metadata))
-    )
-
-    cdef unique_ptr[device_buffer] c_data = move(input_packed_table_data.c_obj)
-    cdef unique_ptr[cpp_copying.packed_table] c_packed_table = move(
-        make_unique[cpp_copying.packed_table](move(c_metadata), move(c_data))
+def unpack(vector[uint8_t] metadata,
+           DeviceBuffer data):
+    """
+    Given the results of a `pack`, unpack into a list of Columns
+    """
+    cdef unique_ptr[cpp_copying.packed_columns] c_packed_columns = move(
+        make_unique[cpp_copying.packed_columns](
+            make_unique[vector[uint8_t]](metadata),
+            move(data.c_obj)
+        )
     )
-
-    cdef cpp_copying.contiguous_split_result c_result
+    cdef cpp_copying.unpack_result c_result
 
     with nogil:
         c_result = move(
-            cpp_copying.unpack(move(c_packed_table))
+            cpp_copying.unpack(move(c_packed_columns))
         )
 
-    table_data_owner = DeviceBuffer.c_from_unique_ptr(move(c_result.all_data))
-    return Table.from_table_view(
-        c_result.table,
-        owner=table_data_owner,
-        column_names=range(c_result.table.num_columns())
-    )
+    owner = DeviceBuffer.c_from_unique_ptr(move(c_result.all_data))
+    columns = []
+    for i in range(c_result.columns.size()):
+        columns.append(Column.from_column_view(c_result.columns[i], owner))
+    return columns
 
 
 def _copy_if_else_column_column(Column lhs, Column rhs, Column boolean_mask):
diff --git a/python/cudf/cudf/_lib/cpp/copying.pxd b/python/cudf/cudf/_lib/cpp/copying.pxd
index 21a3e083538..c263068fca1 100644
--- a/python/cudf/cudf/_lib/cpp/copying.pxd
+++ b/python/cudf/cudf/_lib/cpp/copying.pxd
@@ -114,16 +114,20 @@ cdef extern from "cudf/copying.hpp" namespace "cudf::experimental" nogil:
         vector[size_type] splits
     ) except +
 
-    cdef struct packed_table:
-        unique_ptr[vector[uint8_t]] table_metadata
-        unique_ptr[device_buffer] table_data
+    cdef struct packed_columns:
+        unique_ptr[vector[uint8_t]] metadata
+        unique_ptr[device_buffer] data
 
-    cdef packed_table pack (
-        table_view input_table
+    cdef struct unpack_result:
+        vector[column_view] columns
+        unique_ptr[device_buffer] all_data
+
+    cdef packed_columns pack (
+        vector[column_view] columns
     ) except +
 
-    cdef contiguous_split_result unpack (
-        unique_ptr[packed_table] input_packed_table
+    cdef unpack_result unpack (
+        unique_ptr[packed_columns]
     ) except +
 
     cdef unique_ptr[column] copy_if_else (
diff --git a/python/cudf/cudf/_lib/move.pxd b/python/cudf/cudf/_lib/move.pxd
index 4b975343e3e..5da9de247df 100644
--- a/python/cudf/cudf/_lib/move.pxd
+++ b/python/cudf/cudf/_lib/move.pxd
@@ -10,7 +10,7 @@ from cudf._lib.cpp.types cimport (
 )
 from cudf._lib.cpp.aggregation cimport aggregation
 from cudf._lib.cpp.scalar.scalar cimport scalar
-from cudf._lib.cpp.copying cimport packed_table, contiguous_split_result
+from cudf._lib.cpp.copying cimport packed_columns, unpack_result
 from cudf._lib.cpp.column.column cimport column, column_contents
 from cudf._lib.cpp.column.column_view cimport column_view
 from cudf._lib.cpp.table.table cimport table
@@ -62,9 +62,9 @@ cdef extern from "<utility>" namespace "std" nogil:
     cdef device_buffer move(device_buffer)
     cdef unique_ptr[device_buffer] move(unique_ptr[device_buffer])
     cdef unique_ptr[scalar] move(unique_ptr[scalar])
-    cdef packed_table move(packed_table)
-    cdef unique_ptr[packed_table] move(unique_ptr[packed_table])
-    cdef contiguous_split_result move(contiguous_split_result)
+    cdef packed_columns move(packed_columns)
+    cdef unique_ptr[packed_columns] move(unique_ptr[packed_columns])
+    cdef unpack_result move(unpack_result)
     cdef pair[unique_ptr[device_buffer], size_type] move(
         pair[unique_ptr[device_buffer], size_type]
     )
diff --git a/python/cudf/cudf/_lib/nvtx/nvtx.py b/python/cudf/cudf/_lib/nvtx/nvtx.py
index c0047fa4a29..0c3c304a51a 100644
--- a/python/cudf/cudf/_lib/nvtx/nvtx.py
+++ b/python/cudf/cudf/_lib/nvtx/nvtx.py
@@ -51,9 +51,15 @@ def __init__(self, message=None, color="blue", domain=None):
         ...    time.sleep(10)
         ...
         """
+        self._message = message
+        self._color = color
+        self._domain_name = domain
         self.attributes = EventAttributes(message, color)
         self.domain = Domain(domain)
 
+    def __reduce__(self):
+        return self.__class__, (self._message, self._color, self._domain_name)
+
     def __enter__(self):
         libnvtx_push_range(self.attributes, self.domain.handle)
         return self
diff --git a/python/cudf/cudf/comm/serialize.py b/python/cudf/cudf/comm/serialize.py
index a6cf211aa90..9ed3ff77164 100644
--- a/python/cudf/cudf/comm/serialize.py
+++ b/python/cudf/cudf/comm/serialize.py
@@ -26,7 +26,6 @@
     def cuda_serialize_cudf_object(x):
         with log_errors():
             header, frames = x.serialize()
-            assert all((type(f) is cudf.core.buffer.Buffer) for f in frames)
             header["lengths"] = [f.nbytes for f in frames]
             return header, frames
 
@@ -34,9 +33,13 @@ def cuda_serialize_cudf_object(x):
     # Series/DataFrame/Index/Column/Buffer/etc
     @dask_serialize.register(serializable_classes)
     def dask_serialize_cudf_object(x):
+        import rmm
+
         header, frames = cuda_serialize_cudf_object(x)
         with log_errors():
-            frames = [f.to_host_array().data for f in frames]
+            for i, f in enumerate(frames):
+                if isinstance(f, rmm.DeviceBuffer):
+                    frames[i] = f.copy_to_host().data
             return header, frames
 
     @cuda_deserialize.register(serializable_classes)
@@ -44,10 +47,11 @@ def dask_serialize_cudf_object(x):
     def deserialize_cudf_object(header, frames):
         with log_errors():
             if header["serializer"] == "cuda":
-                for f in frames:
-                    # some frames are empty -- meta/empty partitions/etc
-                    if len(f) > 0:
-                        assert hasattr(f, "__cuda_array_interface__")
+                pass
+                # for f in frames:
+                #     # some frames are empty -- meta/empty partitions/etc
+                #     if len(f) > 0:
+                #         assert hasattr(f, "__cuda_array_interface__")
             elif header["serializer"] == "dask":
                 frames = [memoryview(f) for f in frames]
 
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 708edaf718d..d7d0e5298a9 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -6,7 +6,6 @@
 import itertools
 import logging
 import numbers
-import pickle
 import uuid
 import warnings
 from collections import OrderedDict
@@ -327,39 +326,6 @@ def _constructor_expanddim(self):
             "_constructor_expanddim not supported for DataFrames!"
         )
 
-    def serialize(self):
-        header = {}
-        frames = []
-        header["type-serialized"] = pickle.dumps(type(self))
-        header["index"], index_frames = self._index.serialize()
-        header["index_frame_count"] = len(index_frames)
-        frames.extend(index_frames)
-
-        # Use the column directly to avoid duplicating the index
-        # need to pickle column names to handle numpy integer columns
-        header["column_names"] = pickle.dumps(tuple(self._data.names))
-        column_header, column_frames = column.serialize_columns(self._columns)
-        header["columns"] = column_header
-        frames.extend(column_frames)
-
-        return header, frames
-
-    @classmethod
-    def deserialize(cls, header, frames):
-        # Reconstruct the index
-        index_frames = frames[: header["index_frame_count"]]
-
-        idx_typ = pickle.loads(header["index"]["type-serialized"])
-        index = idx_typ.deserialize(header["index"], index_frames)
-
-        # Reconstruct the columns
-        column_frames = frames[header["index_frame_count"] :]
-
-        column_names = pickle.loads(header["column_names"])
-        columns = column.deserialize_columns(header["columns"], column_frames)
-
-        return cls(dict(zip(column_names, columns)), index=index)
-
     @property
     def dtypes(self):
         """Return the dtypes in this object."""
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 6be544c42f9..a70c41e6cd2 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -1,5 +1,6 @@
 import functools
 import itertools
+import pickle
 import warnings
 from collections import OrderedDict
 
@@ -1769,6 +1770,92 @@ def _is_sorted(self, ascending=None, null_position=None):
             self, ascending=ascending, null_position=null_position
         )
 
+    def serialize(self):
+        data = []
+        column_names = self._data.names
+        categorical_column_names = []
+
+        # index columns
+        if self._index is not None:
+            data.extend(self._index._data.columns)
+
+        # data columns
+        data.extend(self._data.columns)
+
+        # categories columns
+        for name, col in self._data.items():
+            if isinstance(col, cudf.core.column.CategoricalColumn):
+                data.append(col.categories)
+                categorical_column_names.append(name)
+
+        packed_meta, packed_data = libcudf.copying.pack(data)
+
+        # construct header and frames:
+        header = {}
+        header["type-serialized"] = pickle.dumps(type(self))
+        if self._index is not None:
+            header["index_names"] = pickle.dumps(self._index.names)
+        header["column_names"] = pickle.dumps(column_names)
+        header["categorical_column_names"] = pickle.dumps(
+            categorical_column_names
+        )
+        header["multicolumn"] = pickle.dumps(self._data.multiindex)
+        header["level_names"] = pickle.dumps(self._data.level_names)
+
+        frames = [packed_meta.data, packed_data]
+
+        return header, frames
+
+    @classmethod
+    def deserialize(self, header, frames):
+        typ = pickle.loads(header["type-serialized"])
+        column_names = pickle.loads(header["column_names"])
+        categorical_column_names = pickle.loads(
+            header["categorical_column_names"]
+        )
+        multicolumn = pickle.loads(header["multicolumn"])
+        level_names = pickle.loads(header["level_names"])
+
+        if "index_names" in header:
+            index_names = pickle.loads(header["index_names"])
+        else:
+            index_names = []
+
+        import rmm
+
+        if not isinstance(frames[1], rmm.DeviceBuffer):
+            frames[1] = rmm.DeviceBuffer.to_device(frames[1])
+
+        # unpack into columns
+        columns = libcudf.copying.unpack(frames[0], frames[1])
+
+        if "index_names" in header:
+            index_columns = columns[: len(index_names)]
+            index = cudf.core.index.Index._from_table(
+                libcudf.table.Table(dict(zip(index_names, index_columns)))
+            )
+        else:
+            index = None
+
+        data_columns = columns[
+            len(index_names) : len(index_names) + len(column_names)
+        ]
+        data = dict(zip(column_names, data_columns))
+
+        categorical_columns = columns[-len(categorical_column_names) :]
+
+        for i, name in enumerate(categorical_column_names):
+            mask = data[name].mask
+            data[name].set_base_mask(None)
+            data[name] = cudf.core.column.build_categorical_column(
+                categories=categorical_columns[i], codes=data[name], mask=mask
+            )
+
+        tbl = libcudf.table.Table(data, index=index)
+        tbl._multiindex = multicolumn
+        tbl._level_names = level_names
+        return typ._from_table(tbl)
+
 
 def _get_replacement_values(to_replace, replacement, col_name, column):
     from cudf.utils import utils
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 894cf59dfd8..1cd0cfda22f 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -1,5 +1,4 @@
 # Copyright (c) 2018, NVIDIA CORPORATION.
-import pickle
 import warnings
 from numbers import Number
 
@@ -190,20 +189,6 @@ def values_host(self):
     def from_arrow(cls, s):
         return cls(s)
 
-    def serialize(self):
-        header = {}
-        frames = []
-        header["index"], index_frames = self._index.serialize()
-        header["name"] = pickle.dumps(self.name)
-        frames.extend(index_frames)
-        header["index_frame_count"] = len(index_frames)
-        header["column"], column_frames = self._column.serialize()
-        header["type-serialized"] = pickle.dumps(type(self))
-        frames.extend(column_frames)
-        header["column_frame_count"] = len(column_frames)
-
-        return header, frames
-
     @property
     def shape(self):
         """Returns a tuple representing the dimensionality of the Series.
@@ -236,22 +221,6 @@ def name(self, value):
         col = self._data.pop(self.name)
         self._data[value] = col
 
-    @classmethod
-    def deserialize(cls, header, frames):
-
-        index_nframes = header["index_frame_count"]
-        idx_typ = pickle.loads(header["index"]["type-serialized"])
-        index = idx_typ.deserialize(header["index"], frames[:index_nframes])
-        name = pickle.loads(header["name"])
-
-        frames = frames[index_nframes:]
-
-        column_nframes = header["column_frame_count"]
-        col_typ = pickle.loads(header["column"]["type-serialized"])
-        column = col_typ.deserialize(header["column"], frames[:column_nframes])
-
-        return Series(column, index=index, name=name)
-
     def _copy_construct_defaults(self):
         return dict(data=self._column, index=self._index, name=self.name)
 
diff --git a/python/cudf/cudf/tests/test_serialize.py b/python/cudf/cudf/tests/test_serialize.py
index 3f0c605ac68..570182d5228 100644
--- a/python/cudf/cudf/tests/test_serialize.py
+++ b/python/cudf/cudf/tests/test_serialize.py
@@ -251,9 +251,9 @@ def test_serialize_seriesgroupby():
     assert_eq(recreated.sum(), gb.sum())
 
 
-def test_serialize_string_check_buffer_sizes():
-    df = cudf.DataFrame({"a": ["a", "b", "cd", None]})
-    expect = df.memory_usage(deep=True).loc["a"]
-    header, frames = df.serialize()
-    got = sum(b.nbytes for b in frames)
-    assert expect == got
+# def test_serialize_string_check_buffer_sizes():
+#     df = cudf.DataFrame({"a": ["a", "b", "cd", None]})
+#     expect = df.memory_usage(deep=True).loc["a"]
+#     header, frames = df.serialize()
+#     got = sum(b.nbytes for b in frames)
+#     assert expect == got

From 124a5091f91ae3f4260bb610f15d0f938b44734e Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Mon, 27 Apr 2020 15:14:04 -0400
Subject: [PATCH 22/90] Handle any kind of device object in
 dask_serialize_cudf_object

---
 python/cudf/cudf/comm/serialize.py | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/python/cudf/cudf/comm/serialize.py b/python/cudf/cudf/comm/serialize.py
index 9ed3ff77164..2690a80b57b 100644
--- a/python/cudf/cudf/comm/serialize.py
+++ b/python/cudf/cudf/comm/serialize.py
@@ -33,28 +33,19 @@ def cuda_serialize_cudf_object(x):
     # Series/DataFrame/Index/Column/Buffer/etc
     @dask_serialize.register(serializable_classes)
     def dask_serialize_cudf_object(x):
-        import rmm
-
         header, frames = cuda_serialize_cudf_object(x)
         with log_errors():
             for i, f in enumerate(frames):
-                if isinstance(f, rmm.DeviceBuffer):
-                    frames[i] = f.copy_to_host().data
+                if hasattr(f, "__cuda_array_interface__"):
+                    frames[i] = cudf.core.buffer.Buffer(f).to_host_array().data
             return header, frames
 
     @cuda_deserialize.register(serializable_classes)
     @dask_deserialize.register(serializable_classes)
     def deserialize_cudf_object(header, frames):
         with log_errors():
-            if header["serializer"] == "cuda":
-                pass
-                # for f in frames:
-                #     # some frames are empty -- meta/empty partitions/etc
-                #     if len(f) > 0:
-                #         assert hasattr(f, "__cuda_array_interface__")
-            elif header["serializer"] == "dask":
+            if header["serializer"] == "dask":
                 frames = [memoryview(f) for f in frames]
-
             cudf_typ = pickle.loads(header["type-serialized"])
             cudf_obj = cudf_typ.deserialize(header, frames)
             return cudf_obj

From fa8c20785294ce34888ffba3aa33d1196777de18 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Mon, 27 Apr 2020 15:26:30 -0400
Subject: [PATCH 23/90] Add level_names setter

---
 python/cudf/cudf/core/column_accessor.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py
index b7405610479..e2dbeff2f96 100644
--- a/python/cudf/cudf/core/column_accessor.py
+++ b/python/cudf/cudf/core/column_accessor.py
@@ -74,6 +74,10 @@ def level_names(self):
         else:
             return self._level_names
 
+    @level_names.setter
+    def level_names(self, value):
+        self._level_names = value
+
     @property
     def nlevels(self):
         if len(self._data) == 0:

From 7963f5fc5cb8c4f9804cab79a6f8e0c8d68ef6ff Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Mon, 27 Apr 2020 15:28:43 -0400
Subject: [PATCH 24/90] Better handling of metadata in serialize/deserialize

---
 python/cudf/cudf/core/frame.py | 33 ++++++++++++++++++++++-----------
 1 file changed, 22 insertions(+), 11 deletions(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index a70c41e6cd2..782d0451c7e 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -1799,8 +1799,10 @@ def serialize(self):
         header["categorical_column_names"] = pickle.dumps(
             categorical_column_names
         )
-        header["multicolumn"] = pickle.dumps(self._data.multiindex)
-        header["level_names"] = pickle.dumps(self._data.level_names)
+        header["has_multicolumn"] = pickle.dumps(self._data.multiindex)
+        header["multicolumn_level_names"] = pickle.dumps(
+            self._data.level_names
+        )
 
         frames = [packed_meta.data, packed_data]
 
@@ -1808,20 +1810,27 @@ def serialize(self):
 
     @classmethod
     def deserialize(self, header, frames):
+        import rmm
+
         typ = pickle.loads(header["type-serialized"])
         column_names = pickle.loads(header["column_names"])
         categorical_column_names = pickle.loads(
             header["categorical_column_names"]
         )
-        multicolumn = pickle.loads(header["multicolumn"])
-        level_names = pickle.loads(header["level_names"])
+        has_multicolumn = pickle.loads(header["has_multicolumn"])
+        multicolumn_level_names = pickle.loads(
+            header["multicolumn_level_names"]
+        )
 
         if "index_names" in header:
             index_names = pickle.loads(header["index_names"])
+            num_index_columns = len(index_names)
         else:
             index_names = []
+            num_index_columns = 0
 
-        import rmm
+        num_data_columns = len(column_names)
+        num_categorical_columns = len(categorical_column_names)
 
         if not isinstance(frames[1], rmm.DeviceBuffer):
             frames[1] = rmm.DeviceBuffer.to_device(frames[1])
@@ -1829,7 +1838,8 @@ def deserialize(self, header, frames):
         # unpack into columns
         columns = libcudf.copying.unpack(frames[0], frames[1])
 
-        if "index_names" in header:
+        # construct Index
+        if num_index_columns:
             index_columns = columns[: len(index_names)]
             index = cudf.core.index.Index._from_table(
                 libcudf.table.Table(dict(zip(index_names, index_columns)))
@@ -1837,13 +1847,14 @@ def deserialize(self, header, frames):
         else:
             index = None
 
+        # construct data dictionary
         data_columns = columns[
-            len(index_names) : len(index_names) + len(column_names)
+            num_index_columns : num_index_columns + num_data_columns
         ]
         data = dict(zip(column_names, data_columns))
 
-        categorical_columns = columns[-len(categorical_column_names) :]
-
+        # add category information back to categorical columns:
+        categorical_columns = columns[-num_categorical_columns:]
         for i, name in enumerate(categorical_column_names):
             mask = data[name].mask
             data[name].set_base_mask(None)
@@ -1852,8 +1863,8 @@ def deserialize(self, header, frames):
             )
 
         tbl = libcudf.table.Table(data, index=index)
-        tbl._multiindex = multicolumn
-        tbl._level_names = level_names
+        tbl._data.multiindex = has_multicolumn
+        tbl._data.level_names = multicolumn_level_names
         return typ._from_table(tbl)
 
 

From e97d9499f7a562156cb474b482065bb1d4f44b44 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Tue, 28 Apr 2020 16:18:02 -0400
Subject: [PATCH 25/90] Changelog

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index cfc33a3c326..9092c1eedf6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -121,6 +121,7 @@
 - PR #4993 Remove Java memory prediction code
 - PR #4985 Add null_count to Python Column ctors and use already computed null_count when possible
 - PR #5002 Fix Column.__reduce__ to accept `null_count`
+- PR #5025 Serialize/deserialize with libcudf pack/unpack
 
 ## Bug Fixes
 

From b36998ef6ac00eb34bc23061d043cd1ba817cf0a Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Mon, 18 May 2020 18:44:04 +0530
Subject: [PATCH 26/90] update doc uniformly for device memory resource param
 mr

---
 cpp/include/cudf/binaryop.hpp                 |  8 ++---
 cpp/include/cudf/column/column.hpp            |  4 +--
 cpp/include/cudf/column/column_factories.hpp  | 17 ++++-------
 cpp/include/cudf/concatenate.hpp              |  7 ++---
 cpp/include/cudf/copying.hpp                  |  8 ++---
 cpp/include/cudf/detail/binaryop.hpp          |  8 ++---
 cpp/include/cudf/detail/copy_if_else.cuh      |  2 +-
 cpp/include/cudf/detail/copy_range.cuh        |  2 +-
 cpp/include/cudf/detail/dlpack.hpp            |  4 +--
 cpp/include/cudf/detail/fill.hpp              |  2 +-
 cpp/include/cudf/detail/gather.cuh            |  8 ++---
 cpp/include/cudf/detail/hashing.hpp           |  4 +--
 .../cudf/detail/reduction_functions.hpp       | 20 ++++++-------
 cpp/include/cudf/detail/repeat.hpp            |  4 +--
 cpp/include/cudf/detail/scatter.hpp           |  4 +--
 cpp/include/cudf/detail/search.hpp            |  8 ++---
 cpp/include/cudf/detail/transform.hpp         |  2 +-
 cpp/include/cudf/detail/unary.hpp             |  6 ++--
 cpp/include/cudf/dictionary/detail/encode.hpp |  4 +--
 .../cudf/dictionary/detail/update_keys.hpp    |  8 ++---
 .../cudf/dictionary/dictionary_factories.hpp  |  2 +-
 cpp/include/cudf/dictionary/encode.hpp        |  4 +--
 cpp/include/cudf/dictionary/update_keys.hpp   |  8 ++---
 cpp/include/cudf/dlpack.hpp                   |  4 +--
 cpp/include/cudf/filling.hpp                  | 10 +++----
 cpp/include/cudf/groupby.hpp                  |  4 +--
 cpp/include/cudf/hashing.hpp                  |  2 +-
 cpp/include/cudf/io/functions.hpp             | 19 +++++++-----
 cpp/include/cudf/io/readers.hpp               | 30 +++++++++----------
 cpp/include/cudf/io/writers.hpp               |  6 ++--
 cpp/include/cudf/join.hpp                     |  6 ++--
 cpp/include/cudf/null_mask.hpp                | 10 +++----
 cpp/include/cudf/partitioning.hpp             |  5 ++--
 cpp/include/cudf/replace.hpp                  |  6 ++--
 cpp/include/cudf/scalar/scalar.hpp            | 16 +++++-----
 cpp/include/cudf/scalar/scalar_factories.hpp  | 15 ++++------
 cpp/include/cudf/search.hpp                   |  6 ++--
 cpp/include/cudf/strings/attributes.hpp       |  6 ++--
 cpp/include/cudf/strings/case.hpp             |  6 ++--
 .../cudf/strings/char_types/char_types.hpp    |  6 ++--
 cpp/include/cudf/strings/combine.hpp          |  4 +--
 cpp/include/cudf/strings/contains.hpp         |  6 ++--
 .../cudf/strings/convert/convert_booleans.hpp |  4 +--
 .../cudf/strings/convert/convert_datetime.hpp |  4 +--
 .../cudf/strings/convert/convert_floats.hpp   |  4 +--
 .../cudf/strings/convert/convert_integers.hpp |  6 ++--
 .../cudf/strings/convert/convert_ipv4.hpp     |  4 +--
 .../cudf/strings/convert/convert_urls.hpp     |  4 +--
 cpp/include/cudf/strings/copying.hpp          |  2 +-
 .../cudf/strings/detail/concatenate.hpp       |  2 +-
 .../cudf/strings/detail/copy_if_else.cuh      |  2 +-
 .../cudf/strings/detail/copy_range.cuh        |  2 +-
 cpp/include/cudf/strings/detail/fill.hpp      |  2 +-
 cpp/include/cudf/strings/detail/gather.cuh    |  4 +--
 cpp/include/cudf/strings/detail/merge.cuh     |  2 +-
 .../cudf/strings/detail/modify_strings.cuh    |  2 +-
 cpp/include/cudf/strings/detail/scatter.cuh   |  2 +-
 cpp/include/cudf/strings/detail/utilities.cuh |  2 +-
 cpp/include/cudf/strings/detail/utilities.hpp |  8 ++---
 cpp/include/cudf/strings/extract.hpp          |  2 +-
 cpp/include/cudf/strings/find.hpp             | 10 +++----
 cpp/include/cudf/strings/find_multiple.hpp    |  2 +-
 cpp/include/cudf/strings/findall.hpp          |  2 +-
 cpp/include/cudf/strings/padding.hpp          |  4 +--
 cpp/include/cudf/strings/replace.hpp          |  8 ++---
 cpp/include/cudf/strings/replace_re.hpp       |  6 ++--
 cpp/include/cudf/strings/sorting.hpp          |  2 +-
 cpp/include/cudf/strings/split/partition.hpp  |  4 +--
 cpp/include/cudf/strings/split/split.hpp      |  8 ++---
 .../cudf/strings/strings_column_view.hpp      |  2 +-
 cpp/include/cudf/strings/strip.hpp            |  2 +-
 cpp/include/cudf/strings/substring.hpp        |  4 +--
 cpp/include/cudf/strings/translate.hpp        |  2 +-
 cpp/include/cudf/table/table.hpp              |  3 +-
 cpp/include/cudf/transform.hpp                |  6 ++--
 cpp/include/cudf/unary.hpp                    | 12 ++++----
 cpp/include/nvtext/detail/tokenize.hpp        |  8 ++---
 cpp/include/nvtext/generate_ngrams.hpp        |  2 +-
 cpp/include/nvtext/ngrams_tokenize.hpp        |  2 +-
 cpp/include/nvtext/normalize.hpp              |  2 +-
 cpp/include/nvtext/tokenize.hpp               |  8 ++---
 cpp/src/binaryop/compiled/binary_ops.hpp      |  6 ++--
 cpp/src/dictionary/remove_keys.cu             |  2 +-
 cpp/src/groupby/sort/group_reductions.hpp     | 22 +++++++-------
 cpp/src/io/avro/reader_impl.hpp               |  2 +-
 cpp/src/io/csv/reader_impl.hpp                |  2 +-
 cpp/src/io/csv/writer_impl.hpp                |  2 +-
 cpp/src/io/orc/reader_impl.hpp                |  2 +-
 cpp/src/io/orc/writer_impl.hpp                |  2 +-
 cpp/src/io/parquet/reader_impl.hpp            |  2 +-
 cpp/src/io/parquet/writer_impl.hpp            |  2 +-
 cpp/src/io/utilities/column_buffer.hpp        |  4 +--
 cpp/src/join/join.cu                          |  3 +-
 cpp/src/reductions/scan.cu                    |  2 +-
 cpp/src/strings/attributes.cu                 |  2 +-
 cpp/src/strings/case.cu                       |  2 +-
 cpp/src/strings/find.cu                       |  4 +--
 cpp/src/strings/utilities.cuh                 |  2 +-
 98 files changed, 260 insertions(+), 271 deletions(-)

diff --git a/cpp/include/cudf/binaryop.hpp b/cpp/include/cudf/binaryop.hpp
index a7e94f9a962..cfd7f9f46f8 100644
--- a/cpp/include/cudf/binaryop.hpp
+++ b/cpp/include/cudf/binaryop.hpp
@@ -89,7 +89,7 @@ enum class binary_operator : int32_t {
  * @param lhs         The left operand scalar
  * @param rhs         The right operand column
  * @param output_type The desired data type of the output column
- * @param mr          Memory resource for allocating output column
+ * @param mr          Device memory resource used to allocate the returned column
  * @return std::unique_ptr<column> Output column
  * @throw cudf::logic_error if @p lhs and @p rhs dtypes aren't numeric
  * @throw cudf::logic_error if @p output_type dtype isn't numeric
@@ -114,7 +114,7 @@ std::unique_ptr<column> binary_operation(
  * @param lhs         The left operand column
  * @param rhs         The right operand scalar
  * @param output_type The desired data type of the output column
- * @param mr          Memory resource for allocating output column
+ * @param mr          Device memory resource used to allocate the returned column
  * @return std::unique_ptr<column> Output column
  * @throw cudf::logic_error if @p lhs and @p rhs dtypes aren't numeric
  * @throw cudf::logic_error if @p output_type dtype isn't numeric
@@ -138,7 +138,7 @@ std::unique_ptr<column> binary_operation(
  * @param lhs         The left operand column
  * @param rhs         The right operand column
  * @param output_type The desired data type of the output column
- * @param mr          Memory resource for allocating output column
+ * @param mr          Device memory resource used to allocate the returned column
  * @return std::unique_ptr<column> Output column
  * @throw cudf::logic_error if @p lhs and @p rhs are different sizes
  * @throw cudf::logic_error if @p lhs and @p rhs dtypes aren't fixed-width
@@ -167,7 +167,7 @@ std::unique_ptr<column> binary_operation(
  * @param output_type The desired data type of the output column. It is assumed
  *                    that output_type is compatible with the output data type
  *                    of the function in the PTX code
- * @param mr          Memory resource for allocating output column
+ * @param mr          Device memory resource used to allocate the returned column
  * @return std::unique_ptr<column> Output column
  * @throw cudf::logic_error if @p lhs and @p rhs are different sizes
  * @throw cudf::logic_error if @p lhs and @p rhs dtypes aren't numeric
diff --git a/cpp/include/cudf/column/column.hpp b/cpp/include/cudf/column/column.hpp
index b5300fe980f..280207d9550 100644
--- a/cpp/include/cudf/column/column.hpp
+++ b/cpp/include/cudf/column/column.hpp
@@ -59,7 +59,7 @@ class column {
    *
    * @param other The `column` to copy
    * @param stream The stream on which to execute all allocations and copies
-   * @param mr The resource to use for all allocations
+   * @param mr Device memory resource to use for all allocations
    */
   column(column const& other,
          cudaStream_t stream,
@@ -115,7 +115,7 @@ class column {
    * @param view The view to copy
    * @param stream The stream on which all allocations and copies will be
    * executed
-   * @param mr The resource to use for all allocations
+   * @param mr Device memory resource to use for all allocations
    */
   explicit column(column_view view,
                   cudaStream_t stream                 = 0,
diff --git a/cpp/include/cudf/column/column_factories.hpp b/cpp/include/cudf/column/column_factories.hpp
index da0ed8f658d..de9a46702d9 100644
--- a/cpp/include/cudf/column/column_factories.hpp
+++ b/cpp/include/cudf/column/column_factories.hpp
@@ -241,8 +241,7 @@ std::unique_ptr<column> make_fixed_width_column(
  * (indicating a null string). The size must be the number of bytes.
  * @param stream Optional stream for use with all memory allocation
  *               and device kernels
- * @param mr Optional resource to use for device memory
- *           allocation of the column's `null_mask` and children.
+ * @param mr Device memory resource used for allocation of the column's `null_mask` and children.
  */
 std::unique_ptr<column> make_strings_column(
   const rmm::device_vector<thrust::pair<const char*, size_type>>& strings,
@@ -273,8 +272,7 @@ std::unique_ptr<column> make_strings_column(
  * string_views.
  * @param stream Optional stream for use with all memory allocation
  *               and device kernels
- * @param mr Optional resource to use for device memory
- *           allocation of the column's `null_mask` and children.
+ * @param mr Device memory resource used for allocation of the column's `null_mask` and children.
  */
 std::unique_ptr<column> make_strings_column(
   const rmm::device_vector<string_view>& string_views,
@@ -312,8 +310,7 @@ std::unique_ptr<column> make_strings_column(
  * first invocation of `column::null_count()`
  * @param stream Optional stream for use with all memory allocation
  *               and device kernels
- * @param mr Optional resource to use for device memory
- *           allocation of the column's `null_mask` and children.
+ * @param mr Device memory resource used for allocation of the column's `null_mask` and children.
  */
 std::unique_ptr<column> make_strings_column(
   const rmm::device_vector<char>& strings,
@@ -353,8 +350,7 @@ std::unique_ptr<column> make_strings_column(
  * first invocation of `column::null_count()`
  * @param stream Optional stream for use with all memory allocation
  *               and device kernels
- * @param mr Optional resource to use for device memory
- *           allocation of the column's `null_mask` and children.
+ * @param mr Device memory resource used for allocation of the column's `null_mask` and children.
  */
 std::unique_ptr<column> make_strings_column(
   const std::vector<char>& strings,
@@ -382,8 +378,7 @@ std::unique_ptr<column> make_strings_column(
  *                  Arrow format for nulls is used for interpeting this bitmask.
  * @param stream Optional stream for use with all memory allocation
  *               and device kernels
- * @param mr Optional resource to use for device memory
- *           allocation of the column's `null_mask` and children.
+ * @param mr Device memory resource used for allocation of the column's `null_mask` and children.
  */
 std::unique_ptr<column> make_strings_column(
   size_type num_strings,
@@ -406,7 +401,7 @@ std::unique_ptr<column> make_strings_column(
  * @param size The number of rows for the output column.
  * @param stream Optional stream for use with all memory allocation
  *               and device kernels
- * @param mr Optional resource to use for device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  */
 std::unique_ptr<column> make_column_from_scalar(
   scalar const& s,
diff --git a/cpp/include/cudf/concatenate.hpp b/cpp/include/cudf/concatenate.hpp
index 7ba1d0b2fe9..68beac55dea 100644
--- a/cpp/include/cudf/concatenate.hpp
+++ b/cpp/include/cudf/concatenate.hpp
@@ -35,8 +35,7 @@ namespace cudf {
  * Returns empty `device_buffer` if the column is not nullable
  *
  * @param views Vector of column views whose bitmask will to be concatenated
- * @param mr Optional, the memory resource that will be used for allocating
- * the device memory for the new device_buffer
+ * @param mr Device memory resource used for allocating the new device_buffer
  * @return rmm::device_buffer A `device_buffer` containing the bitmasks of all
  * the column views in the views vector
  */
@@ -52,7 +51,7 @@ rmm::device_buffer concatenate_masks(
  *
  * @param columns_to_concat The column views to be concatenated into a single
  * column
- * @param mr Optional The resource to use for all allocations
+ * @param mr Device memory resource used to allocate the returned column.
  * @return Unique pointer to a single table having all the rows from the
  * elements of `columns_to_concat` respectively in the same order.
  */
@@ -83,7 +82,7 @@ std::unique_ptr<column> concatenate(
  *
  * @param tables_to_concat The table views to be concatenated into a single
  * table
- * @param mr Optional The resource to use for all allocations
+ * @param mr Device memory resource used to allocate the returned table.
  * @return Unique pointer to a single table having all the rows from the
  * elements of `tables_to_concat` respectively in the same order.
  **/
diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp
index a77216f81b2..9f4073e31dc 100644
--- a/cpp/include/cudf/copying.hpp
+++ b/cpp/include/cudf/copying.hpp
@@ -100,7 +100,7 @@ std::unique_ptr<table> gather(
  * are to be scattered
  * @param check_bounds Optionally perform bounds checking on the values of
  * `scatter_map` and throw an error if any of its values are out of bounds.
- * @param mr The resource to use for all allocations
+ * @param mr Device memory resource used to allocate the returned table.
  * @return Result of scattering values from source to target
  */
 std::unique_ptr<table> scatter(
@@ -140,7 +140,7 @@ std::unique_ptr<table> scatter(
  * are to be scattered
  * @param check_bounds Optionally perform bounds checking on the values of
  * `scatter_map` and throw an error if any of its values are out of bounds.
- * @param mr The resource to use for all allocations
+ * @param mr Device memory resource used to allocate the returned table.
  * @return Result of scattering values from source to target
  */
 std::unique_ptr<table> scatter(
@@ -273,7 +273,7 @@ void copy_range_in_place(column_view const& source,
  * @param source_end The index of the last element in the source range
  * (exclusive)
  * @param target_begin The starting index of the target range (inclusive)
- * @param mr Memory resource to allocate the result target column.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return std::unique_ptr<column> The result target column
  */
 std::unique_ptr<column> copy_range(
@@ -709,7 +709,7 @@ std::unique_ptr<table> boolean_mask_scatter(
  *
  * @param input Column view to get the element from
  * @param index Index into `input` to get the element at
- * @param mr Optional, The resource to use for all returned allocations
+ * @param mr Device memory resource used to allocate the returned scalar.
  * @return std::unique_ptr<scalar> Scalar containing the single value
  */
 std::unique_ptr<scalar> get_element(
diff --git a/cpp/include/cudf/detail/binaryop.hpp b/cpp/include/cudf/detail/binaryop.hpp
index 4ac1c3614ea..356f9da535d 100644
--- a/cpp/include/cudf/detail/binaryop.hpp
+++ b/cpp/include/cudf/detail/binaryop.hpp
@@ -34,7 +34,7 @@ namespace detail {
  * @param lhs         The left operand scalar
  * @param rhs         The right operand column
  * @param output_type The desired data type of the output column
- * @param mr          Memory resource for allocating output column
+ * @param mr          Device memory resource used to allocate the returned column
  * @param stream      CUDA stream on which to execute kernels
  * @return std::unique_ptr<column> Output column
  */
@@ -59,7 +59,7 @@ std::unique_ptr<column> binary_operation(
  * @param lhs         The left operand column
  * @param rhs         The right operand scalar
  * @param output_type The desired data type of the output column
- * @param mr          Memory resource for allocating output column
+ * @param mr          Device memory resource used to allocate the returned column
  * @param stream      CUDA stream on which to execute kernels
  * @return std::unique_ptr<column> Output column
  */
@@ -84,7 +84,7 @@ std::unique_ptr<column> binary_operation(
  * @param lhs         The left operand column
  * @param rhs         The right operand column
  * @param output_type The desired data type of the output column
- * @param mr          Memory resource for allocating output column
+ * @param mr          Device memory resource used to allocate the returned column
  * @param stream      CUDA stream on which to execute kernels
  * @return std::unique_ptr<column> Output column
  */
@@ -113,7 +113,7 @@ std::unique_ptr<column> binary_operation(
  * @param output_type The desired data type of the output column. It is assumed
  *                    that output_type is compatible with the output data type
  *                    of the function in the PTX code
- * @param mr          Memory resource for allocating output column
+ * @param mr          Device memory resource used to allocate the returned column
  * @param stream      CUDA stream on which to execute kernels
  * @return std::unique_ptr<column> Output column
  */
diff --git a/cpp/include/cudf/detail/copy_if_else.cuh b/cpp/include/cudf/detail/copy_if_else.cuh
index 3a43e3bcb49..bcf8d2d3d10 100644
--- a/cpp/include/cudf/detail/copy_if_else.cuh
+++ b/cpp/include/cudf/detail/copy_if_else.cuh
@@ -153,7 +153,7 @@ __launch_bounds__(block_size) __global__
  * @param rhs         Begin iterator of rhs range
  * @param filter      Function of type `FilterFn` which determines for index `i` where to get the
  *                    corresponding output value from
- * @param mr          Memory resource to use for allocating the output
+ * @param mr          Device memory resource used to allocate the returned column
  * @param stream      CUDA stream to perform the computation in
  * @return            A new column that contains the values from either `lhs` or `rhs` as determined
  *                    by `filter[i]`
diff --git a/cpp/include/cudf/detail/copy_range.cuh b/cpp/include/cudf/detail/copy_range.cuh
index 3bf5e80320a..4a0b043bcd7 100644
--- a/cpp/include/cudf/detail/copy_range.cuh
+++ b/cpp/include/cudf/detail/copy_range.cuh
@@ -255,7 +255,7 @@ void copy_range_in_place(column_view const& source,
  * @param source_end The index of the last element in the source range
  * (exclusive)
  * @param target_begin The starting index of the target range (inclusive)
- * @param mr Memory resource to allocate the result target column.
+ * @param mr Device memory resource used to allocate the returned column
  * @param stream CUDA stream to run this function
  * @return std::unique_ptr<column> The result target column
  */
diff --git a/cpp/include/cudf/detail/dlpack.hpp b/cpp/include/cudf/detail/dlpack.hpp
index dfafbb1b802..474ec29d177 100644
--- a/cpp/include/cudf/detail/dlpack.hpp
+++ b/cpp/include/cudf/detail/dlpack.hpp
@@ -32,7 +32,7 @@ namespace detail {
  * @throw cudf::logic_error if the any of the DLTensor fields are unsupported
  *
  * @param managed_tensor a 1D or 2D column-major (Fortran order) tensor
- * @param mr Optional resource to use for device memory allocation
+ * @param mr Device memory resource used to allocate the returned table
  * @param stream Optional stream on which to execute
  *
  * @return Table with a copy of the tensor data
@@ -56,7 +56,7 @@ std::unique_ptr<experimental::table> from_dlpack(
  * or if any of columns have non-zero null count
  *
  * @param input Table to convert to DLPack
- * @param mr Optional resource to use for device memory allocation
+ * @param mr Device memory resource used to allocate the returned DLManagedTensor
  * @param stream Optional stream on which to execute
  *
  * @return 1D or 2D DLPack tensor with a copy of the table data, or nullptr
diff --git a/cpp/include/cudf/detail/fill.hpp b/cpp/include/cudf/detail/fill.hpp
index 3adf3d0e5d8..0b17c314432 100644
--- a/cpp/include/cudf/detail/fill.hpp
+++ b/cpp/include/cudf/detail/fill.hpp
@@ -80,7 +80,7 @@ void fill_in_place(mutable_column_view& destination,
  * @param begin The starting index of the fill range (inclusive)
  * @param end The index of the last element in the fill range (exclusive)
  * @param value The scalar value to fill
- * @param mr Memory resource to allocate the result output column
+ * @param mr Device memory resource used to allocate the returned column
  * @param stream CUDA stream to run this function
  * @return std::unique_ptr<column> The result output column
  */
diff --git a/cpp/include/cudf/detail/gather.cuh b/cpp/include/cudf/detail/gather.cuh
index 1439eff5cae..9316ee939f5 100644
--- a/cpp/include/cudf/detail/gather.cuh
+++ b/cpp/include/cudf/detail/gather.cuh
@@ -112,7 +112,7 @@ struct column_gatherer_impl {
    *map
    * @param gather_map_end End of iterator range of integral values representing the gather map
    * @param nullify_out_of_bounds Nullify values in `gather_map` that are out of bounds
-   * @param mr Memory resource to use for all allocations
+   * @param mr Device memory resource used to allocate the returned column
    * @param stream CUDA stream on which to execute kernels
    */
   std::unique_ptr<column> operator()(column_view const& source_column,
@@ -171,7 +171,7 @@ struct column_gatherer_impl<string_view, MapItType> {
    *map
    * @param gather_map_end End of iterator range of integral values representing the gather map
    * @param nullify_out_of_bounds Nullify values in `gather_map` that are out of bounds
-   * @param mr Memory resource to use for all allocations
+   * @param mr Device memory resource used to allocate the returned column
    * @param stream CUDA stream on which to execute kernels
    */
   std::unique_ptr<column> operator()(column_view const& source_column,
@@ -205,7 +205,7 @@ struct column_gatherer_impl<dictionary32, MapItType> {
    * map
    * @param gather_map_end End of iterator range of integral values representing the gather map
    * @param nullify_out_of_bounds Nullify values in `gather_map` that are out of bounds
-   * @param mr Memory resource to use for all allocations
+   * @param mr Device memory resource used to allocate the returned column
    * @param stream CUDA stream on which to execute kernels
    * @return New dictionary column with gathered rows.
    */
@@ -272,7 +272,7 @@ struct column_gatherer {
    * map
    * @param gather_map_end End of iterator range of integral values representing the gather map
    * @param nullify_out_of_bounds Nullify values in `gather_map` that are out of bounds
-   * @param mr Memory resource to use for all allocations
+   * @param mr Device memory resource used to allocate the returned column
    * @param stream CUDA stream on which to execute kernels
    */
   template <typename Element, typename MapIterator>
diff --git a/cpp/include/cudf/detail/hashing.hpp b/cpp/include/cudf/detail/hashing.hpp
index 83bc9b9734f..ab3aa680051 100644
--- a/cpp/include/cudf/detail/hashing.hpp
+++ b/cpp/include/cudf/detail/hashing.hpp
@@ -32,7 +32,7 @@ namespace detail {
  * @param input The table to partition
  * @param columns_to_hash Indices of input columns to hash
  * @param num_partitions The number of partitions to use
- * @param mr Optional resource to use for device memory allocation
+ * @param mr Device memory resource used to allocate the returned table
  * @param stream Optional stream to use for allocations and copies
  *
  * @returns An output table and a vector of row offsets to each partition
@@ -50,7 +50,7 @@ std::pair<std::unique_ptr<experimental::table>, std::vector<size_type>> hash_par
  * @param input The table of columns to hash
  * @param initial_hash Optional vector of initial hash values for each column.
  * If this vector is empty then each element will be hashed as-is.
- * @param mr Optional resource to use for device memory allocation
+ * @param mr Device memory resource used to allocate the returned column
  * @param stream Optional stream to use for allocations and copies
  *
  * @returns A column where each row is the hash of a column from the input
diff --git a/cpp/include/cudf/detail/reduction_functions.hpp b/cpp/include/cudf/detail/reduction_functions.hpp
index 92f521712d9..fd4815343f9 100644
--- a/cpp/include/cudf/detail/reduction_functions.hpp
+++ b/cpp/include/cudf/detail/reduction_functions.hpp
@@ -32,7 +32,7 @@ namespace reduction {
  *
  * @param col input column to compute sum
  * @param output_dtype data type of return type and typecast elements of input column
- * @param mr The resource used to allocate the device memory for the returned scalar
+ * @param mr Device memory resource used to allocate the returned scalar
  * @param stream Stream to use for any kernel launches.
  * @return Sum as scalar of type `output_dtype`.
  */
@@ -49,7 +49,7 @@ std::unique_ptr<scalar> sum(column_view const& col,
  *
  * @param col input column to compute minimum.
  * @param output_dtype data type of return type and typecast elements of input column
- * @param mr The resource used to allocate the device memory for the returned scalar
+ * @param mr Device memory resource used to allocate the returned scalar
  * @param stream Stream to use for any kernel launches.
  * @return Minimum element as scalar of type `output_dtype`.
  */
@@ -66,7 +66,7 @@ std::unique_ptr<scalar> min(column_view const& col,
  *
  * @param col input column to compute maximum.
  * @param output_dtype data type of return type and typecast elements of input column
- * @param mr The resource used to allocate the device memory for the returned scalar
+ * @param mr Device memory resource used to allocate the returned scalar
  * @param stream Stream to use for any kernel launches.
  * @return Maximum element as scalar of type `output_dtype`.
  */
@@ -84,7 +84,7 @@ std::unique_ptr<scalar> max(column_view const& col,
  *
  * @param col input column to compute any_of.
  * @param output_dtype data type of return type and typecast elements of input column
- * @param mr The resource used to allocate the device memory for the returned scalar
+ * @param mr Device memory resource used to allocate the returned scalar
  * @param stream Stream to use for any kernel launches.
  * @return bool scalar if any of elements is true when typecasted to bool
  */
@@ -102,7 +102,7 @@ std::unique_ptr<scalar> any(column_view const& col,
  *
  * @param col input column to compute all_of.
  * @param output_dtype data type of return type and typecast elements of input column
- * @param mr The resource used to allocate the device memory for the returned scalar
+ * @param mr Device memory resource used to allocate the returned scalar
  * @param stream Stream to use for any kernel launches.
  * @return bool scalar if all of elements is true when typecasted to bool
  */
@@ -120,7 +120,7 @@ std::unique_ptr<scalar> all(column_view const& col,
  *
  * @param col input column to compute product.
  * @param output_dtype data type of return type and typecast elements of input column
- * @param mr The resource used to allocate the device memory for the returned scalar
+ * @param mr Device memory resource used to allocate the returned scalar
  * @param stream Stream to use for any kernel launches.
  * @return Product as scalar of type `output_dtype`.
  */
@@ -140,7 +140,7 @@ std::unique_ptr<scalar> product(
  *
  * @param col input column to compute sum of squares.
  * @param output_dtype data type of return type and typecast elements of input column
- * @param mr The resource used to allocate the device memory for the returned scalar
+ * @param mr Device memory resource used to allocate the returned scalar
  * @param stream Stream to use for any kernel launches.
  * @return Sum of squares as scalar of type `output_dtype`.
  */
@@ -160,7 +160,7 @@ std::unique_ptr<scalar> sum_of_squares(
  *
  * @param col input column to compute mean.
  * @param output_dtype data type of return type and typecast elements of input column.
- * @param mr The resource used to allocate the device memory for the returned scalar.
+ * @param mr Device memory resource used to allocate the returned scalar.
  * @param stream Stream to use for any kernel launches.
  * @return Mean as scalar of type `output_dtype`.
  */
@@ -179,7 +179,7 @@ std::unique_ptr<scalar> mean(column_view const& col,
  *
  * @param col input column to compute variance.
  * @param output_dtype data type of return type and typecast elements of input column.
- * @param mr The resource used to allocate the device memory for the returned scalar.
+ * @param mr Device memory resource used to allocate the returned scalar.
  * @param stream Stream to use for any kernel launches.
  * @return Variance as scalar of type `output_dtype`.
  */
@@ -200,7 +200,7 @@ std::unique_ptr<scalar> variance(
  *
  * @param col input column to compute standard deviation.
  * @param output_dtype data type of return type and typecast elements of input column.
- * @param mr The resource used to allocate the device memory for the returned scalar.
+ * @param mr Device memory resource used to allocate the returned scalar.
  * @param stream Stream to use for any kernel launches.
  * @return Standard deviation as scalar of type `output_dtype`.
  */
diff --git a/cpp/include/cudf/detail/repeat.hpp b/cpp/include/cudf/detail/repeat.hpp
index 895f4cb34b8..7df571097ac 100644
--- a/cpp/include/cudf/detail/repeat.hpp
+++ b/cpp/include/cudf/detail/repeat.hpp
@@ -51,7 +51,7 @@ namespace detail {
  * @param input_table Input table
  * @param count Non-nullable column of a integral type
  * @param check_count Whether to check count (negative values and overflow)
- * @param mr Memory resource to allocate the result output table
+ * @param mr Device memory resource used to allocate the returned table
  * @param stream CUDA stream to run this function
  * @return std::unique_ptr<table> The result table containing the repetitions
  */
@@ -78,7 +78,7 @@ std::unique_ptr<table> repeat(table_view const& input_table,
  *
  * @param input_table Input table
  * @param count Non-null scalar of a integral type
- * @param mr Memory resource to allocate the result output table
+ * @param mr Device memory resource used to allocate the returned table
  * @param stream CUDA stream to run this function
  * @return std::unique_ptr<table> The result table containing the repetitions
  */
diff --git a/cpp/include/cudf/detail/scatter.hpp b/cpp/include/cudf/detail/scatter.hpp
index 09d71f58eb3..aa99c43ed80 100644
--- a/cpp/include/cudf/detail/scatter.hpp
+++ b/cpp/include/cudf/detail/scatter.hpp
@@ -56,7 +56,7 @@ namespace detail {
  * are to be scattered
  * @param check_bounds Optionally perform bounds checking on the values of
  * `scatter_map` and throw an error if any of its values are out of bounds.
- * @param mr The resource to use for all allocations
+ * @param mr Device memory resource used to allocate the returned table
  * @param stream The stream to use for CUDA operations
  * @return Result of scattering values from source to target
  **/
@@ -96,7 +96,7 @@ std::unique_ptr<table> scatter(
  * are to be scattered
  * @param check_bounds Optionally perform bounds checking on the values of
  * `scatter_map` and throw an error if any of its values are out of bounds.
- * @param mr The resource to use for all allocations
+ * @param mr Device memory resource used to allocate the returned table
  * @param stream The stream to use for CUDA operations
  * @return Result of scattering values from source to target
  **/
diff --git a/cpp/include/cudf/detail/search.hpp b/cpp/include/cudf/detail/search.hpp
index 8fcb9648584..b9a36cc796f 100644
--- a/cpp/include/cudf/detail/search.hpp
+++ b/cpp/include/cudf/detail/search.hpp
@@ -55,7 +55,7 @@ namespace detail {
  * @param values          Find insert locations for these values
  * @param column_order    Vector of column sort order
  * @param null_precedence Vector of null_precedence enums values
- * @param mr              Device memory resource to use for device memory allocation
+ * @param mr              Device memory resource used to allocate the returned column
  * @param stream          Stream to use for any kernel launches.
  * @return std::unique_ptr<column> A non-nullable column of cudf::size_type elements
  * containing the insertion points.
@@ -99,7 +99,7 @@ std::unique_ptr<column> lower_bound(
  * @param values          Find insert locations for these values
  * @param column_order    Vector of column sort order
  * @param null_precedence Vector of null_precedence enums values
- * @param mr              Device memory resource to use for device memory allocation
+ * @param mr              Device memory resource used to allocate the returned column
  * @param stream          Stream to use for any kernel launches.
  * @return std::unique_ptr<column> A non-nullable column of cudf::size_type elements
  * containing the insertion points.
@@ -129,7 +129,7 @@ std::unique_ptr<column> upper_bound(
  *
  * @param col      A column object
  * @param value    A scalar value to search for in `col`
- * @param mr       Device memory resource to use for device memory allocation
+ * @param mr       Device memory resource to use for device memory allocation
  * @param stream   Stream to use for any kernel launches.
  * @return bool    If `value` is found in `column` true, else false.
  */
@@ -157,7 +157,7 @@ bool contains(column_view const& col,
  *
  * @param haystack  A column object
  * @param needles   A column of values to search for in `col`
- * @param mr        Device memory resource to use for device memory allocation
+ * @param mr        Device memory resource used to allocate the returned column
  * @param stream    Stream to use for any kernel launches.
  * @return std::unique_ptr<column> A column of bool elements containing
  * true if the corresponding entry in haystack is contained in needles and false
diff --git a/cpp/include/cudf/detail/transform.hpp b/cpp/include/cudf/detail/transform.hpp
index 413ca8131ba..2be6f7f3cdf 100644
--- a/cpp/include/cudf/detail/transform.hpp
+++ b/cpp/include/cudf/detail/transform.hpp
@@ -35,7 +35,7 @@ namespace detail {
  * @param unary_udf     The PTX/CUDA string of the unary function to apply
  * @param outout_type   The output type that is compatible with the output type in the UDF
  * @param is_ptx        true: the UDF is treated as PTX code; false: the UDF is treated as CUDA code
- * @param mr            The memory resource to use for for all device allocations
+ * @param mr            Device memory resource used to allocate the returned column
  * @param stream        CUDA stream on which to execute kernels
  * @return cudf::column The column resulting from applying the unary function to
  *                      every element of the input
diff --git a/cpp/include/cudf/detail/unary.hpp b/cpp/include/cudf/detail/unary.hpp
index 282a1579713..3c010661dc4 100644
--- a/cpp/include/cudf/detail/unary.hpp
+++ b/cpp/include/cudf/detail/unary.hpp
@@ -32,7 +32,7 @@ namespace detail {
  * @param begin Begining of the sequence of elements
  * @param end End of the sequence of elements
  * @param p Predicate to be applied to each element in `[begin,end)`
- * @param mr Optional, The resource to use for all allocations
+ * @param mr Device memory resource used to allocate the returned column
  * @param stream Optional CUDA stream on which to execute kernels
  *
  * @returns std::unique_ptr<cudf::column> A column of type `BOOL8,` with `true` representing
@@ -62,7 +62,7 @@ std::unique_ptr<column> true_if(
  *
  * @param input A `column_view` as input
  * @param op operation to perform
- * @param mr Optional, The resource to use for all allocations
+ * @param mr Device memory resource used to allocate the returned column
  * @param stream Optional CUDA stream on which to execute kernels
  *
  * @returns std::unique_ptr<cudf::column> Result of the operation
@@ -79,7 +79,7 @@ std::unique_ptr<cudf::column> unary_operation(
  *
  * @param column_view Input column
  * @param out_type Desired datatype of output column
- * @param mr Optional, The resource to use for all allocations
+ * @param mr Device memory resource used to allocate the returned column
  * @param stream Optional CUDA stream on which to execute kernels
  *
  * @returns unique_ptr<column> Result of the cast operation
diff --git a/cpp/include/cudf/dictionary/detail/encode.hpp b/cpp/include/cudf/dictionary/detail/encode.hpp
index 3bcf09d84ac..67e205b0229 100644
--- a/cpp/include/cudf/dictionary/detail/encode.hpp
+++ b/cpp/include/cudf/dictionary/detail/encode.hpp
@@ -44,7 +44,7 @@ namespace detail {
  *
  * @param column The column to dictionary encode.
  * @param indices_type The integer type to use for the indices.
- * @param mr Optional resource to use for device memory allocation.
+ * @param mr Device memory resource used to allocate the returned column.
  * @param stream Optional stream on which to issue all memory allocation and
  *               device kernels.
  * @return Returns a dictionary column.
@@ -66,7 +66,7 @@ std::unique_ptr<column> encode(
  * ```
  *
  * @param dictionary_column Existing dictionary column.
- * @param mr Resource for allocating memory for the output.
+ * @param mr Device memory resource used to allocate the returned column.
  * @param stream Optional stream on which to issue all memory allocation and
  *               device kernels.
  * @return New column with type matching the dictionary_column's keys.
diff --git a/cpp/include/cudf/dictionary/detail/update_keys.hpp b/cpp/include/cudf/dictionary/detail/update_keys.hpp
index 190bb6d2c1b..d5c9fdcb9f6 100644
--- a/cpp/include/cudf/dictionary/detail/update_keys.hpp
+++ b/cpp/include/cudf/dictionary/detail/update_keys.hpp
@@ -27,7 +27,7 @@ namespace detail {
  *
  * @param dictionary_column Existing dictionary column.
  * @param new_keys New keys to incorporate into the dictionary_column
- * @param mr Resource for allocating memory for the output.
+ * @param mr Device memory resource used to allocate the returned column.
  * @param stream Stream to use for any CUDA calls.
  * @return New dictionary column.
  */
@@ -43,7 +43,7 @@ std::unique_ptr<column> add_keys(
  *
  * @param dictionary_column Existing dictionary column.
  * @param keys_to_remove The keys to remove from the dictionary_column
- * @param mr Resource for allocating memory for the output.
+ * @param mr Device memory resource used to allocate the returned column.
  * @param stream Stream to use for any CUDA calls.
  * @return New dictionary column.
  */
@@ -58,7 +58,7 @@ std::unique_ptr<column> remove_keys(
  * const&,mm::mr::device_memory_resource*)
  *
  * @param dictionary_column Existing dictionary column.
- * @param mr Resource for allocating memory for the output.
+ * @param mr Device memory resource used to allocate the returned column.
  * @param stream Stream to use for any CUDA calls.
  * @return New dictionary column.
  */
@@ -73,7 +73,7 @@ std::unique_ptr<column> remove_unused_keys(
  *
  * @param dictionary_column Existing dictionary column.
  * @param keys New keys to use for the output column. Must not contain nulls.
- * @param mr Resource for allocating memory for the output.
+ * @param mr Device memory resource used to allocate the returned column.
  * @param stream Stream to use for any CUDA calls.
  * @return New dictionary column.
  */
diff --git a/cpp/include/cudf/dictionary/dictionary_factories.hpp b/cpp/include/cudf/dictionary/dictionary_factories.hpp
index 01a991f9168..62ec8ebc0a4 100644
--- a/cpp/include/cudf/dictionary/dictionary_factories.hpp
+++ b/cpp/include/cudf/dictionary/dictionary_factories.hpp
@@ -50,7 +50,7 @@ namespace cudf {
  *
  * @param keys_column Column of unique, ordered values to use as the new dictionary column's keys.
  * @param indices_column Indices to use for the new dictionary column.
- * @param mr Resource for allocating memory for the output.
+ * @param mr Device memory resource used to allocate the returned column.
  * @param stream Optional stream on which to issue all memory allocation and
  *               device kernels.
  * @return New dictionary column.
diff --git a/cpp/include/cudf/dictionary/encode.hpp b/cpp/include/cudf/dictionary/encode.hpp
index 9273971e52f..596a90644b4 100644
--- a/cpp/include/cudf/dictionary/encode.hpp
+++ b/cpp/include/cudf/dictionary/encode.hpp
@@ -54,7 +54,7 @@ namespace dictionary {
  *
  * @param column The column to dictionary encode.
  * @param indices_type The integer type to use for the indices.
- * @param mr Optional resource to use for device memory allocation.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return Returns a dictionary column.
  */
 std::unique_ptr<column> encode(
@@ -73,7 +73,7 @@ std::unique_ptr<column> encode(
  * @endcode
  *
  * @param dictionary_column Existing dictionary column.
- * @param mr Resource for allocating memory for the output.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New column with type matching the dictionary_column's keys.
  */
 std::unique_ptr<column> decode(
diff --git a/cpp/include/cudf/dictionary/update_keys.hpp b/cpp/include/cudf/dictionary/update_keys.hpp
index 7e9bbb90e94..c57e503e94f 100644
--- a/cpp/include/cudf/dictionary/update_keys.hpp
+++ b/cpp/include/cudf/dictionary/update_keys.hpp
@@ -48,7 +48,7 @@ namespace dictionary {
  *
  * @param dictionary_column Existing dictionary column.
  * @param new_keys New keys to incorporate into the dictionary_column
- * @param mr Resource for allocating memory for the output.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New dictionary column.
  */
 std::unique_ptr<column> add_keys(
@@ -78,7 +78,7 @@ std::unique_ptr<column> add_keys(
  *
  * @param dictionary_column Existing dictionary column.
  * @param keys_to_remove The keys to remove from the dictionary_column
- * @param mr Resource for allocating memory for the output.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New dictionary column.
  */
 std::unique_ptr<column> remove_keys(
@@ -99,7 +99,7 @@ std::unique_ptr<column> remove_keys(
  * @endcode
  *
  * @param dictionary_column Existing dictionary column.
- * @param mr Resource for allocating memory for the output.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New dictionary column.
  */
 std::unique_ptr<column> remove_unused_keys(
@@ -130,7 +130,7 @@ std::unique_ptr<column> remove_unused_keys(
  *
  * @param dictionary_column Existing dictionary column.
  * @param keys New keys to use for the output column. Must not contain nulls.
- * @param mr Resource for allocating memory for the output.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New dictionary column.
  */
 std::unique_ptr<column> set_keys(
diff --git a/cpp/include/cudf/dlpack.hpp b/cpp/include/cudf/dlpack.hpp
index 7809f12de99..c66498e93f2 100644
--- a/cpp/include/cudf/dlpack.hpp
+++ b/cpp/include/cudf/dlpack.hpp
@@ -39,7 +39,7 @@ namespace cudf {
  * @throw cudf::logic_error if the any of the DLTensor fields are unsupported
  *
  * @param managed_tensor a 1D or 2D column-major (Fortran order) tensor
- * @param mr Optional resource to use for device memory allocation
+ * @param mr Device memory resource used to allocate the returned table.
  *
  * @return Table with a copy of the tensor data
  */
@@ -61,7 +61,7 @@ std::unique_ptr<experimental::table> from_dlpack(
  * or if any of columns have non-zero null count
  *
  * @param input Table to convert to DLPack
- * @param mr Optional resource to use for device memory allocation
+ * @param mr Device memory resource used to allocate the returned DLPack tensor.
  *
  * @return 1D or 2D DLPack tensor with a copy of the table data, or nullptr
  */
diff --git a/cpp/include/cudf/filling.hpp b/cpp/include/cudf/filling.hpp
index f0670d0d210..b09f80de17f 100644
--- a/cpp/include/cudf/filling.hpp
+++ b/cpp/include/cudf/filling.hpp
@@ -77,7 +77,7 @@ void fill_in_place(mutable_column_view& destination,
  * @param begin The starting index of the fill range (inclusive)
  * @param end The index of the last element in the fill range (exclusive)
  * @param value The scalar value to fill
- * @param mr Memory resource to allocate the result output column
+ * @param mr Device memory resource used to allocate the returned column
  * @return std::unique_ptr<column> The result output column
  */
 std::unique_ptr<column> fill(column_view const& input,
@@ -113,7 +113,7 @@ std::unique_ptr<column> fill(column_view const& input,
  * @param input_table Input table
  * @param count Non-nullable column of a integral type
  * @param check_count Whether to check count (negative values and overflow)
- * @param mr Memory resource to allocate the result output table
+ * @param mr Device memory resource used to allocate the returned table
  * @return std::unique_ptr<table> The result table containing the repetitions
  */
 std::unique_ptr<table> repeat(
@@ -139,7 +139,7 @@ std::unique_ptr<table> repeat(
  *
  * @param input_table Input table
  * @param count Non-null scalar of a integral type
- * @param mr Memory resource to allocate the result output table
+ * @param mr Device memory resource used to allocate the returned table.
  * @return std::unique_ptr<table> The result table containing the repetitions
  */
 std::unique_ptr<table> repeat(
@@ -167,7 +167,7 @@ std::unique_ptr<table> repeat(
  * @param size Size of the output column
  * @param init First value in the sequence
  * @param step Increment value
- * @param mr Memory resource to allocate the result output column
+ * @param mr Device memory resource used to allocate the returned column
  * @return std::unique_ptr<column> The result table containing the sequence
  **/
 std::unique_ptr<column> sequence(
@@ -193,7 +193,7 @@ std::unique_ptr<column> sequence(
  *
  * @param size Size of the output column
  * @param init First value in the sequence
- * @param mr Memory resource to allocate the result output column
+ * @param mr Device memory resource used to allocate the returned column
  * @return std::unique_ptr<column> The result table containing the sequence
  **/
 std::unique_ptr<column> sequence(
diff --git a/cpp/include/cudf/groupby.hpp b/cpp/include/cudf/groupby.hpp
index 11104ccc2bc..9d44a1e346a 100644
--- a/cpp/include/cudf/groupby.hpp
+++ b/cpp/include/cudf/groupby.hpp
@@ -156,7 +156,7 @@ class groupby {
    *
    * @param requests The set of columns to aggregate and the aggregations to
    * perform
-   * @param mr Memory resource used to allocate the returned table and columns
+   * @param mr Device memory resource used to allocate the returned table and columns
    * @return Pair containing the table with each group's unique key and
    * a vector of aggregation_results for each request in the same order as
    * specified in `requests`.
@@ -187,7 +187,7 @@ class groupby {
    * and the `values` of the `groups` object will be `nullptr`.
    *
    * @param values Table representing values on which a groupby operation is to be performed
-   * @param mr Memory resource used to allocate the returned tables
+   * @param mr Device memory resource used to allocate the tables in the returned groups
    * @return A `groups` object representing grouped keys and values
    */
   groups get_groups(cudf::table_view values             = {},
diff --git a/cpp/include/cudf/hashing.hpp b/cpp/include/cudf/hashing.hpp
index 2fec3913968..b26131c7c90 100644
--- a/cpp/include/cudf/hashing.hpp
+++ b/cpp/include/cudf/hashing.hpp
@@ -30,7 +30,7 @@ namespace cudf {
  * @param input The table of columns to hash
  * @param initial_hash Optional vector of initial hash values for each column.
  * If this vector is empty then each element will be hashed as-is.
- * @param mr Optional resource to use for device memory allocation
+ * @param mr Device memory resource used to allocate the returned column.
  *
  * @returns A column where each row is the hash of a column from the input
  */
diff --git a/cpp/include/cudf/io/functions.hpp b/cpp/include/cudf/io/functions.hpp
index fcf506ef73d..997bea1427c 100644
--- a/cpp/include/cudf/io/functions.hpp
+++ b/cpp/include/cudf/io/functions.hpp
@@ -75,7 +75,8 @@ struct read_avro_args {
  * @endcode
  *
  * @param args Settings for controlling reading behavior
- * @param mr Optional resource to use for device memory allocation
+ * @param mr Device memory resource used to allocate device memory of the table in the returned
+ * table_with_metadata
  *
  * @return The set of columns along with metadata
  */
@@ -149,7 +150,8 @@ struct read_json_args {
  * @endcode
  *
  * @param args Settings for controlling reading behavior
- * @param mr Optional resource to use for device memory allocation
+ * @param mr Device memory resource used to allocate device memory of the table in the returned
+ * table_with_metadata
  *
  * @return The set of columns along with metadata
  */
@@ -265,8 +267,9 @@ struct read_csv_args {
  * @endcode
  *
  * @param args Settings for controlling reading behavior
- * @param mr Optional resource to use for device memory allocation
- *
+ * @param mr Device memory resource used to allocate device memory of the table in the returned
+ * table_with_metadata
+ *
  * @return The set of columns along with metadata
  */
 table_with_metadata read_csv(read_csv_args const& args,
@@ -398,7 +401,8 @@ struct read_orc_args {
  * @endcode
  *
  * @param args Settings for controlling reading behavior
- * @param mr Optional resource to use for device memory allocation
+ * @param mr Device memory resource used to allocate device memory of the table in the returned
+ * table_with_metadata
  *
  * @return The set of columns
  */
@@ -453,7 +457,8 @@ struct read_parquet_args {
  * @endcode
  *
  * @param args Settings for controlling reading behavior
- * @param mr Optional resource to use for device memory allocation
+ * @param mr Device memory resource used to allocate device memory of the table in the returned
+ * table_with_metadata
  *
  * @return The set of columns along with metadata
  */
@@ -510,7 +515,7 @@ struct write_orc_args {
  * @endcode
  *
  * @param args Settings for controlling reading behavior
- * @param mr Optional resource to use for device memory allocation
+ * @param mr Device memory resource to use for device memory allocation
  */
 void write_orc(write_orc_args const& args,
                rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
diff --git a/cpp/include/cudf/io/readers.hpp b/cpp/include/cudf/io/readers.hpp
index 07be64a9801..84a96652acb 100644
--- a/cpp/include/cudf/io/readers.hpp
+++ b/cpp/include/cudf/io/readers.hpp
@@ -77,7 +77,7 @@ class reader {
    *
    * @param filepath Path to whole dataset
    * @param options Settings for controlling reading behavior
-   * @param mr Optional resource to use for device memory allocation
+   * @param mr Device memory resource to use for device memory allocation
    */
   explicit reader(std::string filepath,
                   reader_options const &options,
@@ -89,7 +89,7 @@ class reader {
    * @param buffer Pointer to whole dataset
    * @param length Host buffer size in bytes
    * @param options Settings for controlling reading behavior
-   * @param mr Optional resource to use for device memory allocation
+   * @param mr Device memory resource to use for device memory allocation
    */
   explicit reader(const char *buffer,
                   size_t length,
@@ -101,7 +101,7 @@ class reader {
    *
    * @param file Arrow file object of dataset
    * @param options Settings for controlling reading behavior
-   * @param mr Optional resource to use for device memory allocation
+   * @param mr Device memory resource to use for device memory allocation
    */
   explicit reader(std::shared_ptr<arrow::io::RandomAccessFile> file,
                   reader_options const &options,
@@ -181,7 +181,7 @@ class reader {
    *
    * @param filepath Path to whole dataset
    * @param options Settings for controlling reading behavior
-   * @param mr Optional resource to use for device memory allocation
+   * @param mr Device memory resource to use for device memory allocation
    */
   explicit reader(std::string filepath,
                   reader_options const &options,
@@ -193,7 +193,7 @@ class reader {
    * @param buffer Pointer to whole dataset
    * @param length Host buffer size in bytes
    * @param options Settings for controlling reading behavior
-   * @param mr Optional resource to use for device memory allocation
+   * @param mr Device memory resource to use for device memory allocation
    */
   explicit reader(const char *buffer,
                   size_t length,
@@ -205,7 +205,7 @@ class reader {
    *
    * @param file Arrow file object of dataset
    * @param options Settings for controlling reading behavior
-   * @param mr Optional resource to use for device memory allocation
+   * @param mr Device memory resource to use for device memory allocation
    */
   explicit reader(std::shared_ptr<arrow::io::RandomAccessFile> file,
                   reader_options const &options,
@@ -342,7 +342,7 @@ class reader {
    *
    * @param filepath Path to whole dataset
    * @param options Settings for controlling reading behavior
-   * @param mr Optional resource to use for device memory allocation
+   * @param mr Device memory resource to use for device memory allocation
    */
   explicit reader(std::string filepath,
                   reader_options const &options,
@@ -354,7 +354,7 @@ class reader {
    * @param buffer Pointer to whole dataset
    * @param length Host buffer size in bytes
    * @param options Settings for controlling reading behavior
-   * @param mr Optional resource to use for device memory allocation
+   * @param mr Device memory resource to use for device memory allocation
    */
   explicit reader(const char *buffer,
                   size_t length,
@@ -366,7 +366,7 @@ class reader {
    *
    * @param file Arrow file object of dataset
    * @param options Settings for controlling reading behavior
-   * @param mr Optional resource to use for device memory allocation
+   * @param mr Device memory resource to use for device memory allocation
    */
   explicit reader(std::shared_ptr<arrow::io::RandomAccessFile> file,
                   reader_options const &options,
@@ -472,7 +472,7 @@ class reader {
    *
    * @param filepath Path to whole dataset
    * @param options Settings for controlling reading behavior
-   * @param mr Optional resource to use for device memory allocation
+   * @param mr Device memory resource to use for device memory allocation
    */
   explicit reader(std::string filepath,
                   reader_options const &options,
@@ -484,7 +484,7 @@ class reader {
    * @param buffer Pointer to whole dataset
    * @param length Host buffer size in bytes
    * @param options Settings for controlling reading behavior
-   * @param mr Optional resource to use for device memory allocation
+   * @param mr Device memory resource to use for device memory allocation
    */
   explicit reader(const char *buffer,
                   size_t length,
@@ -496,7 +496,7 @@ class reader {
    *
    * @param file Arrow file object of dataset
    * @param options Settings for controlling reading behavior
-   * @param mr Optional resource to use for device memory allocation
+   * @param mr Device memory resource to use for device memory allocation
    */
   explicit reader(std::shared_ptr<arrow::io::RandomAccessFile> file,
                   reader_options const &options,
@@ -605,7 +605,7 @@ class reader {
    *
    * @param filepath Path to whole dataset
    * @param options Settings for controlling reading behavior
-   * @param mr Optional resource to use for device memory allocation
+   * @param mr Device memory resource to use for device memory allocation
    */
   explicit reader(std::string filepath,
                   reader_options const &options,
@@ -617,7 +617,7 @@ class reader {
    * @param buffer Pointer to whole dataset
    * @param length Host buffer size in bytes
    * @param options Settings for controlling reading behavior
-   * @param mr Optional resource to use for device memory allocation
+   * @param mr Device memory resource to use for device memory allocation
    */
   explicit reader(const char *buffer,
                   size_t length,
@@ -629,7 +629,7 @@ class reader {
    *
    * @param file Arrow file object of dataset
    * @param options Settings for controlling reading behavior
-   * @param mr Optional resource to use for device memory allocation
+   * @param mr Device memory resource to use for device memory allocation
    */
   explicit reader(std::shared_ptr<arrow::io::RandomAccessFile> file,
                   reader_options const &options,
diff --git a/cpp/include/cudf/io/writers.hpp b/cpp/include/cudf/io/writers.hpp
index 99f8bafa70b..1784b14ab59 100644
--- a/cpp/include/cudf/io/writers.hpp
+++ b/cpp/include/cudf/io/writers.hpp
@@ -77,7 +77,7 @@ class writer {
    *
    * @param sinkp The data sink to write the data to
    * @param options Settings for controlling writing behavior
-   * @param mr Optional resource to use for device memory allocation
+   * @param mr Device memory resource to use for device memory allocation
    */
   explicit writer(std::unique_ptr<cudf::io::data_sink> sinkp,
                   writer_options const& options,
@@ -163,7 +163,7 @@ class writer {
    *
    * @param sink The data sink to write the data to
    * @param options Settings for controlling writing behavior
-   * @param mr Optional resource to use for device memory allocation
+   * @param mr Device memory resource to use for device memory allocation
    */
   explicit writer(std::unique_ptr<cudf::io::data_sink> sink,
                   writer_options const& options,
@@ -325,7 +325,7 @@ class writer {
    *
    * @param sinkp The data sink to write the data to
    * @param options Settings for controlling writing behavior
-   * @param mr Optional resource to use for device memory allocation
+   * @param mr Device memory resource to use for device memory allocation
    */
   writer(std::unique_ptr<cudf::io::data_sink> sinkp,
          writer_options const& options,
diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp
index b58f909a01b..53b16a87291 100644
--- a/cpp/include/cudf/join.hpp
+++ b/cpp/include/cudf/join.hpp
@@ -77,7 +77,7 @@ namespace experimental {
  * from `left_on` columns. Else, for every column in `left_on` and `right_on`,
  * an output column will be produced.  For each of these pairs (L, R), L
  * should exist in `left_on` and R should exist in `right_on`.
- * @param mr Memory resource used to allocate the returned table and columns
+ * @param mr Device memory resource used to allocate the returned table and columns
  *
  * @returns Result of joining `left` and `right` tables on the columns
  * specified by `left_on` and `right_on`. The resulting table will be joined columns of
@@ -142,7 +142,7 @@ std::unique_ptr<cudf::experimental::table> inner_join(
  * from `left_on` columns. Else, for every column in `left_on` and `right_on`,
  * an output column will be produced.  For each of these pairs (L, R), L
  * should exist in `left_on` and R should exist in `right_on`.
- * @param mr Memory resource used to allocate the returned table and columns
+ * @param mr Device memory resource used to allocate the returned table and columns
  *
  * @returns Result of joining `left` and `right` tables on the columns
  * specified by `left_on` and `right_on`. The resulting table will be joined columns of
@@ -207,7 +207,7 @@ std::unique_ptr<cudf::experimental::table> left_join(
  * from `left_on` columns. Else, for every column in `left_on` and `right_on`,
  * an output column will be produced.  For each of these pairs (L, R), L
  * should exist in `left_on` and R should exist in `right_on`.
- * @param mr Memory resource used to allocate the returned table and columns
+ * @param mr Device memory resource used to allocate the returned table and columns
  *
  * @returns Result of joining `left` and `right` tables on the columns
  * specified by `left_on` and `right_on`. The resulting table will be joined columns of
diff --git a/cpp/include/cudf/null_mask.hpp b/cpp/include/cudf/null_mask.hpp
index fe18161f369..2ac2ee2736b 100644
--- a/cpp/include/cudf/null_mask.hpp
+++ b/cpp/include/cudf/null_mask.hpp
@@ -76,7 +76,7 @@ size_type num_bitmask_words(size_type number_of_bits);
  * @param state The desired state of the mask
  * @param stream Optional, stream on which all memory allocations/operations
  * will be submitted
- * @param mr Device memory resource to use for device memory allocation
+ * @param mr Device memory resource used to allocate the returned device_buffer.
  * @return rmm::device_buffer A `device_buffer` for use as a null bitmask
  * satisfying the desired size and state
  **/
@@ -190,8 +190,7 @@ std::vector<size_type> segmented_count_unset_bits(bitmask_type const* bitmask,
  * @param end_bit Index of the last bit to be copied (exclusive)
  * @param stream Optional, stream on which all memory allocations and copies
  * will be performed
- * @param mr Optional, the memory resource that will be used for allocating
- * the device memory for the new device_buffer
+ * @param mr Device memory resource used to allocate the returned device_buffer
  * @return rmm::device_buffer A `device_buffer` containing the bits
  * `[begin_bit, end_bit)` from `mask`.
  **/
@@ -211,8 +210,7 @@ rmm::device_buffer copy_bitmask(
  * @param view Column view whose bitmask needs to be copied
  * @param stream Optional, stream on which all memory allocations and copies
  * will be performed
- * @param mr Optional, the memory resource that will be used for allocating
- * the device memory for the new device_buffer
+ * @param mr Device memory resource used to allocate the returned device_buffer
  * @return rmm::device_buffer A `device_buffer` containing the bits
  * `[view.offset(), view.offset() + view.size())` from `view`'s bitmask.
  **/
@@ -229,7 +227,7 @@ rmm::device_buffer copy_bitmask(
  *
  * @param view The table of columns
  * @param stream CUDA stream on which to execute kernels
- * @param mr Memory resource for allocating output bitmask
+ * @param mr Device memory resource used to allocate the returned device_buffer
  * @return rmm::device_buffer Output bitmask
  */
 rmm::device_buffer bitmask_and(
diff --git a/cpp/include/cudf/partitioning.hpp b/cpp/include/cudf/partitioning.hpp
index fd34fba20f6..40c8af99576 100644
--- a/cpp/include/cudf/partitioning.hpp
+++ b/cpp/include/cudf/partitioning.hpp
@@ -55,8 +55,7 @@ namespace experimental {
  * @param partition_map Non-nullable column of integer values that map each row
  * in `t` to it's partition.
  * @param num_partitions The total number of partitions.
- * @param mr The resource used to allocate the device memory for the returned
- * table
+ * @param mr Device memory resource used to allocate the returned table.
  * @return Pair containing the reordered table and vector of `num_partitions +
  * 1` offsets to each partition such that the size of partition `i` is
  * determined by `offset[i+1] - offset[i]`.
@@ -80,7 +79,7 @@ std::pair<std::unique_ptr<table>, std::vector<size_type>> partition(
  * @param input The table to partition
  * @param columns_to_hash Indices of input columns to hash
  * @param num_partitions The number of partitions to use
- * @param mr Optional resource to use for device memory allocation
+ * @param mr Device memory resource used to allocate the returned table.
  *
  * @returns An output table and a vector of row offsets to each partition
  */
diff --git a/cpp/include/cudf/replace.hpp b/cpp/include/cudf/replace.hpp
index 35fc4d35704..ee4218e2295 100644
--- a/cpp/include/cudf/replace.hpp
+++ b/cpp/include/cudf/replace.hpp
@@ -81,7 +81,7 @@ std::unique_ptr<column> replace_nulls(
  *
  * @param input A column whose NaN values will be replaced
  * @param replacement A cudf::column whose values will replace NaN values in input
- * @param mr Optional device_memory_resource to use for allocations
+ * @param mr Device memory resource used to allocate the returned column
  * @return A copy of `input` with the NaN values replaced with corresponding values from
  * `replacement`.
  */
@@ -107,7 +107,7 @@ std::unique_ptr<column> replace_nans(
  *
  * @param input A column whose NaN values will be replaced
  * @param replacement A cudf::scalar whose value will replace NaN values in input
- * @param mr Optional device_memory_resource to use for allocations
+ * @param mr Device memory resource used to allocate the returned column
  * @return A copy of `input` with the NaN values replaced by `replacement`.
  */
 std::unique_ptr<column> replace_nans(
@@ -122,7 +122,7 @@ std::unique_ptr<column> replace_nans(
  * @param input_col The column to find and replace values in.
  * @param values_to_replace The values to replace
  * @param replacement_values The values to replace with
- * @param mr Optional device_memory_resource to use for allocations.
+ * @param mr Device memory resource used to allocate the returned column.
  *
  * @returns Copy of `input_col` with specified values replaced.
  */
diff --git a/cpp/include/cudf/scalar/scalar.hpp b/cpp/include/cudf/scalar/scalar.hpp
index 5e5d416f11f..181b3aaed1e 100644
--- a/cpp/include/cudf/scalar/scalar.hpp
+++ b/cpp/include/cudf/scalar/scalar.hpp
@@ -97,7 +97,7 @@ class scalar {
    * @param type Data type of the scalar
    * @param is_valid Whether the value held by the scalar is valid
    * @param stream The CUDA stream to do the allocation in
-   * @param mr The memory resource to use for allocation
+   * @param mr Device memory resource to use for allocation
    */
   scalar(data_type type,
          bool is_valid                       = false,
@@ -167,7 +167,7 @@ class fixed_width_scalar : public scalar {
    * @param value The initial value of the scalar
    * @param is_valid Whether the value held by the scalar is valid
    * @param stream The CUDA stream to do the allocation in
-   * @param mr The memory resource to use for allocation
+   * @param mr Device memory resource to use for allocation
    */
   fixed_width_scalar(T value,
                      bool is_valid                       = true,
@@ -221,7 +221,7 @@ class numeric_scalar : public detail::fixed_width_scalar<T> {
    * @param value The initial value of the scalar
    * @param is_valid Whether the value held by the scalar is valid
    * @param stream The CUDA stream to do the allocation in
-   * @param mr The memory resource to use for allocation
+   * @param mr Device memory resource to use for allocation
    */
   numeric_scalar(T value,
                  bool is_valid                       = true,
@@ -268,7 +268,7 @@ class string_scalar : public scalar {
    * @param value The value of the string
    * @param is_valid Whether the value held by the scalar is valid
    * @param stream The CUDA stream to do the allocation in
-   * @param mr The memory resource to use for allocation
+   * @param mr Device memory resource to use for allocation
    */
   string_scalar(std::string const& string,
                 bool is_valid                       = true,
@@ -285,7 +285,7 @@ class string_scalar : public scalar {
    * @param source string_view pointing string value to copy
    * @param is_valid Whether the value held by the scalar is valid
    * @param stream The CUDA stream to do the allocation in
-   * @param mr The memory resource to use for allocation
+   * @param mr Device memory resource to use for allocation
    */
   string_scalar(value_type const& source,
                 bool is_valid                       = true,
@@ -302,7 +302,7 @@ class string_scalar : public scalar {
    * @param data device_scalar string_view pointing string value to copy
    * @param is_valid Whether the value held by the scalar is valid
    * @param stream The CUDA stream to do the allocation in
-   * @param mr The memory resource to use for allocation
+   * @param mr Device memory resource to use for allocation
    */
   string_scalar(rmm::device_scalar<value_type>& data,
                 bool is_valid                       = true,
@@ -371,7 +371,7 @@ class timestamp_scalar : public detail::fixed_width_scalar<T> {
    * @param value The initial value of the scalar
    * @param is_valid Whether the value held by the scalar is valid
    * @param stream The CUDA stream to do the allocation in
-   * @param mr The memory resource to use for allocation
+   * @param mr Device memory resource to use for allocation
    */
   timestamp_scalar(T value,
                    bool is_valid                       = true,
@@ -387,7 +387,7 @@ class timestamp_scalar : public detail::fixed_width_scalar<T> {
    * @param value Integer representing number of ticks since the UNIX epoch
    * @param is_valid Whether the value held by the scalar is valid
    * @param stream The CUDA stream to do the allocation in
-   * @param mr The memory resource to use for allocation
+   * @param mr Device memory resource to use for allocation
    */
   timestamp_scalar(typename T::duration::rep value,
                    bool is_valid,
diff --git a/cpp/include/cudf/scalar/scalar_factories.hpp b/cpp/include/cudf/scalar/scalar_factories.hpp
index 43477773d12..300d4ce6ffe 100644
--- a/cpp/include/cudf/scalar/scalar_factories.hpp
+++ b/cpp/include/cudf/scalar/scalar_factories.hpp
@@ -35,8 +35,7 @@ namespace cudf {
  *
  * @param type The desired numeric element type
  * @param stream Optional stream on which to issue all memory allocations
- * @param mr Optional resource to use for device memory
- *           allocation of the scalar's `data` and `is_valid` bool.
+ * @param mr Device memory resource used to allocate the scalar's `data` and `is_valid` bool.
  */
 std::unique_ptr<scalar> make_numeric_scalar(
   data_type type,
@@ -52,8 +51,7 @@ std::unique_ptr<scalar> make_numeric_scalar(
  *
  * @param type The desired timestamp element type
  * @param stream Optional stream on which to issue all memory allocations
- * @param mr Optional resource to use for device memory
- *           allocation of the scalar's `data` and `is_valid` bool.
+ * @param mr Device memory resource used to allocate the scalar's `data` and `is_valid` bool.
  */
 std::unique_ptr<scalar> make_timestamp_scalar(
   data_type type,
@@ -69,8 +67,7 @@ std::unique_ptr<scalar> make_timestamp_scalar(
  *
  * @param type The desired fixed-width element type
  * @param stream Optional stream on which to issue all memory allocations
- * @param mr Optional resource to use for device memory
- *           allocation of the scalar's `data` and `is_valid` bool.
+ * @param mr Device memory resource used to allocate the scalar's `data` and `is_valid` bool.
  */
 std::unique_ptr<scalar> make_fixed_width_scalar(
   data_type type,
@@ -86,8 +83,7 @@ std::unique_ptr<scalar> make_fixed_width_scalar(
  *
  * @param string The `std::string` to copy to device
  * @param stream Optional stream for use with all memory allocations
- * @param mr Optional resource to use for device memory
- *           allocation of the scalar's `data` and `is_valid`.
+ * @param mr Device memory resource used to allocate the scalar's `data` and `is_valid` bool.
  */
 std::unique_ptr<scalar> make_string_scalar(
   std::string const& string,
@@ -109,8 +105,7 @@ std::unique_ptr<scalar> make_default_constructed_scalar(data_type type);
  * @tparam T Datatype of the value to be represented by the scalar
  * @param value The value to store in the scalar object
  * @param stream Optional stream on which to issue all memory allocations
- * @param mr Optional resource to use for device memory
- *           allocation of the scalar's `data` and `is_valid` bool.
+ * @param mr Device memory resource used to allocate the scalar's `data` and `is_valid` bool.
  */
 template <typename T>
 std::unique_ptr<scalar> make_fixed_width_scalar(
diff --git a/cpp/include/cudf/search.hpp b/cpp/include/cudf/search.hpp
index bfa192291cd..a6d2ff09d24 100644
--- a/cpp/include/cudf/search.hpp
+++ b/cpp/include/cudf/search.hpp
@@ -62,7 +62,7 @@ namespace experimental {
  * @param column_order    Vector of column sort order
  * @param null_precedence Vector of null_precedence enums
  * values
- * @param mr              Device memory resource to use for device memory allocation
+ * @param mr              Device memory resource used to allocate the returned column
  * @return std::unique_ptr<column> A non-nullable column of cudf::size_type elements
  * containing the insertion points.
  */
@@ -105,7 +105,7 @@ std::unique_ptr<column> lower_bound(
  * @param column_order    Vector of column sort order
  * @param null_precedence Vector of null_precedence enums
  * values
- * @param mr              Device memory resource to use for device memory allocation
+ * @param mr              Device memory resource used to allocate the returned column
  * @return std::unique_ptr<column> A non-nullable column of cudf::size_type elements
  * containing the insertion points.
  */
@@ -160,7 +160,7 @@ bool contains(column_view const& col,
  *
  * @param haystack  A column object
  * @param needles   A column of values to search for in `col`
- * @param mr         Device memory resource to use for device memory allocation
+ * @param mr        Device memory resource used to allocate the returned column
  *
  * @return std::unique_ptr<column> A column of bool elements containing
  * true if the corresponding entry in haystack is contained in needles and false
diff --git a/cpp/include/cudf/strings/attributes.hpp b/cpp/include/cudf/strings/attributes.hpp
index 323ffabe0ca..668ea8a9efa 100644
--- a/cpp/include/cudf/strings/attributes.hpp
+++ b/cpp/include/cudf/strings/attributes.hpp
@@ -38,7 +38,7 @@ namespace strings {
  * Any null string will result in a null entry for that row in the output column.
  *
  * @param strings Strings instance for this operation.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New INT32 column with lengths for each string.
  */
 std::unique_ptr<column> count_characters(
@@ -56,7 +56,7 @@ std::unique_ptr<column> count_characters(
  * Any null string will result in a null entry for that row in the output column.
  *
  * @param strings Strings instance for this operation.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New INT32 column with the number of bytes for each string.
  */
 std::unique_ptr<column> count_bytes(
@@ -76,7 +76,7 @@ std::unique_ptr<column> count_bytes(
  * Any null string is ignored. No null entries will appear in the output column.
  *
  * @param strings Strings instance for this operation.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New INT32 column with code point integer values for each character.
  */
 std::unique_ptr<column> code_points(
diff --git a/cpp/include/cudf/strings/case.hpp b/cpp/include/cudf/strings/case.hpp
index a1f3fe3e099..16ef0412c34 100644
--- a/cpp/include/cudf/strings/case.hpp
+++ b/cpp/include/cudf/strings/case.hpp
@@ -35,7 +35,7 @@ namespace strings {
  * Any null entries create null entries in the output column.
  *
  * @param strings Strings instance for this operation.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New column of strings with characters converted.
  */
 std::unique_ptr<column> to_lower(
@@ -52,7 +52,7 @@ std::unique_ptr<column> to_lower(
  * Any null entries create null entries in the output column.
  *
  * @param strings Strings instance for this operation.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New column of strings with characters converted.
  */
 std::unique_ptr<column> to_upper(
@@ -70,7 +70,7 @@ std::unique_ptr<column> to_upper(
  * Any null entries create null entries in the output column.
  *
  * @param strings Strings instance for this operation.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New column of strings with characters converted.
  */
 std::unique_ptr<column> swapcase(
diff --git a/cpp/include/cudf/strings/char_types/char_types.hpp b/cpp/include/cudf/strings/char_types/char_types.hpp
index 4ad7030af1f..0933ef33c12 100644
--- a/cpp/include/cudf/strings/char_types/char_types.hpp
+++ b/cpp/include/cudf/strings/char_types/char_types.hpp
@@ -91,7 +91,7 @@ string_character_types& operator|=(string_character_types& lhs, string_character
  * @param verify_types Only verify against these character types.
  *                     Default `ALL_TYPES` means return `true`
  *                     iff all characters match `types`.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New column of boolean results for each string.
  */
 std::unique_ptr<column> all_characters_of_type(
@@ -117,7 +117,7 @@ std::unique_ptr<column> all_characters_of_type(
  * Any null row results in a null entry for that row in the output column.
  *
  * @param strings Strings instance for this operation.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New column of boolean results for each string.
  */
 std::unique_ptr<column> is_integer(
@@ -157,7 +157,7 @@ bool all_integer(strings_column_view const& strings,
  * Any null row results in a null entry for that row in the output column.
  *
  * @param strings Strings instance for this operation.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New column of boolean results for each string.
  */
 std::unique_ptr<column> is_float(
diff --git a/cpp/include/cudf/strings/combine.hpp b/cpp/include/cudf/strings/combine.hpp
index 8382e616a64..b1ba63814a1 100644
--- a/cpp/include/cudf/strings/combine.hpp
+++ b/cpp/include/cudf/strings/combine.hpp
@@ -59,7 +59,7 @@ namespace strings {
  * @param narep String that should be used in place of any null strings
  *        found in any column. Default of invalid-scalar means any null entry in any column will
  *        produces a null result for that row.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New column with concatenated results.
  */
 std::unique_ptr<column> concatenate(
@@ -89,7 +89,7 @@ std::unique_ptr<column> concatenate(
  *        Default is an empty string.
  * @param narep String that should represent any null strings found.
  *        Default of invalid-scalar will ignore any null entries.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New column containing one string.
  */
 std::unique_ptr<column> join_strings(
diff --git a/cpp/include/cudf/strings/contains.hpp b/cpp/include/cudf/strings/contains.hpp
index a15be452875..29f93b67a0e 100644
--- a/cpp/include/cudf/strings/contains.hpp
+++ b/cpp/include/cudf/strings/contains.hpp
@@ -42,7 +42,7 @@ namespace strings {
  *
  * @param strings Strings instance for this operation.
  * @param pattern Regex pattern to match to each string.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New column of boolean results for each string.
  */
 std::unique_ptr<column> contains_re(
@@ -67,7 +67,7 @@ std::unique_ptr<column> contains_re(
  *
  * @param strings Strings instance for this operation.
  * @param pattern Regex pattern to match to each string.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New column of boolean results for each string.
  */
 std::unique_ptr<column> matches_re(
@@ -92,7 +92,7 @@ std::unique_ptr<column> matches_re(
  *
  * @param strings Strings instance for this operation.
  * @param pattern Regex pattern to match within each string.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New INT32 column with counts for each string.
  */
 std::unique_ptr<column> count_re(
diff --git a/cpp/include/cudf/strings/convert/convert_booleans.hpp b/cpp/include/cudf/strings/convert/convert_booleans.hpp
index eecad0d3bd4..a224ea7ba4c 100644
--- a/cpp/include/cudf/strings/convert/convert_booleans.hpp
+++ b/cpp/include/cudf/strings/convert/convert_booleans.hpp
@@ -34,7 +34,7 @@ namespace strings {
  *
  * @param strings Strings instance for this operation.
  * @param true_string String to expect for true. Non-matching strings are false.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New BOOL8 column converted from strings.
  */
 std::unique_ptr<column> to_booleans(
@@ -53,7 +53,7 @@ std::unique_ptr<column> to_booleans(
  * @param booleans Boolean column to convert.
  * @param true_string String to use for true in the output column.
  * @param false_string String to use for false in the output column.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New strings column.
  */
 std::unique_ptr<column> from_booleans(
diff --git a/cpp/include/cudf/strings/convert/convert_datetime.hpp b/cpp/include/cudf/strings/convert/convert_datetime.hpp
index 88c1f6a723e..e2baffa7577 100644
--- a/cpp/include/cudf/strings/convert/convert_datetime.hpp
+++ b/cpp/include/cudf/strings/convert/convert_datetime.hpp
@@ -66,7 +66,7 @@ namespace strings {
  * @param strings Strings instance for this operation.
  * @param timestamp_type The timestamp type used for creating the output column.
  * @param format String specifying the timestamp format in strings.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New datetime column.
  */
 std::unique_ptr<column> to_timestamps(
@@ -117,7 +117,7 @@ std::unique_ptr<column> to_timestamps(
  * @param timestamps Timestamp values to convert.
  * @param format The string specifying output format.
  *        Default format is "%Y-%m-%dT%H:%M:%SZ".
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New strings column with formatted timestamps.
  */
 std::unique_ptr<column> from_timestamps(
diff --git a/cpp/include/cudf/strings/convert/convert_floats.hpp b/cpp/include/cudf/strings/convert/convert_floats.hpp
index 61cc44f32eb..f8418696575 100644
--- a/cpp/include/cudf/strings/convert/convert_floats.hpp
+++ b/cpp/include/cudf/strings/convert/convert_floats.hpp
@@ -38,7 +38,7 @@ namespace strings {
  *
  * @param strings Strings instance for this operation.
  * @param output_type Type of float numeric column to return.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New column with floats converted from strings.
  */
 std::unique_ptr<column> to_floats(
@@ -60,7 +60,7 @@ std::unique_ptr<column> to_floats(
  * @throw cudf::logic_error if floats column is not float type.
  *
  * @param floats Numeric column to convert.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New strings column with floats as strings.
  */
 std::unique_ptr<column> from_floats(
diff --git a/cpp/include/cudf/strings/convert/convert_integers.hpp b/cpp/include/cudf/strings/convert/convert_integers.hpp
index f3ea8cbdd82..3a69bcaa709 100644
--- a/cpp/include/cudf/strings/convert/convert_integers.hpp
+++ b/cpp/include/cudf/strings/convert/convert_integers.hpp
@@ -45,7 +45,7 @@ namespace strings {
  *
  * @param strings Strings instance for this operation.
  * @param output_type Type of integer numeric column to return.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New column with integers converted from strings.
  */
 std::unique_ptr<column> to_integers(
@@ -65,7 +65,7 @@ std::unique_ptr<column> to_integers(
  * @throw cudf::logic_error if integers column is not integral type.
  *
  * @param integers Numeric column to convert.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New strings column with integers as strings.
  */
 std::unique_ptr<column> from_integers(
@@ -92,7 +92,7 @@ std::unique_ptr<column> from_integers(
  *
  * @param strings Strings instance for this operation.
  * @param output_type Type of integer numeric column to return.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New column with integers converted from strings.
  */
 std::unique_ptr<column> hex_to_integers(
diff --git a/cpp/include/cudf/strings/convert/convert_ipv4.hpp b/cpp/include/cudf/strings/convert/convert_ipv4.hpp
index 0fa4f0a1173..71d401f4b9a 100644
--- a/cpp/include/cudf/strings/convert/convert_ipv4.hpp
+++ b/cpp/include/cudf/strings/convert/convert_ipv4.hpp
@@ -46,7 +46,7 @@ namespace strings {
  * Any null entries will result in corresponding null entries in the output column.
  *
  * @param strings Strings instance for this operation.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New INT64 column converted from strings.
  */
 std::unique_ptr<column> ipv4_to_integers(
@@ -69,7 +69,7 @@ std::unique_ptr<column> ipv4_to_integers(
  * @throw cudf::logic_error if the input column is not INT64 type.
  *
  * @param integers Integer (INT64) column to convert.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New strings column.
  */
 std::unique_ptr<column> integers_to_ipv4(
diff --git a/cpp/include/cudf/strings/convert/convert_urls.hpp b/cpp/include/cudf/strings/convert/convert_urls.hpp
index bb0d0b37b64..4c73e9c9c19 100644
--- a/cpp/include/cudf/strings/convert/convert_urls.hpp
+++ b/cpp/include/cudf/strings/convert/convert_urls.hpp
@@ -37,7 +37,7 @@ namespace strings {
  * Any null entries will result in corresponding null entries in the output column.
  *
  * @param strings Strings instance for this operation.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New strings column.
  */
 std::unique_ptr<column> url_encode(
@@ -58,7 +58,7 @@ std::unique_ptr<column> url_encode(
  * Any null entries will result in corresponding null entries in the output column.
  *
  * @param strings Strings instance for this operation.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New strings column.
  */
 std::unique_ptr<column> url_decode(
diff --git a/cpp/include/cudf/strings/copying.hpp b/cpp/include/cudf/strings/copying.hpp
index 97f4fb91a89..2c7f7441cfc 100644
--- a/cpp/include/cudf/strings/copying.hpp
+++ b/cpp/include/cudf/strings/copying.hpp
@@ -42,7 +42,7 @@ namespace detail {
  * @param step Increment value between indices.
  *             Default step is 1.
  * @param stream CUDA stream to use kernels in this method.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New strings column of size (end-start)/step.
  */
 std::unique_ptr<cudf::column> slice(
diff --git a/cpp/include/cudf/strings/detail/concatenate.hpp b/cpp/include/cudf/strings/detail/concatenate.hpp
index 279152578fd..b29e8db5d79 100644
--- a/cpp/include/cudf/strings/detail/concatenate.hpp
+++ b/cpp/include/cudf/strings/detail/concatenate.hpp
@@ -34,7 +34,7 @@ namespace detail {
  * ```
  *
  * @param columns List of string columns to concatenate.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @param stream CUDA stream to use for any kernels in this function.
  * @return New column with concatenated results.
  */
diff --git a/cpp/include/cudf/strings/detail/copy_if_else.cuh b/cpp/include/cudf/strings/detail/copy_if_else.cuh
index 158fcaf9b89..8a60aaa6aee 100644
--- a/cpp/include/cudf/strings/detail/copy_if_else.cuh
+++ b/cpp/include/cudf/strings/detail/copy_if_else.cuh
@@ -44,7 +44,7 @@ namespace detail {
  * @param rhs_begin Strings of second set of data. Used when filter_fn returns false.
  * @param filter_fn Called to determine which iterator (lhs or rhs) to retrieve an entry for a
  * specific row.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @param stream CUDA stream to use kernels in this method.
  * @return New strings column.
  */
diff --git a/cpp/include/cudf/strings/detail/copy_range.cuh b/cpp/include/cudf/strings/detail/copy_range.cuh
index e2fa3b4def9..310352603e7 100644
--- a/cpp/include/cudf/strings/detail/copy_range.cuh
+++ b/cpp/include/cudf/strings/detail/copy_range.cuh
@@ -89,7 +89,7 @@ namespace detail {
  * @param target_begin The starting index of the target range (inclusive)
  * @param target_end The index of the last element in the target range
  * (exclusive)
- * @param mr Memory resource to allocate the result target column.
+ * @param mr Device memory resource used to allocate the returned column.
  * @param stream CUDA stream to run this function
  * @return std::unique_ptr<column> The result target column
  */
diff --git a/cpp/include/cudf/strings/detail/fill.hpp b/cpp/include/cudf/strings/detail/fill.hpp
index 5f295dbe8d6..8f47db846db 100644
--- a/cpp/include/cudf/strings/detail/fill.hpp
+++ b/cpp/include/cudf/strings/detail/fill.hpp
@@ -35,7 +35,7 @@ namespace detail {
  * @param begin First row index to include the new string.
  * @param end Last row index (exclusive).
  * @param value String to use when filling the range.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @param stream CUDA stream to use for any kernels in this function.
  * @return New strings column.
  */
diff --git a/cpp/include/cudf/strings/detail/gather.cuh b/cpp/include/cudf/strings/detail/gather.cuh
index 0015ecc3765..af52b801a52 100644
--- a/cpp/include/cudf/strings/detail/gather.cuh
+++ b/cpp/include/cudf/strings/detail/gather.cuh
@@ -45,7 +45,7 @@ namespace detail {
  * @param strings Strings instance for this operation.
  * @param begin Start of index iterator.
  * @param end End of index iterator.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @param stream CUDA stream to use kernels in this method.
  * @return New strings column containing the gathered strings.
  */
@@ -122,7 +122,7 @@ std::unique_ptr<cudf::column> gather(
  * @param begin Start of index iterator.
  * @param end End of index iterator.
  * @param nullify_out_of_bounds If true, indices outside the column's range are nullified.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @param stream CUDA stream to use kernels in this method.
  * @return New strings column containing the gathered strings.
  */
diff --git a/cpp/include/cudf/strings/detail/merge.cuh b/cpp/include/cudf/strings/detail/merge.cuh
index d414418a7ce..4c4f49cb72f 100644
--- a/cpp/include/cudf/strings/detail/merge.cuh
+++ b/cpp/include/cudf/strings/detail/merge.cuh
@@ -37,7 +37,7 @@ namespace detail {
  * @param lhs First column.
  * @param rhs Second column.
  * @param row_order Indexes for each column.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @param stream CUDA stream to use for any kernels in this function.
  * @return New strings column.
  */
diff --git a/cpp/include/cudf/strings/detail/modify_strings.cuh b/cpp/include/cudf/strings/detail/modify_strings.cuh
index 8233aba958d..6a5999f2f3a 100644
--- a/cpp/include/cudf/strings/detail/modify_strings.cuh
+++ b/cpp/include/cudf/strings/detail/modify_strings.cuh
@@ -41,7 +41,7 @@ namespace detail {
  *
  * @param strings Number Column of strings to apply the modifications on;
  * it is not modified in place; rather a new column is returned instead
- * @param mr Memory resource to use
+ * @param mr Device memory resource used to allocate the returned column
  * (cannot be a default argument because of the variadic pack);
  * @param stream Stream to use for any kernel calls.
  * (cannot be a default argument because of the variadic pack);
diff --git a/cpp/include/cudf/strings/detail/scatter.cuh b/cpp/include/cudf/strings/detail/scatter.cuh
index 577c785b600..8911cf9c098 100644
--- a/cpp/include/cudf/strings/detail/scatter.cuh
+++ b/cpp/include/cudf/strings/detail/scatter.cuh
@@ -44,7 +44,7 @@ namespace detail {
  * @param scatter_map Iterator of indices into the output column.
  * @param target The set of columns into which values from the source column
  *        are to be scattered.
- * @param mr The resource to use for all allocations
+ * @param mr Device memory resource used to allocate the returned column
  * @param stream The stream to use for CUDA operations
  * @return New strings column.
  */
diff --git a/cpp/include/cudf/strings/detail/utilities.cuh b/cpp/include/cudf/strings/detail/utilities.cuh
index cb2157af1a5..e96b7360835 100644
--- a/cpp/include/cudf/strings/detail/utilities.cuh
+++ b/cpp/include/cudf/strings/detail/utilities.cuh
@@ -33,7 +33,7 @@ namespace detail {
  * @tparam Iterator Used as input to scan to set the offset values.
  * @param begin The beginning of the input sequence
  * @param end The end of the input sequence
- * @param mr Memory resource to use.
+ * @param mr Device memory resource used to allocate the returned column.
  * @param stream Stream to use for any kernel calls.
  * @return offsets child column for strings column
  */
diff --git a/cpp/include/cudf/strings/detail/utilities.hpp b/cpp/include/cudf/strings/detail/utilities.hpp
index ae9332d5f5f..b88d5b66e4a 100644
--- a/cpp/include/cudf/strings/detail/utilities.hpp
+++ b/cpp/include/cudf/strings/detail/utilities.hpp
@@ -30,7 +30,7 @@ namespace detail {
  * @param strings_count Number of strings in the column.
  * @param null_count Number of null string entries in the column.
  * @param bytes Number of bytes for the chars column.
- * @param mr Memory resource to use.
+ * @param mr Device memory resource used to allocate the returned column.
  * @param stream Stream to use for any kernel calls.
  * @return The chars child column for a strings column.
  */
@@ -44,7 +44,7 @@ std::unique_ptr<column> create_chars_child_column(
 /**
  * @brief Create a strings column with no strings.
  *
- * @param mr Memory resource to use.
+ * @param mr Device memory resource used to allocate the returned column.
  * @param stream Stream to use for any kernel calls.
  * @return Empty strings column
  */
@@ -65,7 +65,7 @@ rmm::device_vector<string_view> create_string_vector_from_column(cudf::strings_c
  * @brief Creates an offsets column from a string_view vector.
  *
  * @param strings Strings column
- * @param mr Memory resource to use create the output column.
+ * @param mr Device memory resource used to allocate the returned column.
  * @param stream Stream to execute any device code against.
  * @return Child offsets column
  */
@@ -80,7 +80,7 @@ std::unique_ptr<cudf::column> child_offsets_from_string_vector(
  * @param strings Strings vector
  * @param d_offsets Offsets vector for placing strings into column's memory.
  * @param null_count Number of null strings.
- * @param mr Memory resource to use create the output column.
+ * @param mr Device memory resource used to allocate the returned column.
  * @param stream Stream to execute any device code against.
  * @return Child chars column
  */
diff --git a/cpp/include/cudf/strings/extract.hpp b/cpp/include/cudf/strings/extract.hpp
index 4068f7a6c87..fbde5ac8ae7 100644
--- a/cpp/include/cudf/strings/extract.hpp
+++ b/cpp/include/cudf/strings/extract.hpp
@@ -46,7 +46,7 @@ namespace strings {
  *
  * @param strings Strings instance for this operation.
  * @param pattern The regular expression pattern with group indicators.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned table.
  * @return Columns of strings extracted from the input column.
  */
 std::unique_ptr<experimental::table> extract(
diff --git a/cpp/include/cudf/strings/find.hpp b/cpp/include/cudf/strings/find.hpp
index ab6afc82094..53b6c0a2f8f 100644
--- a/cpp/include/cudf/strings/find.hpp
+++ b/cpp/include/cudf/strings/find.hpp
@@ -45,7 +45,7 @@ namespace strings {
  * @param start First character position to include in the search.
  * @param stop Last position (exclusive) to include in the search.
  *             Default of -1 will search to the end of the string.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New integer column with character position values.
  */
 std::unique_ptr<column> find(strings_column_view const& strings,
@@ -73,7 +73,7 @@ std::unique_ptr<column> find(strings_column_view const& strings,
  * @param start First position to include in the search.
  * @param stop Last position (exclusive) to include in the search.
  *             Default of -1 will search starting at the end of the string.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New integer column with character position values.
  */
 std::unique_ptr<column> rfind(
@@ -93,7 +93,7 @@ std::unique_ptr<column> rfind(
  *
  * @param strings Strings instance for this operation.
  * @param target UTF-8 encoded string to search for in each string.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New BOOL8 column.
  */
 std::unique_ptr<column> contains(
@@ -113,7 +113,7 @@ std::unique_ptr<column> contains(
  *
  * @param strings Strings instance for this operation.
  * @param target UTF-8 encoded string to search for in each string.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New BOOL8 column.
  */
 std::unique_ptr<column> starts_with(
@@ -133,7 +133,7 @@ std::unique_ptr<column> starts_with(
  *
  * @param strings Strings instance for this operation.
  * @param target UTF-8 encoded string to search for in each string.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New BOOL8 column.
  */
 std::unique_ptr<column> ends_with(
diff --git a/cpp/include/cudf/strings/find_multiple.hpp b/cpp/include/cudf/strings/find_multiple.hpp
index a6dd91a4b24..6a61f4b103d 100644
--- a/cpp/include/cudf/strings/find_multiple.hpp
+++ b/cpp/include/cudf/strings/find_multiple.hpp
@@ -45,7 +45,7 @@ namespace strings {
  *
  * @param strings Strings instance for this operation.
  * @param targets Strings to search for in each string.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New integer column with character position values.
  */
 std::unique_ptr<column> find_multiple(
diff --git a/cpp/include/cudf/strings/findall.hpp b/cpp/include/cudf/strings/findall.hpp
index 79114182116..a2ca92b932c 100644
--- a/cpp/include/cudf/strings/findall.hpp
+++ b/cpp/include/cudf/strings/findall.hpp
@@ -48,7 +48,7 @@ namespace strings {
  *
  * @param strings Strings instance for this operation.
  * @param pattern Regex pattern to match within each string.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned table.
  * @return New table of strings columns.
  */
 std::unique_ptr<experimental::table> findall_re(
diff --git a/cpp/include/cudf/strings/padding.hpp b/cpp/include/cudf/strings/padding.hpp
index 29b4bcf4a7d..0cbc249919c 100644
--- a/cpp/include/cudf/strings/padding.hpp
+++ b/cpp/include/cudf/strings/padding.hpp
@@ -57,7 +57,7 @@ enum class pad_side {
  *        Default is pad right (left justify).
  * @param fill_char Single UTF-8 character to use for padding.
  *        Default is the space character.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New column with padded strings.
  */
 std::unique_ptr<column> pad(strings_column_view const& strings,
@@ -85,7 +85,7 @@ std::unique_ptr<column> pad(strings_column_view const& strings,
  *
  * @param strings Strings instance for this operation.
  * @param width The minimum number of characters for each string.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New column of strings.
  */
 std::unique_ptr<column> zfill(
diff --git a/cpp/include/cudf/strings/replace.hpp b/cpp/include/cudf/strings/replace.hpp
index 12b3db4e825..8d85fa88d85 100644
--- a/cpp/include/cudf/strings/replace.hpp
+++ b/cpp/include/cudf/strings/replace.hpp
@@ -56,7 +56,7 @@ namespace strings {
  * @param repl Replacement string if target is found.
  * @param maxrepl Maximum times to replace if target appears multiple times in the input string.
  *        Default of -1 specifies replace all occurrences of target in each string.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New strings column.
  */
 std::unique_ptr<column> replace(
@@ -96,7 +96,7 @@ std::unique_ptr<column> replace(
  *        Default is 0, first character position.
  * @param stop End position (exclusive) to use for replacement.
  *        Default of -1 specifies the end of each string.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New strings column.
  */
 std::unique_ptr<column> replace_slice(
@@ -141,7 +141,7 @@ std::unique_ptr<column> replace_slice(
  * @param strings Strings column for this operation.
  * @param targets Strings to search for in each string.
  * @param repls Corresponding replacement strings for target strings.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New strings column.
  */
 std::unique_ptr<column> replace(
@@ -164,7 +164,7 @@ std::unique_ptr<column> replace(
  *
  * @param strings Strings column for this operation.
  * @param repl Replacement string for null entries. Default is empty string.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New strings column.
  */
 std::unique_ptr<column> replace_nulls(
diff --git a/cpp/include/cudf/strings/replace_re.hpp b/cpp/include/cudf/strings/replace_re.hpp
index 9ced03b1e90..0ab1be68eeb 100644
--- a/cpp/include/cudf/strings/replace_re.hpp
+++ b/cpp/include/cudf/strings/replace_re.hpp
@@ -39,7 +39,7 @@ namespace strings {
  * @param repl The string used to replace the matched sequence in each string.
  *        Default is an empty string.
  * @param maxrepl The maximum number of times to replace the matched pattern within each string.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New strings column.
  */
 std::unique_ptr<column> replace_re(
@@ -60,7 +60,7 @@ std::unique_ptr<column> replace_re(
  * @param strings Strings instance for this operation.
  * @param patterns The regular expression patterns to search within each string.
  * @param repls The strings used for replacement.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New strings column.
  */
 std::unique_ptr<column> replace_re(
@@ -80,7 +80,7 @@ std::unique_ptr<column> replace_re(
  * @param strings Strings instance for this operation.
  * @param pattern The regular expression patterns to search within each string.
  * @param repl The replacement template for creating the output string.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New strings column.
  */
 std::unique_ptr<column> replace_with_backrefs(
diff --git a/cpp/include/cudf/strings/sorting.hpp b/cpp/include/cudf/strings/sorting.hpp
index 0aa929abf90..c915433f51b 100644
--- a/cpp/include/cudf/strings/sorting.hpp
+++ b/cpp/include/cudf/strings/sorting.hpp
@@ -40,7 +40,7 @@ enum sort_type {
  * @param order Sort strings in ascending or descending order.
  * @param null_order Sort nulls to the beginning or the end of the new column.
  * @param stream CUDA stream to use kernels in this method.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New strings column with sorted elements of this instance.
  */
 std::unique_ptr<cudf::column> sort(
diff --git a/cpp/include/cudf/strings/split/partition.hpp b/cpp/include/cudf/strings/split/partition.hpp
index 0eebd70d5c2..01e19359468 100644
--- a/cpp/include/cudf/strings/split/partition.hpp
+++ b/cpp/include/cudf/strings/split/partition.hpp
@@ -50,7 +50,7 @@ namespace strings {
  * @param strings Strings instance for this operation.
  * @param delimiter UTF-8 encoded string indentifying where to split each string.
  *        Default of empty string indicates split on whitespace.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned table.
  * @return New table of strings columns.
  */
 std::unique_ptr<experimental::table> partition(
@@ -82,7 +82,7 @@ std::unique_ptr<experimental::table> partition(
  * @param strings Strings instance for this operation.
  * @param delimiter UTF-8 encoded string indentifying where to split each string.
  *        Default of empty string indicates split on whitespace.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned table.
  * @return New strings columns.
  */
 std::unique_ptr<experimental::table> rpartition(
diff --git a/cpp/include/cudf/strings/split/split.hpp b/cpp/include/cudf/strings/split/split.hpp
index 68fa08b22ca..644a363bd64 100644
--- a/cpp/include/cudf/strings/split/split.hpp
+++ b/cpp/include/cudf/strings/split/split.hpp
@@ -45,7 +45,7 @@ namespace strings {
  *        Default of empty string indicates split on whitespace.
  * @param maxsplit Maximum number of splits to perform.
  *        Default of -1 indicates all possible splits on each string.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned table.
  * @return New table of strings columns.
  */
 std::unique_ptr<experimental::table> split(
@@ -73,7 +73,7 @@ std::unique_ptr<experimental::table> split(
  *        Default of empty string indicates split on whitespace.
  * @param maxsplit Maximum number of splits to perform.
  *        Default of -1 indicates all possible splits on each string.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned table.
  * @return New strings columns.
  */
 std::unique_ptr<experimental::table> rsplit(
@@ -120,7 +120,7 @@ struct contiguous_split_record_result {
  *        Default of empty string indicates split on whitespace.
  * @param maxsplit Maximum number of splits to perform.
  *        Default of -1 indicates all possible splits on each string.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned result.
  * @return contiguous_split_record_result New vector of strings column_view
  *         objects
  *         (each column_view element of the vector holds splits from a string
@@ -152,7 +152,7 @@ contiguous_split_record_result contiguous_split_record(
  *        Default of empty string indicates split on whitespace.
  * @param maxsplit Maximum number of splits to perform.
  *        Default of -1 indicates all possible splits on each string.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned result.
  * @return contiguous_split_record_result New vector of strings column_view
  *         objects
  *         (each column_view element of the vector holds splits from a string
diff --git a/cpp/include/cudf/strings/strings_column_view.hpp b/cpp/include/cudf/strings/strings_column_view.hpp
index abd4e4dc6d0..46fc1aeade7 100644
--- a/cpp/include/cudf/strings/strings_column_view.hpp
+++ b/cpp/include/cudf/strings/strings_column_view.hpp
@@ -108,7 +108,7 @@ void print(strings_column_view const& strings,
  *
  * @param strings Strings instance for this operation.
  * @param stream CUDA stream to use kernels in this method.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned device_vectors.
  * @return Pair containing a vector of chars and a vector of offsets.
  */
 std::pair<rmm::device_vector<char>, rmm::device_vector<size_type>> create_offsets(
diff --git a/cpp/include/cudf/strings/strip.hpp b/cpp/include/cudf/strings/strip.hpp
index 513801f3e87..51b6cf51d9e 100644
--- a/cpp/include/cudf/strings/strip.hpp
+++ b/cpp/include/cudf/strings/strip.hpp
@@ -62,7 +62,7 @@ enum class strip_type {
  * string. Default is both.
  * @param to_strip UTF-8 encoded characters to strip from each string.
  *        Default is empty string which indicates strip whitespace characters.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New strings column.
  */
 std::unique_ptr<column> strip(
diff --git a/cpp/include/cudf/strings/substring.hpp b/cpp/include/cudf/strings/substring.hpp
index d438fd4cc29..5fdae1b58a3 100644
--- a/cpp/include/cudf/strings/substring.hpp
+++ b/cpp/include/cudf/strings/substring.hpp
@@ -51,7 +51,7 @@ namespace strings {
  * @param start First character position to begin the substring.
  * @param stop Last character position (exclusive) to end the substring.
  * @param step Distance between input characters retrieved.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New strings column with sorted elements of this instance.
  */
 std::unique_ptr<column> slice_strings(
@@ -95,7 +95,7 @@ std::unique_ptr<column> slice_strings(
  * @param strings Strings column for this operation.
  * @param starts First character positions to begin the substring.
  * @param stops Last character (exclusive) positions to end the substring.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New strings column with sorted elements of this instance.
  */
 std::unique_ptr<column> slice_strings(
diff --git a/cpp/include/cudf/strings/translate.hpp b/cpp/include/cudf/strings/translate.hpp
index 1ac75f8a511..e16e11d0afb 100644
--- a/cpp/include/cudf/strings/translate.hpp
+++ b/cpp/include/cudf/strings/translate.hpp
@@ -42,7 +42,7 @@ namespace strings {
  *
  * @param strings Strings instance for this operation.
  * @param chars_table Table of UTF-8 character mappings.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New column with padded strings.
  */
 std::unique_ptr<column> translate(
diff --git a/cpp/include/cudf/table/table.hpp b/cpp/include/cudf/table/table.hpp
index cd38e811e52..b2fbf7f57ef 100644
--- a/cpp/include/cudf/table/table.hpp
+++ b/cpp/include/cudf/table/table.hpp
@@ -56,8 +56,7 @@ class table {
    * @param view The view whose contents will be copied to create a new `table`
    * @param stream Optional, stream on which all memory allocations and copies
    * will be performed
-   * @param mr Optional, the memory resource that will be used for allocating
-   * the device memory for the new columns
+   * @param mr Device memory resource used for allocating the device memory for the new columns
    **/
   table(table_view view,
         cudaStream_t stream                 = 0,
diff --git a/cpp/include/cudf/transform.hpp b/cpp/include/cudf/transform.hpp
index 6da0b195c2c..e602d5ba1a5 100644
--- a/cpp/include/cudf/transform.hpp
+++ b/cpp/include/cudf/transform.hpp
@@ -41,7 +41,7 @@ namespace experimental {
  * @param unary_udf     The PTX/CUDA string of the unary function to apply
  * @param outout_type   The output type that is compatible with the output type in the UDF
  * @param is_ptx        true: the UDF is treated as PTX code; false: the UDF is treated as CUDA code
- * @param mr            The memory resource to use for for all device allocations
+ * @param mr            Device memory resource used to allocate the returned column
  * @return cudf::column The column resulting from applying the unary function to
  *                      every element of the input
  **/
@@ -59,7 +59,7 @@ std::unique_ptr<column> transform(
  * @throws `cudf::logic_error` if `input.type()` is a non-floating type
  *
  * @param input         An immutable view of the input column of floating-point type
- * @param mr            The memory resource to use for for all device allocations
+ * @param mr            Device memory resource used to allocate the returned bitmask.
  * @return A pair containing a `device_buffer` with the new bitmask and it's
  * null count obtained by replacing `NaN` in `input` with null.
  **/
@@ -76,7 +76,7 @@ std::pair<std::unique_ptr<rmm::device_buffer>, size_type> nans_to_nulls(
  * @throws `cudf::logic_error` if `input.type()` is a non-boolean type
  *
  * @param input        Boolean elements to convert to a bitmask.
- * @param mr            The memory resource used to allocate the returned bitmask.
+ * @param mr           Device memory resource used to allocate the returned bitmask.
  * @return A pair containing a `device_buffer` with the new bitmask and it's
  * null count obtained from input considering `true` represent `valid`/`1` and
  * `false` represent `invalid`/`0`.
diff --git a/cpp/include/cudf/unary.hpp b/cpp/include/cudf/unary.hpp
index dba6a0631ca..60d478b6555 100644
--- a/cpp/include/cudf/unary.hpp
+++ b/cpp/include/cudf/unary.hpp
@@ -56,7 +56,7 @@ enum class unary_op : int32_t {
  *
  * @param input A `column_view` as input
  * @param op operation to perform
- * @param mr Optional, The resource to use for all allocations
+ * @param mr Device memory resource used to allocate the returned column
  *
  * @returns std::unique_ptr<cudf::column> Result of the operation
  */
@@ -70,7 +70,7 @@ std::unique_ptr<cudf::column> unary_operation(
  * indicates the value is null and `false` indicates the value is valid.
  *
  * @param input A `column_view` as input
- * @param mr Optional, The resource to use for all allocations
+ * @param mr Device memory resource used to allocate the returned column
  *
  * @returns std::unique_ptr<cudf::column> A non-nulalble column of `BOOL8` elements with `true`
  * representing `null` values.
@@ -84,7 +84,7 @@ std::unique_ptr<cudf::column> is_null(
  * indicates the value is valid and `false` indicates the value is null.
  *
  * @param input A `column_view` as input
- * @param mr Optional, The resource to use for all allocations
+ * @param mr Device memory resource used to allocate the returned column
  *
  * @returns std::unique_ptr<cudf::column> A non-nulalble column of `BOOL8` elements with `false`
  * representing `null` values.
@@ -99,7 +99,7 @@ std::unique_ptr<cudf::column> is_valid(
  *
  * @param column_view Input column
  * @param out_type Desired datatype of output column
- * @param mr Optional, The resource to use for all allocations
+ * @param mr Device memory resource used to allocate the returned column
  *
  * @returns unique_ptr<column> Result of the cast operation
  * @throw cudf::logic_error if `out_type` is not a fixed-width type
@@ -116,7 +116,7 @@ std::unique_ptr<column> cast(column_view const& input,
  * @throws cudf::logic_error if `input` is a non-floating point type
  *
  * @param input A column of floating-point elements
- * @param mr Optional, The resource to use for allocating the device memory in the returned column.
+ * @param mr Device memory resource used to allocate the returned column.
  *
  * @returns unique_ptr<column> A non-nulalble column of `BOOL8` elements with `true`
  * representing `NAN` values
@@ -133,7 +133,7 @@ std::unique_ptr<column> is_nan(
  * @throws cudf::logic_error if `input` is a non-floating point type
  *
  * @param input A column of floating-point elements
- * @param mr Optional, The resource to use for allocating the device memory in the returned column.
+ * @param mr Device memory resource used to allocate the returned column.
  *
  * @returns unique_ptr<column> A non-nulalble column of `BOOL8` elements with `false`
  * representing `NAN` values
diff --git a/cpp/include/nvtext/detail/tokenize.hpp b/cpp/include/nvtext/detail/tokenize.hpp
index 741f9153b0e..1e7e26178ab 100644
--- a/cpp/include/nvtext/detail/tokenize.hpp
+++ b/cpp/include/nvtext/detail/tokenize.hpp
@@ -28,7 +28,7 @@ namespace detail {
  * @param strings Strings column tokenize.
  * @param delimiter UTF-8 characters used to separate each string into tokens.
  *                  The default of empty string will separate tokens using whitespace.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @param stream Stream to use for any CUDA calls.
  * @return New strings columns of tokens.
  */
@@ -44,7 +44,7 @@ std::unique_ptr<cudf::column> tokenize(
  *
  * @param strings Strings column to tokenize.
  * @param delimiters Strings used to separate individual strings into tokens.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @param stream Stream to use for any CUDA calls.
  * @return New strings columns of tokens.
  */
@@ -61,7 +61,7 @@ std::unique_ptr<cudf::column> tokenize(
  * @param strings Strings column to use for this operation.
  * @param delimiter Strings used to separate each string into tokens.
  *                  The default of empty string will separate tokens using whitespace.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @param stream Stream to use for any CUDA calls.
  * @return New INT32 column of token counts.
  */
@@ -77,7 +77,7 @@ std::unique_ptr<cudf::column> count_tokens(
  *
  * @param strings Strings column to use for this operation.
  * @param delimiters Strings used to separate each string into tokens.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @param stream Stream to use for any CUDA calls.
  * @return New INT32 column of token counts.
  */
diff --git a/cpp/include/nvtext/generate_ngrams.hpp b/cpp/include/nvtext/generate_ngrams.hpp
index 44089fe66e9..f83b3e817bc 100644
--- a/cpp/include/nvtext/generate_ngrams.hpp
+++ b/cpp/include/nvtext/generate_ngrams.hpp
@@ -47,7 +47,7 @@ namespace nvtext {
  *               Default is 2 = bigram.
  * @param separator The string to use for separating ngram tokens.
  *                  Default is "_" character.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New strings columns of tokens.
  */
 std::unique_ptr<cudf::column> generate_ngrams(
diff --git a/cpp/include/nvtext/ngrams_tokenize.hpp b/cpp/include/nvtext/ngrams_tokenize.hpp
index 584d54b59f5..6fd1a5fba6b 100644
--- a/cpp/include/nvtext/ngrams_tokenize.hpp
+++ b/cpp/include/nvtext/ngrams_tokenize.hpp
@@ -72,7 +72,7 @@ namespace nvtext {
  *                  The default of empty string will separate tokens using whitespace.
  * @param separator The string to use for separating ngram tokens.
  *                  Default is "_" character.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New strings columns of tokens.
  */
 std::unique_ptr<cudf::column> ngrams_tokenize(
diff --git a/cpp/include/nvtext/normalize.hpp b/cpp/include/nvtext/normalize.hpp
index c68007f352e..e92718876aa 100644
--- a/cpp/include/nvtext/normalize.hpp
+++ b/cpp/include/nvtext/normalize.hpp
@@ -44,7 +44,7 @@ namespace nvtext {
  * for row `i` in the output column.
  *
  * @param strings Strings column to normalize.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New strings columns of normalized strings.
  */
 std::unique_ptr<cudf::column> normalize_spaces(
diff --git a/cpp/include/nvtext/tokenize.hpp b/cpp/include/nvtext/tokenize.hpp
index 764d3af1028..908d04af1d1 100644
--- a/cpp/include/nvtext/tokenize.hpp
+++ b/cpp/include/nvtext/tokenize.hpp
@@ -51,7 +51,7 @@ namespace nvtext {
  * @param strings Strings column tokenize.
  * @param delimiter UTF-8 characters used to separate each string into tokens.
  *                  The default of empty string will separate tokens using whitespace.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New strings columns of tokens.
  */
 std::unique_ptr<cudf::column> tokenize(
@@ -85,7 +85,7 @@ std::unique_ptr<cudf::column> tokenize(
  *
  * @param strings Strings column to tokenize.
  * @param delimiters Strings used to separate individual strings into tokens.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New strings columns of tokens.
  */
 std::unique_ptr<cudf::column> tokenize(
@@ -114,7 +114,7 @@ std::unique_ptr<cudf::column> tokenize(
  * @param strings Strings column to use for this operation.
  * @param delimiter Strings used to separate each string into tokens.
  *                  The default of empty string will separate tokens using whitespace.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New INT32 column of token counts.
  */
 std::unique_ptr<cudf::column> count_tokens(
@@ -144,7 +144,7 @@ std::unique_ptr<cudf::column> count_tokens(
  *
  * @param strings Strings column to use for this operation.
  * @param delimiters Strings used to separate each string into tokens.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @return New INT32 column of token counts.
  */
 std::unique_ptr<cudf::column> count_tokens(
diff --git a/cpp/src/binaryop/compiled/binary_ops.hpp b/cpp/src/binaryop/compiled/binary_ops.hpp
index 727c68633a6..5186f0ff3c0 100644
--- a/cpp/src/binaryop/compiled/binary_ops.hpp
+++ b/cpp/src/binaryop/compiled/binary_ops.hpp
@@ -58,7 +58,7 @@ namespace compiled {
  * @param lhs         The left operand string scalar
  * @param rhs         The right operand string column
  * @param output_type The desired data type of the output column
- * @param mr          Memory resource for allocating output column
+ * @param mr          Device memory resource used to allocate the returned column
  * @param stream      CUDA stream on which to execute kernels
  * @return std::unique_ptr<column> Output column
  */
@@ -84,7 +84,7 @@ std::unique_ptr<column> binary_operation(
  * @param lhs         The left operand string column
  * @param rhs         The right operand string scalar
  * @param output_type The desired data type of the output column
- * @param mr          Memory resource for allocating output column
+ * @param mr          Device memory resource used to allocate the returned column
  * @param stream      CUDA stream on which to execute kernels
  * @return std::unique_ptr<column> Output column
  */
@@ -110,7 +110,7 @@ std::unique_ptr<column> binary_operation(
  * @param lhs         The left operand string column
  * @param rhs         The right operand string column
  * @param output_type The desired data type of the output column
- * @param mr          Memory resource for allocating output column
+ * @param mr          Device memory resource used to allocate the returned column
  * @param stream      CUDA stream on which to execute kernels
  * @return std::unique_ptr<column> Output column
  */
diff --git a/cpp/src/dictionary/remove_keys.cu b/cpp/src/dictionary/remove_keys.cu
index 58e8457bf07..539ff306512 100644
--- a/cpp/src/dictionary/remove_keys.cu
+++ b/cpp/src/dictionary/remove_keys.cu
@@ -43,7 +43,7 @@ namespace {
  *                    and returns true if that key is to be used in the output dictionary.
  * @param dictionary_column The column to use for creating the new dictionary.
  * @param keys_to_keep_fn Called to determine which keys in `dictionary_column` to keep.
- * @param mr Resource for creating output columns.
+ * @param mr Device memory resource used to allocate the returned column.
  * @param stream CUDA Stream for kernel calls.
  */
 template <typename KeysKeeper>
diff --git a/cpp/src/groupby/sort/group_reductions.hpp b/cpp/src/groupby/sort/group_reductions.hpp
index 466a5d929c7..14999336de0 100644
--- a/cpp/src/groupby/sort/group_reductions.hpp
+++ b/cpp/src/groupby/sort/group_reductions.hpp
@@ -33,7 +33,7 @@ namespace detail {
  * @param values Grouped values to get sum of
  * @param num_groups Number of groups
  * @param group_labels ID of group that the corresponding value belongs to
- * @param mr Memory resource to allocate output with
+ * @param mr Device memory resource used to allocate the returned column
  * @param stream Stream to perform computation in
  */
 std::unique_ptr<column> group_sum(column_view const& values,
@@ -48,7 +48,7 @@ std::unique_ptr<column> group_sum(column_view const& values,
  * @param values Grouped values to get minimum from
  * @param num_groups Number of groups
  * @param group_labels ID of group that the corresponding value belongs to
- * @param mr Memory resource to allocate output with
+ * @param mr Device memory resource used to allocate the returned column
  * @param stream Stream to perform computation in
  */
 std::unique_ptr<column> group_min(column_view const& values,
@@ -63,7 +63,7 @@ std::unique_ptr<column> group_min(column_view const& values,
  * @param values Grouped values to get maximum from
  * @param num_groups Number of groups
  * @param group_labels ID of group that the corresponding value belongs to
- * @param mr Memory resource to allocate output with
+ * @param mr Device memory resource used to allocate the returned column
  * @param stream Stream to perform computation in
  */
 std::unique_ptr<column> group_max(column_view const& values,
@@ -79,7 +79,7 @@ std::unique_ptr<column> group_max(column_view const& values,
  * @param num_groups Number of groups
  * @param group_labels ID of group that the corresponding value belongs to
  * @param key_sort_order Indices indicating sort order of groupby keys
- * @param mr Memory resource to allocate output with
+ * @param mr Device memory resource used to allocate the returned column
  * @param stream Stream to perform computation in
  */
 std::unique_ptr<column> group_argmax(column_view const& values,
@@ -96,7 +96,7 @@ std::unique_ptr<column> group_argmax(column_view const& values,
  * @param num_groups Number of groups
  * @param group_labels ID of group that the corresponding value belongs to
  * @param key_sort_order Indices indicating sort order of groupby keys
- * @param mr Memory resource to allocate output with
+ * @param mr Device memory resource used to allocate the returned column
  * @param stream Stream to perform computation in
  */
 std::unique_ptr<column> group_argmin(column_view const& values,
@@ -113,7 +113,7 @@ std::unique_ptr<column> group_argmin(column_view const& values,
  * @param values Grouped values to get valid count of
  * @param group_labels ID of group that the corresponding value belongs to
  * @param num_groups Number of groups ( unique values in @p group_labels )
- * @param mr Memory resource to allocate output with
+ * @param mr Device memory resource used to allocate the returned column
  * @param stream Stream to perform computation in
  */
 std::unique_ptr<column> group_count_valid(column_view const& values,
@@ -127,7 +127,7 @@ std::unique_ptr<column> group_count_valid(column_view const& values,
  *
  * @param group_offsets Offsets of groups' starting points within @p values
  * @param num_groups Number of groups ( unique values in @p group_labels )
- * @param mr Memory resource to allocate output with
+ * @param mr Device memory resource used to allocate the returned column
  * @param stream Stream to perform computation in
  */
 std::unique_ptr<column> group_count_all(rmm::device_vector<size_type> const& group_offsets,
@@ -144,7 +144,7 @@ std::unique_ptr<column> group_count_all(rmm::device_vector<size_type> const& gro
  * @param group_labels ID of group corresponding value in @p values belongs to
  * @param ddof Delta degrees of freedom. The divisor used in calculation of
  *             `var` is `N - ddof`, where `N` is the group size.
- * @param mr Memory resource to allocate output with
+ * @param mr Device memory resource used to allocate the returned column
  * @param stream Stream to perform computation in
  */
 std::unique_ptr<column> group_var(column_view const& values,
@@ -163,7 +163,7 @@ std::unique_ptr<column> group_var(column_view const& values,
  * @param group_offsets Offsets of groups' starting points within @p values
  * @param quantiles List of quantiles q where q lies in [0,1]
  * @param interp Method to use when desired value lies between data points
- * @param mr Memory resource to allocate output with
+ * @param mr Device memory resource used to allocate the returned column
  * @param stream Stream to perform computation in
  */
 std::unique_ptr<column> group_quantiles(column_view const& values,
@@ -186,7 +186,7 @@ std::unique_ptr<column> group_quantiles(column_view const& values,
  * @param null_handling Exclude nulls while counting if null_policy::EXCLUDE,
  *  Include nulls if null_policy::INCLUDE.
  *  Nulls are treated equal.
- * @param mr Memory resource to allocate output with
+ * @param mr Device memory resource used to allocate the returned column
  * @param stream Stream to perform computation in
  */
 std::unique_ptr<column> group_nunique(column_view const& values,
@@ -208,7 +208,7 @@ std::unique_ptr<column> group_nunique(column_view const& values,
  * @param n nth element to choose from each group of @p values
  * @param null_handling Exclude nulls while counting if null_policy::EXCLUDE,
  *  Include nulls if null_policy::INCLUDE.
- * @param mr Memory resource to allocate output with
+ * @param mr Device memory resource used to allocate the returned column
  * @param stream Stream to perform computation in
  */
 std::unique_ptr<column> group_nth_element(column_view const& values,
diff --git a/cpp/src/io/avro/reader_impl.hpp b/cpp/src/io/avro/reader_impl.hpp
index b81b141d419..6bc1d8d577a 100644
--- a/cpp/src/io/avro/reader_impl.hpp
+++ b/cpp/src/io/avro/reader_impl.hpp
@@ -56,7 +56,7 @@ class reader::impl {
    *
    * @param source Dataset source
    * @param options Settings for controlling reading behavior
-   * @param mr Resource to use for device memory allocation
+   * @param mr Device memory resource to use for device memory allocation
    */
   explicit impl(std::unique_ptr<datasource> source,
                 reader_options const &options,
diff --git a/cpp/src/io/csv/reader_impl.hpp b/cpp/src/io/csv/reader_impl.hpp
index 05c574d14d5..aeb726dab5e 100644
--- a/cpp/src/io/csv/reader_impl.hpp
+++ b/cpp/src/io/csv/reader_impl.hpp
@@ -74,7 +74,7 @@ class reader::impl {
    * @param source Dataset source
    * @param filepath Filepath if reading dataset from a file
    * @param options Settings for controlling reading behavior
-   * @param mr Resource to use for device memory allocation
+   * @param mr Device memory resource to use for device memory allocation
    */
   explicit impl(std::unique_ptr<datasource> source,
                 std::string filepath,
diff --git a/cpp/src/io/csv/writer_impl.hpp b/cpp/src/io/csv/writer_impl.hpp
index 6664ea94759..68d03d05105 100644
--- a/cpp/src/io/csv/writer_impl.hpp
+++ b/cpp/src/io/csv/writer_impl.hpp
@@ -56,7 +56,7 @@ class writer::impl {
    *
    * @param sink Output sink
    * @param options Settings for controlling behavior
-   * @param mr Resource to use for device memory allocation
+   * @param mr Device memory resource to use for device memory allocation
    **/
   impl(std::unique_ptr<data_sink> sink,
        writer_options const& options,
diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp
index 33bcb6a899c..429b58f5d78 100644
--- a/cpp/src/io/orc/reader_impl.hpp
+++ b/cpp/src/io/orc/reader_impl.hpp
@@ -59,7 +59,7 @@ class reader::impl {
    *
    * @param source Dataset source
    * @param options Settings for controlling reading behavior
-   * @param mr Resource to use for device memory allocation
+   * @param mr Device memory resource to use for device memory allocation
    */
   explicit impl(std::unique_ptr<datasource> source,
                 reader_options const &options,
diff --git a/cpp/src/io/orc/writer_impl.hpp b/cpp/src/io/orc/writer_impl.hpp
index ab12a1e419e..245cffb9a3f 100644
--- a/cpp/src/io/orc/writer_impl.hpp
+++ b/cpp/src/io/orc/writer_impl.hpp
@@ -71,7 +71,7 @@ class writer::impl {
    *
    * @param sink Output sink
    * @param options Settings for controlling behavior
-   * @param mr Resource to use for device memory allocation
+   * @param mr Device memory resource to use for device memory allocation
    **/
   explicit impl(std::unique_ptr<data_sink> sink,
                 writer_options const& options,
diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp
index 3955ff35e03..961d1040f6d 100644
--- a/cpp/src/io/parquet/reader_impl.hpp
+++ b/cpp/src/io/parquet/reader_impl.hpp
@@ -56,7 +56,7 @@ class reader::impl {
    *
    * @param source Dataset source
    * @param options Settings for controlling reading behavior
-   * @param mr Resource to use for device memory allocation
+   * @param mr Device memory resource to use for device memory allocation
    */
   explicit impl(std::unique_ptr<datasource> source,
                 reader_options const &options,
diff --git a/cpp/src/io/parquet/writer_impl.hpp b/cpp/src/io/parquet/writer_impl.hpp
index 83a7d7ab840..b0f04d8b560 100644
--- a/cpp/src/io/parquet/writer_impl.hpp
+++ b/cpp/src/io/parquet/writer_impl.hpp
@@ -66,7 +66,7 @@ class writer::impl {
    *
    * @param filepath Filepath if storing dataset to a file
    * @param options Settings for controlling behavior
-   * @param mr Resource to use for device memory allocation
+   * @param mr Device memory resource to use for device memory allocation
    **/
   explicit impl(std::unique_ptr<data_sink> sink,
                 writer_options const& options,
diff --git a/cpp/src/io/utilities/column_buffer.hpp b/cpp/src/io/utilities/column_buffer.hpp
index b4d9cebe3aa..8ae7b1b51c6 100644
--- a/cpp/src/io/utilities/column_buffer.hpp
+++ b/cpp/src/io/utilities/column_buffer.hpp
@@ -40,7 +40,7 @@ namespace detail {
  * @param size The number of elements to be represented by the mask
  * @param state The desired state of the mask
  * @param stream Optional stream to use for device memory alloc and kernels
- * @param mr Optional resource to use for device memory allocation
+ * @param mr Device memory resource used to allocate the returned device_buffer
  *
  * @return `rmm::device_buffer` Device buffer allocation
  */
@@ -108,7 +108,7 @@ namespace {
  * @param size List of page information
  * @param size List of page information
  * @param stream Optional stream to use for device memory alloc and kernels
- * @param mr Optional resource to use for device memory allocation
+ * @param mr Device memory resource used to allocate the returned column
  *
  * @return `std::unique_ptr<cudf::column>` Column from the existing device data
  */
diff --git a/cpp/src/join/join.cu b/cpp/src/join/join.cu
index 7c572a403a0..8d9e6ddf154 100644
--- a/cpp/src/join/join.cu
+++ b/cpp/src/join/join.cu
@@ -395,8 +395,7 @@ std::unique_ptr<experimental::table> construct_join_output_df(
  * full join.
  * Else, for every column in `left_on` and `right_on`, an output column will
  * be produced.
- * @param mr The memory resource that will be used for allocating
- * the device memory for the new table
+ * @param mr Device memory resource used to allocate the returned table
  * @param stream Optional, stream on which all memory allocations and copies
  * will be performed
  *
diff --git a/cpp/src/reductions/scan.cu b/cpp/src/reductions/scan.cu
index 7040cb0ad88..604c68ac986 100644
--- a/cpp/src/reductions/scan.cu
+++ b/cpp/src/reductions/scan.cu
@@ -184,7 +184,7 @@ struct ScanDispatcher {
    *
    * @param input     input column view
    * @param inclusive inclusive or exclusive scan
-   * @param mr The resource to use for all allocations
+   * @param mr Device memory resource used to allocate the returned column
    * @param stream The stream on which to execute all allocations and copies
    * @return
    *
diff --git a/cpp/src/strings/attributes.cu b/cpp/src/strings/attributes.cu
index 5e6c8d78628..31da8a72262 100644
--- a/cpp/src/strings/attributes.cu
+++ b/cpp/src/strings/attributes.cu
@@ -41,7 +41,7 @@ namespace {
  * @param strings Strings instance for this operation.
  * @param ufn Function returns an integer for each string.
  * @param stream Stream to use for any kernels in this function.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column
  * @return New INT32 column with lengths for each string.
  */
 template <typename UnaryFunction>
diff --git a/cpp/src/strings/case.cu b/cpp/src/strings/case.cu
index 48e5657365d..72a68cccbcc 100644
--- a/cpp/src/strings/case.cu
+++ b/cpp/src/strings/case.cu
@@ -124,7 +124,7 @@ struct upper_lower_fn {
  *
  * @param strings Strings to convert.
  * @param case_flag The character type to convert (upper, lower, or both)
- * @param mr Memory resource to use for allocation.
+ * @param mr Device memory resource used to allocate the returned column.
  * @param stream Stream to use for any kernels launched.
  * @return New strings column with characters converted.
  */
diff --git a/cpp/src/strings/find.cu b/cpp/src/strings/find.cu
index c791f8f7ab2..7bd945bdef5 100644
--- a/cpp/src/strings/find.cu
+++ b/cpp/src/strings/find.cu
@@ -43,7 +43,7 @@ namespace {
  * @param start First character position to start the search.
  * @param stop Last character position (exclusive) to end the search.
  * @param pfn Functor used for locating `target` in each string.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @param stream Stream to use for kernel calls.
  * @return New integer column with character position values.
  */
@@ -166,7 +166,7 @@ namespace {
  * @param strings Column of strings to check for target.
  * @param target UTF-8 encoded string to check in strings column.
  * @param pfn Returns bool value if target is found in the given string.
- * @param mr Resource for allocating device memory.
+ * @param mr Device memory resource used to allocate the returned column.
  * @param stream Stream to use for kernel calls.
  * @return New BOOL column.
  */
diff --git a/cpp/src/strings/utilities.cuh b/cpp/src/strings/utilities.cuh
index da131568ada..9c81f195504 100644
--- a/cpp/src/strings/utilities.cuh
+++ b/cpp/src/strings/utilities.cuh
@@ -67,7 +67,7 @@ __device__ inline char* copy_string(char* buffer, const string_view& d_string)
  * chars memory.
  * @param strings_count Number of strings.
  * @param null_count Number of nulls in the strings column.
- * @param mr Memory resource to use.
+ * @param mr Device memory resource used to allocate the returned columns.
  * @param stream Stream to use for any kernel calls.
  * @return offsets child column and chars child column for a strings column
  */

From cee0229a4fe27727f589520c05eea221002fc9ae Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Tue, 19 May 2020 11:07:50 +0530
Subject: [PATCH 27/90] remove unused device memory resource param

---
 cpp/include/cudf/io/functions.hpp                |  2 +-
 .../cudf/strings/char_types/char_types.hpp       |  8 ++------
 cpp/src/strings/char_types/char_types.cu         | 16 ++++++----------
 3 files changed, 9 insertions(+), 17 deletions(-)

diff --git a/cpp/include/cudf/io/functions.hpp b/cpp/include/cudf/io/functions.hpp
index 997bea1427c..7b98a843494 100644
--- a/cpp/include/cudf/io/functions.hpp
+++ b/cpp/include/cudf/io/functions.hpp
@@ -269,7 +269,7 @@ struct read_csv_args {
  * @param args Settings for controlling reading behavior
  * @param mr Device memory resource used to allocate device memory of the table in the returned
  * table_with_metadata
- * 
+ *
  * @return The set of columns along with metadata
  */
 table_with_metadata read_csv(read_csv_args const& args,
diff --git a/cpp/include/cudf/strings/char_types/char_types.hpp b/cpp/include/cudf/strings/char_types/char_types.hpp
index 0933ef33c12..4ffe4c35601 100644
--- a/cpp/include/cudf/strings/char_types/char_types.hpp
+++ b/cpp/include/cudf/strings/char_types/char_types.hpp
@@ -134,11 +134,9 @@ std::unique_ptr<column> is_integer(
  * Any null entry or empty string will cause this function to return `false`.
  *
  * @param strings Strings instance for this operation.
- * @param mr Resource for allocating device memory.
  * @return true if all string are valid
  */
-bool all_integer(strings_column_view const& strings,
-                 rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
+bool all_integer(strings_column_view const& strings);
 
 /**
  * @brief Returns a boolean column identifying strings in which all
@@ -174,11 +172,9 @@ std::unique_ptr<column> is_float(
  * Any null entry or empty string will cause this function to return `false`.
  *
  * @param strings Strings instance for this operation.
- * @param mr Resource for allocating device memory.
  * @return true if all string are valid
  */
-bool all_float(strings_column_view const& strings,
-               rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
+bool all_float(strings_column_view const& strings);
 
 /** @} */  // end of doxygen group
 }  // namespace strings
diff --git a/cpp/src/strings/char_types/char_types.cu b/cpp/src/strings/char_types/char_types.cu
index 188e38ca824..46760545679 100644
--- a/cpp/src/strings/char_types/char_types.cu
+++ b/cpp/src/strings/char_types/char_types.cu
@@ -110,9 +110,7 @@ std::unique_ptr<column> is_integer(
   return results;
 }
 
-bool all_integer(strings_column_view const& strings,
-                 rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource(),
-                 cudaStream_t stream                 = 0)
+bool all_integer(strings_column_view const& strings, cudaStream_t stream = 0)
 {
   auto strings_column  = column_device_view::create(strings.parent(), stream);
   auto d_column        = *strings_column;
@@ -155,9 +153,7 @@ std::unique_ptr<column> is_float(
   return results;
 }
 
-bool all_float(strings_column_view const& strings,
-               rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource(),
-               cudaStream_t stream                 = 0)
+bool all_float(strings_column_view const& strings, cudaStream_t stream = 0)
 {
   auto strings_column  = column_device_view::create(strings.parent(), stream);
   auto d_column        = *strings_column;
@@ -199,16 +195,16 @@ std::unique_ptr<column> is_float(strings_column_view const& strings,
   return detail::is_float(strings, mr);
 }
 
-bool all_integer(strings_column_view const& strings, rmm::mr::device_memory_resource* mr)
+bool all_integer(strings_column_view const& strings)
 {
   CUDF_FUNC_RANGE();
-  return detail::all_integer(strings, mr);
+  return detail::all_integer(strings);
 }
 
-bool all_float(strings_column_view const& strings, rmm::mr::device_memory_resource* mr)
+bool all_float(strings_column_view const& strings)
 {
   CUDF_FUNC_RANGE();
-  return detail::all_float(strings, mr);
+  return detail::all_float(strings);
 }
 
 }  // namespace strings

From 17f524a2b1fc38cb07f1a157ca2ec19d3f7a7321 Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Tue, 19 May 2020 11:08:19 +0530
Subject: [PATCH 28/90] changelog entry for PR #5216

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4e5bc346d91..88c2ffe43b9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -182,6 +182,7 @@
 - PR #5214 Move docs build script into repository
 - PR #5155 Fix cudf documentation misspellings
 - PR #5214 Move docs build script into repository
+- PR #5216 Make documentation uniform for params
 
 ## Bug Fixes
 

From 9070aa8355af49553350b70d8be717d26df63744 Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Tue, 19 May 2020 15:32:21 +0530
Subject: [PATCH 29/90] update doc of param stream in scalar_factories

---
 cpp/include/cudf/scalar/scalar_factories.hpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/cpp/include/cudf/scalar/scalar_factories.hpp b/cpp/include/cudf/scalar/scalar_factories.hpp
index 300d4ce6ffe..5c7da826468 100644
--- a/cpp/include/cudf/scalar/scalar_factories.hpp
+++ b/cpp/include/cudf/scalar/scalar_factories.hpp
@@ -34,7 +34,7 @@ namespace cudf {
  * @throws cudf::logic_error if `type` is not a numeric type
  *
  * @param type The desired numeric element type
- * @param stream Optional stream on which to issue all memory allocations
+ * @param stream CUDA stream used for device memory operations.
  * @param mr Device memory resource used to allocate the scalar's `data` and `is_valid` bool.
  */
 std::unique_ptr<scalar> make_numeric_scalar(
@@ -50,7 +50,7 @@ std::unique_ptr<scalar> make_numeric_scalar(
  * @throws cudf::logic_error if `type` is not a timestamp type
  *
  * @param type The desired timestamp element type
- * @param stream Optional stream on which to issue all memory allocations
+ * @param stream CUDA stream used for device memory operations.
  * @param mr Device memory resource used to allocate the scalar's `data` and `is_valid` bool.
  */
 std::unique_ptr<scalar> make_timestamp_scalar(
@@ -66,7 +66,7 @@ std::unique_ptr<scalar> make_timestamp_scalar(
  * @throws cudf::logic_error if `type` is not a fixed-width type
  *
  * @param type The desired fixed-width element type
- * @param stream Optional stream on which to issue all memory allocations
+ * @param stream CUDA stream used for device memory operations.
  * @param mr Device memory resource used to allocate the scalar's `data` and `is_valid` bool.
  */
 std::unique_ptr<scalar> make_fixed_width_scalar(
@@ -82,7 +82,7 @@ std::unique_ptr<scalar> make_fixed_width_scalar(
  * @throws std::bad_alloc if device memory allocation fails
  *
  * @param string The `std::string` to copy to device
- * @param stream Optional stream for use with all memory allocations
+ * @param stream CUDA stream used for device memory operations.
  * @param mr Device memory resource used to allocate the scalar's `data` and `is_valid` bool.
  */
 std::unique_ptr<scalar> make_string_scalar(
@@ -104,7 +104,7 @@ std::unique_ptr<scalar> make_default_constructed_scalar(data_type type);
  *
  * @tparam T Datatype of the value to be represented by the scalar
  * @param value The value to store in the scalar object
- * @param stream Optional stream on which to issue all memory allocations
+ * @param stream CUDA stream used for device memory operations.
  * @param mr Device memory resource used to allocate the scalar's `data` and `is_valid` bool.
  */
 template <typename T>

From 0f4e9285c606f48f574026df17e943e2cdb1466e Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Tue, 19 May 2020 15:40:55 +0530
Subject: [PATCH 30/90] update doc of param stream in scalar.hpp

---
 cpp/include/cudf/scalar/scalar.hpp | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/cpp/include/cudf/scalar/scalar.hpp b/cpp/include/cudf/scalar/scalar.hpp
index 181b3aaed1e..bae1d0f72f5 100644
--- a/cpp/include/cudf/scalar/scalar.hpp
+++ b/cpp/include/cudf/scalar/scalar.hpp
@@ -57,7 +57,7 @@ class scalar {
    * @brief Updates the validity of the value
    *
    * @param is_valid true: set the value to valid. false: set it to null
-   * @param stream The CUDA stream to do the operation in
+   * @param stream CUDA stream used for device memory operations.
    */
   void set_valid(bool is_valid, cudaStream_t stream = 0) { _is_valid.set_value(is_valid, stream); }
 
@@ -66,7 +66,7 @@ class scalar {
    *
    * @note Using the value when `is_valid() == false` is undefined behaviour
    *
-   * @param stream The CUDA stream to do the operation in
+   * @param stream CUDA stream used for device memory operations.
    * @return true Value is valid
    * @return false Value is invalid/null
    */
@@ -96,7 +96,7 @@ class scalar {
    *
    * @param type Data type of the scalar
    * @param is_valid Whether the value held by the scalar is valid
-   * @param stream The CUDA stream to do the allocation in
+   * @param stream CUDA stream used for device memory operations.
    * @param mr Device memory resource to use for allocation
    */
   scalar(data_type type,
@@ -126,7 +126,7 @@ class fixed_width_scalar : public scalar {
    * @brief Set the value of the scalar
    *
    * @param value New value of scalar
-   * @param stream The CUDA stream to do the operation in
+   * @param stream CUDA stream used for device memory operations.
    */
   void set_value(T value, cudaStream_t stream = 0)
   {
@@ -142,7 +142,7 @@ class fixed_width_scalar : public scalar {
   /**
    * @brief Get the value of the scalar
    *
-   * @param stream The CUDA stream to do the operation in
+   * @param stream CUDA stream used for device memory operations.
    */
   T value(cudaStream_t stream = 0) const { return _data.value(stream); }
 
@@ -166,7 +166,7 @@ class fixed_width_scalar : public scalar {
    *
    * @param value The initial value of the scalar
    * @param is_valid Whether the value held by the scalar is valid
-   * @param stream The CUDA stream to do the allocation in
+   * @param stream CUDA stream used for device memory operations.
    * @param mr Device memory resource to use for allocation
    */
   fixed_width_scalar(T value,
@@ -220,7 +220,7 @@ class numeric_scalar : public detail::fixed_width_scalar<T> {
    *
    * @param value The initial value of the scalar
    * @param is_valid Whether the value held by the scalar is valid
-   * @param stream The CUDA stream to do the allocation in
+   * @param stream CUDA stream used for device memory operations.
    * @param mr Device memory resource to use for allocation
    */
   numeric_scalar(T value,
@@ -267,7 +267,7 @@ class string_scalar : public scalar {
    *
    * @param value The value of the string
    * @param is_valid Whether the value held by the scalar is valid
-   * @param stream The CUDA stream to do the allocation in
+   * @param stream CUDA stream used for device memory operations.
    * @param mr Device memory resource to use for allocation
    */
   string_scalar(std::string const& string,
@@ -284,7 +284,7 @@ class string_scalar : public scalar {
    *
    * @param source string_view pointing string value to copy
    * @param is_valid Whether the value held by the scalar is valid
-   * @param stream The CUDA stream to do the allocation in
+   * @param stream CUDA stream used for device memory operations.
    * @param mr Device memory resource to use for allocation
    */
   string_scalar(value_type const& source,
@@ -301,7 +301,7 @@ class string_scalar : public scalar {
    *
    * @param data device_scalar string_view pointing string value to copy
    * @param is_valid Whether the value held by the scalar is valid
-   * @param stream The CUDA stream to do the allocation in
+   * @param stream CUDA stream used for device memory operations.
    * @param mr Device memory resource to use for allocation
    */
   string_scalar(rmm::device_scalar<value_type>& data,
@@ -320,14 +320,14 @@ class string_scalar : public scalar {
   /**
    * @brief Get the value of the scalar in a host std::string
    *
-   * @param stream The CUDA stream to do the operation in
+   * @param stream CUDA stream used for device memory operations.
    */
   std::string to_string(cudaStream_t stream = 0) const;
 
   /**
    * @brief Get the value of the scalar as a string_view
    *
-   * @param stream The CUDA stream to do the operation in
+   * @param stream CUDA stream used for device memory operations.
    */
   value_type value(cudaStream_t stream = 0) const { return value_type{data(), size()}; }
 
@@ -370,7 +370,7 @@ class timestamp_scalar : public detail::fixed_width_scalar<T> {
    *
    * @param value The initial value of the scalar
    * @param is_valid Whether the value held by the scalar is valid
-   * @param stream The CUDA stream to do the allocation in
+   * @param stream CUDA stream used for device memory operations.
    * @param mr Device memory resource to use for allocation
    */
   timestamp_scalar(T value,
@@ -386,7 +386,7 @@ class timestamp_scalar : public detail::fixed_width_scalar<T> {
    *
    * @param value Integer representing number of ticks since the UNIX epoch
    * @param is_valid Whether the value held by the scalar is valid
-   * @param stream The CUDA stream to do the allocation in
+   * @param stream CUDA stream used for device memory operations.
    * @param mr Device memory resource to use for allocation
    */
   timestamp_scalar(typename T::duration::rep value,

From f4058fa9fbed8b5ea898075f5f34589c8bb76e4d Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Tue, 19 May 2020 15:43:50 +0530
Subject: [PATCH 31/90] update doc of param stream in column_factories

---
 cpp/include/cudf/column/column_factories.hpp | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/cpp/include/cudf/column/column_factories.hpp b/cpp/include/cudf/column/column_factories.hpp
index de9a46702d9..78f15cdd589 100644
--- a/cpp/include/cudf/column/column_factories.hpp
+++ b/cpp/include/cudf/column/column_factories.hpp
@@ -239,8 +239,7 @@ std::unique_ptr<column> make_fixed_width_column(
  * @param strings The vector of pointer/size pairs.
  *                Each pointer must be a device memory address or `nullptr`
  * (indicating a null string). The size must be the number of bytes.
- * @param stream Optional stream for use with all memory allocation
- *               and device kernels
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @param mr Device memory resource used for allocation of the column's `null_mask` and children.
  */
 std::unique_ptr<column> make_strings_column(
@@ -270,8 +269,7 @@ std::unique_ptr<column> make_strings_column(
  * bytes.
  * @param null_placeholder string_view indicating null string in given list of
  * string_views.
- * @param stream Optional stream for use with all memory allocation
- *               and device kernels
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @param mr Device memory resource used for allocation of the column's `null_mask` and children.
  */
 std::unique_ptr<column> make_strings_column(
@@ -308,8 +306,7 @@ std::unique_ptr<column> make_strings_column(
  * @param null_count The number of null string entries. If equal to
  * `UNKNOWN_NULL_COUNT`, the null count will be computed dynamically on the
  * first invocation of `column::null_count()`
- * @param stream Optional stream for use with all memory allocation
- *               and device kernels
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @param mr Device memory resource used for allocation of the column's `null_mask` and children.
  */
 std::unique_ptr<column> make_strings_column(
@@ -348,8 +345,7 @@ std::unique_ptr<column> make_strings_column(
  * @param null_count The number of null string entries. If equal to
  * `UNKNOWN_NULL_COUNT`, the null count will be computed dynamically on the
  * first invocation of `column::null_count()`
- * @param stream Optional stream for use with all memory allocation
- *               and device kernels
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @param mr Device memory resource used for allocation of the column's `null_mask` and children.
  */
 std::unique_ptr<column> make_strings_column(
@@ -376,8 +372,7 @@ std::unique_ptr<column> make_strings_column(
  * @param null_count The number of null string entries.
  * @param null_mask The bits specifying the null strings in device memory.
  *                  Arrow format for nulls is used for interpeting this bitmask.
- * @param stream Optional stream for use with all memory allocation
- *               and device kernels
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @param mr Device memory resource used for allocation of the column's `null_mask` and children.
  */
 std::unique_ptr<column> make_strings_column(
@@ -399,8 +394,7 @@ std::unique_ptr<column> make_strings_column(
  *
  * @param s The scalar to use for values in the column.
  * @param size The number of rows for the output column.
- * @param stream Optional stream for use with all memory allocation
- *               and device kernels
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @param mr Device memory resource used to allocate the returned column.
  */
 std::unique_ptr<column> make_column_from_scalar(

From 3bae00ddcf0cbb479ce4dc264bc5d64048f7e81e Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Tue, 19 May 2020 15:48:05 +0530
Subject: [PATCH 32/90] update doc for param stream in column.hpp

---
 cpp/include/cudf/column/column.hpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/cpp/include/cudf/column/column.hpp b/cpp/include/cudf/column/column.hpp
index 280207d9550..bf552650193 100644
--- a/cpp/include/cudf/column/column.hpp
+++ b/cpp/include/cudf/column/column.hpp
@@ -58,7 +58,7 @@ class column {
    * and copies.
    *
    * @param other The `column` to copy
-   * @param stream The stream on which to execute all allocations and copies
+   * @param stream CUDA stream used for device memory operations.
    * @param mr Device memory resource to use for all allocations
    */
   column(column const& other,
@@ -113,8 +113,7 @@ class column {
    * This accounts for the `column_view`'s offset.
    *
    * @param view The view to copy
-   * @param stream The stream on which all allocations and copies will be
-   * executed
+   * @param stream CUDA stream used for device memory operations.
    * @param mr Device memory resource to use for all allocations
    */
   explicit column(column_view view,

From 063fc9daddaa7065852178abd994d34e5652b8b0 Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Tue, 19 May 2020 15:48:29 +0530
Subject: [PATCH 33/90] update doc for param stream in column_device_view.cuh

---
 cpp/include/cudf/column/column_device_view.cuh | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh
index 89f17c70406..3f447ac1eb7 100644
--- a/cpp/include/cudf/column/column_device_view.cuh
+++ b/cpp/include/cudf/column/column_device_view.cuh
@@ -373,8 +373,7 @@ class alignas(16) column_device_view : public detail::column_device_view_base {
    * A `column_device_view` should be passed by value into GPU kernels.
    *
    * @param source_view The `column_view` to make usable in device code
-   * @param stream optional, stream on which the memory for children will be
-   * allocated
+   * @param stream CUDA stream used for device memory operations for child columns.
    * @return A `unique_ptr` to a `column_device_view` that makes the data from
    *`source_view` available in device memory.
    */
@@ -465,8 +464,7 @@ class alignas(16) mutable_column_device_view : public detail::column_device_view
    * A `mutable_column_device_view` should be passed by value into GPU kernels.
    *
    * @param source_view The `column_view` to make usable in device code
-   * @param stream optional, stream on which the memory for children will be
-   * allocated
+   * @param stream CUDA stream used for device memory operations for child columns.
    * @return A `unique_ptr` to a `mutable_column_device_view` that makes the
    * data from `source_view` available in device memory.
    */

From 1ac169e43b247b74aa29cd16619ed78522a439ca Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Tue, 19 May 2020 15:48:57 +0530
Subject: [PATCH 34/90] update doc for param stream for table.hpp

---
 cpp/include/cudf/table/table.hpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/cpp/include/cudf/table/table.hpp b/cpp/include/cudf/table/table.hpp
index b2fbf7f57ef..924a37746c0 100644
--- a/cpp/include/cudf/table/table.hpp
+++ b/cpp/include/cudf/table/table.hpp
@@ -54,8 +54,7 @@ class table {
    * @brief Copy the contents of a `table_view` to construct a new `table`.
    *
    * @param view The view whose contents will be copied to create a new `table`
-   * @param stream Optional, stream on which all memory allocations and copies
-   * will be performed
+   * @param stream CUDA stream used for device memory operations.
    * @param mr Device memory resource used for allocating the device memory for the new columns
    **/
   table(table_view view,

From a6d762faf38fd32b447ff712a89ca747c80d558a Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Tue, 19 May 2020 16:11:57 +0530
Subject: [PATCH 35/90] update doc for param stream for string headers

---
 cpp/include/cudf/strings/copying.hpp             | 2 +-
 cpp/include/cudf/strings/sorting.hpp             | 2 +-
 cpp/include/cudf/strings/strings_column_view.hpp | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/cpp/include/cudf/strings/copying.hpp b/cpp/include/cudf/strings/copying.hpp
index 33a1e519172..d128f598f82 100644
--- a/cpp/include/cudf/strings/copying.hpp
+++ b/cpp/include/cudf/strings/copying.hpp
@@ -41,7 +41,7 @@ namespace detail {
  *            Default -1 indicates the last element.
  * @param step Increment value between indices.
  *             Default step is 1.
- * @param stream CUDA stream to use kernels in this method.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @param mr Device memory resource used to allocate the returned column.
  * @return New strings column of size (end-start)/step.
  */
diff --git a/cpp/include/cudf/strings/sorting.hpp b/cpp/include/cudf/strings/sorting.hpp
index c915433f51b..7116d9aabb3 100644
--- a/cpp/include/cudf/strings/sorting.hpp
+++ b/cpp/include/cudf/strings/sorting.hpp
@@ -39,7 +39,7 @@ enum sort_type {
  * @param stype Specify what attribute of the string to sort on.
  * @param order Sort strings in ascending or descending order.
  * @param null_order Sort nulls to the beginning or the end of the new column.
- * @param stream CUDA stream to use kernels in this method.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @param mr Device memory resource used to allocate the returned column.
  * @return New strings column with sorted elements of this instance.
  */
diff --git a/cpp/include/cudf/strings/strings_column_view.hpp b/cpp/include/cudf/strings/strings_column_view.hpp
index 46fc1aeade7..eaf57bf3ed7 100644
--- a/cpp/include/cudf/strings/strings_column_view.hpp
+++ b/cpp/include/cudf/strings/strings_column_view.hpp
@@ -107,7 +107,7 @@ void print(strings_column_view const& strings,
  * The return pair is the vector of chars and the vector of offsets.
  *
  * @param strings Strings instance for this operation.
- * @param stream CUDA stream to use kernels in this method.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @param mr Device memory resource used to allocate the returned device_vectors.
  * @return Pair containing a vector of chars and a vector of offsets.
  */

From 582c247bbbe9fc0daf56a4df8c5f52ff9536e469 Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Tue, 19 May 2020 16:12:30 +0530
Subject: [PATCH 36/90] update doc for param stream for string detail headers

---
 cpp/include/cudf/strings/detail/combine.hpp      |  4 ++--
 cpp/include/cudf/strings/detail/concatenate.hpp  |  2 +-
 cpp/include/cudf/strings/detail/converters.hpp   | 16 ++++++++--------
 cpp/include/cudf/strings/detail/copy_if_else.cuh |  2 +-
 cpp/include/cudf/strings/detail/copy_range.cuh   |  2 +-
 cpp/include/cudf/strings/detail/fill.hpp         |  2 +-
 cpp/include/cudf/strings/detail/gather.cuh       |  4 ++--
 cpp/include/cudf/strings/detail/merge.cuh        |  2 +-
 .../cudf/strings/detail/modify_strings.cuh       |  2 +-
 cpp/include/cudf/strings/detail/scatter.cuh      |  2 +-
 cpp/include/cudf/strings/detail/utilities.cuh    |  2 +-
 cpp/include/cudf/strings/detail/utilities.hpp    | 10 +++++-----
 12 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/cpp/include/cudf/strings/detail/combine.hpp b/cpp/include/cudf/strings/detail/combine.hpp
index c2420df2a2a..c45bc9558ed 100644
--- a/cpp/include/cudf/strings/detail/combine.hpp
+++ b/cpp/include/cudf/strings/detail/combine.hpp
@@ -28,7 +28,7 @@ namespace detail {
  * @copydoc concatenate(table_view const&,string_scalar const&,string_scalar
  * const&,rmm::mr::device_memory_resource*)
  *
- * @param stream Stream on which to run kernels
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> concatenate(table_view const& strings_columns,
                                     string_scalar const& separator,
@@ -40,7 +40,7 @@ std::unique_ptr<column> concatenate(table_view const& strings_columns,
  * @copydoc join_strings(table_view const&,string_scalar const&,string_scalar
  * const&,rmm::mr::device_memory_resource*)
  *
- * @param stream Stream on which to run kernels
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> join_strings(strings_column_view const& strings,
                                      string_scalar const& separator,
diff --git a/cpp/include/cudf/strings/detail/concatenate.hpp b/cpp/include/cudf/strings/detail/concatenate.hpp
index b29e8db5d79..312132806f9 100644
--- a/cpp/include/cudf/strings/detail/concatenate.hpp
+++ b/cpp/include/cudf/strings/detail/concatenate.hpp
@@ -35,7 +35,7 @@ namespace detail {
  *
  * @param columns List of string columns to concatenate.
  * @param mr Device memory resource used to allocate the returned column.
- * @param stream CUDA stream to use for any kernels in this function.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return New column with concatenated results.
  */
 std::unique_ptr<column> concatenate(
diff --git a/cpp/include/cudf/strings/detail/converters.hpp b/cpp/include/cudf/strings/detail/converters.hpp
index 3f8e9d3bf05..4741a4f31ba 100644
--- a/cpp/include/cudf/strings/detail/converters.hpp
+++ b/cpp/include/cudf/strings/detail/converters.hpp
@@ -26,7 +26,7 @@ namespace detail {
 /**
  * @copydoc to_integers(strings_column_view const&,data_type,rmm::mr::device_memory_resource*)
  *
- * @param stream Stream on which to execute kernels.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> to_integers(strings_column_view const& strings,
                                     data_type output_type,
@@ -36,7 +36,7 @@ std::unique_ptr<column> to_integers(strings_column_view const& strings,
 /**
  * @copydoc from_integers(strings_column_view const&,rmm::mr::device_memory_resource*)
  *
- * @param stream Stream on which to execute kernels.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> from_integers(column_view const& integers,
                                       cudaStream_t stream,
@@ -45,7 +45,7 @@ std::unique_ptr<column> from_integers(column_view const& integers,
 /**
  * @copydoc to_floats(strings_column_view const&,data_type,rmm::mr::device_memory_resource*)
  *
- * @param stream Stream on which to execute kernels.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> to_floats(strings_column_view const& strings,
                                   data_type output_type,
@@ -55,7 +55,7 @@ std::unique_ptr<column> to_floats(strings_column_view const& strings,
 /**
  * @copydoc from_floats(strings_column_view const&,rmm::mr::device_memory_resource*)
  *
- * @param stream Stream on which to execute kernels.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> from_floats(column_view const& floats,
                                     cudaStream_t stream,
@@ -65,7 +65,7 @@ std::unique_ptr<column> from_floats(column_view const& floats,
  * @copydoc to_booleans(strings_column_view const&,string_scalar
  * const&,rmm::mr::device_memory_resource*)
  *
- * @param stream Stream on which to execute kernels.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> to_booleans(strings_column_view const& strings,
                                     string_scalar const& true_string,
@@ -76,7 +76,7 @@ std::unique_ptr<column> to_booleans(strings_column_view const& strings,
  * @copydoc from_booleans(strings_column_view const&,string_scalar const&,string_scalar
  * const&,rmm::mr::device_memory_resource*)
  *
- * @param stream Stream on which to execute kernels.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> from_booleans(column_view const& booleans,
                                       string_scalar const& true_string,
@@ -88,7 +88,7 @@ std::unique_ptr<column> from_booleans(column_view const& booleans,
  * @copydoc to_timestamps(strings_column_view const&,data_type,std::string
  * const&,rmm::mr::device_memory_resource*)
  *
- * @param stream Stream on which to execute kernels.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<cudf::column> to_timestamps(strings_column_view const& strings,
                                             data_type timestamp_type,
@@ -100,7 +100,7 @@ std::unique_ptr<cudf::column> to_timestamps(strings_column_view const& strings,
  * @copydoc from_timestamps(strings_column_view const&,std::string
  * const&,rmm::mr::device_memory_resource*)
  *
- * @param stream Stream on which to execute kernels.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> from_timestamps(column_view const& timestamps,
                                         std::string const& format,
diff --git a/cpp/include/cudf/strings/detail/copy_if_else.cuh b/cpp/include/cudf/strings/detail/copy_if_else.cuh
index 8a60aaa6aee..5ff701cf460 100644
--- a/cpp/include/cudf/strings/detail/copy_if_else.cuh
+++ b/cpp/include/cudf/strings/detail/copy_if_else.cuh
@@ -45,7 +45,7 @@ namespace detail {
  * @param filter_fn Called to determine which iterator (lhs or rhs) to retrieve an entry for a
  * specific row.
  * @param mr Device memory resource used to allocate the returned column.
- * @param stream CUDA stream to use kernels in this method.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return New strings column.
  */
 template <typename StringPairIterLeft, typename StringPairIterRight, typename Filter>
diff --git a/cpp/include/cudf/strings/detail/copy_range.cuh b/cpp/include/cudf/strings/detail/copy_range.cuh
index 9b252d6f41a..2a3110b56df 100644
--- a/cpp/include/cudf/strings/detail/copy_range.cuh
+++ b/cpp/include/cudf/strings/detail/copy_range.cuh
@@ -90,7 +90,7 @@ namespace detail {
  * @param target_end The index of the last element in the target range
  * (exclusive)
  * @param mr Device memory resource used to allocate the returned column.
- * @param stream CUDA stream to run this function
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return std::unique_ptr<column> The result target column
  */
 template <typename SourceValueIterator, typename SourceValidityIterator>
diff --git a/cpp/include/cudf/strings/detail/fill.hpp b/cpp/include/cudf/strings/detail/fill.hpp
index 8f47db846db..e1cb95c28bd 100644
--- a/cpp/include/cudf/strings/detail/fill.hpp
+++ b/cpp/include/cudf/strings/detail/fill.hpp
@@ -36,7 +36,7 @@ namespace detail {
  * @param end Last row index (exclusive).
  * @param value String to use when filling the range.
  * @param mr Device memory resource used to allocate the returned column.
- * @param stream CUDA stream to use for any kernels in this function.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return New strings column.
  */
 std::unique_ptr<column> fill(strings_column_view const& strings,
diff --git a/cpp/include/cudf/strings/detail/gather.cuh b/cpp/include/cudf/strings/detail/gather.cuh
index af52b801a52..fdbd79750d7 100644
--- a/cpp/include/cudf/strings/detail/gather.cuh
+++ b/cpp/include/cudf/strings/detail/gather.cuh
@@ -46,7 +46,7 @@ namespace detail {
  * @param begin Start of index iterator.
  * @param end End of index iterator.
  * @param mr Device memory resource used to allocate the returned column.
- * @param stream CUDA stream to use kernels in this method.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return New strings column containing the gathered strings.
  */
 template <bool NullifyOutOfBounds, typename MapIterator>
@@ -123,7 +123,7 @@ std::unique_ptr<cudf::column> gather(
  * @param end End of index iterator.
  * @param nullify_out_of_bounds If true, indices outside the column's range are nullified.
  * @param mr Device memory resource used to allocate the returned column.
- * @param stream CUDA stream to use kernels in this method.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return New strings column containing the gathered strings.
  */
 template <typename MapIterator>
diff --git a/cpp/include/cudf/strings/detail/merge.cuh b/cpp/include/cudf/strings/detail/merge.cuh
index 4c4f49cb72f..f7c247b35a7 100644
--- a/cpp/include/cudf/strings/detail/merge.cuh
+++ b/cpp/include/cudf/strings/detail/merge.cuh
@@ -38,7 +38,7 @@ namespace detail {
  * @param rhs Second column.
  * @param row_order Indexes for each column.
  * @param mr Device memory resource used to allocate the returned column.
- * @param stream CUDA stream to use for any kernels in this function.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return New strings column.
  */
 template <typename index_type, typename row_order_iterator>
diff --git a/cpp/include/cudf/strings/detail/modify_strings.cuh b/cpp/include/cudf/strings/detail/modify_strings.cuh
index 6a5999f2f3a..97a59b89afc 100644
--- a/cpp/include/cudf/strings/detail/modify_strings.cuh
+++ b/cpp/include/cudf/strings/detail/modify_strings.cuh
@@ -43,7 +43,7 @@ namespace detail {
  * it is not modified in place; rather a new column is returned instead
  * @param mr Device memory resource used to allocate the returned column.
  * (cannot be a default argument because of the variadic pack);
- * @param stream Stream to use for any kernel calls.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * (cannot be a default argument because of the variadic pack);
  * @param ...args Additional arguments to be forwarded to
  * the probe / execute constructors (can be empty);
diff --git a/cpp/include/cudf/strings/detail/scatter.cuh b/cpp/include/cudf/strings/detail/scatter.cuh
index 8911cf9c098..f7806921fb4 100644
--- a/cpp/include/cudf/strings/detail/scatter.cuh
+++ b/cpp/include/cudf/strings/detail/scatter.cuh
@@ -45,7 +45,7 @@ namespace detail {
  * @param target The set of columns into which values from the source column
  *        are to be scattered.
  * @param mr Device memory resource used to allocate the returned column
- * @param stream The stream to use for CUDA operations
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return New strings column.
  */
 template <typename SourceIterator, typename MapIterator>
diff --git a/cpp/include/cudf/strings/detail/utilities.cuh b/cpp/include/cudf/strings/detail/utilities.cuh
index e96b7360835..9a24a33c0f8 100644
--- a/cpp/include/cudf/strings/detail/utilities.cuh
+++ b/cpp/include/cudf/strings/detail/utilities.cuh
@@ -34,7 +34,7 @@ namespace detail {
  * @param begin The beginning of the input sequence
  * @param end The end of the input sequence
  * @param mr Device memory resource used to allocate the returned column.
- * @param stream Stream to use for any kernel calls.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return offsets child column for strings column
  */
 template <typename InputIterator>
diff --git a/cpp/include/cudf/strings/detail/utilities.hpp b/cpp/include/cudf/strings/detail/utilities.hpp
index b88d5b66e4a..04680e59753 100644
--- a/cpp/include/cudf/strings/detail/utilities.hpp
+++ b/cpp/include/cudf/strings/detail/utilities.hpp
@@ -31,7 +31,7 @@ namespace detail {
  * @param null_count Number of null string entries in the column.
  * @param bytes Number of bytes for the chars column.
  * @param mr Device memory resource used to allocate the returned column.
- * @param stream Stream to use for any kernel calls.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return The chars child column for a strings column.
  */
 std::unique_ptr<column> create_chars_child_column(
@@ -45,7 +45,7 @@ std::unique_ptr<column> create_chars_child_column(
  * @brief Create a strings column with no strings.
  *
  * @param mr Device memory resource used to allocate the returned column.
- * @param stream Stream to use for any kernel calls.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return Empty strings column
  */
 std::unique_ptr<column> make_empty_strings_column(
@@ -55,7 +55,7 @@ std::unique_ptr<column> make_empty_strings_column(
  * @brief Creates a string_view vector from a strings column.
  *
  * @param strings Strings column instance.
- * @param stream Stream to execute any device code against.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return Device vector of string_views
  */
 rmm::device_vector<string_view> create_string_vector_from_column(cudf::strings_column_view strings,
@@ -66,7 +66,7 @@ rmm::device_vector<string_view> create_string_vector_from_column(cudf::strings_c
  *
  * @param strings Strings column
  * @param mr Device memory resource used to allocate the returned column.
- * @param stream Stream to execute any device code against.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return Child offsets column
  */
 std::unique_ptr<cudf::column> child_offsets_from_string_vector(
@@ -81,7 +81,7 @@ std::unique_ptr<cudf::column> child_offsets_from_string_vector(
  * @param d_offsets Offsets vector for placing strings into column's memory.
  * @param null_count Number of null strings.
  * @param mr Device memory resource used to allocate the returned column.
- * @param stream Stream to execute any device code against.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return Child chars column
  */
 std::unique_ptr<cudf::column> child_chars_from_string_vector(

From 028d0345ea894b20cc32e25d9c9b602507f3d153 Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Tue, 19 May 2020 16:13:19 +0530
Subject: [PATCH 37/90] update doc for param stream for string cu files

---
 cpp/src/strings/attributes.cu | 2 +-
 cpp/src/strings/case.cu       | 2 +-
 cpp/src/strings/find.cu       | 4 ++--
 cpp/src/strings/utilities.cuh | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/cpp/src/strings/attributes.cu b/cpp/src/strings/attributes.cu
index 31da8a72262..5f4dab419c3 100644
--- a/cpp/src/strings/attributes.cu
+++ b/cpp/src/strings/attributes.cu
@@ -40,7 +40,7 @@ namespace {
  * @tparam UnaryFunction Device function that returns an integer given a string_view.
  * @param strings Strings instance for this operation.
  * @param ufn Function returns an integer for each string.
- * @param stream Stream to use for any kernels in this function.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @param mr Device memory resource used to allocate the returned column
  * @return New INT32 column with lengths for each string.
  */
diff --git a/cpp/src/strings/case.cu b/cpp/src/strings/case.cu
index 72a68cccbcc..e9006f8ac0d 100644
--- a/cpp/src/strings/case.cu
+++ b/cpp/src/strings/case.cu
@@ -125,7 +125,7 @@ struct upper_lower_fn {
  * @param strings Strings to convert.
  * @param case_flag The character type to convert (upper, lower, or both)
  * @param mr Device memory resource used to allocate the returned column.
- * @param stream Stream to use for any kernels launched.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return New strings column with characters converted.
  */
 std::unique_ptr<column> convert_case(strings_column_view const& strings,
diff --git a/cpp/src/strings/find.cu b/cpp/src/strings/find.cu
index 7bd945bdef5..4365f986334 100644
--- a/cpp/src/strings/find.cu
+++ b/cpp/src/strings/find.cu
@@ -44,7 +44,7 @@ namespace {
  * @param stop Last character position (exclusive) to end the search.
  * @param pfn Functor used for locating `target` in each string.
  * @param mr Device memory resource used to allocate the returned column.
- * @param stream Stream to use for kernel calls.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return New integer column with character position values.
  */
 template <typename FindFunction>
@@ -167,7 +167,7 @@ namespace {
  * @param target UTF-8 encoded string to check in strings column.
  * @param pfn Returns bool value if target is found in the given string.
  * @param mr Device memory resource used to allocate the returned column.
- * @param stream Stream to use for kernel calls.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return New BOOL column.
  */
 template <typename BoolFunction>
diff --git a/cpp/src/strings/utilities.cuh b/cpp/src/strings/utilities.cuh
index 9c81f195504..541f96c57b8 100644
--- a/cpp/src/strings/utilities.cuh
+++ b/cpp/src/strings/utilities.cuh
@@ -68,7 +68,7 @@ __device__ inline char* copy_string(char* buffer, const string_view& d_string)
  * @param strings_count Number of strings.
  * @param null_count Number of nulls in the strings column.
  * @param mr Device memory resource used to allocate the returned columns.
- * @param stream Stream to use for any kernel calls.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return offsets child column and chars child column for a strings column
  */
 template <typename SizeAndExecuteFunction>

From 3f9bfcce4099ee8b40f9bd5c47e33e5b0af9d651 Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Tue, 19 May 2020 16:20:46 +0530
Subject: [PATCH 38/90] update doc for param stream for dictionary headers

---
 cpp/include/cudf/detail/utilities/cuda.cuh           | 2 +-
 cpp/include/cudf/dictionary/detail/encode.hpp        | 6 ++----
 cpp/include/cudf/dictionary/detail/search.hpp        | 2 +-
 cpp/include/cudf/dictionary/detail/update_keys.hpp   | 8 ++++----
 cpp/include/cudf/dictionary/dictionary_factories.hpp | 3 +--
 5 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/cpp/include/cudf/detail/utilities/cuda.cuh b/cpp/include/cudf/detail/utilities/cuda.cuh
index e5eb36898f8..2a33efae442 100644
--- a/cpp/include/cudf/detail/utilities/cuda.cuh
+++ b/cpp/include/cudf/detail/utilities/cuda.cuh
@@ -166,7 +166,7 @@ __global__ void single_thread_kernel(F f)
  *
  * @tparam Functor Device functor type
  * @param functor device functor object or device lambda function
- * @param stream stream to run the kernel
+ * @param stream CUDA stream used for the kernel launch
  */
 template <class Functor>
 void device_single_thread(Functor functor, cudaStream_t stream = 0)
diff --git a/cpp/include/cudf/dictionary/detail/encode.hpp b/cpp/include/cudf/dictionary/detail/encode.hpp
index 67e205b0229..555be4a6cfb 100644
--- a/cpp/include/cudf/dictionary/detail/encode.hpp
+++ b/cpp/include/cudf/dictionary/detail/encode.hpp
@@ -45,8 +45,7 @@ namespace detail {
  * @param column The column to dictionary encode.
  * @param indices_type The integer type to use for the indices.
  * @param mr Device memory resource used to allocate the returned column.
- * @param stream Optional stream on which to issue all memory allocation and
- *               device kernels.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return Returns a dictionary column.
  */
 std::unique_ptr<column> encode(
@@ -67,8 +66,7 @@ std::unique_ptr<column> encode(
  *
  * @param dictionary_column Existing dictionary column.
  * @param mr Device memory resource used to allocate the returned column.
- * @param stream Optional stream on which to issue all memory allocation and
- *               device kernels.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return New column with type matching the dictionary_column's keys.
  */
 std::unique_ptr<column> decode(
diff --git a/cpp/include/cudf/dictionary/detail/search.hpp b/cpp/include/cudf/dictionary/detail/search.hpp
index 551707b9d32..99a2024b793 100644
--- a/cpp/include/cudf/dictionary/detail/search.hpp
+++ b/cpp/include/cudf/dictionary/detail/search.hpp
@@ -25,7 +25,7 @@ namespace detail {
  * @copydoc cudf::dictionary::get_index(dictionary_column_view const&,scalar
  * const&,rmm::mr::device_memory_resource*)
  *
- * @param stream Optional CUDA stream on which to execute kernels
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<numeric_scalar<int32_t>> get_index(
   dictionary_column_view const& dictionary,
diff --git a/cpp/include/cudf/dictionary/detail/update_keys.hpp b/cpp/include/cudf/dictionary/detail/update_keys.hpp
index d5c9fdcb9f6..78ffff72484 100644
--- a/cpp/include/cudf/dictionary/detail/update_keys.hpp
+++ b/cpp/include/cudf/dictionary/detail/update_keys.hpp
@@ -28,7 +28,7 @@ namespace detail {
  * @param dictionary_column Existing dictionary column.
  * @param new_keys New keys to incorporate into the dictionary_column
  * @param mr Device memory resource used to allocate the returned column.
- * @param stream Stream to use for any CUDA calls.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return New dictionary column.
  */
 std::unique_ptr<column> add_keys(
@@ -44,7 +44,7 @@ std::unique_ptr<column> add_keys(
  * @param dictionary_column Existing dictionary column.
  * @param keys_to_remove The keys to remove from the dictionary_column
  * @param mr Device memory resource used to allocate the returned column.
- * @param stream Stream to use for any CUDA calls.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return New dictionary column.
  */
 std::unique_ptr<column> remove_keys(
@@ -59,7 +59,7 @@ std::unique_ptr<column> remove_keys(
  *
  * @param dictionary_column Existing dictionary column.
  * @param mr Device memory resource used to allocate the returned column.
- * @param stream Stream to use for any CUDA calls.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return New dictionary column.
  */
 std::unique_ptr<column> remove_unused_keys(
@@ -74,7 +74,7 @@ std::unique_ptr<column> remove_unused_keys(
  * @param dictionary_column Existing dictionary column.
  * @param keys New keys to use for the output column. Must not contain nulls.
  * @param mr Device memory resource used to allocate the returned column.
- * @param stream Stream to use for any CUDA calls.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return New dictionary column.
  */
 std::unique_ptr<column> set_keys(
diff --git a/cpp/include/cudf/dictionary/dictionary_factories.hpp b/cpp/include/cudf/dictionary/dictionary_factories.hpp
index 62ec8ebc0a4..ad962105c02 100644
--- a/cpp/include/cudf/dictionary/dictionary_factories.hpp
+++ b/cpp/include/cudf/dictionary/dictionary_factories.hpp
@@ -51,8 +51,7 @@ namespace cudf {
  * @param keys_column Column of unique, ordered values to use as the new dictionary column's keys.
  * @param indices_column Indices to use for the new dictionary column.
  * @param mr Device memory resource used to allocate the returned column.
- * @param stream Optional stream on which to issue all memory allocation and
- *               device kernels.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return New dictionary column.
  */
 std::unique_ptr<column> make_dictionary_column(

From ac624a265f18d3e96f10f9d34132e8efaa719e6e Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Tue, 19 May 2020 16:21:09 +0530
Subject: [PATCH 39/90] update doc for param stream in dictionary cu file

---
 cpp/src/dictionary/remove_keys.cu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/src/dictionary/remove_keys.cu b/cpp/src/dictionary/remove_keys.cu
index 539ff306512..0001155868f 100644
--- a/cpp/src/dictionary/remove_keys.cu
+++ b/cpp/src/dictionary/remove_keys.cu
@@ -44,7 +44,7 @@ namespace {
  * @param dictionary_column The column to use for creating the new dictionary.
  * @param keys_to_keep_fn Called to determine which keys in `dictionary_column` to keep.
  * @param mr Device memory resource used to allocate the returned column.
- * @param stream CUDA Stream for kernel calls.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 template <typename KeysKeeper>
 std::unique_ptr<column> remove_keys_fn(

From 9fb327cb7c513801f0cfe1e59d9bf3546f5d2643 Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Tue, 19 May 2020 16:23:56 +0530
Subject: [PATCH 40/90] update doc for param stream in reduction functions

---
 .../cudf/detail/reduction_functions.hpp       | 20 +++++++++----------
 cpp/src/reductions/scan.cu                    |  2 +-
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/cpp/include/cudf/detail/reduction_functions.hpp b/cpp/include/cudf/detail/reduction_functions.hpp
index fd4815343f9..8f61abbdd6c 100644
--- a/cpp/include/cudf/detail/reduction_functions.hpp
+++ b/cpp/include/cudf/detail/reduction_functions.hpp
@@ -33,7 +33,7 @@ namespace reduction {
  * @param col input column to compute sum
  * @param output_dtype data type of return type and typecast elements of input column
  * @param mr Device memory resource used to allocate the returned scalar
- * @param stream Stream to use for any kernel launches.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return Sum as scalar of type `output_dtype`.
  */
 std::unique_ptr<scalar> sum(column_view const& col,
@@ -50,7 +50,7 @@ std::unique_ptr<scalar> sum(column_view const& col,
  * @param col input column to compute minimum.
  * @param output_dtype data type of return type and typecast elements of input column
  * @param mr Device memory resource used to allocate the returned scalar
- * @param stream Stream to use for any kernel launches.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return Minimum element as scalar of type `output_dtype`.
  */
 std::unique_ptr<scalar> min(column_view const& col,
@@ -67,7 +67,7 @@ std::unique_ptr<scalar> min(column_view const& col,
  * @param col input column to compute maximum.
  * @param output_dtype data type of return type and typecast elements of input column
  * @param mr Device memory resource used to allocate the returned scalar
- * @param stream Stream to use for any kernel launches.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return Maximum element as scalar of type `output_dtype`.
  */
 std::unique_ptr<scalar> max(column_view const& col,
@@ -85,7 +85,7 @@ std::unique_ptr<scalar> max(column_view const& col,
  * @param col input column to compute any_of.
  * @param output_dtype data type of return type and typecast elements of input column
  * @param mr Device memory resource used to allocate the returned scalar
- * @param stream Stream to use for any kernel launches.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return bool scalar if any of elements is true when typecasted to bool
  */
 std::unique_ptr<scalar> any(column_view const& col,
@@ -103,7 +103,7 @@ std::unique_ptr<scalar> any(column_view const& col,
  * @param col input column to compute all_of.
  * @param output_dtype data type of return type and typecast elements of input column
  * @param mr Device memory resource used to allocate the returned scalar
- * @param stream Stream to use for any kernel launches.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return bool scalar if all of elements is true when typecasted to bool
  */
 std::unique_ptr<scalar> all(column_view const& col,
@@ -121,7 +121,7 @@ std::unique_ptr<scalar> all(column_view const& col,
  * @param col input column to compute product.
  * @param output_dtype data type of return type and typecast elements of input column
  * @param mr Device memory resource used to allocate the returned scalar
- * @param stream Stream to use for any kernel launches.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return Product as scalar of type `output_dtype`.
  */
 std::unique_ptr<scalar> product(
@@ -141,7 +141,7 @@ std::unique_ptr<scalar> product(
  * @param col input column to compute sum of squares.
  * @param output_dtype data type of return type and typecast elements of input column
  * @param mr Device memory resource used to allocate the returned scalar
- * @param stream Stream to use for any kernel launches.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return Sum of squares as scalar of type `output_dtype`.
  */
 std::unique_ptr<scalar> sum_of_squares(
@@ -161,7 +161,7 @@ std::unique_ptr<scalar> sum_of_squares(
  * @param col input column to compute mean.
  * @param output_dtype data type of return type and typecast elements of input column.
  * @param mr Device memory resource used to allocate the returned scalar.
- * @param stream Stream to use for any kernel launches.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return Mean as scalar of type `output_dtype`.
  */
 std::unique_ptr<scalar> mean(column_view const& col,
@@ -180,7 +180,7 @@ std::unique_ptr<scalar> mean(column_view const& col,
  * @param col input column to compute variance.
  * @param output_dtype data type of return type and typecast elements of input column.
  * @param mr Device memory resource used to allocate the returned scalar.
- * @param stream Stream to use for any kernel launches.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return Variance as scalar of type `output_dtype`.
  */
 std::unique_ptr<scalar> variance(
@@ -201,7 +201,7 @@ std::unique_ptr<scalar> variance(
  * @param col input column to compute standard deviation.
  * @param output_dtype data type of return type and typecast elements of input column.
  * @param mr Device memory resource used to allocate the returned scalar.
- * @param stream Stream to use for any kernel launches.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return Standard deviation as scalar of type `output_dtype`.
  */
 std::unique_ptr<scalar> standard_deviation(
diff --git a/cpp/src/reductions/scan.cu b/cpp/src/reductions/scan.cu
index bf2e0f3635b..72b9998921c 100644
--- a/cpp/src/reductions/scan.cu
+++ b/cpp/src/reductions/scan.cu
@@ -185,7 +185,7 @@ struct ScanDispatcher {
    * @param input     input column view
    * @param inclusive inclusive or exclusive scan
    * @param mr Device memory resource used to allocate the returned column
-   * @param stream The stream on which to execute all allocations and copies
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    * @return
    *
    * @tparam T type of input column

From c0215a53b903a450282c6566be92905987f8371c Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Tue, 19 May 2020 16:26:27 +0530
Subject: [PATCH 41/90] update doc for param stream in null_mask.hpp

---
 cpp/include/cudf/null_mask.hpp | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/cpp/include/cudf/null_mask.hpp b/cpp/include/cudf/null_mask.hpp
index 2ac2ee2736b..c53a4f8fb3d 100644
--- a/cpp/include/cudf/null_mask.hpp
+++ b/cpp/include/cudf/null_mask.hpp
@@ -74,8 +74,7 @@ size_type num_bitmask_words(size_type number_of_bits);
  *
  * @param size The number of elements to be represented by the mask
  * @param state The desired state of the mask
- * @param stream Optional, stream on which all memory allocations/operations
- * will be submitted
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @param mr Device memory resource used to allocate the returned device_buffer.
  * @return rmm::device_buffer A `device_buffer` for use as a null bitmask
  * satisfying the desired size and state
@@ -97,8 +96,7 @@ rmm::device_buffer create_null_mask(
  * @param begin_bit Index of the first bit to set (inclusive)
  * @param end_bit Index of the last bit to set (exclusive)
  * @param valid If true set all entries to valid; otherwise, set all to null.
- * @param stream Optional, stream on which all memory allocations/operations
- * will be submitted
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  **/
 void set_null_mask(bitmask_type* bitmask,
                    size_type begin_bit,
@@ -188,8 +186,7 @@ std::vector<size_type> segmented_count_unset_bits(bitmask_type const* bitmask,
  * @param mask Bitmask residing in device memory whose bits will be copied
  * @param begin_bit Index of the first bit to be copied (inclusive)
  * @param end_bit Index of the last bit to be copied (exclusive)
- * @param stream Optional, stream on which all memory allocations and copies
- * will be performed
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @param mr Device memory resource used to allocate the returned device_buffer
  * @return rmm::device_buffer A `device_buffer` containing the bits
  * `[begin_bit, end_bit)` from `mask`.
@@ -208,8 +205,7 @@ rmm::device_buffer copy_bitmask(
  * Returns empty `device_buffer` if the column is not nullable
  *
  * @param view Column view whose bitmask needs to be copied
- * @param stream Optional, stream on which all memory allocations and copies
- * will be performed
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @param mr Device memory resource used to allocate the returned device_buffer
  * @return rmm::device_buffer A `device_buffer` containing the bits
  * `[view.offset(), view.offset() + view.size())` from `view`'s bitmask.
@@ -226,7 +222,7 @@ rmm::device_buffer copy_bitmask(
  * If no column in the table is nullable, an empty bitmask is returned.
  *
  * @param view The table of columns
- * @param stream CUDA stream on which to execute kernels
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @param mr Device memory resource used to allocate the returned device_buffer
  * @return rmm::device_buffer Output bitmask
  */

From 9b8e8c0d805b3ed5475455d91aeea80f63c30c3c Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Tue, 19 May 2020 16:31:31 +0530
Subject: [PATCH 42/90] update doc for param stream for io readers.hpp

---
 cpp/include/cudf/io/readers.hpp | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/cpp/include/cudf/io/readers.hpp b/cpp/include/cudf/io/readers.hpp
index 84a96652acb..76b47db20d0 100644
--- a/cpp/include/cudf/io/readers.hpp
+++ b/cpp/include/cudf/io/readers.hpp
@@ -115,7 +115,7 @@ class reader {
   /**
    * @brief Reads the entire dataset.
    *
-   * @param stream Optional stream to use for device memory alloc and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    *
    * @return The set of columns along with table metadata
    */
@@ -127,7 +127,7 @@ class reader {
    * @param skip_rows Number of rows to skip from the start
    * @param num_rows Number of rows to read; use `0` for all remaining data
    * @param metadata Optional location to return table metadata
-   * @param stream Optional stream to use for device memory alloc and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    *
    * @return The set of columns along with table metadata
    */
@@ -395,7 +395,7 @@ class reader {
    *
    * @param offset Byte offset from the start
    * @param size Number of bytes from the offset; set to 0 for all remaining
-   * @param stream Optional stream to use for device memory alloc and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    *
    * @return The set of columns along with table metadata
    */
@@ -407,7 +407,7 @@ class reader {
    * @param skip_rows Number of rows to skip from the start
    * @param skip_rows_end Number of rows to skip from the end
    * @param num_rows Number of rows to read; use `0` for all remaining data
-   * @param stream Optional stream to use for device memory alloc and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    *
    * @return The set of columns along with table metadata
    */
@@ -510,7 +510,7 @@ class reader {
   /**
    * @brief Reads the entire dataset.
    *
-   * @param stream Optional stream to use for device memory alloc and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    *
    * @return The set of columns along with table metadata
    */
@@ -521,7 +521,7 @@ class reader {
    *
    * @param stripe Index of the stripe
    * @param stripe_count Number of stripes to read
-   * @param stream Optional stream to use for device memory alloc and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    *
    * @return The set of columns along with table metadata
    *
@@ -535,7 +535,7 @@ class reader {
    * @brief Reads and returns specific stripes.
    *
    * @param stripe_list Indices of the stripes to read
-   * @param stream Optional stream to use for device memory alloc and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    *
    * @return The set of columns along with table metadata
    *
@@ -549,7 +549,7 @@ class reader {
    *
    * @param skip_rows Number of rows to skip from the start
    * @param num_rows Number of rows to read; use `0` for all remaining data
-   * @param stream Optional stream to use for device memory alloc and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    *
    * @return The set of columns along with table metadata
    */
@@ -643,7 +643,7 @@ class reader {
   /**
    * @brief Reads the entire dataset.
    *
-   * @param stream Optional stream to use for device memory alloc and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    *
    * @return The set of columns along with table metadata
    */
@@ -654,7 +654,7 @@ class reader {
    *
    * @param row_group Index of the row group
    * @param row_group_count Number of row groups to read
-   * @param stream Optional stream to use for device memory alloc and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    *
    * @return The set of columns along with table metadata
    *
@@ -668,7 +668,7 @@ class reader {
    * @brief Reads specific row groups.
    *
    * @param row_group_list Indices of the row groups
-   * @param stream Optional stream to use for device memory alloc and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    *
    * @return The set of columns along with table metadata
    *
@@ -682,7 +682,7 @@ class reader {
    *
    * @param skip_rows Number of rows to skip from the start
    * @param num_rows Number of rows to read; use `0` for all remaining data
-   * @param stream Optional stream to use for device memory alloc and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    *
    * @return The set of columns along with table metadata
    */

From 0b05b0f73e4d66084142e5442cf489dabc2f97b2 Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Tue, 19 May 2020 16:32:15 +0530
Subject: [PATCH 43/90] update doc for param stream for io writers.hpp

---
 cpp/include/cudf/io/writers.hpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/cpp/include/cudf/io/writers.hpp b/cpp/include/cudf/io/writers.hpp
index 1784b14ab59..5b79f1ff495 100644
--- a/cpp/include/cudf/io/writers.hpp
+++ b/cpp/include/cudf/io/writers.hpp
@@ -93,7 +93,7 @@ class writer {
    *
    * @param table Set of columns to output
    * @param metadata Table metadata and column names
-   * @param stream Optional stream to use for device memory alloc and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    */
   void write_all(table_view const& table,
                  const table_metadata* metadata = nullptr,
@@ -181,7 +181,7 @@ class writer {
    * @param metadata Table metadata and column names
    * @param return_filemetadata If true, return the raw file metadata
    * @param metadata_out_file_path Column chunks file path to be set in the raw output metadata
-   * @param stream Optional stream to use for device memory alloc and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    */
   std::unique_ptr<std::vector<uint8_t>> write_all(table_view const& table,
                                                   const table_metadata* metadata = nullptr,
@@ -344,7 +344,7 @@ class writer {
    *
    * @param table Set of columns to output
    * @param metadata Table metadata and column names
-   * @param stream Optional stream to use for device memory alloc and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    */
   void write_all(table_view const& table,
                  const table_metadata* metadata = nullptr,

From 2eeab9d7c134be95760546cca5f78eeae252d33b Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Tue, 19 May 2020 16:40:30 +0530
Subject: [PATCH 44/90] update doc of param stream for csv headers

---
 cpp/src/io/csv/csv_gpu.h       | 6 +++---
 cpp/src/io/csv/reader_impl.hpp | 8 ++++----
 cpp/src/io/csv/writer_impl.hpp | 8 ++++----
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/cpp/src/io/csv/csv_gpu.h b/cpp/src/io/csv/csv_gpu.h
index f8dcd5a3de3..b81907a5e17 100644
--- a/cpp/src/io/csv/csv_gpu.h
+++ b/cpp/src/io/csv/csv_gpu.h
@@ -143,7 +143,7 @@ inline __host__ __device__ rowctx64_t select_row_context(rowctx64_t sel_ctx,
  * @param skip_rows Number of rows to skip (ignored in phase 1)
  * @param num_row_offsets Number of entries in offsets_out array
  * @param options Options that control parsing of individual fields
- * @param stream CUDA stream to use, default 0
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  *
  * @return Number of row contexts
  **/
@@ -167,7 +167,7 @@ uint32_t gather_row_offsets(uint64_t *row_ctx,
  * @param row_offsets Row offsets in the character data buffer
  * @param d_data Character data buffer
  * @param options Options that control parsing of individual fields
- * @param stream CUDA stream to use, default 0
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  *
  **/
 size_t count_blank_rows(rmm::device_vector<uint64_t> const &row_offsets,
@@ -181,7 +181,7 @@ size_t count_blank_rows(rmm::device_vector<uint64_t> const &row_offsets,
  * @param row_offsets Row offsets in the character data buffer
  * @param d_data Character data buffer
  * @param options Options that control parsing of individual fields
- * @param stream CUDA stream to use, default 0
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  *
  **/
 void remove_blank_rows(rmm::device_vector<uint64_t> &row_offsets,
diff --git a/cpp/src/io/csv/reader_impl.hpp b/cpp/src/io/csv/reader_impl.hpp
index aeb726dab5e..2f6dfa701d9 100644
--- a/cpp/src/io/csv/reader_impl.hpp
+++ b/cpp/src/io/csv/reader_impl.hpp
@@ -90,7 +90,7 @@ class reader::impl {
    * @param skip_rows_end Number of rows to skip from the end
    * @param num_rows Number of rows to read
    * @param metadata Optional location to return table metadata
-   * @param stream Stream to use for memory allocation and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    *
    * @return The set of columns along with metadata
    */
@@ -116,7 +116,7 @@ class reader::impl {
    * @param skip_rows Number of rows to skip from the start
    * @param num_rows Number of rows to read; -1: all remaining data
    * @param load_whole_file Hint that the entire data will be needed on gpu
-   * @param stream Stream to use for memory allocation and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    */
   void gather_row_offsets(const char *h_data,
                           size_t h_size,
@@ -140,7 +140,7 @@ class reader::impl {
   /**
    * @brief Returns a detected or parsed list of column dtypes.
    *
-   * @param stream Stream to use for memory allocation and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    *
    * @return `std::vector<data_type>` List of column types
    */
@@ -151,7 +151,7 @@ class reader::impl {
    *
    * @param column_types Column types
    * @param out_buffers Output columns' device buffers
-   * @param stream Stream to use for memory allocation and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    */
   void decode_data(std::vector<data_type> const &column_types,
                    std::vector<column_buffer> &out_buffers,
diff --git a/cpp/src/io/csv/writer_impl.hpp b/cpp/src/io/csv/writer_impl.hpp
index 68d03d05105..ed87cb3c1c2 100644
--- a/cpp/src/io/csv/writer_impl.hpp
+++ b/cpp/src/io/csv/writer_impl.hpp
@@ -67,7 +67,7 @@ class writer::impl {
    *
    * @param table The set of columns
    * @param metadata The metadata associated with the table
-   * @param stream Stream to use for memory allocation and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    **/
   void write(table_view const& table,
              const table_metadata* metadata = nullptr,
@@ -78,7 +78,7 @@ class writer::impl {
    *
    * @param table The set of columns
    * @param metadata The metadata associated with the table
-   * @param stream Stream to use for memory allocation and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    **/
   void write_chunked_begin(table_view const& table,
                            const table_metadata* metadata = nullptr,
@@ -89,7 +89,7 @@ class writer::impl {
    *
    * @param strings_column Subset of columns converted to string to be written.
    * @param metadata The metadata associated with the table
-   * @param stream Stream to use for memory allocation and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    **/
   void write_chunked(strings_column_view const& strings_column,
                      const table_metadata* metadata = nullptr,
@@ -100,7 +100,7 @@ class writer::impl {
    *
    * @param table The set of columns
    * @param metadata The metadata associated with the table
-   * @param stream Stream to use for memory allocation and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    **/
   void write_chunked_end(table_view const& table,
                          const table_metadata* metadata = nullptr,

From f2b2bf55336a47a451f5fa7e55e4482164aef3ca Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Tue, 19 May 2020 16:41:11 +0530
Subject: [PATCH 45/90] update doc of param stream for orc headers

---
 cpp/src/io/orc/reader_impl.hpp |  6 +++---
 cpp/src/io/orc/writer_impl.hpp | 14 +++++++-------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp
index 429b58f5d78..0cd3df1b3db 100644
--- a/cpp/src/io/orc/reader_impl.hpp
+++ b/cpp/src/io/orc/reader_impl.hpp
@@ -73,7 +73,7 @@ class reader::impl {
    * @param stripe Stripe index to select
    * @param max_stripe_count Max number of consecutive stripes if greater than 0
    * @param stripe_indices Indices of individual stripes to load if non-null [max_stripe_count]
-   * @param stream Stream to use for memory allocation and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    *
    * @return The set of columns along with metadata
    */
@@ -95,7 +95,7 @@ class reader::impl {
    * @param num_stripes Number of stripes making up column chunks
    * @param row_groups List of row index descriptors
    * @param row_index_stride Distance between each row index
-   * @param stream Stream to use for memory allocation and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    *
    * @return Device buffer to decompressed page data
    */
@@ -119,7 +119,7 @@ class reader::impl {
    * @param row_groups List of row index descriptors
    * @param row_index_stride Distance between each row index
    * @param out_buffers Output columns' device buffers
-   * @param stream Stream to use for memory allocation and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    */
   void decode_stream_data(hostdevice_vector<gpu::ColumnDesc> &chunks,
                           size_t num_dicts,
diff --git a/cpp/src/io/orc/writer_impl.hpp b/cpp/src/io/orc/writer_impl.hpp
index 245cffb9a3f..84ec1db7366 100644
--- a/cpp/src/io/orc/writer_impl.hpp
+++ b/cpp/src/io/orc/writer_impl.hpp
@@ -82,7 +82,7 @@ class writer::impl {
    *
    * @param table The set of columns
    * @param metadata The metadata associated with the table
-   * @param stream Stream to use for memory allocation and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    **/
   void write(table_view const& table, const table_metadata* metadata, cudaStream_t stream);
 
@@ -121,7 +121,7 @@ class writer::impl {
    * @param dict_data Dictionary data memory
    * @param dict_index Dictionary index memory
    * @param dict List of dictionary chunks
-   * @param stream Stream to use for memory allocation and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    **/
   void init_dictionaries(orc_column_view* columns,
                          size_t num_rows,
@@ -141,7 +141,7 @@ class writer::impl {
    * @param dict List of dictionary chunks
    * @param dict_index List of dictionary indices
    * @param stripe_dict List of stripe dictionaries
-   * @param stream Stream to use for memory allocation and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    **/
   void build_dictionaries(orc_column_view* columns,
                           size_t num_rows,
@@ -182,7 +182,7 @@ class writer::impl {
    * @param streams List of columns' index and data streams
    * @param strm_ids List of unique stream identifiers
    * @param chunks List of column data chunks
-   * @param stream Stream to use for memory allocation and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    *
    * @return Device buffer containing encoded data
    **/
@@ -208,7 +208,7 @@ class writer::impl {
    * @param stripe_list List of stripe boundaries
    * @param chunks List of column data chunks
    * @param strm_desc List of stream descriptors
-   * @param stream Stream to use for memory allocation and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    *
    * @return The stripes' information
    **/
@@ -232,7 +232,7 @@ class writer::impl {
    * @param stripe_list List of stripe boundaries
    * @param stripes Stripe information
    * @param chunks List of column data chunks
-   * @param stream Stream to use for memory allocation and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    *
    * @return The statistic blobs
    **/
@@ -285,7 +285,7 @@ class writer::impl {
    * @param stream_out Temporary host output buffer
    * @param stripe Stream's parent stripe
    * @param streams List of all streams
-   * @param stream Stream to use for memory allocation and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    **/
   void write_data_stream(gpu::StripeStream const& strm_desc,
                          gpu::EncChunk const& chunk,

From 64f56c5229199e762956cea0358da01d17a9733e Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Tue, 19 May 2020 16:41:32 +0530
Subject: [PATCH 46/90] update doc of param stream for parquet headers

---
 cpp/src/io/parquet/reader_impl.hpp | 12 ++++++------
 cpp/src/io/parquet/writer_impl.hpp | 12 ++++++------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp
index 961d1040f6d..1fb24f69e85 100644
--- a/cpp/src/io/parquet/reader_impl.hpp
+++ b/cpp/src/io/parquet/reader_impl.hpp
@@ -70,7 +70,7 @@ class reader::impl {
    * @param row_group Row group index to select
    * @param max_rowgroup_count Max number of consecutive row groups if greater than 0
    * @param row_group_indices if non-null, indices of rowgroups to read [max_rowgroup_count]
-   * @param stream Stream to use for memory allocation and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    *
    * @return The set of columns along with metadata
    */
@@ -90,7 +90,7 @@ class reader::impl {
    * @param begin_chunk Index of first column chunk to read
    * @param end_chunk Index after the last column chunk to read
    * @param column_chunk_offsets File offset for all chunks
-   * @param stream Stream to use for memory allocation and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    *
    */
   void read_column_chunks(std::vector<rmm::device_buffer> &page_data,
@@ -104,7 +104,7 @@ class reader::impl {
    * @brief Returns the number of total pages from the given column chunks
    *
    * @param chunks List of column chunk descriptors
-   * @param stream Stream to use for memory allocation and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    *
    * @return The total number of pages
    */
@@ -115,7 +115,7 @@ class reader::impl {
    *
    * @param chunks List of column chunk descriptors
    * @param pages List of page information
-   * @param stream Stream to use for memory allocation and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    */
   void decode_page_headers(hostdevice_vector<gpu::ColumnChunkDesc> &chunks,
                            hostdevice_vector<gpu::PageInfo> &pages,
@@ -126,7 +126,7 @@ class reader::impl {
    *
    * @param chunks List of column chunk descriptors
    * @param pages List of page information
-   * @param stream Stream to use for memory allocation and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    *
    * @return Device buffer to decompressed page data
    */
@@ -143,7 +143,7 @@ class reader::impl {
    * @param total_rows Number of rows to output
    * @param chunk_map Mapping between chunk and column
    * @param out_buffers Output columns' device buffers
-   * @param stream Stream to use for memory allocation and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    */
   void decode_page_data(hostdevice_vector<gpu::ColumnChunkDesc> &chunks,
                         hostdevice_vector<gpu::PageInfo> &pages,
diff --git a/cpp/src/io/parquet/writer_impl.hpp b/cpp/src/io/parquet/writer_impl.hpp
index b0f04d8b560..048ec2b1574 100644
--- a/cpp/src/io/parquet/writer_impl.hpp
+++ b/cpp/src/io/parquet/writer_impl.hpp
@@ -79,7 +79,7 @@ class writer::impl {
    * @param metadata The metadata associated with the table
    * @param return_filemetadata If true, return the raw parquet file metadata
    * @param metadata_out_file_path Column chunks file path to be set in the raw output metadata
-   * @param stream Stream to use for memory allocation and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    * @return unique_ptr to FileMetadata thrift message if requested
    **/
   std::unique_ptr<std::vector<uint8_t>> write(table_view const& table,
@@ -129,7 +129,7 @@ class writer::impl {
    * @param num_fragments Total number of fragments per column
    * @param num_rows Total number of rows
    * @param fragment_size Number of rows per fragment
-   * @param stream Stream to use for memory allocation and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    **/
   void init_page_fragments(hostdevice_vector<gpu::PageFragment>& frag,
                            hostdevice_vector<gpu::EncColumnDesc>& col_desc,
@@ -147,7 +147,7 @@ class writer::impl {
    * @param num_columns Total number of columns
    * @param num_fragments Total number of fragments per column
    * @param fragment_size Number of rows per fragment
-   * @param stream Stream to use for memory allocation and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    **/
   void gather_fragment_statistics(statistics_chunk* dst_stats,
                                   hostdevice_vector<gpu::PageFragment>& frag,
@@ -164,7 +164,7 @@ class writer::impl {
    * @param num_rowgroups Total number of rowgroups
    * @param num_columns Total number of columns
    * @param num_dictionaries Total number of dictionaries
-   * @param stream Stream to use for memory allocation and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    **/
   void build_chunk_dictionaries(hostdevice_vector<gpu::EncColumnChunk>& chunks,
                                 hostdevice_vector<gpu::EncColumnDesc>& col_desc,
@@ -182,7 +182,7 @@ class writer::impl {
    * @param num_columns Total number of columns
    * @param num_pages Total number of pages
    * @param num_stats_bfr Number of statistics buffers
-   * @param stream Stream to use for memory allocation and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    **/
   void init_encoder_pages(hostdevice_vector<gpu::EncColumnChunk>& chunks,
                           hostdevice_vector<gpu::EncColumnDesc>& col_desc,
@@ -208,7 +208,7 @@ class writer::impl {
    * @param comp_out compressor status array
    * @param page_stats optional page-level statistics (nullptr if none)
    * @param chunk_stats optional chunk-level statistics (nullptr if none)
-   * @param stream Stream to use for memory allocation and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    **/
   void encode_pages(hostdevice_vector<gpu::EncColumnChunk>& chunks,
                     gpu::EncPage* pages,

From 5fdba29a0302b724602bfdd45da7268cb7d81f8a Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Tue, 19 May 2020 16:41:59 +0530
Subject: [PATCH 47/90] update doc of param stream for misc io headers

---
 cpp/src/io/avro/reader_impl.hpp        | 6 +++---
 cpp/src/io/utilities/column_buffer.hpp | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/cpp/src/io/avro/reader_impl.hpp b/cpp/src/io/avro/reader_impl.hpp
index 6bc1d8d577a..f00e4c37ef8 100644
--- a/cpp/src/io/avro/reader_impl.hpp
+++ b/cpp/src/io/avro/reader_impl.hpp
@@ -67,7 +67,7 @@ class reader::impl {
    *
    * @param skip_rows Number of rows to skip from the start
    * @param num_rows Number of rows to read
-   * @param stream Stream to use for memory allocation and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    *
    * @return The set of columns along with metadata
    */
@@ -78,7 +78,7 @@ class reader::impl {
    * @brief Decompresses the block data.
    *
    * @param comp_block_data Compressed block data
-   * @param stream Stream to use for memory allocation and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    *
    * @return Device buffer to decompressed block data
    */
@@ -93,7 +93,7 @@ class reader::impl {
    * @param global_dictionary Dictionary allocation
    * @param total_dictionary_entries Number of dictionary entries
    * @param out_buffers Output columns' device buffers
-   * @param stream Stream to use for memory allocation and kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    */
   void decode_data(const rmm::device_buffer &block_data,
                    const std::vector<std::pair<uint32_t, uint32_t>> &dict,
diff --git a/cpp/src/io/utilities/column_buffer.hpp b/cpp/src/io/utilities/column_buffer.hpp
index 8ae7b1b51c6..b0a9ca38fd3 100644
--- a/cpp/src/io/utilities/column_buffer.hpp
+++ b/cpp/src/io/utilities/column_buffer.hpp
@@ -39,7 +39,7 @@ namespace detail {
  * @param type The intended data type to populate
  * @param size The number of elements to be represented by the mask
  * @param state The desired state of the mask
- * @param stream Optional stream to use for device memory alloc and kernels
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @param mr Device memory resource used to allocate the returned device_buffer
  *
  * @return `rmm::device_buffer` Device buffer allocation
@@ -107,7 +107,7 @@ namespace {
  * @param type List of column chunk descriptors
  * @param size List of page information
  * @param size List of page information
- * @param stream Optional stream to use for device memory alloc and kernels
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @param mr Device memory resource used to allocate the returned column
  *
  * @return `std::unique_ptr<cudf::column>` Column from the existing device data

From 17350cea1fdfa78abc3da9a47f1876931d706b7c Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Tue, 19 May 2020 16:44:46 +0530
Subject: [PATCH 48/90] update doc of param stream for group reductions header

---
 cpp/src/groupby/sort/group_reductions.hpp | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/cpp/src/groupby/sort/group_reductions.hpp b/cpp/src/groupby/sort/group_reductions.hpp
index 14999336de0..195b97dcf01 100644
--- a/cpp/src/groupby/sort/group_reductions.hpp
+++ b/cpp/src/groupby/sort/group_reductions.hpp
@@ -34,7 +34,7 @@ namespace detail {
  * @param num_groups Number of groups
  * @param group_labels ID of group that the corresponding value belongs to
  * @param mr Device memory resource used to allocate the returned column
- * @param stream Stream to perform computation in
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> group_sum(column_view const& values,
                                   size_type num_groups,
@@ -49,7 +49,7 @@ std::unique_ptr<column> group_sum(column_view const& values,
  * @param num_groups Number of groups
  * @param group_labels ID of group that the corresponding value belongs to
  * @param mr Device memory resource used to allocate the returned column
- * @param stream Stream to perform computation in
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> group_min(column_view const& values,
                                   size_type num_groups,
@@ -64,7 +64,7 @@ std::unique_ptr<column> group_min(column_view const& values,
  * @param num_groups Number of groups
  * @param group_labels ID of group that the corresponding value belongs to
  * @param mr Device memory resource used to allocate the returned column
- * @param stream Stream to perform computation in
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> group_max(column_view const& values,
                                   size_type num_groups,
@@ -80,7 +80,7 @@ std::unique_ptr<column> group_max(column_view const& values,
  * @param group_labels ID of group that the corresponding value belongs to
  * @param key_sort_order Indices indicating sort order of groupby keys
  * @param mr Device memory resource used to allocate the returned column
- * @param stream Stream to perform computation in
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> group_argmax(column_view const& values,
                                      size_type num_groups,
@@ -97,7 +97,7 @@ std::unique_ptr<column> group_argmax(column_view const& values,
  * @param group_labels ID of group that the corresponding value belongs to
  * @param key_sort_order Indices indicating sort order of groupby keys
  * @param mr Device memory resource used to allocate the returned column
- * @param stream Stream to perform computation in
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> group_argmin(column_view const& values,
                                      size_type num_groups,
@@ -114,7 +114,7 @@ std::unique_ptr<column> group_argmin(column_view const& values,
  * @param group_labels ID of group that the corresponding value belongs to
  * @param num_groups Number of groups ( unique values in @p group_labels )
  * @param mr Device memory resource used to allocate the returned column
- * @param stream Stream to perform computation in
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> group_count_valid(column_view const& values,
                                           rmm::device_vector<size_type> const& group_labels,
@@ -128,7 +128,7 @@ std::unique_ptr<column> group_count_valid(column_view const& values,
  * @param group_offsets Offsets of groups' starting points within @p values
  * @param num_groups Number of groups ( unique values in @p group_labels )
  * @param mr Device memory resource used to allocate the returned column
- * @param stream Stream to perform computation in
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> group_count_all(rmm::device_vector<size_type> const& group_offsets,
                                         size_type num_groups,
@@ -145,7 +145,7 @@ std::unique_ptr<column> group_count_all(rmm::device_vector<size_type> const& gro
  * @param ddof Delta degrees of freedom. The divisor used in calculation of
  *             `var` is `N - ddof`, where `N` is the group size.
  * @param mr Device memory resource used to allocate the returned column
- * @param stream Stream to perform computation in
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> group_var(column_view const& values,
                                   column_view const& group_means,
@@ -164,7 +164,7 @@ std::unique_ptr<column> group_var(column_view const& values,
  * @param quantiles List of quantiles q where q lies in [0,1]
  * @param interp Method to use when desired value lies between data points
  * @param mr Device memory resource used to allocate the returned column
- * @param stream Stream to perform computation in
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> group_quantiles(column_view const& values,
                                         column_view const& group_sizes,
@@ -187,7 +187,7 @@ std::unique_ptr<column> group_quantiles(column_view const& values,
  *  Include nulls if null_policy::INCLUDE.
  *  Nulls are treated equal.
  * @param mr Device memory resource used to allocate the returned column
- * @param stream Stream to perform computation in
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> group_nunique(column_view const& values,
                                       rmm::device_vector<size_type> const& group_labels,
@@ -209,7 +209,7 @@ std::unique_ptr<column> group_nunique(column_view const& values,
  * @param null_handling Exclude nulls while counting if null_policy::EXCLUDE,
  *  Include nulls if null_policy::INCLUDE.
  * @param mr Device memory resource used to allocate the returned column
- * @param stream Stream to perform computation in
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> group_nth_element(column_view const& values,
                                           column_view const& group_sizes,

From f93c829b2e2933249064bc990f96289044f08004 Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Tue, 19 May 2020 16:51:16 +0530
Subject: [PATCH 49/90] update doc of param stream for binary op headers

---
 cpp/include/cudf/detail/binaryop.hpp     | 8 ++++----
 cpp/src/binaryop/compiled/binary_ops.hpp | 6 +++---
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/cpp/include/cudf/detail/binaryop.hpp b/cpp/include/cudf/detail/binaryop.hpp
index 0e0a22efe8e..8e7902a3b5f 100644
--- a/cpp/include/cudf/detail/binaryop.hpp
+++ b/cpp/include/cudf/detail/binaryop.hpp
@@ -26,7 +26,7 @@ namespace detail {
  * @copydoc cudf::experimental::binary_operation(scalar const&, column_view const&, binary_operator,
  * data_type, rmm::mr::device_memory_resource *)
  *
- * @param stream CUDA stream on which to execute kernels
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> binary_operation(
   scalar const& lhs,
@@ -40,7 +40,7 @@ std::unique_ptr<column> binary_operation(
  * @copydoc cudf::experimental::binary_operation(column_view const&, scalar const&, binary_operator,
  * data_type, rmm::mr::device_memory_resource *)
  *
- * @param stream CUDA stream on which to execute kernels
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> binary_operation(
   column_view const& lhs,
@@ -54,7 +54,7 @@ std::unique_ptr<column> binary_operation(
  * @copydoc cudf::experimental::binary_operation(column_view const&, column_view const&,
  * binary_operator, data_type, rmm::mr::device_memory_resource *)
  *
- * @param stream CUDA stream on which to execute kernels
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> binary_operation(
   column_view const& lhs,
@@ -68,7 +68,7 @@ std::unique_ptr<column> binary_operation(
  * @copydoc cudf::experimental::binary_operation(column_view const&, column_view const&,
  * std::string const&, data_type, rmm::mr::device_memory_resource *)
  *
- * @param stream CUDA stream on which to execute kernels
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> binary_operation(
   column_view const& lhs,
diff --git a/cpp/src/binaryop/compiled/binary_ops.hpp b/cpp/src/binaryop/compiled/binary_ops.hpp
index 5186f0ff3c0..63560a97ea1 100644
--- a/cpp/src/binaryop/compiled/binary_ops.hpp
+++ b/cpp/src/binaryop/compiled/binary_ops.hpp
@@ -59,7 +59,7 @@ namespace compiled {
  * @param rhs         The right operand string column
  * @param output_type The desired data type of the output column
  * @param mr          Device memory resource used to allocate the returned column
- * @param stream      CUDA stream on which to execute kernels
+ * @param stream      CUDA stream used for device memory operations and kernel launches.
  * @return std::unique_ptr<column> Output column
  */
 std::unique_ptr<column> binary_operation(
@@ -85,7 +85,7 @@ std::unique_ptr<column> binary_operation(
  * @param rhs         The right operand string scalar
  * @param output_type The desired data type of the output column
  * @param mr          Device memory resource used to allocate the returned column
- * @param stream      CUDA stream on which to execute kernels
+ * @param stream      CUDA stream used for device memory operations and kernel launches.
  * @return std::unique_ptr<column> Output column
  */
 std::unique_ptr<column> binary_operation(
@@ -111,7 +111,7 @@ std::unique_ptr<column> binary_operation(
  * @param rhs         The right operand string column
  * @param output_type The desired data type of the output column
  * @param mr          Device memory resource used to allocate the returned column
- * @param stream      CUDA stream on which to execute kernels
+ * @param stream      CUDA stream used for device memory operations and kernel launches.
  * @return std::unique_ptr<column> Output column
  */
 std::unique_ptr<column> binary_operation(

From 3b9d23973843bb4d53a1014f9b12b5b1158f155c Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Tue, 19 May 2020 16:52:21 +0530
Subject: [PATCH 50/90] update doc for param stream in nvtext headers

---
 cpp/include/nvtext/detail/tokenize.hpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/cpp/include/nvtext/detail/tokenize.hpp b/cpp/include/nvtext/detail/tokenize.hpp
index 1e7e26178ab..bf3f2ee00f1 100644
--- a/cpp/include/nvtext/detail/tokenize.hpp
+++ b/cpp/include/nvtext/detail/tokenize.hpp
@@ -29,7 +29,7 @@ namespace detail {
  * @param delimiter UTF-8 characters used to separate each string into tokens.
  *                  The default of empty string will separate tokens using whitespace.
  * @param mr Device memory resource used to allocate the returned column.
- * @param stream Stream to use for any CUDA calls.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return New strings columns of tokens.
  */
 std::unique_ptr<cudf::column> tokenize(
@@ -45,7 +45,7 @@ std::unique_ptr<cudf::column> tokenize(
  * @param strings Strings column to tokenize.
  * @param delimiters Strings used to separate individual strings into tokens.
  * @param mr Device memory resource used to allocate the returned column.
- * @param stream Stream to use for any CUDA calls.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return New strings columns of tokens.
  */
 std::unique_ptr<cudf::column> tokenize(
@@ -62,7 +62,7 @@ std::unique_ptr<cudf::column> tokenize(
  * @param delimiter Strings used to separate each string into tokens.
  *                  The default of empty string will separate tokens using whitespace.
  * @param mr Device memory resource used to allocate the returned column.
- * @param stream Stream to use for any CUDA calls.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return New INT32 column of token counts.
  */
 std::unique_ptr<cudf::column> count_tokens(
@@ -78,7 +78,7 @@ std::unique_ptr<cudf::column> count_tokens(
  * @param strings Strings column to use for this operation.
  * @param delimiters Strings used to separate each string into tokens.
  * @param mr Device memory resource used to allocate the returned column.
- * @param stream Stream to use for any CUDA calls.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return New INT32 column of token counts.
  */
 std::unique_ptr<cudf::column> count_tokens(

From d3a33005f8aeb5442f380166d6297b8a6f6ee9d7 Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Tue, 19 May 2020 16:56:11 +0530
Subject: [PATCH 51/90] update doc for param stream in copy headers

---
 cpp/include/cudf/detail/copy.hpp         | 4 ++--
 cpp/include/cudf/detail/copy_if_else.cuh | 2 +-
 cpp/include/cudf/detail/copy_range.cuh   | 6 +++---
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/cpp/include/cudf/detail/copy.hpp b/cpp/include/cudf/detail/copy.hpp
index 7cf4a84e306..6f903aa5bec 100644
--- a/cpp/include/cudf/detail/copy.hpp
+++ b/cpp/include/cudf/detail/copy.hpp
@@ -68,7 +68,7 @@ ColumnView slice(ColumnView const& input, cudf::size_type begin, cudf::size_type
 /**
  * @copydoc cudf::experimental::slice(column_view const&,std::vector<size_type> const&)
  *
- * @param stream Optional CUDA stream on which to execute kernels
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::vector<column_view> slice(column_view const& input,
                                std::vector<size_type> const& indices,
@@ -77,7 +77,7 @@ std::vector<column_view> slice(column_view const& input,
 /**
  * @copydoc cudf::experimental::contiguous_split
  *
- * @param stream Optional CUDA stream on which to execute kernels
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  **/
 std::vector<contiguous_split_result> contiguous_split(
   cudf::table_view const& input,
diff --git a/cpp/include/cudf/detail/copy_if_else.cuh b/cpp/include/cudf/detail/copy_if_else.cuh
index bcf8d2d3d10..13361f6e4ea 100644
--- a/cpp/include/cudf/detail/copy_if_else.cuh
+++ b/cpp/include/cudf/detail/copy_if_else.cuh
@@ -154,7 +154,7 @@ __launch_bounds__(block_size) __global__
  * @param filter      Function of type `FilterFn` which determines for index `i` where to get the
  *                    corresponding output value from
  * @param mr          Device memory resource used to allocate the returned column
- * @param stream      CUDA stream to perform the computation in
+ * @param stream      CUDA stream used for device memory operations and kernel launches.
  * @return            A new column that contains the values from either `lhs` or `rhs` as determined
  *                    by `filter[i]`
  */
diff --git a/cpp/include/cudf/detail/copy_range.cuh b/cpp/include/cudf/detail/copy_range.cuh
index a7da74d4c46..8d1327846a0 100644
--- a/cpp/include/cudf/detail/copy_range.cuh
+++ b/cpp/include/cudf/detail/copy_range.cuh
@@ -128,7 +128,7 @@ namespace detail {
  * @param target_begin The starting index of the target range (inclusive)
  * @param target_end The index of the last element in the target range
  * (exclusive)
- * @param stream CUDA stream to run this function
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 template <typename SourceValueIterator, typename SourceValidityIterator>
 void copy_range(SourceValueIterator source_value_begin,
@@ -190,7 +190,7 @@ void copy_range(SourceValueIterator source_value_begin,
 
 /**
  * @copydoc cudf::experimental::copy_range_in_place
- * @param stream Optional CUDA stream to run this function
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 void copy_range_in_place(column_view const& source,
                          mutable_column_view& target,
@@ -201,7 +201,7 @@ void copy_range_in_place(column_view const& source,
 
 /**
  * @copydoc cudf::experimental::copy_range
- * @param stream CUDA stream to run this function
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return std::unique_ptr<column> The result target column
  */
 std::unique_ptr<column> copy_range(

From e5fffae42c3d3e6e289c93cd8839ce36dc372b4d Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Tue, 19 May 2020 16:56:28 +0530
Subject: [PATCH 52/90] update doc for param stream in scatter, gather headers

---
 cpp/include/cudf/detail/gather.cuh  | 8 ++++----
 cpp/include/cudf/detail/scatter.hpp | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/cpp/include/cudf/detail/gather.cuh b/cpp/include/cudf/detail/gather.cuh
index 1791edd7e51..fc01ab9487b 100644
--- a/cpp/include/cudf/detail/gather.cuh
+++ b/cpp/include/cudf/detail/gather.cuh
@@ -113,7 +113,7 @@ struct column_gatherer_impl {
    * @param gather_map_end End of iterator range of integral values representing the gather map
    * @param nullify_out_of_bounds Nullify values in `gather_map` that are out of bounds
    * @param mr Device memory resource used to allocate the returned column
-   * @param stream CUDA stream on which to execute kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    */
   std::unique_ptr<column> operator()(column_view const& source_column,
                                      MapIterator gather_map_begin,
@@ -172,7 +172,7 @@ struct column_gatherer_impl<string_view, MapItType> {
    * @param gather_map_end End of iterator range of integral values representing the gather map
    * @param nullify_out_of_bounds Nullify values in `gather_map` that are out of bounds
    * @param mr Device memory resource used to allocate the returned column
-   * @param stream CUDA stream on which to execute kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    */
   std::unique_ptr<column> operator()(column_view const& source_column,
                                      MapItType gather_map_begin,
@@ -206,7 +206,7 @@ struct column_gatherer_impl<dictionary32, MapItType> {
    * @param gather_map_end End of iterator range of integral values representing the gather map
    * @param nullify_out_of_bounds Nullify values in `gather_map` that are out of bounds
    * @param mr Device memory resource used to allocate the returned column
-   * @param stream CUDA stream on which to execute kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    * @return New dictionary column with gathered rows.
    */
   std::unique_ptr<column> operator()(column_view const& source_column,
@@ -273,7 +273,7 @@ struct column_gatherer {
    * @param gather_map_end End of iterator range of integral values representing the gather map
    * @param nullify_out_of_bounds Nullify values in `gather_map` that are out of bounds
    * @param mr Device memory resource used to allocate the returned column
-   * @param stream CUDA stream on which to execute kernels
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    */
   template <typename Element, typename MapIterator>
   std::unique_ptr<column> operator()(column_view const& source_column,
diff --git a/cpp/include/cudf/detail/scatter.hpp b/cpp/include/cudf/detail/scatter.hpp
index 64a4512835f..032bee6ab03 100644
--- a/cpp/include/cudf/detail/scatter.hpp
+++ b/cpp/include/cudf/detail/scatter.hpp
@@ -57,7 +57,7 @@ namespace detail {
  * @param check_bounds Optionally perform bounds checking on the values of
  * `scatter_map` and throw an error if any of its values are out of bounds.
  * @param mrDevice memory resource used to allocate the returned table
- * @param stream The stream to use for CUDA operations
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return Result of scattering values from source to target
  **/
 std::unique_ptr<table> scatter(
@@ -97,7 +97,7 @@ std::unique_ptr<table> scatter(
  * @param check_bounds Optionally perform bounds checking on the values of
  * `scatter_map` and throw an error if any of its values are out of bounds.
  * @param mr Device memory resource used to allocate the returned table
- * @param stream The stream to use for CUDA operations
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return Result of scattering values from source to target
  **/
 std::unique_ptr<table> scatter(
@@ -114,7 +114,7 @@ std::unique_ptr<table> scatter(
  *                    column_view const& boolean_mask,
  *                    rmm::mr::device_memory_resource *mr)
  *
- * @param stream The stream to use for CUDA operations
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<table> boolean_mask_scatter(table_view const& source,
                                             table_view const& target,
@@ -129,7 +129,7 @@ std::unique_ptr<table> boolean_mask_scatter(table_view const& source,
  *                    column_view const& boolean_mask,
  *                    rmm::mr::device_memory_resource *mr)
  *
- * @param stream The stream to use for CUDA operations
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<table> boolean_mask_scatter(
   std::vector<std::reference_wrapper<scalar>> const& source,

From af9d392b4625b558737225232d86a2f438966c00 Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Tue, 19 May 2020 17:08:15 +0530
Subject: [PATCH 53/90] update doc for param stream in fill headers

---
 cpp/include/cudf/detail/fill.hpp      | 4 ++--
 cpp/include/cudf/detail/repeat.hpp    | 4 ++--
 cpp/include/cudf/detail/sequence.hpp  | 4 ++--
 cpp/include/cudf/detail/transpose.hpp | 2 +-
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/cpp/include/cudf/detail/fill.hpp b/cpp/include/cudf/detail/fill.hpp
index 04fc6fdfd9e..05ea5b59c09 100644
--- a/cpp/include/cudf/detail/fill.hpp
+++ b/cpp/include/cudf/detail/fill.hpp
@@ -28,7 +28,7 @@ namespace detail {
 /**
  * @copydoc cudf::experimental::fill_in_place
  *
- * @param stream CUDA stream to run this function
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 void fill_in_place(mutable_column_view& destination,
                    size_type begin,
@@ -39,7 +39,7 @@ void fill_in_place(mutable_column_view& destination,
 /**
  * @copydoc cudf::experimental::fill
  *
- * @param stream CUDA stream to run this function
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> fill(column_view const& input,
                              size_type begin,
diff --git a/cpp/include/cudf/detail/repeat.hpp b/cpp/include/cudf/detail/repeat.hpp
index c3ac1707c62..7d2a52a7412 100644
--- a/cpp/include/cudf/detail/repeat.hpp
+++ b/cpp/include/cudf/detail/repeat.hpp
@@ -28,7 +28,7 @@ namespace detail {
  * @copydoc cudf::experimental::repeat(table_view const&, column_view const&, bool,
  * rmm::mr::device_memory_resource*)
  *
- * @param stream CUDA stream to run this function
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<table> repeat(table_view const& input_table,
                               column_view const& count,
@@ -40,7 +40,7 @@ std::unique_ptr<table> repeat(table_view const& input_table,
  * @copydoc cudf::experimental::repeat(table_view const&, scalar const&,
  * rmm::mr::device_memory_resource*)
  *
- * @param stream CUDA stream to run this function
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<table> repeat(table_view const& input_table,
                               scalar const& count,
diff --git a/cpp/include/cudf/detail/sequence.hpp b/cpp/include/cudf/detail/sequence.hpp
index 340c41eb5ff..221e3280a51 100644
--- a/cpp/include/cudf/detail/sequence.hpp
+++ b/cpp/include/cudf/detail/sequence.hpp
@@ -28,7 +28,7 @@ namespace detail {
  *                                       rmm::mr::device_memory_resource* mr =
  *rmm::mr::get_default_resource())
  *
- * @param stream CUDA stream to run this function
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  **/
 std::unique_ptr<column> sequence(
   size_type size,
@@ -42,7 +42,7 @@ std::unique_ptr<column> sequence(
                                          rmm::mr::device_memory_resource* mr =
  rmm::mr::get_default_resource())
  *
- * @param stream CUDA stream to run this function
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  **/
 std::unique_ptr<column> sequence(
   size_type size,
diff --git a/cpp/include/cudf/detail/transpose.hpp b/cpp/include/cudf/detail/transpose.hpp
index 38c26f5de78..cb378a2b563 100644
--- a/cpp/include/cudf/detail/transpose.hpp
+++ b/cpp/include/cudf/detail/transpose.hpp
@@ -23,7 +23,7 @@ namespace detail {
 /**
  * @copydoc cudf::transpose
  *
- * @param stream Optional CUDA stream on which to execute kernels
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::pair<std::unique_ptr<column>, table_view> transpose(
   table_view const& input,

From 833995e67cf7639ff7c85a8fdb7691ab12d8a957 Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Tue, 19 May 2020 17:08:47 +0530
Subject: [PATCH 54/90] update doc for param stream in dlpack.hpp

---
 cpp/include/cudf/detail/dlpack.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp/include/cudf/detail/dlpack.hpp b/cpp/include/cudf/detail/dlpack.hpp
index 68ded55b6ae..928d95180ad 100644
--- a/cpp/include/cudf/detail/dlpack.hpp
+++ b/cpp/include/cudf/detail/dlpack.hpp
@@ -23,7 +23,7 @@ namespace detail {
 /**
  * @copydoc cudf::from_dlpack
  *
- * @param stream Optional stream on which to execute
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<experimental::table> from_dlpack(
   DLManagedTensor const* managed_tensor,
@@ -33,7 +33,7 @@ std::unique_ptr<experimental::table> from_dlpack(
 /**
  * @copydoc cudf::to_dlpack
  *
- * @param stream Optional stream on which to execute
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 DLManagedTensor* to_dlpack(table_view const& input,
                            rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource(),

From ab1133fb846ee03a502c0e7a796c9fae6bbad1ff Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Tue, 19 May 2020 17:09:55 +0530
Subject: [PATCH 55/90] update doc for param stream in hash headers

---
 cpp/src/hash/concurrent_unordered_map.cuh      | 6 +++---
 cpp/src/hash/concurrent_unordered_multimap.cuh | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/cpp/src/hash/concurrent_unordered_map.cuh b/cpp/src/hash/concurrent_unordered_map.cuh
index c4e49559b38..24574cd0805 100644
--- a/cpp/src/hash/concurrent_unordered_map.cuh
+++ b/cpp/src/hash/concurrent_unordered_map.cuh
@@ -151,7 +151,7 @@ class concurrent_unordered_map {
    * equal
    * @param allocator The allocator to use for allocation the hash table's
    * storage
-   * @param stream CUDA stream to use for device operations.
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    **/
   static auto create(size_type capacity,
                      const mapped_type unused_element = std::numeric_limits<key_type>::max(),
@@ -468,7 +468,7 @@ class concurrent_unordered_map {
    * This function is invoked as the deleter of the `std::unique_ptr` returned
    * from the `create()` factory function.
    *
-   * @param stream CUDA stream to use for device operations.
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    **/
   void destroy(cudaStream_t stream = 0)
   {
@@ -503,7 +503,7 @@ class concurrent_unordered_map {
    *are equal
    * @param allocator The allocator to use for allocation the hash table's
    * storage
-   * @param stream CUDA stream to use for device operations.
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    **/
   concurrent_unordered_map(size_type capacity,
                            const mapped_type unused_element,
diff --git a/cpp/src/hash/concurrent_unordered_multimap.cuh b/cpp/src/hash/concurrent_unordered_multimap.cuh
index 720a1b3cd92..af783ffc2fa 100644
--- a/cpp/src/hash/concurrent_unordered_multimap.cuh
+++ b/cpp/src/hash/concurrent_unordered_multimap.cuh
@@ -97,7 +97,7 @@ class concurrent_unordered_multimap {
    * @param equal The equality comparison function for comparing if two keys are
    * equal
    * @param allocator The allocator to use for allocation of the map's storage
-   * @param stream CUDA stream to use for device operations.
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    **/
   static auto create(size_type capacity,
                      const bool init                 = true,
@@ -129,7 +129,7 @@ class concurrent_unordered_multimap {
    * This function is invoked as the deleter of the `std::unique_ptr` returned
    * from the `create()` factory function.
    *
-   * @param stream CUDA stream to use for device operations.
+   * @param stream CUDA stream used for device memory operations and kernel launches.
    **/
   void destroy(cudaStream_t stream = 0)
   {

From e8f1bad91fb31a00376d9057321debecc2ac6d72 Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Tue, 19 May 2020 17:10:19 +0530
Subject: [PATCH 56/90] update doc for param stream in join headers

---
 cpp/src/join/hash_join.cuh | 6 ++----
 cpp/src/join/join.cu       | 9 +++------
 2 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/cpp/src/join/hash_join.cuh b/cpp/src/join/hash_join.cuh
index 7271b1f5747..ae4d43afd94 100644
--- a/cpp/src/join/hash_join.cuh
+++ b/cpp/src/join/hash_join.cuh
@@ -160,8 +160,7 @@ size_type estimate_join_output_size(table_device_view build_table,
  * JoinNoneValue, i.e. -1.
  *
  * @param left  Table of left columns to join
- * @param stream stream on which all memory allocations and copies
- * will be performed
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  *
  * @returns Join output indices vector pair
  */
@@ -189,8 +188,7 @@ get_trivial_left_join_indices(table_view const& left, cudaStream_t stream)
  * @param right Table of right  columns to join
  * @param flip_join_indices Flag that indicates whether the left and right
  * tables have been flipped, meaning the output indices should also be flipped.
- * @param stream stream on which all memory allocations and copies
- * will be performed
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @tparam join_kind The type of join to be performed
  *
  * @returns Join output indices vector pair
diff --git a/cpp/src/join/join.cu b/cpp/src/join/join.cu
index 8d9e6ddf154..29f5f138dd3 100644
--- a/cpp/src/join/join.cu
+++ b/cpp/src/join/join.cu
@@ -118,8 +118,7 @@ struct valid_range {
  * @Param right_indices Vector of indices
  * @Param left_table_row_count Number of rows of left table
  * @Param right_table_row_count Number of rows of right table
- * @param stream Optional, stream on which all memory allocations and copies
- * will be performed
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  *
  * @Returns  Pair of vectors containing the left join indices complement
  */
@@ -192,8 +191,7 @@ get_left_join_indices_complement(rmm::device_vector<size_type>& right_indices,
  *
  * @param left  Table of left columns to join
  * @param right Table of right  columns to join
- * @param stream stream on which all memory allocations and copies
- * will be performed
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @tparam join_kind The type of join to be performed
  *
  * @returns Join output indices vector pair
@@ -396,8 +394,7 @@ std::unique_ptr<experimental::table> construct_join_output_df(
  * Else, for every column in `left_on` and `right_on`, an output column will
  * be produced.
  * @param mr Device memory resource used to allocate the returned table
- * @param stream Optional, stream on which all memory allocations and copies
- * will be performed
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  *
  * @tparam join_kind The type of join to be performed
  *

From 9cc2fb3ce14dadd050b45b343d2860010e9ef17c Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Tue, 19 May 2020 17:11:12 +0530
Subject: [PATCH 57/90] update doc for param stream in rolling, rank

---
 cpp/src/rolling/rolling.cu | 2 +-
 cpp/src/sort/rank.cu       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp/src/rolling/rolling.cu b/cpp/src/rolling/rolling.cu
index 4cccf17cfcc..59553e16d2d 100644
--- a/cpp/src/rolling/rolling.cu
+++ b/cpp/src/rolling/rolling.cu
@@ -621,7 +621,7 @@ std::unique_ptr<column> rolling_window_udf(column_view const& input,
  *                                  std::unique_ptr<aggregation> const& agg,
  *                                  rmm::mr::device_memory_resource* mr)
  *
- * @param stream The stream to use for CUDA operations
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 template <typename PrecedingWindowIterator, typename FollowingWindowIterator>
 std::unique_ptr<column> rolling_window(column_view const& input,
diff --git a/cpp/src/sort/rank.cu b/cpp/src/sort/rank.cu
index 0f42a2875a0..fcb82695086 100644
--- a/cpp/src/sort/rank.cu
+++ b/cpp/src/sort/rank.cu
@@ -95,7 +95,7 @@ rmm::device_vector<size_type> sorted_dense_rank(column_view input_col,
  * @param rank_iter output rank iterator
  * @param tie_breaker tie breaking operator. For example, maximum & minimum.
  * @param transformer transform after tie breaking (useful for average).
- * @param stream stream to run the computations on
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 template <typename TieType,
           typename outputIterator,

From bbfe3bfab11663ed1ccecea58323c2ddc7aaf983 Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Tue, 19 May 2020 17:11:51 +0530
Subject: [PATCH 58/90] update doc for param stream in search, replace headers

---
 cpp/include/cudf/detail/replace.hpp | 6 +++---
 cpp/include/cudf/detail/search.hpp  | 8 ++++----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/cpp/include/cudf/detail/replace.hpp b/cpp/include/cudf/detail/replace.hpp
index f88119f3b04..ad1513ad8cc 100644
--- a/cpp/include/cudf/detail/replace.hpp
+++ b/cpp/include/cudf/detail/replace.hpp
@@ -51,7 +51,7 @@ std::unique_ptr<column> replace_nulls(
  * @copydoc cudf::experimental::replace_nans(column_view const&, column_view const&,
  * rmm::mr::device_memory_resource*)
  *
- * @param stream Optional CUDA stream to use for operations
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> replace_nans(
   column_view const& input,
@@ -63,7 +63,7 @@ std::unique_ptr<column> replace_nans(
  * @copydoc cudf::experimental::replace_nans(column_view const&, scalar const&,
  * rmm::mr::device_memory_resource*)
  *
- * @param stream Optional CUDA stream to use for operations
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> replace_nans(
   column_view const& input,
@@ -74,7 +74,7 @@ std::unique_ptr<column> replace_nans(
 /**
  * @copydoc cudf::experimental::find_and_replace_all
  *
- * @param stream Optional CUDA stream to use for operations
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> find_and_replace_all(
   column_view const& input_col,
diff --git a/cpp/include/cudf/detail/search.hpp b/cpp/include/cudf/detail/search.hpp
index 8f59edfa376..21b6e3ad8d4 100644
--- a/cpp/include/cudf/detail/search.hpp
+++ b/cpp/include/cudf/detail/search.hpp
@@ -29,7 +29,7 @@ namespace detail {
 /**
  * @copydoc cudf::experimental::lower_bound
  *
- * @param stream Stream to use for any kernel launches.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> lower_bound(
   table_view const& t,
@@ -42,7 +42,7 @@ std::unique_ptr<column> lower_bound(
 /**
  * @copydoc cudf::experimental::upper_bound
  *
- * @param stream Stream to use for any kernel launches.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> upper_bound(
   table_view const& t,
@@ -56,7 +56,7 @@ std::unique_ptr<column> upper_bound(
  * @copydoc cudf::experimental::contains(column_view const&, scalar const&,
  *                                       rmm::mr::device_memory_resource*)
  *
- * @param stream Stream to use for any kernel launches.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 bool contains(column_view const& col,
               scalar const& value,
@@ -67,7 +67,7 @@ bool contains(column_view const& col,
  * @copydoc cudf::experimental::contains(column_view const&, column_view const&,
  *                                       rmm::mr::device_memory_resource*)
  *
- * @param stream Stream to use for any kernel launches.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> contains(
   column_view const& haystack,

From 139df92bf616f7516712873ebb9892ad1eedf9a2 Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Tue, 19 May 2020 17:12:25 +0530
Subject: [PATCH 59/90] update doc for param stream in transform, hashing,
 unary op headers

---
 cpp/include/cudf/detail/concatenate.cuh | 8 ++++----
 cpp/include/cudf/detail/hashing.hpp     | 4 ++--
 cpp/include/cudf/detail/transform.hpp   | 6 +++---
 cpp/include/cudf/detail/unary.hpp       | 6 +++---
 cpp/include/cudf/detail/valid_if.cuh    | 3 +--
 5 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/cpp/include/cudf/detail/concatenate.cuh b/cpp/include/cudf/detail/concatenate.cuh
index 8f77caa331f..1088dacf991 100644
--- a/cpp/include/cudf/detail/concatenate.cuh
+++ b/cpp/include/cudf/detail/concatenate.cuh
@@ -29,7 +29,7 @@ namespace detail {
  * @copydoc cudf::concatenate_masks(std::vector<column_view>
  * const&,rmm::mr::device_memory_resource*)
  *
- * @param stream stream on which all memory allocations and copies will be performed
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 void concatenate_masks(rmm::device_vector<column_device_view> const& d_views,
                        rmm::device_vector<size_t> const& d_offsets,
@@ -40,7 +40,7 @@ void concatenate_masks(rmm::device_vector<column_device_view> const& d_views,
 /**
  * @copydoc cudf::concatenate_masks(std::vector<column_view> const&,bitmask_type*)
  *
- * @param stream stream on which all memory allocations and copies will be performed
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 void concatenate_masks(std::vector<column_view> const& views,
                        bitmask_type* dest_mask,
@@ -49,7 +49,7 @@ void concatenate_masks(std::vector<column_view> const& views,
 /**
  * @copydoc cudf::concatenate(std::vector<column_view> const&,rmm::mr::device_memory_resource*)
  *
- * @param stream Optional The stream on which to execute all allocations and copies
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> concatenate(
   std::vector<column_view> const& columns_to_concat,
@@ -59,7 +59,7 @@ std::unique_ptr<column> concatenate(
 /**
  * @copydoc cudf::concatenate(std::vector<table_view> const&,rmm::mr::device_memory_resource*)
  *
- * @param stream Optional The stream on which to execute all allocations and copies
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<experimental::table> concatenate(
   std::vector<table_view> const& tables_to_concat,
diff --git a/cpp/include/cudf/detail/hashing.hpp b/cpp/include/cudf/detail/hashing.hpp
index 520ee47f70b..c3b2bf74ae9 100644
--- a/cpp/include/cudf/detail/hashing.hpp
+++ b/cpp/include/cudf/detail/hashing.hpp
@@ -22,7 +22,7 @@ namespace detail {
 /**
  * @copydoc cudf::experimental::hash_partition
  *
- * @param stream Optional stream to use for allocations and copies
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::pair<std::unique_ptr<experimental::table>, std::vector<size_type>> hash_partition(
   table_view const& input,
@@ -34,7 +34,7 @@ std::pair<std::unique_ptr<experimental::table>, std::vector<size_type>> hash_par
 /**
  * @copydoc cudf::hash
  *
- * @param stream Optional stream to use for allocations and copies
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> hash(table_view const& input,
                              std::vector<uint32_t> const& initial_hash = {},
diff --git a/cpp/include/cudf/detail/transform.hpp b/cpp/include/cudf/detail/transform.hpp
index b7870793d97..1a039f52dfb 100644
--- a/cpp/include/cudf/detail/transform.hpp
+++ b/cpp/include/cudf/detail/transform.hpp
@@ -24,7 +24,7 @@ namespace detail {
 /**
  * @copydoc cudf::experimental::transform
  *
- * @param stream        CUDA stream on which to execute kernels
+ * @param stream        CUDA stream used for device memory operations and kernel launches.
  **/
 std::unique_ptr<column> transform(
   column_view const& input,
@@ -37,7 +37,7 @@ std::unique_ptr<column> transform(
 /**
  * @copydoc cudf::experimental::nans_to_nulls
  *
- * @param stream        CUDA stream on which to execute kernels
+ * @param stream        CUDA stream used for device memory operations and kernel launches.
  **/
 std::pair<std::unique_ptr<rmm::device_buffer>, size_type> nans_to_nulls(
   column_view const& input,
@@ -47,7 +47,7 @@ std::pair<std::unique_ptr<rmm::device_buffer>, size_type> nans_to_nulls(
 /**
  * @copydoc cudf::experimental::bools_to_mask
  *
- * @param stream        CUDA stream on which to execute kernels
+ * @param stream        CUDA stream used for device memory operations and kernel launches.
  **/
 std::pair<std::unique_ptr<rmm::device_buffer>, cudf::size_type> bools_to_mask(
   column_view const& input,
diff --git a/cpp/include/cudf/detail/unary.hpp b/cpp/include/cudf/detail/unary.hpp
index 991c312584d..188efa6047e 100644
--- a/cpp/include/cudf/detail/unary.hpp
+++ b/cpp/include/cudf/detail/unary.hpp
@@ -33,7 +33,7 @@ namespace detail {
  * @param end End of the sequence of elements
  * @param p Predicate to be applied to each element in `[begin,end)`
  * @param mr Optional, The resource to use for all allocations
- * @param stream Optional CUDA stream on which to execute kernels
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  *
  * @returns std::unique_ptr<cudf::column> A column of type `BOOL8,` with `true` representing
  * predicate is satisfied.
@@ -60,7 +60,7 @@ std::unique_ptr<column> true_if(
 /**
  * @copydoc cudf::experimental::unary_operation
  *
- * @param stream Optional CUDA stream on which to execute kernels
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<cudf::column> unary_operation(
   cudf::column_view const& input,
@@ -71,7 +71,7 @@ std::unique_ptr<cudf::column> unary_operation(
 /**
  * @copydoc cudf::experimental::cast
  *
- * @param stream Optional CUDA stream on which to execute kernels
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> cast(column_view const& input,
                              data_type type,
diff --git a/cpp/include/cudf/detail/valid_if.cuh b/cpp/include/cudf/detail/valid_if.cuh
index 6c8f3f13299..3140a7eb320 100644
--- a/cpp/include/cudf/detail/valid_if.cuh
+++ b/cpp/include/cudf/detail/valid_if.cuh
@@ -79,8 +79,7 @@ __global__ void valid_if_kernel(
  * @param begin The beginning of the sequence
  * @param end The end of the sequence
  * @param p The predicate
- * @param stream Stream on which to execute all GPU activity and device memory
- * allocations.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return A pair containing a `device_buffer` with the new bitmask and it's
  * null count
  */

From 8547a9d6f384114e5f1254ea24dae2ac849b89e5 Mon Sep 17 00:00:00 2001
From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com>
Date: Tue, 19 May 2020 20:51:45 +0530
Subject: [PATCH 60/90] Apply suggestions from code review (Ram)

Co-authored-by: Ram (Ramakrishna Prabhu) <42624703+rgsl888prabhu@users.noreply.github.com>
---
 cpp/include/cudf/detail/scatter.hpp | 2 +-
 cpp/include/cudf/unary.hpp          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp/include/cudf/detail/scatter.hpp b/cpp/include/cudf/detail/scatter.hpp
index 032bee6ab03..a2a50ad1028 100644
--- a/cpp/include/cudf/detail/scatter.hpp
+++ b/cpp/include/cudf/detail/scatter.hpp
@@ -56,7 +56,7 @@ namespace detail {
  * are to be scattered
  * @param check_bounds Optionally perform bounds checking on the values of
  * `scatter_map` and throw an error if any of its values are out of bounds.
- * @param mrDevice memory resource used to allocate the returned table
+ * @param mr Device memory resource used to allocate the returned table
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return Result of scattering values from source to target
  **/
diff --git a/cpp/include/cudf/unary.hpp b/cpp/include/cudf/unary.hpp
index a078e285165..d2a9c13dad2 100644
--- a/cpp/include/cudf/unary.hpp
+++ b/cpp/include/cudf/unary.hpp
@@ -56,7 +56,7 @@ enum class unary_op : int32_t {
  *
  * @param input A `column_view` as input
  * @param op operation to perform
- * @param mrDevice memory resource used to allocate the returned column
+ * @param mr Device memory resource used to allocate the returned column
  *
  * @returns Column of same size as `input` containing result of the operation
  */

From 0d115c035b6210971c7ced8294e0443e7e4dbf66 Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Tue, 19 May 2020 21:37:21 +0530
Subject: [PATCH 61/90] fix extra spacing in detail/transform.hpp

---
 cpp/include/cudf/detail/transform.hpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/cpp/include/cudf/detail/transform.hpp b/cpp/include/cudf/detail/transform.hpp
index 1a039f52dfb..ca6c54980ad 100644
--- a/cpp/include/cudf/detail/transform.hpp
+++ b/cpp/include/cudf/detail/transform.hpp
@@ -24,7 +24,7 @@ namespace detail {
 /**
  * @copydoc cudf::experimental::transform
  *
- * @param stream        CUDA stream used for device memory operations and kernel launches.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  **/
 std::unique_ptr<column> transform(
   column_view const& input,
@@ -37,7 +37,7 @@ std::unique_ptr<column> transform(
 /**
  * @copydoc cudf::experimental::nans_to_nulls
  *
- * @param stream        CUDA stream used for device memory operations and kernel launches.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  **/
 std::pair<std::unique_ptr<rmm::device_buffer>, size_type> nans_to_nulls(
   column_view const& input,
@@ -47,7 +47,7 @@ std::pair<std::unique_ptr<rmm::device_buffer>, size_type> nans_to_nulls(
 /**
  * @copydoc cudf::experimental::bools_to_mask
  *
- * @param stream        CUDA stream used for device memory operations and kernel launches.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  **/
 std::pair<std::unique_ptr<rmm::device_buffer>, cudf::size_type> bools_to_mask(
   column_view const& input,

From 1eb790715cf2cceb0c49ccf5bc70f1bdf680d1e9 Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Thu, 21 May 2020 15:15:17 +0530
Subject: [PATCH 62/90] add column's device memory in param mr doc

---
 cpp/include/cudf/binaryop.hpp                 |  8 +++----
 cpp/include/cudf/column/column_factories.hpp  |  2 +-
 cpp/include/cudf/concatenate.hpp              |  2 +-
 cpp/include/cudf/copying.hpp                  |  2 +-
 cpp/include/cudf/detail/copy_if_else.cuh      |  2 +-
 cpp/include/cudf/detail/gather.cuh            |  8 +++----
 cpp/include/cudf/dictionary/detail/encode.hpp |  4 ++--
 .../cudf/dictionary/detail/update_keys.hpp    |  8 +++----
 .../cudf/dictionary/dictionary_factories.hpp  |  2 +-
 cpp/include/cudf/dictionary/encode.hpp        |  4 ++--
 cpp/include/cudf/dictionary/update_keys.hpp   |  8 +++----
 cpp/include/cudf/filling.hpp                  |  6 ++---
 cpp/include/cudf/hashing.hpp                  |  2 +-
 cpp/include/cudf/replace.hpp                  |  6 ++---
 cpp/include/cudf/search.hpp                   | 14 ++++++------
 cpp/include/cudf/strings/attributes.hpp       |  6 ++---
 cpp/include/cudf/strings/case.hpp             |  6 ++---
 .../cudf/strings/char_types/char_types.hpp    |  6 ++---
 cpp/include/cudf/strings/combine.hpp          |  4 ++--
 cpp/include/cudf/strings/contains.hpp         |  6 ++---
 .../cudf/strings/convert/convert_booleans.hpp |  4 ++--
 .../cudf/strings/convert/convert_datetime.hpp |  4 ++--
 .../cudf/strings/convert/convert_floats.hpp   |  4 ++--
 .../cudf/strings/convert/convert_integers.hpp |  6 ++---
 .../cudf/strings/convert/convert_ipv4.hpp     |  4 ++--
 .../cudf/strings/convert/convert_urls.hpp     |  4 ++--
 cpp/include/cudf/strings/copying.hpp          |  2 +-
 .../cudf/strings/detail/concatenate.hpp       |  2 +-
 .../cudf/strings/detail/copy_if_else.cuh      |  2 +-
 .../cudf/strings/detail/copy_range.cuh        |  2 +-
 cpp/include/cudf/strings/detail/fill.hpp      |  2 +-
 cpp/include/cudf/strings/detail/gather.cuh    |  4 ++--
 cpp/include/cudf/strings/detail/merge.cuh     |  2 +-
 .../cudf/strings/detail/modify_strings.cuh    |  2 +-
 cpp/include/cudf/strings/detail/scatter.cuh   |  2 +-
 cpp/include/cudf/strings/detail/utilities.cuh |  2 +-
 cpp/include/cudf/strings/detail/utilities.hpp |  8 +++----
 cpp/include/cudf/strings/find.hpp             | 10 ++++-----
 cpp/include/cudf/strings/find_multiple.hpp    |  2 +-
 cpp/include/cudf/strings/padding.hpp          |  4 ++--
 cpp/include/cudf/strings/replace.hpp          |  8 +++----
 cpp/include/cudf/strings/replace_re.hpp       |  6 ++---
 cpp/include/cudf/strings/sorting.hpp          |  2 +-
 cpp/include/cudf/strings/strip.hpp            |  2 +-
 cpp/include/cudf/strings/substring.hpp        |  4 ++--
 cpp/include/cudf/strings/translate.hpp        |  2 +-
 cpp/include/cudf/transform.hpp                |  2 +-
 cpp/include/cudf/unary.hpp                    | 12 +++++-----
 cpp/include/nvtext/detail/tokenize.hpp        |  8 +++----
 cpp/include/nvtext/generate_ngrams.hpp        |  2 +-
 cpp/include/nvtext/ngrams_tokenize.hpp        |  2 +-
 cpp/include/nvtext/normalize.hpp              |  2 +-
 cpp/include/nvtext/tokenize.hpp               |  8 +++----
 cpp/src/binaryop/compiled/binary_ops.hpp      |  6 ++---
 cpp/src/dictionary/remove_keys.cu             |  2 +-
 cpp/src/groupby/sort/group_reductions.hpp     | 22 +++++++++----------
 cpp/src/io/utilities/column_buffer.hpp        |  2 +-
 cpp/src/reductions/scan.cu                    |  2 +-
 cpp/src/strings/attributes.cu                 |  2 +-
 cpp/src/strings/case.cu                       |  2 +-
 cpp/src/strings/find.cu                       |  4 ++--
 cpp/src/strings/utilities.cuh                 |  2 +-
 62 files changed, 141 insertions(+), 141 deletions(-)

diff --git a/cpp/include/cudf/binaryop.hpp b/cpp/include/cudf/binaryop.hpp
index a24fa447a68..83fe247daa1 100644
--- a/cpp/include/cudf/binaryop.hpp
+++ b/cpp/include/cudf/binaryop.hpp
@@ -87,7 +87,7 @@ enum class binary_operator : int32_t {
  * @param lhs         The left operand scalar
  * @param rhs         The right operand column
  * @param output_type The desired data type of the output column
- * @param mr          Device memory resource used to allocate the returned column
+ * @param mr          Device memory resource used to allocate the returned column's device memory
  * @return            Output column of `output_type` type containing the result of
  *                    the binary operation
  * @throw cudf::logic_error if @p output_type dtype isn't fixed-width
@@ -112,7 +112,7 @@ std::unique_ptr<column> binary_operation(
  * @param lhs         The left operand column
  * @param rhs         The right operand scalar
  * @param output_type The desired data type of the output column
- * @param mr          Device memory resource used to allocate the returned column
+ * @param mr          Device memory resource used to allocate the returned column's device memory
  * @return            Output column of `output_type` type containing the result of
  *                    the binary operation
  * @throw cudf::logic_error if @p output_type dtype isn't fixed-width
@@ -135,7 +135,7 @@ std::unique_ptr<column> binary_operation(
  * @param lhs         The left operand column
  * @param rhs         The right operand column
  * @param output_type The desired data type of the output column
- * @param mr          Device memory resource used to allocate the returned column
+ * @param mr          Device memory resource used to allocate the returned column's device memory
  * @return            Output column of `output_type` type containing the result of
  *                    the binary operation
  * @throw cudf::logic_error if @p lhs and @p rhs are different sizes
@@ -163,7 +163,7 @@ std::unique_ptr<column> binary_operation(
  * @param output_type The desired data type of the output column. It is assumed
  *                    that output_type is compatible with the output data type
  *                    of the function in the PTX code
- * @param mr          Device memory resource used to allocate the returned column
+ * @param mr          Device memory resource used to allocate the returned column's device memory
  * @return            Output column of `output_type` type containing the result of
  *                    the binary operation
  * @throw cudf::logic_error if @p lhs and @p rhs are different sizes
diff --git a/cpp/include/cudf/column/column_factories.hpp b/cpp/include/cudf/column/column_factories.hpp
index 78f15cdd589..a8800e8cd55 100644
--- a/cpp/include/cudf/column/column_factories.hpp
+++ b/cpp/include/cudf/column/column_factories.hpp
@@ -395,7 +395,7 @@ std::unique_ptr<column> make_strings_column(
  * @param s The scalar to use for values in the column.
  * @param size The number of rows for the output column.
  * @param stream CUDA stream used for device memory operations and kernel launches.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  */
 std::unique_ptr<column> make_column_from_scalar(
   scalar const& s,
diff --git a/cpp/include/cudf/concatenate.hpp b/cpp/include/cudf/concatenate.hpp
index 5b1225f399a..34926e1a1d0 100644
--- a/cpp/include/cudf/concatenate.hpp
+++ b/cpp/include/cudf/concatenate.hpp
@@ -51,7 +51,7 @@ rmm::device_buffer concatenate_masks(
  *
  * @param columns_to_concat The column views to be concatenated into a single
  * column
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return Unique pointer to a single table having all the rows from the
  * elements of `columns_to_concat` respectively in the same order.
  */
diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp
index 1c033a94465..e87b7c7217e 100644
--- a/cpp/include/cudf/copying.hpp
+++ b/cpp/include/cudf/copying.hpp
@@ -274,7 +274,7 @@ void copy_range_in_place(column_view const& source,
  * @param source_end The index of the last element in the source range
  * (exclusive)
  * @param target_begin The starting index of the target range (inclusive)
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return std::unique_ptr<column> The result target column
  */
 std::unique_ptr<column> copy_range(
diff --git a/cpp/include/cudf/detail/copy_if_else.cuh b/cpp/include/cudf/detail/copy_if_else.cuh
index 13361f6e4ea..7eea9606ea4 100644
--- a/cpp/include/cudf/detail/copy_if_else.cuh
+++ b/cpp/include/cudf/detail/copy_if_else.cuh
@@ -153,7 +153,7 @@ __launch_bounds__(block_size) __global__
  * @param rhs         Begin iterator of rhs range
  * @param filter      Function of type `FilterFn` which determines for index `i` where to get the
  *                    corresponding output value from
- * @param mr          Device memory resource used to allocate the returned column
+ * @param mr          Device memory resource used to allocate the returned column's device memory
  * @param stream      CUDA stream used for device memory operations and kernel launches.
  * @return            A new column that contains the values from either `lhs` or `rhs` as determined
  *                    by `filter[i]`
diff --git a/cpp/include/cudf/detail/gather.cuh b/cpp/include/cudf/detail/gather.cuh
index fc01ab9487b..d3f89c0f9a8 100644
--- a/cpp/include/cudf/detail/gather.cuh
+++ b/cpp/include/cudf/detail/gather.cuh
@@ -112,7 +112,7 @@ struct column_gatherer_impl {
    *map
    * @param gather_map_end End of iterator range of integral values representing the gather map
    * @param nullify_out_of_bounds Nullify values in `gather_map` that are out of bounds
-   * @param mr Device memory resource used to allocate the returned column
+   * @param mr Device memory resource used to allocate the returned column's device memory
    * @param stream CUDA stream used for device memory operations and kernel launches.
    */
   std::unique_ptr<column> operator()(column_view const& source_column,
@@ -171,7 +171,7 @@ struct column_gatherer_impl<string_view, MapItType> {
    *map
    * @param gather_map_end End of iterator range of integral values representing the gather map
    * @param nullify_out_of_bounds Nullify values in `gather_map` that are out of bounds
-   * @param mr Device memory resource used to allocate the returned column
+   * @param mr Device memory resource used to allocate the returned column's device memory
    * @param stream CUDA stream used for device memory operations and kernel launches.
    */
   std::unique_ptr<column> operator()(column_view const& source_column,
@@ -205,7 +205,7 @@ struct column_gatherer_impl<dictionary32, MapItType> {
    * map
    * @param gather_map_end End of iterator range of integral values representing the gather map
    * @param nullify_out_of_bounds Nullify values in `gather_map` that are out of bounds
-   * @param mr Device memory resource used to allocate the returned column
+   * @param mr Device memory resource used to allocate the returned column's device memory
    * @param stream CUDA stream used for device memory operations and kernel launches.
    * @return New dictionary column with gathered rows.
    */
@@ -272,7 +272,7 @@ struct column_gatherer {
    * map
    * @param gather_map_end End of iterator range of integral values representing the gather map
    * @param nullify_out_of_bounds Nullify values in `gather_map` that are out of bounds
-   * @param mr Device memory resource used to allocate the returned column
+   * @param mr Device memory resource used to allocate the returned column's device memory
    * @param stream CUDA stream used for device memory operations and kernel launches.
    */
   template <typename Element, typename MapIterator>
diff --git a/cpp/include/cudf/dictionary/detail/encode.hpp b/cpp/include/cudf/dictionary/detail/encode.hpp
index 555be4a6cfb..56ba89b8e53 100644
--- a/cpp/include/cudf/dictionary/detail/encode.hpp
+++ b/cpp/include/cudf/dictionary/detail/encode.hpp
@@ -44,7 +44,7 @@ namespace detail {
  *
  * @param column The column to dictionary encode.
  * @param indices_type The integer type to use for the indices.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return Returns a dictionary column.
  */
@@ -65,7 +65,7 @@ std::unique_ptr<column> encode(
  * ```
  *
  * @param dictionary_column Existing dictionary column.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return New column with type matching the dictionary_column's keys.
  */
diff --git a/cpp/include/cudf/dictionary/detail/update_keys.hpp b/cpp/include/cudf/dictionary/detail/update_keys.hpp
index 78ffff72484..a884d14085a 100644
--- a/cpp/include/cudf/dictionary/detail/update_keys.hpp
+++ b/cpp/include/cudf/dictionary/detail/update_keys.hpp
@@ -27,7 +27,7 @@ namespace detail {
  *
  * @param dictionary_column Existing dictionary column.
  * @param new_keys New keys to incorporate into the dictionary_column
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return New dictionary column.
  */
@@ -43,7 +43,7 @@ std::unique_ptr<column> add_keys(
  *
  * @param dictionary_column Existing dictionary column.
  * @param keys_to_remove The keys to remove from the dictionary_column
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return New dictionary column.
  */
@@ -58,7 +58,7 @@ std::unique_ptr<column> remove_keys(
  * const&,mm::mr::device_memory_resource*)
  *
  * @param dictionary_column Existing dictionary column.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return New dictionary column.
  */
@@ -73,7 +73,7 @@ std::unique_ptr<column> remove_unused_keys(
  *
  * @param dictionary_column Existing dictionary column.
  * @param keys New keys to use for the output column. Must not contain nulls.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return New dictionary column.
  */
diff --git a/cpp/include/cudf/dictionary/dictionary_factories.hpp b/cpp/include/cudf/dictionary/dictionary_factories.hpp
index ad962105c02..a18ad743416 100644
--- a/cpp/include/cudf/dictionary/dictionary_factories.hpp
+++ b/cpp/include/cudf/dictionary/dictionary_factories.hpp
@@ -50,7 +50,7 @@ namespace cudf {
  *
  * @param keys_column Column of unique, ordered values to use as the new dictionary column's keys.
  * @param indices_column Indices to use for the new dictionary column.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return New dictionary column.
  */
diff --git a/cpp/include/cudf/dictionary/encode.hpp b/cpp/include/cudf/dictionary/encode.hpp
index 596a90644b4..3473b4c107b 100644
--- a/cpp/include/cudf/dictionary/encode.hpp
+++ b/cpp/include/cudf/dictionary/encode.hpp
@@ -54,7 +54,7 @@ namespace dictionary {
  *
  * @param column The column to dictionary encode.
  * @param indices_type The integer type to use for the indices.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return Returns a dictionary column.
  */
 std::unique_ptr<column> encode(
@@ -73,7 +73,7 @@ std::unique_ptr<column> encode(
  * @endcode
  *
  * @param dictionary_column Existing dictionary column.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New column with type matching the dictionary_column's keys.
  */
 std::unique_ptr<column> decode(
diff --git a/cpp/include/cudf/dictionary/update_keys.hpp b/cpp/include/cudf/dictionary/update_keys.hpp
index c57e503e94f..f5942f2524b 100644
--- a/cpp/include/cudf/dictionary/update_keys.hpp
+++ b/cpp/include/cudf/dictionary/update_keys.hpp
@@ -48,7 +48,7 @@ namespace dictionary {
  *
  * @param dictionary_column Existing dictionary column.
  * @param new_keys New keys to incorporate into the dictionary_column
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New dictionary column.
  */
 std::unique_ptr<column> add_keys(
@@ -78,7 +78,7 @@ std::unique_ptr<column> add_keys(
  *
  * @param dictionary_column Existing dictionary column.
  * @param keys_to_remove The keys to remove from the dictionary_column
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New dictionary column.
  */
 std::unique_ptr<column> remove_keys(
@@ -99,7 +99,7 @@ std::unique_ptr<column> remove_keys(
  * @endcode
  *
  * @param dictionary_column Existing dictionary column.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New dictionary column.
  */
 std::unique_ptr<column> remove_unused_keys(
@@ -130,7 +130,7 @@ std::unique_ptr<column> remove_unused_keys(
  *
  * @param dictionary_column Existing dictionary column.
  * @param keys New keys to use for the output column. Must not contain nulls.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New dictionary column.
  */
 std::unique_ptr<column> set_keys(
diff --git a/cpp/include/cudf/filling.hpp b/cpp/include/cudf/filling.hpp
index b1eb0324060..9c9bff782a2 100644
--- a/cpp/include/cudf/filling.hpp
+++ b/cpp/include/cudf/filling.hpp
@@ -77,7 +77,7 @@ void fill_in_place(mutable_column_view& destination,
  * @param begin The starting index of the fill range (inclusive)
  * @param end The index of the last element in the fill range (exclusive)
  * @param value The scalar value to fill
- * @param mr Device memory resource used to allocate the returned column
+ * @param mr Device memory resource used to allocate the returned column's device memory
  * @return The result output column
  */
 std::unique_ptr<column> fill(column_view const& input,
@@ -167,7 +167,7 @@ std::unique_ptr<table> repeat(
  * @param size Size of the output column
  * @param init First value in the sequence
  * @param step Increment value
- * @param mr Device memory resource used to allocate the returned column
+ * @param mr Device memory resource used to allocate the returned column's device memory
  * @return std::unique_ptr<column> The result table containing the sequence
  **/
 std::unique_ptr<column> sequence(
@@ -193,7 +193,7 @@ std::unique_ptr<column> sequence(
  *
  * @param size Size of the output column
  * @param init First value in the sequence
- * @param mr Device memory resource used to allocate the returned column
+ * @param mr Device memory resource used to allocate the returned column's device memory
  * @return std::unique_ptr<column> The result table containing the sequence
  **/
 std::unique_ptr<column> sequence(
diff --git a/cpp/include/cudf/hashing.hpp b/cpp/include/cudf/hashing.hpp
index b26131c7c90..cc532e377e7 100644
--- a/cpp/include/cudf/hashing.hpp
+++ b/cpp/include/cudf/hashing.hpp
@@ -30,7 +30,7 @@ namespace cudf {
  * @param input The table of columns to hash
  * @param initial_hash Optional vector of initial hash values for each column.
  * If this vector is empty then each element will be hashed as-is.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  *
  * @returns A column where each row is the hash of a column from the input
  */
diff --git a/cpp/include/cudf/replace.hpp b/cpp/include/cudf/replace.hpp
index 2d9051c2f6f..7310e74438f 100644
--- a/cpp/include/cudf/replace.hpp
+++ b/cpp/include/cudf/replace.hpp
@@ -78,7 +78,7 @@ std::unique_ptr<column> replace_nulls(
  *
  * @param input A column whose NaN values will be replaced
  * @param replacement A cudf::column whose values will replace NaN values in input
- * @param mr Device memory resource used to allocate the returned column
+ * @param mr Device memory resource used to allocate the returned column's device memory
  * @return A copy of `input` with the NaN values replaced with corresponding values from
  * `replacement`.
  */
@@ -104,7 +104,7 @@ std::unique_ptr<column> replace_nans(
  *
  * @param input A column whose NaN values will be replaced
  * @param replacement A cudf::scalar whose value will replace NaN values in input
- * @param mr Device memory resource used to allocate the returned column
+ * @param mr Device memory resource used to allocate the returned column's device memory
  * @return A copy of `input` with the NaN values replaced by `replacement`.
  */
 std::unique_ptr<column> replace_nans(
@@ -119,7 +119,7 @@ std::unique_ptr<column> replace_nans(
  * @param input_col The column to find and replace values in.
  * @param values_to_replace The values to replace
  * @param replacement_values The values to replace with
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  *
  * @returns Copy of `input_col` with specified values replaced.
  */
diff --git a/cpp/include/cudf/search.hpp b/cpp/include/cudf/search.hpp
index e7d3ffb5c0f..1e90ff97c64 100644
--- a/cpp/include/cudf/search.hpp
+++ b/cpp/include/cudf/search.hpp
@@ -60,9 +60,9 @@ namespace experimental {
  * @param t               Table to search
  * @param values          Find insert locations for these values
  * @param column_order    Vector of column sort order
- * @param null_precedence Vector of null_precedence enums
- * values
- * @param mr              Device memory resource used to allocate the returned column
+ * @param null_precedence Vector of null_precedence enum values
+ * @param mr              Device memory resource used to allocate the returned column's device
+ * memory
  * @return A non-nullable column of cudf::size_type elements containing the insertion points.
  */
 std::unique_ptr<column> lower_bound(
@@ -102,9 +102,9 @@ std::unique_ptr<column> lower_bound(
  * @param column          Table to search
  * @param values          Find insert locations for these values
  * @param column_order    Vector of column sort order
- * @param null_precedence Vector of null_precedence enums
- * values
- * @param mr              Device memory resource used to allocate the returned column
+ * @param null_precedence Vector of null_precedence enum values
+ * @param mr              Device memory resource used to allocate the returned column's device
+ * memory
  * @return A non-nullable column of cudf::size_type elements containing the insertion points.
  */
 std::unique_ptr<column> upper_bound(
@@ -158,7 +158,7 @@ bool contains(column_view const& col,
  *
  * @param haystack  A column object
  * @param needles   A column of values to search for in `col`
- * @param mr        Device memory resource used to allocate the returned column
+ * @param mr        Device memory resource used to allocate the returned column's device memory
  *
  * @return A column of bool elements containing true if the corresponding entry in haystack is
  * contained in needles and false if it is not.
diff --git a/cpp/include/cudf/strings/attributes.hpp b/cpp/include/cudf/strings/attributes.hpp
index 668ea8a9efa..62d17779222 100644
--- a/cpp/include/cudf/strings/attributes.hpp
+++ b/cpp/include/cudf/strings/attributes.hpp
@@ -38,7 +38,7 @@ namespace strings {
  * Any null string will result in a null entry for that row in the output column.
  *
  * @param strings Strings instance for this operation.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New INT32 column with lengths for each string.
  */
 std::unique_ptr<column> count_characters(
@@ -56,7 +56,7 @@ std::unique_ptr<column> count_characters(
  * Any null string will result in a null entry for that row in the output column.
  *
  * @param strings Strings instance for this operation.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New INT32 column with the number of bytes for each string.
  */
 std::unique_ptr<column> count_bytes(
@@ -76,7 +76,7 @@ std::unique_ptr<column> count_bytes(
  * Any null string is ignored. No null entries will appear in the output column.
  *
  * @param strings Strings instance for this operation.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New INT32 column with code point integer values for each character.
  */
 std::unique_ptr<column> code_points(
diff --git a/cpp/include/cudf/strings/case.hpp b/cpp/include/cudf/strings/case.hpp
index 16ef0412c34..f475c271016 100644
--- a/cpp/include/cudf/strings/case.hpp
+++ b/cpp/include/cudf/strings/case.hpp
@@ -35,7 +35,7 @@ namespace strings {
  * Any null entries create null entries in the output column.
  *
  * @param strings Strings instance for this operation.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New column of strings with characters converted.
  */
 std::unique_ptr<column> to_lower(
@@ -52,7 +52,7 @@ std::unique_ptr<column> to_lower(
  * Any null entries create null entries in the output column.
  *
  * @param strings Strings instance for this operation.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New column of strings with characters converted.
  */
 std::unique_ptr<column> to_upper(
@@ -70,7 +70,7 @@ std::unique_ptr<column> to_upper(
  * Any null entries create null entries in the output column.
  *
  * @param strings Strings instance for this operation.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New column of strings with characters converted.
  */
 std::unique_ptr<column> swapcase(
diff --git a/cpp/include/cudf/strings/char_types/char_types.hpp b/cpp/include/cudf/strings/char_types/char_types.hpp
index 4ffe4c35601..fa7d96f1f2b 100644
--- a/cpp/include/cudf/strings/char_types/char_types.hpp
+++ b/cpp/include/cudf/strings/char_types/char_types.hpp
@@ -91,7 +91,7 @@ string_character_types& operator|=(string_character_types& lhs, string_character
  * @param verify_types Only verify against these character types.
  *                     Default `ALL_TYPES` means return `true`
  *                     iff all characters match `types`.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New column of boolean results for each string.
  */
 std::unique_ptr<column> all_characters_of_type(
@@ -117,7 +117,7 @@ std::unique_ptr<column> all_characters_of_type(
  * Any null row results in a null entry for that row in the output column.
  *
  * @param strings Strings instance for this operation.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New column of boolean results for each string.
  */
 std::unique_ptr<column> is_integer(
@@ -155,7 +155,7 @@ bool all_integer(strings_column_view const& strings);
  * Any null row results in a null entry for that row in the output column.
  *
  * @param strings Strings instance for this operation.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New column of boolean results for each string.
  */
 std::unique_ptr<column> is_float(
diff --git a/cpp/include/cudf/strings/combine.hpp b/cpp/include/cudf/strings/combine.hpp
index b1ba63814a1..0f02b6657ed 100644
--- a/cpp/include/cudf/strings/combine.hpp
+++ b/cpp/include/cudf/strings/combine.hpp
@@ -59,7 +59,7 @@ namespace strings {
  * @param narep String that should be used in place of any null strings
  *        found in any column. Default of invalid-scalar means any null entry in any column will
  *        produces a null result for that row.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New column with concatenated results.
  */
 std::unique_ptr<column> concatenate(
@@ -89,7 +89,7 @@ std::unique_ptr<column> concatenate(
  *        Default is an empty string.
  * @param narep String that should represent any null strings found.
  *        Default of invalid-scalar will ignore any null entries.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New column containing one string.
  */
 std::unique_ptr<column> join_strings(
diff --git a/cpp/include/cudf/strings/contains.hpp b/cpp/include/cudf/strings/contains.hpp
index 29f93b67a0e..1a14e68af39 100644
--- a/cpp/include/cudf/strings/contains.hpp
+++ b/cpp/include/cudf/strings/contains.hpp
@@ -42,7 +42,7 @@ namespace strings {
  *
  * @param strings Strings instance for this operation.
  * @param pattern Regex pattern to match to each string.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New column of boolean results for each string.
  */
 std::unique_ptr<column> contains_re(
@@ -67,7 +67,7 @@ std::unique_ptr<column> contains_re(
  *
  * @param strings Strings instance for this operation.
  * @param pattern Regex pattern to match to each string.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New column of boolean results for each string.
  */
 std::unique_ptr<column> matches_re(
@@ -92,7 +92,7 @@ std::unique_ptr<column> matches_re(
  *
  * @param strings Strings instance for this operation.
  * @param pattern Regex pattern to match within each string.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New INT32 column with counts for each string.
  */
 std::unique_ptr<column> count_re(
diff --git a/cpp/include/cudf/strings/convert/convert_booleans.hpp b/cpp/include/cudf/strings/convert/convert_booleans.hpp
index a224ea7ba4c..812b1e67c0e 100644
--- a/cpp/include/cudf/strings/convert/convert_booleans.hpp
+++ b/cpp/include/cudf/strings/convert/convert_booleans.hpp
@@ -34,7 +34,7 @@ namespace strings {
  *
  * @param strings Strings instance for this operation.
  * @param true_string String to expect for true. Non-matching strings are false.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New BOOL8 column converted from strings.
  */
 std::unique_ptr<column> to_booleans(
@@ -53,7 +53,7 @@ std::unique_ptr<column> to_booleans(
  * @param booleans Boolean column to convert.
  * @param true_string String to use for true in the output column.
  * @param false_string String to use for false in the output column.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New strings column.
  */
 std::unique_ptr<column> from_booleans(
diff --git a/cpp/include/cudf/strings/convert/convert_datetime.hpp b/cpp/include/cudf/strings/convert/convert_datetime.hpp
index e2baffa7577..a6b14eaf988 100644
--- a/cpp/include/cudf/strings/convert/convert_datetime.hpp
+++ b/cpp/include/cudf/strings/convert/convert_datetime.hpp
@@ -66,7 +66,7 @@ namespace strings {
  * @param strings Strings instance for this operation.
  * @param timestamp_type The timestamp type used for creating the output column.
  * @param format String specifying the timestamp format in strings.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New datetime column.
  */
 std::unique_ptr<column> to_timestamps(
@@ -117,7 +117,7 @@ std::unique_ptr<column> to_timestamps(
  * @param timestamps Timestamp values to convert.
  * @param format The string specifying output format.
  *        Default format is "%Y-%m-%dT%H:%M:%SZ".
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New strings column with formatted timestamps.
  */
 std::unique_ptr<column> from_timestamps(
diff --git a/cpp/include/cudf/strings/convert/convert_floats.hpp b/cpp/include/cudf/strings/convert/convert_floats.hpp
index f8418696575..becf0977752 100644
--- a/cpp/include/cudf/strings/convert/convert_floats.hpp
+++ b/cpp/include/cudf/strings/convert/convert_floats.hpp
@@ -38,7 +38,7 @@ namespace strings {
  *
  * @param strings Strings instance for this operation.
  * @param output_type Type of float numeric column to return.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New column with floats converted from strings.
  */
 std::unique_ptr<column> to_floats(
@@ -60,7 +60,7 @@ std::unique_ptr<column> to_floats(
  * @throw cudf::logic_error if floats column is not float type.
  *
  * @param floats Numeric column to convert.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New strings column with floats as strings.
  */
 std::unique_ptr<column> from_floats(
diff --git a/cpp/include/cudf/strings/convert/convert_integers.hpp b/cpp/include/cudf/strings/convert/convert_integers.hpp
index 3a69bcaa709..426f4b00428 100644
--- a/cpp/include/cudf/strings/convert/convert_integers.hpp
+++ b/cpp/include/cudf/strings/convert/convert_integers.hpp
@@ -45,7 +45,7 @@ namespace strings {
  *
  * @param strings Strings instance for this operation.
  * @param output_type Type of integer numeric column to return.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New column with integers converted from strings.
  */
 std::unique_ptr<column> to_integers(
@@ -65,7 +65,7 @@ std::unique_ptr<column> to_integers(
  * @throw cudf::logic_error if integers column is not integral type.
  *
  * @param integers Numeric column to convert.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New strings column with integers as strings.
  */
 std::unique_ptr<column> from_integers(
@@ -92,7 +92,7 @@ std::unique_ptr<column> from_integers(
  *
  * @param strings Strings instance for this operation.
  * @param output_type Type of integer numeric column to return.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New column with integers converted from strings.
  */
 std::unique_ptr<column> hex_to_integers(
diff --git a/cpp/include/cudf/strings/convert/convert_ipv4.hpp b/cpp/include/cudf/strings/convert/convert_ipv4.hpp
index a5f1b0d51e2..0e5b9b18179 100644
--- a/cpp/include/cudf/strings/convert/convert_ipv4.hpp
+++ b/cpp/include/cudf/strings/convert/convert_ipv4.hpp
@@ -46,7 +46,7 @@ namespace strings {
  * Any null entries will result in corresponding null entries in the output column.
  *
  * @param strings Strings instance for this operation.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New INT64 column converted from strings.
  */
 std::unique_ptr<column> ipv4_to_integers(
@@ -69,7 +69,7 @@ std::unique_ptr<column> ipv4_to_integers(
  * @throw cudf::logic_error if the input column is not INT64 type.
  *
  * @param integers Integer (INT64) column to convert.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New strings column.
  */
 std::unique_ptr<column> integers_to_ipv4(
diff --git a/cpp/include/cudf/strings/convert/convert_urls.hpp b/cpp/include/cudf/strings/convert/convert_urls.hpp
index c6133f943cc..48915eacf3f 100644
--- a/cpp/include/cudf/strings/convert/convert_urls.hpp
+++ b/cpp/include/cudf/strings/convert/convert_urls.hpp
@@ -37,7 +37,7 @@ namespace strings {
  * Any null entries will result in corresponding null entries in the output column.
  *
  * @param strings Strings instance for this operation.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New strings column.
  */
 std::unique_ptr<column> url_encode(
@@ -58,7 +58,7 @@ std::unique_ptr<column> url_encode(
  * Any null entries will result in corresponding null entries in the output column.
  *
  * @param strings Strings instance for this operation.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New strings column.
  */
 std::unique_ptr<column> url_decode(
diff --git a/cpp/include/cudf/strings/copying.hpp b/cpp/include/cudf/strings/copying.hpp
index d128f598f82..e087fc43dda 100644
--- a/cpp/include/cudf/strings/copying.hpp
+++ b/cpp/include/cudf/strings/copying.hpp
@@ -42,7 +42,7 @@ namespace detail {
  * @param step Increment value between indices.
  *             Default step is 1.
  * @param stream CUDA stream used for device memory operations and kernel launches.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New strings column of size (end-start)/step.
  */
 std::unique_ptr<cudf::column> slice(
diff --git a/cpp/include/cudf/strings/detail/concatenate.hpp b/cpp/include/cudf/strings/detail/concatenate.hpp
index 312132806f9..52ff327aea5 100644
--- a/cpp/include/cudf/strings/detail/concatenate.hpp
+++ b/cpp/include/cudf/strings/detail/concatenate.hpp
@@ -34,7 +34,7 @@ namespace detail {
  * ```
  *
  * @param columns List of string columns to concatenate.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return New column with concatenated results.
  */
diff --git a/cpp/include/cudf/strings/detail/copy_if_else.cuh b/cpp/include/cudf/strings/detail/copy_if_else.cuh
index 5ff701cf460..fca2934bf48 100644
--- a/cpp/include/cudf/strings/detail/copy_if_else.cuh
+++ b/cpp/include/cudf/strings/detail/copy_if_else.cuh
@@ -44,7 +44,7 @@ namespace detail {
  * @param rhs_begin Strings of second set of data. Used when filter_fn returns false.
  * @param filter_fn Called to determine which iterator (lhs or rhs) to retrieve an entry for a
  * specific row.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return New strings column.
  */
diff --git a/cpp/include/cudf/strings/detail/copy_range.cuh b/cpp/include/cudf/strings/detail/copy_range.cuh
index 2a3110b56df..881a8d8acc4 100644
--- a/cpp/include/cudf/strings/detail/copy_range.cuh
+++ b/cpp/include/cudf/strings/detail/copy_range.cuh
@@ -89,7 +89,7 @@ namespace detail {
  * @param target_begin The starting index of the target range (inclusive)
  * @param target_end The index of the last element in the target range
  * (exclusive)
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return std::unique_ptr<column> The result target column
  */
diff --git a/cpp/include/cudf/strings/detail/fill.hpp b/cpp/include/cudf/strings/detail/fill.hpp
index e1cb95c28bd..58add56b206 100644
--- a/cpp/include/cudf/strings/detail/fill.hpp
+++ b/cpp/include/cudf/strings/detail/fill.hpp
@@ -35,7 +35,7 @@ namespace detail {
  * @param begin First row index to include the new string.
  * @param end Last row index (exclusive).
  * @param value String to use when filling the range.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return New strings column.
  */
diff --git a/cpp/include/cudf/strings/detail/gather.cuh b/cpp/include/cudf/strings/detail/gather.cuh
index fdbd79750d7..f811eccf365 100644
--- a/cpp/include/cudf/strings/detail/gather.cuh
+++ b/cpp/include/cudf/strings/detail/gather.cuh
@@ -45,7 +45,7 @@ namespace detail {
  * @param strings Strings instance for this operation.
  * @param begin Start of index iterator.
  * @param end End of index iterator.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return New strings column containing the gathered strings.
  */
@@ -122,7 +122,7 @@ std::unique_ptr<cudf::column> gather(
  * @param begin Start of index iterator.
  * @param end End of index iterator.
  * @param nullify_out_of_bounds If true, indices outside the column's range are nullified.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return New strings column containing the gathered strings.
  */
diff --git a/cpp/include/cudf/strings/detail/merge.cuh b/cpp/include/cudf/strings/detail/merge.cuh
index f7c247b35a7..a9ae4c0a86d 100644
--- a/cpp/include/cudf/strings/detail/merge.cuh
+++ b/cpp/include/cudf/strings/detail/merge.cuh
@@ -37,7 +37,7 @@ namespace detail {
  * @param lhs First column.
  * @param rhs Second column.
  * @param row_order Indexes for each column.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return New strings column.
  */
diff --git a/cpp/include/cudf/strings/detail/modify_strings.cuh b/cpp/include/cudf/strings/detail/modify_strings.cuh
index 97a59b89afc..e859b324ed7 100644
--- a/cpp/include/cudf/strings/detail/modify_strings.cuh
+++ b/cpp/include/cudf/strings/detail/modify_strings.cuh
@@ -41,7 +41,7 @@ namespace detail {
  *
  * @param strings Number Column of strings to apply the modifications on;
  * it is not modified in place; rather a new column is returned instead
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * (cannot be a default argument because of the variadic pack);
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * (cannot be a default argument because of the variadic pack);
diff --git a/cpp/include/cudf/strings/detail/scatter.cuh b/cpp/include/cudf/strings/detail/scatter.cuh
index f7806921fb4..6ced8428188 100644
--- a/cpp/include/cudf/strings/detail/scatter.cuh
+++ b/cpp/include/cudf/strings/detail/scatter.cuh
@@ -44,7 +44,7 @@ namespace detail {
  * @param scatter_map Iterator of indices into the output column.
  * @param target The set of columns into which values from the source column
  *        are to be scattered.
- * @param mr Device memory resource used to allocate the returned column
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return New strings column.
  */
diff --git a/cpp/include/cudf/strings/detail/utilities.cuh b/cpp/include/cudf/strings/detail/utilities.cuh
index 9a24a33c0f8..3bb4fea3ac4 100644
--- a/cpp/include/cudf/strings/detail/utilities.cuh
+++ b/cpp/include/cudf/strings/detail/utilities.cuh
@@ -33,7 +33,7 @@ namespace detail {
  * @tparam Iterator Used as input to scan to set the offset values.
  * @param begin The beginning of the input sequence
  * @param end The end of the input sequence
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return offsets child column for strings column
  */
diff --git a/cpp/include/cudf/strings/detail/utilities.hpp b/cpp/include/cudf/strings/detail/utilities.hpp
index 04680e59753..80fa605ea68 100644
--- a/cpp/include/cudf/strings/detail/utilities.hpp
+++ b/cpp/include/cudf/strings/detail/utilities.hpp
@@ -30,7 +30,7 @@ namespace detail {
  * @param strings_count Number of strings in the column.
  * @param null_count Number of null string entries in the column.
  * @param bytes Number of bytes for the chars column.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return The chars child column for a strings column.
  */
@@ -44,7 +44,7 @@ std::unique_ptr<column> create_chars_child_column(
 /**
  * @brief Create a strings column with no strings.
  *
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return Empty strings column
  */
@@ -65,7 +65,7 @@ rmm::device_vector<string_view> create_string_vector_from_column(cudf::strings_c
  * @brief Creates an offsets column from a string_view vector.
  *
  * @param strings Strings column
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return Child offsets column
  */
@@ -80,7 +80,7 @@ std::unique_ptr<cudf::column> child_offsets_from_string_vector(
  * @param strings Strings vector
  * @param d_offsets Offsets vector for placing strings into column's memory.
  * @param null_count Number of null strings.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return Child chars column
  */
diff --git a/cpp/include/cudf/strings/find.hpp b/cpp/include/cudf/strings/find.hpp
index 53b6c0a2f8f..b95ed81b71e 100644
--- a/cpp/include/cudf/strings/find.hpp
+++ b/cpp/include/cudf/strings/find.hpp
@@ -45,7 +45,7 @@ namespace strings {
  * @param start First character position to include in the search.
  * @param stop Last position (exclusive) to include in the search.
  *             Default of -1 will search to the end of the string.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New integer column with character position values.
  */
 std::unique_ptr<column> find(strings_column_view const& strings,
@@ -73,7 +73,7 @@ std::unique_ptr<column> find(strings_column_view const& strings,
  * @param start First position to include in the search.
  * @param stop Last position (exclusive) to include in the search.
  *             Default of -1 will search starting at the end of the string.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New integer column with character position values.
  */
 std::unique_ptr<column> rfind(
@@ -93,7 +93,7 @@ std::unique_ptr<column> rfind(
  *
  * @param strings Strings instance for this operation.
  * @param target UTF-8 encoded string to search for in each string.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New BOOL8 column.
  */
 std::unique_ptr<column> contains(
@@ -113,7 +113,7 @@ std::unique_ptr<column> contains(
  *
  * @param strings Strings instance for this operation.
  * @param target UTF-8 encoded string to search for in each string.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New BOOL8 column.
  */
 std::unique_ptr<column> starts_with(
@@ -133,7 +133,7 @@ std::unique_ptr<column> starts_with(
  *
  * @param strings Strings instance for this operation.
  * @param target UTF-8 encoded string to search for in each string.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New BOOL8 column.
  */
 std::unique_ptr<column> ends_with(
diff --git a/cpp/include/cudf/strings/find_multiple.hpp b/cpp/include/cudf/strings/find_multiple.hpp
index 6a61f4b103d..bc0100abd4a 100644
--- a/cpp/include/cudf/strings/find_multiple.hpp
+++ b/cpp/include/cudf/strings/find_multiple.hpp
@@ -45,7 +45,7 @@ namespace strings {
  *
  * @param strings Strings instance for this operation.
  * @param targets Strings to search for in each string.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New integer column with character position values.
  */
 std::unique_ptr<column> find_multiple(
diff --git a/cpp/include/cudf/strings/padding.hpp b/cpp/include/cudf/strings/padding.hpp
index 0cbc249919c..0ac5bb388c6 100644
--- a/cpp/include/cudf/strings/padding.hpp
+++ b/cpp/include/cudf/strings/padding.hpp
@@ -57,7 +57,7 @@ enum class pad_side {
  *        Default is pad right (left justify).
  * @param fill_char Single UTF-8 character to use for padding.
  *        Default is the space character.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New column with padded strings.
  */
 std::unique_ptr<column> pad(strings_column_view const& strings,
@@ -85,7 +85,7 @@ std::unique_ptr<column> pad(strings_column_view const& strings,
  *
  * @param strings Strings instance for this operation.
  * @param width The minimum number of characters for each string.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New column of strings.
  */
 std::unique_ptr<column> zfill(
diff --git a/cpp/include/cudf/strings/replace.hpp b/cpp/include/cudf/strings/replace.hpp
index 8d85fa88d85..2a57b99b0fe 100644
--- a/cpp/include/cudf/strings/replace.hpp
+++ b/cpp/include/cudf/strings/replace.hpp
@@ -56,7 +56,7 @@ namespace strings {
  * @param repl Replacement string if target is found.
  * @param maxrepl Maximum times to replace if target appears multiple times in the input string.
  *        Default of -1 specifies replace all occurrences of target in each string.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New strings column.
  */
 std::unique_ptr<column> replace(
@@ -96,7 +96,7 @@ std::unique_ptr<column> replace(
  *        Default is 0, first character position.
  * @param stop End position (exclusive) to use for replacement.
  *        Default of -1 specifies the end of each string.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New strings column.
  */
 std::unique_ptr<column> replace_slice(
@@ -141,7 +141,7 @@ std::unique_ptr<column> replace_slice(
  * @param strings Strings column for this operation.
  * @param targets Strings to search for in each string.
  * @param repls Corresponding replacement strings for target strings.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New strings column.
  */
 std::unique_ptr<column> replace(
@@ -164,7 +164,7 @@ std::unique_ptr<column> replace(
  *
  * @param strings Strings column for this operation.
  * @param repl Replacement string for null entries. Default is empty string.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New strings column.
  */
 std::unique_ptr<column> replace_nulls(
diff --git a/cpp/include/cudf/strings/replace_re.hpp b/cpp/include/cudf/strings/replace_re.hpp
index 0ab1be68eeb..1d409031a7e 100644
--- a/cpp/include/cudf/strings/replace_re.hpp
+++ b/cpp/include/cudf/strings/replace_re.hpp
@@ -39,7 +39,7 @@ namespace strings {
  * @param repl The string used to replace the matched sequence in each string.
  *        Default is an empty string.
  * @param maxrepl The maximum number of times to replace the matched pattern within each string.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New strings column.
  */
 std::unique_ptr<column> replace_re(
@@ -60,7 +60,7 @@ std::unique_ptr<column> replace_re(
  * @param strings Strings instance for this operation.
  * @param patterns The regular expression patterns to search within each string.
  * @param repls The strings used for replacement.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New strings column.
  */
 std::unique_ptr<column> replace_re(
@@ -80,7 +80,7 @@ std::unique_ptr<column> replace_re(
  * @param strings Strings instance for this operation.
  * @param pattern The regular expression patterns to search within each string.
  * @param repl The replacement template for creating the output string.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New strings column.
  */
 std::unique_ptr<column> replace_with_backrefs(
diff --git a/cpp/include/cudf/strings/sorting.hpp b/cpp/include/cudf/strings/sorting.hpp
index 7116d9aabb3..fd414820e04 100644
--- a/cpp/include/cudf/strings/sorting.hpp
+++ b/cpp/include/cudf/strings/sorting.hpp
@@ -40,7 +40,7 @@ enum sort_type {
  * @param order Sort strings in ascending or descending order.
  * @param null_order Sort nulls to the beginning or the end of the new column.
  * @param stream CUDA stream used for device memory operations and kernel launches.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New strings column with sorted elements of this instance.
  */
 std::unique_ptr<cudf::column> sort(
diff --git a/cpp/include/cudf/strings/strip.hpp b/cpp/include/cudf/strings/strip.hpp
index 51b6cf51d9e..0f70c9d6fb6 100644
--- a/cpp/include/cudf/strings/strip.hpp
+++ b/cpp/include/cudf/strings/strip.hpp
@@ -62,7 +62,7 @@ enum class strip_type {
  * string. Default is both.
  * @param to_strip UTF-8 encoded characters to strip from each string.
  *        Default is empty string which indicates strip whitespace characters.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New strings column.
  */
 std::unique_ptr<column> strip(
diff --git a/cpp/include/cudf/strings/substring.hpp b/cpp/include/cudf/strings/substring.hpp
index 5fdae1b58a3..04387f047a8 100644
--- a/cpp/include/cudf/strings/substring.hpp
+++ b/cpp/include/cudf/strings/substring.hpp
@@ -51,7 +51,7 @@ namespace strings {
  * @param start First character position to begin the substring.
  * @param stop Last character position (exclusive) to end the substring.
  * @param step Distance between input characters retrieved.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New strings column with sorted elements of this instance.
  */
 std::unique_ptr<column> slice_strings(
@@ -95,7 +95,7 @@ std::unique_ptr<column> slice_strings(
  * @param strings Strings column for this operation.
  * @param starts First character positions to begin the substring.
  * @param stops Last character (exclusive) positions to end the substring.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New strings column with sorted elements of this instance.
  */
 std::unique_ptr<column> slice_strings(
diff --git a/cpp/include/cudf/strings/translate.hpp b/cpp/include/cudf/strings/translate.hpp
index e16e11d0afb..cd58f930b0e 100644
--- a/cpp/include/cudf/strings/translate.hpp
+++ b/cpp/include/cudf/strings/translate.hpp
@@ -42,7 +42,7 @@ namespace strings {
  *
  * @param strings Strings instance for this operation.
  * @param chars_table Table of UTF-8 character mappings.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New column with padded strings.
  */
 std::unique_ptr<column> translate(
diff --git a/cpp/include/cudf/transform.hpp b/cpp/include/cudf/transform.hpp
index 4b6c63a7080..abef24089a6 100644
--- a/cpp/include/cudf/transform.hpp
+++ b/cpp/include/cudf/transform.hpp
@@ -41,7 +41,7 @@ namespace experimental {
  * @param unary_udf     The PTX/CUDA string of the unary function to apply
  * @param outout_type   The output type that is compatible with the output type in the UDF
  * @param is_ptx        true: the UDF is treated as PTX code; false: the UDF is treated as CUDA code
- * @param mr            Device memory resource used to allocate the returned column
+ * @param mr            Device memory resource used to allocate the returned column's device memory
  * @return              The column resulting from applying the unary function to
  *                      every element of the input
  **/
diff --git a/cpp/include/cudf/unary.hpp b/cpp/include/cudf/unary.hpp
index d2a9c13dad2..20bcc554b94 100644
--- a/cpp/include/cudf/unary.hpp
+++ b/cpp/include/cudf/unary.hpp
@@ -56,7 +56,7 @@ enum class unary_op : int32_t {
  *
  * @param input A `column_view` as input
  * @param op operation to perform
- * @param mr Device memory resource used to allocate the returned column
+ * @param mr Device memory resource used to allocate the returned column's device memory
  *
  * @returns Column of same size as `input` containing result of the operation
  */
@@ -70,7 +70,7 @@ std::unique_ptr<cudf::column> unary_operation(
  * indicates the value is null and `false` indicates the value is valid.
  *
  * @param input A `column_view` as input
- * @param mr Device memory resource used to allocate the returned column
+ * @param mr Device memory resource used to allocate the returned column's device memory
  *
  * @returns A non-nullable column of `BOOL8` elements with `true`
  * representing `null` values.
@@ -84,7 +84,7 @@ std::unique_ptr<cudf::column> is_null(
  * indicates the value is valid and `false` indicates the value is null.
  *
  * @param input A `column_view` as input
- * @param mr Device memory resource used to allocate the returned column
+ * @param mr Device memory resource used to allocate the returned column's device memory
  *
  * @returns A non-nullable column of `BOOL8` elements with `false`
  * representing `null` values.
@@ -99,7 +99,7 @@ std::unique_ptr<cudf::column> is_valid(
  *
  * @param column_view Input column
  * @param out_type Desired datatype of output column
- * @param mr Device memory resource used to allocate the returned column
+ * @param mr Device memory resource used to allocate the returned column's device memory
  *
  * @returns Column of same size as `input` containing result of the cast operation
  * @throw cudf::logic_error if `out_type` is not a fixed-width type
@@ -116,7 +116,7 @@ std::unique_ptr<column> cast(column_view const& input,
  * @throws cudf::logic_error if `input` is a non-floating point type
  *
  * @param input A column of floating-point elements
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  *
  * @returns A non-nullable column of `BOOL8` elements with `true` representing `NAN` values
  */
@@ -132,7 +132,7 @@ std::unique_ptr<column> is_nan(
  * @throws cudf::logic_error if `input` is a non-floating point type
  *
  * @param input A column of floating-point elements
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  *
  * @returns A non-nullable column of `BOOL8` elements with `false` representing `NAN` values
  */
diff --git a/cpp/include/nvtext/detail/tokenize.hpp b/cpp/include/nvtext/detail/tokenize.hpp
index bf3f2ee00f1..a3051ea96fb 100644
--- a/cpp/include/nvtext/detail/tokenize.hpp
+++ b/cpp/include/nvtext/detail/tokenize.hpp
@@ -28,7 +28,7 @@ namespace detail {
  * @param strings Strings column tokenize.
  * @param delimiter UTF-8 characters used to separate each string into tokens.
  *                  The default of empty string will separate tokens using whitespace.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return New strings columns of tokens.
  */
@@ -44,7 +44,7 @@ std::unique_ptr<cudf::column> tokenize(
  *
  * @param strings Strings column to tokenize.
  * @param delimiters Strings used to separate individual strings into tokens.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return New strings columns of tokens.
  */
@@ -61,7 +61,7 @@ std::unique_ptr<cudf::column> tokenize(
  * @param strings Strings column to use for this operation.
  * @param delimiter Strings used to separate each string into tokens.
  *                  The default of empty string will separate tokens using whitespace.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return New INT32 column of token counts.
  */
@@ -77,7 +77,7 @@ std::unique_ptr<cudf::column> count_tokens(
  *
  * @param strings Strings column to use for this operation.
  * @param delimiters Strings used to separate each string into tokens.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return New INT32 column of token counts.
  */
diff --git a/cpp/include/nvtext/generate_ngrams.hpp b/cpp/include/nvtext/generate_ngrams.hpp
index f83b3e817bc..7fe4439fcdc 100644
--- a/cpp/include/nvtext/generate_ngrams.hpp
+++ b/cpp/include/nvtext/generate_ngrams.hpp
@@ -47,7 +47,7 @@ namespace nvtext {
  *               Default is 2 = bigram.
  * @param separator The string to use for separating ngram tokens.
  *                  Default is "_" character.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New strings columns of tokens.
  */
 std::unique_ptr<cudf::column> generate_ngrams(
diff --git a/cpp/include/nvtext/ngrams_tokenize.hpp b/cpp/include/nvtext/ngrams_tokenize.hpp
index 6fd1a5fba6b..8b1646d87e0 100644
--- a/cpp/include/nvtext/ngrams_tokenize.hpp
+++ b/cpp/include/nvtext/ngrams_tokenize.hpp
@@ -72,7 +72,7 @@ namespace nvtext {
  *                  The default of empty string will separate tokens using whitespace.
  * @param separator The string to use for separating ngram tokens.
  *                  Default is "_" character.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New strings columns of tokens.
  */
 std::unique_ptr<cudf::column> ngrams_tokenize(
diff --git a/cpp/include/nvtext/normalize.hpp b/cpp/include/nvtext/normalize.hpp
index e92718876aa..329a0073985 100644
--- a/cpp/include/nvtext/normalize.hpp
+++ b/cpp/include/nvtext/normalize.hpp
@@ -44,7 +44,7 @@ namespace nvtext {
  * for row `i` in the output column.
  *
  * @param strings Strings column to normalize.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New strings columns of normalized strings.
  */
 std::unique_ptr<cudf::column> normalize_spaces(
diff --git a/cpp/include/nvtext/tokenize.hpp b/cpp/include/nvtext/tokenize.hpp
index 908d04af1d1..50227c4704e 100644
--- a/cpp/include/nvtext/tokenize.hpp
+++ b/cpp/include/nvtext/tokenize.hpp
@@ -51,7 +51,7 @@ namespace nvtext {
  * @param strings Strings column tokenize.
  * @param delimiter UTF-8 characters used to separate each string into tokens.
  *                  The default of empty string will separate tokens using whitespace.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New strings columns of tokens.
  */
 std::unique_ptr<cudf::column> tokenize(
@@ -85,7 +85,7 @@ std::unique_ptr<cudf::column> tokenize(
  *
  * @param strings Strings column to tokenize.
  * @param delimiters Strings used to separate individual strings into tokens.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New strings columns of tokens.
  */
 std::unique_ptr<cudf::column> tokenize(
@@ -114,7 +114,7 @@ std::unique_ptr<cudf::column> tokenize(
  * @param strings Strings column to use for this operation.
  * @param delimiter Strings used to separate each string into tokens.
  *                  The default of empty string will separate tokens using whitespace.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New INT32 column of token counts.
  */
 std::unique_ptr<cudf::column> count_tokens(
@@ -144,7 +144,7 @@ std::unique_ptr<cudf::column> count_tokens(
  *
  * @param strings Strings column to use for this operation.
  * @param delimiters Strings used to separate each string into tokens.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New INT32 column of token counts.
  */
 std::unique_ptr<cudf::column> count_tokens(
diff --git a/cpp/src/binaryop/compiled/binary_ops.hpp b/cpp/src/binaryop/compiled/binary_ops.hpp
index 63560a97ea1..5f76856aafd 100644
--- a/cpp/src/binaryop/compiled/binary_ops.hpp
+++ b/cpp/src/binaryop/compiled/binary_ops.hpp
@@ -58,7 +58,7 @@ namespace compiled {
  * @param lhs         The left operand string scalar
  * @param rhs         The right operand string column
  * @param output_type The desired data type of the output column
- * @param mr          Device memory resource used to allocate the returned column
+ * @param mr          Device memory resource used to allocate the returned column's device memory
  * @param stream      CUDA stream used for device memory operations and kernel launches.
  * @return std::unique_ptr<column> Output column
  */
@@ -84,7 +84,7 @@ std::unique_ptr<column> binary_operation(
  * @param lhs         The left operand string column
  * @param rhs         The right operand string scalar
  * @param output_type The desired data type of the output column
- * @param mr          Device memory resource used to allocate the returned column
+ * @param mr          Device memory resource used to allocate the returned column's device memory
  * @param stream      CUDA stream used for device memory operations and kernel launches.
  * @return std::unique_ptr<column> Output column
  */
@@ -110,7 +110,7 @@ std::unique_ptr<column> binary_operation(
  * @param lhs         The left operand string column
  * @param rhs         The right operand string column
  * @param output_type The desired data type of the output column
- * @param mr          Device memory resource used to allocate the returned column
+ * @param mr          Device memory resource used to allocate the returned column's device memory
  * @param stream      CUDA stream used for device memory operations and kernel launches.
  * @return std::unique_ptr<column> Output column
  */
diff --git a/cpp/src/dictionary/remove_keys.cu b/cpp/src/dictionary/remove_keys.cu
index 0001155868f..b60daf814a8 100644
--- a/cpp/src/dictionary/remove_keys.cu
+++ b/cpp/src/dictionary/remove_keys.cu
@@ -43,7 +43,7 @@ namespace {
  *                    and returns true if that key is to be used in the output dictionary.
  * @param dictionary_column The column to use for creating the new dictionary.
  * @param keys_to_keep_fn Called to determine which keys in `dictionary_column` to keep.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 template <typename KeysKeeper>
diff --git a/cpp/src/groupby/sort/group_reductions.hpp b/cpp/src/groupby/sort/group_reductions.hpp
index 195b97dcf01..90f2d8d5f69 100644
--- a/cpp/src/groupby/sort/group_reductions.hpp
+++ b/cpp/src/groupby/sort/group_reductions.hpp
@@ -33,7 +33,7 @@ namespace detail {
  * @param values Grouped values to get sum of
  * @param num_groups Number of groups
  * @param group_labels ID of group that the corresponding value belongs to
- * @param mr Device memory resource used to allocate the returned column
+ * @param mr Device memory resource used to allocate the returned column's device memory
  * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> group_sum(column_view const& values,
@@ -48,7 +48,7 @@ std::unique_ptr<column> group_sum(column_view const& values,
  * @param values Grouped values to get minimum from
  * @param num_groups Number of groups
  * @param group_labels ID of group that the corresponding value belongs to
- * @param mr Device memory resource used to allocate the returned column
+ * @param mr Device memory resource used to allocate the returned column's device memory
  * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> group_min(column_view const& values,
@@ -63,7 +63,7 @@ std::unique_ptr<column> group_min(column_view const& values,
  * @param values Grouped values to get maximum from
  * @param num_groups Number of groups
  * @param group_labels ID of group that the corresponding value belongs to
- * @param mr Device memory resource used to allocate the returned column
+ * @param mr Device memory resource used to allocate the returned column's device memory
  * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> group_max(column_view const& values,
@@ -79,7 +79,7 @@ std::unique_ptr<column> group_max(column_view const& values,
  * @param num_groups Number of groups
  * @param group_labels ID of group that the corresponding value belongs to
  * @param key_sort_order Indices indicating sort order of groupby keys
- * @param mr Device memory resource used to allocate the returned column
+ * @param mr Device memory resource used to allocate the returned column's device memory
  * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> group_argmax(column_view const& values,
@@ -96,7 +96,7 @@ std::unique_ptr<column> group_argmax(column_view const& values,
  * @param num_groups Number of groups
  * @param group_labels ID of group that the corresponding value belongs to
  * @param key_sort_order Indices indicating sort order of groupby keys
- * @param mr Device memory resource used to allocate the returned column
+ * @param mr Device memory resource used to allocate the returned column's device memory
  * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> group_argmin(column_view const& values,
@@ -113,7 +113,7 @@ std::unique_ptr<column> group_argmin(column_view const& values,
  * @param values Grouped values to get valid count of
  * @param group_labels ID of group that the corresponding value belongs to
  * @param num_groups Number of groups ( unique values in @p group_labels )
- * @param mr Device memory resource used to allocate the returned column
+ * @param mr Device memory resource used to allocate the returned column's device memory
  * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> group_count_valid(column_view const& values,
@@ -127,7 +127,7 @@ std::unique_ptr<column> group_count_valid(column_view const& values,
  *
  * @param group_offsets Offsets of groups' starting points within @p values
  * @param num_groups Number of groups ( unique values in @p group_labels )
- * @param mr Device memory resource used to allocate the returned column
+ * @param mr Device memory resource used to allocate the returned column's device memory
  * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> group_count_all(rmm::device_vector<size_type> const& group_offsets,
@@ -144,7 +144,7 @@ std::unique_ptr<column> group_count_all(rmm::device_vector<size_type> const& gro
  * @param group_labels ID of group corresponding value in @p values belongs to
  * @param ddof Delta degrees of freedom. The divisor used in calculation of
  *             `var` is `N - ddof`, where `N` is the group size.
- * @param mr Device memory resource used to allocate the returned column
+ * @param mr Device memory resource used to allocate the returned column's device memory
  * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> group_var(column_view const& values,
@@ -163,7 +163,7 @@ std::unique_ptr<column> group_var(column_view const& values,
  * @param group_offsets Offsets of groups' starting points within @p values
  * @param quantiles List of quantiles q where q lies in [0,1]
  * @param interp Method to use when desired value lies between data points
- * @param mr Device memory resource used to allocate the returned column
+ * @param mr Device memory resource used to allocate the returned column's device memory
  * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> group_quantiles(column_view const& values,
@@ -186,7 +186,7 @@ std::unique_ptr<column> group_quantiles(column_view const& values,
  * @param null_handling Exclude nulls while counting if null_policy::EXCLUDE,
  *  Include nulls if null_policy::INCLUDE.
  *  Nulls are treated equal.
- * @param mr Device memory resource used to allocate the returned column
+ * @param mr Device memory resource used to allocate the returned column's device memory
  * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> group_nunique(column_view const& values,
@@ -208,7 +208,7 @@ std::unique_ptr<column> group_nunique(column_view const& values,
  * @param n nth element to choose from each group of @p values
  * @param null_handling Exclude nulls while counting if null_policy::EXCLUDE,
  *  Include nulls if null_policy::INCLUDE.
- * @param mr Device memory resource used to allocate the returned column
+ * @param mr Device memory resource used to allocate the returned column's device memory
  * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> group_nth_element(column_view const& values,
diff --git a/cpp/src/io/utilities/column_buffer.hpp b/cpp/src/io/utilities/column_buffer.hpp
index b0a9ca38fd3..4732e6c96bf 100644
--- a/cpp/src/io/utilities/column_buffer.hpp
+++ b/cpp/src/io/utilities/column_buffer.hpp
@@ -108,7 +108,7 @@ namespace {
  * @param size List of page information
  * @param size List of page information
  * @param stream CUDA stream used for device memory operations and kernel launches.
- * @param mr Device memory resource used to allocate the returned column
+ * @param mr Device memory resource used to allocate the returned column's device memory
  *
  * @return `std::unique_ptr<cudf::column>` Column from the existing device data
  */
diff --git a/cpp/src/reductions/scan.cu b/cpp/src/reductions/scan.cu
index 72b9998921c..498dbd29cf2 100644
--- a/cpp/src/reductions/scan.cu
+++ b/cpp/src/reductions/scan.cu
@@ -184,7 +184,7 @@ struct ScanDispatcher {
    *
    * @param input     input column view
    * @param inclusive inclusive or exclusive scan
-   * @param mr Device memory resource used to allocate the returned column
+   * @param mr Device memory resource used to allocate the returned column's device memory
    * @param stream CUDA stream used for device memory operations and kernel launches.
    * @return
    *
diff --git a/cpp/src/strings/attributes.cu b/cpp/src/strings/attributes.cu
index 5f4dab419c3..5efefd8dc50 100644
--- a/cpp/src/strings/attributes.cu
+++ b/cpp/src/strings/attributes.cu
@@ -41,7 +41,7 @@ namespace {
  * @param strings Strings instance for this operation.
  * @param ufn Function returns an integer for each string.
  * @param stream CUDA stream used for device memory operations and kernel launches.
- * @param mr Device memory resource used to allocate the returned column
+ * @param mr Device memory resource used to allocate the returned column's device memory
  * @return New INT32 column with lengths for each string.
  */
 template <typename UnaryFunction>
diff --git a/cpp/src/strings/case.cu b/cpp/src/strings/case.cu
index e9006f8ac0d..83cf8e04e48 100644
--- a/cpp/src/strings/case.cu
+++ b/cpp/src/strings/case.cu
@@ -124,7 +124,7 @@ struct upper_lower_fn {
  *
  * @param strings Strings to convert.
  * @param case_flag The character type to convert (upper, lower, or both)
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return New strings column with characters converted.
  */
diff --git a/cpp/src/strings/find.cu b/cpp/src/strings/find.cu
index 4365f986334..90ed239bda7 100644
--- a/cpp/src/strings/find.cu
+++ b/cpp/src/strings/find.cu
@@ -43,7 +43,7 @@ namespace {
  * @param start First character position to start the search.
  * @param stop Last character position (exclusive) to end the search.
  * @param pfn Functor used for locating `target` in each string.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return New integer column with character position values.
  */
@@ -166,7 +166,7 @@ namespace {
  * @param strings Column of strings to check for target.
  * @param target UTF-8 encoded string to check in strings column.
  * @param pfn Returns bool value if target is found in the given string.
- * @param mr Device memory resource used to allocate the returned column.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return New BOOL column.
  */
diff --git a/cpp/src/strings/utilities.cuh b/cpp/src/strings/utilities.cuh
index 541f96c57b8..a2b5bf3ee7d 100644
--- a/cpp/src/strings/utilities.cuh
+++ b/cpp/src/strings/utilities.cuh
@@ -67,7 +67,7 @@ __device__ inline char* copy_string(char* buffer, const string_view& d_string)
  * chars memory.
  * @param strings_count Number of strings.
  * @param null_count Number of nulls in the strings column.
- * @param mr Device memory resource used to allocate the returned columns.
+ * @param mr Device memory resource used to allocate the returned columns' device memory.
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return offsets child column and chars child column for a strings column
  */

From 2acf6602b4d4ed4bca5778a102d640f5976dd4e2 Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Thu, 21 May 2020 15:21:55 +0530
Subject: [PATCH 63/90] add device memory to table param mr doc

---
 cpp/include/cudf/concatenate.hpp             | 2 +-
 cpp/include/cudf/copying.hpp                 | 4 ++--
 cpp/include/cudf/detail/scatter.hpp          | 4 ++--
 cpp/include/cudf/dlpack.hpp                  | 2 +-
 cpp/include/cudf/filling.hpp                 | 4 ++--
 cpp/include/cudf/groupby.hpp                 | 5 +++--
 cpp/include/cudf/join.hpp                    | 6 +++---
 cpp/include/cudf/partitioning.hpp            | 4 ++--
 cpp/include/cudf/strings/extract.hpp         | 2 +-
 cpp/include/cudf/strings/findall.hpp         | 2 +-
 cpp/include/cudf/strings/split/partition.hpp | 4 ++--
 cpp/include/cudf/strings/split/split.hpp     | 4 ++--
 cpp/src/join/join.cu                         | 2 +-
 13 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/cpp/include/cudf/concatenate.hpp b/cpp/include/cudf/concatenate.hpp
index 34926e1a1d0..82595344a40 100644
--- a/cpp/include/cudf/concatenate.hpp
+++ b/cpp/include/cudf/concatenate.hpp
@@ -82,7 +82,7 @@ std::unique_ptr<column> concatenate(
  *
  * @param tables_to_concat The table views to be concatenated into a single
  * table
- * @param mr Device memory resource used to allocate the returned table.
+ * @param mr Device memory resource used to allocate the returned table's device memory.
  * @return Unique pointer to a single table having all the rows from the
  * elements of `tables_to_concat` respectively in the same order.
  */
diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp
index e87b7c7217e..59bd8f4c583 100644
--- a/cpp/include/cudf/copying.hpp
+++ b/cpp/include/cudf/copying.hpp
@@ -100,7 +100,7 @@ std::unique_ptr<table> gather(
  * are to be scattered
  * @param check_bounds Optionally perform bounds checking on the values of
  * `scatter_map` and throw an error if any of its values are out of bounds.
- * @param mr Device memory resource used to allocate the returned table.
+ * @param mr Device memory resource used to allocate the returned table's device memory.
  * @return Result of scattering values from source to target
  */
 std::unique_ptr<table> scatter(
@@ -140,7 +140,7 @@ std::unique_ptr<table> scatter(
  * are to be scattered
  * @param check_bounds Optionally perform bounds checking on the values of
  * `scatter_map` and throw an error if any of its values are out of bounds.
- * @param mr Device memory resource used to allocate the returned table.
+ * @param mr Device memory resource used to allocate the returned table's device memory.
  * @return Result of scattering values from source to target
  */
 std::unique_ptr<table> scatter(
diff --git a/cpp/include/cudf/detail/scatter.hpp b/cpp/include/cudf/detail/scatter.hpp
index a2a50ad1028..fd2c1327e51 100644
--- a/cpp/include/cudf/detail/scatter.hpp
+++ b/cpp/include/cudf/detail/scatter.hpp
@@ -56,7 +56,7 @@ namespace detail {
  * are to be scattered
  * @param check_bounds Optionally perform bounds checking on the values of
  * `scatter_map` and throw an error if any of its values are out of bounds.
- * @param mr Device memory resource used to allocate the returned table
+ * @param mr Device memory resource used to allocate the returned table's device memory
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return Result of scattering values from source to target
  **/
@@ -96,7 +96,7 @@ std::unique_ptr<table> scatter(
  * are to be scattered
  * @param check_bounds Optionally perform bounds checking on the values of
  * `scatter_map` and throw an error if any of its values are out of bounds.
- * @param mr Device memory resource used to allocate the returned table
+ * @param mr Device memory resource used to allocate the returned table's device memory
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return Result of scattering values from source to target
  **/
diff --git a/cpp/include/cudf/dlpack.hpp b/cpp/include/cudf/dlpack.hpp
index c66498e93f2..81fc57633a5 100644
--- a/cpp/include/cudf/dlpack.hpp
+++ b/cpp/include/cudf/dlpack.hpp
@@ -39,7 +39,7 @@ namespace cudf {
  * @throw cudf::logic_error if the any of the DLTensor fields are unsupported
  *
  * @param managed_tensor a 1D or 2D column-major (Fortran order) tensor
- * @param mr Device memory resource used to allocate the returned table.
+ * @param mr Device memory resource used to allocate the returned table's device memory.
  *
  * @return Table with a copy of the tensor data
  */
diff --git a/cpp/include/cudf/filling.hpp b/cpp/include/cudf/filling.hpp
index 9c9bff782a2..888dcf152a2 100644
--- a/cpp/include/cudf/filling.hpp
+++ b/cpp/include/cudf/filling.hpp
@@ -113,7 +113,7 @@ std::unique_ptr<column> fill(column_view const& input,
  * @param input_table Input table
  * @param count Non-nullable column of an integral type
  * @param check_count Whether to check count (negative values and overflow)
- * @param mr Device memory resource used to allocate the returned table
+ * @param mr Device memory resource used to allocate the returned table's device memory
  * @return The result table containing the repetitions
  */
 std::unique_ptr<table> repeat(
@@ -139,7 +139,7 @@ std::unique_ptr<table> repeat(
  *
  * @param input_table Input table
  * @param count Non-null scalar of an integral type
- * @param mr Device memory resource used to allocate the returned table.
+ * @param mr Device memory resource used to allocate the returned table's device memory.
  * @return The result table containing the repetitions
  */
 std::unique_ptr<table> repeat(
diff --git a/cpp/include/cudf/groupby.hpp b/cpp/include/cudf/groupby.hpp
index 9d44a1e346a..64892dbb565 100644
--- a/cpp/include/cudf/groupby.hpp
+++ b/cpp/include/cudf/groupby.hpp
@@ -156,7 +156,7 @@ class groupby {
    *
    * @param requests The set of columns to aggregate and the aggregations to
    * perform
-   * @param mr Device memory resource used to allocate the returned table and columns
+   * @param mr Device memory resource used to allocate the returned table and columns' device memory
    * @return Pair containing the table with each group's unique key and
    * a vector of aggregation_results for each request in the same order as
    * specified in `requests`.
@@ -187,7 +187,8 @@ class groupby {
    * and the `values` of the `groups` object will be `nullptr`.
    *
    * @param values Table representing values on which a groupby operation is to be performed
-   * @param mr Device memory resource used to allocate the returned tables in the returned groups
+   * @param mr Device memory resource used to allocate the returned tables' device memory in the
+   * returned groups
    * @return A `groups` object representing grouped keys and values
    */
   groups get_groups(cudf::table_view values             = {},
diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp
index 53b16a87291..f1d87e84a60 100644
--- a/cpp/include/cudf/join.hpp
+++ b/cpp/include/cudf/join.hpp
@@ -77,7 +77,7 @@ namespace experimental {
  * from `left_on` columns. Else, for every column in `left_on` and `right_on`,
  * an output column will be produced.  For each of these pairs (L, R), L
  * should exist in `left_on` and R should exist in `right_on`.
- * @param mr Device memory resource used to allocate the returned table and columns
+ * @param mr Device memory resource used to allocate the returned table and columns' device memory
  *
  * @returns Result of joining `left` and `right` tables on the columns
  * specified by `left_on` and `right_on`. The resulting table will be joined columns of
@@ -142,7 +142,7 @@ std::unique_ptr<cudf::experimental::table> inner_join(
  * from `left_on` columns. Else, for every column in `left_on` and `right_on`,
  * an output column will be produced.  For each of these pairs (L, R), L
  * should exist in `left_on` and R should exist in `right_on`.
- * @param mr Device memory resource used to allocate the returned table and columns
+ * @param mr Device memory resource used to allocate the returned table and columns' device memory
  *
  * @returns Result of joining `left` and `right` tables on the columns
  * specified by `left_on` and `right_on`. The resulting table will be joined columns of
@@ -207,7 +207,7 @@ std::unique_ptr<cudf::experimental::table> left_join(
  * from `left_on` columns. Else, for every column in `left_on` and `right_on`,
  * an output column will be produced.  For each of these pairs (L, R), L
  * should exist in `left_on` and R should exist in `right_on`.
- * @param mr Device memory resource used to allocate the returned table and columns
+ * @param mr Device memory resource used to allocate the returned table and columns' device memory
  *
  * @returns Result of joining `left` and `right` tables on the columns
  * specified by `left_on` and `right_on`. The resulting table will be joined columns of
diff --git a/cpp/include/cudf/partitioning.hpp b/cpp/include/cudf/partitioning.hpp
index 53e5c511ffb..2885f752684 100644
--- a/cpp/include/cudf/partitioning.hpp
+++ b/cpp/include/cudf/partitioning.hpp
@@ -55,7 +55,7 @@ namespace experimental {
  * @param partition_map Non-nullable column of integer values that map each row
  * in `t` to it's partition.
  * @param num_partitions The total number of partitions.
- * @param mr Device memory resource used to allocate the returned table.
+ * @param mr Device memory resource used to allocate the returned table's device memory.
  * @return Pair containing the reordered table and vector of `num_partitions +
  * 1` offsets to each partition such that the size of partition `i` is
  * determined by `offset[i+1] - offset[i]`.
@@ -79,7 +79,7 @@ std::pair<std::unique_ptr<table>, std::vector<size_type>> partition(
  * @param input The table to partition
  * @param columns_to_hash Indices of input columns to hash
  * @param num_partitions The number of partitions to use
- * @param mr Device memory resource used to allocate the returned table.
+ * @param mr Device memory resource used to allocate the returned table's device memory.
  *
  * @returns An output table and a vector of row offsets to each partition
  */
diff --git a/cpp/include/cudf/strings/extract.hpp b/cpp/include/cudf/strings/extract.hpp
index fbde5ac8ae7..b71ef8614f2 100644
--- a/cpp/include/cudf/strings/extract.hpp
+++ b/cpp/include/cudf/strings/extract.hpp
@@ -46,7 +46,7 @@ namespace strings {
  *
  * @param strings Strings instance for this operation.
  * @param pattern The regular expression pattern with group indicators.
- * @param mr Device memory resource used to allocate the returned table.
+ * @param mr Device memory resource used to allocate the returned table's device memory.
  * @return Columns of strings extracted from the input column.
  */
 std::unique_ptr<experimental::table> extract(
diff --git a/cpp/include/cudf/strings/findall.hpp b/cpp/include/cudf/strings/findall.hpp
index a2ca92b932c..3ad6ef0659b 100644
--- a/cpp/include/cudf/strings/findall.hpp
+++ b/cpp/include/cudf/strings/findall.hpp
@@ -48,7 +48,7 @@ namespace strings {
  *
  * @param strings Strings instance for this operation.
  * @param pattern Regex pattern to match within each string.
- * @param mr Device memory resource used to allocate the returned table.
+ * @param mr Device memory resource used to allocate the returned table's device memory.
  * @return New table of strings columns.
  */
 std::unique_ptr<experimental::table> findall_re(
diff --git a/cpp/include/cudf/strings/split/partition.hpp b/cpp/include/cudf/strings/split/partition.hpp
index 943e75c1f9d..e54fe33e8db 100644
--- a/cpp/include/cudf/strings/split/partition.hpp
+++ b/cpp/include/cudf/strings/split/partition.hpp
@@ -50,7 +50,7 @@ namespace strings {
  * @param strings Strings instance for this operation.
  * @param delimiter UTF-8 encoded string indicating where to split each string.
  *        Default of empty string indicates split on whitespace.
- * @param mr Device memory resource used to allocate the returned table.
+ * @param mr Device memory resource used to allocate the returned table's device memory.
  * @return New table of strings columns.
  */
 std::unique_ptr<experimental::table> partition(
@@ -82,7 +82,7 @@ std::unique_ptr<experimental::table> partition(
  * @param strings Strings instance for this operation.
  * @param delimiter UTF-8 encoded string indicating where to split each string.
  *        Default of empty string indicates split on whitespace.
- * @param mr Device memory resource used to allocate the returned table.
+ * @param mr Device memory resource used to allocate the returned table's device memory.
  * @return New strings columns.
  */
 std::unique_ptr<experimental::table> rpartition(
diff --git a/cpp/include/cudf/strings/split/split.hpp b/cpp/include/cudf/strings/split/split.hpp
index 480035ab493..b7ad7c219ff 100644
--- a/cpp/include/cudf/strings/split/split.hpp
+++ b/cpp/include/cudf/strings/split/split.hpp
@@ -45,7 +45,7 @@ namespace strings {
  *        Default of empty string indicates split on whitespace.
  * @param maxsplit Maximum number of splits to perform.
  *        Default of -1 indicates all possible splits on each string.
- * @param mr Device memory resource used to allocate the returned table.
+ * @param mr Device memory resource used to allocate the returned table's device memory.
  * @return New table of strings columns.
  */
 std::unique_ptr<experimental::table> split(
@@ -73,7 +73,7 @@ std::unique_ptr<experimental::table> split(
  *        Default of empty string indicates split on whitespace.
  * @param maxsplit Maximum number of splits to perform.
  *        Default of -1 indicates all possible splits on each string.
- * @param mr Device memory resource used to allocate the returned table.
+ * @param mr Device memory resource used to allocate the returned table's device memory.
  * @return New strings columns.
  */
 std::unique_ptr<experimental::table> rsplit(
diff --git a/cpp/src/join/join.cu b/cpp/src/join/join.cu
index 29f5f138dd3..b792cea4a7a 100644
--- a/cpp/src/join/join.cu
+++ b/cpp/src/join/join.cu
@@ -393,7 +393,7 @@ std::unique_ptr<experimental::table> construct_join_output_df(
  * full join.
  * Else, for every column in `left_on` and `right_on`, an output column will
  * be produced.
- * @param mr Device memory resource used to allocate the returned table
+ * @param mr Device memory resource used to allocate the returned table's device memory
  * @param stream CUDA stream used for device memory operations and kernel launches.
  *
  * @tparam join_kind The type of join to be performed

From 24f7d26ebaa9664fd471b972f43ce111c74c7284 Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Thu, 21 May 2020 23:19:29 +0530
Subject: [PATCH 64/90] more doc update for param mr

---
 cpp/include/cudf/column/column.hpp            |  4 +--
 cpp/include/cudf/column/column_factories.hpp  | 33 +++++++++----------
 cpp/include/cudf/copying.hpp                  | 24 +++++++-------
 cpp/include/cudf/detail/gather.cuh            |  2 +-
 cpp/include/cudf/detail/gather.hpp            |  2 +-
 .../cudf/detail/reduction_functions.hpp       | 20 +++++------
 cpp/include/cudf/detail/scatter.cuh           |  2 +-
 cpp/include/cudf/detail/unary.hpp             |  5 ++-
 cpp/include/cudf/dlpack.hpp                   |  2 +-
 cpp/include/cudf/io/functions.hpp             |  8 ++---
 cpp/include/cudf/join.hpp                     |  6 ++--
 cpp/include/cudf/partitioning.hpp             |  2 +-
 cpp/include/cudf/reduction.hpp                |  4 +--
 cpp/include/cudf/replace.hpp                  | 10 +++---
 cpp/include/cudf/scalar/scalar.hpp            | 16 ++++-----
 cpp/include/cudf/sorting.hpp                  |  8 ++---
 cpp/include/cudf/stream_compaction.hpp        |  9 ++---
 cpp/include/cudf/strings/capitalize.hpp       |  4 +--
 cpp/include/cudf/strings/split/split.hpp      |  4 +--
 cpp/include/cudf/strings/wrap.hpp             |  2 +-
 cpp/src/join/semi_join.cu                     |  3 +-
 cpp/src/partitioning/round_robin.cu           |  2 +-
 cpp/src/reductions/compound.cuh               |  2 +-
 cpp/src/reductions/simple.cuh                 |  2 +-
 cpp/src/stream_compaction/drop_duplicates.cu  |  1 -
 25 files changed, 88 insertions(+), 89 deletions(-)

diff --git a/cpp/include/cudf/column/column.hpp b/cpp/include/cudf/column/column.hpp
index bf552650193..71b6f4aef7f 100644
--- a/cpp/include/cudf/column/column.hpp
+++ b/cpp/include/cudf/column/column.hpp
@@ -59,7 +59,7 @@ class column {
    *
    * @param other The `column` to copy
    * @param stream CUDA stream used for device memory operations.
-   * @param mr Device memory resource to use for all allocations
+   * @param mr Device memory resource to use for all device memory allocations
    */
   column(column const& other,
          cudaStream_t stream,
@@ -114,7 +114,7 @@ class column {
    *
    * @param view The view to copy
    * @param stream CUDA stream used for device memory operations.
-   * @param mr Device memory resource to use for all allocations
+   * @param mr Device memory resource to use for all device memory allocations
    */
   explicit column(column_view view,
                   cudaStream_t stream                 = 0,
diff --git a/cpp/include/cudf/column/column_factories.hpp b/cpp/include/cudf/column/column_factories.hpp
index a8800e8cd55..8bc9d6bf504 100644
--- a/cpp/include/cudf/column/column_factories.hpp
+++ b/cpp/include/cudf/column/column_factories.hpp
@@ -52,8 +52,7 @@ std::unique_ptr<column> make_empty_column(data_type type);
  * column's null mask. By default, no null mask is allocated.
  * @param[in] stream Optional stream on which to issue all memory allocation and
  * device kernels
- * @param[in] mr Optional resource to use for device memory
- * allocation of the column's `data` and `null_mask`.
+ * @param[in] mr Device memory resource used to allocate the returned column's device memory
  */
 std::unique_ptr<column> make_numeric_column(
   data_type type,
@@ -78,8 +77,7 @@ std::unique_ptr<column> make_numeric_column(
  * @param[in] null_count Optional number of nulls in the null_mask.
  * @param[in] stream Optional stream on which to issue all memory allocation and
  * device kernels
- * @param[in] mr Optional resource to use for device memory
- * allocation of the column's `data` and `null_mask`.
+ * @param[in] mr Device memory resource used to allocate the returned column's device memory
  */
 template <typename B>
 std::unique_ptr<column> make_numeric_column(
@@ -114,8 +112,7 @@ std::unique_ptr<column> make_numeric_column(
  * column's null mask. By default, no null mask is allocated.
  * @param[in] stream Optional stream on which to issue all memory allocation and
  * device kernels
- * @param[in] mr Optional resource to use for device memory
- * allocation of the column's `data` and `null_mask`.
+ * @param[in] mr Device memory resource used to allocate the returned column's device memory
  */
 std::unique_ptr<column> make_timestamp_column(
   data_type type,
@@ -140,8 +137,7 @@ std::unique_ptr<column> make_timestamp_column(
  * @param[in] null_count Optional number of nulls in the null_mask.
  * @param[in] stream Optional stream on which to issue all memory allocation and
  * device kernels
- * @param[in] mr Optional resource to use for device memory
- * allocation of the column's `data` and `null_mask`.
+ * @param[in] mr Device memory resource used to allocate the returned column's device memory
  */
 template <typename B>
 std::unique_ptr<column> make_timestamp_column(
@@ -176,8 +172,7 @@ std::unique_ptr<column> make_timestamp_column(
  * column's null mask. By default, no null mask is allocated.
  * @param[in] stream Optional stream on which to issue all memory allocation and device
  * kernels
- * @param[in] mr Optional resource to use for device memory
- * allocation of the column's `data` and `null_mask`.
+ * @param[in] mr Device memory resource used to allocate the returned column's device memory
  */
 std::unique_ptr<column> make_fixed_width_column(
   data_type type,
@@ -202,8 +197,7 @@ std::unique_ptr<column> make_fixed_width_column(
  * @param[in] null_count Optional number of nulls in the null_mask.
  * @param[in] stream Optional stream on which to issue all memory allocation and device
  * kernels
- * @param[in] mr Optional resource to use for device memory
- * allocation of the column's `data` and `null_mask`.
+ * @param[in] mr Device memory resource used to allocate the returned column's device memory
  */
 template <typename B>
 std::unique_ptr<column> make_fixed_width_column(
@@ -240,7 +234,8 @@ std::unique_ptr<column> make_fixed_width_column(
  *                Each pointer must be a device memory address or `nullptr`
  * (indicating a null string). The size must be the number of bytes.
  * @param stream CUDA stream used for device memory operations and kernel launches.
- * @param mr Device memory resource used for allocation of the column's `null_mask` and children.
+ * @param mr Device memory resource used for allocation of the column's `null_mask` and children
+ * columns' device memory.
  */
 std::unique_ptr<column> make_strings_column(
   const rmm::device_vector<thrust::pair<const char*, size_type>>& strings,
@@ -270,7 +265,8 @@ std::unique_ptr<column> make_strings_column(
  * @param null_placeholder string_view indicating null string in given list of
  * string_views.
  * @param stream CUDA stream used for device memory operations and kernel launches.
- * @param mr Device memory resource used for allocation of the column's `null_mask` and children.
+ * @param mr Device memory resource used for allocation of the column's `null_mask` and children
+ * columns' device memory.
  */
 std::unique_ptr<column> make_strings_column(
   const rmm::device_vector<string_view>& string_views,
@@ -307,7 +303,8 @@ std::unique_ptr<column> make_strings_column(
  * `UNKNOWN_NULL_COUNT`, the null count will be computed dynamically on the
  * first invocation of `column::null_count()`
  * @param stream CUDA stream used for device memory operations and kernel launches.
- * @param mr Device memory resource used for allocation of the column's `null_mask` and children.
+ * @param mr Device memory resource used for allocation of the column's `null_mask` and children
+ * columns' device memory.
  */
 std::unique_ptr<column> make_strings_column(
   const rmm::device_vector<char>& strings,
@@ -346,7 +343,8 @@ std::unique_ptr<column> make_strings_column(
  * `UNKNOWN_NULL_COUNT`, the null count will be computed dynamically on the
  * first invocation of `column::null_count()`
  * @param stream CUDA stream used for device memory operations and kernel launches.
- * @param mr Device memory resource used for allocation of the column's `null_mask` and children.
+ * @param mr Device memory resource used for allocation of the column's `null_mask` and children
+ * columns' device memory.
  */
 std::unique_ptr<column> make_strings_column(
   const std::vector<char>& strings,
@@ -373,7 +371,8 @@ std::unique_ptr<column> make_strings_column(
  * @param null_mask The bits specifying the null strings in device memory.
  *                  Arrow format for nulls is used for interpeting this bitmask.
  * @param stream CUDA stream used for device memory operations and kernel launches.
- * @param mr Device memory resource used for allocation of the column's `null_mask` and children.
+ * @param mr Device memory resource used for allocation of the column's `null_mask` and children
+ * columns' device memory.
  */
 std::unique_ptr<column> make_strings_column(
   size_type num_strings,
diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp
index 59bd8f4c583..52342055f26 100644
--- a/cpp/include/cudf/copying.hpp
+++ b/cpp/include/cudf/copying.hpp
@@ -57,7 +57,7 @@ namespace experimental {
  * rows in the source columns to rows in the destination columns.
  * @param[in] check_bounds Optionally perform bounds checking on the values
  * of `gather_map` and throw an error if any of its values are out of bounds.
- * @param[in] mr The resource to use for all allocations
+ * @param[in] mr Device memory resource used to allocate the returned table's device memory
  * @return std::unique_ptr<table> Result of the gather
  */
 std::unique_ptr<table> gather(
@@ -173,8 +173,8 @@ std::unique_ptr<column> empty_like(column_view const& input);
  *
  * @param[in] input Immutable view of input column to emulate
  * @param[in] mask_alloc Optional, Policy for allocating null mask. Defaults to RETAIN.
- * @param[in] mr Optional, The resource to use for all allocations
- * @return std::unique_ptr<column> A column with sufficient uninitialized capacity to hold the same
+ * @param[in] mr Device memory resource used to allocate the returned column's device memory
+ * @return A column with sufficient uninitialized capacity to hold the same
  * number of elements as `input` of the same type as `input.type()`
  */
 std::unique_ptr<column> allocate_like(
@@ -189,7 +189,7 @@ std::unique_ptr<column> allocate_like(
  * @param[in] input Immutable view of input column to emulate
  * @param[in] size The desired number of elements that the new column should have capacity for
  * @param[in] mask_alloc Optional, Policy for allocating null mask. Defaults to RETAIN.
- * @param[in] mr Optional, The resource to use for all allocations
+ * @param[in] mr Device memory resource used to allocate the returned column's device memory
  * @return A column with sufficient uninitialized capacity to hold the specified number of elements
  * as `input` of the same type as `input.type()`
  */
@@ -477,7 +477,7 @@ struct contiguous_split_result {
  *
  * @param input View of a table to split
  * @param splits A vector of indices where the view will be split
- * @param[in] mr Optional, The resource to use for all returned allocations
+ * @param[in] mr Device memory resource used to allocate the returned result's device memory
  * @param[in] stream Optional CUDA stream on which to execute kernels
  * @return The set of requested views of `input` indicated by the `splits` and the viewed memory
  * buffer.
@@ -502,7 +502,7 @@ std::vector<contiguous_split_result> contiguous_split(
  * @param[in] rhs right-hand column_view
  * @param[in] boolean_mask column of `BOOL8` representing "left (true) / right (false)" boolean for
  * each element. Null element represents false.
- * @param[in] mr resource for allocating device memory
+ * @param[in] mr Device memory resource used to allocate the returned column's device memory
  *
  * @returns new column with the selected elements
  */
@@ -565,7 +565,7 @@ std::unique_ptr<column> shift(column_view const& input,
  * @param[in] rhs right-hand column_view
  * @param[in] boolean_mask column of `BOOL8` representing "left (true) / right (false)" boolean for
  * each element. Null element represents false.
- * @param[in] mr resource for allocating device memory
+ * @param[in] mr Device memory resource used to allocate the returned column's device memory
  *
  * @returns new column with the selected elements
  */
@@ -589,7 +589,7 @@ std::unique_ptr<column> copy_if_else(
  * @param[in] rhs right-hand scalar
  * @param[in] boolean_mask column of `BOOL8` representing "left (true) / right (false)" boolean for
  * each element. Null element represents false.
- * @param[in] mr resource for allocating device memory
+ * @param[in] mr Device memory resource used to allocate the returned column's device memory
  *
  * @returns new column with the selected elements
  */
@@ -611,7 +611,7 @@ std::unique_ptr<column> copy_if_else(
  * @param[in] rhs right-hand scalar
  * @param[in] boolean_mask column of `BOOL8` representing "left (true) / right (false)" boolean for
  * each element. null element represents false.
- * @param[in] mr resource for allocating device memory
+ * @param[in] mr Device memory resource used to allocate the returned column's device memory
  *
  * @returns new column with the selected elements
  */
@@ -653,7 +653,7 @@ std::unique_ptr<column> copy_if_else(
  * @param[in] input table_view (set of dense columns) to scatter
  * @param[in] target table_view to modify with scattered values from `input`
  * @param[in] boolean_mask column_view which acts as boolean mask.
- * @param[in] mr Optional, The resource to use for all returned allocations
+ * @param[in] mr Device memory resource used to allocate device memory of the returned table.
  *
  * @returns Returns a table by scattering `input` into `target` as per `boolean_mask`.
  */
@@ -690,7 +690,7 @@ std::unique_ptr<table> boolean_mask_scatter(
  * @param[in] input scalars to scatter
  * @param[in] target table_view to modify with scattered values from `input`
  * @param[in] boolean_mask column_view which acts as boolean mask.
- * @param[in] mr Optional, The resource to use for all returned allocations
+ * @param[in] mr Device memory resource used to allocate device memory of the returned table.
  *
  * @returns Returns a table by scattering `input` into `target` as per `boolean_mask`.
  */
@@ -710,7 +710,7 @@ std::unique_ptr<table> boolean_mask_scatter(
  *
  * @param input Column view to get the element from
  * @param index Index into `input` to get the element at
- * @param mr Device memory resource used to allocate the returned scalar.
+ * @param mr Device memory resource used to allocate the returned scalar's device memory.
  * @return std::unique_ptr<scalar> Scalar containing the single value
  */
 std::unique_ptr<scalar> get_element(
diff --git a/cpp/include/cudf/detail/gather.cuh b/cpp/include/cudf/detail/gather.cuh
index d3f89c0f9a8..f8be5f10771 100644
--- a/cpp/include/cudf/detail/gather.cuh
+++ b/cpp/include/cudf/detail/gather.cuh
@@ -421,7 +421,7 @@ void gather_bitmask(table_view const& source,
  * @param[in] gather_map_end End of iterator range of integer indices that map the rows in the
  * source columns to rows in the destination columns
  * @param[in] nullify_out_of_bounds Nullify values in `gather_map` that are out of bounds.
- * @param[in] mr The resource to use for all allocations
+ * @param[in] mr Device memory resource used to allocate the returned table's device memory
  * @param[in] stream The CUDA stream on which to execute kernels
  * @return cudf::table Result of the gather
  */
diff --git a/cpp/include/cudf/detail/gather.hpp b/cpp/include/cudf/detail/gather.hpp
index a972ebd642e..5755b707c33 100644
--- a/cpp/include/cudf/detail/gather.hpp
+++ b/cpp/include/cudf/detail/gather.hpp
@@ -35,7 +35,7 @@ namespace detail {
  * i.e., setting both to `true` is undefined.
  * @param[in] allow_negative_indices Interpret each negative index `i` in the
  * gathermap as the positive index `i+num_source_rows`.
- * @param[in] mr The resource to use for all allocations
+ * @param[in] mr Device memory resource used to allocate the returned table's device memory
  * @param[in] stream The CUDA stream on which to execute kernels
  * @return cudf::table Result of the gather
  */
diff --git a/cpp/include/cudf/detail/reduction_functions.hpp b/cpp/include/cudf/detail/reduction_functions.hpp
index 8f61abbdd6c..32995a22665 100644
--- a/cpp/include/cudf/detail/reduction_functions.hpp
+++ b/cpp/include/cudf/detail/reduction_functions.hpp
@@ -32,7 +32,7 @@ namespace reduction {
  *
  * @param col input column to compute sum
  * @param output_dtype data type of return type and typecast elements of input column
- * @param mr Device memory resource used to allocate the returned scalar
+ * @param mr Device memory resource used to allocate the returned scalar's device memory
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return Sum as scalar of type `output_dtype`.
  */
@@ -49,7 +49,7 @@ std::unique_ptr<scalar> sum(column_view const& col,
  *
  * @param col input column to compute minimum.
  * @param output_dtype data type of return type and typecast elements of input column
- * @param mr Device memory resource used to allocate the returned scalar
+ * @param mr Device memory resource used to allocate the returned scalar's device memory
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return Minimum element as scalar of type `output_dtype`.
  */
@@ -66,7 +66,7 @@ std::unique_ptr<scalar> min(column_view const& col,
  *
  * @param col input column to compute maximum.
  * @param output_dtype data type of return type and typecast elements of input column
- * @param mr Device memory resource used to allocate the returned scalar
+ * @param mr Device memory resource used to allocate the returned scalar's device memory
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return Maximum element as scalar of type `output_dtype`.
  */
@@ -84,7 +84,7 @@ std::unique_ptr<scalar> max(column_view const& col,
  *
  * @param col input column to compute any_of.
  * @param output_dtype data type of return type and typecast elements of input column
- * @param mr Device memory resource used to allocate the returned scalar
+ * @param mr Device memory resource used to allocate the returned scalar's device memory
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return bool scalar if any of elements is true when typecasted to bool
  */
@@ -102,7 +102,7 @@ std::unique_ptr<scalar> any(column_view const& col,
  *
  * @param col input column to compute all_of.
  * @param output_dtype data type of return type and typecast elements of input column
- * @param mr Device memory resource used to allocate the returned scalar
+ * @param mr Device memory resource used to allocate the returned scalar's device memory
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return bool scalar if all of elements is true when typecasted to bool
  */
@@ -120,7 +120,7 @@ std::unique_ptr<scalar> all(column_view const& col,
  *
  * @param col input column to compute product.
  * @param output_dtype data type of return type and typecast elements of input column
- * @param mr Device memory resource used to allocate the returned scalar
+ * @param mr Device memory resource used to allocate the returned scalar's device memory
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return Product as scalar of type `output_dtype`.
  */
@@ -140,7 +140,7 @@ std::unique_ptr<scalar> product(
  *
  * @param col input column to compute sum of squares.
  * @param output_dtype data type of return type and typecast elements of input column
- * @param mr Device memory resource used to allocate the returned scalar
+ * @param mr Device memory resource used to allocate the returned scalar's device memory
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return Sum of squares as scalar of type `output_dtype`.
  */
@@ -160,7 +160,7 @@ std::unique_ptr<scalar> sum_of_squares(
  *
  * @param col input column to compute mean.
  * @param output_dtype data type of return type and typecast elements of input column.
- * @param mr Device memory resource used to allocate the returned scalar.
+ * @param mr Device memory resource used to allocate the returned scalar's device memory.
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return Mean as scalar of type `output_dtype`.
  */
@@ -179,7 +179,7 @@ std::unique_ptr<scalar> mean(column_view const& col,
  *
  * @param col input column to compute variance.
  * @param output_dtype data type of return type and typecast elements of input column.
- * @param mr Device memory resource used to allocate the returned scalar.
+ * @param mr Device memory resource used to allocate the returned scalar's device memory.
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return Variance as scalar of type `output_dtype`.
  */
@@ -200,7 +200,7 @@ std::unique_ptr<scalar> variance(
  *
  * @param col input column to compute standard deviation.
  * @param output_dtype data type of return type and typecast elements of input column.
- * @param mr Device memory resource used to allocate the returned scalar.
+ * @param mr Device memory resource used to allocate the returned scalar's device memory.
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @return Standard deviation as scalar of type `output_dtype`.
  */
diff --git a/cpp/include/cudf/detail/scatter.cuh b/cpp/include/cudf/detail/scatter.cuh
index 5f9eff5e2c0..26db4c61106 100644
--- a/cpp/include/cudf/detail/scatter.cuh
+++ b/cpp/include/cudf/detail/scatter.cuh
@@ -193,7 +193,7 @@ struct column_scatterer {
  * are to be scattered
  * @param[in] check_bounds Optionally perform bounds checking on the values of
  * `scatter_map` and throw an error if any of its values are out of bounds.
- * @param[in] mr The resource to use for all allocations
+ * @param[in] mr Device memory resource used to allocate the returned table's device memory
  * @param[in] stream The stream to use for CUDA operations
  *
  * @return Result of scattering values from source to target
diff --git a/cpp/include/cudf/detail/unary.hpp b/cpp/include/cudf/detail/unary.hpp
index 188efa6047e..73f245c9348 100644
--- a/cpp/include/cudf/detail/unary.hpp
+++ b/cpp/include/cudf/detail/unary.hpp
@@ -32,11 +32,10 @@ namespace detail {
  * @param begin Begining of the sequence of elements
  * @param end End of the sequence of elements
  * @param p Predicate to be applied to each element in `[begin,end)`
- * @param mr Optional, The resource to use for all allocations
+ * @param mr Device memory resource used to allocate the returned column's device memory
  * @param stream CUDA stream used for device memory operations and kernel launches.
  *
- * @returns std::unique_ptr<cudf::column> A column of type `BOOL8,` with `true` representing
- * predicate is satisfied.
+ * @returns A column of type `BOOL8`, with `true` indicating that the predicate is satisfied.
  */
 
 template <typename InputIterator, typename Predicate>
diff --git a/cpp/include/cudf/dlpack.hpp b/cpp/include/cudf/dlpack.hpp
index 81fc57633a5..36bc0edd4e1 100644
--- a/cpp/include/cudf/dlpack.hpp
+++ b/cpp/include/cudf/dlpack.hpp
@@ -61,7 +61,7 @@ std::unique_ptr<experimental::table> from_dlpack(
  * or if any of columns have non-zero null count
  *
  * @param input Table to convert to DLPack
- * @param mr Device memory resource used to allocate the returned DLPack tensor.
+ * @param mr Device memory resource used to allocate the returned DLPack tensor's device memory.
  *
  * @return 1D or 2D DLPack tensor with a copy of the table data, or nullptr
  */
diff --git a/cpp/include/cudf/io/functions.hpp b/cpp/include/cudf/io/functions.hpp
index 7b98a843494..87ff16850c7 100644
--- a/cpp/include/cudf/io/functions.hpp
+++ b/cpp/include/cudf/io/functions.hpp
@@ -339,7 +339,7 @@ struct write_csv_args : detail::csv::writer_options {
  * @endcode
  *
  * @param args Settings for controlling writing behavior
- * @param mr Optional resource to use for device memory allocation
+ * @param mr Device memory resource to use for device memory allocation
  */
 void write_csv(write_csv_args const& args,
                rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
@@ -577,7 +577,7 @@ struct orc_chunked_state;
  * @endcode
  *
  * @param[in] args Settings for controlling writing behavior
- * @param[in] mr Optional resource to use for device memory allocation
+ * @param[in] mr Device memory resource to use for device memory allocation
  *
  * @returns pointer to an anonymous state structure storing information about the chunked write.
  * this pointer must be passed to all subsequent write_orc_chunked() and write_orc_chunked_end()
@@ -665,7 +665,7 @@ struct write_parquet_args {
  * @endcode
  *
  * @param args Settings for controlling writing behavior
- * @param mr Optional resource to use for device memory allocation
+ * @param mr Device memory resource to use for device memory allocation
  *
  * @return A blob that contains the file metadata (parquet FileMetadata thrift message) if
  *         requested in write_parquet_args (empty blob otherwise)
@@ -747,7 +747,7 @@ struct pq_chunked_state;
  * @endcode
  *
  * @param[in] args Settings for controlling writing behavior
- * @param[in] mr Optional resource to use for device memory allocation
+ * @param[in] mr Device memory resource to use for device memory allocation
  *
  * @returns pointer to an anonymous state structure storing information about the chunked write.
  * this pointer must be passed to all subsequent write_parquet_chunked() and
diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp
index f1d87e84a60..301cdf25361 100644
--- a/cpp/include/cudf/join.hpp
+++ b/cpp/include/cudf/join.hpp
@@ -259,7 +259,8 @@ std::unique_ptr<cudf::experimental::table> full_join(
  *                             indicated by `left_on[i]`.
  * @param[in] return_columns   A vector of column indices from `left` to
  *                             include in the returned table.
- * @param[in] mr               Device memory resource to use for device memory allocation
+ * @param[in] mr               Device memory resource used to allocate the returned table's device
+ *                             memory
  *
  * @returns                    Result of joining `left` and `right` tables on the columns
  *                             specified by `left_on` and `right_on`. The resulting table
@@ -312,7 +313,8 @@ std::unique_ptr<cudf::experimental::table> left_semi_join(
  *                             indicated by `left_on[i]`.
  * @param[in] return_columns   A vector of column indices from `left` to
  *                             include in the returned table.
- * @param[in] mr               Device memory resource to use for device memory allocation
+ * @param[in] mr               Device memory resource used to allocate the returned table's device
+ *                             memory
  *
  * @returns                    Result of joining `left` and `right` tables on the columns
  *                             specified by `left_on` and `right_on`. The resulting table
diff --git a/cpp/include/cudf/partitioning.hpp b/cpp/include/cudf/partitioning.hpp
index 2885f752684..ceaa03bea5c 100644
--- a/cpp/include/cudf/partitioning.hpp
+++ b/cpp/include/cudf/partitioning.hpp
@@ -221,7 +221,7 @@ std::pair<std::unique_ptr<experimental::table>, std::vector<size_type>> hash_par
  * @param[in] input The input table to be round-robin partitioned
  * @param[in] num_partitions Number of partitions for the table
  * @param[in] start_partition Index of the 1st partition
- * @param[in] mr Device memory allocator
+ * @param[in] mr Device memory resource used to allocate the returned table's device memory
  *
  * @return A std::pair consisting of a unique_ptr to the partitioned table
  * and the partition offsets for each partition within the table.
diff --git a/cpp/include/cudf/reduction.hpp b/cpp/include/cudf/reduction.hpp
index cf6f992ee06..05b5c1ffe78 100644
--- a/cpp/include/cudf/reduction.hpp
+++ b/cpp/include/cudf/reduction.hpp
@@ -53,7 +53,7 @@ enum class scan_type : bool { INCLUSIVE, EXCLUSIVE };
  * @param[in] col Input column view
  * @param[in] agg unique_ptr of the aggregation operator applied by the reduction
  * @param[in] output_dtype  The computation and output precision.
- * @param[in] mr The resource to use for all allocations
+ * @param[in] mr Device memory resource used to allocate the returned scalar's device memory
  * @returns  cudf::scalar the result value
  * If the reduction fails, the member is_valid of the output scalar
  * will contain `false`.
@@ -79,7 +79,7 @@ std::unique_ptr<scalar> reduce(
  * @param[in] null_handling Exclude null values when computing the result if
  * null_policy::EXCLUDE. Include nulls if null_policy::INCLUDE.
  * Any operation with a null results in a null.
- * @param[in] mr The resource to use for all allocations
+ * @param[in] mr Device memory resource used to allocate the returned column's device memory
  * @returns unique pointer to new output column
  */
 std::unique_ptr<column> scan(const column_view &input,
diff --git a/cpp/include/cudf/replace.hpp b/cpp/include/cudf/replace.hpp
index 7310e74438f..a8b48cf9883 100644
--- a/cpp/include/cudf/replace.hpp
+++ b/cpp/include/cudf/replace.hpp
@@ -34,7 +34,7 @@ namespace experimental {
  *
  * @param[in] input A column whose null values will be replaced
  * @param[in] replacement A cudf::column whose values will replace null values in input
- * @param[in] mr Optional device_memory_resource to use for allocations.
+ * @param[in] mr Device memory resource used to allocate device memory of the returned column.
  *
  * @returns A copy of `input` with the null values replaced with corresponding values from
  * `replacement`.
@@ -52,7 +52,7 @@ std::unique_ptr<column> replace_nulls(
  *
  * @param[in] input A column whose null values will be replaced
  * @param[in] replacement Scalar used to replace null values in `input`.
- * @param[in] mr Optional device_memory_resource to use for allocations.
+ * @param[in] mr Device memory resource used to allocate device memory of the returned column.
  *
  * @returns Copy of `input` with null values replaced by `replacement`.
  */
@@ -170,8 +170,7 @@ std::unique_ptr<column> find_and_replace_all(
  * @param[in] hi Maximum clamp value. All elements greater than `hi` will be replaced by
  * `hi_replace`. Ignored if null.
  * @param[in] hi_replace All elements greater than `hi` will be replaced by `hi_replace`.
- * @param[in] mr Optional resource to use for device memory
- *           allocation of the returned result column.
+ * @param[in] mr Device memory resource used to allocate device memory of the returned column.
  *
  * @return Returns a clamped column as per `lo` and `hi` boundaries
  */
@@ -217,8 +216,7 @@ std::unique_ptr<column> clamp(
  * if null.
  * @param[in] hi Maximum clamp value. All elements greater than `hi` will be replaced by `hi`.
  * Ignored if null.
- * @param[in] mr Optional resource to use for device memory
- *           allocation of the returned result column.
+ * @param[in] mr Device memory resource used to allocate device memory of the returned column.
  *
  * @return Returns a clamped column as per `lo` and `hi` boundaries
  */
diff --git a/cpp/include/cudf/scalar/scalar.hpp b/cpp/include/cudf/scalar/scalar.hpp
index bae1d0f72f5..17634f11c92 100644
--- a/cpp/include/cudf/scalar/scalar.hpp
+++ b/cpp/include/cudf/scalar/scalar.hpp
@@ -97,7 +97,7 @@ class scalar {
    * @param type Data type of the scalar
    * @param is_valid Whether the value held by the scalar is valid
    * @param stream CUDA stream used for device memory operations.
-   * @param mr Device memory resource to use for allocation
+   * @param mr Device memory resource to use for device memory allocation
    */
   scalar(data_type type,
          bool is_valid                       = false,
@@ -167,7 +167,7 @@ class fixed_width_scalar : public scalar {
    * @param value The initial value of the scalar
    * @param is_valid Whether the value held by the scalar is valid
    * @param stream CUDA stream used for device memory operations.
-   * @param mr Device memory resource to use for allocation
+   * @param mr Device memory resource to use for device memory allocation
    */
   fixed_width_scalar(T value,
                      bool is_valid                       = true,
@@ -221,7 +221,7 @@ class numeric_scalar : public detail::fixed_width_scalar<T> {
    * @param value The initial value of the scalar
    * @param is_valid Whether the value held by the scalar is valid
    * @param stream CUDA stream used for device memory operations.
-   * @param mr Device memory resource to use for allocation
+   * @param mr Device memory resource to use for device memory allocation
    */
   numeric_scalar(T value,
                  bool is_valid                       = true,
@@ -268,7 +268,7 @@ class string_scalar : public scalar {
    * @param value The value of the string
    * @param is_valid Whether the value held by the scalar is valid
    * @param stream CUDA stream used for device memory operations.
-   * @param mr Device memory resource to use for allocation
+   * @param mr Device memory resource to use for device memory allocation
    */
   string_scalar(std::string const& string,
                 bool is_valid                       = true,
@@ -285,7 +285,7 @@ class string_scalar : public scalar {
    * @param source string_view pointing string value to copy
    * @param is_valid Whether the value held by the scalar is valid
    * @param stream CUDA stream used for device memory operations.
-   * @param mr Device memory resource to use for allocation
+   * @param mr Device memory resource to use for device memory allocation
    */
   string_scalar(value_type const& source,
                 bool is_valid                       = true,
@@ -302,7 +302,7 @@ class string_scalar : public scalar {
    * @param data device_scalar string_view pointing string value to copy
    * @param is_valid Whether the value held by the scalar is valid
    * @param stream CUDA stream used for device memory operations.
-   * @param mr Device memory resource to use for allocation
+   * @param mr Device memory resource to use for device memory allocation
    */
   string_scalar(rmm::device_scalar<value_type>& data,
                 bool is_valid                       = true,
@@ -371,7 +371,7 @@ class timestamp_scalar : public detail::fixed_width_scalar<T> {
    * @param value The initial value of the scalar
    * @param is_valid Whether the value held by the scalar is valid
    * @param stream CUDA stream used for device memory operations.
-   * @param mr Device memory resource to use for allocation
+   * @param mr Device memory resource to use for device memory allocation
    */
   timestamp_scalar(T value,
                    bool is_valid                       = true,
@@ -387,7 +387,7 @@ class timestamp_scalar : public detail::fixed_width_scalar<T> {
    * @param value Integer representing number of ticks since the UNIX epoch
    * @param is_valid Whether the value held by the scalar is valid
    * @param stream CUDA stream used for device memory operations.
-   * @param mr Device memory resource to use for allocation
+   * @param mr Device memory resource to use for device memory allocation
    */
   timestamp_scalar(typename T::duration::rep value,
                    bool is_valid,
diff --git a/cpp/include/cudf/sorting.hpp b/cpp/include/cudf/sorting.hpp
index 42d20f19e37..c6602879ae6 100644
--- a/cpp/include/cudf/sorting.hpp
+++ b/cpp/include/cudf/sorting.hpp
@@ -52,7 +52,7 @@ namespace experimental {
  * @param null_precedence The desired order of null compared to other elements
  * for each column.  Size must be equal to `input.num_columns()` or empty.
  * If empty, all columns will be sorted in `null_order::BEFORE`.
- * @param mr Optional, The resource to use for all allocations
+ * @param mr Device memory resource used to allocate the returned column's device memory
  * @return A non-nullable column of `size_type` elements containing the permuted row indices of
  * `input` if it were sorted
  */
@@ -107,7 +107,7 @@ bool is_sorted(cudf::table_view const& table,
  * elements for each column in `input`. Size must be equal to
  * `input.num_columns()` or empty. If empty, all columns will be sorted with
  * `null_order::BEFORE`.
- * @param mr The device memory resource used to allocate the returned table
+ * @param mr Device memory resource used to allocate the returned table's device memory
  * @return New table containing the desired sorted order of `input`
  */
 std::unique_ptr<table> sort(table_view input,
@@ -132,7 +132,7 @@ std::unique_ptr<table> sort(table_view input,
  * elements for each column in `keys`. Size must be equal to
  * `keys.num_columns()` or empty. If empty, all columns will be sorted with
  * `null_order::BEFORE`.
- * @param mr The device memory resource used to allocate the returned table
+ * @param mr Device memory resource used to allocate the returned table's device memory
  * @return The reordering of `values` determined by the lexicographic order of
  * the rows of `keys`.
  */
@@ -167,7 +167,7 @@ std::unique_ptr<table> sort_by_key(
  * @param null_precedence The desired order of null compared to other elements
  * for column
  * @param percentage flag to convert ranks to percentage in range (0,1}
- * @param mr The device memory resource used to allocate the returned column
+ * @param mr Device memory resource used to allocate the returned column's device memory
  * @return std::unique_ptr<column> A column of containing the rank of the each
  * element of the column of `input`. The output column type will be `size_type`
  * column by default or else `double` when `method=rank_method::AVERAGE` or
diff --git a/cpp/include/cudf/stream_compaction.hpp b/cpp/include/cudf/stream_compaction.hpp
index e3cb75f9904..daedc79237b 100644
--- a/cpp/include/cudf/stream_compaction.hpp
+++ b/cpp/include/cudf/stream_compaction.hpp
@@ -61,7 +61,7 @@ namespace experimental {
  * @param[in] keys  vector of indices representing key columns from `input`
  * @param[in] keep_threshold The minimum number of non-null fields in a row
  *                           required to keep the row.
- * @param[in] mr Optional, The resource to use for all allocations
+ * @param[in] mr Device memory resource used to allocate the returned table's device memory
  * @return Table containing all rows of the `input` with at least @p
  * keep_threshold non-null fields in @p keys.
  */
@@ -93,7 +93,7 @@ std::unique_ptr<table> drop_nulls(
  *
  * @param[in] input The input `table_view` to filter.
  * @param[in] keys  vector of indices representing key columns from `input`
- * @param[in] mr Optional, The resource to use for all allocations
+ * @param[in] mr Device memory resource used to allocate the returned table's device memory
  * @return Table containing all rows of the `input` without nulls in the columns
  * of @p keys.
  */
@@ -119,7 +119,7 @@ std::unique_ptr<experimental::table> drop_nulls(
  * @param[in] input The input table_view to filter
  * @param[in] boolean_mask A nullable column_view of type BOOL8 used
  * as a mask to filter the `input`.
- * @param[in] mr Optional, The resource to use for all allocations
+ * @param[in] mr Device memory resource used to allocate the returned table's device memory
  * @return Table containing copy of all rows of @p input passing
  * the filter defined by @p boolean_mask.
  */
@@ -153,7 +153,8 @@ enum class duplicate_keep_option {
  * @param[in] keep            keep first entry, last entry, or no entries if duplicates found
  * @param[in] nulls_equal     flag to denote nulls are equal if null_equality::EQUAL,
  * nulls are not equal if null_equality::UNEQUAL
- * @param[in] mr Optional, The resource to use for allocation of returned table
+ * @param[in] mr              Device memory resource used to allocate the returned table's device
+ * memory
  *
  * @return Table with unique rows as per specified `keep`.
  */
diff --git a/cpp/include/cudf/strings/capitalize.hpp b/cpp/include/cudf/strings/capitalize.hpp
index 210fa5429b9..43f7588cf54 100644
--- a/cpp/include/cudf/strings/capitalize.hpp
+++ b/cpp/include/cudf/strings/capitalize.hpp
@@ -38,7 +38,7 @@ namespace strings {
  * ```
  *
  * @param[in] strings String column.
- * @param[in] mr Resource for allocating device memory.
+ * @param[in] mr Device memory resource used to allocate the returned column's device memory
  * @return Column of strings capitalized from the input column.
  */
 std::unique_ptr<column> capitalize(
@@ -62,7 +62,7 @@ std::unique_ptr<column> capitalize(
  * ```
  *
  * @param[in] strings String column.
- * @param[in] mr Resource for allocating device memory.
+ * @param[in] mr Device memory resource used to allocate the returned column's device memory
  * @return Column of title strings.
  */
 std::unique_ptr<column> title(
diff --git a/cpp/include/cudf/strings/split/split.hpp b/cpp/include/cudf/strings/split/split.hpp
index b7ad7c219ff..e861a9f2225 100644
--- a/cpp/include/cudf/strings/split/split.hpp
+++ b/cpp/include/cudf/strings/split/split.hpp
@@ -120,7 +120,7 @@ struct contiguous_split_record_result {
  *        Default of empty string indicates split on whitespace.
  * @param maxsplit Maximum number of splits to perform.
  *        Default of -1 indicates all possible splits on each string.
- * @param mr Device memory resource used to allocate the returned result.
+ * @param mr Device memory resource used to allocate the returned result's device memory.
  * @return contiguous_split_record_result New vector of strings column_view
  *         objects
  *         (each column_view element of the vector holds splits from a string
@@ -152,7 +152,7 @@ contiguous_split_record_result contiguous_split_record(
  *        Default of empty string indicates split on whitespace.
  * @param maxsplit Maximum number of splits to perform.
  *        Default of -1 indicates all possible splits on each string.
- * @param mr Device memory resource used to allocate the returned result.
+ * @param mr Device memory resource used to allocate the returned result's device memory.
  * @return contiguous_split_record_result New vector of strings column_view
  *         objects
  *         (each column_view element of the vector holds splits from a string
diff --git a/cpp/include/cudf/strings/wrap.hpp b/cpp/include/cudf/strings/wrap.hpp
index 37ba3edffeb..aa88e68bd99 100644
--- a/cpp/include/cudf/strings/wrap.hpp
+++ b/cpp/include/cudf/strings/wrap.hpp
@@ -56,7 +56,7 @@ namespace strings {
  *
  * @param[in] strings String column.
  * @param[in] width Maximum character width of a line within each string.
- * @param[in] mr Resource for allocating device memory.
+ * @param[in] mr Device memory resource used to allocate the returned column's device memory
  * @return Column of wrapped strings.
  */
 std::unique_ptr<column> wrap(strings_column_view const& strings,
diff --git a/cpp/src/join/semi_join.cu b/cpp/src/join/semi_join.cu
index 85fb5bd58cc..b1c823f22f3 100644
--- a/cpp/src/join/semi_join.cu
+++ b/cpp/src/join/semi_join.cu
@@ -44,7 +44,8 @@ namespace detail {
  *                             indicated by `left_on[i]`.
  * @param[in] return_columns   A vector of column indices from `left` to
  *                             include in the returned table.
- * @param[in] mr               Device memory resource to use for device memory allocation
+ * @param[in] mr               Device memory resource used to allocate the returned table's
+ *                             device memory
  * @param[in] stream           Cuda stream
  * @tparam    join_kind        Indicates whether to do LEFT_SEMI_JOIN or LEFT_ANTI_JOIN
  *
diff --git a/cpp/src/partitioning/round_robin.cu b/cpp/src/partitioning/round_robin.cu
index b64e2eafecc..9d5f095ab29 100644
--- a/cpp/src/partitioning/round_robin.cu
+++ b/cpp/src/partitioning/round_robin.cu
@@ -67,7 +67,7 @@ using VectorT = rmm::device_vector<T>;
  * @Param[in] input The input table to be round-robin partitioned
  * @Param[in] num_partitions Number of partitions for the table
  * @Param[in] start_partition Index of the 1st partition
- * @Param[in] mr Device memory allocator
+ * @Param[in] mr Device memory resource used to allocate the returned table's device memory
  * @Param[in] stream cuda stream to execute on
  *
  * @Returns A std::pair consisting of a unique_ptr to the partitioned table and the partition
diff --git a/cpp/src/reductions/compound.cuh b/cpp/src/reductions/compound.cuh
index 52601413d2f..f61d6daca03 100644
--- a/cpp/src/reductions/compound.cuh
+++ b/cpp/src/reductions/compound.cuh
@@ -34,7 +34,7 @@ namespace compound {
  * @param[in] ddof   `Delta Degrees of Freedom` used for `std`, `var`.
  *                   The divisor used in calculations is N - ddof, where N
  *                   represents the number of elements.
- * @param[in] mr    The resource to use for all allocations
+ * @param[in] mr     Device memory resource used to allocate the returned scalar's device memory
  * @param[in] stream cuda stream
  * @returns   Output scalar in device memory
  *
diff --git a/cpp/src/reductions/simple.cuh b/cpp/src/reductions/simple.cuh
index 302a6178ed5..cc8b564115f 100644
--- a/cpp/src/reductions/simple.cuh
+++ b/cpp/src/reductions/simple.cuh
@@ -31,7 +31,7 @@ namespace simple {
  * which directly compute the reduction by a single step reduction call
  *
  * @param[in] col    input column view
- * @param[in] mr The resource to use for all allocations
+ * @param[in] mr Device memory resource used to allocate the returned scalar's device memory
  * @param[in] stream cuda stream
  * @returns   Output scalar in device memory
  *
diff --git a/cpp/src/stream_compaction/drop_duplicates.cu b/cpp/src/stream_compaction/drop_duplicates.cu
index da17009cfb1..1fc92e69081 100644
--- a/cpp/src/stream_compaction/drop_duplicates.cu
+++ b/cpp/src/stream_compaction/drop_duplicates.cu
@@ -97,7 +97,6 @@ OutputIterator unique_copy(Exec&& exec,
  * @param[in] keep            keep first entry, last entry, or no entries if duplicates found
  * @param[in] nulls_equal     flag to denote nulls are equal if null_equality::EQUAL,
  * nulls are not equal if null_equality::UNEQUAL
- * @param[in] mr Optional, The resource to use for all allocations
  * @param[in] stream Optional CUDA stream on which to execute kernels
  *
  * @return column_view column_view of unique row index as per specified `keep`, this is actually

From ba502c90e633cbcbfd43d60efe8678a161da30aa Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Thu, 21 May 2020 23:42:25 +0530
Subject: [PATCH 65/90] more doc update for param stream

---
 cpp/include/cudf/column/column_factories.hpp   | 18 ++++++------------
 cpp/include/cudf/copying.hpp                   |  2 +-
 cpp/include/cudf/detail/copy.hpp               | 10 +++++-----
 cpp/include/cudf/detail/gather.cuh             |  2 +-
 cpp/include/cudf/detail/gather.hpp             |  2 +-
 cpp/include/cudf/detail/null_mask.hpp          |  4 ++--
 cpp/include/cudf/detail/reduction.cuh          |  4 ++--
 cpp/include/cudf/detail/replace.hpp            |  4 ++--
 cpp/include/cudf/detail/scatter.cuh            |  2 +-
 cpp/include/cudf/detail/sorting.hpp            |  6 +++---
 cpp/include/cudf/detail/stream_compaction.hpp  |  8 ++++----
 cpp/include/cudf/detail/unary.hpp              |  4 ++--
 cpp/src/hash/concurrent_unordered_multimap.cuh |  2 +-
 cpp/src/io/json/json_gpu.h                     |  4 ++--
 cpp/src/io/json/reader_impl.cu                 | 10 +++++-----
 cpp/src/io/json/reader_impl.hpp                | 10 +++++-----
 cpp/src/join/semi_join.cu                      |  2 +-
 cpp/src/merge/merge.cu                         |  2 +-
 cpp/src/partitioning/round_robin.cu            |  2 +-
 cpp/src/reductions/compound.cuh                |  2 +-
 cpp/src/reductions/simple.cuh                  |  2 +-
 cpp/src/replace/clamp.cu                       |  2 +-
 cpp/src/stream_compaction/drop_duplicates.cu   |  8 ++++----
 23 files changed, 53 insertions(+), 59 deletions(-)

diff --git a/cpp/include/cudf/column/column_factories.hpp b/cpp/include/cudf/column/column_factories.hpp
index 8bc9d6bf504..05510d11b9b 100644
--- a/cpp/include/cudf/column/column_factories.hpp
+++ b/cpp/include/cudf/column/column_factories.hpp
@@ -50,8 +50,7 @@ std::unique_ptr<column> make_empty_column(data_type type);
  * @param[in] size The number of elements in the column
  * @param[in] state Optional, controls allocation/initialization of the
  * column's null mask. By default, no null mask is allocated.
- * @param[in] stream Optional stream on which to issue all memory allocation and
- * device kernels
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  * @param[in] mr Device memory resource used to allocate the returned column's device memory
  */
 std::unique_ptr<column> make_numeric_column(
@@ -75,8 +74,7 @@ std::unique_ptr<column> make_numeric_column(
  * @param[in] size The number of elements in the column
  * @param[in] null_mask Null mask to use for this column.
  * @param[in] null_count Optional number of nulls in the null_mask.
- * @param[in] stream Optional stream on which to issue all memory allocation and
- * device kernels
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  * @param[in] mr Device memory resource used to allocate the returned column's device memory
  */
 template <typename B>
@@ -110,8 +108,7 @@ std::unique_ptr<column> make_numeric_column(
  * @param[in] size The number of elements in the column
  * @param[in] state Optional, controls allocation/initialization of the
  * column's null mask. By default, no null mask is allocated.
- * @param[in] stream Optional stream on which to issue all memory allocation and
- * device kernels
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  * @param[in] mr Device memory resource used to allocate the returned column's device memory
  */
 std::unique_ptr<column> make_timestamp_column(
@@ -135,8 +132,7 @@ std::unique_ptr<column> make_timestamp_column(
  * @param[in] size The number of elements in the column
  * @param[in] null_mask Null mask to use for this column.
  * @param[in] null_count Optional number of nulls in the null_mask.
- * @param[in] stream Optional stream on which to issue all memory allocation and
- * device kernels
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  * @param[in] mr Device memory resource used to allocate the returned column's device memory
  */
 template <typename B>
@@ -170,8 +166,7 @@ std::unique_ptr<column> make_timestamp_column(
  * @param[in] size The number of elements in the column
  * @param[in] state Optional, controls allocation/initialization of the
  * column's null mask. By default, no null mask is allocated.
- * @param[in] stream Optional stream on which to issue all memory allocation and device
- * kernels
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  * @param[in] mr Device memory resource used to allocate the returned column's device memory
  */
 std::unique_ptr<column> make_fixed_width_column(
@@ -195,8 +190,7 @@ std::unique_ptr<column> make_fixed_width_column(
  * @param[in] size The number of elements in the column
  * @param[in] null_mask Null mask to use for this column.
  * @param[in] null_count Optional number of nulls in the null_mask.
- * @param[in] stream Optional stream on which to issue all memory allocation and device
- * kernels
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  * @param[in] mr Device memory resource used to allocate the returned column's device memory
  */
 template <typename B>
diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp
index 52342055f26..e1d88335790 100644
--- a/cpp/include/cudf/copying.hpp
+++ b/cpp/include/cudf/copying.hpp
@@ -478,7 +478,7 @@ struct contiguous_split_result {
  * @param input View of a table to split
  * @param splits A vector of indices where the view will be split
  * @param[in] mr Device memory resource used to allocate the returned result's device memory
- * @param[in] stream Optional CUDA stream on which to execute kernels
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  * @return The set of requested views of `input` indicated by the `splits` and the viewed memory
  * buffer.
  */
diff --git a/cpp/include/cudf/detail/copy.hpp b/cpp/include/cudf/detail/copy.hpp
index 6f903aa5bec..4e666d2d8a9 100644
--- a/cpp/include/cudf/detail/copy.hpp
+++ b/cpp/include/cudf/detail/copy.hpp
@@ -89,7 +89,7 @@ std::vector<contiguous_split_result> contiguous_split(
  * @copydoc cudf::experimental::allocate_like(column_view const&, size_type, mask_allocation_policy,
  * rmm::mr::device_memory_resource*)
  *
- * @param[in] stream Optional CUDA stream on which to execute kernels
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> allocate_like(
   column_view const& input,
@@ -102,7 +102,7 @@ std::unique_ptr<column> allocate_like(
  * @copydoc cudf::experimental::copy_if_else( column_view const&, column_view const&,
  * column_view const&, rmm::mr::device_memory_resource*)
  *
- * @param[in] stream Optional CUDA stream on which to execute kernels
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> copy_if_else(
   column_view const& lhs,
@@ -115,7 +115,7 @@ std::unique_ptr<column> copy_if_else(
  * @copydoc cudf::experimental::copy_if_else( scalar const&, column_view const&,
  * column_view const&, rmm::mr::device_memory_resource*)
  *
- * @param[in] stream Optional CUDA stream on which to execute kernels
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> copy_if_else(
   scalar const& lhs,
@@ -128,7 +128,7 @@ std::unique_ptr<column> copy_if_else(
  * @copydoc cudf::experimental::copy_if_else( column_view const&, scalar const&,
  * column_view const&, rmm::mr::device_memory_resource*)
  *
- * @param[in] stream Optional CUDA stream on which to execute kernels
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> copy_if_else(
   column_view const& lhs,
@@ -141,7 +141,7 @@ std::unique_ptr<column> copy_if_else(
  * @copydoc cudf::experimental::copy_if_else( scalar const&, scalar const&,
  * column_view const&, rmm::mr::device_memory_resource*)
  *
- * @param[in] stream Optional CUDA stream on which to execute kernels
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> copy_if_else(
   scalar const& lhs,
diff --git a/cpp/include/cudf/detail/gather.cuh b/cpp/include/cudf/detail/gather.cuh
index f8be5f10771..bc06239b822 100644
--- a/cpp/include/cudf/detail/gather.cuh
+++ b/cpp/include/cudf/detail/gather.cuh
@@ -422,7 +422,7 @@ void gather_bitmask(table_view const& source,
  * source columns to rows in the destination columns
  * @param[in] nullify_out_of_bounds Nullify values in `gather_map` that are out of bounds.
  * @param[in] mr Device memory resource used to allocate the returned table's device memory
- * @param[in] stream The CUDA stream on which to execute kernels
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  * @return cudf::table Result of the gather
  */
 template <typename MapIterator>
diff --git a/cpp/include/cudf/detail/gather.hpp b/cpp/include/cudf/detail/gather.hpp
index 5755b707c33..2b3d01f0801 100644
--- a/cpp/include/cudf/detail/gather.hpp
+++ b/cpp/include/cudf/detail/gather.hpp
@@ -36,7 +36,7 @@ namespace detail {
  * @param[in] allow_negative_indices Interpret each negative index `i` in the
  * gathermap as the positive index `i+num_source_rows`.
  * @param[in] mr Device memory resource used to allocate the returned table's device memory
- * @param[in] stream The CUDA stream on which to execute kernels
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  * @return cudf::table Result of the gather
  */
 std::unique_ptr<table> gather(table_view const& source_table,
diff --git a/cpp/include/cudf/detail/null_mask.hpp b/cpp/include/cudf/detail/null_mask.hpp
index 58e99134eb3..bc8e7e450f3 100644
--- a/cpp/include/cudf/detail/null_mask.hpp
+++ b/cpp/include/cudf/detail/null_mask.hpp
@@ -24,7 +24,7 @@ namespace detail {
 /**
  * @copydoc cudf::segmented_count_set_bits
  *
- * @param[in] stream Optional CUDA stream on which to execute kernels
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  */
 std::vector<size_type> segmented_count_set_bits(bitmask_type const* bitmask,
                                                 std::vector<size_type> const& indices,
@@ -33,7 +33,7 @@ std::vector<size_type> segmented_count_set_bits(bitmask_type const* bitmask,
 /**
  * @copydoc cudf::segmented_count_unset_bits
  *
- * @param[in] stream Optional CUDA stream on which to execute kernels
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  */
 std::vector<size_type> segmented_count_unset_bits(bitmask_type const* bitmask,
                                                   std::vector<size_type> const& indices,
diff --git a/cpp/include/cudf/detail/reduction.cuh b/cpp/include/cudf/detail/reduction.cuh
index b62337021bb..a4cbe59f1b0 100644
--- a/cpp/include/cudf/detail/reduction.cuh
+++ b/cpp/include/cudf/detail/reduction.cuh
@@ -37,7 +37,7 @@ namespace detail {
  * @param[in] d_in      the begin iterator
  * @param[in] num_items the number of items
  * @param[in] op        the reduction operator
- * @param[in] stream    cuda stream
+ * @param[in] stream    CUDA stream used for device memory operations and kernel launches.
  * @returns   Output scalar in device memory
  *
  * @tparam Op               the reduction operator with device binary operator
@@ -152,7 +152,7 @@ std::unique_ptr<scalar> reduce(InputIterator d_in,
  * @param[in] op        the reduction operator
  * @param[in] valid_count   the intermediate operator argument 1
  * @param[in] ddof      the intermediate operator argument 2
- * @param[in] stream    cuda stream
+ * @param[in] stream    CUDA stream used for device memory operations and kernel launches.
  * @returns   Output scalar in device memory
  *
  * The reduction operator must have `intermediate::compute_result()` method.
diff --git a/cpp/include/cudf/detail/replace.hpp b/cpp/include/cudf/detail/replace.hpp
index ad1513ad8cc..8259ace6894 100644
--- a/cpp/include/cudf/detail/replace.hpp
+++ b/cpp/include/cudf/detail/replace.hpp
@@ -27,7 +27,7 @@ namespace detail {
  * @copydoc cudf::experimental::replace_nulls(column_view const&, column_view const&,
  * rmm::mr::device_memory_resource*)
  *
- * @param[in] stream Optional stream in which to perform allocations
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> replace_nulls(
   column_view const& input,
@@ -39,7 +39,7 @@ std::unique_ptr<column> replace_nulls(
  * @copydoc cudf::experimental::replace_nulls(column_view const&, scalar const&,
  * rmm::mr::device_memory_resource*)
  *
- * @param[in] stream Optional stream in which to perform allocations
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> replace_nulls(
   column_view const& input,
diff --git a/cpp/include/cudf/detail/scatter.cuh b/cpp/include/cudf/detail/scatter.cuh
index 26db4c61106..3c86052b2b0 100644
--- a/cpp/include/cudf/detail/scatter.cuh
+++ b/cpp/include/cudf/detail/scatter.cuh
@@ -194,7 +194,7 @@ struct column_scatterer {
  * @param[in] check_bounds Optionally perform bounds checking on the values of
  * `scatter_map` and throw an error if any of its values are out of bounds.
  * @param[in] mr Device memory resource used to allocate the returned table's device memory
- * @param[in] stream The stream to use for CUDA operations
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  *
  * @return Result of scattering values from source to target
  **/
diff --git a/cpp/include/cudf/detail/sorting.hpp b/cpp/include/cudf/detail/sorting.hpp
index 06d3ef5ad1f..ccffa6b1070 100644
--- a/cpp/include/cudf/detail/sorting.hpp
+++ b/cpp/include/cudf/detail/sorting.hpp
@@ -28,7 +28,7 @@ namespace detail {
 /**
  * @copydoc cudf::experimental::sorted_order
  *
- * @param[in] stream Optional CUDA stream on which to execute kernels
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> sorted_order(
   table_view input,
@@ -40,7 +40,7 @@ std::unique_ptr<column> sorted_order(
 /**
  * @copydoc cudf::experimental::stable_sorted_order
  *
- * @param[in] stream Optional CUDA stream on which to execute kernels
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> stable_sorted_order(
   table_view input,
@@ -52,7 +52,7 @@ std::unique_ptr<column> stable_sorted_order(
 /**
  * @copydoc cudf::experimental::sort_by_key
  *
- * @param[in] stream Optional CUDA stream on which to execute kernels
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<table> sort_by_key(
   table_view const& values,
diff --git a/cpp/include/cudf/detail/stream_compaction.hpp b/cpp/include/cudf/detail/stream_compaction.hpp
index ebb0311aa5a..511c30e07b3 100644
--- a/cpp/include/cudf/detail/stream_compaction.hpp
+++ b/cpp/include/cudf/detail/stream_compaction.hpp
@@ -27,7 +27,7 @@ namespace detail {
  * @copydoc cudf::experimental::drop_nulls(table_view const&, std::vector<size_type> const&,
  *                                         cudf::size_type, rmm::mr::device_memory_resource*)
  *
- * @param[in] stream Optional CUDA stream on which to execute kernels
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<experimental::table> drop_nulls(
   table_view const& input,
@@ -39,7 +39,7 @@ std::unique_ptr<experimental::table> drop_nulls(
 /**
  * @copydoc cudf::experimental::apply_boolean_mask
  *
- * @param[in] stream Optional CUDA stream on which to execute kernels
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<experimental::table> apply_boolean_mask(
   table_view const& input,
@@ -50,7 +50,7 @@ std::unique_ptr<experimental::table> apply_boolean_mask(
 /**
  * @copydoc cudf::experimental::drop_duplicates
  *
- * @param[in] stream Optional CUDA stream on which to execute kernels
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<experimental::table> drop_duplicates(
   table_view const& input,
@@ -63,7 +63,7 @@ std::unique_ptr<experimental::table> drop_duplicates(
 /**
  * @copydoc cudf::experimental::unique_count
  *
- * @param[in] stream Optional CUDA stream on which to execute kernels
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  */
 cudf::size_type unique_count(column_view const& input,
                              null_policy null_handling,
diff --git a/cpp/include/cudf/detail/unary.hpp b/cpp/include/cudf/detail/unary.hpp
index 73f245c9348..a4b86aadb55 100644
--- a/cpp/include/cudf/detail/unary.hpp
+++ b/cpp/include/cudf/detail/unary.hpp
@@ -80,7 +80,7 @@ std::unique_ptr<column> cast(column_view const& input,
 /**
  * @copydoc cudf::experimental::is_nan
  *
- * @param[in] stream Optional CUDA stream on which to execute kernels
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> is_nan(
   cudf::column_view const& input,
@@ -90,7 +90,7 @@ std::unique_ptr<column> is_nan(
 /**
  * @copydoc cudf::experimental::is_not_nan
  *
- * @param[in] stream Optional CUDA stream on which to execute kernels
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> is_not_nan(
   cudf::column_view const& input,
diff --git a/cpp/src/hash/concurrent_unordered_multimap.cuh b/cpp/src/hash/concurrent_unordered_multimap.cuh
index af783ffc2fa..80d7876fe51 100644
--- a/cpp/src/hash/concurrent_unordered_multimap.cuh
+++ b/cpp/src/hash/concurrent_unordered_multimap.cuh
@@ -559,7 +559,7 @@ class concurrent_unordered_multimap {
    * @param[in] hash_function An optional hashing function
    * @param[in] equal An optional functor for comparing if two keys are equal
    * @param[in] a An optional functor for allocating the hash table memory
-   * @param[in] stream CUDA stream to use for device opertions.
+   * @param[in] stream CUDA stream used for device memory operations and kernel launches.
    */
   explicit concurrent_unordered_multimap(size_type n,
                                          const bool init             = true,
diff --git a/cpp/src/io/json/json_gpu.h b/cpp/src/io/json/json_gpu.h
index 584659d0305..6493f5d21d2 100644
--- a/cpp/src/io/json/json_gpu.h
+++ b/cpp/src/io/json/json_gpu.h
@@ -36,7 +36,7 @@ namespace gpu {
  * @param[out] valid_fields The bitmaps indicating whether column fields are valid
  * @param[out] num_valid_fields The numbers of valid fields in columns
  * @param[in] opts A set of parsing options
- * @param[in] stream Cuda stream to run kernels on
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  *
  * @returns void
  **/
@@ -61,7 +61,7 @@ void convert_json_to_columns(rmm::device_buffer const &input_data,
  * @param[in] num_columns The number of columns of input data
  * @param[in] rec_starts The start the input data of interest
  * @param[in] num_records The number of lines/rows of input data
- * @param[in] stream Cuda stream to run kernels on
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  *
  * @returns void
  **/
diff --git a/cpp/src/io/json/reader_impl.cu b/cpp/src/io/json/reader_impl.cu
index 7d6c9af4854..718bd9d0ec5 100644
--- a/cpp/src/io/json/reader_impl.cu
+++ b/cpp/src/io/json/reader_impl.cu
@@ -176,7 +176,7 @@ void reader::impl::decompress_input()
  *
  * Does not upload the entire file to the GPU
  *
- * @param[in] stream Cuda stream to execute gpu operations on
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  *
  * @return void
  **/
@@ -295,7 +295,7 @@ void reader::impl::upload_data_to_device()
  *
  * Sets the column_names_ data member
  *
- * @param[in] stream Cuda stream to execute gpu operations on
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  *
  * @return void
  **/
@@ -349,7 +349,7 @@ void reader::impl::set_column_names(cudaStream_t stream)
  *
  * If user does not pass the data types, deduces types from the file content
  *
- * @param[in] stream Cuda stream to execute gpu operations on
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  *
  * @return void
  **/
@@ -435,7 +435,7 @@ void reader::impl::set_data_types(cudaStream_t stream)
 /**
  * @brief Parse the input data and store results a table
  *
- * @param[in] stream Cuda stream to execute gpu operations on
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  *
  * @return table_with_metadata struct
  **/
@@ -517,7 +517,7 @@ reader::impl::impl(std::unique_ptr<datasource> source,
  *
  * @param[in] range_offset Number of bytes offset from the start
  * @param[in] range_size Bytes to read; use `0` for all remaining data
- * @param[in] stream Cuda stream to execute gpu operations on
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  *
  * @return Unique pointer to the table data
  **/
diff --git a/cpp/src/io/json/reader_impl.hpp b/cpp/src/io/json/reader_impl.hpp
index 4fe9248d1e1..b24cbe25ec7 100644
--- a/cpp/src/io/json/reader_impl.hpp
+++ b/cpp/src/io/json/reader_impl.hpp
@@ -104,7 +104,7 @@ class reader::impl {
    *
    * Does not upload the entire file to the GPU
    *
-   * @param[in] stream Cuda stream to execute gpu operations on
+   * @param[in] stream CUDA stream used for device memory operations and kernel launches.
    *
    * @return void
    **/
@@ -126,7 +126,7 @@ class reader::impl {
    *
    * Sets the column_names_ data member
    *
-   * @param[in] stream Cuda stream to execute gpu operations on
+   * @param[in] stream CUDA stream used for device memory operations and kernel launches.
    *
    * @return void
    **/
@@ -137,7 +137,7 @@ class reader::impl {
    *
    * If user does not pass the data types, deduces types from the file content
    *
-   * @param[in] stream Cuda stream to execute gpu operations on
+   * @param[in] stream CUDA stream used for device memory operations and kernel launches.
    *
    * @return void
    **/
@@ -146,7 +146,7 @@ class reader::impl {
   /**
    * @brief Parse the input data and store results a table
    *
-   * @param[in] stream Cuda stream to execute gpu operations on
+   * @param[in] stream CUDA stream used for device memory operations and kernel launches.
    *
    * @return table_with_metadata struct
    **/
@@ -166,7 +166,7 @@ class reader::impl {
    *
    * @param[in] range_offset Number of bytes offset from the start
    * @param[in] range_size Bytes to read; use `0` for all remaining data
-   * @param[in] stream Cuda stream to execute gpu operations on
+   * @param[in] stream CUDA stream used for device memory operations and kernel launches.
    *
    * @return Unique pointer to the table data
    **/
diff --git a/cpp/src/join/semi_join.cu b/cpp/src/join/semi_join.cu
index b1c823f22f3..2f4e7e7cb94 100644
--- a/cpp/src/join/semi_join.cu
+++ b/cpp/src/join/semi_join.cu
@@ -46,7 +46,7 @@ namespace detail {
  *                             include in the returned table.
  * @param[in] mr               Device memory resource to used to allocate the returned table's
  *                             device memory
- * @param[in] stream           Cuda stream
+ * @param[in] stream           CUDA stream used for device memory operations and kernel launches.
  * @tparam    join_kind        Indicates whether to do LEFT_SEMI_JOIN or LEFT_ANTI_JOIN
  *
  * @returns                    Result of joining `left` and `right` tables on the columns
diff --git a/cpp/src/merge/merge.cu b/cpp/src/merge/merge.cu
index 179899ca86f..e8a17ef4ef3 100644
--- a/cpp/src/merge/merge.cu
+++ b/cpp/src/merge/merge.cu
@@ -149,7 +149,7 @@ void materialize_bitmask(column_view const& left_col,
  * index columns
  * @param[in] nullable Flag indicating if at least one of the table_view arguments has nulls
  * (defaults to true)
- * @param[in] stream CUDA stream (defaults to nullptr)
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  *
  * @return A vector of merged indices
  */
diff --git a/cpp/src/partitioning/round_robin.cu b/cpp/src/partitioning/round_robin.cu
index 9d5f095ab29..1e2c5129cf3 100644
--- a/cpp/src/partitioning/round_robin.cu
+++ b/cpp/src/partitioning/round_robin.cu
@@ -68,7 +68,7 @@ using VectorT = rmm::device_vector<T>;
  * @Param[in] num_partitions Number of partitions for the table
  * @Param[in] start_partition Index of the 1st partition
  * @Param[in] mr Device memory resource used to allocate the returned table's device memory
- * @Param[in] stream cuda stream to execute on
+ * @Param[in] stream CUDA stream used for device memory operations and kernel launches.
  *
  * @Returns A std::pair consisting of a unique_ptr to the partitioned table and the partition
  * offsets for each partition within the table
diff --git a/cpp/src/reductions/compound.cuh b/cpp/src/reductions/compound.cuh
index f61d6daca03..8b919af1778 100644
--- a/cpp/src/reductions/compound.cuh
+++ b/cpp/src/reductions/compound.cuh
@@ -35,7 +35,7 @@ namespace compound {
  *                   The divisor used in calculations is N - ddof, where N
  *                   represents the number of elements.
  * @param[in] mr     Device memory resource used to allocate the returned scalar's device memory
- * @param[in] stream cuda stream
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  * @returns   Output scalar in device memory
  *
  * @tparam ElementType  the input column cudf dtype
diff --git a/cpp/src/reductions/simple.cuh b/cpp/src/reductions/simple.cuh
index cc8b564115f..32facd9db45 100644
--- a/cpp/src/reductions/simple.cuh
+++ b/cpp/src/reductions/simple.cuh
@@ -32,7 +32,7 @@ namespace simple {
  *
  * @param[in] col    input column view
  * @param[in] mr Device memory resource used to allocate the returned scalar's device memory
- * @param[in] stream cuda stream
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  * @returns   Output scalar in device memory
  *
  * @tparam ElementType  the input column cudf dtype
diff --git a/cpp/src/replace/clamp.cu b/cpp/src/replace/clamp.cu
index 49adb5da95a..cd9b77660b0 100644
--- a/cpp/src/replace/clamp.cu
+++ b/cpp/src/replace/clamp.cu
@@ -295,7 +295,7 @@ std::unique_ptr<column> dispatch_clamp::operator()<cudf::list_view>(
                                       scalar const& hi_replace,
                                       rmm::mr::device_memory_resource* mr);
  *
- * @param[in] stream Optional stream on which to issue all memory allocations
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> clamp(column_view const& input,
                               scalar const& lo,
diff --git a/cpp/src/stream_compaction/drop_duplicates.cu b/cpp/src/stream_compaction/drop_duplicates.cu
index 1fc92e69081..820d50510ed 100644
--- a/cpp/src/stream_compaction/drop_duplicates.cu
+++ b/cpp/src/stream_compaction/drop_duplicates.cu
@@ -96,8 +96,8 @@ OutputIterator unique_copy(Exec&& exec,
  * @param[out] unique_indices Column to store the index with unique rows
  * @param[in] keep            keep first entry, last entry, or no entries if duplicates found
  * @param[in] nulls_equal     flag to denote nulls are equal if null_equality::EQUAL,
- * nulls are not equal if null_equality::UNEQUAL
- * @param[in] stream Optional CUDA stream on which to execute kernels
+ *                            nulls are not equal if null_equality::UNEQUAL
+ * @param[in] stream          CUDA stream used for device memory operations and kernel launches.
  *
  * @return column_view column_view of unique row index as per specified `keep`, this is actually
  * slice of `unique_indices`.
@@ -248,7 +248,7 @@ struct has_nans {
    * @note This will be applicable only for floating point type columns.
    *
    * @param[in] input The `column_view` which will be checked for `NAN`
-   * @param[in] stream Optional CUDA stream on which to execute kernels
+   * @param[in] stream CUDA stream used for device memory operations and kernel launches.
    *
    * @returns bool true if `input` has `NAN` else false
    */
@@ -272,7 +272,7 @@ struct has_nans {
    * false
    *
    * @param[in] input The `column_view` which will be checked for `NAN`
-   * @param[in] stream Optional CUDA stream on which to execute kernels
+   * @param[in] stream CUDA stream used for device memory operations and kernel launches.
    *
    * @returns bool Always false as non-floating point columns can't have `NAN`
    */

From 05c22f3a95c6d391a9a2f7a5ce2ac8d76639f3d9 Mon Sep 17 00:00:00 2001
From: Mark Harris <mharris@nvidia.com>
Date: Thu, 21 May 2020 21:35:40 -0700
Subject: [PATCH 66/90] Add deprecation warnings to nvstrings, nvcategory,
 nvtext

---
 python/nvstrings/nvcategory.py | 5 +++++
 python/nvstrings/nvstrings.py  | 5 +++++
 python/nvstrings/nvtext.py     | 5 +++++
 3 files changed, 15 insertions(+)

diff --git a/python/nvstrings/nvcategory.py b/python/nvstrings/nvcategory.py
index eb30676987c..c50b81a26db 100644
--- a/python/nvstrings/nvcategory.py
+++ b/python/nvstrings/nvcategory.py
@@ -1,5 +1,10 @@
 import weakref
 
+import warn
+
+warnings.warn("NVCategory will be removed in the future. Please use cuDF.",
+              DeprecationWarning, stacklevel=2)
+
 import rmm  # noqa: F401
 
 import nvstrings as nvs
diff --git a/python/nvstrings/nvstrings.py b/python/nvstrings/nvstrings.py
index 721be4f67d1..f2ad52aafeb 100644
--- a/python/nvstrings/nvstrings.py
+++ b/python/nvstrings/nvstrings.py
@@ -1,4 +1,9 @@
 import weakref
+import warn
+
+warnings.warn("NVStrings will be removed in the future. "
+              "Please use cuDF for strings.",
+              DeprecationWarning, stacklevel=2)
 
 import rmm  # noqa: F401
 
diff --git a/python/nvstrings/nvtext.py b/python/nvstrings/nvtext.py
index a067c2de5ee..2cdbd666949 100644
--- a/python/nvstrings/nvtext.py
+++ b/python/nvstrings/nvtext.py
@@ -1,5 +1,10 @@
 # Copyright (c) 2019, NVIDIA CORPORATION.
 
+import warn
+
+warnings.warn("NVText will be removed in the future. Please use cuDF.",
+              DeprecationWarning, stacklevel=2)
+
 import nvstrings as nvs
 import pyniNVText
 

From c8f2d30587767d83156dd86dc8e1ae2dd48c4e38 Mon Sep 17 00:00:00 2001
From: Mark Harris <mharris@nvidia.com>
Date: Thu, 21 May 2020 22:14:13 -0700
Subject: [PATCH 67/90] Fix import

---
 python/nvstrings/nvcategory.py | 2 +-
 python/nvstrings/nvstrings.py  | 2 +-
 python/nvstrings/nvtext.py     | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/nvstrings/nvcategory.py b/python/nvstrings/nvcategory.py
index c50b81a26db..e2b99f0c965 100644
--- a/python/nvstrings/nvcategory.py
+++ b/python/nvstrings/nvcategory.py
@@ -1,6 +1,6 @@
 import weakref
 
-import warn
+import warnings
 
 warnings.warn("NVCategory will be removed in the future. Please use cuDF.",
               DeprecationWarning, stacklevel=2)
diff --git a/python/nvstrings/nvstrings.py b/python/nvstrings/nvstrings.py
index f2ad52aafeb..452dc82c7d9 100644
--- a/python/nvstrings/nvstrings.py
+++ b/python/nvstrings/nvstrings.py
@@ -1,5 +1,5 @@
 import weakref
-import warn
+import warnings
 
 warnings.warn("NVStrings will be removed in the future. "
               "Please use cuDF for strings.",
diff --git a/python/nvstrings/nvtext.py b/python/nvstrings/nvtext.py
index 2cdbd666949..8fa519f332b 100644
--- a/python/nvstrings/nvtext.py
+++ b/python/nvstrings/nvtext.py
@@ -1,6 +1,6 @@
 # Copyright (c) 2019, NVIDIA CORPORATION.
 
-import warn
+import warnings
 
 warnings.warn("NVText will be removed in the future. Please use cuDF.",
               DeprecationWarning, stacklevel=2)

From 02c365d358388fd9eda42375924063912910b8fd Mon Sep 17 00:00:00 2001
From: Mark Harris <mharris@nvidia.com>
Date: Thu, 21 May 2020 22:19:37 -0700
Subject: [PATCH 68/90] Changelog for #5254

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 90253004c05..4b48cceb04c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -33,6 +33,7 @@
 - PR #5196 Add Java bindings for NULL_EQUALS, NULL_MAX and NULL_MIN
 - PR #5203 Add Java bindings for is_integer and is_float
 - PR #5205 Add ci test for libcudf, libnvstrings headers existence check in meta.yml
+- PR #5254 Deprecate NVStrings, NVCategory, and NVText
 
 ## Improvements
 

From ba12d9f9855fc760c01e01fdfd49550a8dbaf6b5 Mon Sep 17 00:00:00 2001
From: Devavret Makkar <dmakkar@nvidia.com>
Date: Sat, 23 May 2020 04:30:08 +0530
Subject: [PATCH 69/90] put back column_split_info because it doesn't need to be
 exposed anymore.

---
 cpp/include/cudf/detail/copy.hpp    | 13 -------------
 cpp/src/copying/contiguous_split.cu | 13 +++++++++++++
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/cpp/include/cudf/detail/copy.hpp b/cpp/include/cudf/detail/copy.hpp
index 53e9cb6003f..70c4444e4b4 100644
--- a/cpp/include/cudf/detail/copy.hpp
+++ b/cpp/include/cudf/detail/copy.hpp
@@ -73,19 +73,6 @@ std::vector<column_view> slice(column_view const& input,
                                std::vector<size_type> const& indices,
                                cudaStream_t stream = 0);
 
-/**
- * @brief Information about the split for a given column. Bundled together
- *        into a struct because tuples were getting pretty unreadable.
- */
-struct column_split_info {
-  size_t data_buf_size;      // size of the data (including padding)
-  size_t validity_buf_size;  // validity vector size (including padding)
-
-  size_t offsets_buf_size;  // (strings only) size of offset column (including padding)
-  size_type num_chars;      // (strings only) number of chars in the column
-  size_type chars_offset;   // (strings only) offset from head of chars data
-};
-
 unpack_result alloc_and_copy(std::vector<column_view> const& t,
                              rmm::mr::device_memory_resource* mr,
                              cudaStream_t stream);
diff --git a/cpp/src/copying/contiguous_split.cu b/cpp/src/copying/contiguous_split.cu
index e2a593ce537..628482df435 100644
--- a/cpp/src/copying/contiguous_split.cu
+++ b/cpp/src/copying/contiguous_split.cu
@@ -169,6 +169,19 @@ __launch_bounds__(block_size) __global__
 // start at that alignment.
 static constexpr size_t split_align = 64;
 
+/**
+ * @brief Information about the split for a given column. Bundled together
+ *        into a struct because tuples were getting pretty unreadable.
+ */
+struct column_split_info {
+  size_t data_buf_size;      // size of the data (including padding)
+  size_t validity_buf_size;  // validity vector size (including padding)
+
+  size_t offsets_buf_size;  // (strings only) size of offset column (including padding)
+  size_type num_chars;      // (strings only) number of chars in the column
+  size_type chars_offset;   // (strings only) offset from head of chars data
+};
+
 /**
  * @brief Functor called by the `type_dispatcher` to incrementally compute total
  * memory buffer size needed to allocate a contiguous copy of all columns within

From caff618d518eab5aef6884e67c63c7a7781cce57 Mon Sep 17 00:00:00 2001
From: Conor Hoekstra <codereport@outlook.com>
Date: Tue, 26 May 2020 12:33:51 -0400
Subject: [PATCH 70/90] Fix compilation failure

---
 cpp/include/cudf/fixed_point/fixed_point.hpp | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/cpp/include/cudf/fixed_point/fixed_point.hpp b/cpp/include/cudf/fixed_point/fixed_point.hpp
index f7034d530f1..6da278c20ae 100644
--- a/cpp/include/cudf/fixed_point/fixed_point.hpp
+++ b/cpp/include/cudf/fixed_point/fixed_point.hpp
@@ -511,8 +511,8 @@ CUDA_HOST_DEVICE_CALLABLE fixed_point<Rep1, Rad1> operator+(fixed_point<Rep1, Ra
 
 #if defined(__CUDACC_DEBUG__)
 
-  assert(!addition_overflow<Rep1>(lhsv, rhsv) &&
-         "fixed_point overflow of underlying representation type " + print_rep<Rep1>());
+  assert(("fixed_point overflow of underlying representation type " + print_rep<Rep1>(),
+          !addition_overflow<Rep1>(lhsv, rhsv)));
 
 #endif
 
@@ -534,8 +534,8 @@ CUDA_HOST_DEVICE_CALLABLE fixed_point<Rep1, Rad1> operator-(fixed_point<Rep1, Ra
 
 #if defined(__CUDACC_DEBUG__)
 
-  assert(!subtraction_overflow<Rep1>(lhsv, rhsv) &&
-         "fixed_point overflow of underlying representation type " + print_rep<Rep1>());
+  assert(("fixed_point overflow of underlying representation type " + print_rep<Rep1>(),
+          !subtraction_overflow<Rep1>(lhsv, rhsv)));
 
 #endif
 
@@ -549,8 +549,8 @@ CUDA_HOST_DEVICE_CALLABLE fixed_point<Rep1, Rad1> operator*(fixed_point<Rep1, Ra
 {
 #if defined(__CUDACC_DEBUG__)
 
-  assert(!multiplication_overflow<Rep1>(lhs._value, rhs._value) &&
-         "fixed_point overflow of underlying representation type " + print_rep<Rep1>());
+  assert(("fixed_point overflow of underlying representation type " + print_rep<Rep1>(),
+          !multiplication_overflow<Rep1>(lhs._value, rhs._value)));
 
 #endif
 
@@ -565,8 +565,8 @@ CUDA_HOST_DEVICE_CALLABLE fixed_point<Rep1, Rad1> operator/(fixed_point<Rep1, Ra
 {
 #if defined(__CUDACC_DEBUG__)
 
-  assert(!division_overflow<Rep1>(lhs._value, rhs._value) &&
-         "fixed_point overflow of underlying representation type " + print_rep<Rep1>());
+  assert(("fixed_point overflow of underlying representation type " + print_rep<Rep1>(),
+          !division_overflow<Rep1>(lhs._value, rhs._value)));
 
 #endif
 

From 62a6864649e888098661bbbd14c42a3b3b49b44d Mon Sep 17 00:00:00 2001
From: Conor Hoekstra <codereport@outlook.com>
Date: Tue, 26 May 2020 12:39:43 -0400
Subject: [PATCH 71/90] Update CHANGELOG

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1ada7799247..c59f0b000bd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -322,6 +322,7 @@
 - PR #5224 Add new headers from 5198 to libcudf/meta.yaml
 - PR #5228 Fix datetime64 scalar dtype handling for unsupported time units
 - PR #5256 ORC reader: fix loading individual timestamp columns
+- PR #5285 Fix DEBUG compilation failure due to `fixed_point.hpp`
 
 
 # cuDF 0.13.0 (31 Mar 2020)

From cfc99b28f9571ed1e24bb1d9276a83e9f1f415e0 Mon Sep 17 00:00:00 2001
From: "Ram (Ramakrishna Prabhu)" <ramakrishnap@nvidia.com>
Date: Tue, 26 May 2020 16:49:39 -0500
Subject: [PATCH 72/90] DeprecationWarning for nvstrings, nvcategory and nvtext

---
 python/cudf/cudf/core/column/column.py |   5 ++
 python/cudf/cudf/core/column/string.py | 110 +++++++++++++++++++++++++
 python/cudf/cudf/tests/test_string.py  |   7 ++
 python/nvstrings/nvcategory.py         |   5 --
 python/nvstrings/nvstrings.py          |   5 --
 python/nvstrings/nvtext.py             |   5 --
 6 files changed, 122 insertions(+), 15 deletions(-)

diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 98e1a99ecef..d11497fa005 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -1186,6 +1186,11 @@ def as_column(arbitrary, nan_as_null=None, dtype=None, length=None):
             data = data.astype(dtype)
     # TODO: Remove nvstrings here when nvstrings is fully removed
     elif isinstance(arbitrary, nvstrings.nvstrings):
+        warnings.warn(
+            "nvstrings will be removed in 0.15. Please use equivalent from libcudf",
+            DeprecationWarning,
+            stacklevel=2,
+        )
         byte_count = arbitrary.byte_count()
         if byte_count > libcudf.MAX_STRING_COLUMN_BYTES:
             raise MemoryError(
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index c91c6522351..bfbbf6c0d87 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -166,6 +166,11 @@ def __getattr__(self, attr, *args, **kwargs):
         from cudf.core.series import Series
 
         # TODO: Remove when all needed string compute APIs are ported
+        warnings.warn(
+            "nvstrings will be removed in 0.15. Please use equivalent from libcudf",
+            DeprecationWarning,
+            stacklevel=2,
+        )
         if hasattr(self._column.nvstrings, attr):
             passed_attr = getattr(self._column.nvstrings, attr)
             if callable(passed_attr):
@@ -300,6 +305,11 @@ def _return_or_inplace(self, new_col, **kwargs):
     def __dir__(self):
         keys = dir(type(self))
         # TODO: Remove along with `__getattr__` above when all is ported
+        warnings.warn(
+            "nvstrings will be removed in 0.15. Please use equivalent from libcudf",
+            DeprecationWarning,
+            stacklevel=2,
+        )
         return set(keys + dir(self._column.nvstrings))
 
     def len(self, **kwargs):
@@ -3379,6 +3389,11 @@ def normalize_spaces(self, **kwargs):
         1    test string
         dtype: object
         """
+        warnings.warn(
+            "nvtext will be removed in 0.15. Please use equivalent from libcudf",
+            DeprecationWarning,
+            stacklevel=2,
+        )
         return self._return_or_inplace(
             cpp_normalize_spaces(self._column), **kwargs
         )
@@ -3414,6 +3429,11 @@ def tokenize(self, delimiter=" ", **kwargs):
         """
         delimiter = _massage_string_arg(delimiter, "delimiter", allow_col=True)
         kwargs.setdefault("retain_index", False)
+        warnings.warn(
+            "nvtext will be removed in 0.15. Please use equivalent from libcudf",
+            DeprecationWarning,
+            stacklevel=2,
+        )
         return self._return_or_inplace(
             cpp_tokenize(self._column, delimiter), **kwargs
         )
@@ -3445,6 +3465,11 @@ def token_count(self, delimiter=" ", **kwargs):
         dtype: int32
         """
         delimiter = _massage_string_arg(delimiter, "delimiter", allow_col=True)
+        warnings.warn(
+            "nvtext will be removed in 0.15. Please use equivalent from libcudf",
+            DeprecationWarning,
+            stacklevel=2,
+        )
         return self._return_or_inplace(
             cpp_count_tokens(self._column, delimiter), **kwargs
         )
@@ -3483,6 +3508,11 @@ def ngrams(self, n=2, separator="_", **kwargs):
         """
         separator = _massage_string_arg(separator, "separator")
         kwargs.setdefault("retain_index", False)
+        warnings.warn(
+            "nvtext will be removed in 0.15. Please use equivalent from libcudf",
+            DeprecationWarning,
+            stacklevel=2,
+        )
         return self._return_or_inplace(
             cpp_generate_ngrams(self._column, n, separator), **kwargs
         )
@@ -3519,6 +3549,11 @@ def ngrams_tokenize(self, n=2, delimiter=" ", separator="_", **kwargs):
         delimiter = _massage_string_arg(delimiter, "delimiter")
         separator = _massage_string_arg(separator, "separator")
         kwargs.setdefault("retain_index", False)
+        warnings.warn(
+            "nvtext will be removed in 0.15. Please use equivalent from libcudf",
+            DeprecationWarning,
+            stacklevel=2,
+        )
         return self._return_or_inplace(
             cpp_ngrams_tokenize(self._column, n, delimiter, separator),
             **kwargs,
@@ -3606,7 +3641,17 @@ def __init__(
         )
 
         # TODO: Remove these once NVStrings is fully deprecated / removed
+        warnings.warn(
+            "nvstrings will be removed in 0.15. Please use equivalent from libcudf",
+            DeprecationWarning,
+            stacklevel=2,
+        )
         self._nvstrings = None
+        warnings.warn(
+            "nvcategory will be removed in 0.15. Please use equivalent from libcudf",
+            DeprecationWarning,
+            stacklevel=2,
+        )
         self._nvcategory = None
         self._indices = None
 
@@ -3634,7 +3679,17 @@ def set_base_mask(self, value):
 
         # TODO: Remove these once NVStrings is fully deprecated / removed
         self._indices = None
+        warnings.warn(
+            "nvcategory will be removed in 0.15. Please use equivalent from libcudf",
+            DeprecationWarning,
+            stacklevel=2,
+        )
         self._nvcategory = None
+        warnings.warn(
+            "nvstrings will be removed in 0.15. Please use equivalent from libcudf",
+            DeprecationWarning,
+            stacklevel=2,
+        )
         self._nvstrings = None
 
     def set_base_children(self, value):
@@ -3643,7 +3698,17 @@ def set_base_children(self, value):
 
         # TODO: Remove these once NVStrings is fully deprecated / removed
         self._indices = None
+        warnings.warn(
+            "nvcategory will be removed in 0.15. Please use equivalent from libcudf",
+            DeprecationWarning,
+            stacklevel=2,
+        )
         self._nvcategory = None
+        warnings.warn(
+            "nvstrings will be removed in 0.15. Please use equivalent from libcudf",
+            DeprecationWarning,
+            stacklevel=2,
+        )
         self._nvstrings = None
 
     @property
@@ -3723,6 +3788,11 @@ def __len__(self):
     # TODO: Remove this once NVStrings is fully deprecated / removed
     @property
     def nvstrings(self):
+        warnings.warn(
+            "nvstrings will be removed in 0.15. Please use equivalent from libcudf",
+            DeprecationWarning,
+            stacklevel=2,
+        )
         if self._nvstrings is None:
             if self.nullable:
                 mask_ptr = self.mask_ptr
@@ -3744,6 +3814,16 @@ def nvstrings(self):
     # TODO: Remove these once NVStrings is fully deprecated / removed
     @property
     def nvcategory(self):
+        warnings.warn(
+            "nvcategory will be removed in 0.15. Please use equivalent from libcudf",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        warnings.warn(
+            "nvstrings will be removed in 0.15. Please use equivalent from libcudf",
+            DeprecationWarning,
+            stacklevel=2,
+        )
         if self._nvcategory is None:
             import nvcategory as nvc
 
@@ -3753,11 +3833,26 @@ def nvcategory(self):
     # TODO: Remove these once NVStrings is fully deprecated / removed
     @nvcategory.setter
     def nvcategory(self, nvc):
+        warnings.warn(
+            "nvcategory will be removed in 0.15. Please use equivalent from libcudf",
+            DeprecationWarning,
+            stacklevel=2,
+        )
         self._nvcategory = nvc
 
     def _set_mask(self, value):
         # TODO: Remove these once NVStrings is fully deprecated / removed
+        warnings.warn(
+            "nvstrings will be removed in 0.15. Please use equivalent from libcudf",
+            DeprecationWarning,
+            stacklevel=2,
+        )
         self._nvstrings = None
+        warnings.warn(
+            "nvcategory will be removed in 0.15. Please use equivalent from libcudf",
+            DeprecationWarning,
+            stacklevel=2,
+        )
         self._nvcategory = None
         self._indices = None
 
@@ -3767,6 +3862,11 @@ def _set_mask(self, value):
     @property
     def indices(self):
         if self._indices is None:
+            warnings.warn(
+                "nvcategory will be removed in 0.15. Please use equivalent from libcudf",
+                DeprecationWarning,
+                stacklevel=2,
+            )
             out_col = column_empty(self.nvcategory.size(), dtype="int32")
             ptr = out_col.data_ptr
             self.nvcategory.values(devptr=ptr)
@@ -3989,7 +4089,17 @@ def _mimic_inplace(self, other_col, inplace=False):
         out = super()._mimic_inplace(other_col, inplace=inplace)
         if inplace:
             # TODO: Remove these once NVStrings is fully deprecated / removed
+            warnings.warn(
+                "nvstrings will be removed in 0.15. Please use equivalent from libcudf",
+                DeprecationWarning,
+                stacklevel=2,
+            )
             self._nvstrings = other_col._nvstrings
+            warnings.warn(
+                "nvcategory will be removed in 0.15. Please use equivalent from libcudf",
+                DeprecationWarning,
+                stacklevel=2,
+            )
             self._nvcategory = other_col._nvcategory
             self._indices = other_col._indices
 
diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py
index 6f9d1a2e413..b1be0ca448b 100644
--- a/python/cudf/cudf/tests/test_string.py
+++ b/python/cudf/cudf/tests/test_string.py
@@ -3,6 +3,7 @@
 from contextlib import ExitStack as does_not_raise
 from sys import getsizeof
 from unittest.mock import patch
+import warnings
 
 import cupy
 import numpy as np
@@ -65,6 +66,12 @@ def test_from_nvstrings_nbytes(mock_byte_count, nbytes):
     expectation = raise_builder(
         [nbytes > libcudf.MAX_STRING_COLUMN_BYTES], MemoryError
     )
+    warnings.warn(
+        "nvstrings will be removed in 0.15. Please remove this test case.",
+        DeprecationWarning,
+        stacklevel=2,
+    )
+
     with expectation:
         Series(nvstrings.to_device([""]))
 
diff --git a/python/nvstrings/nvcategory.py b/python/nvstrings/nvcategory.py
index e2b99f0c965..eb30676987c 100644
--- a/python/nvstrings/nvcategory.py
+++ b/python/nvstrings/nvcategory.py
@@ -1,10 +1,5 @@
 import weakref
 
-import warnings
-
-warnings.warn("NVCategory will be removed in the future. Please use cuDF.",
-              DeprecationWarning, stacklevel=2)
-
 import rmm  # noqa: F401
 
 import nvstrings as nvs
diff --git a/python/nvstrings/nvstrings.py b/python/nvstrings/nvstrings.py
index 452dc82c7d9..721be4f67d1 100644
--- a/python/nvstrings/nvstrings.py
+++ b/python/nvstrings/nvstrings.py
@@ -1,9 +1,4 @@
 import weakref
-import warnings
-
-warnings.warn("NVStrings will be removed in the future. "
-              "Please use cuDF for strings.",
-              DeprecationWarning, stacklevel=2)
 
 import rmm  # noqa: F401
 
diff --git a/python/nvstrings/nvtext.py b/python/nvstrings/nvtext.py
index 8fa519f332b..a067c2de5ee 100644
--- a/python/nvstrings/nvtext.py
+++ b/python/nvstrings/nvtext.py
@@ -1,10 +1,5 @@
 # Copyright (c) 2019, NVIDIA CORPORATION.
 
-import warnings
-
-warnings.warn("NVText will be removed in the future. Please use cuDF.",
-              DeprecationWarning, stacklevel=2)
-
 import nvstrings as nvs
 import pyniNVText
 

From 23abde394be09f77ca8a7fc24a9b0085f06cab1b Mon Sep 17 00:00:00 2001
From: "Ram (Ramakrishna Prabhu)" <ramakrishnap@nvidia.com>
Date: Tue, 26 May 2020 16:50:59 -0500
Subject: [PATCH 73/90] Add CHANGELOG.md entry for PR #5254

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4cb484d89af..2ed36c78af2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -198,6 +198,7 @@
 - PR #5233 Remove experimental namespace used during libcudf++ refactor
 - PR #5213 Documentation enhancements to `cudf` python APIs
 - PR #5251 Fix more mispellings in cpp comments and strings
+- PR #5254 Deprecate nvstrings, nvcategory and nvtext
 
 ## Bug Fixes
 

From 610da49ee5c76888c2dc3a3098ccc958f16394dd Mon Sep 17 00:00:00 2001
From: "Ram (Ramakrishna Prabhu)" <ramakrishnap@nvidia.com>
Date: Tue, 26 May 2020 17:08:55 -0500
Subject: [PATCH 74/90] Fix style and drop duplicate CHANGELOG entry for PR #5254

---
 CHANGELOG.md                           |  1 -
 python/cudf/cudf/core/column/column.py |  3 +-
 python/cudf/cudf/core/column/string.py | 66 +++++++++++++++++---------
 python/cudf/cudf/tests/test_string.py  |  2 +-
 4 files changed, 47 insertions(+), 25 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2ed36c78af2..0c9d8b61724 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -33,7 +33,6 @@
 - PR #5196 Add Java bindings for NULL_EQUALS, NULL_MAX and NULL_MIN
 - PR #5203 Add Java bindings for is_integer and is_float
 - PR #5205 Add ci test for libcudf, libnvstrings headers existence check in meta.yml
-- PR #5254 Deprecate NVStrings, NVCategory, and NVText
 
 ## Improvements
 
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index d11497fa005..ee4ec0135e7 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -1187,7 +1187,8 @@ def as_column(arbitrary, nan_as_null=None, dtype=None, length=None):
     # TODO: Remove nvstrings here when nvstrings is fully removed
     elif isinstance(arbitrary, nvstrings.nvstrings):
         warnings.warn(
-            "nvstrings will be removed in 0.15. Please use equivalent from libcudf",
+            "nvstrings will be removed in 0.15. \
+            Please use equivalent from libcudf",
             DeprecationWarning,
             stacklevel=2,
         )
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index bfbbf6c0d87..8c3ed433138 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -167,7 +167,8 @@ def __getattr__(self, attr, *args, **kwargs):
 
         # TODO: Remove when all needed string compute APIs are ported
         warnings.warn(
-            "nvstrings will be removed in 0.15. Please use equivalent from libcudf",
+            "nvstrings will be removed in 0.15. \
+            Please use equivalent from libcudf",
             DeprecationWarning,
             stacklevel=2,
         )
@@ -306,7 +307,8 @@ def __dir__(self):
         keys = dir(type(self))
         # TODO: Remove along with `__getattr__` above when all is ported
         warnings.warn(
-            "nvstrings will be removed in 0.15. Please use equivalent from libcudf",
+            "nvstrings will be removed in 0.15. \
+            Please use equivalent from libcudf",
             DeprecationWarning,
             stacklevel=2,
         )
@@ -3390,7 +3392,8 @@ def normalize_spaces(self, **kwargs):
         dtype: object
         """
         warnings.warn(
-            "nvtext will be removed in 0.15. Please use equivalent from libcudf",
+            "nvtext will be removed in 0.15. \
+            Please use equivalent from libcudf",
             DeprecationWarning,
             stacklevel=2,
         )
@@ -3430,7 +3433,8 @@ def tokenize(self, delimiter=" ", **kwargs):
         delimiter = _massage_string_arg(delimiter, "delimiter", allow_col=True)
         kwargs.setdefault("retain_index", False)
         warnings.warn(
-            "nvtext will be removed in 0.15. Please use equivalent from libcudf",
+            "nvtext will be removed in 0.15. \
+            Please use equivalent from libcudf",
             DeprecationWarning,
             stacklevel=2,
         )
@@ -3466,7 +3470,8 @@ def token_count(self, delimiter=" ", **kwargs):
         """
         delimiter = _massage_string_arg(delimiter, "delimiter", allow_col=True)
         warnings.warn(
-            "nvtext will be removed in 0.15. Please use equivalent from libcudf",
+            "nvtext will be removed in 0.15. \
+            Please use equivalent from libcudf",
             DeprecationWarning,
             stacklevel=2,
         )
@@ -3509,7 +3514,8 @@ def ngrams(self, n=2, separator="_", **kwargs):
         separator = _massage_string_arg(separator, "separator")
         kwargs.setdefault("retain_index", False)
         warnings.warn(
-            "nvtext will be removed in 0.15. Please use equivalent from libcudf",
+            "nvtext will be removed in 0.15. \
+            Please use equivalent from libcudf",
             DeprecationWarning,
             stacklevel=2,
         )
@@ -3550,7 +3556,8 @@ def ngrams_tokenize(self, n=2, delimiter=" ", separator="_", **kwargs):
         separator = _massage_string_arg(separator, "separator")
         kwargs.setdefault("retain_index", False)
         warnings.warn(
-            "nvtext will be removed in 0.15. Please use equivalent from libcudf",
+            "nvtext will be removed in 0.15. \
+            Please use equivalent from libcudf",
             DeprecationWarning,
             stacklevel=2,
         )
@@ -3642,13 +3649,15 @@ def __init__(
 
         # TODO: Remove these once NVStrings is fully deprecated / removed
         warnings.warn(
-            "nvstrings will be removed in 0.15. Please use equivalent from libcudf",
+            "nvstrings will be removed in 0.15. \
+            Please use equivalent from libcudf",
             DeprecationWarning,
             stacklevel=2,
         )
         self._nvstrings = None
         warnings.warn(
-            "nvcategory will be removed in 0.15. Please use equivalent from libcudf",
+            "nvcategory will be removed in 0.15. \
+            Please use equivalent from libcudf",
             DeprecationWarning,
             stacklevel=2,
         )
@@ -3680,13 +3689,15 @@ def set_base_mask(self, value):
         # TODO: Remove these once NVStrings is fully deprecated / removed
         self._indices = None
         warnings.warn(
-            "nvcategory will be removed in 0.15. Please use equivalent from libcudf",
+            "nvcategory will be removed in 0.15. \
+            Please use equivalent from libcudf",
             DeprecationWarning,
             stacklevel=2,
         )
         self._nvcategory = None
         warnings.warn(
-            "nvstrings will be removed in 0.15. Please use equivalent from libcudf",
+            "nvstrings will be removed in 0.15. \
+            Please use equivalent from libcudf",
             DeprecationWarning,
             stacklevel=2,
         )
@@ -3699,13 +3710,15 @@ def set_base_children(self, value):
         # TODO: Remove these once NVStrings is fully deprecated / removed
         self._indices = None
         warnings.warn(
-            "nvcategory will be removed in 0.15. Please use equivalent from libcudf",
+            "nvcategory will be removed in 0.15. \
+            Please use equivalent from libcudf",
             DeprecationWarning,
             stacklevel=2,
         )
         self._nvcategory = None
         warnings.warn(
-            "nvstrings will be removed in 0.15. Please use equivalent from libcudf",
+            "nvstrings will be removed in 0.15. \
+            Please use equivalent from libcudf",
             DeprecationWarning,
             stacklevel=2,
         )
@@ -3789,7 +3802,8 @@ def __len__(self):
     @property
     def nvstrings(self):
         warnings.warn(
-            "nvstrings will be removed in 0.15. Please use equivalent from libcudf",
+            "nvstrings will be removed in 0.15. \
+            Please use equivalent from libcudf",
             DeprecationWarning,
             stacklevel=2,
         )
@@ -3815,12 +3829,14 @@ def nvstrings(self):
     @property
     def nvcategory(self):
         warnings.warn(
-            "nvcategory will be removed in 0.15. Please use equivalent from libcudf",
+            "nvcategory will be removed in 0.15. \
+            Please use equivalent from libcudf",
             DeprecationWarning,
             stacklevel=2,
         )
         warnings.warn(
-            "nvstrings will be removed in 0.15. Please use equivalent from libcudf",
+            "nvstrings will be removed in 0.15. \
+            Please use equivalent from libcudf",
             DeprecationWarning,
             stacklevel=2,
         )
@@ -3834,7 +3850,8 @@ def nvcategory(self):
     @nvcategory.setter
     def nvcategory(self, nvc):
         warnings.warn(
-            "nvcategory will be removed in 0.15. Please use equivalent from libcudf",
+            "nvcategory will be removed in 0.15. \
+            Please use equivalent from libcudf",
             DeprecationWarning,
             stacklevel=2,
         )
@@ -3843,13 +3860,15 @@ def nvcategory(self, nvc):
     def _set_mask(self, value):
         # TODO: Remove these once NVStrings is fully deprecated / removed
         warnings.warn(
-            "nvstrings will be removed in 0.15. Please use equivalent from libcudf",
+            "nvstrings will be removed in 0.15. \
+            Please use equivalent from libcudf",
             DeprecationWarning,
             stacklevel=2,
         )
         self._nvstrings = None
         warnings.warn(
-            "nvcategory will be removed in 0.15. Please use equivalent from libcudf",
+            "nvcategory will be removed in 0.15. \
+            Please use equivalent from libcudf",
             DeprecationWarning,
             stacklevel=2,
         )
@@ -3863,7 +3882,8 @@ def _set_mask(self, value):
     def indices(self):
         if self._indices is None:
             warnings.warn(
-                "nvcategory will be removed in 0.15. Please use equivalent from libcudf",
+                "nvcategory will be removed in 0.15. \
+                Please use equivalent from libcudf",
                 DeprecationWarning,
                 stacklevel=2,
             )
@@ -4090,13 +4110,15 @@ def _mimic_inplace(self, other_col, inplace=False):
         if inplace:
             # TODO: Remove these once NVStrings is fully deprecated / removed
             warnings.warn(
-                "nvstrings will be removed in 0.15. Please use equivalent from libcudf",
+                "nvstrings will be removed in 0.15. \
+                Please use equivalent from libcudf",
                 DeprecationWarning,
                 stacklevel=2,
             )
             self._nvstrings = other_col._nvstrings
             warnings.warn(
-                "nvcategory will be removed in 0.15. Please use equivalent from libcudf",
+                "nvcategory will be removed in 0.15. \
+                Please use equivalent from libcudf",
                 DeprecationWarning,
                 stacklevel=2,
             )
diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py
index b1be0ca448b..c4403cbb42b 100644
--- a/python/cudf/cudf/tests/test_string.py
+++ b/python/cudf/cudf/tests/test_string.py
@@ -1,9 +1,9 @@
 # Copyright (c) 2018-2020, NVIDIA CORPORATION.
 
+import warnings
 from contextlib import ExitStack as does_not_raise
 from sys import getsizeof
 from unittest.mock import patch
-import warnings
 
 import cupy
 import numpy as np

From b765578f3001df4a9d5256f9d30f69372b370a85 Mon Sep 17 00:00:00 2001
From: "Ram (Ramakrishna Prabhu)" <ramakrishnap@nvidia.com>
Date: Tue, 26 May 2020 17:40:53 -0500
Subject: [PATCH 75/90] Address review: warn on every `indices` access

---
 python/cudf/cudf/core/column/string.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index 8c3ed433138..504d104783b 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -3880,13 +3880,13 @@ def _set_mask(self, value):
     # TODO: Remove these once NVStrings is fully deprecated / removed
     @property
     def indices(self):
+        warnings.warn(
+            "nvcategory will be removed in 0.15. \
+            Please use equivalent from libcudf",
+            DeprecationWarning,
+            stacklevel=2,
+        )
         if self._indices is None:
-            warnings.warn(
-                "nvcategory will be removed in 0.15. \
-                Please use equivalent from libcudf",
-                DeprecationWarning,
-                stacklevel=2,
-            )
             out_col = column_empty(self.nvcategory.size(), dtype="int32")
             ptr = out_col.data_ptr
             self.nvcategory.values(devptr=ptr)

From cfd4b332b4bc7796807aa1b0063eda5a7b231d79 Mon Sep 17 00:00:00 2001
From: Conor Hoekstra <codereport@outlook.com>
Date: Wed, 27 May 2020 00:20:20 -0400
Subject: [PATCH 76/90] Fix unit tests

---
 cpp/tests/fixed_point/fixed_point_tests.cu | 40 ++++++++++++----------
 1 file changed, 22 insertions(+), 18 deletions(-)

diff --git a/cpp/tests/fixed_point/fixed_point_tests.cu b/cpp/tests/fixed_point/fixed_point_tests.cu
index 8b7d6480bee..b54a73890c6 100644
--- a/cpp/tests/fixed_point/fixed_point_tests.cu
+++ b/cpp/tests/fixed_point/fixed_point_tests.cu
@@ -237,6 +237,8 @@ TYPED_TEST(FixedPointTestBothReps, DecimalXXThrust)
 
 TEST_F(FixedPointTest, OverflowDecimal32)
 {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+
   using decimal32 = fixed_point<int32_t, Radix::BASE_10>;
 
 #if defined(__CUDACC_DEBUG__)
@@ -244,8 +246,8 @@ TEST_F(FixedPointTest, OverflowDecimal32)
   decimal32 num0{2, scale_type{-9}};
   decimal32 num1{-2, scale_type{-9}};
 
-  ASSERT_NO_THROW(num0 + num0);
-  ASSERT_NO_THROW(num1 - num0);
+  ASSERT_DEATH(num0 + num0, ".*");
+  ASSERT_DEATH(num1 - num0, ".*");
 
   decimal32 min{std::numeric_limits<int32_t>::min(), scale_type{0}};
   decimal32 max{std::numeric_limits<int32_t>::max(), scale_type{0}};
@@ -253,19 +255,21 @@ TEST_F(FixedPointTest, OverflowDecimal32)
   decimal32 ONE{1, scale_type{0}};
   decimal32 TWO{2, scale_type{0}};
 
-  ASSERT_NO_THROW(min / NEG_ONE);
-  ASSERT_NO_THROW(max * TWO);
-  ASSERT_NO_THROW(min * TWO);
-  ASSERT_NO_THROW(max + ONE);
-  ASSERT_NO_THROW(max - NEG_ONE);
-  ASSERT_NO_THROW(min - ONE);
-  ASSERT_NO_THROW(max - NEG_ONE);
+  ASSERT_DEATH(min / NEG_ONE, ".*");
+  ASSERT_DEATH(max * TWO, ".*");
+  ASSERT_DEATH(min * TWO, ".*");
+  ASSERT_DEATH(max + ONE, ".*");
+  ASSERT_DEATH(max - NEG_ONE, ".*");
+  ASSERT_DEATH(min - ONE, ".*");
+  ASSERT_DEATH(max - NEG_ONE, ".*");
 
 #endif
 }
 
 TEST_F(FixedPointTest, OverflowDecimal64)
 {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+
   using decimal64 = fixed_point<int64_t, Radix::BASE_10>;
 
 #if defined(__CUDACC_DEBUG__)
@@ -273,8 +277,8 @@ TEST_F(FixedPointTest, OverflowDecimal64)
   decimal64 num0{5, scale_type{-18}};
   decimal64 num1{-5, scale_type{-18}};
 
-  ASSERT_NO_THROW(num0 + num0);
-  ASSERT_NO_THROW(num1 - num0);
+  ASSERT_DEATH(num0 + num0, ".*");
+  ASSERT_DEATH(num1 - num0, ".*");
 
   decimal64 min{std::numeric_limits<int64_t>::min(), scale_type{0}};
   decimal64 max{std::numeric_limits<int64_t>::max(), scale_type{0}};
@@ -282,13 +286,13 @@ TEST_F(FixedPointTest, OverflowDecimal64)
   decimal64 ONE{1, scale_type{0}};
   decimal64 TWO{2, scale_type{0}};
 
-  ASSERT_NO_THROW(min / NEG_ONE);
-  ASSERT_NO_THROW(max * TWO);
-  ASSERT_NO_THROW(min * TWO);
-  ASSERT_NO_THROW(max + ONE);
-  ASSERT_NO_THROW(max - NEG_ONE);
-  ASSERT_NO_THROW(min - ONE);
-  ASSERT_NO_THROW(max - NEG_ONE);
+  ASSERT_DEATH(min / NEG_ONE, ".*");
+  ASSERT_DEATH(max * TWO, ".*");
+  ASSERT_DEATH(min * TWO, ".*");
+  ASSERT_DEATH(max + ONE, ".*");
+  ASSERT_DEATH(max - NEG_ONE, ".*");
+  ASSERT_DEATH(min - ONE, ".*");
+  ASSERT_DEATH(max - NEG_ONE, ".*");
 
 #endif
 }

From ad45fe25edfe68fcd2ac19590260e5e8cc5f2829 Mon Sep 17 00:00:00 2001
From: Conor Hoekstra <36027403+codereport@users.noreply.github.com>
Date: Wed, 27 May 2020 00:47:46 -0400
Subject: [PATCH 77/90] Update cpp/tests/fixed_point/fixed_point_tests.cu

Co-authored-by: Mark Harris <mharris@nvidia.com>
---
 cpp/tests/fixed_point/fixed_point_tests.cu | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cpp/tests/fixed_point/fixed_point_tests.cu b/cpp/tests/fixed_point/fixed_point_tests.cu
index b54a73890c6..7b5ef2b5149 100644
--- a/cpp/tests/fixed_point/fixed_point_tests.cu
+++ b/cpp/tests/fixed_point/fixed_point_tests.cu
@@ -237,6 +237,7 @@ TYPED_TEST(FixedPointTestBothReps, DecimalXXThrust)
 
 TEST_F(FixedPointTest, OverflowDecimal32)
 {
+  // This flag is needed to avoid warnings with ASSERT_DEATH
   ::testing::FLAGS_gtest_death_test_style = "threadsafe";
 
   using decimal32 = fixed_point<int32_t, Radix::BASE_10>;

From 954e1d591142e2120c96969f120bb5201cb319fb Mon Sep 17 00:00:00 2001
From: Conor Hoekstra <36027403+codereport@users.noreply.github.com>
Date: Wed, 27 May 2020 00:47:52 -0400
Subject: [PATCH 78/90] Update cpp/tests/fixed_point/fixed_point_tests.cu

Co-authored-by: Mark Harris <mharris@nvidia.com>
---
 cpp/tests/fixed_point/fixed_point_tests.cu | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cpp/tests/fixed_point/fixed_point_tests.cu b/cpp/tests/fixed_point/fixed_point_tests.cu
index 7b5ef2b5149..5a7de3821a0 100644
--- a/cpp/tests/fixed_point/fixed_point_tests.cu
+++ b/cpp/tests/fixed_point/fixed_point_tests.cu
@@ -269,6 +269,7 @@ TEST_F(FixedPointTest, OverflowDecimal32)
 
 TEST_F(FixedPointTest, OverflowDecimal64)
 {
+  // This flag is needed to avoid warnings with ASSERT_DEATH
   ::testing::FLAGS_gtest_death_test_style = "threadsafe";
 
   using decimal64 = fixed_point<int64_t, Radix::BASE_10>;

From eebed57096817ef2c8ebf89475cdd23be26436b3 Mon Sep 17 00:00:00 2001
From: "Robert (Bobby) Evans" <bobby@apache.org>
Date: Wed, 27 May 2020 08:48:17 -0500
Subject: [PATCH 79/90] Step one: remove unused NVStrings and NVCategory libraries

---
 java/pom.xml                                            | 2 --
 java/src/main/java/ai/rapids/cudf/NativeDepsLoader.java | 2 --
 2 files changed, 4 deletions(-)

diff --git a/java/pom.xml b/java/pom.xml
index ee1a7ae69ff..a084cbc2e3d 100755
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -479,8 +479,6 @@
                                     <directory>${native.cudf.path}</directory>
                                     <includes>
                                         <include>libcudf.so</include>
-                                        <include>libNVCategory.so</include>
-                                        <include>libNVStrings.so</include>
                                     </includes>
                                 </resource>
                                 <resource>
diff --git a/java/src/main/java/ai/rapids/cudf/NativeDepsLoader.java b/java/src/main/java/ai/rapids/cudf/NativeDepsLoader.java
index bf4ea531402..c159a088d86 100755
--- a/java/src/main/java/ai/rapids/cudf/NativeDepsLoader.java
+++ b/java/src/main/java/ai/rapids/cudf/NativeDepsLoader.java
@@ -34,8 +34,6 @@ public class NativeDepsLoader {
   private static final String[] loadOrder = new String[] {
       "boost_filesystem",
       "rmm",
-      "NVStrings",
-      "NVCategory",
       "cudf",
       "cudfjni"
   };

From 2f41c1f0158af9e023368b0d727de6830bc9b5da Mon Sep 17 00:00:00 2001
From: "Robert (Bobby) Evans" <bobby@apache.org>
Date: Wed, 27 May 2020 09:39:54 -0500
Subject: [PATCH 80/90] Clean up Java dependencies

---
 java/pom.xml                                  | 66 ++++++++++++++-----
 .../java/ai/rapids/cudf/NativeDepsLoader.java | 55 +++++++++++++---
 2 files changed, 96 insertions(+), 25 deletions(-)

diff --git a/java/pom.xml b/java/pom.xml
index a084cbc2e3d..ebaa63d29aa 100755
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -231,6 +231,45 @@
                 </plugins>
             </build>
         </profile>
+        <profile>
+            <id>COPY_NVRTC</id>
+            <activation>
+                <property>
+                    <name>CUDA_STATIC_RUNTIME</name>
+                    <value>ON</value>
+                </property>
+            </activation>
+            <build>
+            <plugins>
+              <plugin>
+                <artifactId>maven-resources-plugin</artifactId>
+                <executions>
+                    <execution>
+                        <id>copy-nvrtc</id>
+                        <phase>validate</phase>
+                        <goals>
+                            <goal>copy-resources</goal>
+                        </goals>
+                        <configuration>
+                            <overwrite>true</overwrite>
+                            <skip>${skipNativeCopy}</skip>
+                            <outputDirectory>${basedir}/target/native-deps/${os.arch}/${os.name}</outputDirectory>
+                            <resources>
+                                <resource>
+                                    <!--Set by groovy script-->
+                                    <directory>${cuda.path}</directory>
+                                    <includes>
+                                        <include>libnvrtc.so</include>
+                                    </includes>
+                                </resource>
+                            </resources>
+                        </configuration>
+                    </execution>
+                </executions>
+              </plugin>
+          </plugins>
+      </build>
+        </profile>
     </profiles>
 
     <build>
@@ -413,6 +452,17 @@
                             } else {
                                 pom.properties['cuda.classifier'] = ''
                             }
+
+                            if (pom.properties['CUDA_STATIC_RUNTIME'] == 'ON') {
+                                // We want to pull in nvrtc in this case
+                                def libnvrtc =  ~/libnvrtc\\.so.*\\s+=>\\s+(.*)libnvrtc.*\\.so.*\\s+.*/
+                                def rtcm = libnvrtc.matcher(sout)
+                                if (rtcm.find()) {
+                                  pom.properties['cuda.path'] = rtcm.group(1)
+                                } else {
+                                  fail('could not find libnvrtc which we need to package when statically linking')
+                                }
+                            }
                             </source>
                         </configuration>
                     </execution>
@@ -426,22 +476,6 @@
                     <classifier>${cuda.classifier}</classifier>
                 </configuration>
             </plugin>
-            <plugin>
-                <groupId>org.codehaus.mojo</groupId>
-                <artifactId>native-maven-plugin</artifactId>
-                <version>1.0-alpha-8</version>
-                <extensions>true</extensions>
-
-                <configuration>
-                    <javahClassNames>
-                        <javahClassName>ai.rapids.cudf.Cuda</javahClassName>
-                        <javahClassName>ai.rapids.cudf.ColumnVector</javahClassName>
-                        <javahClassName>ai.rapids.cudf.Table</javahClassName>
-                        <javahClassName>ai.rapids.cudf.Rmm</javahClassName>
-                    </javahClassNames>
-                    <javahOutputDirectory>${basedir}/src/main/native/include/</javahOutputDirectory>
-                </configuration>
-            </plugin>
             <plugin>
                 <groupId>org.apache.maven.plugins</groupId>
                 <artifactId>maven-surefire-plugin</artifactId>
diff --git a/java/src/main/java/ai/rapids/cudf/NativeDepsLoader.java b/java/src/main/java/ai/rapids/cudf/NativeDepsLoader.java
index c159a088d86..e2b9e7471e6 100755
--- a/java/src/main/java/ai/rapids/cudf/NativeDepsLoader.java
+++ b/java/src/main/java/ai/rapids/cudf/NativeDepsLoader.java
@@ -31,22 +31,54 @@
  */
 public class NativeDepsLoader {
   private static final Logger log = LoggerFactory.getLogger(NativeDepsLoader.class);
-  private static final String[] loadOrder = new String[] {
-      "boost_filesystem",
-      "rmm",
-      "cudf",
-      "cudfjni"
+  private static final Loader[] loadOrder = new Loader[] {
+      new Optional("nvrtc"),
+      new Required("boost_filesystem"),
+      new Required("rmm"),
+      new Required("cudf"),
+      new Required("cudfjni")
   };
   private static ClassLoader loader = NativeDepsLoader.class.getClassLoader();
   private static boolean loaded = false;
 
+  private static abstract class Loader {
+    protected final String baseName;
+    public Loader(String baseName) {
+      this.baseName = baseName;
+    }
+
+    public abstract void load(String os, String arch) throws IOException;
+  }
+
+  private static class Required extends Loader {
+    public Required(String baseName) {
+      super(baseName);
+    }
+
+    @Override
+    public void load(String os, String arch) throws IOException {
+      loadDep(os, arch, this.baseName, true);
+    }
+  }
+
+  private static class Optional extends Loader {
+    public Optional(String baseName) {
+      super(baseName);
+    }
+
+    @Override
+    public void load(String os, String arch) throws IOException {
+      loadDep(os, arch, this.baseName, false);
+    }
+  }
+
   static synchronized void loadNativeDeps() {
     if (!loaded) {
       String os = System.getProperty("os.name");
       String arch = System.getProperty("os.arch");
       try {
-        for (String toLoad : loadOrder) {
-          loadDep(os, arch, toLoad);
+        for (Loader toLoad : loadOrder) {
+          toLoad.load(os, arch);
         }
         loaded = true;
       } catch (Throwable t) {
@@ -55,7 +87,8 @@ static synchronized void loadNativeDeps() {
     }
   }
 
-  private static void loadDep(String os, String arch, String baseName) throws IOException {
+  private static void loadDep(String os, String arch, String baseName, boolean required)
+          throws IOException {
     String path = arch + "/" + os + "/" + System.mapLibraryName(baseName);
     File loc;
     URL resource = loader.getResource(path);
@@ -63,7 +96,11 @@ private static void loadDep(String os, String arch, String baseName) throws IOEx
       // It looks like we are not running from the jar, or there are issues with the jar
       File f = new File("./target/native-deps/" + path);
       if (!f.exists()) {
-        throw new FileNotFoundException("Could not locate native dependency " + path);
+        if (required) {
+          throw new FileNotFoundException("Could not locate native dependency " + path);
+        }
+        // Not required so we will skip it
+        return;
       }
       resource = f.toURL();
     }

From c7065ddf801efdf119be0ed26b5855015cc0f299 Mon Sep 17 00:00:00 2001
From: "Robert (Bobby) Evans" <bobby@apache.org>
Date: Wed, 27 May 2020 09:45:07 -0500
Subject: [PATCH 81/90] Update changelog

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 79f18ee1b27..296311595f0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -201,6 +201,7 @@
 - PR #5213 Documentation enhancements to `cudf` python APIs
 - PR #5251 Fix more mispellings in cpp comments and strings
 - PR #5270 Add support to check for "NaT" and "None" strings while typecasting to `datetime64`
+- PR #5298 Remove unused native deps from java library
 
 ## Bug Fixes
 

From 312106c48dc8f5e93d887e4c4467acc1ef28fc13 Mon Sep 17 00:00:00 2001
From: "Ram (Ramakrishna Prabhu)" <ramakrishnap@nvidia.com>
Date: Wed, 27 May 2020 09:57:45 -0500
Subject: [PATCH 82/90] Addressing reviews

---
 python/cudf/cudf/core/column/column.py |  10 +-
 python/cudf/cudf/core/column/string.py | 139 +------------------------
 python/cudf/cudf/tests/test_string.py  |   6 --
 python/nvstrings/nvcategory.py         |   7 ++
 python/nvstrings/nvstrings.py          |   7 ++
 python/nvstrings/nvtext.py             |   8 ++
 6 files changed, 29 insertions(+), 148 deletions(-)

diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index ee4ec0135e7..12c3ce106bc 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -10,8 +10,6 @@
 import pyarrow as pa
 from numba import cuda, njit
 
-import nvstrings
-
 import cudf
 import cudf._lib as libcudf
 from cudf._lib.column import Column
@@ -1170,6 +1168,8 @@ def as_column(arbitrary, nan_as_null=None, dtype=None, length=None):
     * pyarrow array
     * pandas.Categorical objects
     """
+    import nvstrings
+
     if isinstance(arbitrary, ColumnBase):
         if dtype is not None:
             return arbitrary.astype(dtype)
@@ -1186,12 +1186,6 @@ def as_column(arbitrary, nan_as_null=None, dtype=None, length=None):
             data = data.astype(dtype)
     # TODO: Remove nvstrings here when nvstrings is fully removed
     elif isinstance(arbitrary, nvstrings.nvstrings):
-        warnings.warn(
-            "nvstrings will be removed in 0.15. \
-            Please use equivalent from libcudf",
-            DeprecationWarning,
-            stacklevel=2,
-        )
         byte_count = arbitrary.byte_count()
         if byte_count > libcudf.MAX_STRING_COLUMN_BYTES:
             raise MemoryError(
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index 504d104783b..29523b75012 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -9,8 +9,6 @@
 import pandas as pd
 import pyarrow as pa
 
-import nvstrings
-
 import cudf._lib as libcudf
 import cudf._lib.string_casting as str_cast
 from cudf._lib.nvtext.generate_ngrams import (
@@ -166,18 +164,14 @@ def __getattr__(self, attr, *args, **kwargs):
         from cudf.core.series import Series
 
         # TODO: Remove when all needed string compute APIs are ported
-        warnings.warn(
-            "nvstrings will be removed in 0.15. \
-            Please use equivalent from libcudf",
-            DeprecationWarning,
-            stacklevel=2,
-        )
         if hasattr(self._column.nvstrings, attr):
             passed_attr = getattr(self._column.nvstrings, attr)
             if callable(passed_attr):
 
                 @functools.wraps(passed_attr)
                 def wrapper(*args, **kwargs):
+                    import nvstrings
+
                     ret = passed_attr(*args, **kwargs)
                     if isinstance(ret, nvstrings.nvstrings):
                         ret = Series(
@@ -306,12 +300,6 @@ def _return_or_inplace(self, new_col, **kwargs):
     def __dir__(self):
         keys = dir(type(self))
         # TODO: Remove along with `__getattr__` above when all is ported
-        warnings.warn(
-            "nvstrings will be removed in 0.15. \
-            Please use equivalent from libcudf",
-            DeprecationWarning,
-            stacklevel=2,
-        )
         return set(keys + dir(self._column.nvstrings))
 
     def len(self, **kwargs):
@@ -3391,12 +3379,6 @@ def normalize_spaces(self, **kwargs):
         1    test string
         dtype: object
         """
-        warnings.warn(
-            "nvtext will be removed in 0.15. \
-            Please use equivalent from libcudf",
-            DeprecationWarning,
-            stacklevel=2,
-        )
         return self._return_or_inplace(
             cpp_normalize_spaces(self._column), **kwargs
         )
@@ -3432,12 +3414,6 @@ def tokenize(self, delimiter=" ", **kwargs):
         """
         delimiter = _massage_string_arg(delimiter, "delimiter", allow_col=True)
         kwargs.setdefault("retain_index", False)
-        warnings.warn(
-            "nvtext will be removed in 0.15. \
-            Please use equivalent from libcudf",
-            DeprecationWarning,
-            stacklevel=2,
-        )
         return self._return_or_inplace(
             cpp_tokenize(self._column, delimiter), **kwargs
         )
@@ -3469,12 +3445,6 @@ def token_count(self, delimiter=" ", **kwargs):
         dtype: int32
         """
         delimiter = _massage_string_arg(delimiter, "delimiter", allow_col=True)
-        warnings.warn(
-            "nvtext will be removed in 0.15. \
-            Please use equivalent from libcudf",
-            DeprecationWarning,
-            stacklevel=2,
-        )
         return self._return_or_inplace(
             cpp_count_tokens(self._column, delimiter), **kwargs
         )
@@ -3513,12 +3483,6 @@ def ngrams(self, n=2, separator="_", **kwargs):
         """
         separator = _massage_string_arg(separator, "separator")
         kwargs.setdefault("retain_index", False)
-        warnings.warn(
-            "nvtext will be removed in 0.15. \
-            Please use equivalent from libcudf",
-            DeprecationWarning,
-            stacklevel=2,
-        )
         return self._return_or_inplace(
             cpp_generate_ngrams(self._column, n, separator), **kwargs
         )
@@ -3555,12 +3519,6 @@ def ngrams_tokenize(self, n=2, delimiter=" ", separator="_", **kwargs):
         delimiter = _massage_string_arg(delimiter, "delimiter")
         separator = _massage_string_arg(separator, "separator")
         kwargs.setdefault("retain_index", False)
-        warnings.warn(
-            "nvtext will be removed in 0.15. \
-            Please use equivalent from libcudf",
-            DeprecationWarning,
-            stacklevel=2,
-        )
         return self._return_or_inplace(
             cpp_ngrams_tokenize(self._column, n, delimiter, separator),
             **kwargs,
@@ -3648,19 +3606,7 @@ def __init__(
         )
 
         # TODO: Remove these once NVStrings is fully deprecated / removed
-        warnings.warn(
-            "nvstrings will be removed in 0.15. \
-            Please use equivalent from libcudf",
-            DeprecationWarning,
-            stacklevel=2,
-        )
         self._nvstrings = None
-        warnings.warn(
-            "nvcategory will be removed in 0.15. \
-            Please use equivalent from libcudf",
-            DeprecationWarning,
-            stacklevel=2,
-        )
         self._nvcategory = None
         self._indices = None
 
@@ -3688,19 +3634,7 @@ def set_base_mask(self, value):
 
         # TODO: Remove these once NVStrings is fully deprecated / removed
         self._indices = None
-        warnings.warn(
-            "nvcategory will be removed in 0.15. \
-            Please use equivalent from libcudf",
-            DeprecationWarning,
-            stacklevel=2,
-        )
         self._nvcategory = None
-        warnings.warn(
-            "nvstrings will be removed in 0.15. \
-            Please use equivalent from libcudf",
-            DeprecationWarning,
-            stacklevel=2,
-        )
         self._nvstrings = None
 
     def set_base_children(self, value):
@@ -3709,19 +3643,7 @@ def set_base_children(self, value):
 
         # TODO: Remove these once NVStrings is fully deprecated / removed
         self._indices = None
-        warnings.warn(
-            "nvcategory will be removed in 0.15. \
-            Please use equivalent from libcudf",
-            DeprecationWarning,
-            stacklevel=2,
-        )
         self._nvcategory = None
-        warnings.warn(
-            "nvstrings will be removed in 0.15. \
-            Please use equivalent from libcudf",
-            DeprecationWarning,
-            stacklevel=2,
-        )
         self._nvstrings = None
 
     @property
@@ -3801,17 +3723,14 @@ def __len__(self):
     # TODO: Remove this once NVStrings is fully deprecated / removed
     @property
     def nvstrings(self):
-        warnings.warn(
-            "nvstrings will be removed in 0.15. \
-            Please use equivalent from libcudf",
-            DeprecationWarning,
-            stacklevel=2,
-        )
         if self._nvstrings is None:
             if self.nullable:
                 mask_ptr = self.mask_ptr
             else:
                 mask_ptr = None
+
+            import nvstrings
+
             if self.size == 0:
                 self._nvstrings = nvstrings.to_device([])
             else:
@@ -3828,18 +3747,6 @@ def nvstrings(self):
     # TODO: Remove these once NVStrings is fully deprecated / removed
     @property
     def nvcategory(self):
-        warnings.warn(
-            "nvcategory will be removed in 0.15. \
-            Please use equivalent from libcudf",
-            DeprecationWarning,
-            stacklevel=2,
-        )
-        warnings.warn(
-            "nvstrings will be removed in 0.15. \
-            Please use equivalent from libcudf",
-            DeprecationWarning,
-            stacklevel=2,
-        )
         if self._nvcategory is None:
             import nvcategory as nvc
 
@@ -3849,29 +3756,11 @@ def nvcategory(self):
     # TODO: Remove these once NVStrings is fully deprecated / removed
     @nvcategory.setter
     def nvcategory(self, nvc):
-        warnings.warn(
-            "nvcategory will be removed in 0.15. \
-            Please use equivalent from libcudf",
-            DeprecationWarning,
-            stacklevel=2,
-        )
         self._nvcategory = nvc
 
     def _set_mask(self, value):
         # TODO: Remove these once NVStrings is fully deprecated / removed
-        warnings.warn(
-            "nvstrings will be removed in 0.15. \
-            Please use equivalent from libcudf",
-            DeprecationWarning,
-            stacklevel=2,
-        )
         self._nvstrings = None
-        warnings.warn(
-            "nvcategory will be removed in 0.15. \
-            Please use equivalent from libcudf",
-            DeprecationWarning,
-            stacklevel=2,
-        )
         self._nvcategory = None
         self._indices = None
 
@@ -3880,12 +3769,6 @@ def _set_mask(self, value):
     # TODO: Remove these once NVStrings is fully deprecated / removed
     @property
     def indices(self):
-        warnings.warn(
-            "nvcategory will be removed in 0.15. \
-            Please use equivalent from libcudf",
-            DeprecationWarning,
-            stacklevel=2,
-        )
         if self._indices is None:
             out_col = column_empty(self.nvcategory.size(), dtype="int32")
             ptr = out_col.data_ptr
@@ -4109,19 +3992,7 @@ def _mimic_inplace(self, other_col, inplace=False):
         out = super()._mimic_inplace(other_col, inplace=inplace)
         if inplace:
             # TODO: Remove these once NVStrings is fully deprecated / removed
-            warnings.warn(
-                "nvstrings will be removed in 0.15. \
-                Please use equivalent from libcudf",
-                DeprecationWarning,
-                stacklevel=2,
-            )
             self._nvstrings = other_col._nvstrings
-            warnings.warn(
-                "nvcategory will be removed in 0.15. \
-                Please use equivalent from libcudf",
-                DeprecationWarning,
-                stacklevel=2,
-            )
             self._nvcategory = other_col._nvcategory
             self._indices = other_col._indices
 
diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py
index c4403cbb42b..92babac4f0f 100644
--- a/python/cudf/cudf/tests/test_string.py
+++ b/python/cudf/cudf/tests/test_string.py
@@ -1,6 +1,5 @@
 # Copyright (c) 2018-2020, NVIDIA CORPORATION.
 
-import warnings
 from contextlib import ExitStack as does_not_raise
 from sys import getsizeof
 from unittest.mock import patch
@@ -66,11 +65,6 @@ def test_from_nvstrings_nbytes(mock_byte_count, nbytes):
     expectation = raise_builder(
         [nbytes > libcudf.MAX_STRING_COLUMN_BYTES], MemoryError
     )
-    warnings.warn(
-        "nvstrings will be removed in 0.15. Please remove this test case.",
-        DeprecationWarning,
-        stacklevel=2,
-    )
 
     with expectation:
         Series(nvstrings.to_device([""]))
diff --git a/python/nvstrings/nvcategory.py b/python/nvstrings/nvcategory.py
index eb30676987c..35b70294ff2 100644
--- a/python/nvstrings/nvcategory.py
+++ b/python/nvstrings/nvcategory.py
@@ -1,3 +1,4 @@
+import warnings
 import weakref
 
 import rmm  # noqa: F401
@@ -5,6 +6,12 @@
 import nvstrings as nvs
 import pyniNVCategory
 
+warnings.warn(
+    "nvcategory will be removed in 0.15. Please use equivalent from libcudf",
+    DeprecationWarning,
+    stacklevel=2,
+)
+
 
 def to_device(strs):
     """
diff --git a/python/nvstrings/nvstrings.py b/python/nvstrings/nvstrings.py
index 721be4f67d1..14a5310fce0 100644
--- a/python/nvstrings/nvstrings.py
+++ b/python/nvstrings/nvstrings.py
@@ -1,9 +1,16 @@
+import warnings
 import weakref
 
 import rmm  # noqa: F401
 
 import pyniNVStrings
 
+warnings.warn(
+    "nvstrings will be removed in 0.15. Please use equivalent from libcudf",
+    DeprecationWarning,
+    stacklevel=2,
+)
+
 
 def to_device(strs):
     """
diff --git a/python/nvstrings/nvtext.py b/python/nvstrings/nvtext.py
index a067c2de5ee..e058b04bf38 100644
--- a/python/nvstrings/nvtext.py
+++ b/python/nvstrings/nvtext.py
@@ -1,8 +1,16 @@
 # Copyright (c) 2019, NVIDIA CORPORATION.
 
+import warnings
+
 import nvstrings as nvs
 import pyniNVText
 
+warnings.warn(
+    "nvtext will be removed in 0.15. Please use equivalent from libcudf",
+    DeprecationWarning,
+    stacklevel=2,
+)
+
 
 def tokenize(strs, delimiter=None):
     """

From 84d895d07d2dca5b4d551a1dea9ab8104b96ee24 Mon Sep 17 00:00:00 2001
From: "Ram (Ramakrishna Prabhu)" <ramakrishnap@nvidia.com>
Date: Wed, 27 May 2020 10:46:13 -0500
Subject: [PATCH 83/90] addressing reviews

---
 python/cudf/cudf/core/column/column.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 12c3ce106bc..12fbc6d46b9 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -1168,8 +1168,6 @@ def as_column(arbitrary, nan_as_null=None, dtype=None, length=None):
     * pyarrow array
     * pandas.Categorical objects
     """
-    import nvstrings
-
     if isinstance(arbitrary, ColumnBase):
         if dtype is not None:
             return arbitrary.astype(dtype)
@@ -1185,7 +1183,7 @@ def as_column(arbitrary, nan_as_null=None, dtype=None, length=None):
         if dtype is not None:
             data = data.astype(dtype)
     # TODO: Remove nvstrings here when nvstrings is fully removed
-    elif isinstance(arbitrary, nvstrings.nvstrings):
+    elif type(arbitrary).__name__ == "nvstrings":
         byte_count = arbitrary.byte_count()
         if byte_count > libcudf.MAX_STRING_COLUMN_BYTES:
             raise MemoryError(

From 0f47cc6d13ffa1e7a43b8d95337bf6a6a1ecdac8 Mon Sep 17 00:00:00 2001
From: "Robert (Bobby) Evans" <bobby@apache.org>
Date: Wed, 27 May 2020 10:52:49 -0500
Subject: [PATCH 84/90] Addressed review comments

---
 java/README.md                                | 16 +++---
 java/pom.xml                                  | 48 +---------------
 .../java/ai/rapids/cudf/NativeDepsLoader.java | 55 +++----------------
 3 files changed, 19 insertions(+), 100 deletions(-)

diff --git a/java/README.md b/java/README.md
index 172fb1a5faa..06a3dc65592 100644
--- a/java/README.md
+++ b/java/README.md
@@ -53,13 +53,15 @@ CUDA 10.0:
 Build the native code first, and make sure the a JDK is installed and available. 
 If you use the default cmake options libcudart will be dynamically linked to libcudf and librmm
 which are included.  If you do this the resulting jar will have a classifier associated with it
-because that jar can only be used with a single version of the CUDA runtime.  If you want
-to remove that requirement you can build RMM and cuDF with `-DCUDA_STATIC_RUNTIME=ON` when
-running cmake, and similarly -DCUDA_STATIC_RUNTIME=ON when running maven.  This will statically 
-link in the CUDA runtime and result in a jar with no
-classifier that should run on any host that has a version of the driver new enough to support
-the runtime that this was built with.  Official releases will indicate in the release notes
-the minimum driver version required.
+because that jar can only be used with a single version of the CUDA runtime.  
+
+
+There is experimental work to try and remove that requirement but it is not fully functionaly
+you can build RMM and cuDF with `-DCUDA_STATIC_RUNTIME=ON` when running cmake, and similarly 
+-DCUDA_STATIC_RUNTIME=ON when running maven.  This will statically  link in the CUDA runtime
+and result in a jar with no classifier that should run on any host that has a version of the
+driver new enough to support the runtime that this was built with. Unfortunely libnvrtc is still
+required for runtime code generation which also is tied to a specific version of cuda.
 
 To build with maven for dynamic linking you would run.
 
diff --git a/java/pom.xml b/java/pom.xml
index ebaa63d29aa..f3e573c8c57 100755
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -231,45 +231,6 @@
                 </plugins>
             </build>
         </profile>
-        <profile>
-            <id>COPY_NVRTC</id>
-            <activation>
-                <property>
-                    <name>CUDA_STATIC_RUNTIME</name>
-                    <value>ON</value>
-                </property>
-            </activation>
-            <build>
-            <plugins>
-              <plugin>
-                <artifactId>maven-resources-plugin</artifactId>
-                <executions>
-                    <execution>
-                        <id>copy-nvrtc</id>
-                        <phase>validate</phase>
-                        <goals>
-                            <goal>copy-resources</goal>
-                        </goals>
-                        <configuration>
-                            <overwrite>true</overwrite>
-                            <skip>${skipNativeCopy}</skip>
-                            <outputDirectory>${basedir}/target/native-deps/${os.arch}/${os.name}</outputDirectory>
-                            <resources>
-                                <resource>
-                                    <!--Set by groovy script-->
-                                    <directory>${cuda.path}</directory>
-                                    <includes>
-                                        <include>libnvrtc.so</include>
-                                    </includes>
-                                </resource>
-                            </resources>
-                        </configuration>
-                    </execution>
-                </executions>
-              </plugin>
-          </plugins>
-      </build>
-        </profile>
     </profiles>
 
     <build>
@@ -454,14 +415,7 @@
                             }
 
                             if (pom.properties['CUDA_STATIC_RUNTIME'] == 'ON') {
-                                // We want to pull in nvrtc in this case
-                                def libnvrtc =  ~/libnvrtc\\.so.*\\s+=>\\s+(.*)libnvrtc.*\\.so.*\\s+.*/
-                                def rtcm = libnvrtc.matcher(sout)
-                                if (rtcm.find()) {
-                                  pom.properties['cuda.path'] = rtcm.group(1)
-                                } else {
-                                  fail('could not find libnvrtc which we need to package when statically linking')
-                                }
+                                println 'WARNING RUNNING WITH STATIC LINKING DOES NOT FULLY WORK. USE WITH CAUTION.'
                             }
                             </source>
                         </configuration>
diff --git a/java/src/main/java/ai/rapids/cudf/NativeDepsLoader.java b/java/src/main/java/ai/rapids/cudf/NativeDepsLoader.java
index e2b9e7471e6..c159a088d86 100755
--- a/java/src/main/java/ai/rapids/cudf/NativeDepsLoader.java
+++ b/java/src/main/java/ai/rapids/cudf/NativeDepsLoader.java
@@ -31,54 +31,22 @@
  */
 public class NativeDepsLoader {
   private static final Logger log = LoggerFactory.getLogger(NativeDepsLoader.class);
-  private static final Loader[] loadOrder = new Loader[] {
-      new Optional("nvrtc"),
-      new Required("boost_filesystem"),
-      new Required("rmm"),
-      new Required("cudf"),
-      new Required("cudfjni")
+  private static final String[] loadOrder = new String[] {
+      "boost_filesystem",
+      "rmm",
+      "cudf",
+      "cudfjni"
   };
   private static ClassLoader loader = NativeDepsLoader.class.getClassLoader();
   private static boolean loaded = false;
 
-  private static abstract class Loader {
-    protected final String baseName;
-    public Loader(String baseName) {
-      this.baseName = baseName;
-    }
-
-    public abstract void load(String os, String arch) throws IOException;
-  }
-
-  private static class Required extends Loader {
-    public Required(String baseName) {
-      super(baseName);
-    }
-
-    @Override
-    public void load(String os, String arch) throws IOException {
-      loadDep(os, arch, this.baseName, true);
-    }
-  }
-
-  private static class Optional extends Loader {
-    public Optional(String baseName) {
-      super(baseName);
-    }
-
-    @Override
-    public void load(String os, String arch) throws IOException {
-      loadDep(os, arch, this.baseName, false);
-    }
-  }
-
   static synchronized void loadNativeDeps() {
     if (!loaded) {
       String os = System.getProperty("os.name");
       String arch = System.getProperty("os.arch");
       try {
-        for (Loader toLoad : loadOrder) {
-          toLoad.load(os, arch);
+        for (String toLoad : loadOrder) {
+          loadDep(os, arch, toLoad);
         }
         loaded = true;
       } catch (Throwable t) {
@@ -87,8 +55,7 @@ static synchronized void loadNativeDeps() {
     }
   }
 
-  private static void loadDep(String os, String arch, String baseName, boolean required)
-          throws IOException {
+  private static void loadDep(String os, String arch, String baseName) throws IOException {
     String path = arch + "/" + os + "/" + System.mapLibraryName(baseName);
     File loc;
     URL resource = loader.getResource(path);
@@ -96,11 +63,7 @@ private static void loadDep(String os, String arch, String baseName, boolean req
       // It looks like we are not running from the jar, or there are issues with the jar
       File f = new File("./target/native-deps/" + path);
       if (!f.exists()) {
-        if (required) {
-          throw new FileNotFoundException("Could not locate native dependency " + path);
-        }
-        // Not required so we will skip it
-        return;
+        throw new FileNotFoundException("Could not locate native dependency " + path);
       }
       resource = f.toURL();
     }

From 1bc72f56ae29be86382f8ff7869f25f35d542ed2 Mon Sep 17 00:00:00 2001
From: "Robert (Bobby) Evans" <bobby@apache.org>
Date: Wed, 27 May 2020 10:56:11 -0500
Subject: [PATCH 85/90] spell check

---
 java/README.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/java/README.md b/java/README.md
index 06a3dc65592..c4bdfafad60 100644
--- a/java/README.md
+++ b/java/README.md
@@ -56,11 +56,11 @@ which are included.  If you do this the resulting jar will have a classifier ass
 because that jar can only be used with a single version of the CUDA runtime.  
 
 
-There is experimental work to try and remove that requirement but it is not fully functionaly
+There is experimental work to try and remove that requirement, but it is not fully functional;
 you can build RMM and cuDF with `-DCUDA_STATIC_RUNTIME=ON` when running cmake, and similarly 
--DCUDA_STATIC_RUNTIME=ON when running maven.  This will statically  link in the CUDA runtime
+`-DCUDA_STATIC_RUNTIME=ON` when running maven.  This will statically link in the CUDA runtime
 and result in a jar with no classifier that should run on any host that has a version of the
-driver new enough to support the runtime that this was built with. Unfortunely libnvrtc is still
+driver new enough to support the runtime that this was built with. Unfortunately `libnvrtc` is still
 required for runtime code generation which also is tied to a specific version of cuda.
 
 To build with maven for dynamic linking you would run.

From 45858a636db3803bb2d6ecbfae1adc592d23c435 Mon Sep 17 00:00:00 2001
From: "Ram (Ramakrishna Prabhu)" <ramakrishnap@nvidia.com>
Date: Wed, 27 May 2020 11:17:47 -0500
Subject: [PATCH 86/90] moving the nvstring condition below

---
 python/cudf/cudf/core/column/column.py | 61 +++++++++++++-------------
 1 file changed, 31 insertions(+), 30 deletions(-)

diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 12fbc6d46b9..4527740411d 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -1182,36 +1182,6 @@ def as_column(arbitrary, nan_as_null=None, dtype=None, length=None):
         data = arbitrary._values
         if dtype is not None:
             data = data.astype(dtype)
-    # TODO: Remove nvstrings here when nvstrings is fully removed
-    elif type(arbitrary).__name__ == "nvstrings":
-        byte_count = arbitrary.byte_count()
-        if byte_count > libcudf.MAX_STRING_COLUMN_BYTES:
-            raise MemoryError(
-                "Cannot construct string columns "
-                "containing > {} bytes. "
-                "Consider using dask_cudf to partition "
-                "your data.".format(libcudf.MAX_STRING_COLUMN_BYTES_STR)
-            )
-        sbuf = Buffer.empty(arbitrary.byte_count())
-        obuf = Buffer.empty(
-            (arbitrary.size() + 1) * np.dtype("int32").itemsize
-        )
-
-        nbuf = None
-        if arbitrary.null_count() > 0:
-            nbuf = create_null_mask(
-                arbitrary.size(), state=MaskState.UNINITIALIZED
-            )
-            arbitrary.set_null_bitmask(nbuf.ptr, bdevmem=True)
-        arbitrary.to_offsets(sbuf.ptr, obuf.ptr, None, bdevmem=True)
-        children = (
-            build_column(obuf, dtype="int32"),
-            build_column(sbuf, dtype="int8"),
-        )
-        data = build_column(
-            data=None, dtype="object", mask=nbuf, children=children
-        )
-        data._nvstrings = arbitrary
 
     elif isinstance(arbitrary, Buffer):
         if dtype is None:
@@ -1245,6 +1215,37 @@ def as_column(arbitrary, nan_as_null=None, dtype=None, length=None):
                 col = utils.time_col_replace_nulls(col)
         return col
 
+    # TODO: Remove nvstrings here when nvstrings is fully removed
+    elif type(arbitrary).__name__ == "nvstrings":
+        byte_count = arbitrary.byte_count()
+        if byte_count > libcudf.MAX_STRING_COLUMN_BYTES:
+            raise MemoryError(
+                "Cannot construct string columns "
+                "containing > {} bytes. "
+                "Consider using dask_cudf to partition "
+                "your data.".format(libcudf.MAX_STRING_COLUMN_BYTES_STR)
+            )
+        sbuf = Buffer.empty(arbitrary.byte_count())
+        obuf = Buffer.empty(
+            (arbitrary.size() + 1) * np.dtype("int32").itemsize
+        )
+
+        nbuf = None
+        if arbitrary.null_count() > 0:
+            nbuf = create_null_mask(
+                arbitrary.size(), state=MaskState.UNINITIALIZED
+            )
+            arbitrary.set_null_bitmask(nbuf.ptr, bdevmem=True)
+        arbitrary.to_offsets(sbuf.ptr, obuf.ptr, None, bdevmem=True)
+        children = (
+            build_column(obuf, dtype="int32"),
+            build_column(sbuf, dtype="int8"),
+        )
+        data = build_column(
+            data=None, dtype="object", mask=nbuf, children=children
+        )
+        data._nvstrings = arbitrary
+
     elif isinstance(arbitrary, pa.Array):
         if isinstance(arbitrary, pa.StringArray):
             pa_size, pa_offset, nbuf, obuf, sbuf = buffers_from_pyarrow(

From afc2b5e9e5277c6a3aeb74e1b3ca1fae5e69661c Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 27 May 2020 19:09:46 -0400
Subject: [PATCH 87/90] Hack around current Serializable requirements

---
 cpp/include/cudf/copying.hpp      | 418 +++++++++++++++---------------
 python/cudf/cudf/_lib/copying.pyx |   1 +
 python/cudf/cudf/core/abc.py      |  11 +-
 3 files changed, 218 insertions(+), 212 deletions(-)

diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp
index 2f58994295d..22f68f83c64 100644
--- a/cpp/include/cudf/copying.hpp
+++ b/cpp/include/cudf/copying.hpp
@@ -538,6 +538,7 @@ struct unpack_result {
  */
 unpack_result unpack(std::unique_ptr<packed_columns> input);
 
+/**
  * @brief   Returns a new column, where each element is selected from either @p lhs or
  *          @p rhs based on the value of the corresponding element in @p boolean_mask
  *
@@ -562,212 +563,211 @@ std::unique_ptr<column> copy_if_else(
   column_view const& boolean_mask,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
 
- /**
-  * @brief Creates a new column by shifting all values by an offset.
-  *
-  * @ingroup copy_shift
-  *
-  * Elements will be determined by `output[idx] = input[idx - offset]`.
-  * Some elements in the output may be indeterminable from the input. For those
-  * elements, the value will be determined by `fill_values`.
-  *
-  * @code{.pseudo}
-  * Examples
-  * -------------------------------------------------
-  * input       = [0, 1, 2, 3, 4]
-  * offset      = 3
-  * fill_values = @
-  * return      = [@, @, @, 0, 1]
-  * -------------------------------------------------
-  * input       = [5, 4, 3, 2, 1]
-  * offset      = -2
-  * fill_values = 7
-  * return      = [3, 2, 1, 7, 7]
-  * @endcode
-  *
-  * @note if the input is nullable, the output will be nullable.
-  * @note if the fill value is null, the output will be nullable.
-  *
-  * @param input      Column to be shifted.
-  * @param offset     The offset by which to shift the input.
-  * @param fill_value Fill value for indeterminable outputs.
-  *
-  * @throw cudf::logic_error if @p input dtype is not fixed-with.
-  * @throw cudf::logic_error if @p fill_value dtype does not match @p input dtype.
-  */
- std::unique_ptr<column> shift(
-   column_view const& input,
-   size_type offset,
-   scalar const& fill_value,
-   rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource(),
-   cudaStream_t stream                 = 0);
-
- /**
-  * @brief   Returns a new column, where each element is selected from either @p lhs or
-  *          @p rhs based on the value of the corresponding element in @p boolean_mask
-  *
-  * Selects each element i in the output column from either @p rhs or @p lhs using the following
-  * rule: `output[i] = (boolean_mask.valid(i) and boolean_mask[i]) ? lhs : rhs[i]`
-  *
-  * @throws cudf::logic_error if lhs and rhs are not of the same type
-  * @throws cudf::logic_error if boolean mask is not of type bool
-  * @throws cudf::logic_error if boolean mask is not of the same length as rhs
-  * @param[in] lhs left-hand scalar
-  * @param[in] rhs right-hand column_view
-  * @param[in] boolean_mask column of `BOOL8` representing "left (true) / right (false)" boolean for
-  * each element. Null element represents false.
-  * @param[in] mr resource for allocating device memory
-  *
-  * @returns new column with the selected elements
-  */
- std::unique_ptr<column> copy_if_else(
-   scalar const& lhs,
-   column_view const& rhs,
-   column_view const& boolean_mask,
-   rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
-
- /**
-  * @brief   Returns a new column, where each element is selected from either @p lhs or
-  *          @p rhs based on the value of the corresponding element in @p boolean_mask
-  *
-  * Selects each element i in the output column from either @p rhs or @p lhs using the following
-  * rule: `output[i] = (boolean_mask.valid(i) and boolean_mask[i]) ? lhs[i] : rhs`
-  *
-  * @throws cudf::logic_error if lhs and rhs are not of the same type
-  * @throws cudf::logic_error if boolean mask is not of type bool
-  * @throws cudf::logic_error if boolean mask is not of the same length as lhs
-  * @param[in] lhs left-hand column_view
-  * @param[in] rhs right-hand scalar
-  * @param[in] boolean_mask column of `BOOL8` representing "left (true) / right (false)" boolean for
-  * each element. Null element represents false.
-  * @param[in] mr resource for allocating device memory
-  *
-  * @returns new column with the selected elements
-  */
- std::unique_ptr<column> copy_if_else(
-   column_view const& lhs,
-   scalar const& rhs,
-   column_view const& boolean_mask,
-   rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
-
- /**
-  * @brief   Returns a new column, where each element is selected from either @p lhs or
-  *          @p rhs based on the value of the corresponding element in @p boolean_mask
-  *
-  * Selects each element i in the output column from either @p rhs or @p lhs using the following
-  * rule: `output[i] = (boolean_mask.valid(i) and boolean_mask[i]) ? lhs : rhs`
-  *
-  * @throws cudf::logic_error if boolean mask is not of type bool
-  * @param[in] lhs left-hand scalar
-  * @param[in] rhs right-hand scalar
-  * @param[in] boolean_mask column of `BOOL8` representing "left (true) / right (false)" boolean for
-  * each element. null element represents false.
-  * @param[in] mr resource for allocating device memory
-  *
-  * @returns new column with the selected elements
-  */
- std::unique_ptr<column> copy_if_else(
-   scalar const& lhs,
-   scalar const& rhs,
-   column_view const& boolean_mask,
-   rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
-
- /**
-  * @brief Scatters rows from the input table to rows of the output corresponding
-  * to true values in a boolean mask.
-  *
-  * @ingroup copy_scatter
-  *
-  * The `i`th row of `input` will be written to the output table at the location
-  * of the `i`th true value in `boolean_mask`. All other rows in the output will
-  * equal the same row in `target`.
-  *
-  * `boolean_mask` should have number of `true`s <= number of rows in `input`.
-  * If boolean mask is `true`, corresponding value in target is updated with
-  * value from corresponding `input` column, else it is left untouched.
-  *
-  * @code{.pseudo}
-  * Example:
-  * input: {{1, 5, 6, 8, 9}}
-  * boolean_mask: {true, false, false, false, true, true, false, true, true, false}
-  * target:       {{   2,     2,     3,     4,    4,     7,    7,    7,    8,    10}}
-  *
-  * output:       {{   1,     2,     3,     4,    5,     6,    7,    8,    9,    10}}
-  * @endcode
-  *
-  * @throw  cudf::logic_error if input.num_columns() != target.num_columns()
-  * @throws cudf::logic_error if any `i`th input_column type != `i`th target_column type
-  * @throws cudf::logic_error if boolean_mask.type() != bool
-  * @throws cudf::logic_error if boolean_mask.size() != target.num_rows()
-  * @throws cudf::logic_error if number of `true` in `boolean_mask` > input.num_rows()
-  *
-  * @param[in] input table_view (set of dense columns) to scatter
-  * @param[in] target table_view to modify with scattered values from `input`
-  * @param[in] boolean_mask column_view which acts as boolean mask.
-  * @param[in] mr Optional, The resource to use for all returned allocations
-  *
-  * @returns Returns a table by scattering `input` into `target` as per `boolean_mask`.
-  */
- std::unique_ptr<table> boolean_mask_scatter(
-   table_view const& input,
-   table_view const& target,
-   column_view const& boolean_mask,
-   rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
-
- /**
-  * @brief Scatters scalar values to rows of the output corresponding
-  * to true values in a boolean mask.
-  *
-  * @ingroup copy_scatter
-  *
-  * The `i`th scalar in `input` will be written to all columns of the output
-  * table at the location of the `i`th true value in `boolean_mask`.
-  * All other rows in the output will equal the same row in `target`.
-  *
-  * @code{.pseudo}
-  * Example:
-  * input: {11}
-  * boolean_mask: {true, false, false, false, true, true, false, true, true, false}
-  * target:      {{   2,     2,     3,     4,    4,     7,    7,    7,    8,    10}}
-  *
-  * output:       {{   11,    2,     3,     4,   11,    11,    7,   11,   11,    10}}
-  * @endcode
-  *
-  * @throw  cudf::logic_error if input.size() != target.num_columns()
-  * @throws cudf::logic_error if any `i`th input_scalar type != `i`th target_column type
-  * @throws cudf::logic_error if boolean_mask.type() != bool
-  * @throws cudf::logic_error if boolean_mask.size() != target.size()
-  *
-  * @param[in] input scalars to scatter
-  * @param[in] target table_view to modify with scattered values from `input`
-  * @param[in] boolean_mask column_view which acts as boolean mask.
-  * @param[in] mr Optional, The resource to use for all returned allocations
-  *
-  * @returns Returns a table by scattering `input` into `target` as per `boolean_mask`.
-  */
- std::unique_ptr<table> boolean_mask_scatter(
-   std::vector<std::reference_wrapper<scalar>> const& input,
-   table_view const& target,
-   column_view const& boolean_mask,
-   rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
-
- /**
-  * @brief Get the element at specified index from a column
-  *
-  * @warning This function is expensive (invokes a kernel launch). So, it is not
-  * recommended to be used in performance sensitive code or inside a loop.
-  *
-  * @throws cudf::logic_error if `index` is not within the range `[0, input.size())`
-  *
-  * @param input Column view to get the element from
-  * @param index Index into `input` to get the element at
-  * @param mr Optional, The resource to use for all returned allocations
-  * @return std::unique_ptr<scalar> Scalar containing the single value
-  */
- std::unique_ptr<scalar> get_element(
-   column_view const& input,
-   size_type index,
-   rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
-
- /** @} */
- }  // namespace cudf
+/**
+ * @brief Creates a new column by shifting all values by an offset.
+ *
+ * @ingroup copy_shift
+ *
+ * Elements will be determined by `output[idx] = input[idx - offset]`.
+ * Some elements in the output may be indeterminable from the input. For those
+ * elements, the value will be determined by `fill_values`.
+ *
+ * @code{.pseudo}
+ * Examples
+ * -------------------------------------------------
+ * input       = [0, 1, 2, 3, 4]
+ * offset      = 3
+ * fill_values = @
+ * return      = [@, @, @, 0, 1]
+ * -------------------------------------------------
+ * input       = [5, 4, 3, 2, 1]
+ * offset      = -2
+ * fill_values = 7
+ * return      = [3, 2, 1, 7, 7]
+ * @endcode
+ *
+ * @note if the input is nullable, the output will be nullable.
+ * @note if the fill value is null, the output will be nullable.
+ *
+ * @param input      Column to be shifted.
+ * @param offset     The offset by which to shift the input.
+ * @param fill_value Fill value for indeterminable outputs.
+ *
+ * @throw cudf::logic_error if @p input dtype is not fixed-width.
+ * @throw cudf::logic_error if @p fill_value dtype does not match @p input dtype.
+ */
+std::unique_ptr<column> shift(column_view const& input,
+                              size_type offset,
+                              scalar const& fill_value,
+                              rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource(),
+                              cudaStream_t stream                 = 0);
+
+/**
+ * @brief   Returns a new column, where each element is selected from either @p lhs or
+ *          @p rhs based on the value of the corresponding element in @p boolean_mask
+ *
+ * Selects each element i in the output column from either @p rhs or @p lhs using the following
+ * rule: `output[i] = (boolean_mask.valid(i) and boolean_mask[i]) ? lhs : rhs[i]`
+ *
+ * @throws cudf::logic_error if lhs and rhs are not of the same type
+ * @throws cudf::logic_error if boolean mask is not of type bool
+ * @throws cudf::logic_error if boolean mask is not of the same length as rhs
+ * @param[in] lhs left-hand scalar
+ * @param[in] rhs right-hand column_view
+ * @param[in] boolean_mask column of `BOOL8` representing "left (true) / right (false)" boolean for
+ * each element. Null element represents false.
+ * @param[in] mr resource for allocating device memory
+ *
+ * @returns new column with the selected elements
+ */
+std::unique_ptr<column> copy_if_else(
+  scalar const& lhs,
+  column_view const& rhs,
+  column_view const& boolean_mask,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
+
+/**
+ * @brief   Returns a new column, where each element is selected from either @p lhs or
+ *          @p rhs based on the value of the corresponding element in @p boolean_mask
+ *
+ * Selects each element i in the output column from either @p rhs or @p lhs using the following
+ * rule: `output[i] = (boolean_mask.valid(i) and boolean_mask[i]) ? lhs[i] : rhs`
+ *
+ * @throws cudf::logic_error if lhs and rhs are not of the same type
+ * @throws cudf::logic_error if boolean mask is not of type bool
+ * @throws cudf::logic_error if boolean mask is not of the same length as lhs
+ * @param[in] lhs left-hand column_view
+ * @param[in] rhs right-hand scalar
+ * @param[in] boolean_mask column of `BOOL8` representing "left (true) / right (false)" boolean for
+ * each element. Null element represents false.
+ * @param[in] mr resource for allocating device memory
+ *
+ * @returns new column with the selected elements
+ */
+std::unique_ptr<column> copy_if_else(
+  column_view const& lhs,
+  scalar const& rhs,
+  column_view const& boolean_mask,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
+
+/**
+ * @brief   Returns a new column, where each element is selected from either @p lhs or
+ *          @p rhs based on the value of the corresponding element in @p boolean_mask
+ *
+ * Selects each element i in the output column from either @p rhs or @p lhs using the following
+ * rule: `output[i] = (boolean_mask.valid(i) and boolean_mask[i]) ? lhs : rhs`
+ *
+ * @throws cudf::logic_error if boolean mask is not of type bool
+ * @param[in] lhs left-hand scalar
+ * @param[in] rhs right-hand scalar
+ * @param[in] boolean_mask column of `BOOL8` representing "left (true) / right (false)" boolean for
+ * each element. null element represents false.
+ * @param[in] mr resource for allocating device memory
+ *
+ * @returns new column with the selected elements
+ */
+std::unique_ptr<column> copy_if_else(
+  scalar const& lhs,
+  scalar const& rhs,
+  column_view const& boolean_mask,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
+
+/**
+ * @brief Scatters rows from the input table to rows of the output corresponding
+ * to true values in a boolean mask.
+ *
+ * @ingroup copy_scatter
+ *
+ * The `i`th row of `input` will be written to the output table at the location
+ * of the `i`th true value in `boolean_mask`. All other rows in the output will
+ * equal the same row in `target`.
+ *
+ * `boolean_mask` should have number of `true`s <= number of rows in `input`.
+ * If boolean mask is `true`, corresponding value in target is updated with
+ * value from corresponding `input` column, else it is left untouched.
+ *
+ * @code{.pseudo}
+ * Example:
+ * input: {{1, 5, 6, 8, 9}}
+ * boolean_mask: {true, false, false, false, true, true, false, true, true, false}
+ * target:       {{   2,     2,     3,     4,    4,     7,    7,    7,    8,    10}}
+ *
+ * output:       {{   1,     2,     3,     4,    5,     6,    7,    8,    9,    10}}
+ * @endcode
+ *
+ * @throw  cudf::logic_error if input.num_columns() != target.num_columns()
+ * @throws cudf::logic_error if any `i`th input_column type != `i`th target_column type
+ * @throws cudf::logic_error if boolean_mask.type() != bool
+ * @throws cudf::logic_error if boolean_mask.size() != target.num_rows()
+ * @throws cudf::logic_error if number of `true` in `boolean_mask` > input.num_rows()
+ *
+ * @param[in] input table_view (set of dense columns) to scatter
+ * @param[in] target table_view to modify with scattered values from `input`
+ * @param[in] boolean_mask column_view which acts as boolean mask.
+ * @param[in] mr Optional, The resource to use for all returned allocations
+ *
+ * @returns Returns a table by scattering `input` into `target` as per `boolean_mask`.
+ */
+std::unique_ptr<table> boolean_mask_scatter(
+  table_view const& input,
+  table_view const& target,
+  column_view const& boolean_mask,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
+
+/**
+ * @brief Scatters scalar values to rows of the output corresponding
+ * to true values in a boolean mask.
+ *
+ * @ingroup copy_scatter
+ *
+ * The `i`th scalar in `input` will be written to all columns of the output
+ * table at the location of the `i`th true value in `boolean_mask`.
+ * All other rows in the output will equal the same row in `target`.
+ *
+ * @code{.pseudo}
+ * Example:
+ * input: {11}
+ * boolean_mask: {true, false, false, false, true, true, false, true, true, false}
+ * target:      {{   2,     2,     3,     4,    4,     7,    7,    7,    8,    10}}
+ *
+ * output:       {{   11,    2,     3,     4,   11,    11,    7,   11,   11,    10}}
+ * @endcode
+ *
+ * @throw  cudf::logic_error if input.size() != target.num_columns()
+ * @throws cudf::logic_error if any `i`th input_scalar type != `i`th target_column type
+ * @throws cudf::logic_error if boolean_mask.type() != bool
+ * @throws cudf::logic_error if boolean_mask.size() != target.num_rows()
+ *
+ * @param[in] input scalars to scatter
+ * @param[in] target table_view to modify with scattered values from `input`
+ * @param[in] boolean_mask column_view which acts as boolean mask.
+ * @param[in] mr Optional, The resource to use for all returned allocations
+ *
+ * @returns Returns a table by scattering `input` into `target` as per `boolean_mask`.
+ */
+std::unique_ptr<table> boolean_mask_scatter(
+  std::vector<std::reference_wrapper<scalar>> const& input,
+  table_view const& target,
+  column_view const& boolean_mask,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
+
+/**
+ * @brief Get the element at specified index from a column
+ *
+ * @warning This function is expensive (invokes a kernel launch). So, it is not
+ * recommended to be used in performance sensitive code or inside a loop.
+ *
+ * @throws cudf::logic_error if `index` is not within the range `[0, input.size())`
+ *
+ * @param input Column view to get the element from
+ * @param index Index into `input` to get the element at
+ * @param mr Optional, The resource to use for all returned allocations
+ * @return std::unique_ptr<scalar> Scalar containing the single value
+ */
+std::unique_ptr<scalar> get_element(
+  column_view const& input,
+  size_type index,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
+
+/** @} */
+}  // namespace cudf
diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx
index a3fefb545d9..13068d9f773 100644
--- a/python/cudf/cudf/_lib/copying.pyx
+++ b/python/cudf/cudf/_lib/copying.pyx
@@ -10,6 +10,7 @@ from libc.stdint cimport int32_t, uint8_t
 
 from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer
 
+import cudf
 from cudf._lib.column cimport Column
 from cudf._lib.scalar import as_scalar
 from cudf._lib.scalar cimport Scalar
diff --git a/python/cudf/cudf/core/abc.py b/python/cudf/cudf/core/abc.py
index 1da4104f475..553454be335 100644
--- a/python/cudf/cudf/core/abc.py
+++ b/python/cudf/cudf/core/abc.py
@@ -23,7 +23,6 @@ def deserialize(cls, header, frames):
 
     def device_serialize(self):
         header, frames = self.serialize()
-        assert all((type(f) is cudf.core.buffer.Buffer) for f in frames)
         header["type-serialized"] = pickle.dumps(type(self))
         header["lengths"] = [f.nbytes for f in frames]
         return header, frames
@@ -33,7 +32,8 @@ def device_deserialize(cls, header, frames):
         for f in frames:
             # some frames are empty -- meta/empty partitions/etc
             if len(f) > 0:
-                assert hasattr(f, "__cuda_array_interface__")
+                # assert hasattr(f, "__cuda_array_interface__")
+                pass
 
         typ = pickle.loads(header["type-serialized"])
         obj = typ.deserialize(header, frames)
@@ -42,7 +42,12 @@ def device_deserialize(cls, header, frames):
 
     def host_serialize(self):
         header, frames = self.device_serialize()
-        frames = [f.to_host_array().view("u1").data for f in frames]
+        frames = [
+            cudf.core.buffer.Buffer(f).to_host_array().view("u1").data
+            if hasattr(f, "__cuda_array_interface__")
+            else f
+            for f in frames
+        ]
         return header, frames
 
     @classmethod

From ff406d845c6acca96c4dd873d0deba2ec6bf2fb8 Mon Sep 17 00:00:00 2001
From: John Kirkham <jakirkham@gmail.com>
Date: Wed, 27 May 2020 22:22:06 -0700
Subject: [PATCH 88/90] Handle host frames in serialization

As some serialization strategies may rely on some or all frames on host,
add logic to `Serializable` to track which frames are on host or device
and ensure they are handled appropriately.
---
 CHANGELOG.md                 |  1 +
 python/cudf/cudf/core/abc.py | 22 ++++++++++++++++++----
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a61a4a7f50a..e422f07bdc3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,7 @@
 - PR #5288 Drop `auto_pickle` decorator #5288
 - PR #5231 Type `Buffer` as `uint8`
 - PR #5308 Coerce frames to `Buffer`s in deserialization
+- PR #5309 Handle host frames in serialization
 
 ## Bug Fixes
 
diff --git a/python/cudf/cudf/core/abc.py b/python/cudf/cudf/core/abc.py
index 3191f121224..533c4016b76 100644
--- a/python/cudf/cudf/core/abc.py
+++ b/python/cudf/cudf/core/abc.py
@@ -21,27 +21,41 @@ def deserialize(cls, header, frames):
 
     def device_serialize(self):
         header, frames = self.serialize()
-        assert all((type(f) is cudf.core.buffer.Buffer) for f in frames)
+        assert all(
+            (type(f) in [cudf.core.buffer.Buffer, memoryview]) for f in frames
+        )
         header["type-serialized"] = pickle.dumps(type(self))
+        header["is-cuda"] = [
+            hasattr(f, "__cuda_array_interface__") for f in frames
+        ]
         header["lengths"] = [f.nbytes for f in frames]
         return header, frames
 
     @classmethod
     def device_deserialize(cls, header, frames):
         typ = pickle.loads(header["type-serialized"])
-        frames = [cudf.core.buffer.Buffer(f) for f in frames]
+        frames = [
+            cudf.core.buffer.Buffer(f) if c else memoryview(f)
+            for c, f in zip(header["is-cuda"], frames)
+        ]
         obj = typ.deserialize(header, frames)
 
         return obj
 
     def host_serialize(self):
         header, frames = self.device_serialize()
-        frames = [f.to_host_array().data for f in frames]
+        frames = [
+            f.to_host_array().data if c else memoryview(f)
+            for c, f in zip(header["is-cuda"], frames)
+        ]
         return header, frames
 
     @classmethod
     def host_deserialize(cls, header, frames):
-        frames = [rmm.DeviceBuffer.to_device(memoryview(f)) for f in frames]
+        frames = [
+            rmm.DeviceBuffer.to_device(f) if c else f
+            for c, f in zip(header["is-cuda"], map(memoryview, frames))
+        ]
         obj = cls.device_deserialize(header, frames)
         return obj
 

From 83e9c4bfe7a02ff9f6ec43c3ebfb9bdc560bb468 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Thu, 28 May 2020 09:49:51 -0400
Subject: [PATCH 89/90] Revert "Hack around current Serializable requirements"

This reverts commit afc2b5e9e5277c6a3aeb74e1b3ca1fae5e69661c.
---
 cpp/include/cudf/copying.hpp      | 12 ++++++------
 python/cudf/cudf/_lib/copying.pyx |  1 -
 python/cudf/cudf/core/abc.py      | 11 +++--------
 3 files changed, 9 insertions(+), 15 deletions(-)

diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp
index 5ebc9ae2817..8a85e9ea006 100644
--- a/cpp/include/cudf/copying.hpp
+++ b/cpp/include/cudf/copying.hpp
@@ -616,7 +616,7 @@ std::unique_ptr<column> shift(column_view const& input,
  * @param[in] rhs right-hand column_view
  * @param[in] boolean_mask column of `BOOL8` representing "left (true) / right (false)" boolean for
  * each element. Null element represents false.
- * @param[in] mr Device memory resource used to allocate the returned column's device memory
+ * @param[in] mr resource for allocating device memory
  *
  * @returns new column with the selected elements
  */
@@ -640,7 +640,7 @@ std::unique_ptr<column> copy_if_else(
  * @param[in] rhs right-hand scalar
  * @param[in] boolean_mask column of `BOOL8` representing "left (true) / right (false)" boolean for
  * each element. Null element represents false.
- * @param[in] mr Device memory resource used to allocate the returned column's device memory
+ * @param[in] mr resource for allocating device memory
  *
  * @returns new column with the selected elements
  */
@@ -662,7 +662,7 @@ std::unique_ptr<column> copy_if_else(
  * @param[in] rhs right-hand scalar
  * @param[in] boolean_mask column of `BOOL8` representing "left (true) / right (false)" boolean for
  * each element. null element represents false.
- * @param[in] mr Device memory resource used to allocate the returned column's device memory
+ * @param[in] mr resource for allocating device memory
  *
  * @returns new column with the selected elements
  */
@@ -704,7 +704,7 @@ std::unique_ptr<column> copy_if_else(
  * @param[in] input table_view (set of dense columns) to scatter
  * @param[in] target table_view to modify with scattered values from `input`
  * @param[in] boolean_mask column_view which acts as boolean mask.
- * @param[in] mr Device memory resource used to allocate device memory of the returned table.
+ * @param[in] mr Optional, The resource to use for all returned allocations
  *
  * @returns Returns a table by scattering `input` into `target` as per `boolean_mask`.
  */
@@ -741,7 +741,7 @@ std::unique_ptr<table> boolean_mask_scatter(
  * @param[in] input scalars to scatter
  * @param[in] target table_view to modify with scattered values from `input`
  * @param[in] boolean_mask column_view which acts as boolean mask.
- * @param[in] mr Device memory resource used to allocate device memory of the returned table.
+ * @param[in] mr Optional, The resource to use for all returned allocations
  *
  * @returns Returns a table by scattering `input` into `target` as per `boolean_mask`.
  */
@@ -761,7 +761,7 @@ std::unique_ptr<table> boolean_mask_scatter(
  *
  * @param input Column view to get the element from
  * @param index Index into `input` to get the element at
- * @param mr Device memory resource used to allocate the returned scalar's device memory.
+ * @param mr Optional, The resource to use for all returned allocations
  * @return std::unique_ptr<scalar> Scalar containing the single value
  */
 std::unique_ptr<scalar> get_element(
diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx
index 13068d9f773..a3fefb545d9 100644
--- a/python/cudf/cudf/_lib/copying.pyx
+++ b/python/cudf/cudf/_lib/copying.pyx
@@ -10,7 +10,6 @@ from libc.stdint cimport int32_t, uint8_t
 
 from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer
 
-import cudf
 from cudf._lib.column cimport Column
 from cudf._lib.scalar import as_scalar
 from cudf._lib.scalar cimport Scalar
diff --git a/python/cudf/cudf/core/abc.py b/python/cudf/cudf/core/abc.py
index 553454be335..1da4104f475 100644
--- a/python/cudf/cudf/core/abc.py
+++ b/python/cudf/cudf/core/abc.py
@@ -23,6 +23,7 @@ def deserialize(cls, header, frames):
 
     def device_serialize(self):
         header, frames = self.serialize()
+        assert all((type(f) is cudf.core.buffer.Buffer) for f in frames)
         header["type-serialized"] = pickle.dumps(type(self))
         header["lengths"] = [f.nbytes for f in frames]
         return header, frames
@@ -32,8 +33,7 @@ def device_deserialize(cls, header, frames):
         for f in frames:
             # some frames are empty -- meta/empty partitions/etc
             if len(f) > 0:
-                # assert hasattr(f, "__cuda_array_interface__")
-                pass
+                assert hasattr(f, "__cuda_array_interface__")
 
         typ = pickle.loads(header["type-serialized"])
         obj = typ.deserialize(header, frames)
@@ -42,12 +42,7 @@ def device_deserialize(cls, header, frames):
 
     def host_serialize(self):
         header, frames = self.device_serialize()
-        frames = [
-            cudf.core.buffer.Buffer(f).to_host_array().view("u1").data
-            if hasattr(f, "__cuda_array_interface__")
-            else f
-            for f in frames
-        ]
+        frames = [f.to_host_array().view("u1").data for f in frames]
         return header, frames
 
     @classmethod

From 0dec2e36d5869684b60cc2e951b437ca073fb6a1 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Thu, 28 May 2020 16:31:59 -0400
Subject: [PATCH 90/90] Convert DeviceBuffer<->Buffer when calling pack/unpack

---
 python/cudf/cudf/core/frame.py          | 10 ++++------
 python/cudf/cudf/tests/test_pickling.py |  5 +++--
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 85152528776..a1ee50dce1d 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -1597,14 +1597,12 @@ def serialize(self):
             self._data.level_names
         )
 
-        frames = [packed_meta.data, packed_data]
+        frames = [packed_meta.data, cudf.core.buffer.Buffer(packed_data)]
 
         return header, frames
 
     @classmethod
     def deserialize(self, header, frames):
-        import rmm
-
         typ = pickle.loads(header["type-serialized"])
         column_names = pickle.loads(header["column_names"])
         categorical_column_names = pickle.loads(
@@ -1625,11 +1623,11 @@ def deserialize(self, header, frames):
         num_data_columns = len(column_names)
         num_categorical_columns = len(categorical_column_names)
 
-        if not isinstance(frames[1], rmm.DeviceBuffer):
-            frames[1] = rmm.DeviceBuffer.to_device(frames[1])
+        if not isinstance(frames[1], cudf.core.buffer.Buffer):
+            frames[1] = cudf.core.buffer.Buffer(frames[1])
 
         # unpack into columns
-        columns = libcudf.copying.unpack(frames[0], frames[1])
+        columns = libcudf.copying.unpack(frames[0], frames[1]._owner)
 
         # construct Index
         if num_index_columns:
diff --git a/python/cudf/cudf/tests/test_pickling.py b/python/cudf/cudf/tests/test_pickling.py
index a639e81715a..08554edad83 100644
--- a/python/cudf/cudf/tests/test_pickling.py
+++ b/python/cudf/cudf/tests/test_pickling.py
@@ -73,8 +73,9 @@ def test_sizeof_dataframe():
     assert sizeof >= nbytes
 
     serialized_nbytes = len(pickle.dumps(df, protocol=pickle.HIGHEST_PROTOCOL))
-    # Serialized size should be close to what __sizeof__ is giving
-    np.testing.assert_approx_equal(sizeof, serialized_nbytes, significant=2)
+
+    # assert at least sizeof bytes were serialized
+    assert serialized_nbytes >= sizeof
 
 
 def test_pickle_index():