Add bytes_per_second to groupby max benchmark.
This patch adds memory statistics for the GROUPBY_NVBENCH benchmark
using the `max` aggregation.

For this purpose, helper functions are introduced to compute the payload
size for:
  - Column
  - Table
  - Groupby execution results

This patch relates to rapidsai#13735.
Martin Marenz committed Aug 28, 2023
1 parent 3c8ce98 commit e07038f
Showing 4 changed files with 136 additions and 11 deletions.
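
The following is a minimal sketch of the call pattern this commit enables; the actual change is in the diffs below, and the wrapper function `register_memory_stats` is hypothetical, shown only to illustrate how the new `required_bytes()` overloads feed nvbench's global-memory counters.

#include <benchmarks/common/memory_statistics.hpp>

#include <cudf/groupby.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/utilities/span.hpp>

#include <nvbench/nvbench.cuh>

// Hypothetical helper (not part of this commit).
void register_memory_stats(nvbench::state& state,
                           cudf::table_view const& input,
                           cudf::host_span<cudf::groupby::aggregation_result> results)
{
  // Bytes the aggregation must read: the payload of the input table.
  state.add_global_memory_reads<nvbench::uint8_t>(required_bytes(input));
  // Bytes it must write: the payload of every result column.
  state.add_global_memory_writes<nvbench::uint8_t>(required_bytes(results));
}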
5 changes: 3 additions & 2 deletions cpp/benchmarks/CMakeLists.txt
@@ -40,8 +40,9 @@ target_include_directories(

# Use an OBJECT library so we compile these helper source files only once
add_library(
cudf_benchmark_common OBJECT "${CUDF_SOURCE_DIR}/tests/utilities/base_fixture.cpp"
synchronization/synchronization.cpp io/cuio_common.cpp
cudf_benchmark_common OBJECT
"${CUDF_SOURCE_DIR}/tests/utilities/base_fixture.cpp" synchronization/synchronization.cpp
io/cuio_common.cpp common/memory_statistics.cpp
)
target_link_libraries(cudf_benchmark_common PRIVATE cudf_datagen $<TARGET_NAME_IF_EXISTS:conda_env>)
add_custom_command(
51 changes: 51 additions & 0 deletions cpp/benchmarks/common/memory_statistics.cpp
@@ -0,0 +1,51 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "memory_statistics.hpp"

#include <cudf/column/column.hpp>
#include <cudf/null_mask.hpp>

#include <numeric>

uint64_t required_bytes(const cudf::column_view& column)
{
uint64_t read_bytes = column.size() * cudf::size_of(column.type());
if (column.nullable()) { read_bytes += cudf::bitmask_allocation_size_bytes(column.size()); }

return read_bytes;
}

uint64_t required_bytes(const cudf::table_view& table)
{
// Use a 64-bit initial value so the accumulation is not narrowed to int.
return std::accumulate(table.begin(), table.end(), uint64_t{0}, [](uint64_t acc, const auto& col) {
return acc + required_bytes(col);
});
}

uint64_t required_bytes(
const cudf::host_span<cudf::groupby::aggregation_result>& aggregation_results)
{
uint64_t read_bytes = 0;

for (auto const& aggregation : aggregation_results) { // vector of aggregation results
for (auto const& col : aggregation.results) { // vector of columns per result
read_bytes += required_bytes(col->view());
}
}

return read_bytes;
}
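
For reference, a standalone sketch of the arithmetic above (not part of this commit): it assumes cudf's convention of one validity bit per row, padded to a 64-byte boundary, which is what cudf::bitmask_allocation_size_bytes returns by default.

#include <cstdint>
#include <cstdio>

// Mirrors required_bytes(column_view) without a cudf dependency.
// Assumes the 64-byte bitmask padding used by cudf::bitmask_allocation_size_bytes.
uint64_t bitmask_bytes(uint64_t num_rows, uint64_t padding = 64)
{
  uint64_t const bytes = (num_rows + 7) / 8;           // one validity bit per row
  return ((bytes + padding - 1) / padding) * padding;  // round up to the padding boundary
}

int main()
{
  uint64_t const rows     = 1000;
  uint64_t const values   = rows * sizeof(int32_t);        // 4000 bytes of int32 values
  uint64_t const nullable = values + bitmask_bytes(rows);  // + 128-byte padded bitmask
  std::printf("non-nullable: %lu bytes, nullable: %lu bytes\n",
              static_cast<unsigned long>(values), static_cast<unsigned long>(nullable));
  return 0;
}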
57 changes: 57 additions & 0 deletions cpp/benchmarks/common/memory_statistics.hpp
@@ -0,0 +1,57 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <cudf/column/column_view.hpp>
#include <cudf/groupby.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/utilities/span.hpp>

/**
* @brief Calculate the number of bytes needed to completely read/write the provided column.
*
* The function computes only the size of the column's payload in bytes; any metadata is
* excluded.
*
* @param column View of the input column.
* @returns Number of bytes needed to read or write the column.
*/
uint64_t required_bytes(const cudf::column_view& column);

/**
* @brief Calculate the number of bytes needed to completely read/write the provided table.
*
* The function computes only the size of the table's payload in bytes; any metadata is
* excluded.
*
* @param table View of the input table.
* @returns Number of bytes needed to read or write the table.
*/
uint64_t required_bytes(const cudf::table_view& table);

/**
* @brief Calculate the number of bytes needed to completely read/write the provided sequence of
* aggregation results.
*
* The function computes only the size of the aggregation results' payload in bytes; any
* metadata is excluded.
*
* @param aggregation_results Sequence of aggregation results from groupby execution.
* @returns Number of bytes needed to read or write the aggregation results.
*/
uint64_t required_bytes(
const cudf::host_span<cudf::groupby::aggregation_result>& aggregation_results);
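
A small usage sketch of the column and table overloads (not part of this commit; assumes a cudf development environment and cudf's column factory API):

#include <benchmarks/common/memory_statistics.hpp>

#include <cudf/column/column.hpp>
#include <cudf/column/column_factories.hpp>
#include <cudf/table/table_view.hpp>

#include <cstdio>

int main()
{
  // A non-nullable INT32 column with 1000 rows: the payload is 1000 * 4 = 4000 bytes.
  auto col  = cudf::make_numeric_column(cudf::data_type{cudf::type_id::INT32}, 1000);
  auto view = col->view();

  std::printf("column payload: %lu bytes\n", static_cast<unsigned long>(required_bytes(view)));
  // A table_view over the same column twice simply doubles the payload.
  std::printf("table payload : %lu bytes\n",
              static_cast<unsigned long>(required_bytes(cudf::table_view({view, view}))));
  return 0;
}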
34 changes: 25 additions & 9 deletions cpp/benchmarks/groupby/group_max.cpp
@@ -15,11 +15,14 @@
*/

#include <benchmarks/common/generate_input.hpp>
+#include <benchmarks/common/memory_statistics.hpp>

#include <cudf/groupby.hpp>

#include <nvbench/nvbench.cuh>

+#include <optional>
+
template <typename Type>
void bench_groupby_max(nvbench::state& state, nvbench::type_list<Type>)
{
@@ -31,25 +34,38 @@ void bench_groupby_max(nvbench::state& state, nvbench::type_list<Type>)
return create_random_column(cudf::type_to_id<int32_t>(), row_count{size}, profile);
}();

+  auto const null_freq = state.get_float64("null_probability");
+  bool const has_null  = null_freq > 0;
+
auto const vals = [&] {
-    auto builder = data_profile_builder().cardinality(0).distribution(
-      cudf::type_to_id<Type>(), distribution_id::UNIFORM, 0, 1000);
-    if (const auto null_freq = state.get_float64("null_probability"); null_freq > 0) {
-      builder.null_probability(null_freq);
-    } else {
-      builder.no_validity();
-    }
+    auto builder = data_profile_builder()
+                     .cardinality(0)
+                     .null_probability(has_null ? std::optional<double>(null_freq) : std::nullopt)
+                     .distribution(cudf::type_to_id<Type>(), distribution_id::UNIFORM, 0, 1000);
+
return create_random_column(cudf::type_to_id<Type>(), row_count{size}, data_profile{builder});
}();

-  auto keys_view = keys->view();
-  auto gb_obj    = cudf::groupby::groupby(cudf::table_view({keys_view, keys_view, keys_view}));
+  auto const keys_view  = keys->view();
+  auto const keys_table = cudf::table_view({keys_view, keys_view, keys_view});
+  auto gb_obj           = cudf::groupby::groupby(keys_table);

std::vector<cudf::groupby::aggregation_request> requests;
requests.emplace_back(cudf::groupby::aggregation_request());
requests[0].values = vals->view();
requests[0].aggregations.push_back(cudf::make_max_aggregation<cudf::groupby_aggregation>());

+  // Add memory statistics
+  state.add_global_memory_reads<nvbench::uint8_t>(required_bytes(vals->view()));
+  state.add_global_memory_reads<nvbench::uint8_t>(required_bytes(keys_table));
+
+  // The number of bytes written depends on the random distribution of the keys.
+  // For larger sizes it converges to the number of unique elements in the
+  // input distribution (101 elements).
+  auto [res_table, res_agg] = gb_obj.aggregate(requests);
+  state.add_global_memory_writes<uint8_t>(required_bytes(res_table->view()));
+  state.add_global_memory_writes<uint8_t>(required_bytes(res_agg));
+
state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { auto const result = gb_obj.aggregate(requests); });
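
The registered read/write bytes are what nvbench turns into the reported bandwidth: in effect it divides the total registered bytes by the measured kernel time. Below is a rough, illustrative calculation for one million int32 rows; the timing is hypothetical, not a measured result.

#include <cstdio>

int main()
{
  // Reads: three int32 key columns plus one int32 value column at 1'000'000 rows each.
  double const read_bytes = 4.0 * 1'000'000 * 4;
  // Writes: ~101 unique keys -> three result key columns plus one max column (null masks ignored).
  double const write_bytes = 4.0 * 101 * 4;
  // Hypothetical kernel time; real numbers come from the nvbench run.
  double const seconds = 1.0e-3;

  std::printf("approx. bandwidth: %.2f GB/s\n", (read_bytes + write_bytes) / seconds / 1.0e9);
  return 0;
}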
