diff --git a/cpp/benchmarks/transpose/transpose.cpp b/cpp/benchmarks/transpose/transpose.cpp index 2f41bda4b88..b729bff286a 100644 --- a/cpp/benchmarks/transpose/transpose.cpp +++ b/cpp/benchmarks/transpose/transpose.cpp @@ -40,6 +40,18 @@ static void BM_transpose(benchmark::State& state) cuda_event_timer raii(state, true); auto output = cudf::transpose(input); } + + // collect memory statistics + auto const bytes_read = input.num_columns() * input.num_rows() * (sizeof(int32_t)); + // output size is input size + auto const bytes_written = bytes_read; + // both output and input are nullable + auto const null_bytes = + 2 * input.num_columns() * cudf::bitmask_allocation_size_bytes(input.num_rows()); + + // Use number of bytes read and written. + state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * + (bytes_read + bytes_written + null_bytes)); } class Transpose : public cudf::benchmark {};