Skip to content

Commit

Permalink
Add benchmark for strings/integers convert APIs (#8402)
Browse files Browse the repository at this point in the history
Reference #5698
This PR adds benchmark tests for `cudf::strings::to_integers` and `cudf::strings::from_integers`. The existing `convert_floats_benchmark.cpp` was refactored to cover both floating-point and integer types and renamed to `convert_numerics_benchmark.cpp`.

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Mike Wilson (https://github.com/hyperbolic2346)

URL: #8402
  • Loading branch information
davidwendt authored Jun 2, 2021
1 parent f31a094 commit 1ba59bd
Show file tree
Hide file tree
Showing 3 changed files with 137 additions and 121 deletions.
2 changes: 1 addition & 1 deletion cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ ConfigureBench(STRINGS_BENCH
string/contains_benchmark.cpp
string/convert_datetime_benchmark.cpp
string/convert_durations_benchmark.cpp
string/convert_floats_benchmark.cpp
string/convert_numerics_benchmark.cpp
string/copy_benchmark.cpp
string/extract_benchmark.cpp
string/factory_benchmark.cu
Expand Down
120 changes: 0 additions & 120 deletions cpp/benchmarks/string/convert_floats_benchmark.cpp

This file was deleted.

136 changes: 136 additions & 0 deletions cpp/benchmarks/string/convert_numerics_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <fixture/benchmark_fixture.hpp>
#include <synchronization/synchronization.hpp>

#include <benchmark/benchmark.h>
#include <benchmarks/common/generate_benchmark_input.hpp>

#include <cudf/strings/convert/convert_floats.hpp>
#include <cudf/strings/convert/convert_integers.hpp>
#include <cudf/types.hpp>

namespace {

/**
 * @brief Produce a randomly generated column whose element type is `NumericType`.
 *
 * The column is taken from a table built by `create_random_table` for the
 * type id that corresponds to `NumericType`.
 *
 * @tparam NumericType Element type of the generated column
 * @param rows Row count requested from the generator
 * @return The first column of the generated table
 */
template <typename NumericType>
std::unique_ptr<cudf::column> get_numerics_column(cudf::size_type rows)
{
  auto random_table = create_random_table({cudf::type_to_id<NumericType>()}, 1, row_count{rows});
  // release() hands over ownership of the table's columns
  auto columns = random_table->release();
  // front() yields an lvalue reference, so an explicit move is required here
  return std::move(columns.front());
}

/**
 * @brief Produce a strings column by converting a random numeric column.
 *
 * A column from `get_numerics_column<NumericType>` is converted with
 * `cudf::strings::from_floats` when `NumericType` is a floating-point type
 * and with `cudf::strings::from_integers` otherwise.
 *
 * @tparam NumericType Element type of the intermediate numeric column
 * @param rows Row count requested for the numeric column
 * @return The strings column returned by the conversion
 */
template <typename NumericType>
std::unique_ptr<cudf::column> get_strings_column(cudf::size_type rows)
{
  auto const source      = get_numerics_column<NumericType>(rows);
  auto const source_view = source->view();
  if constexpr (!std::is_floating_point_v<NumericType>) {
    return cudf::strings::from_integers(source_view);
  } else {
    return cudf::strings::from_floats(source_view);
  }
}
} // anonymous namespace

/**
 * @brief Fixture type for the string-to-number conversion benchmarks.
 *
 * Adds nothing to `cudf::benchmark`; it is the fixture named by
 * `CONVERT_TO_NUMERICS_BD`, which derives the individual benchmarks from it.
 */
class StringsToNumeric : public cudf::benchmark {};

/**
 * @brief Benchmark body: convert a strings column to `NumericType` values.
 *
 * The input strings column is prepared once, outside the measured loop, by
 * `get_strings_column<NumericType>`. Each iteration calls
 * `cudf::strings::to_floats` (floating-point `NumericType`) or
 * `cudf::strings::to_integers` (any other `NumericType`) and discards the
 * returned column.
 *
 * @tparam NumericType Target numeric type of the conversion
 * @param state Benchmark state; `state.range(0)` supplies the row count
 */
template <typename NumericType>
void convert_to_number(benchmark::State& state)
{
  auto const num_rows = static_cast<cudf::size_type>(state.range(0));

  auto const input_col  = get_strings_column<NumericType>(num_rows);
  auto const input_view = cudf::strings_column_view(input_col->view());
  cudf::data_type const output_type{cudf::type_to_id<NumericType>()};

  for (auto _ : state) {
    // Constructed once per iteration and destroyed at the end of the loop body.
    // NOTE(review): presumably reports the scope's elapsed time to `state` — confirm
    // against synchronization.hpp.
    cuda_event_timer timer(state, true);
    if constexpr (std::is_floating_point_v<NumericType>) {
      cudf::strings::to_floats(input_view, output_type);
    } else {
      cudf::strings::to_integers(input_view, output_type);
    }
  }

  // bytes processed per iteration = input characters + output numeric values
  auto const input_bytes  = input_view.chars_size();
  auto const output_bytes = num_rows * sizeof(NumericType);
  state.SetBytesProcessed(state.iterations() * (input_bytes + output_bytes));
}

/**
 * @brief Fixture type for the number-to-string conversion benchmarks.
 *
 * Adds nothing to `cudf::benchmark`; it is the fixture named by
 * `CONVERT_FROM_NUMERICS_BD`, which derives the individual benchmarks from it.
 */
class StringsFromNumeric : public cudf::benchmark {};

/**
 * @brief Benchmark body: convert a `NumericType` column to a strings column.
 *
 * The numeric input column is prepared once, outside the measured loop, by
 * `get_numerics_column<NumericType>`. Each iteration calls
 * `cudf::strings::from_floats` (floating-point `NumericType`) or
 * `cudf::strings::from_integers` (any other `NumericType`). The column from
 * the last iteration is kept so its character count can be included in the
 * bytes-processed figure.
 *
 * @tparam NumericType Element type of the numeric input column
 * @param state Benchmark state; `state.range(0)` supplies the row count
 */
template <typename NumericType>
void convert_from_number(benchmark::State& state)
{
  auto const num_rows = static_cast<cudf::size_type>(state.range(0));

  auto const input_col  = get_numerics_column<NumericType>(num_rows);
  auto const input_view = input_col->view();

  // Holds the most recent conversion result; read after the loop.
  std::unique_ptr<cudf::column> converted = nullptr;

  for (auto _ : state) {
    // Constructed once per iteration and destroyed at the end of the loop body.
    // NOTE(review): presumably reports the scope's elapsed time to `state` — confirm
    // against synchronization.hpp.
    cuda_event_timer timer(state, true);
    if constexpr (std::is_floating_point_v<NumericType>) {
      converted = cudf::strings::from_floats(input_view);
    } else {
      converted = cudf::strings::from_integers(input_view);
    }
  }

  // bytes processed per iteration = input numeric values + output characters
  auto const output_view  = cudf::strings_column_view(converted->view());
  auto const output_bytes = output_view.chars_size();
  auto const input_bytes  = num_rows * sizeof(NumericType);
  state.SetBytesProcessed(state.iterations() * (output_bytes + input_bytes));
}

// Defines and registers a StringsToNumeric fixture benchmark.
//
//   name - identifier of the benchmark within the StringsToNumeric fixture
//   type - numeric type forwarded to convert_to_number<type>
//
// The benchmark is registered with RangeMultiplier(4) over Range(1 << 10, 1 << 17)
// (passed to the body as state.range(0), i.e. the row count), uses manual timing,
// and reports in microseconds.
//
// The expansion intentionally does NOT end with a semicolon: every use site
// supplies its own (`CONVERT_TO_NUMERICS_BD(x, T);`), so a trailing one here
// would leave a stray empty declaration behind each registration.
#define CONVERT_TO_NUMERICS_BD(name, type)                               \
  BENCHMARK_DEFINE_F(StringsToNumeric, name)(::benchmark::State & state) \
  {                                                                      \
    convert_to_number<type>(state);                                      \
  }                                                                      \
  BENCHMARK_REGISTER_F(StringsToNumeric, name)                           \
    ->RangeMultiplier(4)                                                 \
    ->Range(1 << 10, 1 << 17)                                            \
    ->UseManualTime()                                                    \
    ->Unit(benchmark::kMicrosecond)

// Defines and registers a StringsFromNumeric fixture benchmark.
//
//   name - identifier of the benchmark within the StringsFromNumeric fixture
//   type - numeric type forwarded to convert_from_number<type>
//
// The benchmark is registered with RangeMultiplier(4) over Range(1 << 10, 1 << 17)
// (passed to the body as state.range(0), i.e. the row count), uses manual timing,
// and reports in microseconds.
//
// The expansion intentionally does NOT end with a semicolon: every use site
// supplies its own (`CONVERT_FROM_NUMERICS_BD(x, T);`), so a trailing one here
// would leave a stray empty declaration behind each registration.
#define CONVERT_FROM_NUMERICS_BD(name, type)                               \
  BENCHMARK_DEFINE_F(StringsFromNumeric, name)(::benchmark::State & state) \
  {                                                                        \
    convert_from_number<type>(state);                                      \
  }                                                                        \
  BENCHMARK_REGISTER_F(StringsFromNumeric, name)                           \
    ->RangeMultiplier(4)                                                   \
    ->Range(1 << 10, 1 << 17)                                              \
    ->UseManualTime()                                                      \
    ->Unit(benchmark::kMicrosecond)

// String -> number benchmarks, one per covered numeric type. Each line defines
// and registers a StringsToNumeric benchmark that runs convert_to_number<type>.
CONVERT_TO_NUMERICS_BD(strings_to_float32, float);
CONVERT_TO_NUMERICS_BD(strings_to_float64, double);
CONVERT_TO_NUMERICS_BD(strings_to_int32, int32_t);
CONVERT_TO_NUMERICS_BD(strings_to_int64, int64_t);
CONVERT_TO_NUMERICS_BD(strings_to_uint8, uint8_t);
CONVERT_TO_NUMERICS_BD(strings_to_uint16, uint16_t);

// Number -> string benchmarks, one per covered numeric type. Each line defines
// and registers a StringsFromNumeric benchmark that runs convert_from_number<type>.
CONVERT_FROM_NUMERICS_BD(strings_from_float32, float);
CONVERT_FROM_NUMERICS_BD(strings_from_float64, double);
CONVERT_FROM_NUMERICS_BD(strings_from_int32, int32_t);
CONVERT_FROM_NUMERICS_BD(strings_from_int64, int64_t);
CONVERT_FROM_NUMERICS_BD(strings_from_uint8, uint8_t);
CONVERT_FROM_NUMERICS_BD(strings_from_uint16, uint16_t);

0 comments on commit 1ba59bd

Please sign in to comment.