From dfa37b915b72d7761e22d767b87466d6cd551663 Mon Sep 17 00:00:00 2001 From: davidwendt Date: Thu, 18 Mar 2021 16:22:50 -0400 Subject: [PATCH 1/2] Add gbenchmark for converting strings to/from timestamps --- cpp/benchmarks/CMakeLists.txt | 1 + .../string/convert_datetime_benchmark.cpp | 78 +++++++++++++++++++ 2 files changed, 79 insertions(+) create mode 100644 cpp/benchmarks/string/convert_datetime_benchmark.cpp diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 682f1ac5fca..197b0d966fc 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -181,6 +181,7 @@ ConfigureBench(SUBWORD_TOKENIZER_BENCH text/subword_benchmark.cpp) ConfigureBench(STRINGS_BENCH string/case_benchmark.cpp string/contains_benchmark.cpp + string/convert_datetime_benchmark.cpp string/convert_durations_benchmark.cpp string/convert_floats_benchmark.cpp string/copy_benchmark.cpp diff --git a/cpp/benchmarks/string/convert_datetime_benchmark.cpp b/cpp/benchmarks/string/convert_datetime_benchmark.cpp new file mode 100644 index 00000000000..d6ac3f3fe18 --- /dev/null +++ b/cpp/benchmarks/string/convert_datetime_benchmark.cpp @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include + +class StringDateTime : public cudf::benchmark { +}; + +enum class direction { to, from }; + +template +void BM_convert_datetime(benchmark::State& state, direction dir) +{ + cudf::size_type const n_rows{static_cast(state.range(0))}; + auto const data_type = cudf::data_type(cudf::type_to_id()); + + auto const table = create_random_table({data_type.id()}, 1, row_count{n_rows}); + cudf::column_view input(table->view().column(0)); + + auto source = dir == direction::to ? cudf::strings::from_timestamps(input, "%Y-%m-%d %H:%M:%S") + : make_empty_column(cudf::data_type{cudf::type_id::STRING}); + cudf::strings_column_view source_string(source->view()); + + for (auto _ : state) { + cuda_event_timer raii(state, true); + if (dir == direction::to) + cudf::strings::to_timestamps(source_string, data_type, "%Y-%m-%d %H:%M:%S"); + else + cudf::strings::from_timestamps(input, "%Y-%m-%d %H:%M:%S"); + } + + auto const bytes = dir == direction::to ? source_string.chars_size() : n_rows * sizeof(TypeParam); + state.SetBytesProcessed(state.iterations() * bytes); +} + +#define STR_BENCHMARK_DEFINE(name, type, dir) \ + BENCHMARK_DEFINE_F(StringDateTime, name)(::benchmark::State & state) \ + { \ + BM_convert_datetime(state, dir); \ + } \ + BENCHMARK_REGISTER_F(StringDateTime, name) \ + ->RangeMultiplier(1 << 5) \ + ->Range(1 << 10, 1 << 25) \ + ->UseManualTime() \ + ->Unit(benchmark::kMicrosecond); + +STR_BENCHMARK_DEFINE(from_days, cudf::timestamp_D, direction::from); +STR_BENCHMARK_DEFINE(from_seconds, cudf::timestamp_s, direction::from); +STR_BENCHMARK_DEFINE(from_mseconds, cudf::timestamp_ms, direction::from); +STR_BENCHMARK_DEFINE(from_useconds, cudf::timestamp_us, direction::from); +STR_BENCHMARK_DEFINE(from_nseconds, cudf::timestamp_ns, direction::from); + +STR_BENCHMARK_DEFINE(to_days, cudf::timestamp_D, direction::to); +STR_BENCHMARK_DEFINE(to_seconds, cudf::timestamp_s, direction::to); +STR_BENCHMARK_DEFINE(to_mseconds, cudf::timestamp_ms, direction::to); +STR_BENCHMARK_DEFINE(to_useconds, cudf::timestamp_us, direction::to); +STR_BENCHMARK_DEFINE(to_nseconds, cudf::timestamp_ns, direction::to); From 5f25df709209f55ce9529be451a4112c81d38b0a Mon Sep 17 00:00:00 2001 From: davidwendt Date: Fri, 19 Mar 2021 11:16:23 -0400 Subject: [PATCH 2/2] fix n_rows variable declaration --- cpp/benchmarks/string/convert_datetime_benchmark.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/benchmarks/string/convert_datetime_benchmark.cpp b/cpp/benchmarks/string/convert_datetime_benchmark.cpp index d6ac3f3fe18..dc7e891286a 100644 --- a/cpp/benchmarks/string/convert_datetime_benchmark.cpp +++ b/cpp/benchmarks/string/convert_datetime_benchmark.cpp @@ -32,7 +32,7 @@ enum class direction { to, from }; template void BM_convert_datetime(benchmark::State& state, direction dir) { - cudf::size_type const n_rows{static_cast(state.range(0))}; + auto const n_rows = static_cast(state.range(0)); auto const data_type = cudf::data_type(cudf::type_to_id()); auto const table = create_random_table({data_type.id()}, 1, row_count{n_rows});