diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 11eef015364..99aeff0df93 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -14,7 +14,7 @@ find_package(Threads REQUIRED) -add_library(cudf_datagen STATIC common/generate_input.cpp) +add_library(cudf_datagen STATIC common/generate_input.cpp common/generate_nullmask.cu) target_compile_features(cudf_datagen PUBLIC cxx_std_17 cuda_std_17) target_compile_options( diff --git a/cpp/benchmarks/ast/transform.cpp b/cpp/benchmarks/ast/transform.cpp index c17c288a6d3..de0429f74ad 100644 --- a/cpp/benchmarks/ast/transform.cpp +++ b/cpp/benchmarks/ast/transform.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,26 +14,18 @@ * limitations under the License. */ -#include -#include -#include +#include +#include +#include + #include #include -#include - -#include - -#include -#include -#include -#include #include #include #include -#include -#include +#include #include enum class TreeType { @@ -41,45 +33,23 @@ enum class TreeType { // child column reference }; +template class AST : public cudf::benchmark { }; template static void BM_ast_transform(benchmark::State& state) { - const cudf::size_type table_size{(cudf::size_type)state.range(0)}; - const cudf::size_type tree_levels = (cudf::size_type)state.range(1); + auto const table_size{static_cast(state.range(0))}; + auto const tree_levels{static_cast(state.range(1))}; // Create table data - auto n_cols = reuse_columns ? 
1 : tree_levels + 1; - auto column_wrappers = std::vector>(n_cols); - auto columns = std::vector(n_cols); - - auto data_iterator = thrust::make_counting_iterator(0); - - if constexpr (Nullable) { - auto validities = std::vector(table_size); - std::random_device rd; - std::mt19937 gen(rd()); - - std::generate( - validities.begin(), validities.end(), [&]() { return gen() > (0.5 * gen.max()); }); - std::generate_n(column_wrappers.begin(), n_cols, [=]() { - return cudf::test::fixed_width_column_wrapper( - data_iterator, data_iterator + table_size, validities.begin()); - }); - } else { - std::generate_n(column_wrappers.begin(), n_cols, [=]() { - return cudf::test::fixed_width_column_wrapper(data_iterator, - data_iterator + table_size); - }); - } - std::transform( - column_wrappers.begin(), column_wrappers.end(), columns.begin(), [](auto const& col) { - return static_cast(col); - }); - - cudf::table_view table{columns}; + auto const n_cols = reuse_columns ? 1 : tree_levels + 1; + auto const source_table = + create_sequence_table(cycle_dtypes({cudf::type_to_id()}, n_cols), + row_count{table_size}, + Nullable ? 
0.5 : -1.0); + auto table = source_table->view(); // Create column references auto column_refs = std::vector(); @@ -138,10 +108,15 @@ static void CustomRanges(benchmark::internal::Benchmark* b) } } -#define AST_TRANSFORM_BENCHMARK_DEFINE(name, key_type, tree_type, reuse_columns, nullable) \ - TEMPLATED_BENCHMARK_F(AST, BM_ast_transform, key_type, tree_type, reuse_columns, nullable) \ - ->Apply(CustomRanges) \ - ->Unit(benchmark::kMillisecond) \ +#define AST_TRANSFORM_BENCHMARK_DEFINE(name, key_type, tree_type, reuse_columns, nullable) \ + BENCHMARK_TEMPLATE_DEFINE_F(AST, name, key_type, tree_type, reuse_columns, nullable) \ + (::benchmark::State & st) \ + { \ + BM_ast_transform(st); \ + } \ + BENCHMARK_REGISTER_F(AST, name) \ + ->Apply(CustomRanges) \ + ->Unit(benchmark::kMillisecond) \ ->UseManualTime(); AST_TRANSFORM_BENCHMARK_DEFINE( diff --git a/cpp/benchmarks/binaryop/binaryop.cpp b/cpp/benchmarks/binaryop/binaryop.cpp index 314d657679b..e5bde94f1f9 100644 --- a/cpp/benchmarks/binaryop/binaryop.cpp +++ b/cpp/benchmarks/binaryop/binaryop.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,23 +14,15 @@ * limitations under the License. 
*/ +#include +#include +#include + #include -#include -#include #include #include -#include - -#include - -#include -#include -#include - -#include #include -#include #include // This set of benchmarks is designed to be a comparison for the AST benchmarks @@ -47,40 +39,29 @@ class BINARYOP : public cudf::benchmark { template static void BM_binaryop_transform(benchmark::State& state) { - const cudf::size_type table_size{(cudf::size_type)state.range(0)}; - const cudf::size_type tree_levels = (cudf::size_type)state.range(1); + auto const table_size{static_cast(state.range(0))}; + auto const tree_levels{static_cast(state.range(1))}; // Create table data - auto n_cols = reuse_columns ? 1 : tree_levels + 1; - auto column_wrappers = std::vector>(); - auto columns = std::vector(n_cols); - - auto data_iterator = thrust::make_counting_iterator(0); - std::generate_n(std::back_inserter(column_wrappers), n_cols, [=]() { - return cudf::test::fixed_width_column_wrapper(data_iterator, - data_iterator + table_size); - }); - std::transform( - column_wrappers.begin(), column_wrappers.end(), columns.begin(), [](auto const& col) { - return static_cast(col); - }); - - cudf::table_view table{columns}; + auto const n_cols = reuse_columns ? 
1 : tree_levels + 1; + auto const source_table = create_sequence_table( + cycle_dtypes({cudf::type_to_id()}, n_cols), row_count{table_size}); + cudf::table_view table{*source_table}; // Execute benchmark for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 // Execute tree that chains additions like (((a + b) + c) + d) - auto const op = cudf::binary_operator::ADD; - auto result_data_type = cudf::data_type(cudf::type_to_id()); + auto const op = cudf::binary_operator::ADD; + auto const result_data_type = cudf::data_type(cudf::type_to_id()); if (reuse_columns) { - auto result = cudf::binary_operation(columns.at(0), columns.at(0), op, result_data_type); + auto result = cudf::binary_operation(table.column(0), table.column(0), op, result_data_type); for (cudf::size_type i = 0; i < tree_levels - 1; i++) { - result = cudf::binary_operation(result->view(), columns.at(0), op, result_data_type); + result = cudf::binary_operation(result->view(), table.column(0), op, result_data_type); } } else { - auto result = cudf::binary_operation(columns.at(0), columns.at(1), op, result_data_type); - std::for_each(std::next(columns.cbegin(), 2), columns.cend(), [&](auto const& col) { + auto result = cudf::binary_operation(table.column(0), table.column(1), op, result_data_type); + std::for_each(std::next(table.begin(), 2), table.end(), [&](auto const& col) { result = cudf::binary_operation(result->view(), col, op, result_data_type); }); } diff --git a/cpp/benchmarks/binaryop/compiled_binaryop.cpp b/cpp/benchmarks/binaryop/compiled_binaryop.cpp index f8226c7387a..50cd0b7b8d5 100644 --- a/cpp/benchmarks/binaryop/compiled_binaryop.cpp +++ b/cpp/benchmarks/binaryop/compiled_binaryop.cpp @@ -14,30 +14,25 @@ * limitations under the License. 
*/ -#include -#include -#include - -#include +#include +#include +#include #include -#include - class COMPILED_BINARYOP : public cudf::benchmark { }; template void BM_compiled_binaryop(benchmark::State& state, cudf::binary_operator binop) { - const cudf::size_type column_size{(cudf::size_type)state.range(0)}; + auto const column_size{static_cast(state.range(0))}; - auto data_it = thrust::make_counting_iterator(0); - cudf::test::fixed_width_column_wrapper input1(data_it, data_it + column_size); - cudf::test::fixed_width_column_wrapper input2(data_it, data_it + column_size); + auto const source_table = create_sequence_table( + {cudf::type_to_id(), cudf::type_to_id()}, row_count{column_size}); - auto lhs = cudf::column_view(input1); - auto rhs = cudf::column_view(input2); + auto lhs = cudf::column_view(source_table->get_column(0)); + auto rhs = cudf::column_view(source_table->get_column(1)); auto output_dtype = cudf::data_type(cudf::type_to_id()); // Call once for hot cache. diff --git a/cpp/benchmarks/common/generate_input.cpp b/cpp/benchmarks/common/generate_input.cpp index 68eabd3f1cc..d6564428a2e 100644 --- a/cpp/benchmarks/common/generate_input.cpp +++ b/cpp/benchmarks/common/generate_input.cpp @@ -19,6 +19,8 @@ #include #include +#include +#include #include #include @@ -571,11 +573,11 @@ columns_vector create_random_columns(data_profile const& profile, } /** - * @brief Repeats the input data types in round-robin order to fill a vector of @ref num_cols + * @brief Repeats the input data types cyclically to fill a vector of @ref num_cols * elements. 
*/ -std::vector repeat_dtypes(std::vector const& dtype_ids, - cudf::size_type num_cols) +std::vector cycle_dtypes(std::vector const& dtype_ids, + cudf::size_type num_cols) { if (dtype_ids.size() == static_cast(num_cols)) { return dtype_ids; } std::vector out_dtypes; @@ -586,29 +588,26 @@ std::vector repeat_dtypes(std::vector const& dtype } std::unique_ptr create_random_table(std::vector const& dtype_ids, - cudf::size_type num_cols, table_size_bytes table_bytes, data_profile const& profile, unsigned seed) { - auto const out_dtype_ids = repeat_dtypes(dtype_ids, num_cols); size_t const avg_row_bytes = - std::accumulate(out_dtype_ids.begin(), out_dtype_ids.end(), 0ul, [&](size_t sum, auto tid) { + std::accumulate(dtype_ids.begin(), dtype_ids.end(), 0ul, [&](size_t sum, auto tid) { return sum + avg_element_size(profile, cudf::data_type(tid)); }); cudf::size_type const num_rows = table_bytes.size / avg_row_bytes; - return create_random_table(out_dtype_ids, num_cols, row_count{num_rows}, profile, seed); + return create_random_table(dtype_ids, row_count{num_rows}, profile, seed); } std::unique_ptr create_random_table(std::vector const& dtype_ids, - cudf::size_type num_cols, row_count num_rows, data_profile const& profile, unsigned seed) { - auto const out_dtype_ids = repeat_dtypes(dtype_ids, num_cols); - auto seed_engine = deterministic_engine(seed); + cudf::size_type const num_cols = dtype_ids.size(); + auto seed_engine = deterministic_engine(seed); auto const processor_count = std::thread::hardware_concurrency(); cudf::size_type const cols_per_thread = (num_cols + processor_count - 1) / processor_count; @@ -619,8 +618,8 @@ std::unique_ptr create_random_table(std::vector cons for (unsigned int i = 0; i < processor_count && next_col < num_cols; ++i) { auto thread_engine = deterministic_engine(seed_dist(seed_engine)); auto const thread_num_cols = std::min(num_cols - next_col, cols_per_thread); - std::vector thread_types(out_dtype_ids.begin() + next_col, - 
out_dtype_ids.begin() + next_col + thread_num_cols); + std::vector thread_types(dtype_ids.begin() + next_col, + dtype_ids.begin() + next_col + thread_num_cols); col_futures.emplace_back(std::async(std::launch::async, create_random_columns, std::cref(profile), @@ -642,6 +641,22 @@ std::unique_ptr create_random_table(std::vector cons return std::make_unique(std::move(output_columns)); } +std::unique_ptr create_sequence_table(std::vector const& dtype_ids, + row_count num_rows, + float null_probability, + unsigned seed) +{ + auto columns = std::vector>(dtype_ids.size()); + std::transform(dtype_ids.begin(), dtype_ids.end(), columns.begin(), [&](auto dtype) mutable { + auto init = cudf::make_default_constructed_scalar(cudf::data_type{dtype}); + auto col = cudf::sequence(num_rows.count, *init); + auto [mask, count] = create_random_null_mask(num_rows.count, null_probability, seed++); + col->set_null_mask(std::move(mask), count); + return col; + }); + return std::make_unique(std::move(columns)); +} + std::vector get_type_or_group(int32_t id) { // identity transformation when passing a concrete type_id diff --git a/cpp/benchmarks/common/generate_input.hpp b/cpp/benchmarks/common/generate_input.hpp index 1999ccb8ec3..17bd650e722 100644 --- a/cpp/benchmarks/common/generate_input.hpp +++ b/cpp/benchmarks/common/generate_input.hpp @@ -19,6 +19,7 @@ #include #include +#include #include /** @@ -223,9 +224,9 @@ class data_profile { cudf::size_type avg_run_length = 4; public: - template < - typename T, - typename std::enable_if_t && std::is_integral_v, T>* = nullptr> + template && cuda::std::is_integral_v, T>* = + nullptr> distribution_params get_distribution_params() const { auto it = int_params.find(cudf::type_to_id()); @@ -306,7 +307,7 @@ class data_profile { // discrete distributions (integers, strings, lists). Otherwise the call with have no effect. 
template , T>* = nullptr> + typename std::enable_if_t, T>* = nullptr> void set_distribution_params(Type_enum type_or_group, distribution_id dist, T lower_bound, @@ -369,18 +370,13 @@ struct row_count { /** * @brief Deterministically generates a table filled with data with the given parameters. * - * If the number of passed types is smaller than the number of requested column, the columns types - * with be repeated in round-robin order to fill the table. - * * @param dtype_ids Vector of requested column types - * @param num_cols Number of columns in the output table * @param table_bytes Target size of the output table, in bytes. Some type may not produce columns * of exact size * @param data_params optional, set of data parameters describing the data profile for each type * @param seed optional, seed for the pseudo-random engine */ std::unique_ptr create_random_table(std::vector const& dtype_ids, - cudf::size_type num_cols, table_size_bytes table_bytes, data_profile const& data_params = data_profile{}, unsigned seed = 1); @@ -388,17 +384,51 @@ std::unique_ptr create_random_table(std::vector cons /** * @brief Deterministically generates a table filled with data with the given parameters. * - * If the number of passed types is smaller than the number of requested column, the columns types - * with be repeated in round-robin order to fill the table. - * * @param dtype_ids Vector of requested column types - * @param num_cols Number of columns in the output table * @param num_rows Number of rows in the output table * @param data_params optional, set of data parameters describing the data profile for each type * @param seed optional, seed for the pseudo-random engine */ std::unique_ptr create_random_table(std::vector const& dtype_ids, - cudf::size_type num_cols, row_count num_rows, data_profile const& data_params = data_profile{}, unsigned seed = 1); + +/** + * @brief Generate sequence columns starting with value 0 in first row and increasing by 1 in + * subsequent rows. 
+ * + * @param dtype_ids Vector of requested column types + * @param num_rows Number of rows in the output table + * @param null_probability optional, probability of a null value + * <0 implies no null mask, =0 implies all valids, >=1 implies all nulls + * @param seed optional, seed for the pseudo-random engine + * @return A table with the sequence columns. + */ +std::unique_ptr create_sequence_table(std::vector const& dtype_ids, + row_count num_rows, + float null_probability = -1.0, + unsigned seed = 1); + +/** + * @brief Repeats the input data types cyclically to fill a vector of @ref num_cols + * elements. + * + * @param dtype_ids Vector of requested column types + * @param num_cols Number of types in the output vector + * @return A vector of type_ids + */ +std::vector cycle_dtypes(std::vector const& dtype_ids, + cudf::size_type num_cols); +/** + * @brief Create a random null mask object + * + * @param size number of rows + * @param null_probability probability of a null value + * <0 implies no null mask, =0 implies all valids, >=1 implies all nulls + * @param seed optional, seed for the pseudo-random engine + * @return null mask device buffer with random null mask data and null count + */ +std::pair create_random_null_mask(cudf::size_type size, + float null_probability, + unsigned seed = 1); diff --git a/cpp/benchmarks/common/generate_nullmask.cu b/cpp/benchmarks/common/generate_nullmask.cu new file mode 100644 index 00000000000..502af95a971 --- /dev/null +++ b/cpp/benchmarks/common/generate_nullmask.cu @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "generate_input.hpp" + +#include +#include + +#include + +/** + * @brief bool generator with given probability [0.0 - 1.0] of returning true. + * + */ +struct bool_generator { + thrust::minstd_rand engine; + thrust::uniform_real_distribution dist; + float probability_true; + bool_generator(unsigned seed, float probability_true) + : engine(seed), dist{0, 1}, probability_true{probability_true} + { + } + + __device__ bool operator()(size_t n) + { + engine.discard(n); + return dist(engine) < probability_true; + } +}; + +std::pair create_random_null_mask(cudf::size_type size, + float null_probability, + unsigned seed) +{ + if (null_probability < 0.0f) { // negative: no null mask at all + return {rmm::device_buffer{}, 0}; + } else if (null_probability == 0.0f) { // mask present, every row valid + return {cudf::create_null_mask(size, cudf::mask_state::ALL_VALID), 0}; + } else if (null_probability >= 1.0f) { // every row null, null count == size + return {cudf::create_null_mask(size, cudf::mask_state::ALL_NULL), size}; + } else { // each row is valid with probability 1 - null_probability + return cudf::detail::valid_if(thrust::make_counting_iterator(0), + thrust::make_counting_iterator(size), + bool_generator{seed, 1.0f - null_probability}); + } +} diff --git a/cpp/benchmarks/common/random_distribution_factory.hpp b/cpp/benchmarks/common/random_distribution_factory.hpp index 3289c6f40ab..df2b6e0a754 100644 --- a/cpp/benchmarks/common/random_distribution_factory.hpp +++ b/cpp/benchmarks/common/random_distribution_factory.hpp @@ -24,7 +24,7 @@ /** * @brief Generates a normal(binomial) distribution between zero and upper_bound. 
*/ -template , T>* = nullptr> +template , T>* = nullptr> auto make_normal_dist(T upper_bound) { using uT = typename std::make_unsigned::type; @@ -42,7 +42,7 @@ auto make_normal_dist(T upper_bound) return std::normal_distribution(mean, stddev); } -template , T>* = nullptr> +template , T>* = nullptr> auto make_uniform_dist(T range_start, T range_end) { return std::uniform_int_distribution(range_start, range_end); @@ -62,7 +62,7 @@ double geometric_dist_p(T range_size) return p ? p : std::numeric_limits::epsilon(); } -template , T>* = nullptr> +template , T>* = nullptr> auto make_geometric_dist(T range_start, T range_end) { using uT = typename std::make_unsigned::type; @@ -82,7 +82,7 @@ auto make_geometric_dist(T range_start, T range_end) template using distribution_fn = std::function; -template , T>* = nullptr> +template , T>* = nullptr> distribution_fn make_distribution(distribution_id did, T lower_bound, T upper_bound) { switch (did) { diff --git a/cpp/benchmarks/copying/copy_if_else.cpp b/cpp/benchmarks/copying/copy_if_else.cpp index 6f3ba34e373..6f094aba680 100644 --- a/cpp/benchmarks/copying/copy_if_else.cpp +++ b/cpp/benchmarks/copying/copy_if_else.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -32,7 +32,7 @@ static void BM_copy_if_else(benchmark::State& state, bool nulls) cudf::size_type const n_rows{(cudf::size_type)state.range(0)}; auto input_type = cudf::type_to_id(); auto bool_type = cudf::type_id::BOOL8; - auto const input = create_random_table({input_type, input_type, bool_type}, 3, row_count{n_rows}); + auto const input = create_random_table({input_type, input_type, bool_type}, row_count{n_rows}); if (!nulls) { input->get_column(2).set_null_mask(rmm::device_buffer{}, 0); diff --git a/cpp/benchmarks/copying/scatter.cu b/cpp/benchmarks/copying/scatter.cu index a9ab376c8c3..977937beaa2 100644 --- a/cpp/benchmarks/copying/scatter.cu +++ b/cpp/benchmarks/copying/scatter.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,24 +14,15 @@ * limitations under the License. */ -#include +#include +#include +#include #include - -#include -#include -// #include -#include -#include -#include - #include -#include -#include - -#include "../fixture/benchmark_fixture.hpp" -#include "../synchronization/synchronization.hpp" +#include +#include class Scatter : public cudf::benchmark { }; @@ -39,53 +30,33 @@ class Scatter : public cudf::benchmark { template void BM_scatter(benchmark::State& state) { - const cudf::size_type source_size{(cudf::size_type)state.range(0)}; - const auto n_cols = (cudf::size_type)state.range(1); - - // Every element is valid - auto data = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i; }); + auto const source_size{static_cast(state.range(0))}; + auto const n_cols{static_cast(state.range(1))}; // Gather indices - std::vector host_map_data(source_size); - std::iota(host_map_data.begin(), host_map_data.end(), 0); + auto scatter_map_table = + create_sequence_table({cudf::type_to_id()}, row_count{source_size}); + auto 
scatter_map = scatter_map_table->get_column(0).mutable_view(); if (coalesce) { - std::reverse(host_map_data.begin(), host_map_data.end()); + thrust::reverse( + thrust::device, scatter_map.begin(), scatter_map.end()); } else { - std::random_shuffle(host_map_data.begin(), host_map_data.end()); + thrust::shuffle(thrust::device, + scatter_map.begin(), + scatter_map.end(), + thrust::default_random_engine()); } - cudf::test::fixed_width_column_wrapper scatter_map(host_map_data.begin(), - host_map_data.end()); - - std::vector> source_column_wrappers; - std::vector source_columns(n_cols); - - std::vector> target_column_wrappers; - std::vector target_columns(n_cols); - - std::generate_n(std::back_inserter(source_column_wrappers), n_cols, [=]() { - return cudf::test::fixed_width_column_wrapper(data, data + source_size); - }); - std::transform(source_column_wrappers.begin(), - source_column_wrappers.end(), - source_columns.begin(), - [](auto const& col) { return static_cast(col); }); - - std::generate_n(std::back_inserter(target_column_wrappers), n_cols, [=]() { - return cudf::test::fixed_width_column_wrapper(data, data + source_size); - }); - std::transform(target_column_wrappers.begin(), - target_column_wrappers.end(), - target_columns.begin(), - [](auto const& col) { return static_cast(col); }); - - cudf::table_view source_table{source_columns}; - cudf::table_view target_table{target_columns}; + // Every element is valid + auto source_table = create_sequence_table(cycle_dtypes({cudf::type_to_id()}, n_cols), + row_count{source_size}); + auto target_table = create_sequence_table(cycle_dtypes({cudf::type_to_id()}, n_cols), + row_count{source_size}); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 - cudf::scatter(source_table, scatter_map, target_table); + cudf::scatter(*source_table, scatter_map, *target_table); } state.SetBytesProcessed(static_cast(state.iterations()) * state.range(0) * n_cols * 2 * diff --git 
a/cpp/benchmarks/groupby/group_struct.cu b/cpp/benchmarks/groupby/group_struct.cu index 355c7cbab6c..34f2d1adc75 100644 --- a/cpp/benchmarks/groupby/group_struct.cu +++ b/cpp/benchmarks/groupby/group_struct.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -41,18 +41,11 @@ static auto create_data_table(cudf::size_type n_rows) // The first two struct members are int32 and string. // The first column is also used as keys in groupby. - auto col_ids = std::vector{cudf::type_id::INT32, cudf::type_id::STRING}; - // The subsequent struct members are int32 and string again. - for (cudf::size_type i = 3; i <= num_struct_members; ++i) { - if (i % 2) { - col_ids.push_back(cudf::type_id::INT32); - } else { - col_ids.push_back(cudf::type_id::STRING); - } - } - - return create_random_table(col_ids, num_struct_members, row_count{n_rows}, table_profile); + return create_random_table( + cycle_dtypes({cudf::type_id::INT32, cudf::type_id::STRING}, num_struct_members), + row_count{n_rows}, + table_profile); } // Max aggregation/scan technically has the same performance as min. diff --git a/cpp/benchmarks/hashing/hash.cpp b/cpp/benchmarks/hashing/hash.cpp index e2ad38230a2..fe22795bb6b 100644 --- a/cpp/benchmarks/hashing/hash.cpp +++ b/cpp/benchmarks/hashing/hash.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -30,7 +30,7 @@ enum contains_nulls { no_nulls, nulls }; static void BM_hash(benchmark::State& state, cudf::hash_id hid, contains_nulls has_nulls) { cudf::size_type const n_rows{(cudf::size_type)state.range(0)}; - auto const data = create_random_table({cudf::type_id::INT64}, 1, row_count{n_rows}); + auto const data = create_random_table({cudf::type_id::INT64}, row_count{n_rows}); if (has_nulls == contains_nulls::no_nulls) data->get_column(0).set_null_mask(rmm::device_buffer{}, 0); diff --git a/cpp/benchmarks/io/csv/csv_reader.cpp b/cpp/benchmarks/io/csv/csv_reader.cpp index 241ba4d5954..c50f5220200 100644 --- a/cpp/benchmarks/io/csv/csv_reader.cpp +++ b/cpp/benchmarks/io/csv/csv_reader.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -38,7 +38,8 @@ void BM_csv_read_varying_input(benchmark::State& state) auto const data_types = get_type_or_group(state.range(0)); auto const source_type = static_cast(state.range(1)); - auto const tbl = create_random_table(data_types, num_cols, table_size_bytes{data_size}); + auto const tbl = + create_random_table(cycle_dtypes(data_types, num_cols), table_size_bytes{data_size}); auto const view = tbl->view(); cuio_source_sink_pair source_sink(source_type); @@ -75,7 +76,7 @@ void BM_csv_read_varying_options(benchmark::State& state) col_sel); auto const cols_to_read = select_column_indexes(data_types.size(), col_sel); - auto const tbl = create_random_table(data_types, data_types.size(), table_size_bytes{data_size}); + auto const tbl = create_random_table(data_types, table_size_bytes{data_size}); auto const view = tbl->view(); cuio_source_sink_pair source_sink(io_type::HOST_BUFFER); diff --git a/cpp/benchmarks/io/csv/csv_writer.cpp b/cpp/benchmarks/io/csv/csv_writer.cpp index 413a269bcb2..65aa31c68dc 100644 --- 
a/cpp/benchmarks/io/csv/csv_writer.cpp +++ b/cpp/benchmarks/io/csv/csv_writer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -38,7 +38,8 @@ void BM_csv_write_varying_inout(benchmark::State& state) auto const data_types = get_type_or_group(state.range(0)); auto const sink_type = static_cast(state.range(1)); - auto const tbl = create_random_table(data_types, num_cols, table_size_bytes{data_size}); + auto const tbl = + create_random_table(cycle_dtypes(data_types, num_cols), table_size_bytes{data_size}); auto const view = tbl->view(); cuio_source_sink_pair source_sink(sink_type); @@ -66,7 +67,7 @@ void BM_csv_write_varying_options(benchmark::State& state) int32_t(type_group_id::TIMESTAMP), int32_t(cudf::type_id::STRING)}); - auto const tbl = create_random_table(data_types, data_types.size(), table_size_bytes{data_size}); + auto const tbl = create_random_table(data_types, table_size_bytes{data_size}); auto const view = tbl->view(); std::string const na_per(na_per_len, '#'); diff --git a/cpp/benchmarks/io/orc/orc_reader.cpp b/cpp/benchmarks/io/orc/orc_reader.cpp index e15513275ee..29d4860a0e5 100644 --- a/cpp/benchmarks/io/orc/orc_reader.cpp +++ b/cpp/benchmarks/io/orc/orc_reader.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -45,8 +45,8 @@ void BM_orc_read_varying_input(benchmark::State& state) data_profile table_data_profile; table_data_profile.set_cardinality(cardinality); table_data_profile.set_avg_run_length(run_length); - auto const tbl = - create_random_table(data_types, num_cols, table_size_bytes{data_size}, table_data_profile); + auto const tbl = create_random_table( + cycle_dtypes(data_types, num_cols), table_size_bytes{data_size}, table_data_profile); auto const view = tbl->view(); cuio_source_sink_pair source_sink(source_type); @@ -96,7 +96,7 @@ void BM_orc_read_varying_options(benchmark::State& state) int32_t(type_group_id::TIMESTAMP), int32_t(cudf::type_id::STRING)}), col_sel); - auto const tbl = create_random_table(data_types, data_types.size(), table_size_bytes{data_size}); + auto const tbl = create_random_table(data_types, table_size_bytes{data_size}); auto const view = tbl->view(); cuio_source_sink_pair source_sink(io_type::HOST_BUFFER); diff --git a/cpp/benchmarks/io/orc/orc_writer.cpp b/cpp/benchmarks/io/orc/orc_writer.cpp index 50ae76e867c..e24ca7f749d 100644 --- a/cpp/benchmarks/io/orc/orc_writer.cpp +++ b/cpp/benchmarks/io/orc/orc_writer.cpp @@ -46,8 +46,8 @@ void BM_orc_write_varying_inout(benchmark::State& state) data_profile table_data_profile; table_data_profile.set_cardinality(cardinality); table_data_profile.set_avg_run_length(run_length); - auto const tbl = - create_random_table(data_types, num_cols, table_size_bytes{data_size}, table_data_profile); + auto const tbl = create_random_table( + cycle_dtypes(data_types, num_cols), table_size_bytes{data_size}, table_data_profile); auto const view = tbl->view(); cuio_source_sink_pair source_sink(sink_type); @@ -83,7 +83,7 @@ void BM_orc_write_varying_options(benchmark::State& state) int32_t(cudf::type_id::STRING), int32_t(cudf::type_id::LIST)}); - auto const tbl = create_random_table(data_types, data_types.size(), table_size_bytes{data_size}); + auto const tbl = create_random_table(data_types, 
table_size_bytes{data_size}); auto const view = tbl->view(); cuio_source_sink_pair source_sink(io_type::FILEPATH); diff --git a/cpp/benchmarks/io/parquet/parquet_reader.cpp b/cpp/benchmarks/io/parquet/parquet_reader.cpp index 09194931498..74613e50158 100644 --- a/cpp/benchmarks/io/parquet/parquet_reader.cpp +++ b/cpp/benchmarks/io/parquet/parquet_reader.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -45,8 +45,8 @@ void BM_parq_read_varying_input(benchmark::State& state) data_profile table_data_profile; table_data_profile.set_cardinality(cardinality); table_data_profile.set_avg_run_length(run_length); - auto const tbl = - create_random_table(data_types, num_cols, table_size_bytes{data_size}, table_data_profile); + auto const tbl = create_random_table( + cycle_dtypes(data_types, num_cols), table_size_bytes{data_size}, table_data_profile); auto const view = tbl->view(); cuio_source_sink_pair source_sink(source_type); @@ -96,7 +96,7 @@ void BM_parq_read_varying_options(benchmark::State& state) static_cast(type_group_id::TIMESTAMP), static_cast(cudf::type_id::STRING)}), col_sel); - auto const tbl = create_random_table(data_types, data_types.size(), table_size_bytes{data_size}); + auto const tbl = create_random_table(data_types, table_size_bytes{data_size}); auto const view = tbl->view(); cuio_source_sink_pair source_sink(io_type::HOST_BUFFER); diff --git a/cpp/benchmarks/io/parquet/parquet_writer.cpp b/cpp/benchmarks/io/parquet/parquet_writer.cpp index 8287c27f804..d203f0d27c8 100644 --- a/cpp/benchmarks/io/parquet/parquet_writer.cpp +++ b/cpp/benchmarks/io/parquet/parquet_writer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -45,8 +45,8 @@ void BM_parq_write_varying_inout(benchmark::State& state) data_profile table_data_profile; table_data_profile.set_cardinality(cardinality); table_data_profile.set_avg_run_length(run_length); - auto const tbl = - create_random_table(data_types, num_cols, table_size_bytes{data_size}, table_data_profile); + auto const tbl = create_random_table( + cycle_dtypes(data_types, num_cols), table_size_bytes{data_size}, table_data_profile); auto const view = tbl->view(); cuio_source_sink_pair source_sink(sink_type); @@ -77,7 +77,7 @@ void BM_parq_write_varying_options(benchmark::State& state) int32_t(cudf::type_id::STRING), int32_t(cudf::type_id::LIST)}); - auto const tbl = create_random_table(data_types, data_types.size(), table_size_bytes{data_size}); + auto const tbl = create_random_table(data_types, table_size_bytes{data_size}); auto const view = tbl->view(); cuio_source_sink_pair source_sink(io_type::FILEPATH); diff --git a/cpp/benchmarks/io/parquet/parquet_writer_chunks.cpp b/cpp/benchmarks/io/parquet/parquet_writer_chunks.cpp index 98eaba213e5..30ed245ed9a 100644 --- a/cpp/benchmarks/io/parquet/parquet_writer_chunks.cpp +++ b/cpp/benchmarks/io/parquet/parquet_writer_chunks.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -45,7 +45,8 @@ void PQ_write(benchmark::State& state) { cudf::size_type num_cols = state.range(0); - auto tbl = create_random_table({cudf::type_id::INT32}, num_cols, table_size_bytes{data_size}); + auto tbl = create_random_table(cycle_dtypes({cudf::type_id::INT32}, num_cols), + table_size_bytes{data_size}); cudf::table_view view = tbl->view(); auto mem_stats_logger = cudf::memory_stats_logger(); @@ -69,8 +70,8 @@ void PQ_write_chunked(benchmark::State& state) std::vector> tables; for (cudf::size_type idx = 0; idx < num_tables; idx++) { - tables.push_back(create_random_table( - {cudf::type_id::INT32}, num_cols, table_size_bytes{size_t(data_size / num_tables)})); + tables.push_back(create_random_table(cycle_dtypes({cudf::type_id::INT32}, num_cols), + table_size_bytes{size_t(data_size / num_tables)})); } auto mem_stats_logger = cudf::memory_stats_logger(); diff --git a/cpp/benchmarks/io/text/multibyte_split.cpp b/cpp/benchmarks/io/text/multibyte_split.cpp index b13835c15bb..8c4b10d928d 100644 --- a/cpp/benchmarks/io/text/multibyte_split.cpp +++ b/cpp/benchmarks/io/text/multibyte_split.cpp @@ -70,7 +70,6 @@ static cudf::string_scalar create_random_input(int32_t num_chars, auto const values_table = create_random_table( // {cudf::type_id::STRING}, - 1, row_count{num_rows}, table_profile); diff --git a/cpp/benchmarks/join/join_common.hpp b/cpp/benchmarks/join/join_common.hpp index f2b9cb1bdb9..c1957db7929 100644 --- a/cpp/benchmarks/join/join_common.hpp +++ b/cpp/benchmarks/join/join_common.hpp @@ -147,8 +147,8 @@ static void BM_join(state_type& state, Join JoinFunc) // Benchmark conditional join if constexpr (std::is_same_v and is_conditional) { // Common column references. 
- const auto col_ref_left_0 = cudf::ast::column_reference(0); - const auto col_ref_right_0 = cudf::ast::column_reference(0, cudf::ast::table_reference::RIGHT); + auto const col_ref_left_0 = cudf::ast::column_reference(0); + auto const col_ref_right_0 = cudf::ast::column_reference(0, cudf::ast::table_reference::RIGHT); auto left_zero_eq_right_zero = cudf::ast::operation(cudf::ast::ast_operator::EQUAL, col_ref_left_0, col_ref_right_0); diff --git a/cpp/benchmarks/reduction/scan.cpp b/cpp/benchmarks/reduction/scan.cpp index 05c15a4fcb5..7a0d3f9515f 100644 --- a/cpp/benchmarks/reduction/scan.cpp +++ b/cpp/benchmarks/reduction/scan.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,7 +33,7 @@ static void BM_reduction_scan(benchmark::State& state, bool include_nulls) { cudf::size_type const n_rows{(cudf::size_type)state.range(0)}; auto const dtype = cudf::type_to_id(); - auto const table = create_random_table({dtype}, 1, row_count{n_rows}); + auto const table = create_random_table({dtype}, row_count{n_rows}); if (!include_nulls) table->get_column(0).set_null_mask(rmm::device_buffer{}, 0); cudf::column_view input(table->view().column(0)); diff --git a/cpp/benchmarks/replace/clamp.cpp b/cpp/benchmarks/replace/clamp.cpp index dd8b06227bc..d3a7415a478 100644 --- a/cpp/benchmarks/replace/clamp.cpp +++ b/cpp/benchmarks/replace/clamp.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -34,7 +34,7 @@ static void BM_clamp(benchmark::State& state, bool include_nulls) { cudf::size_type const n_rows{(cudf::size_type)state.range(0)}; auto const dtype = cudf::type_to_id(); - auto const table = create_random_table({dtype}, 1, row_count{n_rows}); + auto const table = create_random_table({dtype}, row_count{n_rows}); if (!include_nulls) { table->get_column(0).set_null_mask(rmm::device_buffer{}, 0); } cudf::column_view input(table->view().column(0)); diff --git a/cpp/benchmarks/replace/nans.cpp b/cpp/benchmarks/replace/nans.cpp index 3faf217956b..e1b05bbc337 100644 --- a/cpp/benchmarks/replace/nans.cpp +++ b/cpp/benchmarks/replace/nans.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,7 +34,7 @@ static void BM_replace_nans(benchmark::State& state, bool include_nulls) { cudf::size_type const n_rows{(cudf::size_type)state.range(0)}; auto const dtype = cudf::type_to_id(); - auto const table = create_random_table({dtype}, 1, row_count{n_rows}); + auto const table = create_random_table({dtype}, row_count{n_rows}); if (!include_nulls) { table->get_column(0).set_null_mask(rmm::device_buffer{}, 0); } cudf::column_view input(table->view().column(0)); diff --git a/cpp/benchmarks/search/search.cpp b/cpp/benchmarks/search/search.cpp index c3529c7e79c..0bccbbaff54 100644 --- a/cpp/benchmarks/search/search.cpp +++ b/cpp/benchmarks/search/search.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,63 +14,47 @@ * limitations under the License. 
*/ +#include +#include +#include + +#include #include +#include +#include #include #include #include -#include - #include -#include -#include -#include - class Search : public cudf::benchmark { }; -auto make_validity_iter() -{ - static constexpr int r_min = 1; - static constexpr int r_max = 10; - - cudf::test::UniformRandomGenerator rand_gen(r_min, r_max); - uint8_t mod_base = rand_gen.generate(); - return cudf::detail::make_counting_transform_iterator( - 0, [mod_base](auto row) { return (row % mod_base) > 0; }); -} - void BM_column(benchmark::State& state, bool nulls) { - const cudf::size_type column_size{(cudf::size_type)state.range(0)}; - const cudf::size_type values_size = column_size; - - auto col_data_it = cudf::detail::make_counting_transform_iterator( - 0, [=](cudf::size_type row) { return static_cast(row); }); - auto val_data_it = cudf::detail::make_counting_transform_iterator( - 0, [=](cudf::size_type row) { return static_cast(values_size - row); }); - - auto column = [&]() { - return nulls ? cudf::test::fixed_width_column_wrapper( - col_data_it, col_data_it + column_size, make_validity_iter()) - : cudf::test::fixed_width_column_wrapper(col_data_it, - col_data_it + column_size); - }(); - auto values = [&]() { - return nulls ? 
cudf::test::fixed_width_column_wrapper( - val_data_it, val_data_it + values_size, make_validity_iter()) - : cudf::test::fixed_width_column_wrapper(val_data_it, - val_data_it + values_size); - }(); - - auto data_table = cudf::sort(cudf::table_view({column})); + auto const column_size{static_cast(state.range(0))}; + auto const values_size = column_size; + + auto init_data = cudf::make_fixed_width_scalar(static_cast(0)); + auto init_value = cudf::make_fixed_width_scalar(static_cast(values_size)); + auto step = cudf::make_fixed_width_scalar(static_cast(-1)); + auto column = cudf::sequence(column_size, *init_data); + auto values = cudf::sequence(values_size, *init_value, *step); + if (nulls) { + auto [column_null_mask, column_null_count] = create_random_null_mask(column->size(), 0.1, 1); + column->set_null_mask(std::move(column_null_mask), column_null_count); + auto [values_null_mask, values_null_count] = create_random_null_mask(values->size(), 0.1, 2); + values->set_null_mask(std::move(values_null_mask), values_null_count); + } + + auto data_table = cudf::sort(cudf::table_view({*column})); for (auto _ : state) { cuda_event_timer timer(state, true); auto col = cudf::upper_bound(data_table->view(), - cudf::table_view({values}), + cudf::table_view({*values}), {cudf::order::ASCENDING}, {cudf::null_order::BEFORE}); } @@ -93,9 +77,9 @@ void BM_table(benchmark::State& state) { using wrapper = cudf::test::fixed_width_column_wrapper; - const cudf::size_type num_columns{(cudf::size_type)state.range(0)}; - const cudf::size_type column_size{(cudf::size_type)state.range(1)}; - const cudf::size_type values_size = column_size; + auto const num_columns{static_cast(state.range(0))}; + auto const column_size{static_cast(state.range(1))}; + auto const values_size = column_size; auto make_table = [&](cudf::size_type col_size) { cudf::test::UniformRandomGenerator random_gen(0, 100); @@ -142,30 +126,24 @@ BENCHMARK_REGISTER_F(Search, Table) void BM_contains(benchmark::State& state, bool 
nulls) { - const cudf::size_type column_size{(cudf::size_type)state.range(0)}; - const cudf::size_type values_size = column_size; - - auto col_data_it = cudf::detail::make_counting_transform_iterator( - 0, [=](cudf::size_type row) { return static_cast(row); }); - auto val_data_it = cudf::detail::make_counting_transform_iterator( - 0, [=](cudf::size_type row) { return static_cast(values_size - row); }); - - auto column = [&]() { - return nulls ? cudf::test::fixed_width_column_wrapper( - col_data_it, col_data_it + column_size, make_validity_iter()) - : cudf::test::fixed_width_column_wrapper(col_data_it, - col_data_it + column_size); - }(); - auto values = [&]() { - return nulls ? cudf::test::fixed_width_column_wrapper( - val_data_it, val_data_it + values_size, make_validity_iter()) - : cudf::test::fixed_width_column_wrapper(val_data_it, - val_data_it + values_size); - }(); + auto const column_size{static_cast(state.range(0))}; + auto const values_size = column_size; + + auto init_data = cudf::make_fixed_width_scalar(static_cast(0)); + auto init_value = cudf::make_fixed_width_scalar(static_cast(values_size)); + auto step = cudf::make_fixed_width_scalar(static_cast(-1)); + auto column = cudf::sequence(column_size, *init_data); + auto values = cudf::sequence(values_size, *init_value, *step); + if (nulls) { + auto [column_null_mask, column_null_count] = create_random_null_mask(column->size(), 0.1, 1); + column->set_null_mask(std::move(column_null_mask), column_null_count); + auto [values_null_mask, values_null_count] = create_random_null_mask(values->size(), 0.1, 2); + values->set_null_mask(std::move(values_null_mask), values_null_count); + } for (auto _ : state) { cuda_event_timer timer(state, true); - auto col = cudf::contains(column, values); + auto col = cudf::contains(*column, *values); } } diff --git a/cpp/benchmarks/sort/sort_strings.cpp b/cpp/benchmarks/sort/sort_strings.cpp index 8adeef21a79..30a7aee043b 100644 --- a/cpp/benchmarks/sort/sort_strings.cpp +++ 
b/cpp/benchmarks/sort/sort_strings.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,7 +29,7 @@ static void BM_sort(benchmark::State& state) { cudf::size_type const n_rows{(cudf::size_type)state.range(0)}; - auto const table = create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows}); + auto const table = create_random_table({cudf::type_id::STRING}, row_count{n_rows}); for (auto _ : state) { cuda_event_timer raii(state, true, rmm::cuda_stream_default); diff --git a/cpp/benchmarks/string/case.cpp b/cpp/benchmarks/string/case.cpp index 0f1653af2c6..0d74d0a6b7c 100644 --- a/cpp/benchmarks/string/case.cpp +++ b/cpp/benchmarks/string/case.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,7 +28,7 @@ class StringCase : public cudf::benchmark { static void BM_case(benchmark::State& state) { cudf::size_type const n_rows{(cudf::size_type)state.range(0)}; - auto const table = create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows}); + auto const table = create_random_table({cudf::type_id::STRING}, row_count{n_rows}); cudf::strings_column_view input(table->view().column(0)); for (auto _ : state) { diff --git a/cpp/benchmarks/string/combine.cpp b/cpp/benchmarks/string/combine.cpp index 8983646b6f1..a0cfcd15fe8 100644 --- a/cpp/benchmarks/string/combine.cpp +++ b/cpp/benchmarks/string/combine.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -36,8 +36,8 @@ static void BM_combine(benchmark::State& state) data_profile table_profile; table_profile.set_distribution_params( cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); - auto const table = - create_random_table({cudf::type_id::STRING}, 2, row_count{n_rows}, table_profile); + auto const table = create_random_table( + {cudf::type_id::STRING, cudf::type_id::STRING}, row_count{n_rows}, table_profile); cudf::strings_column_view input1(table->view().column(0)); cudf::strings_column_view input2(table->view().column(1)); cudf::string_scalar separator("+"); diff --git a/cpp/benchmarks/string/contains.cpp b/cpp/benchmarks/string/contains.cpp index fbcfabb4532..8c536372359 100644 --- a/cpp/benchmarks/string/contains.cpp +++ b/cpp/benchmarks/string/contains.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -31,7 +31,7 @@ enum contains_type { contains, count, findall }; static void BM_contains(benchmark::State& state, contains_type ct) { cudf::size_type const n_rows{(cudf::size_type)state.range(0)}; - auto const table = create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows}); + auto const table = create_random_table({cudf::type_id::STRING}, row_count{n_rows}); cudf::strings_column_view input(table->view().column(0)); for (auto _ : state) { diff --git a/cpp/benchmarks/string/convert_datetime.cpp b/cpp/benchmarks/string/convert_datetime.cpp index af51b504ee8..3782fea1e36 100644 --- a/cpp/benchmarks/string/convert_datetime.cpp +++ b/cpp/benchmarks/string/convert_datetime.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -35,7 +35,7 @@ void BM_convert_datetime(benchmark::State& state, direction dir) auto const n_rows = static_cast(state.range(0)); auto const data_type = cudf::data_type(cudf::type_to_id()); - auto const table = create_random_table({data_type.id()}, 1, row_count{n_rows}); + auto const table = create_random_table({data_type.id()}, row_count{n_rows}); cudf::column_view input(table->view().column(0)); auto source = dir == direction::to ? cudf::strings::from_timestamps(input, "%Y-%m-%d %H:%M:%S") diff --git a/cpp/benchmarks/string/convert_fixed_point.cpp b/cpp/benchmarks/string/convert_fixed_point.cpp index 5c050592c7b..05b87906eca 100644 --- a/cpp/benchmarks/string/convert_fixed_point.cpp +++ b/cpp/benchmarks/string/convert_fixed_point.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,7 +29,7 @@ namespace { std::unique_ptr get_strings_column(cudf::size_type rows) { std::unique_ptr result = - create_random_table({cudf::type_id::FLOAT32}, 1, row_count{static_cast(rows)}); + create_random_table({cudf::type_id::FLOAT32}, row_count{static_cast(rows)}); return cudf::strings::from_floats(result->release().front()->view()); } diff --git a/cpp/benchmarks/string/convert_numerics.cpp b/cpp/benchmarks/string/convert_numerics.cpp index 02ccb17e74a..71a23c76829 100644 --- a/cpp/benchmarks/string/convert_numerics.cpp +++ b/cpp/benchmarks/string/convert_numerics.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -30,7 +30,7 @@ template std::unique_ptr get_numerics_column(cudf::size_type rows) { std::unique_ptr result = - create_random_table({cudf::type_to_id()}, 1, row_count{rows}); + create_random_table({cudf::type_to_id()}, row_count{rows}); return std::move(result->release().front()); } diff --git a/cpp/benchmarks/string/copy.cu b/cpp/benchmarks/string/copy.cu index 2f064e71c44..00eb818256c 100644 --- a/cpp/benchmarks/string/copy.cu +++ b/cpp/benchmarks/string/copy.cu @@ -40,9 +40,9 @@ static void BM_copy(benchmark::State& state, copy_type ct) cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); auto const source = - create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows}, table_profile); + create_random_table({cudf::type_id::STRING}, row_count{n_rows}, table_profile); auto const target = - create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows}, table_profile); + create_random_table({cudf::type_id::STRING}, row_count{n_rows}, table_profile); // scatter indices auto index_map_col = make_numeric_column( diff --git a/cpp/benchmarks/string/factory.cu b/cpp/benchmarks/string/factory.cu index 2a88def1871..47356af129e 100644 --- a/cpp/benchmarks/string/factory.cu +++ b/cpp/benchmarks/string/factory.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -53,9 +53,8 @@ static void BM_factory(benchmark::State& state) data_profile table_profile; table_profile.set_distribution_params( cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); - auto const table = - create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows}, table_profile); - auto d_column = cudf::column_device_view::create(table->view().column(0)); + auto const table = create_random_table({cudf::type_id::STRING}, row_count{n_rows}, table_profile); + auto d_column = cudf::column_device_view::create(table->view().column(0)); rmm::device_uvector pairs(d_column->size(), rmm::cuda_stream_default); thrust::transform(thrust::device, d_column->pair_begin(), diff --git a/cpp/benchmarks/string/filter.cpp b/cpp/benchmarks/string/filter.cpp index fb030c2ccc2..b39cf25bc91 100644 --- a/cpp/benchmarks/string/filter.cpp +++ b/cpp/benchmarks/string/filter.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -41,8 +41,7 @@ static void BM_filter_chars(benchmark::State& state, FilterAPI api) data_profile table_profile; table_profile.set_distribution_params( cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); - auto const table = - create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows}, table_profile); + auto const table = create_random_table({cudf::type_id::STRING}, row_count{n_rows}, table_profile); cudf::strings_column_view input(table->view().column(0)); auto const types = cudf::strings::string_character_types::SPACE; diff --git a/cpp/benchmarks/string/find.cpp b/cpp/benchmarks/string/find.cpp index 167e9bc1348..55eb52c9b30 100644 --- a/cpp/benchmarks/string/find.cpp +++ b/cpp/benchmarks/string/find.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -39,8 +39,7 @@ static void BM_find_scalar(benchmark::State& state, FindAPI find_api) data_profile table_profile; table_profile.set_distribution_params( cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); - auto const table = - create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows}, table_profile); + auto const table = create_random_table({cudf::type_id::STRING}, row_count{n_rows}, table_profile); cudf::strings_column_view input(table->view().column(0)); cudf::string_scalar target("+"); cudf::test::strings_column_wrapper targets({"+", "-"}); diff --git a/cpp/benchmarks/string/repeat_strings.cpp b/cpp/benchmarks/string/repeat_strings.cpp index 86b8525023f..9044db18522 100644 --- a/cpp/benchmarks/string/repeat_strings.cpp +++ b/cpp/benchmarks/string/repeat_strings.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -45,7 +45,7 @@ static std::unique_ptr create_data_table(cudf::size_type n_cols, cudf::type_id::INT32, distribution_id::NORMAL, min_repeat_times, max_repeat_times); } - return create_random_table(dtype_ids, n_cols, row_count{n_rows}, table_profile); + return create_random_table(dtype_ids, row_count{n_rows}, table_profile); } static void BM_repeat_strings_scalar_times(benchmark::State& state) diff --git a/cpp/benchmarks/string/replace.cpp b/cpp/benchmarks/string/replace.cpp index 9be2e3a8627..0a3607c64f0 100644 --- a/cpp/benchmarks/string/replace.cpp +++ b/cpp/benchmarks/string/replace.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -40,8 +40,7 @@ static void BM_replace(benchmark::State& state, replace_type rt) data_profile table_profile; table_profile.set_distribution_params( cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); - auto const table = - create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows}, table_profile); + auto const table = create_random_table({cudf::type_id::STRING}, row_count{n_rows}, table_profile); cudf::strings_column_view input(table->view().column(0)); cudf::string_scalar target("+"); cudf::string_scalar repl(""); diff --git a/cpp/benchmarks/string/replace_re.cpp b/cpp/benchmarks/string/replace_re.cpp index c106953bf69..b9d04630837 100644 --- a/cpp/benchmarks/string/replace_re.cpp +++ b/cpp/benchmarks/string/replace_re.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -37,8 +37,7 @@ static void BM_replace(benchmark::State& state, replace_type rt) data_profile table_profile; table_profile.set_distribution_params( cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); - auto const table = - create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows}, table_profile); + auto const table = create_random_table({cudf::type_id::STRING}, row_count{n_rows}, table_profile); cudf::strings_column_view input(table->view().column(0)); cudf::test::strings_column_wrapper repls({"#", ""}); diff --git a/cpp/benchmarks/string/split.cpp b/cpp/benchmarks/string/split.cpp index fc879d1d0eb..ad25cfe54de 100644 --- a/cpp/benchmarks/string/split.cpp +++ b/cpp/benchmarks/string/split.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -38,8 +38,7 @@ static void BM_split(benchmark::State& state, split_type rt) data_profile table_profile; table_profile.set_distribution_params( cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); - auto const table = - create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows}, table_profile); + auto const table = create_random_table({cudf::type_id::STRING}, row_count{n_rows}, table_profile); cudf::strings_column_view input(table->view().column(0)); cudf::string_scalar target("+"); diff --git a/cpp/benchmarks/string/substring.cpp b/cpp/benchmarks/string/substring.cpp index 8864fffc40b..2195cc56515 100644 --- a/cpp/benchmarks/string/substring.cpp +++ b/cpp/benchmarks/string/substring.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -43,8 +43,7 @@ static void BM_substring(benchmark::State& state, substring_type rt) data_profile table_profile; table_profile.set_distribution_params( cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); - auto const table = - create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows}, table_profile); + auto const table = create_random_table({cudf::type_id::STRING}, row_count{n_rows}, table_profile); cudf::strings_column_view input(table->view().column(0)); auto starts_itr = thrust::constant_iterator(1); auto stops_itr = thrust::constant_iterator(max_str_length / 2); diff --git a/cpp/benchmarks/string/translate.cpp b/cpp/benchmarks/string/translate.cpp index 98688fa14fc..38c6ff9c701 100644 --- a/cpp/benchmarks/string/translate.cpp +++ b/cpp/benchmarks/string/translate.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. 
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -41,8 +41,7 @@ static void BM_translate(benchmark::State& state, int entry_count) data_profile table_profile; table_profile.set_distribution_params( cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); - auto const table = - create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows}, table_profile); + auto const table = create_random_table({cudf::type_id::STRING}, row_count{n_rows}, table_profile); cudf::strings_column_view input(table->view().column(0)); std::vector entries(entry_count); diff --git a/cpp/benchmarks/text/ngrams.cpp b/cpp/benchmarks/text/ngrams.cpp index 7c39ebbb1bb..157c27ae48a 100644 --- a/cpp/benchmarks/text/ngrams.cpp +++ b/cpp/benchmarks/text/ngrams.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -38,8 +38,7 @@ static void BM_ngrams(benchmark::State& state, ngrams_type nt) data_profile table_profile; table_profile.set_distribution_params( cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); - auto const table = - create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows}, table_profile); + auto const table = create_random_table({cudf::type_id::STRING}, row_count{n_rows}, table_profile); cudf::strings_column_view input(table->view().column(0)); for (auto _ : state) { diff --git a/cpp/benchmarks/text/normalize.cpp b/cpp/benchmarks/text/normalize.cpp index ac8e92b3376..2cc083f4ae8 100644 --- a/cpp/benchmarks/text/normalize.cpp +++ b/cpp/benchmarks/text/normalize.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -36,8 +36,7 @@ static void BM_normalize(benchmark::State& state, bool to_lower) data_profile table_profile; table_profile.set_distribution_params( cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); - auto const table = - create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows}, table_profile); + auto const table = create_random_table({cudf::type_id::STRING}, row_count{n_rows}, table_profile); cudf::strings_column_view input(table->view().column(0)); for (auto _ : state) { diff --git a/cpp/benchmarks/text/normalize_spaces.cpp b/cpp/benchmarks/text/normalize_spaces.cpp index 34749b579b9..3bd636d4aa9 100644 --- a/cpp/benchmarks/text/normalize_spaces.cpp +++ b/cpp/benchmarks/text/normalize_spaces.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -37,8 +37,7 @@ static void BM_normalize(benchmark::State& state) data_profile table_profile; table_profile.set_distribution_params( cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); - auto const table = - create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows}, table_profile); + auto const table = create_random_table({cudf::type_id::STRING}, row_count{n_rows}, table_profile); cudf::strings_column_view input(table->view().column(0)); for (auto _ : state) { diff --git a/cpp/benchmarks/text/tokenize.cpp b/cpp/benchmarks/text/tokenize.cpp index fa3f816db59..4cb9c9e5271 100644 --- a/cpp/benchmarks/text/tokenize.cpp +++ b/cpp/benchmarks/text/tokenize.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -40,8 +40,7 @@ static void BM_tokenize(benchmark::State& state, tokenize_type tt) data_profile table_profile; table_profile.set_distribution_params( cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); - auto const table = - create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows}, table_profile); + auto const table = create_random_table({cudf::type_id::STRING}, row_count{n_rows}, table_profile); cudf::strings_column_view input(table->view().column(0)); cudf::test::strings_column_wrapper delimiters({" ", "+", "-"});