diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index de6530084ad..41974095fcd 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -232,7 +232,6 @@ add_library(cudf src/groupby/sort/group_sum.cu src/groupby/sort/scan.cpp src/groupby/sort/group_count_scan.cu - src/groupby/sort/group_dense_rank_scan.cu src/groupby/sort/group_max_scan.cu src/groupby/sort/group_min_scan.cu src/groupby/sort/group_rank_scan.cu diff --git a/cpp/src/groupby/sort/group_dense_rank_scan.cu b/cpp/src/groupby/sort/group_dense_rank_scan.cu deleted file mode 100644 index 123c7569424..00000000000 --- a/cpp/src/groupby/sort/group_dense_rank_scan.cu +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright (c) 2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include - -#include -#include - -#include - -namespace cudf { -namespace groupby { -namespace detail { -namespace { -template -std::unique_ptr generate_dense_ranks(column_view const& order_by, - device_span group_labels, - device_span group_offsets, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - auto const flat_order = - order_by.type().id() == type_id::STRUCT - ? 
table_view{std::vector{order_by.child_begin(), order_by.child_end()}} - : table_view{{order_by}}; - auto const d_flat_order = table_device_view::create(flat_order, stream); - row_equality_comparator comparator(*d_flat_order, *d_flat_order, true); - auto ranks = make_fixed_width_column( - data_type{type_to_id()}, order_by.size(), mask_state::UNALLOCATED, stream, mr); - auto mutable_ranks = ranks->mutable_view(); - - if (order_by.type().id() == type_id::STRUCT && order_by.has_nulls()) { - auto const d_col_order = column_device_view::create(order_by, stream); - thrust::tabulate(rmm::exec_policy(stream), - mutable_ranks.begin(), - mutable_ranks.end(), - [comparator, - d_col_order = *d_col_order, - labels = group_labels.data(), - offsets = group_offsets.data()] __device__(size_type row_index) { - if (row_index == offsets[labels[row_index]]) { return true; } - bool const lhs_is_null{d_col_order.is_null(row_index)}; - bool const rhs_is_null{d_col_order.is_null(row_index - 1)}; - if (lhs_is_null && rhs_is_null) { - return false; - } else if (lhs_is_null != rhs_is_null) { - return true; - } - return !comparator(row_index, row_index - 1); - }); - - } else { - thrust::tabulate( - rmm::exec_policy(stream), - mutable_ranks.begin(), - mutable_ranks.end(), - [comparator, labels = group_labels.data(), offsets = group_offsets.data()] __device__( - size_type row_index) { - return row_index == offsets[labels[row_index]] || !comparator(row_index, row_index - 1); - }); - } - - thrust::inclusive_scan_by_key(rmm::exec_policy(stream), - group_labels.begin(), - group_labels.end(), - mutable_ranks.begin(), - mutable_ranks.begin()); - return ranks; -} -} // namespace -std::unique_ptr dense_rank_scan(column_view const& order_by, - device_span group_labels, - device_span group_offsets, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - if ((order_by.type().id() == type_id::STRUCT && - has_nested_nulls( - table_view{std::vector{order_by.child_begin(), 
order_by.child_end()}})) || - (order_by.type().id() != type_id::STRUCT && order_by.has_nulls())) { - return generate_dense_ranks(order_by, group_labels, group_offsets, stream, mr); - } - return generate_dense_ranks(order_by, group_labels, group_offsets, stream, mr); -} - -} // namespace detail -} // namespace groupby -} // namespace cudf diff --git a/cpp/src/groupby/sort/group_rank_scan.cu b/cpp/src/groupby/sort/group_rank_scan.cu index f19babdc84e..5f4dda294fd 100644 --- a/cpp/src/groupby/sort/group_rank_scan.cu +++ b/cpp/src/groupby/sort/group_rank_scan.cu @@ -16,71 +16,66 @@ #include #include -#include +#include #include -#include #include #include #include -#include +#include namespace cudf { namespace groupby { namespace detail { namespace { -template -std::unique_ptr generate_ranks(column_view const& order_by, +/** + * @brief generate grouped row ranks or dense ranks using a row comparison then scan the results + * + * @tparam has_nulls if the order_by column has nulls + * @tparam value_resolver flag value resolver function with boolean first and row number arguments + * @tparam scan_operator scan function run on the flag values + * @param order_by input column to generate ranks for + * @param group_labels ID of group that the corresponding value belongs to + * @param group_offsets group index offsets with group ID indices + * @param resolver flag value resolver + * @param scan_op scan operation run on the flag results + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return std::unique_ptr rank values + */ +template +std::unique_ptr rank_generator(column_view const& order_by, device_span group_labels, device_span group_offsets, + value_resolver resolver, + scan_operator scan_op, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto const flat_order = - order_by.type().id() == type_id::STRUCT - ? 
table_view{std::vector{order_by.child_begin(), order_by.child_end()}} - : table_view{{order_by}}; - auto const d_flat_order = table_device_view::create(flat_order, stream); - row_equality_comparator comparator(*d_flat_order, *d_flat_order, true); - auto ranks = make_fixed_width_column( - data_type{type_to_id()}, order_by.size(), mask_state::UNALLOCATED, stream, mr); + auto const superimposed = structs::detail::superimpose_parent_nulls(order_by, stream, mr); + table_view const order_table{{std::get<0>(superimposed)}}; + auto const flattener = cudf::structs::detail::flatten_nested_columns( + order_table, {}, {}, structs::detail::column_nullability::MATCH_INCOMING); + auto const d_flat_order = table_device_view::create(std::get<0>(flattener), stream); + row_equality_comparator comparator(*d_flat_order, *d_flat_order, true); + auto ranks = make_fixed_width_column(data_type{type_to_id()}, + order_table.num_rows(), + mask_state::UNALLOCATED, + stream, + mr); auto mutable_ranks = ranks->mutable_view(); - if (order_by.type().id() == type_id::STRUCT && order_by.has_nulls()) { - auto const d_col_order = column_device_view::create(order_by, stream); - thrust::tabulate(rmm::exec_policy(stream), - mutable_ranks.begin(), - mutable_ranks.end(), - [comparator, - d_col_order = *d_col_order, - labels = group_labels.data(), - offsets = group_offsets.data()] __device__(size_type row_index) { - auto group_start = offsets[labels[row_index]]; - if (row_index == group_start) { return 1; } - bool const lhs_is_null{d_col_order.is_null(row_index)}; - bool const rhs_is_null{d_col_order.is_null(row_index - 1)}; - if (lhs_is_null && rhs_is_null) { - return 0; - } else if (lhs_is_null != rhs_is_null) { - return row_index - group_start + 1; - } - return comparator(row_index, row_index - 1) ? 
0 - : row_index - group_start + 1; - }); - } else { - thrust::tabulate(rmm::exec_policy(stream), - mutable_ranks.begin(), - mutable_ranks.end(), - [comparator, - labels = group_labels.data(), - offsets = group_offsets.data()] __device__(size_type row_index) { - auto group_start = offsets[labels[row_index]]; - return row_index != group_start && comparator(row_index, row_index - 1) - ? 0 - : row_index - group_start + 1; - }); - } + thrust::tabulate( + rmm::exec_policy(stream), + mutable_ranks.begin(), + mutable_ranks.end(), + [comparator, resolver, labels = group_labels.data(), offsets = group_offsets.data()] __device__( + size_type row_index) { + auto group_start = offsets[labels[row_index]]; + return resolver(row_index == group_start || !comparator(row_index, row_index - 1), + row_index - group_start); + }); thrust::inclusive_scan_by_key(rmm::exec_policy(stream), group_labels.begin(), @@ -88,24 +83,62 @@ std::unique_ptr generate_ranks(column_view const& order_by, mutable_ranks.begin(), mutable_ranks.begin(), thrust::equal_to{}, - DeviceMax{}); + scan_op); return ranks; } } // namespace + std::unique_ptr rank_scan(column_view const& order_by, device_span group_labels, device_span group_offsets, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - if ((order_by.type().id() == type_id::STRUCT && - has_nested_nulls( - table_view{std::vector{order_by.child_begin(), order_by.child_end()}})) || - (order_by.type().id() != type_id::STRUCT && order_by.has_nulls())) { - return generate_ranks(order_by, group_labels, group_offsets, stream, mr); + if (has_nested_nulls(table_view{{order_by}})) { + return rank_generator( + order_by, + group_labels, + group_offsets, + [] __device__(bool equality, auto row_index) { return equality ? row_index + 1 : 0; }, + DeviceMax{}, + stream, + mr); + } + return rank_generator( + order_by, + group_labels, + group_offsets, + [] __device__(bool equality, auto row_index) { return equality ? 
row_index + 1 : 0; }, + DeviceMax{}, + stream, + mr); +} + +std::unique_ptr dense_rank_scan(column_view const& order_by, + device_span group_labels, + device_span group_offsets, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + if (has_nested_nulls(table_view{{order_by}})) { + return rank_generator( + order_by, + group_labels, + group_offsets, + [] __device__(bool equality, auto row_index) { return equality; }, + DeviceSum{}, + stream, + mr); } - return generate_ranks(order_by, group_labels, group_offsets, stream, mr); + return rank_generator( + order_by, + group_labels, + group_offsets, + [] __device__(bool equality, auto row_index) { return equality; }, + DeviceSum{}, + stream, + mr); } } // namespace detail diff --git a/cpp/src/groupby/sort/scan.cpp b/cpp/src/groupby/sort/scan.cpp index c43df77bb5e..dade6881bbd 100644 --- a/cpp/src/groupby/sort/scan.cpp +++ b/cpp/src/groupby/sort/scan.cpp @@ -29,6 +29,8 @@ #include +#include + #include namespace cudf { @@ -114,12 +116,8 @@ void scan_result_functor::operator()(aggregation const& agg) CUDF_EXPECTS(helper.is_presorted(), "Rank aggregate in groupby scan requires the keys to be presorted"); auto const order_by = get_grouped_values(); - CUDF_EXPECTS(order_by.type().id() != type_id::LIST, + CUDF_EXPECTS(!cudf::structs::detail::is_or_has_nested_lists(order_by), "Unsupported list type in grouped rank scan."); - CUDF_EXPECTS(std::none_of(order_by.child_begin(), - order_by.child_end(), - [](auto const& col) { return is_nested(col.type()); }), - "Unsupported nested columns in grouped rank scan."); cache.add_result( col_idx, @@ -135,12 +133,8 @@ void scan_result_functor::operator()(aggregation const& CUDF_EXPECTS(helper.is_presorted(), "Dense rank aggregate in groupby scan requires the keys to be presorted"); auto const order_by = get_grouped_values(); - CUDF_EXPECTS(order_by.type().id() != type_id::LIST, + CUDF_EXPECTS(!cudf::structs::detail::is_or_has_nested_lists(order_by), "Unsupported list type 
in grouped dense_rank scan."); - CUDF_EXPECTS(std::none_of(order_by.child_begin(), - order_by.child_end(), - [](auto const& col) { return is_nested(col.type()); }), - "Unsupported nested columns in grouped dense_rank scan."); cache.add_result( col_idx, diff --git a/cpp/src/reductions/scan/scan_inclusive.cu b/cpp/src/reductions/scan/scan_inclusive.cu index 7f01ddedf03..33cc072c256 100644 --- a/cpp/src/reductions/scan/scan_inclusive.cu +++ b/cpp/src/reductions/scan/scan_inclusive.cu @@ -30,7 +30,8 @@ #include #include -#include +#include + #include namespace cudf { @@ -196,99 +197,52 @@ struct scan_dispatcher { } }; -template -std::unique_ptr generate_dense_ranks(column_view const& order_by, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - auto const flat_order = - order_by.type().id() == type_id::STRUCT - ? table_view{std::vector{order_by.child_begin(), order_by.child_end()}} - : table_view{{order_by}}; - auto const d_flat_order = table_device_view::create(flat_order, stream); - row_equality_comparator comparator(*d_flat_order, *d_flat_order, true); - auto ranks = make_fixed_width_column( - data_type{type_to_id()}, order_by.size(), mask_state::UNALLOCATED, stream, mr); - auto mutable_ranks = ranks->mutable_view(); - - if (order_by.type().id() == type_id::STRUCT && order_by.has_nulls()) { - auto const d_col_order = column_device_view::create(order_by, stream); - thrust::tabulate(rmm::exec_policy(stream), - mutable_ranks.begin(), - mutable_ranks.end(), - [comparator, d_col_order = *d_col_order] __device__(size_type row_index) { - if (row_index == 0) { return true; } - bool const lhs_is_null{d_col_order.is_null(row_index)}; - bool const rhs_is_null{d_col_order.is_null(row_index - 1)}; - if (lhs_is_null && rhs_is_null) { - return false; - } else if (lhs_is_null != rhs_is_null) { - return true; - } - return !comparator(row_index, row_index - 1); - }); - } else { - thrust::tabulate(rmm::exec_policy(stream), - mutable_ranks.begin(), - 
mutable_ranks.end(), - [comparator] __device__(size_type row_index) { - return row_index == 0 || !comparator(row_index, row_index - 1); - }); - } - - thrust::inclusive_scan(rmm::exec_policy(stream), - mutable_ranks.begin(), - mutable_ranks.end(), - mutable_ranks.begin()); - return ranks; -} - -template -std::unique_ptr generate_ranks(column_view const& order_by, +/** + * @brief generate row ranks or dense ranks using a row comparison then scan the results + * + * @tparam has_nulls if the order_by column has nulls + * @tparam value_resolver flag value resolver with boolean first and row number arguments + * @tparam scan_operator scan function run on the flag values + * @param order_by input column to generate ranks for + * @param resolver flag value resolver + * @param scan_op scan operation run on the flag results + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return std::unique_ptr rank values + */ +template +std::unique_ptr rank_generator(column_view const& order_by, + value_resolver resolver, + scan_operator scan_op, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto const flat_order = - order_by.type().id() == type_id::STRUCT - ? 
table_view{std::vector{order_by.child_begin(), order_by.child_end()}} - : table_view{{order_by}}; - auto const d_flat_order = table_device_view::create(flat_order, stream); - row_equality_comparator comparator(*d_flat_order, *d_flat_order, true); - auto ranks = make_fixed_width_column( - data_type{type_to_id()}, order_by.size(), mask_state::UNALLOCATED, stream, mr); + auto const superimposed = structs::detail::superimpose_parent_nulls(order_by, stream, mr); + table_view const order_table{{std::get<0>(superimposed)}}; + auto const flattener = cudf::structs::detail::flatten_nested_columns( + order_table, {}, {}, structs::detail::column_nullability::MATCH_INCOMING); + auto const d_flat_order = table_device_view::create(std::get<0>(flattener), stream); + row_equality_comparator comparator(*d_flat_order, *d_flat_order, true); + auto ranks = make_fixed_width_column(data_type{type_to_id()}, + order_table.num_rows(), + mask_state::UNALLOCATED, + stream, + mr); auto mutable_ranks = ranks->mutable_view(); - if (order_by.type().id() == type_id::STRUCT && order_by.has_nulls()) { - auto const d_col_order = column_device_view::create(order_by, stream); - thrust::tabulate(rmm::exec_policy(stream), - mutable_ranks.begin(), - mutable_ranks.end(), - [comparator, d_col_order = *d_col_order] __device__(size_type row_index) { - if (row_index == 0) { return 1; } - bool const lhs_is_null{d_col_order.is_null(row_index)}; - bool const rhs_is_null{d_col_order.is_null(row_index - 1)}; - if (lhs_is_null and rhs_is_null) { - return 0; - } else if (lhs_is_null != rhs_is_null) { - return row_index + 1; - } - return comparator(row_index, row_index - 1) ? 0 : row_index + 1; - }); - } else { - thrust::tabulate( - rmm::exec_policy(stream), - mutable_ranks.begin(), - mutable_ranks.end(), - [comparator] __device__(size_type row_index) { - return row_index != 0 && comparator(row_index, row_index - 1) ? 
0 : row_index + 1; - }); - } + thrust::tabulate(rmm::exec_policy(stream), + mutable_ranks.begin(), + mutable_ranks.end(), + [comparator, resolver] __device__(size_type row_index) { + return resolver(row_index == 0 || !comparator(row_index, row_index - 1), + row_index); + }); thrust::inclusive_scan(rmm::exec_policy(stream), mutable_ranks.begin(), mutable_ranks.end(), mutable_ranks.begin(), - DeviceMax{}); + scan_op); return ranks; } @@ -298,36 +252,44 @@ std::unique_ptr inclusive_dense_rank_scan(column_view const& order_by, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - CUDF_EXPECTS(order_by.type().id() != type_id::LIST, "Unsupported list type in dense_rank scan."); - CUDF_EXPECTS(std::none_of(order_by.child_begin(), - order_by.child_end(), - [](auto const& col) { return is_nested(col.type()); }), - "Unsupported nested columns in dense_rank scan."); - if ((order_by.type().id() == type_id::STRUCT && - has_nested_nulls( - table_view{std::vector{order_by.child_begin(), order_by.child_end()}})) || - (order_by.type().id() != type_id::STRUCT && order_by.has_nulls())) { - return generate_dense_ranks(order_by, stream, mr); + CUDF_EXPECTS(!cudf::structs::detail::is_or_has_nested_lists(order_by), + "Unsupported list type in dense_rank scan."); + if (has_nested_nulls(table_view{{order_by}})) { + return rank_generator( + order_by, + [] __device__(bool equality, auto row_index) { return equality; }, + DeviceSum{}, + stream, + mr); } - return generate_dense_ranks(order_by, stream, mr); + return rank_generator( + order_by, + [] __device__(bool equality, auto row_index) { return equality; }, + DeviceSum{}, + stream, + mr); } std::unique_ptr inclusive_rank_scan(column_view const& order_by, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - CUDF_EXPECTS(order_by.type().id() != type_id::LIST, "Unsupported list type in rank scan."); - CUDF_EXPECTS(std::none_of(order_by.child_begin(), - order_by.child_end(), - [](auto const& col) { return 
is_nested(col.type()); }), - "Unsupported nested columns in rank scan."); - if ((order_by.type().id() == type_id::STRUCT && - has_nested_nulls( - table_view{std::vector{order_by.child_begin(), order_by.child_end()}})) || - (order_by.type().id() != type_id::STRUCT && order_by.has_nulls())) { - return generate_ranks(order_by, stream, mr); + CUDF_EXPECTS(!cudf::structs::detail::is_or_has_nested_lists(order_by), + "Unsupported list type in rank scan."); + if (has_nested_nulls(table_view{{order_by}})) { + return rank_generator( + order_by, + [] __device__(bool equality, auto row_index) { return equality ? row_index + 1 : 0; }, + DeviceMax{}, + stream, + mr); } - return generate_ranks(order_by, stream, mr); + return rank_generator( + order_by, + [] __device__(bool equality, auto row_index) { return equality ? row_index + 1 : 0; }, + DeviceMax{}, + stream, + mr); } std::unique_ptr scan_inclusive( diff --git a/cpp/src/structs/utilities.cpp b/cpp/src/structs/utilities.cpp index ace9a608bdb..b84af73b681 100644 --- a/cpp/src/structs/utilities.cpp +++ b/cpp/src/structs/utilities.cpp @@ -72,18 +72,14 @@ namespace { * @brief Check whether the specified column is of type `STRUCT`. */ bool is_struct(cudf::column_view const& col) { return col.type().id() == type_id::STRUCT; } +} // namespace -/** - * @brief Check whether the specified column is of type LIST, or any LISTs in its descendent - * columns. - */ bool is_or_has_nested_lists(cudf::column_view const& col) { auto is_list = [](cudf::column_view const& col) { return col.type().id() == type_id::LIST; }; return is_list(col) || std::any_of(col.child_begin(), col.child_end(), is_or_has_nested_lists); } -} // namespace /** * @brief Flattens struct columns to constituent non-struct columns in the input table. 
diff --git a/cpp/src/structs/utilities.hpp b/cpp/src/structs/utilities.hpp index 1683518a1ef..24b80b58669 100644 --- a/cpp/src/structs/utilities.hpp +++ b/cpp/src/structs/utilities.hpp @@ -56,6 +56,14 @@ enum class column_nullability { std::vector> extract_ordered_struct_children( host_span struct_cols); +/** + * @brief Check whether the specified column is of type LIST, or any LISTs in its descendent + * columns. + * @param col column to check for lists. + * @return true if the column or any of its children is a list, false otherwise. + */ +bool is_or_has_nested_lists(cudf::column_view const& col); + /** * @brief Flatten table with struct columns to table with constituent columns of struct columns. * diff --git a/cpp/tests/groupby/rank_scan_tests.cpp b/cpp/tests/groupby/rank_scan_tests.cpp index 37e75e2e906..d08bf011618 100644 --- a/cpp/tests/groupby/rank_scan_tests.cpp +++ b/cpp/tests/groupby/rank_scan_tests.cpp @@ -220,16 +220,15 @@ TYPED_TEST(typed_groupby_rank_scan_test, mixedStructs) CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result.second[0].results[1], expected_rank_vals); } -/* Nested struct support dependent on https://github.com/rapidsai/cudf/issues/8683 TYPED_TEST(typed_groupby_rank_scan_test, nestedStructs) { using T = TypeParam; - auto col1 = fixed_width_column_wrapper{{0, 0, 7, 7, 7, 5, 4, 4, 4, 9, 9, 9}, - null_at(5)}; auto col2 = fixed_width_column_wrapper{{0, 0, 7, 7, 7, 5, 4, 4, 4, 9, 9, - 9}, null_at(5)}; auto col3 = fixed_width_column_wrapper{{0, 0, 7, 7, 7, 5, 4, 4, 4, 9, - 9, 9}, null_at(5)}; auto col4 = fixed_width_column_wrapper{{0, 0, 7, 7, 7, 5, 4, 4, 4, - 9, 9, 9}, null_at(5)}; auto strings1 = strings_column_wrapper{ + auto col1 = fixed_width_column_wrapper{{0, 0, 7, 7, 7, 5, 4, 4, 4, 9, 9, 9}, null_at(5)}; + auto col2 = fixed_width_column_wrapper{{0, 0, 7, 7, 7, 5, 4, 4, 4, 9, 9, 9}, null_at(5)}; + auto col3 = fixed_width_column_wrapper{{0, 0, 7, 7, 7, 5, 4, 4, 4, 9, 9, 9}, null_at(5)}; + auto col4 = fixed_width_column_wrapper{{0, 0, 7, 7, 
7, 5, 4, 4, 4, 9, 9, 9}, null_at(5)}; + auto strings1 = strings_column_wrapper{ {"0a", "0a", "2a", "2a", "3b", "5", "6c", "6c", "6c", "9", "9", "10d"}, null_at(8)}; auto strings2 = strings_column_wrapper{ {"0a", "0a", "2a", "2a", "3b", "5", "6c", "6c", "6c", "9", "9", "10d"}, null_at(8)}; @@ -240,26 +239,80 @@ TYPED_TEST(typed_groupby_rank_scan_test, nestedStructs) strings_column_wrapper keys = {{"0", "0", "0", "0", "0", "0", "1", "1", "1", "1", "0", "1"}, {1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0}}; - std::vector requests; - requests.emplace_back(groupby::aggregation_request()); - requests.emplace_back(groupby::aggregation_request()); + std::vector requests; + requests.emplace_back(groupby::scan_request()); + requests.emplace_back(groupby::scan_request()); requests[0].values = *nested_col; - requests[0].aggregations.push_back(make_dense_rank_aggregation()); - requests[0].aggregations.push_back(make_rank_aggregation()); + requests[0].aggregations.push_back(make_dense_rank_aggregation()); + requests[0].aggregations.push_back(make_rank_aggregation()); requests[1].values = *flattened_col; - requests[1].aggregations.push_back(make_dense_rank_aggregation()); - requests[1].aggregations.push_back(make_rank_aggregation()); + requests[1].aggregations.push_back(make_dense_rank_aggregation()); + requests[1].aggregations.push_back(make_rank_aggregation()); groupby::groupby gb_obj(table_view({keys}), null_policy::INCLUDE, sorted::YES); auto result = gb_obj.scan(requests); CUDF_TEST_EXPECT_TABLES_EQUAL(table_view({keys}), result.first->view()); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT( - *result.second[0].results[0], *result.second[1].results[0]); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT( - *result.second[0].results[2], *result.second[1].results[2]); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result.second[0].results[0], *result.second[1].results[0]); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result.second[0].results[1], *result.second[1].results[1]); +} + +TYPED_TEST(typed_groupby_rank_scan_test, 
structsWithNullPushdown) +{ + using T = TypeParam; + + auto col1 = fixed_width_column_wrapper{{0, 0, 7, 7, 7, 5, 4, 4, 4, 9, 9, 9}, null_at(5)}; + auto col2 = fixed_width_column_wrapper{{0, 0, 7, 7, 7, 5, 4, 4, 4, 9, 9, 9}, null_at(5)}; + auto strings1 = strings_column_wrapper{ + {"0a", "0a", "2a", "2a", "3b", "5", "6c", "6c", "6c", "9", "9", "10d"}, null_at(8)}; + auto strings2 = strings_column_wrapper{ + {"0a", "0a", "2a", "2a", "3b", "5", "6c", "6c", "6c", "9", "9", "10d"}, null_at(8)}; + + std::vector> struct_columns; + struct_columns.push_back(col1.release()); + struct_columns.push_back(strings1.release()); + auto struct_col = + cudf::make_structs_column(12, std::move(struct_columns), 0, rmm::device_buffer{}); + auto const struct_nulls = + thrust::host_vector(std::vector{1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0}); + struct_col->set_null_mask( + cudf::test::detail::make_null_mask(struct_nulls.begin(), struct_nulls.end())); + + std::vector> null_struct_columns; + null_struct_columns.push_back(col2.release()); + null_struct_columns.push_back(strings2.release()); + auto null_col = + cudf::make_structs_column(12, std::move(null_struct_columns), 0, rmm::device_buffer{}); + null_col->set_null_mask(create_null_mask(12, cudf::mask_state::ALL_NULL)); + + strings_column_wrapper keys = {{"0", "0", "0", "0", "0", "0", "1", "1", "1", "1", "0", "1"}, + {1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0}}; + + std::vector requests; + requests.emplace_back(groupby::scan_request()); + requests.emplace_back(groupby::scan_request()); + requests[0].values = *struct_col; + requests[0].aggregations.push_back(make_dense_rank_aggregation()); + requests[0].aggregations.push_back(make_rank_aggregation()); + requests[1].values = *null_col; + requests[1].aggregations.push_back(make_dense_rank_aggregation()); + requests[1].aggregations.push_back(make_rank_aggregation()); + + groupby::groupby gb_obj(table_view({keys}), null_policy::INCLUDE, sorted::YES); + auto result = gb_obj.scan(requests); + + auto 
expected_dense_vals = + fixed_width_column_wrapper{1, 2, 2, 3, 4, 5, 1, 1, 2, 1, 1, 2}; + auto expected_rank_vals = + fixed_width_column_wrapper{1, 2, 2, 4, 5, 6, 1, 1, 3, 1, 1, 3}; + auto expected_null_result = + fixed_width_column_wrapper{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result.second[0].results[0], expected_dense_vals); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result.second[0].results[1], expected_rank_vals); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result.second[1].results[0], expected_null_result); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result.second[1].results[1], expected_null_result); } -*/ /* List support dependent on https://github.com/rapidsai/cudf/issues/8683 template @@ -296,15 +349,15 @@ TYPED_TEST(list_groupby_rank_scan_test, lists) fixed_width_column_wrapper keys = {{0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1}, {1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0}}; - std::vector requests; + std::vector requests; requests.emplace_back(groupby::aggregation_request()); requests.emplace_back(groupby::aggregation_request()); requests[0].values = list_col; - requests[0].aggregations.push_back(make_dense_rank_aggregation()); - requests[0].aggregations.push_back(make_rank_aggregation()); + requests[0].aggregations.push_back(make_dense_rank_aggregation()); + requests[0].aggregations.push_back(make_rank_aggregation()); requests[1].values = struct_col; - requests[1].aggregations.push_back(make_dense_rank_aggregation()); - requests[1].aggregations.push_back(make_rank_aggregation()); + requests[1].aggregations.push_back(make_dense_rank_aggregation()); + requests[1].aggregations.push_back(make_rank_aggregation()); groupby::groupby gb_obj(table_view({keys}), null_policy::INCLUDE, sorted::YES); auto result = gb_obj.scan(requests); diff --git a/cpp/tests/reductions/scan_tests.cpp b/cpp/tests/reductions/scan_tests.cpp index 242f2ff94db..49424f037c4 100644 --- a/cpp/tests/reductions/scan_tests.cpp +++ b/cpp/tests/reductions/scan_tests.cpp @@ -511,7 
+511,6 @@ TYPED_TEST(TypedRankScanTest, mixedStructs) *struct_col, expected_rank_vals, make_rank_aggregation(), null_policy::INCLUDE); } -/* Nested struct support dependent on https://github.com/rapidsai/cudf/issues/8683 TYPED_TEST(TypedRankScanTest, nestedStructs) { auto const v = [] { @@ -519,10 +518,12 @@ TYPED_TEST(TypedRankScanTest, nestedStructs) return make_vector({-1, -1, -4, -4, -4, 5, 7, 7, 7, 9, 9, 9}); return make_vector({0, 0, 4, 4, 4, 5, 7, 7, 7, 9, 9, 9}); }(); - auto const b = thrust::host_vector(std::vector{1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, - 1}); auto col1 = this->make_column(v, b); auto col2 = this->make_column(v, b); auto - col3 = this->make_column(v, b); auto col4 = this->make_column(v, b); auto strings1 = - strings_column_wrapper{ + auto const b = thrust::host_vector(std::vector{1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1}); + auto col1 = this->make_column(v, b); + auto col2 = this->make_column(v, b); + auto col3 = this->make_column(v, b); + auto col4 = this->make_column(v, b); + auto strings1 = strings_column_wrapper{ {"0a", "0a", "2a", "2a", "3b", "5", "6c", "6c", "6c", "9", "9", "10d"}, null_at(8)}; auto strings2 = strings_column_wrapper{ {"0a", "0a", "2a", "2a", "3b", "5", "6c", "6c", "6c", "9", "9", "10d"}, null_at(8)}; @@ -541,20 +542,62 @@ TYPED_TEST(TypedRankScanTest, nestedStructs) flat_columns.push_back(std::move(col4)); auto flat_col = structs_column_wrapper{std::move(flat_columns)}; - auto dense_out = scan( - nested_col, make_dense_rank_aggregation(), scan_type::INCLUSIVE, null_policy::INCLUDE); - auto dense_expected = scan( - flat_col, make_dense_rank_aggregation(), scan_type::INCLUSIVE, null_policy::INCLUDE); - auto rank_out = scan( - nested_col, make_rank_aggregation(), scan_type::INCLUSIVE, null_policy::INCLUDE); + auto dense_out = + scan(nested_col, make_dense_rank_aggregation(), scan_type::INCLUSIVE, null_policy::INCLUDE); + auto dense_expected = + scan(flat_col, make_dense_rank_aggregation(), scan_type::INCLUSIVE, 
null_policy::INCLUDE); + auto rank_out = + scan(nested_col, make_rank_aggregation(), scan_type::INCLUSIVE, null_policy::INCLUDE); auto rank_expected = - scan(flat_col, make_rank_aggregation(), scan_type::INCLUSIVE, - null_policy::INCLUDE); + scan(flat_col, make_rank_aggregation(), scan_type::INCLUSIVE, null_policy::INCLUDE); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(dense_out->view(), dense_expected->view()); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(rank_out->view(), rank_expected->view()); } -*/ + +TYPED_TEST(TypedRankScanTest, structsWithNullPushdown) +{ + auto const v = [] { + if (std::is_signed::value) + return make_vector({-1, -1, -4, -4, -4, 5, 7, 7, 7, 9, 9, 9}); + return make_vector({0, 0, 4, 4, 4, 5, 7, 7, 7, 9, 9, 9}); + }(); + auto const b = thrust::host_vector(std::vector{1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1}); + auto col = this->make_column(v, b); + auto strings = strings_column_wrapper{ + {"0a", "0a", "2a", "2a", "3b", "5", "6c", "6c", "6c", "9", "9", "10d"}, null_at(8)}; + std::vector> struct_columns; + struct_columns.push_back(std::move(col)); + struct_columns.push_back(strings.release()); + + auto struct_col = + cudf::make_structs_column(12, std::move(struct_columns), 0, rmm::device_buffer{}); + + struct_col->set_null_mask(create_null_mask(12, cudf::mask_state::ALL_NULL)); + auto expected_null_result = + fixed_width_column_wrapper{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + auto dense_null_out = + scan(*struct_col, make_dense_rank_aggregation(), scan_type::INCLUSIVE, null_policy::INCLUDE); + auto rank_null_out = + scan(*struct_col, make_rank_aggregation(), scan_type::INCLUSIVE, null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(dense_null_out->view(), expected_null_result); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(rank_null_out->view(), expected_null_result); + + auto const struct_nulls = + thrust::host_vector(std::vector{1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1}); + struct_col->set_null_mask( + cudf::test::detail::make_null_mask(struct_nulls.begin(), 
struct_nulls.end())); + auto expected_dense_vals = + fixed_width_column_wrapper{1, 2, 2, 3, 4, 5, 6, 6, 7, 8, 8, 9}; + auto expected_rank_vals = + fixed_width_column_wrapper{1, 2, 2, 4, 5, 6, 7, 7, 9, 10, 10, 12}; + auto dense_out = + scan(*struct_col, make_dense_rank_aggregation(), scan_type::INCLUSIVE, null_policy::INCLUDE); + auto rank_out = + scan(*struct_col, make_rank_aggregation(), scan_type::INCLUSIVE, null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(dense_out->view(), expected_dense_vals); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(rank_out->view(), expected_rank_vals); +} /* List support dependent on https://github.com/rapidsai/cudf/issues/8683 template