diff --git a/cpp/src/groupby/sort/group_dense_rank_scan.cu b/cpp/src/groupby/sort/group_dense_rank_scan.cu index 123c7569424..1407ed04b3a 100644 --- a/cpp/src/groupby/sort/group_dense_rank_scan.cu +++ b/cpp/src/groupby/sort/group_dense_rank_scan.cu @@ -17,65 +17,40 @@ #include #include #include -#include #include #include #include -#include +#include namespace cudf { namespace groupby { namespace detail { namespace { -template -std::unique_ptr generate_dense_ranks(column_view const& order_by, +template +std::unique_ptr generate_dense_ranks(table_view const& order_by, device_span group_labels, device_span group_offsets, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto const flat_order = - order_by.type().id() == type_id::STRUCT - ? table_view{std::vector{order_by.child_begin(), order_by.child_end()}} - : table_view{{order_by}}; - auto const d_flat_order = table_device_view::create(flat_order, stream); - row_equality_comparator comparator(*d_flat_order, *d_flat_order, true); + auto const flattener = cudf::structs::detail::flatten_nested_columns( + order_by, {}, {}, structs::detail::column_nullability::MATCH_INCOMING); + auto const d_flat_order = table_device_view::create(std::get<0>(flattener), stream); + row_equality_comparator comparator(*d_flat_order, *d_flat_order, true); auto ranks = make_fixed_width_column( - data_type{type_to_id()}, order_by.size(), mask_state::UNALLOCATED, stream, mr); + data_type{type_to_id()}, order_by.num_rows(), mask_state::UNALLOCATED, stream, mr); auto mutable_ranks = ranks->mutable_view(); - if (order_by.type().id() == type_id::STRUCT && order_by.has_nulls()) { - auto const d_col_order = column_device_view::create(order_by, stream); - thrust::tabulate(rmm::exec_policy(stream), - mutable_ranks.begin(), - mutable_ranks.end(), - [comparator, - d_col_order = *d_col_order, - labels = group_labels.data(), - offsets = group_offsets.data()] __device__(size_type row_index) { - if (row_index == 
offsets[labels[row_index]]) { return true; } - bool const lhs_is_null{d_col_order.is_null(row_index)}; - bool const rhs_is_null{d_col_order.is_null(row_index - 1)}; - if (lhs_is_null && rhs_is_null) { - return false; - } else if (lhs_is_null != rhs_is_null) { - return true; - } - return !comparator(row_index, row_index - 1); - }); - - } else { - thrust::tabulate( - rmm::exec_policy(stream), - mutable_ranks.begin(), - mutable_ranks.end(), - [comparator, labels = group_labels.data(), offsets = group_offsets.data()] __device__( - size_type row_index) { - return row_index == offsets[labels[row_index]] || !comparator(row_index, row_index - 1); - }); - } + thrust::tabulate( + rmm::exec_policy(stream), + mutable_ranks.begin(), + mutable_ranks.end(), + [comparator, labels = group_labels.data(), offsets = group_offsets.data()] __device__( + size_type row_index) { + return row_index == offsets[labels[row_index]] || !comparator(row_index, row_index - 1); + }); thrust::inclusive_scan_by_key(rmm::exec_policy(stream), group_labels.begin(), @@ -91,13 +66,12 @@ std::unique_ptr dense_rank_scan(column_view const& order_by, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - if ((order_by.type().id() == type_id::STRUCT && - has_nested_nulls( - table_view{std::vector{order_by.child_begin(), order_by.child_end()}})) || - (order_by.type().id() != type_id::STRUCT && order_by.has_nulls())) { - return generate_dense_ranks(order_by, group_labels, group_offsets, stream, mr); + auto const superimposed = structs::detail::superimpose_parent_nulls(order_by, stream, mr); + table_view const order_table{{std::get<0>(superimposed)}}; + if (has_nested_nulls(table_view{{order_by}})) { + return generate_dense_ranks(order_table, group_labels, group_offsets, stream, mr); } - return generate_dense_ranks(order_by, group_labels, group_offsets, stream, mr); + return generate_dense_ranks(order_table, group_labels, group_offsets, stream, mr); } } // namespace detail diff --git 
a/cpp/src/groupby/sort/group_rank_scan.cu b/cpp/src/groupby/sort/group_rank_scan.cu index f19babdc84e..5db2731b5ff 100644 --- a/cpp/src/groupby/sort/group_rank_scan.cu +++ b/cpp/src/groupby/sort/group_rank_scan.cu @@ -16,71 +16,45 @@ #include #include -#include +#include #include -#include #include #include #include -#include +#include namespace cudf { namespace groupby { namespace detail { namespace { -template -std::unique_ptr generate_ranks(column_view const& order_by, +template +std::unique_ptr generate_ranks(table_view const& order_by, device_span group_labels, device_span group_offsets, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto const flat_order = - order_by.type().id() == type_id::STRUCT - ? table_view{std::vector{order_by.child_begin(), order_by.child_end()}} - : table_view{{order_by}}; - auto const d_flat_order = table_device_view::create(flat_order, stream); - row_equality_comparator comparator(*d_flat_order, *d_flat_order, true); + auto const flattener = cudf::structs::detail::flatten_nested_columns( + order_by, {}, {}, structs::detail::column_nullability::MATCH_INCOMING); + auto const d_flat_order = table_device_view::create(std::get<0>(flattener), stream); + row_equality_comparator comparator(*d_flat_order, *d_flat_order, true); auto ranks = make_fixed_width_column( - data_type{type_to_id()}, order_by.size(), mask_state::UNALLOCATED, stream, mr); + data_type{type_to_id()}, order_by.num_rows(), mask_state::UNALLOCATED, stream, mr); auto mutable_ranks = ranks->mutable_view(); - if (order_by.type().id() == type_id::STRUCT && order_by.has_nulls()) { - auto const d_col_order = column_device_view::create(order_by, stream); - thrust::tabulate(rmm::exec_policy(stream), - mutable_ranks.begin(), - mutable_ranks.end(), - [comparator, - d_col_order = *d_col_order, - labels = group_labels.data(), - offsets = group_offsets.data()] __device__(size_type row_index) { - auto group_start = offsets[labels[row_index]]; - if (row_index == 
group_start) { return 1; } - bool const lhs_is_null{d_col_order.is_null(row_index)}; - bool const rhs_is_null{d_col_order.is_null(row_index - 1)}; - if (lhs_is_null && rhs_is_null) { - return 0; - } else if (lhs_is_null != rhs_is_null) { - return row_index - group_start + 1; - } - return comparator(row_index, row_index - 1) ? 0 - : row_index - group_start + 1; - }); - } else { - thrust::tabulate(rmm::exec_policy(stream), - mutable_ranks.begin(), - mutable_ranks.end(), - [comparator, - labels = group_labels.data(), - offsets = group_offsets.data()] __device__(size_type row_index) { - auto group_start = offsets[labels[row_index]]; - return row_index != group_start && comparator(row_index, row_index - 1) - ? 0 - : row_index - group_start + 1; - }); - } + thrust::tabulate(rmm::exec_policy(stream), + mutable_ranks.begin(), + mutable_ranks.end(), + [comparator, + labels = group_labels.data(), + offsets = group_offsets.data()] __device__(size_type row_index) { + auto group_start = offsets[labels[row_index]]; + return row_index != group_start && comparator(row_index, row_index - 1) + ? 
0 + : row_index - group_start + 1; + }); thrust::inclusive_scan_by_key(rmm::exec_policy(stream), group_labels.begin(), @@ -99,13 +73,12 @@ std::unique_ptr rank_scan(column_view const& order_by, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - if ((order_by.type().id() == type_id::STRUCT && - has_nested_nulls( - table_view{std::vector{order_by.child_begin(), order_by.child_end()}})) || - (order_by.type().id() != type_id::STRUCT && order_by.has_nulls())) { - return generate_ranks(order_by, group_labels, group_offsets, stream, mr); + auto const superimposed = structs::detail::superimpose_parent_nulls(order_by, stream, mr); + table_view const order_table{{std::get<0>(superimposed)}}; + if (has_nested_nulls(table_view{{order_by}})) { + return generate_ranks(order_table, group_labels, group_offsets, stream, mr); } - return generate_ranks(order_by, group_labels, group_offsets, stream, mr); + return generate_ranks(order_table, group_labels, group_offsets, stream, mr); } } // namespace detail diff --git a/cpp/src/groupby/sort/scan.cpp b/cpp/src/groupby/sort/scan.cpp index c43df77bb5e..c707e1e11e3 100644 --- a/cpp/src/groupby/sort/scan.cpp +++ b/cpp/src/groupby/sort/scan.cpp @@ -29,6 +29,8 @@ #include +#include + #include namespace cudf { @@ -114,12 +116,8 @@ void scan_result_functor::operator()(aggregation const& agg) CUDF_EXPECTS(helper.is_presorted(), "Rank aggregate in groupby scan requires the keys to be presorted"); auto const order_by = get_grouped_values(); - CUDF_EXPECTS(order_by.type().id() != type_id::LIST, + CUDF_EXPECTS(!cudf::structs::detail::contains_list(order_by), "Unsupported list type in grouped rank scan."); - CUDF_EXPECTS(std::none_of(order_by.child_begin(), - order_by.child_end(), - [](auto const& col) { return is_nested(col.type()); }), - "Unsupported nested columns in grouped rank scan."); cache.add_result( col_idx, @@ -135,12 +133,8 @@ void scan_result_functor::operator()(aggregation const& CUDF_EXPECTS(helper.is_presorted(), 
"Dense rank aggregate in groupby scan requires the keys to be presorted"); auto const order_by = get_grouped_values(); - CUDF_EXPECTS(order_by.type().id() != type_id::LIST, + CUDF_EXPECTS(!cudf::structs::detail::contains_list(order_by), "Unsupported list type in grouped dense_rank scan."); - CUDF_EXPECTS(std::none_of(order_by.child_begin(), - order_by.child_end(), - [](auto const& col) { return is_nested(col.type()); }), - "Unsupported nested columns in grouped dense_rank scan."); cache.add_result( col_idx, diff --git a/cpp/src/reductions/scan/scan_inclusive.cu b/cpp/src/reductions/scan/scan_inclusive.cu index 7f01ddedf03..53e903e604c 100644 --- a/cpp/src/reductions/scan/scan_inclusive.cu +++ b/cpp/src/reductions/scan/scan_inclusive.cu @@ -30,7 +30,8 @@ #include #include -#include +#include + #include namespace cudf { @@ -196,45 +197,25 @@ struct scan_dispatcher { } }; -template -std::unique_ptr generate_dense_ranks(column_view const& order_by, +template +std::unique_ptr generate_dense_ranks(table_view const& order_by, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto const flat_order = - order_by.type().id() == type_id::STRUCT - ? 
table_view{std::vector{order_by.child_begin(), order_by.child_end()}} - : table_view{{order_by}}; - auto const d_flat_order = table_device_view::create(flat_order, stream); - row_equality_comparator comparator(*d_flat_order, *d_flat_order, true); + auto const flattener = cudf::structs::detail::flatten_nested_columns( + order_by, {}, {}, structs::detail::column_nullability::MATCH_INCOMING); + auto const d_flat_order = table_device_view::create(std::get<0>(flattener), stream); + row_equality_comparator comparator(*d_flat_order, *d_flat_order, true); auto ranks = make_fixed_width_column( - data_type{type_to_id()}, order_by.size(), mask_state::UNALLOCATED, stream, mr); + data_type{type_to_id()}, order_by.num_rows(), mask_state::UNALLOCATED, stream, mr); auto mutable_ranks = ranks->mutable_view(); - if (order_by.type().id() == type_id::STRUCT && order_by.has_nulls()) { - auto const d_col_order = column_device_view::create(order_by, stream); - thrust::tabulate(rmm::exec_policy(stream), - mutable_ranks.begin(), - mutable_ranks.end(), - [comparator, d_col_order = *d_col_order] __device__(size_type row_index) { - if (row_index == 0) { return true; } - bool const lhs_is_null{d_col_order.is_null(row_index)}; - bool const rhs_is_null{d_col_order.is_null(row_index - 1)}; - if (lhs_is_null && rhs_is_null) { - return false; - } else if (lhs_is_null != rhs_is_null) { - return true; - } - return !comparator(row_index, row_index - 1); - }); - } else { - thrust::tabulate(rmm::exec_policy(stream), - mutable_ranks.begin(), - mutable_ranks.end(), - [comparator] __device__(size_type row_index) { - return row_index == 0 || !comparator(row_index, row_index - 1); - }); - } + thrust::tabulate(rmm::exec_policy(stream), + mutable_ranks.begin(), + mutable_ranks.end(), + [comparator] __device__(size_type row_index) { + return row_index == 0 || !comparator(row_index, row_index - 1); + }); thrust::inclusive_scan(rmm::exec_policy(stream), mutable_ranks.begin(), @@ -243,46 +224,26 @@ std::unique_ptr 
generate_dense_ranks(column_view const& order_by, return ranks; } -template -std::unique_ptr generate_ranks(column_view const& order_by, +template +std::unique_ptr generate_ranks(table_view const& order_by, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto const flat_order = - order_by.type().id() == type_id::STRUCT - ? table_view{std::vector{order_by.child_begin(), order_by.child_end()}} - : table_view{{order_by}}; - auto const d_flat_order = table_device_view::create(flat_order, stream); - row_equality_comparator comparator(*d_flat_order, *d_flat_order, true); + auto const flattener = cudf::structs::detail::flatten_nested_columns( + order_by, {}, {}, structs::detail::column_nullability::MATCH_INCOMING); + auto const d_flat_order = table_device_view::create(std::get<0>(flattener), stream); + row_equality_comparator comparator(*d_flat_order, *d_flat_order, true); auto ranks = make_fixed_width_column( - data_type{type_to_id()}, order_by.size(), mask_state::UNALLOCATED, stream, mr); + data_type{type_to_id()}, order_by.num_rows(), mask_state::UNALLOCATED, stream, mr); auto mutable_ranks = ranks->mutable_view(); - if (order_by.type().id() == type_id::STRUCT && order_by.has_nulls()) { - auto const d_col_order = column_device_view::create(order_by, stream); - thrust::tabulate(rmm::exec_policy(stream), - mutable_ranks.begin(), - mutable_ranks.end(), - [comparator, d_col_order = *d_col_order] __device__(size_type row_index) { - if (row_index == 0) { return 1; } - bool const lhs_is_null{d_col_order.is_null(row_index)}; - bool const rhs_is_null{d_col_order.is_null(row_index - 1)}; - if (lhs_is_null and rhs_is_null) { - return 0; - } else if (lhs_is_null != rhs_is_null) { - return row_index + 1; - } - return comparator(row_index, row_index - 1) ? 
0 : row_index + 1; - }); - } else { - thrust::tabulate( - rmm::exec_policy(stream), - mutable_ranks.begin(), - mutable_ranks.end(), - [comparator] __device__(size_type row_index) { - return row_index != 0 && comparator(row_index, row_index - 1) ? 0 : row_index + 1; - }); - } + thrust::tabulate(rmm::exec_policy(stream), + mutable_ranks.begin(), + mutable_ranks.end(), + [comparator] __device__(size_type row_index) { + return row_index != 0 && comparator(row_index, row_index - 1) ? 0 + : row_index + 1; + }); thrust::inclusive_scan(rmm::exec_policy(stream), mutable_ranks.begin(), @@ -298,36 +259,28 @@ std::unique_ptr inclusive_dense_rank_scan(column_view const& order_by, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - CUDF_EXPECTS(order_by.type().id() != type_id::LIST, "Unsupported list type in dense_rank scan."); - CUDF_EXPECTS(std::none_of(order_by.child_begin(), - order_by.child_end(), - [](auto const& col) { return is_nested(col.type()); }), - "Unsupported nested columns in dense_rank scan."); - if ((order_by.type().id() == type_id::STRUCT && - has_nested_nulls( - table_view{std::vector{order_by.child_begin(), order_by.child_end()}})) || - (order_by.type().id() != type_id::STRUCT && order_by.has_nulls())) { - return generate_dense_ranks(order_by, stream, mr); + CUDF_EXPECTS(!cudf::structs::detail::contains_list(order_by), + "Unsupported list type in dense_rank scan."); + auto const superimposed = structs::detail::superimpose_parent_nulls(order_by, stream, mr); + table_view const order_table{{std::get<0>(superimposed)}}; + if (has_nested_nulls(table_view{{order_by}})) { + return generate_dense_ranks(order_table, stream, mr); } - return generate_dense_ranks(order_by, stream, mr); + return generate_dense_ranks(order_table, stream, mr); } std::unique_ptr inclusive_rank_scan(column_view const& order_by, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - CUDF_EXPECTS(order_by.type().id() != type_id::LIST, "Unsupported list type 
in rank scan."); - CUDF_EXPECTS(std::none_of(order_by.child_begin(), - order_by.child_end(), - [](auto const& col) { return is_nested(col.type()); }), - "Unsupported nested columns in rank scan."); - if ((order_by.type().id() == type_id::STRUCT && - has_nested_nulls( - table_view{std::vector{order_by.child_begin(), order_by.child_end()}})) || - (order_by.type().id() != type_id::STRUCT && order_by.has_nulls())) { - return generate_ranks(order_by, stream, mr); + CUDF_EXPECTS(!cudf::structs::detail::contains_list(order_by), + "Unsupported list type in rank scan."); + auto const superimposed = structs::detail::superimpose_parent_nulls(order_by, stream, mr); + table_view const order_table{{std::get<0>(superimposed)}}; + if (has_nested_nulls(table_view{{order_by}})) { + return generate_ranks(order_table, stream, mr); } - return generate_ranks(order_by, stream, mr); + return generate_ranks(order_table, stream, mr); } std::unique_ptr scan_inclusive( diff --git a/cpp/src/structs/utilities.cpp b/cpp/src/structs/utilities.cpp index ace9a608bdb..f735d10d729 100644 --- a/cpp/src/structs/utilities.cpp +++ b/cpp/src/structs/utilities.cpp @@ -341,6 +341,17 @@ void superimpose_parent_nulls(bitmask_type const* parent_null_mask, } } +/** + * @copydoc cudf::structs::detail::contains_list + */ +bool contains_list(column_view const& col) +{ + return col.type().id() == type_id::LIST || + std::any_of(col.child_begin(), col.child_end(), [](auto const& child) { + return contains_list(child); + }); +} + std::tuple> superimpose_parent_nulls( column_view const& parent, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { diff --git a/cpp/src/structs/utilities.hpp b/cpp/src/structs/utilities.hpp index 1683518a1ef..ff763c9a725 100644 --- a/cpp/src/structs/utilities.hpp +++ b/cpp/src/structs/utilities.hpp @@ -125,6 +125,14 @@ void superimpose_parent_nulls(bitmask_type const* parent_null_mask, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); +/** + * @brief 
Indicates if the column or any of its child columns are list columns. + * + * @param col column to check for lists. + * @return true if the column or any of its children is a list, false otherwise. + */ +bool contains_list(column_view const& col); + /** * @brief Push down nulls from a parent mask into a child column, using bitwise AND. * diff --git a/cpp/tests/groupby/rank_scan_tests.cpp b/cpp/tests/groupby/rank_scan_tests.cpp index 37e75e2e906..d08bf011618 100644 --- a/cpp/tests/groupby/rank_scan_tests.cpp +++ b/cpp/tests/groupby/rank_scan_tests.cpp @@ -220,16 +220,15 @@ TYPED_TEST(typed_groupby_rank_scan_test, mixedStructs) CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result.second[0].results[1], expected_rank_vals); } -/* Nested struct support dependent on https://github.com/rapidsai/cudf/issues/8683 TYPED_TEST(typed_groupby_rank_scan_test, nestedStructs) { using T = TypeParam; - auto col1 = fixed_width_column_wrapper{{0, 0, 7, 7, 7, 5, 4, 4, 4, 9, 9, 9}, - null_at(5)}; auto col2 = fixed_width_column_wrapper{{0, 0, 7, 7, 7, 5, 4, 4, 4, 9, 9, - 9}, null_at(5)}; auto col3 = fixed_width_column_wrapper{{0, 0, 7, 7, 7, 5, 4, 4, 4, 9, - 9, 9}, null_at(5)}; auto col4 = fixed_width_column_wrapper{{0, 0, 7, 7, 7, 5, 4, 4, 4, - 9, 9, 9}, null_at(5)}; auto strings1 = strings_column_wrapper{ + auto col1 = fixed_width_column_wrapper{{0, 0, 7, 7, 7, 5, 4, 4, 4, 9, 9, 9}, null_at(5)}; + auto col2 = fixed_width_column_wrapper{{0, 0, 7, 7, 7, 5, 4, 4, 4, 9, 9, 9}, null_at(5)}; + auto col3 = fixed_width_column_wrapper{{0, 0, 7, 7, 7, 5, 4, 4, 4, 9, 9, 9}, null_at(5)}; + auto col4 = fixed_width_column_wrapper{{0, 0, 7, 7, 7, 5, 4, 4, 4, 9, 9, 9}, null_at(5)}; + auto strings1 = strings_column_wrapper{ {"0a", "0a", "2a", "2a", "3b", "5", "6c", "6c", "6c", "9", "9", "10d"}, null_at(8)}; auto strings2 = strings_column_wrapper{ {"0a", "0a", "2a", "2a", "3b", "5", "6c", "6c", "6c", "9", "9", "10d"}, null_at(8)}; @@ -240,26 +239,80 @@ TYPED_TEST(typed_groupby_rank_scan_test, nestedStructs) 
strings_column_wrapper keys = {{"0", "0", "0", "0", "0", "0", "1", "1", "1", "1", "0", "1"}, {1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0}}; - std::vector requests; - requests.emplace_back(groupby::aggregation_request()); - requests.emplace_back(groupby::aggregation_request()); + std::vector requests; + requests.emplace_back(groupby::scan_request()); + requests.emplace_back(groupby::scan_request()); requests[0].values = *nested_col; - requests[0].aggregations.push_back(make_dense_rank_aggregation()); - requests[0].aggregations.push_back(make_rank_aggregation()); + requests[0].aggregations.push_back(make_dense_rank_aggregation()); + requests[0].aggregations.push_back(make_rank_aggregation()); requests[1].values = *flattened_col; - requests[1].aggregations.push_back(make_dense_rank_aggregation()); - requests[1].aggregations.push_back(make_rank_aggregation()); + requests[1].aggregations.push_back(make_dense_rank_aggregation()); + requests[1].aggregations.push_back(make_rank_aggregation()); groupby::groupby gb_obj(table_view({keys}), null_policy::INCLUDE, sorted::YES); auto result = gb_obj.scan(requests); CUDF_TEST_EXPECT_TABLES_EQUAL(table_view({keys}), result.first->view()); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT( - *result.second[0].results[0], *result.second[1].results[0]); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT( - *result.second[0].results[2], *result.second[1].results[2]); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result.second[0].results[0], *result.second[1].results[0]); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result.second[0].results[1], *result.second[1].results[1]); +} + +TYPED_TEST(typed_groupby_rank_scan_test, structsWithNullPushdown) +{ + using T = TypeParam; + + auto col1 = fixed_width_column_wrapper{{0, 0, 7, 7, 7, 5, 4, 4, 4, 9, 9, 9}, null_at(5)}; + auto col2 = fixed_width_column_wrapper{{0, 0, 7, 7, 7, 5, 4, 4, 4, 9, 9, 9}, null_at(5)}; + auto strings1 = strings_column_wrapper{ + {"0a", "0a", "2a", "2a", "3b", "5", "6c", "6c", "6c", "9", "9", "10d"}, null_at(8)}; + 
auto strings2 = strings_column_wrapper{ + {"0a", "0a", "2a", "2a", "3b", "5", "6c", "6c", "6c", "9", "9", "10d"}, null_at(8)}; + + std::vector> struct_columns; + struct_columns.push_back(col1.release()); + struct_columns.push_back(strings1.release()); + auto struct_col = + cudf::make_structs_column(12, std::move(struct_columns), 0, rmm::device_buffer{}); + auto const struct_nulls = + thrust::host_vector(std::vector{1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0}); + struct_col->set_null_mask( + cudf::test::detail::make_null_mask(struct_nulls.begin(), struct_nulls.end())); + + std::vector> null_struct_columns; + null_struct_columns.push_back(col2.release()); + null_struct_columns.push_back(strings2.release()); + auto null_col = + cudf::make_structs_column(12, std::move(null_struct_columns), 0, rmm::device_buffer{}); + null_col->set_null_mask(create_null_mask(12, cudf::mask_state::ALL_NULL)); + + strings_column_wrapper keys = {{"0", "0", "0", "0", "0", "0", "1", "1", "1", "1", "0", "1"}, + {1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0}}; + + std::vector requests; + requests.emplace_back(groupby::scan_request()); + requests.emplace_back(groupby::scan_request()); + requests[0].values = *struct_col; + requests[0].aggregations.push_back(make_dense_rank_aggregation()); + requests[0].aggregations.push_back(make_rank_aggregation()); + requests[1].values = *null_col; + requests[1].aggregations.push_back(make_dense_rank_aggregation()); + requests[1].aggregations.push_back(make_rank_aggregation()); + + groupby::groupby gb_obj(table_view({keys}), null_policy::INCLUDE, sorted::YES); + auto result = gb_obj.scan(requests); + + auto expected_dense_vals = + fixed_width_column_wrapper{1, 2, 2, 3, 4, 5, 1, 1, 2, 1, 1, 2}; + auto expected_rank_vals = + fixed_width_column_wrapper{1, 2, 2, 4, 5, 6, 1, 1, 3, 1, 1, 3}; + auto expected_null_result = + fixed_width_column_wrapper{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result.second[0].results[0], expected_dense_vals); + 
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result.second[0].results[1], expected_rank_vals); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result.second[1].results[0], expected_null_result); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result.second[1].results[1], expected_null_result); } -*/ /* List support dependent on https://github.com/rapidsai/cudf/issues/8683 template @@ -296,15 +349,15 @@ TYPED_TEST(list_groupby_rank_scan_test, lists) fixed_width_column_wrapper keys = {{0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1}, {1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0}}; - std::vector requests; + std::vector requests; requests.emplace_back(groupby::aggregation_request()); requests.emplace_back(groupby::aggregation_request()); requests[0].values = list_col; - requests[0].aggregations.push_back(make_dense_rank_aggregation()); - requests[0].aggregations.push_back(make_rank_aggregation()); + requests[0].aggregations.push_back(make_dense_rank_aggregation()); + requests[0].aggregations.push_back(make_rank_aggregation()); requests[1].values = struct_col; - requests[1].aggregations.push_back(make_dense_rank_aggregation()); - requests[1].aggregations.push_back(make_rank_aggregation()); + requests[1].aggregations.push_back(make_dense_rank_aggregation()); + requests[1].aggregations.push_back(make_rank_aggregation()); groupby::groupby gb_obj(table_view({keys}), null_policy::INCLUDE, sorted::YES); auto result = gb_obj.scan(requests); diff --git a/cpp/tests/reductions/scan_tests.cpp b/cpp/tests/reductions/scan_tests.cpp index 242f2ff94db..49424f037c4 100644 --- a/cpp/tests/reductions/scan_tests.cpp +++ b/cpp/tests/reductions/scan_tests.cpp @@ -511,7 +511,6 @@ TYPED_TEST(TypedRankScanTest, mixedStructs) *struct_col, expected_rank_vals, make_rank_aggregation(), null_policy::INCLUDE); } -/* Nested struct support dependent on https://github.com/rapidsai/cudf/issues/8683 TYPED_TEST(TypedRankScanTest, nestedStructs) { auto const v = [] { @@ -519,10 +518,12 @@ TYPED_TEST(TypedRankScanTest, nestedStructs) return 
make_vector({-1, -1, -4, -4, -4, 5, 7, 7, 7, 9, 9, 9}); return make_vector({0, 0, 4, 4, 4, 5, 7, 7, 7, 9, 9, 9}); }(); - auto const b = thrust::host_vector(std::vector{1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, - 1}); auto col1 = this->make_column(v, b); auto col2 = this->make_column(v, b); auto - col3 = this->make_column(v, b); auto col4 = this->make_column(v, b); auto strings1 = - strings_column_wrapper{ + auto const b = thrust::host_vector(std::vector{1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1}); + auto col1 = this->make_column(v, b); + auto col2 = this->make_column(v, b); + auto col3 = this->make_column(v, b); + auto col4 = this->make_column(v, b); + auto strings1 = strings_column_wrapper{ {"0a", "0a", "2a", "2a", "3b", "5", "6c", "6c", "6c", "9", "9", "10d"}, null_at(8)}; auto strings2 = strings_column_wrapper{ {"0a", "0a", "2a", "2a", "3b", "5", "6c", "6c", "6c", "9", "9", "10d"}, null_at(8)}; @@ -541,20 +542,62 @@ TYPED_TEST(TypedRankScanTest, nestedStructs) flat_columns.push_back(std::move(col4)); auto flat_col = structs_column_wrapper{std::move(flat_columns)}; - auto dense_out = scan( - nested_col, make_dense_rank_aggregation(), scan_type::INCLUSIVE, null_policy::INCLUDE); - auto dense_expected = scan( - flat_col, make_dense_rank_aggregation(), scan_type::INCLUSIVE, null_policy::INCLUDE); - auto rank_out = scan( - nested_col, make_rank_aggregation(), scan_type::INCLUSIVE, null_policy::INCLUDE); + auto dense_out = + scan(nested_col, make_dense_rank_aggregation(), scan_type::INCLUSIVE, null_policy::INCLUDE); + auto dense_expected = + scan(flat_col, make_dense_rank_aggregation(), scan_type::INCLUSIVE, null_policy::INCLUDE); + auto rank_out = + scan(nested_col, make_rank_aggregation(), scan_type::INCLUSIVE, null_policy::INCLUDE); auto rank_expected = - scan(flat_col, make_rank_aggregation(), scan_type::INCLUSIVE, - null_policy::INCLUDE); + scan(flat_col, make_rank_aggregation(), scan_type::INCLUSIVE, null_policy::INCLUDE); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(dense_out->view(), 
dense_expected->view()); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(rank_out->view(), rank_expected->view()); } -*/ + +TYPED_TEST(TypedRankScanTest, structsWithNullPushdown) +{ + auto const v = [] { + if (std::is_signed::value) + return make_vector({-1, -1, -4, -4, -4, 5, 7, 7, 7, 9, 9, 9}); + return make_vector({0, 0, 4, 4, 4, 5, 7, 7, 7, 9, 9, 9}); + }(); + auto const b = thrust::host_vector(std::vector{1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1}); + auto col = this->make_column(v, b); + auto strings = strings_column_wrapper{ + {"0a", "0a", "2a", "2a", "3b", "5", "6c", "6c", "6c", "9", "9", "10d"}, null_at(8)}; + std::vector> struct_columns; + struct_columns.push_back(std::move(col)); + struct_columns.push_back(strings.release()); + + auto struct_col = + cudf::make_structs_column(12, std::move(struct_columns), 0, rmm::device_buffer{}); + + struct_col->set_null_mask(create_null_mask(12, cudf::mask_state::ALL_NULL)); + auto expected_null_result = + fixed_width_column_wrapper{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + auto dense_null_out = + scan(*struct_col, make_dense_rank_aggregation(), scan_type::INCLUSIVE, null_policy::INCLUDE); + auto rank_null_out = + scan(*struct_col, make_rank_aggregation(), scan_type::INCLUSIVE, null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(dense_null_out->view(), expected_null_result); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(rank_null_out->view(), expected_null_result); + + auto const struct_nulls = + thrust::host_vector(std::vector{1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1}); + struct_col->set_null_mask( + cudf::test::detail::make_null_mask(struct_nulls.begin(), struct_nulls.end())); + auto expected_dense_vals = + fixed_width_column_wrapper{1, 2, 2, 3, 4, 5, 6, 6, 7, 8, 8, 9}; + auto expected_rank_vals = + fixed_width_column_wrapper{1, 2, 2, 4, 5, 6, 7, 7, 9, 10, 10, 12}; + auto dense_out = + scan(*struct_col, make_dense_rank_aggregation(), scan_type::INCLUSIVE, null_policy::INCLUDE); + auto rank_out = + scan(*struct_col, make_rank_aggregation(), 
scan_type::INCLUSIVE, null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(dense_out->view(), expected_dense_vals); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(rank_out->view(), expected_rank_vals); +} /* List support dependent on https://github.com/rapidsai/cudf/issues/8683 template