Skip to content

Commit

Permalink
Fix a corner case of list lexicographic comparator (#13701)
Browse files Browse the repository at this point in the history
This fixes a bug showing up in spark-rapids (NVIDIA/spark-rapids#8702) when the lexicographic comparator encounters a corner case with an input table having:
 * At least two structs-of-lists columns followed by at least one more column, and
 * All elements of the first structs-of-lists column are all null.

This bug is due to a short circuit in the lexicographic comparator that forgets to keep track of the number of processed lists columns during early exit.

Authors:
  - Nghia Truong (https://github.com/ttnghia)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Divye Gala (https://github.com/divyegala)

URL: #13701
  • Loading branch information
ttnghia authored Jul 17, 2023
1 parent 0784543 commit 45763fa
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 10 deletions.
19 changes: 9 additions & 10 deletions cpp/include/cudf/table/experimental/row_operators.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -542,8 +542,10 @@ class device_row_comparator {
size_type const rhs_index) const noexcept
{
int last_null_depth = std::numeric_limits<int>::max();
size_type list_column_index{0};
size_type list_column_index{-1};
for (size_type i = 0; i < _lhs.num_columns(); ++i) {
if (_lhs.column(i).type().id() == type_id::LIST) { ++list_column_index; }

int const depth = _depth.has_value() ? (*_depth)[i] : 0;
if (depth > last_null_depth) { continue; }

Expand All @@ -556,15 +558,12 @@ class device_row_comparator {
// TODO: At what point do we verify that the columns of lhs and rhs are
// all of the same types? I assume that it's already happened before
// here, otherwise the current code would be failing.
auto [l_dremel_i, r_dremel_i] = [&]() {
if (_lhs.column(i).type().id() == type_id::LIST) {
auto idx = list_column_index++;
return std::make_tuple(optional_dremel_view(_l_dremel[idx]),
optional_dremel_view(_r_dremel[idx]));
} else {
return std::make_tuple(optional_dremel_view{}, optional_dremel_view{});
}
}();
auto const [l_dremel_i, r_dremel_i] =
_lhs.column(i).type().id() == type_id::LIST
? std::make_tuple(optional_dremel_view(_l_dremel[list_column_index]),
optional_dremel_view(_r_dremel[list_column_index]))
: std::make_tuple(optional_dremel_view{}, optional_dremel_view{});

auto element_comp = element_comparator{_check_nulls,
_lhs.column(i),
_rhs.column(i),
Expand Down
20 changes: 20 additions & 0 deletions cpp/tests/sort/sort_nested_types_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -441,3 +441,23 @@ TEST_F(NestedListTest, ListsOfListsOfStructsNoNulls)
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_order, order->view());
}
}

TEST_F(NestedListTest, MultipleListsColumnsWithNulls)
{
// A STRUCT<LIST<INT>> column with all nulls.
auto const col0 = [] {
auto child = int32s_lists{{int32s_lists{}, int32s_lists{}}, all_nulls()};
return structs_col{{child}, all_nulls()};
}();

auto const col1 = [] {
auto child = int32s_lists{{0, 1, 2}, {10, 11, 12}};
return structs_col{{child}};
}();

auto const col2 = int32s_col{1, 0};

auto const expected_order = int32s_col{0, 1};
auto const order = cudf::sorted_order(cudf::table_view{{col0, col1, col2}});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_order, order->view());
}

0 comments on commit 45763fa

Please sign in to comment.