From 45763fa1f90c174ff9123b4839c729e6c0dee700 Mon Sep 17 00:00:00 2001 From: Nghia Truong <7416935+ttnghia@users.noreply.github.com> Date: Sun, 16 Jul 2023 18:01:41 -0700 Subject: [PATCH] Fix a corner case of list lexicographic comparator (#13701) This fixes a bug showing up in spark-rapids (https://github.com/NVIDIA/spark-rapids/issues/8702) when the lexicographic comparator encounters a corner case with an input table having: * At least two structs-of-lists columns followed by at least one more column, and * All elements of the first structs-of-lists column are all null. This bug is due to a short circuit in the lexicographic comparator that forgets to keep track of the number of processed lists columns during early exit. Authors: - Nghia Truong (https://github.com/ttnghia) Approvers: - Bradley Dice (https://github.com/bdice) - Divye Gala (https://github.com/divyegala) URL: https://github.com/rapidsai/cudf/pull/13701 --- .../cudf/table/experimental/row_operators.cuh | 19 +++++++++--------- cpp/tests/sort/sort_nested_types_tests.cpp | 20 +++++++++++++++++++ 2 files changed, 29 insertions(+), 10 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 5fe9dcbdf1b..6b024d902a9 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -542,8 +542,10 @@ class device_row_comparator { size_type const rhs_index) const noexcept { int last_null_depth = std::numeric_limits::max(); - size_type list_column_index{0}; + size_type list_column_index{-1}; for (size_type i = 0; i < _lhs.num_columns(); ++i) { + if (_lhs.column(i).type().id() == type_id::LIST) { ++list_column_index; } + int const depth = _depth.has_value() ? (*_depth)[i] : 0; if (depth > last_null_depth) { continue; } @@ -556,15 +558,12 @@ class device_row_comparator { // TODO: At what point do we verify that the columns of lhs and rhs are // all of the same types? I assume that it's already happened before // here, otherwise the current code would be failing. - auto [l_dremel_i, r_dremel_i] = [&]() { - if (_lhs.column(i).type().id() == type_id::LIST) { - auto idx = list_column_index++; - return std::make_tuple(optional_dremel_view(_l_dremel[idx]), - optional_dremel_view(_r_dremel[idx])); - } else { - return std::make_tuple(optional_dremel_view{}, optional_dremel_view{}); - } - }(); + auto const [l_dremel_i, r_dremel_i] = + _lhs.column(i).type().id() == type_id::LIST + ? std::make_tuple(optional_dremel_view(_l_dremel[list_column_index]), + optional_dremel_view(_r_dremel[list_column_index])) + : std::make_tuple(optional_dremel_view{}, optional_dremel_view{}); + auto element_comp = element_comparator{_check_nulls, _lhs.column(i), _rhs.column(i), diff --git a/cpp/tests/sort/sort_nested_types_tests.cpp b/cpp/tests/sort/sort_nested_types_tests.cpp index ff6256c2408..8ab23936ceb 100644 --- a/cpp/tests/sort/sort_nested_types_tests.cpp +++ b/cpp/tests/sort/sort_nested_types_tests.cpp @@ -441,3 +441,23 @@ TEST_F(NestedListTest, ListsOfListsOfStructsNoNulls) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_order, order->view()); } } + +TEST_F(NestedListTest, MultipleListsColumnsWithNulls) +{ + // A STRUCT> column with all nulls. + auto const col0 = [] { + auto child = int32s_lists{{int32s_lists{}, int32s_lists{}}, all_nulls()}; + return structs_col{{child}, all_nulls()}; + }(); + + auto const col1 = [] { + auto child = int32s_lists{{0, 1, 2}, {10, 11, 12}}; + return structs_col{{child}}; + }(); + + auto const col2 = int32s_col{1, 0}; + + auto const expected_order = int32s_col{0, 1}; + auto const order = cudf::sorted_order(cudf::table_view{{col0, col1, col2}}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_order, order->view()); +}