diff --git a/cpp/src/lists/copying/segmented_gather.cu b/cpp/src/lists/copying/segmented_gather.cu index 77d41c5ddc9..8cbcddc1c58 100644 --- a/cpp/src/lists/copying/segmented_gather.cu +++ b/cpp/src/lists/copying/segmented_gather.cu @@ -45,6 +45,7 @@ std::unique_ptr segmented_gather(lists_column_view const& value_column, auto const gather_index_begin = gather_map.offsets_begin() + 1; auto const gather_index_end = gather_map.offsets_end(); auto const value_offsets = value_column.offsets_begin(); + auto const value_device_view = column_device_view::create(value_column.parent(), stream); /* Creates a column_device_view of the parent lists column; it is dereferenced and captured by value in the transformer lambda below so that is_null() can be called per list row. */ auto const map_begin = cudf::detail::indexalator_factory::make_input_iterator(gather_map_sliced_child); auto const out_of_bounds = [] __device__(auto const index, auto const list_size) { @@ -52,7 +53,8 @@ std::unique_ptr segmented_gather(lists_column_view const& value_column, }; // Calculate Flattened gather indices (value_offset[row]+sub_index - auto transformer = [value_offsets, + auto transformer = [values_lists_view = *value_device_view, + value_offsets, map_begin, gather_index_begin, gather_index_end, @@ -64,8 +66,9 @@ std::unique_ptr segmented_gather(lists_column_view const& value_column, thrust::seq, gather_index_begin, gather_index_end, gather_index_begin[-1] + index) - gather_index_begin; // Get each sub_index in list in each row of gather_map. - auto sub_index = map_begin[index]; - auto list_size = value_offsets[offset_idx + 1] - value_offsets[offset_idx]; + auto sub_index = map_begin[index]; + auto list_is_null = values_lists_view.is_null(offset_idx); /* A null list row gets list_size 0 on the next line instead of the size derived from its offsets, which may still span elements. */ + auto list_size = list_is_null ? 0 : (value_offsets[offset_idx + 1] - value_offsets[offset_idx]); auto wrapped_sub_index = sub_index < 0 ? 
sub_index + list_size : sub_index; auto constexpr null_idx = cuda::std::numeric_limits::max(); // Add sub_index to value_column offsets, to get gather indices of child of value_column diff --git a/cpp/tests/copying/segmented_gather_list_tests.cpp b/cpp/tests/copying/segmented_gather_list_tests.cpp index 514969dbeba..e3a003c51d1 100644 --- a/cpp/tests/copying/segmented_gather_list_tests.cpp +++ b/cpp/tests/copying/segmented_gather_list_tests.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -34,9 +35,6 @@ using FixedWidthTypesNotBool = cudf::test::Concat; TYPED_TEST_SUITE(SegmentedGatherTest, FixedWidthTypesNotBool); -class SegmentedGatherTestList : public cudf::test::BaseFixture { -}; - // to disambiguate between {} == 0 and {} == List{0} // Also, see note about compiler issues when declaring nested // empty lists in lists_column_wrapper documentation @@ -44,9 +42,7 @@ template using LCW = cudf::test::lists_column_wrapper; using cudf::lists_column_view; using cudf::lists::detail::segmented_gather; -using cudf::test::iterators::no_nulls; -using cudf::test::iterators::null_at; -using cudf::test::iterators::nulls_at; +using namespace cudf::test::iterators; /* Replaces the three removed using-declarations (no_nulls, null_at, nulls_at); the test added below also uses all_nulls from the same namespace. */ auto constexpr NULLIFY = cudf::out_of_bounds_policy::NULLIFY; TYPED_TEST(SegmentedGatherTest, Gather) @@ -300,6 +296,25 @@ TYPED_TEST(SegmentedGatherTest, GatherNegatives) } } +TYPED_TEST(SegmentedGatherTest, GatherOnNonCompactedNullLists) +{ /* Row 5 of the input is built as the non-empty list {1, 2} and is then marked null below; the expected result has a null list at row 5. */ + using T = TypeParam; + auto constexpr X = -1; // Signifies null value. + + // List + auto list = LCW{{{1, 2, 3, 4}, {5}, {6, 7}, {8, 9, 0}, {}, {1, 2}, {3, 4, 5}}, no_nulls()}; + auto const input = list.release(); + + // Set non-empty list row at index 5 to null. 
+ cudf::detail::set_null_mask(input->mutable_view().null_mask(), 5, 6, false); /* Only the null mask pointer is passed, so the element data built above for row 5 is not touched by this call. NOTE(review): presumably 5, 6 is a half-open bit range covering row 5 only - confirm. */ + + auto const gather_map = LCW{{-1, 2, 1, -4}, {0}, {-2, 1}, {0, 2, 1}, {}, {0}, {1, 2}}; + auto const expected = + LCW{{{4, 3, 2, 1}, {5}, {6, 7}, {8, 0, 9}, {}, {{X}, all_nulls()}, {4, 5}}, null_at(5)}; /* gather_map row 5 is {0}; the expected output row 5 is null (null_at(5)). */ + auto const results = segmented_gather(lists_column_view{*input}, lists_column_view{gather_map}); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected); +} + TYPED_TEST(SegmentedGatherTest, GatherNestedNulls) { using T = TypeParam; diff --git a/cpp/tests/lists/extract_tests.cpp b/cpp/tests/lists/extract_tests.cpp index 19bdf871f0c..d6ee62a7731 100644 --- a/cpp/tests/lists/extract_tests.cpp +++ b/cpp/tests/lists/extract_tests.cpp @@ -15,6 +15,7 @@ */ #include +#include #include #include @@ -213,6 +214,34 @@ TYPED_TEST(ListsExtractNumericsTest, ExtractElementNestedLists) } } +TYPED_TEST(ListsExtractNumericsTest, ExtractElementsFromNonCompactedNullLists) +{ /* Extracts elements from a lists column in which row 4 is marked null after being built as the non-empty list {0, 1, 2}. */ + using namespace cudf::test::iterators; + using indices = cudf::test::fixed_width_column_wrapper; + using lcw = cudf::test::lists_column_wrapper; + using result_column = cudf::test::fixed_width_column_wrapper; /* NOTE(review): no template argument list is visible on these wrapper aliases, and no header name follows the #include lines in this patch text; presumably angle-bracketed text was lost in extraction - confirm against the original patch. */ + auto constexpr X = -1; // Value indicating null. + + auto input = + lcw{{{1, 2, 3}, {4, 5, 6}, {}, {7, 8, 9}, {0, 1, 2}, {}, {3, 4, 5}}, nulls_at({2, 5})} + .release(); + + // Set null at index 4. 
+ cudf::detail::set_null_mask(input->mutable_view().null_mask(), 4, 5, false); /* Rows 2 and 5 were already null at construction (nulls_at({2, 5})); this call additionally clears validity for row 4 through the null mask only. */ + + { /* Case 1: the same scalar index 0 is used for every row. */ + auto result = cudf::lists::extract_list_element(cudf::lists_column_view{*input}, 0); + auto expected = result_column{{1, 4, X, 7, X, X, 3}, nulls_at({2, 4, 5})}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); + } + { /* Case 2: one index per row, supplied as a column. */ + auto index = indices{0, 1, 2, 0, 1, 2, 0}; + auto result = cudf::lists::extract_list_element(cudf::lists_column_view{*input}, index); + auto expected = result_column{{1, 5, X, 7, X, X, 3}, nulls_at({2, 4, 5})}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); + } +} + TEST_F(ListsExtractTest, ExtractElementEmpty) { using LCW = cudf::test::lists_column_wrapper;