diff --git a/cpp/src/reshape/interleave_columns.cu b/cpp/src/reshape/interleave_columns.cu index 869218f9643..b15708c5cf8 100644 --- a/cpp/src/reshape/interleave_columns.cu +++ b/cpp/src/reshape/interleave_columns.cu @@ -29,32 +29,111 @@ namespace cudf { namespace detail { namespace { -struct interleave_columns_functor { - template - std::enable_if_t() and not std::is_same_v and - not std::is_same_v, - std::unique_ptr> - operator()(Args&&...) +// Error case when no other overload or specialization is available +template +struct interleave_columns_impl { + template + std::unique_ptr operator()(Args&&...) { - CUDF_FAIL("Called `interleave_columns` on none-supported data type."); + CUDF_FAIL("Unsupported type in `interleave_columns`."); } +}; +struct interleave_columns_functor { template - std::enable_if_t, std::unique_ptr> operator()( - table_view const& lists_columns, - bool create_mask, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + std::unique_ptr operator()(table_view const& input, + bool create_mask, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + return interleave_columns_impl{}(input, create_mask, stream, mr); + } +}; + +template +struct interleave_columns_impl>> { + std::unique_ptr operator()(table_view const& lists_columns, + bool create_mask, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { return lists::detail::interleave_columns(lists_columns, create_mask, stream, mr); } +}; - template - std::enable_if_t, std::unique_ptr> operator()( - table_view const& strings_columns, - bool create_mask, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +template +struct interleave_columns_impl>> { + std::unique_ptr operator()(table_view const& structs_columns, + bool create_mask, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + // We can safely call `column(0)` as the number of columns is known to be non zero. + auto const num_children = structs_columns.column(0).num_children(); + CUDF_EXPECTS( + std::all_of(structs_columns.begin(), + structs_columns.end(), + [num_children](auto const& col) { return col.num_children() == num_children; }), + "Number of children of the input structs columns must be the same"); + + auto const num_columns = structs_columns.num_columns(); + auto const num_rows = structs_columns.num_rows(); + auto const output_size = num_columns * num_rows; + + // Interleave the children of the structs columns. + std::vector> output_struct_members; + for (size_type child_idx = 0; child_idx < num_children; ++child_idx) { + // Collect children columns from the input structs columns at index `child_idx`. + auto const child_iter = + thrust::make_transform_iterator(structs_columns.begin(), [child_idx](auto const& col) { + return structs_column_view(col).get_sliced_child(child_idx); + }); + auto children = std::vector(child_iter, child_iter + num_columns); + + auto const child_type = children.front().type(); + CUDF_EXPECTS( + std::all_of(children.cbegin(), + children.cend(), + [child_type](auto const& col) { return child_type == col.type(); }), + "Children of the input structs columns at the same child index must have the same type"); + + auto const children_nullable = std::any_of( + children.cbegin(), children.cend(), [](auto const& col) { return col.nullable(); }); + output_struct_members.emplace_back( + type_dispatcher(child_type, + interleave_columns_functor{}, + table_view{std::move(children)}, + children_nullable, + stream, + mr)); + } + + auto const create_mask_fn = [&] { + auto const input_dv_ptr = table_device_view::create(structs_columns); + auto const validity_fn = [input_dv = *input_dv_ptr, num_columns] __device__(auto const idx) { + return input_dv.column(idx % num_columns).is_valid(idx / num_columns); + }; + return cudf::detail::valid_if(thrust::make_counting_iterator(0), + thrust::make_counting_iterator(output_size), + validity_fn, + stream, + mr); + }; + + // Only create null mask if at least one input structs column is nullable. + auto [null_mask, null_count] = + create_mask ? create_mask_fn() : std::pair{rmm::device_buffer{0, stream, mr}, size_type{0}}; + return make_structs_column( + output_size, std::move(output_struct_members), null_count, std::move(null_mask), stream, mr); + } +}; + +template +struct interleave_columns_impl>> { + std::unique_ptr operator()(table_view const& strings_columns, + bool create_mask, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { auto num_columns = strings_columns.num_columns(); if (num_columns == 1) // Single strings column returns a copy @@ -106,7 +185,7 @@ struct interleave_columns_functor { cudf::detail::get_value(offsets_column->view(), num_strings, stream); auto chars_column = strings::detail::create_chars_child_column(bytes, stream, mr); // Fill the chars column - auto d_results_chars = chars_column->mutable_view().data(); + auto d_results_chars = chars_column->mutable_view().template data(); thrust::for_each_n( rmm::exec_policy(stream), thrust::make_counting_iterator(0), @@ -132,13 +211,14 @@ struct interleave_columns_functor { stream, mr); } +}; - template - std::enable_if_t(), std::unique_ptr> operator()( - table_view const& input, - bool create_mask, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +template +struct interleave_columns_impl()>> { + std::unique_ptr operator()(table_view const& input, + bool create_mask, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { auto arch_column = input.column(0); auto output_size = input.num_columns() * input.num_rows(); @@ -193,11 +273,10 @@ std::unique_ptr interleave_columns(table_view const& input, CUDF_EXPECTS(input.num_columns() > 0, "input must have at least one column to determine dtype."); auto const dtype = input.column(0).type(); - CUDF_EXPECTS(std::all_of(std::cbegin(input), std::cend(input), [dtype](auto const& col) { return dtype == col.type(); }), - "DTYPE mismatch"); + "Input columns must have the same type"); auto const output_needs_mask = std::any_of( std::cbegin(input), std::cend(input), [](auto const& col) { return col.nullable(); }); diff --git a/cpp/tests/reshape/interleave_columns_tests.cpp b/cpp/tests/reshape/interleave_columns_tests.cpp index 386fd9d08ee..e51f0740787 100644 --- a/cpp/tests/reshape/interleave_columns_tests.cpp +++ b/cpp/tests/reshape/interleave_columns_tests.cpp @@ -24,7 +24,7 @@ using namespace cudf::test::iterators; -constexpr cudf::test::debug_output_level verbosity{cudf::test::debug_output_level::ALL_ERRORS}; +constexpr cudf::test::debug_output_level verbosity{cudf::test::debug_output_level::FIRST_ERROR}; template struct InterleaveColumnsTest : public cudf::test::BaseFixture { @@ -378,7 +378,7 @@ using IntListsCol = cudf::test::lists_column_wrapper; using IntCol = cudf::test::fixed_width_column_wrapper; using TView = cudf::table_view; -constexpr int32_t null{0}; +constexpr int32_t null{0}; // mark for null elements } // namespace struct ListsColumnsInterleaveTest : public cudf::test::BaseFixture { @@ -731,4 +731,341 @@ TEST_F(ListsColumnsInterleaveTest, SlicedStringsColumnsInputWithNulls) CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *results, verbosity); } +namespace { +using StructsCol = cudf::test::structs_column_wrapper; +using StringsCol = cudf::test::strings_column_wrapper; +} // namespace + +struct StructsColumnsInterleaveTest : public cudf::test::BaseFixture { +}; + +TEST_F(StructsColumnsInterleaveTest, InvalidInput) +{ + // Input table contains non-structs column + { + auto const col1 = IntCol{}; + auto const col2 = StructsCol{}; + EXPECT_THROW(cudf::interleave_columns(TView{{col1, col2}}), cudf::logic_error); + } + + // Types mismatch + { + auto const structs1 = [] { + auto child1 = IntCol{1, 2, 3}; + auto child2 = IntCol{4, 5, 6}; + return StructsCol{{child1, child2}}; + }(); + + auto const structs2 = [] { + auto child1 = IntCol{7, 8, 9}; + auto child2 = StringsCol{"", "abc", "123"}; + return StructsCol{{child1, child2}}; + }(); + + EXPECT_THROW(cudf::interleave_columns(TView{{structs1, structs2}}), cudf::logic_error); + } + + // Numbers of children mismatch + { + auto const structs1 = [] { + auto child1 = IntCol{1, 2, 3}; + auto child2 = IntCol{4, 5, 6}; + return StructsCol{{child1, child2}}; + }(); + + auto const structs2 = [] { + auto child1 = IntCol{7, 8, 9}; + auto child2 = IntCol{10, 11, 12}; + auto child3 = IntCol{13, 14, 15}; + return StructsCol{{child1, child2, child3}}; + }(); + + EXPECT_THROW(cudf::interleave_columns(TView{{structs1, structs2}}), cudf::logic_error); + } +} + +TEST_F(StructsColumnsInterleaveTest, InterleaveEmptyColumns) +{ + auto const structs = StructsCol{}; + auto const results = cudf::interleave_columns(TView{{structs, structs}}); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(structs, *results, verbosity); +} + +template +struct StructsColumnsInterleaveTypedTest : public cudf::test::BaseFixture { +}; + +using TypesForTest = cudf::test::Concat; +TYPED_TEST_SUITE(StructsColumnsInterleaveTypedTest, TypesForTest); + +TYPED_TEST(StructsColumnsInterleaveTypedTest, InterleaveOneColumnNotNull) +{ + using ColWrapper = cudf::test::fixed_width_column_wrapper; + + auto const structs = [] { + auto child1 = ColWrapper{1, 2, 3}; + auto child2 = ColWrapper{4, 5, 6}; + auto child3 = StringsCol{"Banana", "Mango", "Apple"}; + return StructsCol{{child1, child2, child3}}; + }(); + auto const results = cudf::interleave_columns(TView{{structs}}); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(structs, *results, verbosity); +} + +TYPED_TEST(StructsColumnsInterleaveTypedTest, InterleaveOneColumnWithNulls) +{ + using ColWrapper = cudf::test::fixed_width_column_wrapper; + + auto const structs = [] { + auto child1 = ColWrapper{{1, 2, null, 3}, null_at(2)}; + auto child2 = ColWrapper{{4, null, 5, 6}, null_at(1)}; + auto child3 = StringsCol{{"" /*NULL*/, "Banana", "Mango", "Apple"}, null_at(0)}; + return StructsCol{{child1, child2, child3}, null_at(3)}; + }(); + auto const results = cudf::interleave_columns(TView{{structs}}); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(structs, *results, verbosity); +} + +TYPED_TEST(StructsColumnsInterleaveTypedTest, SimpleInputNoNull) +{ + using ColWrapper = cudf::test::fixed_width_column_wrapper; + + auto const structs1 = [] { + auto child1 = ColWrapper{1, 2, 3}; + auto child2 = ColWrapper{4, 5, 6}; + auto child3 = StringsCol{"Banana", "Mango", "Apple"}; + return StructsCol{{child1, child2, child3}}; + }(); + + auto const structs2 = [] { + auto child1 = ColWrapper{7, 8, 9}; + auto child2 = ColWrapper{10, 11, 12}; + auto child3 = StringsCol{"Bear", "Duck", "Cat"}; + return StructsCol{{child1, child2, child3}}; + }(); + + auto const expected = [] { + auto child1 = ColWrapper{1, 7, 2, 8, 3, 9}; + auto child2 = ColWrapper{4, 10, 5, 11, 6, 12}; + auto child3 = StringsCol{"Banana", "Bear", "Mango", "Duck", "Apple", "Cat"}; + return StructsCol{{child1, child2, child3}}; + }(); + + auto const results = cudf::interleave_columns(TView{{structs1, structs2}}); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *results, verbosity); +} + +TYPED_TEST(StructsColumnsInterleaveTypedTest, SimpleInputWithNulls) +{ + using ColWrapper = cudf::test::fixed_width_column_wrapper; + + auto const structs1 = [] { + auto child1 = ColWrapper{{1, 2, null, 3, 4}, null_at(2)}; + auto child2 = ColWrapper{{4, null, 5, 6, 7}, null_at(1)}; + auto child3 = StringsCol{{"" /*NULL*/, "Banana", "Mango", "Apple", "Cherry"}, null_at(0)}; + return StructsCol{{child1, child2, child3}, null_at(0)}; + }(); + + auto const structs2 = [] { + auto child1 = ColWrapper{{7, null, null, 8, 9}, nulls_at({1, 2})}; + auto child2 = ColWrapper{{10, 11, 12, null, 14}, null_at(3)}; + auto child3 = StringsCol{"Bear", "Duck", "Cat", "Dog", "Panda"}; + return StructsCol{{child1, child2, child3}, null_at(4)}; + }(); + + auto const structs3 = [] { + auto child1 = ColWrapper{{-1, -2, -3, 0, null}, null_at(4)}; + auto child2 = ColWrapper{{-5, 0, null, -1, -10}, null_at(2)}; + auto child3 = StringsCol{"111", "Bànànà", "abcxyz", "é á í", "zzz"}; + return StructsCol{{child1, child2, child3}, null_at(1)}; + }(); + + auto const expected = [] { + auto child1 = ColWrapper{{1, 7, -1, 2, null, -2, null, null, -3, 3, 8, 0, 4, 9, null}, + nulls_at({4, 6, 7, 14})}; + auto child2 = ColWrapper{{4, 10, -5, null, 11, 0, 5, 12, null, 6, null, -1, 7, 14, -10}, + nulls_at({3, 8, 10})}; + auto child3 = StringsCol{{"" /*NULL*/, + "Bear", + "111", + "Banana", + "Duck", + "Bànànà", + "Mango", + "Cat", + "abcxyz", + "Apple", + "Dog", + "é á í", + "Cherry", + "Panda", + "zzz"}, + null_at(0)}; + return StructsCol{{child1, child2, child3}, nulls_at({0, 5, 13})}; + }(); + + auto const results = cudf::interleave_columns(TView{{structs1, structs2, structs3}}); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *results, verbosity); +} + +TYPED_TEST(StructsColumnsInterleaveTypedTest, NestedInputStructsColumns) +{ + using ColWrapper = cudf::test::fixed_width_column_wrapper; + + auto const structs1 = [] { + auto child_structs1 = [] { + auto child1 = ColWrapper{{null, 2, 3, 4, 5}, null_at(0)}; + auto child2 = ColWrapper{{6, 7, 8, null, 10}, null_at(3)}; + return StructsCol{{child1, child2}, null_at(0)}; + }(); + + auto child_structs2 = [] { + auto child1 = ColWrapper{{11, null, 13, 14, 15}, null_at(1)}; + auto child2 = ColWrapper{{null, 17, 18, 19, 20}, null_at(0)}; + return StructsCol{{child1, child2}, nulls_at({0, 1})}; + }(); + + auto child_strings = [] { return StringsCol{"Banana", "Mango", "Apple", "Cherry", "Kiwi"}; }(); + + return StructsCol{{child_structs1, child_structs2, child_strings}, null_at(0)}; + }(); + + auto const structs2 = [] { + auto child_structs1 = [] { + auto child1 = ColWrapper{{-1, null, -3, -4, -5}, null_at(1)}; + auto child2 = ColWrapper{{-6, -7, -8, null, -10}, null_at(3)}; + return StructsCol{{child1, child2}}; + }(); + + auto child_structs2 = [] { + auto child1 = ColWrapper{{-11, -12, null, -14, -15}, null_at(2)}; + auto child2 = ColWrapper{{-16, -17, -18, -19, null}, null_at(4)}; + return StructsCol{{child1, child2}, null_at(2)}; + }(); + + auto child_strings = [] { return StringsCol{"Bear", "Duck", "Cat", "Dog", "Rabbit"}; }(); + + return StructsCol{{child_structs1, child_structs2, child_strings}, null_at(2)}; + }(); + + auto const expected = [] { + auto child_structs1 = [] { + auto child1 = ColWrapper{{null, -1, 2, null, 3, -3, 4, -4, 5, -5}, nulls_at({0, 3})}; + auto child2 = ColWrapper{{6, -6, 7, -7, 8, -8, null, null, 10, -10}, nulls_at({6, 7})}; + return StructsCol{{child1, child2}, null_at(0)}; + }(); + + auto child_structs2 = [] { + auto child1 = ColWrapper{{11, -11, null, -12, 13, null, 14, -14, 15, -15}, nulls_at({2, 5})}; + auto child2 = ColWrapper{{null, -16, 17, -17, 18, -18, 19, -19, 20, null}, nulls_at({0, 9})}; + return StructsCol{{child1, child2}, nulls_at({0, 2, 5})}; + }(); + + auto child_strings = [] { + return StringsCol{ + "Banana", "Bear", "Mango", "Duck", "Apple", "Cat", "Cherry", "Dog", "Kiwi", "Rabbit"}; + }(); + + return StructsCol{{child_structs1, child_structs2, child_strings}, nulls_at({0, 5})}; + }(); + + auto const results = cudf::interleave_columns(TView{{structs1, structs2}}); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *results, verbosity); +} + +TYPED_TEST(StructsColumnsInterleaveTypedTest, SlicedColumnsInputNoNull) +{ + using ColWrapper = cudf::test::fixed_width_column_wrapper; + constexpr int32_t NOT_USE{-1}; // mark for elements that we don't care + + auto const structs1_original = [] { + auto child1 = ColWrapper{NOT_USE, NOT_USE, 1, 2, 3, NOT_USE}; + auto child2 = ColWrapper{NOT_USE, NOT_USE, 4, 5, 6, NOT_USE}; + auto child3 = StringsCol{"NOT_USE", "NOT_USE", "Banana", "Mango", "Apple", "NOT_USE"}; + return StructsCol{{child1, child2, child3}}; + }(); + + // structs2 has more rows than structs1 + auto const structs2_original = [] { + auto child1 = ColWrapper{NOT_USE, 7, 8, 9, NOT_USE, NOT_USE, NOT_USE}; + auto child2 = ColWrapper{NOT_USE, 10, 11, 12, NOT_USE, NOT_USE, NOT_USE}; + auto child3 = StringsCol{"NOT_USE", "Bear", "Duck", "Cat", "NOT_USE", "NOT_USE", "NOT_USE"}; + return StructsCol{{child1, child2, child3}}; + }(); + + auto const expected = [] { + auto child1 = ColWrapper{1, 7, 2, 8, 3, 9}; + auto child2 = ColWrapper{4, 10, 5, 11, 6, 12}; + auto child3 = StringsCol{"Banana", "Bear", "Mango", "Duck", "Apple", "Cat"}; + return StructsCol{{child1, child2, child3}}; + }(); + + auto const structs1 = cudf::slice(structs1_original, {2, 5})[0]; + auto const structs2 = cudf::slice(structs2_original, {1, 4})[0]; + auto const results = cudf::interleave_columns(TView{{structs1, structs2}}); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *results, verbosity); +} + +TYPED_TEST(StructsColumnsInterleaveTypedTest, SlicedColumnsInputWithNulls) +{ + using ColWrapper = cudf::test::fixed_width_column_wrapper; + constexpr int32_t NOT_USE{-1}; // mark for elements that we don't care + + auto const structs1_original = [] { + auto child1 = ColWrapper{{NOT_USE, NOT_USE, 1, 2, null, 3, 4, NOT_USE}, null_at(4)}; + auto child2 = ColWrapper{{NOT_USE, NOT_USE, 4, null, 5, 6, 7, NOT_USE}, null_at(3)}; + auto child3 = StringsCol{ + {"NOT_USE", "NOT_USE", "" /*NULL*/, "Banana", "Mango", "Apple", "Cherry", "NOT_USE"}, + null_at(2)}; + return StructsCol{{child1, child2, child3}, null_at(2)}; + }(); + + auto const structs2_original = [] { + auto child1 = ColWrapper{{7, null, null, 8, 9, NOT_USE, NOT_USE}, nulls_at({1, 2})}; + auto child2 = ColWrapper{{10, 11, 12, null, 14, NOT_USE, NOT_USE}, null_at(3)}; + auto child3 = StringsCol{"Bear", "Duck", "Cat", "Dog", "Panda", "NOT_USE", "NOT_USE"}; + return StructsCol{{child1, child2, child3}, null_at(4)}; + }(); + + auto const structs3_original = [] { + auto child1 = ColWrapper{{NOT_USE, NOT_USE, NOT_USE, -1, -2, -3, 0, null}, null_at(7)}; + auto child2 = ColWrapper{{NOT_USE, NOT_USE, NOT_USE, -5, 0, null, -1, -10}, null_at(5)}; + auto child3 = + StringsCol{"NOT_USE", "NOT_USE", "NOT_USE", "111", "Bànànà", "abcxyz", "é á í", "zzz"}; + return StructsCol{{child1, child2, child3}, null_at(4)}; + }(); + + auto const expected = [] { + auto child1 = ColWrapper{{1, 7, -1, 2, null, -2, null, null, -3, 3, 8, 0, 4, 9, null}, + nulls_at({4, 6, 7, 14})}; + auto child2 = ColWrapper{{4, 10, -5, null, 11, 0, 5, 12, null, 6, null, -1, 7, 14, -10}, + nulls_at({3, 8, 10})}; + auto child3 = StringsCol{{"" /*NULL*/, + "Bear", + "111", + "Banana", + "Duck", + "Bànànà", + "Mango", + "Cat", + "abcxyz", + "Apple", + "Dog", + "é á í", + "Cherry", + "Panda", + "zzz"}, + null_at(0)}; + return StructsCol{{child1, child2, child3}, nulls_at({0, 5, 13})}; + }(); + + auto const structs1 = cudf::slice(structs1_original, {2, 7})[0]; + auto const structs2 = cudf::slice(structs2_original, {0, 5})[0]; + auto const structs3 = cudf::slice(structs3_original, {3, 8})[0]; + auto const results = cudf::interleave_columns(TView{{structs1, structs2, structs3}}); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *results, verbosity); +} + CUDF_TEST_PROGRAM_MAIN()