From ce441137fe5e9dfa89768d03f60a4f4f8967c326 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 10 Aug 2021 10:23:17 -0600 Subject: [PATCH 1/9] Implement interleave_columns for structs --- cpp/src/reshape/interleave_columns.cu | 74 ++++++++++++++++++++++++++- 1 file changed, 73 insertions(+), 1 deletion(-) diff --git a/cpp/src/reshape/interleave_columns.cu b/cpp/src/reshape/interleave_columns.cu index 869218f9643..bc894edb333 100644 --- a/cpp/src/reshape/interleave_columns.cu +++ b/cpp/src/reshape/interleave_columns.cu @@ -32,7 +32,8 @@ namespace { struct interleave_columns_functor { template std::enable_if_t() and not std::is_same_v and - not std::is_same_v, + not std::is_same_v and + not std::is_same_v, std::unique_ptr> operator()(Args&&...) { @@ -49,6 +50,77 @@ struct interleave_columns_functor { return lists::detail::interleave_columns(lists_columns, create_mask, stream, mr); } + template + std::enable_if_t, std::unique_ptr> operator()( + table_view const& structs_columns, + bool create_mask, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + // We can safely call `column(0)` as the number of columns is known to be non zero. + auto const num_children = structs_columns.column(0).num_children(); + CUDF_EXPECTS( + std::all_of(structs_columns.begin(), + structs_columns.end(), + [num_children](auto const& col) { return col.num_children() == num_children; }), + "Number of children of the input structs columns must be the same"); + + auto const num_columns = structs_columns.num_columns(); + auto const num_rows = structs_columns.num_rows(); + auto const output_size = num_columns * num_rows; + + // Interleave the children of the structs columns. + std::vector> output_struct_members; + for (size_type child_idx = 0; child_idx < num_children; ++child_idx) { + // Collect children columns from the input structs columns at index `child_idx`. + std::vector children; + for (auto const& col : structs_columns) { + children.push_back(structs_column_view(col).get_sliced_child(child_idx)); + } + + auto const child_type = children.front().type(); + CUDF_EXPECTS( + std::all_of(children.cbegin(), + children.cend(), + [child_type](auto const& col) { return child_type == col.type(); }), + "Children of the input structs columns at the same child index must have the same type"); + + auto const children_nullable = std::any_of( + children.cbegin(), children.cend(), [](auto const& col) { return col.nullable(); }); + output_struct_members.emplace_back( + type_dispatcher(child_type, + interleave_columns_functor{}, + table_view{std::move(children)}, + children_nullable, + stream, + mr)); + } + + auto output = make_structs_column(output_size, + std::move(output_struct_members), + 0, + rmm::device_buffer{0, stream, mr}, + stream, + mr); + + // Only create null mask if at least one input structs columstructs1n is nullable. + if (create_mask) { + auto const input_dv_ptr = table_device_view::create(structs_columns); + auto const validity_fn = [input_dv = *input_dv_ptr, num_columns] __device__(auto const idx) { + return input_dv.column(idx % num_columns).is_valid(idx / num_columns); + }; + auto [null_mask, null_count] = + cudf::detail::valid_if(thrust::make_counting_iterator(0), + thrust::make_counting_iterator(output_size), + validity_fn, + stream, + mr); + output->set_null_mask(std::move(null_mask), null_count); + } + + return output; + } + template std::enable_if_t, std::unique_ptr> operator()( table_view const& strings_columns, From fdab8d6de200a38ed76d2a02ba2e7ba578dbe334 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 10 Aug 2021 15:47:43 -0600 Subject: [PATCH 2/9] Implement unit tests --- .../reshape/interleave_columns_tests.cpp | 282 +++++++++++++++++- 1 file changed, 281 insertions(+), 1 deletion(-) diff --git a/cpp/tests/reshape/interleave_columns_tests.cpp b/cpp/tests/reshape/interleave_columns_tests.cpp index 386fd9d08ee..b2bdf211c12 100644 --- a/cpp/tests/reshape/interleave_columns_tests.cpp +++ b/cpp/tests/reshape/interleave_columns_tests.cpp @@ -376,9 +376,11 @@ namespace { using StrListsCol = cudf::test::lists_column_wrapper; using IntListsCol = cudf::test::lists_column_wrapper; using IntCol = cudf::test::fixed_width_column_wrapper; +using StructsCol = cudf::test::structs_column_wrapper; +using StringsCol = cudf::test::strings_column_wrapper; using TView = cudf::table_view; -constexpr int32_t null{0}; +constexpr int32_t null{0}; // mark for null elements } // namespace struct ListsColumnsInterleaveTest : public cudf::test::BaseFixture { @@ -731,4 +733,282 @@ TEST_F(ListsColumnsInterleaveTest, SlicedStringsColumnsInputWithNulls) CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *results, verbosity); } +namespace { +using StrListsCol = cudf::test::lists_column_wrapper; +using IntListsCol = cudf::test::lists_column_wrapper; +using IntCol = cudf::test::fixed_width_column_wrapper; +using StructsCol = cudf::test::structs_column_wrapper; +using StringsCol = cudf::test::strings_column_wrapper; +using TView = cudf::table_view; + +constexpr int32_t null{0}; // mark for null elements +} // namespace + +struct StructsColumnsInterleaveTest : public cudf::test::BaseFixture { +}; + +TEST_F(StructsColumnsInterleaveTest, InvalidInput) +{ + // Input table contains non-structs column + { + auto const col1 = IntCol{}; + auto const col2 = StructsCol{}; + EXPECT_THROW(cudf::interleave_columns(TView{{col1, col2}}), cudf::logic_error); + } + + // Types mismatch + { + auto const structs1 = [] { + auto child1 = IntCol{1, 2, 3}; + auto child2 = IntCol{4, 5, 6}; + return StructsCol{{child1, child2}}; + }(); + + auto const structs2 = [] { + auto child3 = IntCol{7, 8, 9}; + auto child4 = StringsCol{"", "abc", "123"}; + return StructsCol{{child3, child4}}; + }(); + + EXPECT_THROW(cudf::interleave_columns(TView{{structs1, structs2}}), cudf::logic_error); + } + + // Numbers of children mismatch + { + auto const structs1 = [] { + auto child1 = IntCol{1, 2, 3}; + auto child2 = IntCol{4, 5, 6}; + return StructsCol{{child1, child2}}; + }(); + + auto const structs2 = [] { + auto child3 = IntCol{7, 8, 9}; + auto child4 = IntCol{10, 11, 12}; + auto child5 = IntCol{13, 14, 15}; + return StructsCol{{child3, child4, child5}}; + }(); + + EXPECT_THROW(cudf::interleave_columns(TView{{structs1, structs2}}), cudf::logic_error); + } +} + +TEST_F(StructsColumnsInterleaveTest, InterleaveEmptyColumns) +{ + auto const structs = StructsCol{}; + auto const results = cudf::interleave_columns(TView{{structs, structs}}); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(structs, *results, verbosity); +} + +template +struct StructsColumnsInterleaveTypedTest : public cudf::test::BaseFixture { +}; + +using TypesForTest = cudf::test::Concat; +TYPED_TEST_SUITE(StructsColumnsInterleaveTypedTest, TypesForTest); + +TYPED_TEST(StructsColumnsInterleaveTypedTest, InterleaveOneColumnNotNull) +{ + using ColWrapper = cudf::test::fixed_width_column_wrapper; + + auto const structs = [] { + auto child1 = ColWrapper{1, 2, 3}; + auto child2 = ColWrapper{4, 5, 6}; + auto child3 = StringsCol{"Banana", "Mango", "Apple"}; + return StructsCol{{child1, child2, child3}}; + }(); + auto const results = cudf::interleave_columns(TView{{structs}}); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(structs, *results, verbosity); +} + +TYPED_TEST(StructsColumnsInterleaveTypedTest, InterleaveOneColumnWithNulls) +{ + using ColWrapper = cudf::test::fixed_width_column_wrapper; + + auto const structs = [] { + auto child1 = ColWrapper{{1, 2, null, 3}, null_at(2)}; + auto child2 = ColWrapper{{4, null, 5, 6}, null_at(1)}; + auto child3 = StringsCol{{"" /*NULL*/, "Banana", "Mango", "Apple"}, null_at(0)}; + return StructsCol{{child1, child2, child3}, null_at(3)}; + }(); + auto const results = cudf::interleave_columns(TView{{structs}}); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(structs, *results, verbosity); +} + +TYPED_TEST(StructsColumnsInterleaveTypedTest, SimpleInputNoNull) +{ + using ColWrapper = cudf::test::fixed_width_column_wrapper; + + auto const structs1 = [] { + auto child1 = ColWrapper{1, 2, 3}; + auto child2 = ColWrapper{4, 5, 6}; + auto child3 = StringsCol{"Banana", "Mango", "Apple"}; + return StructsCol{{child1, child2, child3}}; + }(); + + auto const structs2 = [] { + auto child1 = ColWrapper{7, 8, 9}; + auto child2 = ColWrapper{10, 11, 12}; + auto child3 = StringsCol{"Bear", "Duck", "Cat"}; + return StructsCol{{child1, child2, child3}}; + }(); + + auto const expected = [] { + auto child1 = ColWrapper{1, 7, 2, 8, 3, 9}; + auto child2 = ColWrapper{4, 10, 5, 11, 6, 12}; + auto child3 = StringsCol{"Banana", "Bear", "Mango", "Duck", "Apple", "Cat"}; + return StructsCol{{child1, child2, child3}}; + }(); + + auto const results = cudf::interleave_columns(TView{{structs1, structs2}}); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *results, verbosity); +} + +TYPED_TEST(StructsColumnsInterleaveTypedTest, SimpleInputWithNulls) +{ + using ColWrapper = cudf::test::fixed_width_column_wrapper; + + auto const structs1 = [] { + auto child1 = ColWrapper{{1, 2, null, 3, 4}, null_at(2)}; + auto child2 = ColWrapper{{4, null, 5, 6, 7}, null_at(1)}; + auto child3 = StringsCol{{"" /*NULL*/, "Banana", "Mango", "Apple", "Cherry"}, null_at(0)}; + return StructsCol{{child1, child2, child3}, null_at(0)}; + }(); + + auto const structs2 = [] { + auto child1 = ColWrapper{{7, null, null, 8, 9}, nulls_at({1, 2})}; + auto child2 = ColWrapper{{10, 11, 12, null, 14}, null_at(3)}; + auto child3 = StringsCol{"Bear", "Duck", "Cat", "Dog", "Panda"}; + return StructsCol{{child1, child2, child3}, null_at(4)}; + }(); + + auto const structs3 = [] { + auto child1 = ColWrapper{{-1, -2, -3, 0, null}, null_at(4)}; + auto child2 = ColWrapper{{-5, 0, null, -1, -10}, null_at(2)}; + auto child3 = StringsCol{"111", "Bànànà", "abcxyz", "é á í", "zzz"}; + return StructsCol{{child1, child2, child3}, null_at(1)}; + }(); + + auto const expected = [] { + auto child1 = ColWrapper{{1, 7, -1, 2, null, -2, null, null, -3, 3, 8, 0, 4, 9, null}, + nulls_at({4, 6, 7, 14})}; + auto child2 = ColWrapper{{4, 10, -5, null, 11, 0, 5, 12, null, 6, null, -1, 7, 14, -10}, + nulls_at({3, 8, 10})}; + auto child3 = StringsCol{{"" /*NULL*/, + "Bear", + "111", + "Banana", + "Duck", + "Bànànà", + "Mango", + "Cat", + "abcxyz", + "Apple", + "Dog", + "é á í", + "Cherry", + "Panda", + "zzz"}, + null_at(0)}; + return StructsCol{{child1, child2, child3}, nulls_at({0, 5, 13})}; + }(); + + auto const results = cudf::interleave_columns(TView{{structs1, structs2, structs3}}); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *results, verbosity); +} + +TYPED_TEST(StructsColumnsInterleaveTypedTest, SlicedColumnsInputNoNull) +{ + using ColWrapper = cudf::test::fixed_width_column_wrapper; + constexpr int32_t NOT_USE{-1}; // mark for elements that we don't care + + auto const structs1_original = [] { + auto child1 = ColWrapper{NOT_USE, NOT_USE, 1, 2, 3, NOT_USE}; + auto child2 = ColWrapper{NOT_USE, NOT_USE, 4, 5, 6, NOT_USE}; + auto child3 = StringsCol{"NOT_USE", "NOT_USE", "Banana", "Mango", "Apple", "NOT_USE"}; + return StructsCol{{child1, child2, child3}}; + }(); + + // structs2 has more rows than structs1 + auto const structs2_original = [] { + auto child1 = ColWrapper{NOT_USE, 7, 8, 9, NOT_USE, NOT_USE, NOT_USE}; + auto child2 = ColWrapper{NOT_USE, 10, 11, 12, NOT_USE, NOT_USE, NOT_USE}; + auto child3 = StringsCol{"NOT_USE", "Bear", "Duck", "Cat", "NOT_USE", "NOT_USE", "NOT_USE"}; + return StructsCol{{child1, child2, child3}}; + }(); + + auto const expected = [] { + auto child1 = ColWrapper{1, 7, 2, 8, 3, 9}; + auto child2 = ColWrapper{4, 10, 5, 11, 6, 12}; + auto child3 = StringsCol{"Banana", "Bear", "Mango", "Duck", "Apple", "Cat"}; + return StructsCol{{child1, child2, child3}}; + }(); + + auto const structs1 = cudf::slice(structs1_original, {2, 5})[0]; + auto const structs2 = cudf::slice(structs2_original, {1, 4})[0]; + auto const results = cudf::interleave_columns(TView{{structs1, structs2}}); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *results, verbosity); +} + +TYPED_TEST(StructsColumnsInterleaveTypedTest, SlicedColumnsInputWithNulls) +{ + using ColWrapper = cudf::test::fixed_width_column_wrapper; + constexpr int32_t NOT_USE{-1}; // mark for elements that we don't care + + auto const structs1_original = [] { + auto child1 = ColWrapper{{NOT_USE, NOT_USE, 1, 2, null, 3, 4, NOT_USE}, null_at(4)}; + auto child2 = ColWrapper{{NOT_USE, NOT_USE, 4, null, 5, 6, 7, NOT_USE}, null_at(3)}; + auto child3 = StringsCol{ + {"NOT_USE", "NOT_USE", "" /*NULL*/, "Banana", "Mango", "Apple", "Cherry", "NOT_USE"}, + null_at(2)}; + return StructsCol{{child1, child2, child3}, null_at(2)}; + }(); + + auto const structs2_original = [] { + auto child1 = ColWrapper{{7, null, null, 8, 9, NOT_USE, NOT_USE}, nulls_at({1, 2})}; + auto child2 = ColWrapper{{10, 11, 12, null, 14, NOT_USE, NOT_USE}, null_at(3)}; + auto child3 = StringsCol{"Bear", "Duck", "Cat", "Dog", "Panda", "NOT_USE", "NOT_USE"}; + return StructsCol{{child1, child2, child3}, null_at(4)}; + }(); + + auto const structs3_original = [] { + auto child1 = ColWrapper{{NOT_USE, NOT_USE, NOT_USE, -1, -2, -3, 0, null}, null_at(7)}; + auto child2 = ColWrapper{{NOT_USE, NOT_USE, NOT_USE, -5, 0, null, -1, -10}, null_at(5)}; + auto child3 = + StringsCol{"NOT_USE", "NOT_USE", "NOT_USE", "111", "Bànànà", "abcxyz", "é á í", "zzz"}; + return StructsCol{{child1, child2, child3}, null_at(4)}; + }(); + + auto const expected = [] { + auto child1 = ColWrapper{{1, 7, -1, 2, null, -2, null, null, -3, 3, 8, 0, 4, 9, null}, + nulls_at({4, 6, 7, 14})}; + auto child2 = ColWrapper{{4, 10, -5, null, 11, 0, 5, 12, null, 6, null, -1, 7, 14, -10}, + nulls_at({3, 8, 10})}; + auto child3 = StringsCol{{"" /*NULL*/, + "Bear", + "111", + "Banana", + "Duck", + "Bànànà", + "Mango", + "Cat", + "abcxyz", + "Apple", + "Dog", + "é á í", + "Cherry", + "Panda", + "zzz"}, + null_at(0)}; + return StructsCol{{child1, child2, child3}, nulls_at({0, 5, 13})}; + }(); + + auto const structs1 = cudf::slice(structs1_original, {2, 7})[0]; + auto const structs2 = cudf::slice(structs2_original, {0, 5})[0]; + auto const structs3 = cudf::slice(structs3_original, {3, 8})[0]; + auto const results = cudf::interleave_columns(TView{{structs1, structs2, structs3}}); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *results, verbosity); +} + CUDF_TEST_PROGRAM_MAIN() From c302dd9ab0324b410dd2eee3dd2a5a2629c5f4be Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 10 Aug 2021 15:51:41 -0600 Subject: [PATCH 3/9] Fix typo --- cpp/src/reshape/interleave_columns.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/reshape/interleave_columns.cu b/cpp/src/reshape/interleave_columns.cu index bc894edb333..a482243e140 100644 --- a/cpp/src/reshape/interleave_columns.cu +++ b/cpp/src/reshape/interleave_columns.cu @@ -103,7 +103,7 @@ struct interleave_columns_functor { stream, mr); - // Only create null mask if at least one input structs columstructs1n is nullable. + // Only create null mask if at least one input structs column is nullable. if (create_mask) { auto const input_dv_ptr = table_device_view::create(structs_columns); auto const validity_fn = [input_dv = *input_dv_ptr, num_columns] __device__(auto const idx) { From ee480ff34c942375dbe06eda31aa318e5a5b4239 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 10 Aug 2021 16:29:21 -0600 Subject: [PATCH 4/9] Cleanup tests --- cpp/tests/reshape/interleave_columns_tests.cpp | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/cpp/tests/reshape/interleave_columns_tests.cpp b/cpp/tests/reshape/interleave_columns_tests.cpp index b2bdf211c12..79b6868910f 100644 --- a/cpp/tests/reshape/interleave_columns_tests.cpp +++ b/cpp/tests/reshape/interleave_columns_tests.cpp @@ -376,8 +376,6 @@ namespace { using StrListsCol = cudf::test::lists_column_wrapper; using IntListsCol = cudf::test::lists_column_wrapper; using IntCol = cudf::test::fixed_width_column_wrapper; -using StructsCol = cudf::test::structs_column_wrapper; -using StringsCol = cudf::test::strings_column_wrapper; using TView = cudf::table_view; constexpr int32_t null{0}; // mark for null elements @@ -734,14 +732,8 @@ TEST_F(ListsColumnsInterleaveTest, SlicedStringsColumnsInputWithNulls) } namespace { -using StrListsCol = cudf::test::lists_column_wrapper; -using IntListsCol = cudf::test::lists_column_wrapper; -using IntCol = cudf::test::fixed_width_column_wrapper; -using StructsCol = cudf::test::structs_column_wrapper; -using StringsCol = cudf::test::strings_column_wrapper; -using TView = cudf::table_view; - -constexpr int32_t null{0}; // mark for null elements +using StructsCol = cudf::test::structs_column_wrapper; +using StringsCol = cudf::test::strings_column_wrapper; } // namespace struct StructsColumnsInterleaveTest : public cudf::test::BaseFixture { From 9aca9af041f4e899625a9ff4ca1242854160cecc Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 10 Aug 2021 21:19:48 -0600 Subject: [PATCH 5/9] Rewrite SFINAE --- cpp/src/reshape/interleave_columns.cu | 80 +++++++++++++++------------ 1 file changed, 46 insertions(+), 34 deletions(-) diff --git a/cpp/src/reshape/interleave_columns.cu b/cpp/src/reshape/interleave_columns.cu index a482243e140..51139d0efa8 100644 --- a/cpp/src/reshape/interleave_columns.cu +++ b/cpp/src/reshape/interleave_columns.cu @@ -29,33 +29,44 @@ namespace cudf { namespace detail { namespace { -struct interleave_columns_functor { - template - std::enable_if_t() and not std::is_same_v and - not std::is_same_v and - not std::is_same_v, - std::unique_ptr> - operator()(Args&&...) +// Error case when no other overload or specialization is available +template +struct interleave_columns_impl { + template + std::unique_ptr operator()(Args&&...) { - CUDF_FAIL("Called `interleave_columns` on none-supported data type."); + CUDF_FAIL("Unsupported type in `interleave_columns`."); } +}; +struct interleave_columns_functor { template - std::enable_if_t, std::unique_ptr> operator()( - table_view const& lists_columns, - bool create_mask, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + std::unique_ptr operator()(table_view const& input, + bool create_mask, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + return interleave_columns_impl{}(input, create_mask, stream, mr); + } +}; + +template +struct interleave_columns_impl>> { + std::unique_ptr operator()(table_view const& lists_columns, + bool create_mask, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { return lists::detail::interleave_columns(lists_columns, create_mask, stream, mr); } +}; - template - std::enable_if_t, std::unique_ptr> operator()( - table_view const& structs_columns, - bool create_mask, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +template +struct interleave_columns_impl>> { + std::unique_ptr operator()(table_view const& structs_columns, + bool create_mask, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { // We can safely call `column(0)` as the number of columns is known to be non zero. auto const num_children = structs_columns.column(0).num_children(); @@ -120,13 +131,14 @@ struct interleave_columns_functor { return output; } +}; - template - std::enable_if_t, std::unique_ptr> operator()( - table_view const& strings_columns, - bool create_mask, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +template +struct interleave_columns_impl>> { + std::unique_ptr operator()(table_view const& strings_columns, + bool create_mask, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { auto num_columns = strings_columns.num_columns(); if (num_columns == 1) // Single strings column returns a copy @@ -178,7 +190,7 @@ struct interleave_columns_functor { cudf::detail::get_value(offsets_column->view(), num_strings, stream); auto chars_column = strings::detail::create_chars_child_column(bytes, stream, mr); // Fill the chars column - auto d_results_chars = chars_column->mutable_view().data(); + auto d_results_chars = chars_column->mutable_view().template data(); thrust::for_each_n( rmm::exec_policy(stream), thrust::make_counting_iterator(0), @@ -204,13 +216,14 @@ struct interleave_columns_functor { stream, mr); } +}; - template - std::enable_if_t(), std::unique_ptr> operator()( - table_view const& input, - bool create_mask, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +template +struct interleave_columns_impl()>> { + std::unique_ptr operator()(table_view const& input, + bool create_mask, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { auto arch_column = input.column(0); auto output_size = input.num_columns() * input.num_rows(); @@ -265,11 +278,10 @@ std::unique_ptr interleave_columns(table_view const& input, CUDF_EXPECTS(input.num_columns() > 0, "input must have at least one column to determine dtype."); auto const dtype = input.column(0).type(); - CUDF_EXPECTS(std::all_of(std::cbegin(input), std::cend(input), [dtype](auto const& col) { return dtype == col.type(); }), - "DTYPE mismatch"); + "Input columns must have the same type"); auto const output_needs_mask = std::any_of( std::cbegin(input), std::cend(input), [](auto const& col) { return col.nullable(); }); From cbca8f15dfc058e73b17cc4f902eed72cdc7d837 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 10 Aug 2021 21:24:00 -0600 Subject: [PATCH 6/9] Rename variables --- cpp/tests/reshape/interleave_columns_tests.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cpp/tests/reshape/interleave_columns_tests.cpp b/cpp/tests/reshape/interleave_columns_tests.cpp index 79b6868910f..4e45e49ee51 100644 --- a/cpp/tests/reshape/interleave_columns_tests.cpp +++ b/cpp/tests/reshape/interleave_columns_tests.cpp @@ -757,9 +757,9 @@ TEST_F(StructsColumnsInterleaveTest, InvalidInput) }(); auto const structs2 = [] { - auto child3 = IntCol{7, 8, 9}; - auto child4 = StringsCol{"", "abc", "123"}; - return StructsCol{{child3, child4}}; + auto child1 = IntCol{7, 8, 9}; + auto child2 = StringsCol{"", "abc", "123"}; + return StructsCol{{child1, child2}}; }(); EXPECT_THROW(cudf::interleave_columns(TView{{structs1, structs2}}), cudf::logic_error); @@ -774,10 +774,10 @@ TEST_F(StructsColumnsInterleaveTest, InvalidInput) }(); auto const structs2 = [] { - auto child3 = IntCol{7, 8, 9}; - auto child4 = IntCol{10, 11, 12}; - auto child5 = IntCol{13, 14, 15}; - return StructsCol{{child3, child4, child5}}; + auto child1 = IntCol{7, 8, 9}; + auto child2 = IntCol{10, 11, 12}; + auto child3 = IntCol{13, 14, 15}; + return StructsCol{{child1, child2, child3}}; }(); EXPECT_THROW(cudf::interleave_columns(TView{{structs1, structs2}}), cudf::logic_error); From 667149ed1e53bf365195564733cdb620ce57bb47 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 11 Aug 2021 09:46:01 -0600 Subject: [PATCH 7/9] Rewrite null mask generation --- cpp/src/reshape/interleave_columns.cu | 30 +++++++++++---------------- 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/cpp/src/reshape/interleave_columns.cu b/cpp/src/reshape/interleave_columns.cu index 51139d0efa8..87ebb6936ec 100644 --- a/cpp/src/reshape/interleave_columns.cu +++ b/cpp/src/reshape/interleave_columns.cu @@ -107,29 +107,23 @@ struct interleave_columns_impl(0), - thrust::make_counting_iterator(output_size), - validity_fn, - stream, - mr); - output->set_null_mask(std::move(null_mask), null_count); - } + return cudf::detail::valid_if(thrust::make_counting_iterator(0), + thrust::make_counting_iterator(output_size), + validity_fn, + stream, + mr); + }; - return output; + // Only create null mask if at least one input structs column is nullable. + auto [null_mask, null_count] = + create_mask ? create_mask_fn() : std::pair{rmm::device_buffer{0, stream, mr}, size_type{0}}; + return make_structs_column( + output_size, std::move(output_struct_members), null_count, std::move(null_mask), stream, mr); } }; From 5b1781a7fcf848b2496df294df006092e5fda7d4 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 11 Aug 2021 10:09:35 -0600 Subject: [PATCH 8/9] Rewrite children columns collector --- cpp/src/reshape/interleave_columns.cu | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/cpp/src/reshape/interleave_columns.cu b/cpp/src/reshape/interleave_columns.cu index 87ebb6936ec..b15708c5cf8 100644 --- a/cpp/src/reshape/interleave_columns.cu +++ b/cpp/src/reshape/interleave_columns.cu @@ -84,10 +84,11 @@ struct interleave_columns_impl> output_struct_members; for (size_type child_idx = 0; child_idx < num_children; ++child_idx) { // Collect children columns from the input structs columns at index `child_idx`. - std::vector children; - for (auto const& col : structs_columns) { - children.push_back(structs_column_view(col).get_sliced_child(child_idx)); - } + auto const child_iter = + thrust::make_transform_iterator(structs_columns.begin(), [child_idx](auto const& col) { + return structs_column_view(col).get_sliced_child(child_idx); + }); + auto children = std::vector(child_iter, child_iter + num_columns); auto const child_type = children.front().type(); CUDF_EXPECTS( From 9b5ddba27915f474a1ee571369e89b315c47c7ec Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 17 Aug 2021 10:38:32 -0600 Subject: [PATCH 9/9] Add test case for nested input structs columns --- .../reshape/interleave_columns_tests.cpp | 67 ++++++++++++++++++- 1 file changed, 66 insertions(+), 1 deletion(-) diff --git a/cpp/tests/reshape/interleave_columns_tests.cpp b/cpp/tests/reshape/interleave_columns_tests.cpp index 4e45e49ee51..e51f0740787 100644 --- a/cpp/tests/reshape/interleave_columns_tests.cpp +++ b/cpp/tests/reshape/interleave_columns_tests.cpp @@ -24,7 +24,7 @@ using namespace cudf::test::iterators; -constexpr cudf::test::debug_output_level verbosity{cudf::test::debug_output_level::ALL_ERRORS}; +constexpr cudf::test::debug_output_level verbosity{cudf::test::debug_output_level::FIRST_ERROR}; template struct InterleaveColumnsTest : public cudf::test::BaseFixture { @@ -910,6 +910,71 @@ TYPED_TEST(StructsColumnsInterleaveTypedTest, SimpleInputWithNulls) CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *results, verbosity); } +TYPED_TEST(StructsColumnsInterleaveTypedTest, NestedInputStructsColumns) +{ + using ColWrapper = cudf::test::fixed_width_column_wrapper; + + auto const structs1 = [] { + auto child_structs1 = [] { + auto child1 = ColWrapper{{null, 2, 3, 4, 5}, null_at(0)}; + auto child2 = ColWrapper{{6, 7, 8, null, 10}, null_at(3)}; + return StructsCol{{child1, child2}, null_at(0)}; + }(); + + auto child_structs2 = [] { + auto child1 = ColWrapper{{11, null, 13, 14, 15}, null_at(1)}; + auto child2 = ColWrapper{{null, 17, 18, 19, 20}, null_at(0)}; + return StructsCol{{child1, child2}, nulls_at({0, 1})}; + }(); + + auto child_strings = [] { return StringsCol{"Banana", "Mango", "Apple", "Cherry", "Kiwi"}; }(); + + return StructsCol{{child_structs1, child_structs2, child_strings}, null_at(0)}; + }(); + + auto const structs2 = [] { + auto child_structs1 = [] { + auto child1 = ColWrapper{{-1, null, -3, -4, -5}, null_at(1)}; + auto child2 = ColWrapper{{-6, -7, -8, null, -10}, null_at(3)}; + return StructsCol{{child1, child2}}; + }(); + + auto child_structs2 = [] { + auto child1 = ColWrapper{{-11, -12, null, -14, -15}, null_at(2)}; + auto child2 = ColWrapper{{-16, -17, -18, -19, null}, null_at(4)}; + return StructsCol{{child1, child2}, null_at(2)}; + }(); + + auto child_strings = [] { return StringsCol{"Bear", "Duck", "Cat", "Dog", "Rabbit"}; }(); + + return StructsCol{{child_structs1, child_structs2, child_strings}, null_at(2)}; + }(); + + auto const expected = [] { + auto child_structs1 = [] { + auto child1 = ColWrapper{{null, -1, 2, null, 3, -3, 4, -4, 5, -5}, nulls_at({0, 3})}; + auto child2 = ColWrapper{{6, -6, 7, -7, 8, -8, null, null, 10, -10}, nulls_at({6, 7})}; + return StructsCol{{child1, child2}, null_at(0)}; + }(); + + auto child_structs2 = [] { + auto child1 = ColWrapper{{11, -11, null, -12, 13, null, 14, -14, 15, -15}, nulls_at({2, 5})}; + auto child2 = ColWrapper{{null, -16, 17, -17, 18, -18, 19, -19, 20, null}, nulls_at({0, 9})}; + return StructsCol{{child1, child2}, nulls_at({0, 2, 5})}; + }(); + + auto child_strings = [] { + return StringsCol{ + "Banana", "Bear", "Mango", "Duck", "Apple", "Cat", "Cherry", "Dog", "Kiwi", "Rabbit"}; + }(); + + return StructsCol{{child_structs1, child_structs2, child_strings}, nulls_at({0, 5})}; + }(); + + auto const results = cudf::interleave_columns(TView{{structs1, structs2}}); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *results, verbosity); +} + TYPED_TEST(StructsColumnsInterleaveTypedTest, SlicedColumnsInputNoNull) { using ColWrapper = cudf::test::fixed_width_column_wrapper;