From 1ab2f3b98506ea1508343d57d39ee8d042e740c0 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 29 Mar 2021 11:15:21 -0600 Subject: [PATCH 01/32] Add scatter for struct type --- cpp/include/cudf/detail/scatter.cuh | 57 +++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/cpp/include/cudf/detail/scatter.cuh b/cpp/include/cudf/detail/scatter.cuh index 2cb1cbffc68..b10630e40c0 100644 --- a/cpp/include/cudf/detail/scatter.cuh +++ b/cpp/include/cudf/detail/scatter.cuh @@ -206,6 +206,63 @@ struct column_scatterer_impl { } }; +template +struct column_scatterer_impl { + std::unique_ptr operator()(column_view const& source, + MapItRoot scatter_map_begin, + MapItRoot scatter_map_end, + column_view const& target, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const + { + CUDF_EXPECTS(source.num_children() == target.num_children(), + "Scatter source and target are not of the same type."); + + auto const scatter_map_size = std::distance(scatter_map_begin, scatter_map_end); + if (scatter_map_size == 0) { return empty_like(source); } + + structs_column_view structs_src(source); + structs_column_view structs_target(target); + std::vector> output_struct_members(structs_src.num_children()); + + std::transform(structs_src.child_begin(), + structs_src.child_end(), + structs_target.child_begin(), + output_struct_members.begin(), + [&scatter_map_begin, &scatter_map_end, stream, mr](auto const& source_col, + auto const& target_col) { + return type_dispatcher(source_col.type(), + column_scatterer{}, + source_col, + scatter_map_begin, + scatter_map_end, + target_col, + stream, + mr); + }); + + auto const gather_map = + scatter_to_gather(scatter_map_begin, scatter_map_end, scatter_map_size, stream); + gather_bitmask( + // Table view of struct column. 
+ cudf::table_view{ + std::vector{structs_src.child_begin(), structs_src.child_end()}}, + gather_map.begin(), + output_struct_members, + gather_bitmask_op::NULLIFY, + stream, + mr); + + return cudf::make_structs_column( + source.size(), + std::move(output_struct_members), + 0, + rmm::device_buffer{0, stream, mr}, // Null mask will be fixed up in cudf::scatter(). + stream, + mr); + } +}; + /** * @brief Scatters the rows of the source table into a copy of the target table * according to a scatter map. From 174bc3e48e732d358e59303dab558bbd6707c8a7 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 29 Mar 2021 11:50:58 -0600 Subject: [PATCH 02/32] Improve performance for scatter: only generate a gather_map if necessary --- cpp/include/cudf/detail/scatter.cuh | 39 +++++++++++++++++------------ 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/cpp/include/cudf/detail/scatter.cuh b/cpp/include/cudf/detail/scatter.cuh index b10630e40c0..f54a68ceefe 100644 --- a/cpp/include/cudf/detail/scatter.cuh +++ b/cpp/include/cudf/detail/scatter.cuh @@ -241,17 +241,22 @@ struct column_scatterer_impl { mr); }); - auto const gather_map = - scatter_to_gather(scatter_map_begin, scatter_map_end, scatter_map_size, stream); - gather_bitmask( - // Table view of struct column. - cudf::table_view{ - std::vector{structs_src.child_begin(), structs_src.child_end()}}, - gather_map.begin(), - output_struct_members, - gather_bitmask_op::NULLIFY, - stream, - mr); + auto const nullable = std::any_of(structs_src.child_begin(), + structs_src.child_end(), + [](auto const& col) { return col.nullable(); }); + if (nullable) { + auto const gather_map = + scatter_to_gather(scatter_map_begin, scatter_map_end, scatter_map_size, stream); + gather_bitmask( + // Table view of struct column. 
+ cudf::table_view{ + std::vector{structs_src.child_begin(), structs_src.child_end()}}, + gather_map.begin(), + output_struct_members, + gather_bitmask_op::NULLIFY, + stream, + mr); + } return cudf::make_structs_column( source.size(), @@ -350,11 +355,13 @@ std::unique_ptr scatter( mr); }); - auto gather_map = scatter_to_gather( - updated_scatter_map_begin, updated_scatter_map_end, target.num_rows(), stream); - - gather_bitmask(source, gather_map.begin(), result, gather_bitmask_op::PASSTHROUGH, stream, mr); - + auto const nullable = + std::any_of(source.begin(), source.end(), [](auto const& col) { return col.nullable(); }); + if (nullable) { + auto gather_map = scatter_to_gather( + updated_scatter_map_begin, updated_scatter_map_end, target.num_rows(), stream); + gather_bitmask(source, gather_map.begin(), result, gather_bitmask_op::PASSTHROUGH, stream, mr); + } return std::make_unique
(std::move(result)); } } // namespace detail From 46f9148985c5cd780d5bcaf5d673df7c2549c4c5 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 29 Mar 2021 12:09:51 -0600 Subject: [PATCH 03/32] Add a partition test for partitioning a column of struct type --- cpp/tests/partitioning/partition_test.cpp | 29 +++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/cpp/tests/partitioning/partition_test.cpp b/cpp/tests/partitioning/partition_test.cpp index a6838112a54..ed994da20f8 100644 --- a/cpp/tests/partitioning/partition_test.cpp +++ b/cpp/tests/partitioning/partition_test.cpp @@ -141,6 +141,35 @@ TYPED_TEST(PartitionTest, Identity) run_partition_test(table_to_partition, map, 6, table_to_partition, expected_offsets); } +TYPED_TEST(PartitionTest, Struct) +{ + using value_type = cudf::test::GetType; + using map_type = cudf::test::GetType; + + fixed_width_column_wrapper A({1, 2}, {0, 1}); + auto struct_col = cudf::test::structs_column_wrapper({A}, {0, 1}).release(); + auto table_to_partition = cudf::table_view{{*struct_col}}; + + fixed_width_column_wrapper map{9, 2}; + + fixed_width_column_wrapper A_expected({2, 1}, {1, 0}); + auto struct_expected = cudf::test::structs_column_wrapper({A_expected}, {1, 0}).release(); + auto expected = cudf::table_view{{*struct_expected}}; + + std::vector expected_offsets{0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2}; + + // This does not work because we cannot sort a struct right now... + // run_partition_test(table_to_partition, map, 12, expected, expected_offsets); + // But there is no ambiguity in the ordering so I'll just copy it all here for now. 
+ auto num_partitions = 12; + auto result = cudf::partition(table_to_partition, map, num_partitions); + auto const& actual_partitioned_table = result.first; + auto const& actual_offsets = result.second; + EXPECT_EQ(actual_offsets, expected_offsets); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *actual_partitioned_table); +} + TYPED_TEST(PartitionTest, Reverse) { using value_type = cudf::test::GetType; From c5112e84cd85a0a09764019fef3a9df3b2b3d1d0 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 29 Mar 2021 12:53:54 -0600 Subject: [PATCH 04/32] Fix bitmask gathering during scattering data --- cpp/include/cudf/detail/scatter.cuh | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/cpp/include/cudf/detail/scatter.cuh b/cpp/include/cudf/detail/scatter.cuh index f54a68ceefe..b5bde5e3d84 100644 --- a/cpp/include/cudf/detail/scatter.cuh +++ b/cpp/include/cudf/detail/scatter.cuh @@ -243,10 +243,13 @@ struct column_scatterer_impl { auto const nullable = std::any_of(structs_src.child_begin(), structs_src.child_end(), + [](auto const& col) { return col.nullable(); }) or + std::any_of(structs_target.child_begin(), + structs_target.child_end(), [](auto const& col) { return col.nullable(); }); if (nullable) { auto const gather_map = - scatter_to_gather(scatter_map_begin, scatter_map_end, scatter_map_size, stream); + scatter_to_gather(scatter_map_begin, scatter_map_end, source.size(), stream); gather_bitmask( // Table view of struct column. cudf::table_view{ @@ -356,7 +359,8 @@ std::unique_ptr
scatter( }); auto const nullable = - std::any_of(source.begin(), source.end(), [](auto const& col) { return col.nullable(); }); + std::any_of(source.begin(), source.end(), [](auto const& col) { return col.nullable(); }) or + std::any_of(target.begin(), target.end(), [](auto const& col) { return col.nullable(); }); if (nullable) { auto gather_map = scatter_to_gather( updated_scatter_map_begin, updated_scatter_map_end, target.num_rows(), stream); From 93d725118d1ff392da816ba633e9f7ed436593eb Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 29 Mar 2021 12:54:23 -0600 Subject: [PATCH 05/32] Optimize bitmask gathering for struct type --- cpp/include/cudf/detail/gather.cuh | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/cpp/include/cudf/detail/gather.cuh b/cpp/include/cudf/detail/gather.cuh index 73647ac2292..fd26e2db378 100644 --- a/cpp/include/cudf/detail/gather.cuh +++ b/cpp/include/cudf/detail/gather.cuh @@ -567,15 +567,20 @@ struct column_gatherer_impl { mr); }); - gather_bitmask( - // Table view of struct column. - cudf::table_view{ - std::vector{structs_column.child_begin(), structs_column.child_end()}}, - gather_map_begin, - output_struct_members, - nullify_out_of_bounds ? gather_bitmask_op::NULLIFY : gather_bitmask_op::DONT_CHECK, - stream, - mr); + auto const nullable = std::any_of(structs_column.child_begin(), + structs_column.child_end(), + [](auto const& col) { return col.nullable(); }); + if (nullable) { + gather_bitmask( + // Table view of struct column. + cudf::table_view{ + std::vector{structs_column.child_begin(), structs_column.child_end()}}, + gather_map_begin, + output_struct_members, + nullify_out_of_bounds ? 
+ gather_bitmask_op::NULLIFY : gather_bitmask_op::DONT_CHECK, + stream, + mr); + } return cudf::make_structs_column( gather_map_size, From 55152ff7dea8180e8f7561e17bbd630153cc1700 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 29 Mar 2021 13:01:13 -0600 Subject: [PATCH 06/32] Rewrite `scatter_to_gather`, changing from using `device_vector` to `device_uvector` --- cpp/include/cudf/detail/scatter.cuh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cpp/include/cudf/detail/scatter.cuh b/cpp/include/cudf/detail/scatter.cuh index b5bde5e3d84..1ffff4dcc07 100644 --- a/cpp/include/cudf/detail/scatter.cuh +++ b/cpp/include/cudf/detail/scatter.cuh @@ -32,6 +32,8 @@ #include #include +#include + namespace cudf { namespace detail { @@ -66,7 +68,9 @@ auto scatter_to_gather(MapIterator scatter_map_begin, // when calling the gather_bitmask() which applies a pass-through whenever it finds a // value outside the range of the target column. // We'll use the gather_rows value for this since it should always be outside the valid range. 
- auto gather_map = rmm::device_vector(gather_rows, gather_rows); + auto gather_map = rmm::device_uvector(gather_rows, stream); + thrust::uninitialized_fill( + rmm::exec_policy(stream), gather_map.begin(), gather_map.end(), gather_rows); // Convert scatter map to a gather map thrust::scatter( From 8735879b2aba4b03f1f5269c57db7b33d82ec2c9 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 29 Mar 2021 13:20:15 -0600 Subject: [PATCH 07/32] Re-organize source file list, and add `scatter_struct_tests.cu` --- cpp/tests/CMakeLists.txt | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index ab14c2577bb..8b182f347ba 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -200,24 +200,25 @@ ConfigureTest(SORT_TEST ################################################################################################### # - copying tests --------------------------------------------------------------------------------- ConfigureTest(COPYING_TEST - copying/utility_tests.cpp + copying/concatenate_tests.cu + copying/copy_range_tests.cpp + copying/copy_tests.cu + copying/detail_gather_tests.cu + copying/gather_struct_tests.cu copying/gather_tests.cu copying/gather_str_tests.cu copying/gather_list_tests.cu - copying/segmented_gather_list_tests.cpp - copying/gather_struct_tests.cu - copying/detail_gather_tests.cu + copying/get_value_tests.cpp + copying/pack_tests.cu + copying/sample_tests.cpp copying/scatter_tests.cpp copying/scatter_list_tests.cu - copying/copy_range_tests.cpp + copying/scatter_struct_tests.cu + copying/segmented_gather_list_tests.cpp + copying/shift_tests.cpp copying/slice_tests.cpp copying/split_tests.cpp - copying/copy_tests.cu - copying/shift_tests.cpp - copying/get_value_tests.cpp - copying/sample_tests.cpp - copying/concatenate_tests.cu - copying/pack_tests.cu) + copying/utility_tests.cpp) 
################################################################################################### # - utilities tests ------------------------------------------------------------------------------- From 81872d22fcd6e2f00bc2b5e3f6db2138b57e7cb1 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 29 Mar 2021 13:53:12 -0600 Subject: [PATCH 08/32] Implement test for empty scatter map --- cpp/tests/copying/scatter_struct_tests.cu | 449 ++++++++++++++++++++++ 1 file changed, 449 insertions(+) create mode 100644 cpp/tests/copying/scatter_struct_tests.cu diff --git a/cpp/tests/copying/scatter_struct_tests.cu b/cpp/tests/copying/scatter_struct_tests.cu new file mode 100644 index 00000000000..1134000535a --- /dev/null +++ b/cpp/tests/copying/scatter_struct_tests.cu @@ -0,0 +1,449 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +using column_vector = std::vector>; +using cudf::size_type; +using namespace cudf::test; + +struct StructScatterTest : public cudf::test::BaseFixture { +}; + +template +struct TypedStructScatterTest : public cudf::test::BaseFixture { +}; + +using TestTypes = cudf::test::Concat; + +TYPED_TEST_CASE(TypedStructScatterTest, TestTypes); + +namespace { +template +struct column_wrapper_constructor { + template + auto operator()(ValueIter begin, ValueIter end, ValidityIter validity_begin) const + { + return cudf::test::fixed_width_column_wrapper{ + begin, end, validity_begin}; + } +}; + +template <> +struct column_wrapper_constructor { + template + cudf::test::strings_column_wrapper operator()(ValueIter begin, + ValueIter end, + ValidityIter validity_begin) const + { + return cudf::test::strings_column_wrapper{begin, end, validity_begin}; + } +}; + +template +auto get_expected_column(std::vector const& input_values, + std::vector const& input_validity, + std::vector const& struct_validity, + std::vector const& scatter_map) +{ + auto is_valid = // Validity predicate. + [&input_values, &input_validity, &struct_validity, &scatter_map](auto gather_index) { + assert(gather_index >= 0 && gather_index < scatter_map.size() || + "Gather-index out of range."); + + auto i{scatter_map[gather_index]}; // Index into input_values. + + return (i >= 0 && i < static_cast(input_values.size())) && + (struct_validity.empty() || struct_validity[i]) && + (input_validity.empty() || input_validity[i]); + }; + + auto expected_row_count{scatter_map.size()}; + auto gather_iter = cudf::detail::make_counting_transform_iterator( + 0, [is_valid, &input_values, &scatter_map](auto i) { + return is_valid(i) ? 
input_values[scatter_map[i]] : SourceElementT{}; + }); + + return column_wrapper_constructor()( + gather_iter, + gather_iter + expected_row_count, + cudf::detail::make_counting_transform_iterator(0, is_valid)) + .release(); +} +} // namespace + +TYPED_TEST(TypedStructScatterTest, EmptyScatterMapTest) +{ + auto const ages_src = std::vector{5, 10, 15, 20, 25, 30}; + auto const ages_validity_src = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; }); + auto ages_col_src = fixed_width_column_wrapper{ + ages_src.begin(), ages_src.end(), ages_validity_src}; + + auto const structs_validity_src = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 5; }); + auto const structs_src = structs_column_wrapper{{ages_col_src}, structs_validity_src}.release(); + + auto const ages_tgt = std::vector{50, 40, 55, 70, 85, 90}; + auto const ages_validity_tgt = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; }); + auto ages_col_tgt = fixed_width_column_wrapper{ + ages_tgt.begin(), ages_tgt.end(), ages_validity_tgt}; + + auto const structs_validity_tgt = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; }); + auto const structs_tgt = structs_column_wrapper{{ages_col_tgt}, structs_validity_tgt}.release(); + + auto const source = cudf::table_view{std::vector{structs_src->view()}}; + auto const target = cudf::table_view{std::vector{structs_tgt->view()}}; + auto const scatter_map = fixed_width_column_wrapper{}.release(); + + auto const result = cudf::scatter(source, scatter_map->view(), target); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(structs_tgt->view(), result->get_column(0)); +} +#if 0 +TYPED_TEST(TypedStructScatterTest, TestSimpleStructGather) +{ + // Testing gather() on struct. + + // 1. String "names" column. 
+ auto const names = + std::vector{"Vimes", "Carrot", "Angua", "Cheery", "Detritus", "Slant"}; + auto const names_validity = std::vector{1, 1, 1, 1, 1, 1}; + auto names_column = strings_column_wrapper{names.begin(), names.end(), names_validity.begin()}; + + // 2. Numeric "ages" column. + auto const ages = std::vector{5, 10, 15, 20, 25, 30}; + auto const ages_validity = std::vector{1, 1, 1, 1, 0, 1}; + auto ages_column = + fixed_width_column_wrapper{ages.begin(), ages.end(), ages_validity.begin()}; + + // 3. Boolean "is_human" column. + auto const is_human = {true, true, false, false, false, false}; + auto const is_human_validity = std::vector{1, 1, 1, 0, 1, 1}; + auto is_human_col = + fixed_width_column_wrapper{is_human.begin(), is_human.end(), is_human_validity.begin()}; + + // Assemble struct column. + auto const struct_validity = std::vector{1, 1, 1, 1, 1, 0}; + auto struct_column = + structs_column_wrapper{{names_column, ages_column, is_human_col}, struct_validity.begin()} + .release(); + + // Gather to new struct column. + auto const scatter_map = std::vector{-1, 4, 3, 2, 1}; + auto const gather_map_col = + fixed_width_column_wrapper(scatter_map.begin(), scatter_map.end()).release(); + + auto const gathered_table = + cudf::gather(cudf::table_view{std::vector{struct_column->view()}}, + gather_map_col->view()); + + auto const gathered_struct_col = gathered_table->get_column(0); + auto const gathered_struct_col_view = cudf::structs_column_view{gathered_struct_col}; + + // Verify that the gathered struct's fields are as expected. 
+ + auto expected_names_column = + get_expected_column(names, names_validity, struct_validity, scatter_map); + expect_columns_equivalent(*expected_names_column, gathered_struct_col.child(0)); + + auto expected_ages_column = + get_expected_column(ages, ages_validity, struct_validity, scatter_map); + expect_columns_equivalent(*expected_ages_column, gathered_struct_col.child(1)); + + auto expected_bool_column = + get_expected_column(std::vector(is_human.begin(), is_human.end()), + is_human_validity, + struct_validity, + scatter_map); + expect_columns_equivalent(*expected_bool_column, gathered_struct_col.child(2)); + + std::vector> expected_columns; + expected_columns.push_back(std::move(expected_names_column)); + expected_columns.push_back(std::move(expected_ages_column)); + expected_columns.push_back(std::move(expected_bool_column)); + auto const expected_struct_column = + structs_column_wrapper{std::move(expected_columns), std::vector{0, 1, 1, 1, 1}}.release(); + + expect_columns_equivalent(*expected_struct_column, gathered_struct_col); +} + +TYPED_TEST(TypedStructScatterTest, TestGatherStructOfLists) +{ + using namespace cudf::test; + + // Testing gather() on struct> + + auto lists_column_exemplar = []() { + return lists_column_wrapper{ + {{5}, {10, 15}, {20, 25, 30}, {35, 40, 45, 50}, {55, 60, 65}, {70, 75}, {80}, {}, {}}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return !(i % 3); })}; + }; + + auto lists_column = std::make_unique(cudf::column(lists_column_exemplar(), 0)); + + // Assemble struct column. + std::vector> column_vector; + column_vector.push_back(std::move(lists_column)); + auto const struct_column = structs_column_wrapper{std::move(column_vector)}.release(); + + // Gather to new struct column. 
+ auto const scatter_map = std::vector{-1, 4, 3, 2, 1, 7, 3}; + auto const gather_map_col = + fixed_width_column_wrapper(scatter_map.begin(), scatter_map.end()).release(); + + auto const gathered_table = + cudf::gather(cudf::table_view{std::vector{struct_column->view()}}, + gather_map_col->view()); + + auto const gathered_struct_col = gathered_table->get_column(0); + auto const gathered_struct_col_view = cudf::structs_column_view{gathered_struct_col}; + + // Verify that the gathered struct column's list member presents as if + // it had itself been gathered individually. + + auto const list_column_before_gathering = lists_column_exemplar().release(); + + auto const expected_gathered_list_column = + cudf::gather( + cudf::table_view{std::vector{list_column_before_gathering->view()}}, + gather_map_col->view()) + ->get_column(0); + + expect_columns_equivalent(expected_gathered_list_column.view(), gathered_struct_col.child(0)); +} + +TYPED_TEST(TypedStructScatterTest, TestGatherStructOfListsOfLists) +{ + using namespace cudf::test; + + // Testing gather() on struct>> + + auto const lists_column_exemplar = []() { + return lists_column_wrapper{ + {{{5, 5}}, + {{10, 15}}, + {{20, 25}, {30}}, + {{35, 40}, {45, 50}}, + {{55}, {60, 65}}, + {{70, 75}}, + {{80, 80}}, + {}, + {}}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return !(i % 3); })}; + }; + + auto lists_column = std::make_unique(cudf::column(lists_column_exemplar(), 0)); + + // Assemble struct column. + std::vector> column_vector; + column_vector.push_back(std::move(lists_column)); + auto const struct_column = structs_column_wrapper{std::move(column_vector)}.release(); + + // Gather to new struct column. 
+ auto const scatter_map = std::vector{-1, 4, 3, 2, 1, 7, 3}; + auto const gather_map_col = + fixed_width_column_wrapper(scatter_map.begin(), scatter_map.end()).release(); + + auto const gathered_table = + cudf::gather(cudf::table_view{std::vector{struct_column->view()}}, + gather_map_col->view()); + + auto const gathered_struct_col = gathered_table->get_column(0); + auto const gathered_struct_col_view = cudf::structs_column_view{gathered_struct_col}; + + // Verify that the gathered struct column's list member presents as if + // it had itself been gathered individually. + + auto const list_column_before_gathering = lists_column_exemplar().release(); + + auto const expected_gathered_list_column = + cudf::gather( + cudf::table_view{std::vector{list_column_before_gathering->view()}}, + gather_map_col->view()) + ->get_column(0); + + expect_columns_equivalent(expected_gathered_list_column.view(), gathered_struct_col.child(0)); +} + +TYPED_TEST(TypedStructScatterTest, TestGatherStructOfStructs) +{ + using namespace cudf::test; + + // Testing gather() on struct> + + auto const numeric_column_exemplar = []() { + return fixed_width_column_wrapper{ + {5, 10, 15, 20, 25, 30, 35, 45, 50, 55, 60, 65, 70, 75}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return !(i % 3); })}; + }; + + auto numeric_column = numeric_column_exemplar(); + auto structs_column = structs_column_wrapper{{numeric_column}}; + + auto const struct_of_structs_column = structs_column_wrapper{{structs_column}}.release(); + + // Gather to new struct column. 
+ auto const scatter_map = std::vector{-1, 4, 3, 2, 1, 7, 3}; + auto const gather_map_col = + fixed_width_column_wrapper(scatter_map.begin(), scatter_map.end()).release(); + + auto const gathered_table = + cudf::gather(cudf::table_view{std::vector{struct_of_structs_column->view()}}, + gather_map_col->view()); + + auto const gathered_struct_col = gathered_table->get_column(0); + auto const gathered_struct_col_view = cudf::structs_column_view{gathered_struct_col}; + + // Verify that the underlying numeric column presents as if + // it had itself been gathered individually. + + auto const numeric_column_before_gathering = numeric_column_exemplar().release(); + auto const expected_gathered_column = + cudf::gather( + cudf::table_view{std::vector{numeric_column_before_gathering->view()}}, + gather_map_col->view()) + ->get_column(0); + + expect_columns_equivalent(expected_gathered_column, gathered_struct_col.child(0).child(0).view()); +} + +TYPED_TEST(TypedStructScatterTest, TestGatherStructOfListOfStructs) +{ + using namespace cudf::test; + + // Testing gather() on struct> + + auto const numeric_column_exemplar = []() { + return fixed_width_column_wrapper{ + {5, 10, 15, 20, 25, 30, 35, 45, 50, 55, 60, 65, 70, 75}}; + }; + + auto numeric_column = numeric_column_exemplar(); + auto structs_column = structs_column_wrapper{{numeric_column}}.release(); + auto list_of_structs_column = cudf::make_lists_column( + 7, + fixed_width_column_wrapper{0, 2, 4, 6, 8, 10, 12, 14}.release(), + std::move(structs_column), + cudf::UNKNOWN_NULL_COUNT, + {}); + + std::vector> column_vector; + column_vector.push_back(std::move(list_of_structs_column)); + auto const struct_of_list_of_structs = structs_column_wrapper{std::move(column_vector)}.release(); + + // Gather to new struct column. 
+ auto const scatter_map = std::vector{-1, 4, 3, 2, 1}; + auto const gather_map_col = + fixed_width_column_wrapper(scatter_map.begin(), scatter_map.end()).release(); + + auto const gathered_table = cudf::gather( + cudf::table_view{std::vector{struct_of_list_of_structs->view()}}, + gather_map_col->view()); + + auto const gathered_struct_col = gathered_table->get_column(0); + auto const gathered_struct_col_view = cudf::structs_column_view{gathered_struct_col}; + + // Construct expected gather result. + + auto expected_numeric_col = + fixed_width_column_wrapper{{70, 75, 50, 55, 35, 45, 25, 30, 15, 20}}; + auto expected_struct_col = structs_column_wrapper{{expected_numeric_col}}.release(); + auto expected_list_of_structs_column = + cudf::make_lists_column(5, + fixed_width_column_wrapper{0, 2, 4, 6, 8, 10}.release(), + std::move(expected_struct_col), + cudf::UNKNOWN_NULL_COUNT, + {}); + std::vector> expected_vector_of_columns; + expected_vector_of_columns.push_back(std::move(expected_list_of_structs_column)); + auto const expected_struct_of_list_of_struct = + structs_column_wrapper{std::move(expected_vector_of_columns)}.release(); + + expect_columns_equivalent(expected_struct_of_list_of_struct->view(), gathered_struct_col.view()); +} + +TYPED_TEST(TypedStructScatterTest, TestGatherStructOfStructsWithValidity) +{ + using namespace cudf::test; + + // Testing gather() on struct> + + // Factory to construct numeric column with configurable null-mask. + auto const numeric_column_exemplar = [](nvstd::function pred) { + return fixed_width_column_wrapper{ + {5, 10, 15, 20, 25, 30, 35, 45, 50, 55, 60, 65, 70, 75}, + cudf::detail::make_counting_transform_iterator(0, [=](auto i) { return pred(i); })}; + }; + + // Validity predicates. + auto const every_3rd_element_null = [](size_type i) { return !(i % 3); }; + auto const twelfth_element_null = [](size_type i) { return i != 11; }; + + // Construct struct-of-struct-of-numerics. 
+ auto numeric_column = numeric_column_exemplar(every_3rd_element_null); + auto structs_column = structs_column_wrapper{ + {numeric_column}, cudf::detail::make_counting_transform_iterator(0, twelfth_element_null)}; + auto struct_of_structs_column = structs_column_wrapper{{structs_column}}.release(); + + // Gather to new struct column. + auto const scatter_map = std::vector{-1, 4, 3, 2, 1, 7, 3}; + auto const gather_map_col = + fixed_width_column_wrapper(scatter_map.begin(), scatter_map.end()).release(); + + auto const gathered_table = + cudf::gather(cudf::table_view{std::vector{struct_of_structs_column->view()}}, + gather_map_col->view()); + + auto const gathered_struct_col = gathered_table->get_column(0); + auto const gathered_struct_col_view = cudf::structs_column_view{gathered_struct_col}; + + // Verify that the underlying numeric column presents as if + // it had itself been gathered individually. + + auto const final_predicate = [=](size_type i) { + return every_3rd_element_null(i) && twelfth_element_null(i); + }; + auto const numeric_column_before_gathering = numeric_column_exemplar(final_predicate).release(); + auto const expected_gathered_column = + cudf::gather( + cudf::table_view{std::vector{numeric_column_before_gathering->view()}}, + gather_map_col->view()) + ->get_column(0); + + expect_columns_equivalent(expected_gathered_column, gathered_struct_col.child(0).child(0).view()); +} +#endif From ed9109560d52f26de5bd7c1e201bc18c7ba71a24 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 29 Mar 2021 14:01:35 -0600 Subject: [PATCH 09/32] Rename variables, and add EmptyInputTest test case --- cpp/tests/copying/scatter_struct_tests.cu | 36 ++++++++++++++++------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/cpp/tests/copying/scatter_struct_tests.cu b/cpp/tests/copying/scatter_struct_tests.cu index 1134000535a..105e958ac46 100644 --- a/cpp/tests/copying/scatter_struct_tests.cu +++ b/cpp/tests/copying/scatter_struct_tests.cu @@ -104,27 
+104,43 @@ auto get_expected_column(std::vector const& input_values, } } // namespace +TYPED_TEST(TypedStructScatterTest, EmptyInputTest) +{ + auto child_col_src = fixed_width_column_wrapper{}; + auto const structs_src = structs_column_wrapper{{child_col_src}, std::vector{}}.release(); + + auto child_col_tgt = fixed_width_column_wrapper{}; + auto const structs_tgt = structs_column_wrapper{{child_col_tgt}, std::vector{}}.release(); + + auto const source = cudf::table_view{std::vector{structs_src->view()}}; + auto const target = cudf::table_view{std::vector{structs_tgt->view()}}; + auto const scatter_map = fixed_width_column_wrapper{}.release(); + + auto const result = cudf::scatter(source, scatter_map->view(), target); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(structs_tgt->view(), result->get_column(0)); +} + TYPED_TEST(TypedStructScatterTest, EmptyScatterMapTest) { - auto const ages_src = std::vector{5, 10, 15, 20, 25, 30}; - auto const ages_validity_src = + auto const data_src = std::vector{5, 10, 15, 20, 25, 30}; + auto const child_validity_src = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; }); - auto ages_col_src = fixed_width_column_wrapper{ - ages_src.begin(), ages_src.end(), ages_validity_src}; + auto child_col_src = fixed_width_column_wrapper{ + data_src.begin(), data_src.end(), child_validity_src}; auto const structs_validity_src = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 5; }); - auto const structs_src = structs_column_wrapper{{ages_col_src}, structs_validity_src}.release(); + auto const structs_src = structs_column_wrapper{{child_col_src}, structs_validity_src}.release(); - auto const ages_tgt = std::vector{50, 40, 55, 70, 85, 90}; - auto const ages_validity_tgt = + auto const data_tgt = std::vector{50, 40, 55, 70, 85, 90}; + auto const child_validity_tgt = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; }); - auto ages_col_tgt = fixed_width_column_wrapper{ - 
ages_tgt.begin(), ages_tgt.end(), ages_validity_tgt}; + auto child_col_tgt = fixed_width_column_wrapper{ + data_tgt.begin(), data_tgt.end(), child_validity_tgt}; auto const structs_validity_tgt = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; }); - auto const structs_tgt = structs_column_wrapper{{ages_col_tgt}, structs_validity_tgt}.release(); + auto const structs_tgt = structs_column_wrapper{{child_col_tgt}, structs_validity_tgt}.release(); auto const source = cudf::table_view{std::vector{structs_src->view()}}; auto const target = cudf::table_view{std::vector{structs_tgt->view()}}; From fd664b4c72c0af36aebb05aca94573192c87c54a Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 29 Mar 2021 14:12:08 -0600 Subject: [PATCH 10/32] Add SimpleScatterTest test case --- cpp/tests/copying/scatter_struct_tests.cu | 41 +++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/cpp/tests/copying/scatter_struct_tests.cu b/cpp/tests/copying/scatter_struct_tests.cu index 105e958ac46..cebabced7be 100644 --- a/cpp/tests/copying/scatter_struct_tests.cu +++ b/cpp/tests/copying/scatter_struct_tests.cu @@ -149,6 +149,47 @@ TYPED_TEST(TypedStructScatterTest, EmptyScatterMapTest) auto const result = cudf::scatter(source, scatter_map->view(), target); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(structs_tgt->view(), result->get_column(0)); } + +TYPED_TEST(TypedStructScatterTest, SimpleScatterTest) +{ + auto const data_src = std::vector{5, 10, 15, 20, 25, 30}; + auto const child_validity_src = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; }); + auto child_col_src = fixed_width_column_wrapper{ + data_src.begin(), data_src.end(), child_validity_src}; + + auto const structs_validity_src = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 5; }); + auto const structs_src = structs_column_wrapper{{child_col_src}, structs_validity_src}.release(); + + auto const data_tgt = std::vector{50, 40, 55, 70, 
85, 90}; + auto const child_validity_tgt = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; }); + auto child_col_tgt = fixed_width_column_wrapper{ + data_tgt.begin(), data_tgt.end(), child_validity_tgt}; + + auto const structs_validity_tgt = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; }); + auto const structs_tgt = structs_column_wrapper{{child_col_tgt}, structs_validity_tgt}.release(); + + auto const data_expected = std::vector{50, 40, 55, 70, 85, 90}; + auto const child_validity_expected = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; }); + auto child_col_expected = fixed_width_column_wrapper{ + data_tgt.begin(), data_tgt.end(), child_validity_expected}; + + auto const structs_validity_expected = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; }); + auto const structs_expected = + structs_column_wrapper{{child_col_expected}, structs_validity_expected}.release(); + + auto const source = cudf::table_view{std::vector{structs_src->view()}}; + auto const target = cudf::table_view{std::vector{structs_tgt->view()}}; + auto const scatter_map = fixed_width_column_wrapper{}.release(); + + auto const result = cudf::scatter(source, scatter_map->view(), target); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(structs_expected->view(), result->get_column(0)); +} #if 0 TYPED_TEST(TypedStructScatterTest, TestSimpleStructGather) { From ab66753c5458b103b77191afe187f115bef55d4f Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 29 Mar 2021 14:55:21 -0600 Subject: [PATCH 11/32] Rewrite test functions --- cpp/tests/copying/scatter_struct_tests.cu | 63 +++++++++++++---------- 1 file changed, 36 insertions(+), 27 deletions(-) diff --git a/cpp/tests/copying/scatter_struct_tests.cu b/cpp/tests/copying/scatter_struct_tests.cu index cebabced7be..7f52549505a 100644 --- a/cpp/tests/copying/scatter_struct_tests.cu +++ b/cpp/tests/copying/scatter_struct_tests.cu @@ 
-51,6 +51,15 @@ using TestTypes = cudf::test::Concat struct column_wrapper_constructor { template @@ -106,48 +115,48 @@ auto get_expected_column(std::vector const& input_values, TYPED_TEST(TypedStructScatterTest, EmptyInputTest) { - auto child_col_src = fixed_width_column_wrapper{}; - auto const structs_src = structs_column_wrapper{{child_col_src}, std::vector{}}.release(); + auto child_col_src = fixed_width_column_wrapper{}; + auto child_col_tgt = fixed_width_column_wrapper{}; - auto child_col_tgt = fixed_width_column_wrapper{}; + auto const structs_src = structs_column_wrapper{{child_col_src}, std::vector{}}.release(); auto const structs_tgt = structs_column_wrapper{{child_col_tgt}, std::vector{}}.release(); - auto const source = cudf::table_view{std::vector{structs_src->view()}}; - auto const target = cudf::table_view{std::vector{structs_tgt->view()}}; + auto const source = cudf::table_view{std::vector{structs_src->view()}}; + auto const target = cudf::table_view{std::vector{structs_tgt->view()}}; + auto const scatter_map = fixed_width_column_wrapper{}.release(); + auto const result = cudf::scatter(source, scatter_map->view(), target); - auto const result = cudf::scatter(source, scatter_map->view(), target); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(structs_tgt->view(), result->get_column(0)); + check_columns(structs_src->view(), result->get_column(0)); + check_columns(structs_tgt->view(), result->get_column(0)); } TYPED_TEST(TypedStructScatterTest, EmptyScatterMapTest) { - auto const data_src = std::vector{5, 10, 15, 20, 25, 30}; - auto const child_validity_src = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; }); auto child_col_src = fixed_width_column_wrapper{ - data_src.begin(), data_src.end(), child_validity_src}; + {5, 10, 15, 20, 25, 30}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; })}; + auto child_col_tgt = fixed_width_column_wrapper{ + {50, 40, 55, 70, 85, 90}, + 
cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; - auto const structs_validity_src = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 5; }); - auto const structs_src = structs_column_wrapper{{child_col_src}, structs_validity_src}.release(); + auto const structs_src = structs_column_wrapper{ + {child_col_src}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { + return i != 5; + })}.release(); - auto const data_tgt = std::vector{50, 40, 55, 70, 85, 90}; - auto const child_validity_tgt = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; }); - auto child_col_tgt = fixed_width_column_wrapper{ - data_tgt.begin(), data_tgt.end(), child_validity_tgt}; + auto const structs_tgt = structs_column_wrapper{ + {child_col_tgt}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { + return i != 3; + })}.release(); - auto const structs_validity_tgt = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; }); - auto const structs_tgt = structs_column_wrapper{{child_col_tgt}, structs_validity_tgt}.release(); + auto const source = cudf::table_view{std::vector{structs_src->view()}}; + auto const target = cudf::table_view{std::vector{structs_tgt->view()}}; - auto const source = cudf::table_view{std::vector{structs_src->view()}}; - auto const target = cudf::table_view{std::vector{structs_tgt->view()}}; auto const scatter_map = fixed_width_column_wrapper{}.release(); + auto const result = cudf::scatter(source, scatter_map->view(), target); - auto const result = cudf::scatter(source, scatter_map->view(), target); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(structs_tgt->view(), result->get_column(0)); + check_columns(structs_tgt->view(), result->get_column(0)); } TYPED_TEST(TypedStructScatterTest, SimpleScatterTest) @@ -188,7 +197,7 @@ TYPED_TEST(TypedStructScatterTest, SimpleScatterTest) auto const scatter_map = fixed_width_column_wrapper{}.release(); auto 
const result = cudf::scatter(source, scatter_map->view(), target); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(structs_expected->view(), result->get_column(0)); + check_columns(structs_expected->view(), result->get_column(0)); } #if 0 TYPED_TEST(TypedStructScatterTest, TestSimpleStructGather) From 3528b1083c466e3b22a230611db8e59211568a11 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 29 Mar 2021 15:16:53 -0600 Subject: [PATCH 12/32] Minor changes --- cpp/tests/copying/scatter_struct_tests.cu | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/cpp/tests/copying/scatter_struct_tests.cu b/cpp/tests/copying/scatter_struct_tests.cu index 7f52549505a..b7d6ce3a747 100644 --- a/cpp/tests/copying/scatter_struct_tests.cu +++ b/cpp/tests/copying/scatter_struct_tests.cu @@ -136,15 +136,14 @@ TYPED_TEST(TypedStructScatterTest, EmptyScatterMapTest) auto child_col_src = fixed_width_column_wrapper{ {5, 10, 15, 20, 25, 30}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; })}; - auto child_col_tgt = fixed_width_column_wrapper{ - {50, 40, 55, 70, 85, 90}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; - auto const structs_src = structs_column_wrapper{ {child_col_src}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 5; })}.release(); + auto child_col_tgt = fixed_width_column_wrapper{ + {50, 40, 55, 70, 85, 90}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; auto const structs_tgt = structs_column_wrapper{ {child_col_tgt}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; @@ -192,11 +191,11 @@ TYPED_TEST(TypedStructScatterTest, SimpleScatterTest) auto const structs_expected = structs_column_wrapper{{child_col_expected}, structs_validity_expected}.release(); - auto const source = cudf::table_view{std::vector{structs_src->view()}}; - auto const target = 
cudf::table_view{std::vector{structs_tgt->view()}}; - auto const scatter_map = fixed_width_column_wrapper{}.release(); + auto const source = cudf::table_view{std::vector{structs_src->view()}}; + auto const target = cudf::table_view{std::vector{structs_tgt->view()}}; - auto const result = cudf::scatter(source, scatter_map->view(), target); + auto const scatter_map = fixed_width_column_wrapper{}.release(); + auto const result = cudf::scatter(source, scatter_map->view(), target); check_columns(structs_expected->view(), result->get_column(0)); } #if 0 From b3ee1a7a3d7f236e38498a3daecbed590bca8a10 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 30 Mar 2021 09:34:35 -0600 Subject: [PATCH 13/32] Rewrite tests for struct scattering --- cpp/tests/copying/scatter_struct_tests.cu | 248 ++++++++++++---------- 1 file changed, 139 insertions(+), 109 deletions(-) diff --git a/cpp/tests/copying/scatter_struct_tests.cu b/cpp/tests/copying/scatter_struct_tests.cu index b7d6ce3a747..dec0cef52b5 100644 --- a/cpp/tests/copying/scatter_struct_tests.cu +++ b/cpp/tests/copying/scatter_struct_tests.cu @@ -32,13 +32,10 @@ #include -using column_vector = std::vector>; -using cudf::size_type; +// using column_vector = std::vector>; +// using cudf::size_type; using namespace cudf::test; -struct StructScatterTest : public cudf::test::BaseFixture { -}; - template struct TypedStructScatterTest : public cudf::test::BaseFixture { }; @@ -48,156 +45,189 @@ using TestTypes = cudf::test::Concat; -TYPED_TEST_CASE(TypedStructScatterTest, TestTypes); +TYPED_TEST_CASE(TypedStructScatterTest, int); -namespace { -void check_columns(cudf::column_view const& lhs, cudf::column_view const& rhs) +// Test case when all input columns are empty +TYPED_TEST(TypedStructScatterTest, EmptyInputTest) { - if (cudf::is_floating_point(lhs.type())) { - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(lhs, rhs); - } else { - CUDF_TEST_EXPECT_COLUMNS_EQUAL(lhs, rhs); - } -} + auto child_col_src = fixed_width_column_wrapper{}; + auto 
const structs_src = structs_column_wrapper{{child_col_src}, std::vector{}}.release(); + auto const source = cudf::table_view{std::vector{structs_src->view()}}; -template -struct column_wrapper_constructor { - template - auto operator()(ValueIter begin, ValueIter end, ValidityIter validity_begin) const - { - return cudf::test::fixed_width_column_wrapper{ - begin, end, validity_begin}; - } -}; + auto child_col_tgt = fixed_width_column_wrapper{}; + auto const structs_tgt = structs_column_wrapper{{child_col_tgt}, std::vector{}}.release(); + auto const target = cudf::table_view{std::vector{structs_tgt->view()}}; -template <> -struct column_wrapper_constructor { - template - cudf::test::strings_column_wrapper operator()(ValueIter begin, - ValueIter end, - ValidityIter validity_begin) const - { - return cudf::test::strings_column_wrapper{begin, end, validity_begin}; - } -}; + auto const scatter_map = fixed_width_column_wrapper{}.release(); + auto const result = cudf::scatter(source, scatter_map->view(), target); -template -auto get_expected_column(std::vector const& input_values, - std::vector const& input_validity, - std::vector const& struct_validity, - std::vector const& scatter_map) -{ - auto is_valid = // Validity predicate. - [&input_values, &input_validity, &struct_validity, &scatter_map](auto gather_index) { - assert(gather_index >= 0 && gather_index < scatter_map.size() || - "Gather-index out of range."); - - auto i{scatter_map[gather_index]}; // Index into input_values. - - return (i >= 0 && i < static_cast(input_values.size())) && - (struct_validity.empty() || struct_validity[i]) && - (input_validity.empty() || input_validity[i]); - }; - - auto expected_row_count{scatter_map.size()}; - auto gather_iter = cudf::detail::make_counting_transform_iterator( - 0, [is_valid, &input_values, &scatter_map](auto i) { - return is_valid(i) ? 
input_values[scatter_map[i]] : SourceElementT{}; - }); - - return column_wrapper_constructor()( - gather_iter, - gather_iter + expected_row_count, - cudf::detail::make_counting_transform_iterator(0, is_valid)) - .release(); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(structs_src->view(), result->get_column(0)); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(structs_tgt->view(), result->get_column(0)); } -} // namespace -TYPED_TEST(TypedStructScatterTest, EmptyInputTest) +// Test case when only the scatter map is empty +TYPED_TEST(TypedStructScatterTest, EmptyScatterMapTest) { - auto child_col_src = fixed_width_column_wrapper{}; - auto child_col_tgt = fixed_width_column_wrapper{}; - - auto const structs_src = structs_column_wrapper{{child_col_src}, std::vector{}}.release(); - auto const structs_tgt = structs_column_wrapper{{child_col_tgt}, std::vector{}}.release(); + auto constexpr null = std::numeric_limits::max(); // Null child element + auto constexpr XXX = std::numeric_limits::max(); // Null struct element + auto child_col_src = fixed_width_column_wrapper{ + {0, 1, 2, 3, null, XXX}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; })}; + auto const structs_src = structs_column_wrapper{ + {child_col_src}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { + return i != 5; + })}.release(); auto const source = cudf::table_view{std::vector{structs_src->view()}}; + + auto child_col_tgt = fixed_width_column_wrapper{ + {50, null, 70, XXX, 90, 100}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; + auto const structs_tgt = structs_column_wrapper{ + {child_col_tgt}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { + return i != 3; + })}.release(); auto const target = cudf::table_view{std::vector{structs_tgt->view()}}; auto const scatter_map = fixed_width_column_wrapper{}.release(); auto const result = cudf::scatter(source, scatter_map->view(), target); - - check_columns(structs_src->view(), 
result->get_column(0)); - check_columns(structs_tgt->view(), result->get_column(0)); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(structs_tgt->view(), result->get_column(0)); } -TYPED_TEST(TypedStructScatterTest, EmptyScatterMapTest) +TYPED_TEST(TypedStructScatterTest, ScatterAsCopyTest) { + auto constexpr null = std::numeric_limits::max(); // Null child element + auto constexpr XXX = std::numeric_limits::max(); // Null struct element + auto child_col_src = fixed_width_column_wrapper{ - {5, 10, 15, 20, 25, 30}, + {0, 1, 2, 3, null, XXX}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; })}; auto const structs_src = structs_column_wrapper{ {child_col_src}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 5; })}.release(); + auto const source = cudf::table_view{std::vector{structs_src->view()}}; auto child_col_tgt = fixed_width_column_wrapper{ - {50, 40, 55, 70, 85, 90}, + {50, null, 70, XXX, 90, 100}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; auto const structs_tgt = structs_column_wrapper{ {child_col_tgt}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; })}.release(); + auto target = cudf::table_view{std::vector{structs_tgt->view()}}; - auto const source = cudf::table_view{std::vector{structs_src->view()}}; - auto const target = cudf::table_view{std::vector{structs_tgt->view()}}; - - auto const scatter_map = fixed_width_column_wrapper{}.release(); + // Scatter as copy: the target should be the same as source + auto const scatter_map = fixed_width_column_wrapper{0, 1, 2, 3, 4, 5}.release(); auto const result = cudf::scatter(source, scatter_map->view(), target); - - check_columns(structs_tgt->view(), result->get_column(0)); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(structs_src->view(), result->get_column(0)); } -TYPED_TEST(TypedStructScatterTest, SimpleScatterTest) +TYPED_TEST(TypedStructScatterTest, ScatterAsLeftShiftTest) { - auto const data_src = std::vector{5, 
10, 15, 20, 25, 30}; - auto const child_validity_src = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; }); - auto child_col_src = fixed_width_column_wrapper{ - data_src.begin(), data_src.end(), child_validity_src}; + auto constexpr null = std::numeric_limits::max(); // Null child element + auto constexpr XXX = std::numeric_limits::max(); // Null struct element - auto const structs_validity_src = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 5; }); - auto const structs_src = structs_column_wrapper{{child_col_src}, structs_validity_src}.release(); + auto child_col_src = fixed_width_column_wrapper{ + {0, 1, 2, 3, null, XXX}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; })}; + auto const structs_src = structs_column_wrapper{ + {child_col_src}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { + return i != 5; + })}.release(); + auto const source = cudf::table_view{std::vector{structs_src->view()}}; - auto const data_tgt = std::vector{50, 40, 55, 70, 85, 90}; - auto const child_validity_tgt = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; }); auto child_col_tgt = fixed_width_column_wrapper{ - data_tgt.begin(), data_tgt.end(), child_validity_tgt}; - - auto const structs_validity_tgt = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; }); - auto const structs_tgt = structs_column_wrapper{{child_col_tgt}, structs_validity_tgt}.release(); + {50, null, 70, XXX, 90, 100}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; + auto const structs_tgt = structs_column_wrapper{ + {child_col_tgt}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { + return i != 3; + })}.release(); + auto target = cudf::table_view{std::vector{structs_tgt->view()}}; - auto const data_expected = std::vector{50, 40, 55, 70, 85, 90}; - auto const child_validity_expected = - 
cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; }); auto child_col_expected = fixed_width_column_wrapper{ - data_tgt.begin(), data_tgt.end(), child_validity_expected}; + {2, 3, null, XXX, 0, 1}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 2; })}; + auto structs_expected = structs_column_wrapper{ + {child_col_expected}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { + return i != 3; + })}.release(); + + auto const scatter_map = fixed_width_column_wrapper{-2, -1, 0, 1, 2, 3}.release(); + auto const result = cudf::scatter(source, scatter_map->view(), target); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(structs_expected->view(), result->get_column(0)); +} - auto const structs_validity_expected = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; }); - auto const structs_expected = - structs_column_wrapper{{child_col_expected}, structs_validity_expected}.release(); +TYPED_TEST(TypedStructScatterTest, PartiallyScatterTest) +{ + auto constexpr null = std::numeric_limits::max(); // Null child element + auto constexpr XXX = std::numeric_limits::max(); // Null struct element + auto child_col_src = fixed_width_column_wrapper{ + {0, 1, 2, 3, null, XXX}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; })}; + auto const structs_src = structs_column_wrapper{ + {child_col_src}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { + return i != 5; + })}.release(); auto const source = cudf::table_view{std::vector{structs_src->view()}}; - auto const target = cudf::table_view{std::vector{structs_tgt->view()}}; - auto const scatter_map = fixed_width_column_wrapper{}.release(); + auto child_col_tgt = fixed_width_column_wrapper{ + {50, null, 70, XXX, 90, 100}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; + auto const structs_tgt = structs_column_wrapper{ + {child_col_tgt}, 
cudf::detail::make_counting_transform_iterator(0, [](auto i) { + return i != 3; + })}.release(); + auto target = cudf::table_view{std::vector{structs_tgt->view()}}; + + auto child_col_expected = fixed_width_column_wrapper{ + {1, null, 70, 80, 0, 100}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; + auto structs_expected = structs_column_wrapper{ + {child_col_expected}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { + return true; + })}.release(); + + auto const scatter_map = fixed_width_column_wrapper{-2, 0}.release(); auto const result = cudf::scatter(source, scatter_map->view(), target); - check_columns(structs_expected->view(), result->get_column(0)); + + printf("line %d\n\n\n", __LINE__); + print(structs_expected->view()); + printf("line %d\n\n\n", __LINE__); + print(result->get_column(0)); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(structs_expected->view(), result->get_column(0)); } + +TYPED_TEST(TypedStructScatterTest, X) +{ +#if 0 + child_col_expected = fixed_width_column_wrapper{ + {50, 60, 70, 80, 90, 100}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; + structs_expected = structs_column_wrapper{ + {child_col_tgt}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { + return i != 3; + })}.release(); + scatter_map = fixed_width_column_wrapper{}.release(); + result = cudf::scatter(source, scatter_map->view(), target); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(structs_expected->view(), result->get_column(0)); + + child_col_expected = fixed_width_column_wrapper{ + {50, 60, 70, 80, 90, 100}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; + structs_expected = structs_column_wrapper{ + {child_col_tgt}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { + return i != 3; + })}.release(); + scatter_map = fixed_width_column_wrapper{}.release(); + result = cudf::scatter(source, scatter_map->view(), target); + 
CUDF_TEST_EXPECT_COLUMNS_EQUAL(structs_expected->view(), result->get_column(0)); +#endif +} + #if 0 TYPED_TEST(TypedStructScatterTest, TestSimpleStructGather) { From 9663c3c03059a457cf36928ae949a4f733d88d04 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 30 Mar 2021 11:11:02 -0600 Subject: [PATCH 14/32] Fix null mask: it should be copied from the target to the result before gathering --- cpp/include/cudf/detail/scatter.cuh | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/cpp/include/cudf/detail/scatter.cuh b/cpp/include/cudf/detail/scatter.cuh index 1ffff4dcc07..b5e75a2eba8 100644 --- a/cpp/include/cudf/detail/scatter.cuh +++ b/cpp/include/cudf/detail/scatter.cuh @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -254,22 +255,21 @@ struct column_scatterer_impl { if (nullable) { auto const gather_map = scatter_to_gather(scatter_map_begin, scatter_map_end, source.size(), stream); - gather_bitmask( - // Table view of struct column. - cudf::table_view{ - std::vector{structs_src.child_begin(), structs_src.child_end()}}, - gather_map.begin(), - output_struct_members, - gather_bitmask_op::NULLIFY, - stream, - mr); + gather_bitmask(cudf::table_view{std::vector{structs_src.child_begin(), + structs_src.child_end()}}, + gather_map.begin(), + output_struct_members, + gather_bitmask_op::PASSTHROUGH, + stream, + mr); } return cudf::make_structs_column( - source.size(), + target.size(), std::move(output_struct_members), - 0, - rmm::device_buffer{0, stream, mr}, // Null mask will be fixed up in cudf::scatter(). + target.null_count(), + cudf::detail::copy_bitmask( + target, stream, mr), // Null mask will be fixed up in cudf::scatter(). 
stream, mr); } From cade29c1da5d9b34863db972f03ce29b76c447ae Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 30 Mar 2021 13:24:42 -0600 Subject: [PATCH 15/32] Add some debugging, and fix null mask again --- cpp/include/cudf/detail/scatter.cuh | 110 ++++++++++++++++++++++++++-- 1 file changed, 103 insertions(+), 7 deletions(-) diff --git a/cpp/include/cudf/detail/scatter.cuh b/cpp/include/cudf/detail/scatter.cuh index b5e75a2eba8..101fb3425bf 100644 --- a/cpp/include/cudf/detail/scatter.cuh +++ b/cpp/include/cudf/detail/scatter.cuh @@ -35,6 +35,8 @@ #include +#include + namespace cudf { namespace detail { @@ -84,6 +86,31 @@ auto scatter_to_gather(MapIterator scatter_map_begin, return gather_map; } +template +rmm::device_uvector scatter_to_gather_inv(MapIterator scatter_map_begin, + MapIterator scatter_map_end, + size_type gather_rows, + rmm::cuda_stream_view stream) +{ + using MapValueType = typename thrust::iterator_traits::value_type; + + auto gather_map = rmm::device_uvector(gather_rows, stream); + thrust::sequence(rmm::exec_policy(stream), gather_map.begin(), gather_map.end(), 0); + + // Convert scatter map to a gather map + thrust::for_each( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(std::distance(scatter_map_begin, scatter_map_end)), + [gather_rows, ptr = gather_map.begin(), scatter_map_begin = scatter_map_begin] __device__( + MapValueType idx) { + MapValueType row = *(scatter_map_begin + idx); + ptr[row] = gather_rows; + }); + + return gather_map; +} + template struct column_scatterer_impl { std::unique_ptr operator()(column_view const& source, @@ -255,6 +282,30 @@ struct column_scatterer_impl { if (nullable) { auto const gather_map = scatter_to_gather(scatter_map_begin, scatter_map_end, source.size(), stream); + + int n = (int)std::distance(gather_map.begin(), gather_map.end()); + // thrust::host_vector h(gather_map.begin(), gather_map.end()); + printf("\n\n"); + // for (int i = 0; i < n; ++i) { 
printf("h: %d\n", gather_map.element(i, stream)); } + + printf("line %d \n\n", __LINE__); + cudf::test::print(*structs_src.child_begin()); + + printf("line %d \n\n", __LINE__); + cudf::test::print(*structs_target.child_begin()); + + printf("line %d \n\n", __LINE__); + cudf::test::print((*output_struct_members.begin())->view()); + + printf("map siE:%d\n", (int)std::distance(gather_map.begin(), gather_map.end())); + printf("\n\n"); + printf("num row: %d\n", source.size()); + printf("num c row: %d\n", structs_src.child_begin()->size()); + + printf("source null count: %d\n", (*structs_src.child_begin()).null_count()); + printf("target null count: %d\n", (*structs_target.child_begin()).null_count()); + printf("result null count: %d\n", (*output_struct_members.begin())->view().null_count()); + gather_bitmask(cudf::table_view{std::vector{structs_src.child_begin(), structs_src.child_end()}}, gather_map.begin(), @@ -262,16 +313,46 @@ struct column_scatterer_impl { gather_bitmask_op::PASSTHROUGH, stream, mr); + + printf("result null count again: %d\n", + (*output_struct_members.begin())->view().null_count()); + + printf("line %d \n\n", __LINE__); + cudf::test::print((*output_struct_members.begin())->view()); } - return cudf::make_structs_column( - target.size(), + std::vector> result; + result.emplace_back(cudf::make_structs_column( + source.size(), std::move(output_struct_members), - target.null_count(), - cudf::detail::copy_bitmask( - target, stream, mr), // Null mask will be fixed up in cudf::scatter(). + 0, + rmm::device_buffer{0, stream, mr}, // Null mask will be fixed up in cudf::scatter(). 
stream, - mr); + mr)); + + // Only gather bitmask from the target at the positions that have not been scatter onto + auto const gather_map = + scatter_to_gather_inv(scatter_map_begin, scatter_map_end, source.size(), stream); + gather_bitmask(table_view{std::vector{target}}, + gather_map.begin(), + result, + gather_bitmask_op::PASSTHROUGH, + stream, + mr); + + return std::move(result.front()); + + // std::vector> output_struct_members(structs_src.num_children()); + // for (auto& col : output_struct_members) { col->set_null_count(0); } + // + // return cudf::make_structs_column( + // source.size(), + // std::move(output_struct_members), + // target.null_count(), + // cudf::detail::copy_bitmask( + // target, stream, mr), // Null mask will be fixed up in cudf::scatter(). + // stream, + // mr); } }; @@ -362,14 +443,29 @@ std::unique_ptr
scatter( mr); }); + printf("line %d \n\n", __LINE__); + auto const nullable = std::any_of(source.begin(), source.end(), [](auto const& col) { return col.nullable(); }) or std::any_of(target.begin(), target.end(), [](auto const& col) { return col.nullable(); }); if (nullable) { + printf("nullable\n"); auto gather_map = scatter_to_gather( updated_scatter_map_begin, updated_scatter_map_end, target.num_rows(), stream); + + int n = (int)std::distance(gather_map.begin(), gather_map.end()); + // thrust::host_vector h(gather_map.begin(), gather_map.end()); + printf("\n\n"); + // for (int i = 0; i < n; ++i) { printf("gather map: %d\n", gather_map.element(i, stream)); } + printf("source null count: %d\n", (*source.begin()).null_count()); + printf("target null count: %d\n", (*target.begin()).null_count()); + printf("result null count: %d\n", (*result.begin())->null_count()); + gather_bitmask(source, gather_map.begin(), result, gather_bitmask_op::PASSTHROUGH, stream, mr); - } + + printf("result null count again: %d\n", (*result.begin())->null_count()); + } else + printf("no t nullable\n"); return std::make_unique
(std::move(result)); } } // namespace detail From 723d0eba1f11ba5fd0f92a6d512daba177a1eeaf Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 30 Mar 2021 14:20:31 -0600 Subject: [PATCH 16/32] Finish scatter test for complex struct data --- cpp/tests/copying/scatter_struct_tests.cu | 144 ++++++++++++++++------ 1 file changed, 106 insertions(+), 38 deletions(-) diff --git a/cpp/tests/copying/scatter_struct_tests.cu b/cpp/tests/copying/scatter_struct_tests.cu index dec0cef52b5..90eb46f97f9 100644 --- a/cpp/tests/copying/scatter_struct_tests.cu +++ b/cpp/tests/copying/scatter_struct_tests.cu @@ -36,6 +36,9 @@ // using cudf::size_type; using namespace cudf::test; +struct StructScatterTest : public cudf::test::BaseFixture { +}; + template struct TypedStructScatterTest : public cudf::test::BaseFixture { }; @@ -120,6 +123,7 @@ TYPED_TEST(TypedStructScatterTest, ScatterAsCopyTest) // Scatter as copy: the target should be the same as source auto const scatter_map = fixed_width_column_wrapper{0, 1, 2, 3, 4, 5}.release(); auto const result = cudf::scatter(source, scatter_map->view(), target); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(structs_src->view(), result->get_column(0)); } @@ -144,7 +148,7 @@ TYPED_TEST(TypedStructScatterTest, ScatterAsLeftShiftTest) {child_col_tgt}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; })}.release(); - auto target = cudf::table_view{std::vector{structs_tgt->view()}}; + auto const target = cudf::table_view{std::vector{structs_tgt->view()}}; auto child_col_expected = fixed_width_column_wrapper{ {2, 3, null, XXX, 0, 1}, @@ -159,7 +163,7 @@ TYPED_TEST(TypedStructScatterTest, ScatterAsLeftShiftTest) CUDF_TEST_EXPECT_COLUMNS_EQUAL(structs_expected->view(), result->get_column(0)); } -TYPED_TEST(TypedStructScatterTest, PartiallyScatterTest) +TYPED_TEST(TypedStructScatterTest, SimpleScatterTests) { auto constexpr null = std::numeric_limits::max(); // Null child element auto constexpr XXX = std::numeric_limits::max(); // Null 
struct element @@ -182,50 +186,114 @@ TYPED_TEST(TypedStructScatterTest, PartiallyScatterTest) })}.release(); auto target = cudf::table_view{std::vector{structs_tgt->view()}}; - auto child_col_expected = fixed_width_column_wrapper{ - {1, null, 70, 80, 0, 100}, + auto child_col_expected1 = fixed_width_column_wrapper{ + {1, null, 70, XXX, 0, 2}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; - auto structs_expected = structs_column_wrapper{ - {child_col_expected}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { + auto const structs_expected1 = structs_column_wrapper{ + {child_col_expected1}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { + return i != 3; + })}.release(); + auto const scatter_map1 = fixed_width_column_wrapper{-2, 0, 5}.release(); + auto const result1 = cudf::scatter(source, scatter_map1->view(), target); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(structs_expected1->view(), result1->get_column(0)); + + auto child_col_expected2 = fixed_width_column_wrapper{ + {1, null, 70, 3, 0, 2}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; + auto const structs_expected2 = structs_column_wrapper{ + {child_col_expected2}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; })}.release(); + auto const scatter_map2 = fixed_width_column_wrapper{-2, 0, 5, 3}.release(); + auto const result2 = cudf::scatter(source, scatter_map2->view(), target); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(structs_expected2->view(), result2->get_column(0)); +} - auto const scatter_map = fixed_width_column_wrapper{-2, 0}.release(); - auto const result = cudf::scatter(source, scatter_map->view(), target); +TYPED_TEST(TypedStructScatterTest, ComplexDataTest) +{ + // Testing scatter() on struct. - printf("line %d\n\n\n", __LINE__); - print(structs_expected->view()); - printf("line %d\n\n\n", __LINE__); - print(result->get_column(0)); + // 1. String "names" column. 
+ auto const names_src = + std::vector{"Newton", "Washington", "Cherry", "Kiwi", "Lemon", "Tomato"}; + auto const names_validity_src = std::vector{1, 1, 1, 1, 1, 1}; + auto names_column_src = + strings_column_wrapper{names_src.begin(), names_src.end(), names_validity_src.begin()}; - CUDF_TEST_EXPECT_COLUMNS_EQUAL(structs_expected->view(), result->get_column(0)); -} + // 2. Numeric "ages" column. + auto const ages_src = std::vector{5, 10, 15, 20, 25, 30}; + auto const ages_validity_src = std::vector{1, 1, 1, 1, 0, 1}; + auto ages_column_src = fixed_width_column_wrapper{ + ages_src.begin(), ages_src.end(), ages_validity_src.begin()}; -TYPED_TEST(TypedStructScatterTest, X) -{ -#if 0 - child_col_expected = fixed_width_column_wrapper{ - {50, 60, 70, 80, 90, 100}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; - structs_expected = structs_column_wrapper{ - {child_col_tgt}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { - return i != 3; - })}.release(); - scatter_map = fixed_width_column_wrapper{}.release(); - result = cudf::scatter(source, scatter_map->view(), target); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(structs_expected->view(), result->get_column(0)); + // 3. Boolean "is_human" column. 
+ auto const is_human_src = {true, true, false, false, false, false}; + auto const is_human_validity_src = std::vector{1, 1, 1, 0, 1, 1}; + auto is_human_col_src = fixed_width_column_wrapper{ + is_human_src.begin(), is_human_src.end(), is_human_validity_src.begin()}; - child_col_expected = fixed_width_column_wrapper{ - {50, 60, 70, 80, 90, 100}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; - structs_expected = structs_column_wrapper{ - {child_col_tgt}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { - return i != 3; - })}.release(); - scatter_map = fixed_width_column_wrapper{}.release(); - result = cudf::scatter(source, scatter_map->view(), target); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(structs_expected->view(), result->get_column(0)); -#endif + // Assemble struct column. + auto const struct_validity_src = std::vector{1, 1, 1, 1, 1, 0}; + auto structs_src = structs_column_wrapper{{names_column_src, ages_column_src, is_human_col_src}, + struct_validity_src.begin()} + .release(); + + // 1. String "names" column. + auto const names_tgt = std::vector{ + "String 0", "String 1", "String 2", "String 3", "String 4", "String 5"}; + auto const names_validity_tgt = std::vector{0, 1, 1, 1, 1, 1}; + auto names_column_tgt = + strings_column_wrapper{names_tgt.begin(), names_tgt.end(), names_validity_tgt.begin()}; + + // 2. Numeric "ages" column. + auto const ages_tgt = std::vector{50, 60, 70, 80, 90, 100}; + auto const ages_validity_tgt = std::vector{1, 0, 1, 1, 1, 1}; + auto ages_column_tgt = fixed_width_column_wrapper{ + ages_tgt.begin(), ages_tgt.end(), ages_validity_tgt.begin()}; + + // 3. Boolean "is_human" column. + auto const is_human_tgt = {true, true, true, true, true, true}; + auto const is_human_validity_tgt = std::vector{1, 1, 1, 1, 1, 1}; + auto is_human_col_tgt = fixed_width_column_wrapper{ + is_human_tgt.begin(), is_human_tgt.end(), is_human_validity_tgt.begin()}; + + // Assemble struct column. 
+ auto const struct_validity_tgt = std::vector{1, 1, 0, 1, 1, 1}; + auto structs_tgt = structs_column_wrapper{{names_column_tgt, ages_column_tgt, is_human_col_tgt}, + struct_validity_tgt.begin()} + .release(); + + // 1. String "names" column. + auto const names_expected = + std::vector{"String 0", "Lemon", "Kiwi", "Cherry", "Washington", "Newton"}; + auto const names_validity_expected = std::vector{0, 1, 1, 1, 1, 1}; + auto names_column_expected = strings_column_wrapper{ + names_expected.begin(), names_expected.end(), names_validity_expected.begin()}; + + // 2. Numeric "ages" column. + auto const ages_expected = std::vector{50, 25, 20, 15, 10, 5}; + auto const ages_validity_expected = std::vector{1, 0, 1, 1, 1, 1}; + auto ages_column_expected = fixed_width_column_wrapper{ + ages_expected.begin(), ages_expected.end(), ages_validity_expected.begin()}; + + // 3. Boolean "is_human" column. + auto const is_human_expected = {true, false, false, false, true, true}; + auto const is_human_validity_expected = std::vector{1, 1, 0, 1, 1, 1}; + auto is_human_col_expected = fixed_width_column_wrapper{ + is_human_expected.begin(), is_human_expected.end(), is_human_validity_expected.begin()}; + + // Assemble struct column. 
+ auto const struct_validity_expected = std::vector{1, 1, 1, 1, 1, 1}; + auto structs_expected = + structs_column_wrapper{{names_column_expected, ages_column_expected, is_human_col_expected}, + struct_validity_expected.begin()} + .release(); + + auto const scatter_map = fixed_width_column_wrapper{-1, 4, 3, 2, 1}.release(); + auto const source = cudf::table_view{std::vector{structs_src->view()}}; + auto const target = cudf::table_view{std::vector{structs_tgt->view()}}; + auto const result = cudf::scatter(source, scatter_map->view(), target); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(structs_expected->view(), result->get_column(0), true); } #if 0 From 079d60b25eca6ce839cec114b9e189d93eba0eb7 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 30 Mar 2021 15:00:24 -0600 Subject: [PATCH 17/32] Add scatter test for struct of lists --- cpp/include/cudf/detail/scatter.cuh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/include/cudf/detail/scatter.cuh b/cpp/include/cudf/detail/scatter.cuh index 101fb3425bf..dec3eaea7de 100644 --- a/cpp/include/cudf/detail/scatter.cuh +++ b/cpp/include/cudf/detail/scatter.cuh @@ -333,6 +333,8 @@ struct column_scatterer_impl { // Only gather bitmask from the target at the positions that have not been scatter onto auto const gather_map = scatter_to_gather_inv(scatter_map_begin, scatter_map_end, source.size(), stream); + int n = (int)std::distance(gather_map.begin(), gather_map.end()); + for (int i = 0; i < n; ++i) { printf("h n: %d\n", gather_map.element(i, stream)); } gather_bitmask(table_view{std::vector{target}}, gather_map.begin(), result, From 35d643007824bc881a055ebb928d10479551d8a4 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 30 Mar 2021 15:51:37 -0600 Subject: [PATCH 18/32] Remove redundant headers, and rewrite tests --- cpp/tests/copying/scatter_struct_tests.cu | 471 +++++----------------- 1 file changed, 98 insertions(+), 373 deletions(-) diff --git a/cpp/tests/copying/scatter_struct_tests.cu 
b/cpp/tests/copying/scatter_struct_tests.cu index 90eb46f97f9..f5dd80b8e1a 100644 --- a/cpp/tests/copying/scatter_struct_tests.cu +++ b/cpp/tests/copying/scatter_struct_tests.cu @@ -19,25 +19,16 @@ #include #include -#include #include #include #include -#include #include -#include #include -#include - #include -// using column_vector = std::vector>; -// using cudf::size_type; using namespace cudf::test; - -struct StructScatterTest : public cudf::test::BaseFixture { -}; +using structs_col = structs_column_wrapper; template struct TypedStructScatterTest : public cudf::test::BaseFixture { @@ -50,168 +41,170 @@ using TestTypes = cudf::test::Concat const& structs_src, + std::unique_ptr const& structs_tgt, + std::unique_ptr const& structs_expected, + std::unique_ptr const& scatter_map) +{ + auto const source = cudf::table_view{std::vector{structs_src->view()}}; + auto const target = cudf::table_view{std::vector{structs_tgt->view()}}; + auto const result = cudf::scatter(source, scatter_map->view(), target); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(structs_expected->view(), result->get_column(0)); +} +} // namespace + // Test case when all input columns are empty TYPED_TEST(TypedStructScatterTest, EmptyInputTest) { - auto child_col_src = fixed_width_column_wrapper{}; - auto const structs_src = structs_column_wrapper{{child_col_src}, std::vector{}}.release(); - auto const source = cudf::table_view{std::vector{structs_src->view()}}; + using col_wrapper = cudf::test::fixed_width_column_wrapper; + + auto child_col_src = col_wrapper{}; + auto const structs_src = structs_col{{child_col_src}, std::vector{}}.release(); - auto child_col_tgt = fixed_width_column_wrapper{}; + auto child_col_tgt = col_wrapper{}; auto const structs_tgt = structs_column_wrapper{{child_col_tgt}, std::vector{}}.release(); - auto const target = cudf::table_view{std::vector{structs_tgt->view()}}; auto const scatter_map = fixed_width_column_wrapper{}.release(); - auto const result = cudf::scatter(source, 
scatter_map->view(), target); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(structs_src->view(), result->get_column(0)); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(structs_tgt->view(), result->get_column(0)); + test_scatter(structs_src, structs_tgt, structs_src, scatter_map); + test_scatter(structs_src, structs_tgt, structs_tgt, scatter_map); } // Test case when only the scatter map is empty TYPED_TEST(TypedStructScatterTest, EmptyScatterMapTest) { + using col_wrapper = cudf::test::fixed_width_column_wrapper; auto constexpr null = std::numeric_limits::max(); // Null child element auto constexpr XXX = std::numeric_limits::max(); // Null struct element - auto child_col_src = fixed_width_column_wrapper{ - {0, 1, 2, 3, null, XXX}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; })}; + auto child_col_src = + col_wrapper{{0, 1, 2, 3, null, XXX}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; })}; auto const structs_src = structs_column_wrapper{ {child_col_src}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 5; })}.release(); - auto const source = cudf::table_view{std::vector{structs_src->view()}}; - auto child_col_tgt = fixed_width_column_wrapper{ - {50, null, 70, XXX, 90, 100}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; + auto child_col_tgt = + col_wrapper{{50, null, 70, XXX, 90, 100}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; auto const structs_tgt = structs_column_wrapper{ {child_col_tgt}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; })}.release(); - auto const target = cudf::table_view{std::vector{structs_tgt->view()}}; auto const scatter_map = fixed_width_column_wrapper{}.release(); - auto const result = cudf::scatter(source, scatter_map->view(), target); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(structs_tgt->view(), result->get_column(0)); + test_scatter(structs_src, structs_tgt, 
structs_tgt, scatter_map); } TYPED_TEST(TypedStructScatterTest, ScatterAsCopyTest) { + using col_wrapper = cudf::test::fixed_width_column_wrapper; auto constexpr null = std::numeric_limits::max(); // Null child element auto constexpr XXX = std::numeric_limits::max(); // Null struct element - auto child_col_src = fixed_width_column_wrapper{ - {0, 1, 2, 3, null, XXX}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; })}; + auto child_col_src = + col_wrapper{{0, 1, 2, 3, null, XXX}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; })}; auto const structs_src = structs_column_wrapper{ {child_col_src}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 5; })}.release(); - auto const source = cudf::table_view{std::vector{structs_src->view()}}; - auto child_col_tgt = fixed_width_column_wrapper{ - {50, null, 70, XXX, 90, 100}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; + auto child_col_tgt = + col_wrapper{{50, null, 70, XXX, 90, 100}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; auto const structs_tgt = structs_column_wrapper{ {child_col_tgt}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; })}.release(); - auto target = cudf::table_view{std::vector{structs_tgt->view()}}; // Scatter as copy: the target should be the same as source auto const scatter_map = fixed_width_column_wrapper{0, 1, 2, 3, 4, 5}.release(); - auto const result = cudf::scatter(source, scatter_map->view(), target); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(structs_src->view(), result->get_column(0)); + test_scatter(structs_src, structs_tgt, structs_src, scatter_map); } TYPED_TEST(TypedStructScatterTest, ScatterAsLeftShiftTest) { + using col_wrapper = cudf::test::fixed_width_column_wrapper; auto constexpr null = std::numeric_limits::max(); // Null child element auto constexpr XXX = std::numeric_limits::max(); // Null 
struct element - auto child_col_src = fixed_width_column_wrapper{ - {0, 1, 2, 3, null, XXX}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; })}; + auto child_col_src = + col_wrapper{{0, 1, 2, 3, null, XXX}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; })}; auto const structs_src = structs_column_wrapper{ {child_col_src}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 5; })}.release(); - auto const source = cudf::table_view{std::vector{structs_src->view()}}; - auto child_col_tgt = fixed_width_column_wrapper{ - {50, null, 70, XXX, 90, 100}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; + auto child_col_tgt = + col_wrapper{{50, null, 70, XXX, 90, 100}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; auto const structs_tgt = structs_column_wrapper{ {child_col_tgt}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; })}.release(); - auto const target = cudf::table_view{std::vector{structs_tgt->view()}}; - auto child_col_expected = fixed_width_column_wrapper{ - {2, 3, null, XXX, 0, 1}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 2; })}; + auto child_col_expected = + col_wrapper{{2, 3, null, XXX, 0, 1}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 2; })}; auto structs_expected = structs_column_wrapper{ {child_col_expected}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; })}.release(); auto const scatter_map = fixed_width_column_wrapper{-2, -1, 0, 1, 2, 3}.release(); - auto const result = cudf::scatter(source, scatter_map->view(), target); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(structs_expected->view(), result->get_column(0)); + test_scatter(structs_src, structs_tgt, structs_expected, scatter_map); } TYPED_TEST(TypedStructScatterTest, SimpleScatterTests) { + using col_wrapper = 
cudf::test::fixed_width_column_wrapper; auto constexpr null = std::numeric_limits::max(); // Null child element auto constexpr XXX = std::numeric_limits::max(); // Null struct element - auto child_col_src = fixed_width_column_wrapper{ - {0, 1, 2, 3, null, XXX}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; })}; + auto child_col_src = + col_wrapper{{0, 1, 2, 3, null, XXX}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; })}; auto const structs_src = structs_column_wrapper{ {child_col_src}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 5; })}.release(); - auto const source = cudf::table_view{std::vector{structs_src->view()}}; - auto child_col_tgt = fixed_width_column_wrapper{ - {50, null, 70, XXX, 90, 100}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; + auto child_col_tgt = + col_wrapper{{50, null, 70, XXX, 90, 100}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; auto const structs_tgt = structs_column_wrapper{ {child_col_tgt}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; })}.release(); - auto target = cudf::table_view{std::vector{structs_tgt->view()}}; - auto child_col_expected1 = fixed_width_column_wrapper{ - {1, null, 70, XXX, 0, 2}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; + auto child_col_expected1 = + col_wrapper{{1, null, 70, XXX, 0, 2}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; auto const structs_expected1 = structs_column_wrapper{ {child_col_expected1}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; })}.release(); auto const scatter_map1 = fixed_width_column_wrapper{-2, 0, 5}.release(); - auto const result1 = cudf::scatter(source, scatter_map1->view(), target); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(structs_expected1->view(), 
result1->get_column(0)); + test_scatter(structs_src, structs_tgt, structs_expected1, scatter_map1); - auto child_col_expected2 = fixed_width_column_wrapper{ - {1, null, 70, 3, 0, 2}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; + auto child_col_expected2 = + col_wrapper{{1, null, 70, 3, 0, 2}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; auto const structs_expected2 = structs_column_wrapper{ {child_col_expected2}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; })}.release(); auto const scatter_map2 = fixed_width_column_wrapper{-2, 0, 5, 3}.release(); - auto const result2 = cudf::scatter(source, scatter_map2->view(), target); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(structs_expected2->view(), result2->get_column(0)); + test_scatter(structs_src, structs_tgt, structs_expected2, scatter_map2); } -TYPED_TEST(TypedStructScatterTest, ComplexDataTest) +TYPED_TEST(TypedStructScatterTest, ComplexDataScatterTest) { // Testing scatter() on struct. + using col_wrapper = cudf::test::fixed_width_column_wrapper; // 1. String "names" column. auto const names_src = @@ -223,8 +216,7 @@ TYPED_TEST(TypedStructScatterTest, ComplexDataTest) // 2. Numeric "ages" column. auto const ages_src = std::vector{5, 10, 15, 20, 25, 30}; auto const ages_validity_src = std::vector{1, 1, 1, 1, 0, 1}; - auto ages_column_src = fixed_width_column_wrapper{ - ages_src.begin(), ages_src.end(), ages_validity_src.begin()}; + auto ages_column_src = col_wrapper{ages_src.begin(), ages_src.end(), ages_validity_src.begin()}; // 3. Boolean "is_human" column. auto const is_human_src = {true, true, false, false, false, false}; @@ -248,8 +240,7 @@ TYPED_TEST(TypedStructScatterTest, ComplexDataTest) // 2. Numeric "ages" column. 
auto const ages_tgt = std::vector{50, 60, 70, 80, 90, 100}; auto const ages_validity_tgt = std::vector{1, 0, 1, 1, 1, 1}; - auto ages_column_tgt = fixed_width_column_wrapper{ - ages_tgt.begin(), ages_tgt.end(), ages_validity_tgt.begin()}; + auto ages_column_tgt = col_wrapper{ages_tgt.begin(), ages_tgt.end(), ages_validity_tgt.begin()}; // 3. Boolean "is_human" column. auto const is_human_tgt = {true, true, true, true, true, true}; @@ -273,8 +264,8 @@ TYPED_TEST(TypedStructScatterTest, ComplexDataTest) // 2. Numeric "ages" column. auto const ages_expected = std::vector{50, 25, 20, 15, 10, 5}; auto const ages_validity_expected = std::vector{1, 0, 1, 1, 1, 1}; - auto ages_column_expected = fixed_width_column_wrapper{ - ages_expected.begin(), ages_expected.end(), ages_validity_expected.begin()}; + auto ages_column_expected = + col_wrapper{ages_expected.begin(), ages_expected.end(), ages_validity_expected.begin()}; // 3. Boolean "is_human" column. auto const is_human_expected = {true, false, false, false, true, true}; @@ -289,324 +280,58 @@ TYPED_TEST(TypedStructScatterTest, ComplexDataTest) struct_validity_expected.begin()} .release(); + // The first element of the target is not overwritten auto const scatter_map = fixed_width_column_wrapper{-1, 4, 3, 2, 1}.release(); - auto const source = cudf::table_view{std::vector{structs_src->view()}}; - auto const target = cudf::table_view{std::vector{structs_tgt->view()}}; - auto const result = cudf::scatter(source, scatter_map->view(), target); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(structs_expected->view(), result->get_column(0), true); + test_scatter(structs_src, structs_tgt, structs_expected, scatter_map); } -#if 0 -TYPED_TEST(TypedStructScatterTest, TestSimpleStructGather) +TYPED_TEST(TypedStructScatterTest, ScatterStructOfListsTest) { - // Testing gather() on struct. - - // 1. String "names" column. 
- auto const names = - std::vector{"Vimes", "Carrot", "Angua", "Cheery", "Detritus", "Slant"}; - auto const names_validity = std::vector{1, 1, 1, 1, 1, 1}; - auto names_column = strings_column_wrapper{names.begin(), names.end(), names_validity.begin()}; - - // 2. Numeric "ages" column. - auto const ages = std::vector{5, 10, 15, 20, 25, 30}; - auto const ages_validity = std::vector{1, 1, 1, 1, 0, 1}; - auto ages_column = - fixed_width_column_wrapper{ages.begin(), ages.end(), ages_validity.begin()}; - - // 3. Boolean "is_human" column. - auto const is_human = {true, true, false, false, false, false}; - auto const is_human_validity = std::vector{1, 1, 1, 0, 1, 1}; - auto is_human_col = - fixed_width_column_wrapper{is_human.begin(), is_human.end(), is_human_validity.begin()}; - - // Assemble struct column. - auto const struct_validity = std::vector{1, 1, 1, 1, 1, 0}; - auto struct_column = - structs_column_wrapper{{names_column, ages_column, is_human_col}, struct_validity.begin()} - .release(); - - // Gather to new struct column. - auto const scatter_map = std::vector{-1, 4, 3, 2, 1}; - auto const gather_map_col = - fixed_width_column_wrapper(scatter_map.begin(), scatter_map.end()).release(); - - auto const gathered_table = - cudf::gather(cudf::table_view{std::vector{struct_column->view()}}, - gather_map_col->view()); - - auto const gathered_struct_col = gathered_table->get_column(0); - auto const gathered_struct_col_view = cudf::structs_column_view{gathered_struct_col}; - - // Verify that the gathered struct's fields are as expected. 
- - auto expected_names_column = - get_expected_column(names, names_validity, struct_validity, scatter_map); - expect_columns_equivalent(*expected_names_column, gathered_struct_col.child(0)); - - auto expected_ages_column = - get_expected_column(ages, ages_validity, struct_validity, scatter_map); - expect_columns_equivalent(*expected_ages_column, gathered_struct_col.child(1)); - - auto expected_bool_column = - get_expected_column(std::vector(is_human.begin(), is_human.end()), - is_human_validity, - struct_validity, - scatter_map); - expect_columns_equivalent(*expected_bool_column, gathered_struct_col.child(2)); - - std::vector> expected_columns; - expected_columns.push_back(std::move(expected_names_column)); - expected_columns.push_back(std::move(expected_ages_column)); - expected_columns.push_back(std::move(expected_bool_column)); - auto const expected_struct_column = - structs_column_wrapper{std::move(expected_columns), std::vector{0, 1, 1, 1, 1}}.release(); - - expect_columns_equivalent(*expected_struct_column, gathered_struct_col); -} - -TYPED_TEST(TypedStructScatterTest, TestGatherStructOfLists) -{ - using namespace cudf::test; - // Testing gather() on struct> + using col_wrapper = cudf::test::fixed_width_column_wrapper; auto lists_column_exemplar = []() { return lists_column_wrapper{ {{5}, {10, 15}, {20, 25, 30}, {35, 40, 45, 50}, {55, 60, 65}, {70, 75}, {80}, {}, {}}, + // Valid for elements 0, 3, 6,... cudf::detail::make_counting_transform_iterator(0, [](auto i) { return !(i % 3); })}; }; auto lists_column = std::make_unique(cudf::column(lists_column_exemplar(), 0)); - // Assemble struct column. - std::vector> column_vector; - column_vector.push_back(std::move(lists_column)); - auto const struct_column = structs_column_wrapper{std::move(column_vector)}.release(); - - // Gather to new struct column. 
- auto const scatter_map = std::vector{-1, 4, 3, 2, 1, 7, 3}; - auto const gather_map_col = - fixed_width_column_wrapper(scatter_map.begin(), scatter_map.end()).release(); - - auto const gathered_table = - cudf::gather(cudf::table_view{std::vector{struct_column->view()}}, - gather_map_col->view()); - - auto const gathered_struct_col = gathered_table->get_column(0); - auto const gathered_struct_col_view = cudf::structs_column_view{gathered_struct_col}; - - // Verify that the gathered struct column's list member presents as if - // it had itself been gathered individually. - - auto const list_column_before_gathering = lists_column_exemplar().release(); - - auto const expected_gathered_list_column = - cudf::gather( - cudf::table_view{std::vector{list_column_before_gathering->view()}}, - gather_map_col->view()) - ->get_column(0); - - expect_columns_equivalent(expected_gathered_list_column.view(), gathered_struct_col.child(0)); -} - -TYPED_TEST(TypedStructScatterTest, TestGatherStructOfListsOfLists) -{ - using namespace cudf::test; - - // Testing gather() on struct>> + auto lists_column_exemplar_tgt = []() { + return lists_column_wrapper{ + {{1}, {2, 3}, {4, 5, 6}, {7, 8}, {9}, {10, 11, 12, 13}, {}, {14}, {15, 16}}, + // Valid for elements 1, 3, 5, 7,... 
+ cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; })}; + }; + auto lists_column_tgt = + std::make_unique(cudf::column(lists_column_exemplar_tgt(), 0)); - auto const lists_column_exemplar = []() { + auto const validity_expected = std::vector{0, 1, 1, 0, 0, 1, 1, 0, 0}; + auto lists_column_exemplar_expected = [validity_expected]() { return lists_column_wrapper{ - {{{5, 5}}, - {{10, 15}}, - {{20, 25}, {30}}, - {{35, 40}, {45, 50}}, - {{55}, {60, 65}}, - {{70, 75}}, - {{80, 80}}, - {}, - {}}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return !(i % 3); })}; + {{1}, {2, 3}, {80}, {70, 75}, {55, 60, 65}, {35, 40, 45, 50}, {5}, {10, 15}, {20, 25, 30}}, + validity_expected.begin()}; }; - auto lists_column = std::make_unique(cudf::column(lists_column_exemplar(), 0)); + auto lists_column_expected = + std::make_unique(cudf::column(lists_column_exemplar_expected(), 0)); // Assemble struct column. std::vector> column_vector; column_vector.push_back(std::move(lists_column)); - auto const struct_column = structs_column_wrapper{std::move(column_vector)}.release(); - - // Gather to new struct column. - auto const scatter_map = std::vector{-1, 4, 3, 2, 1, 7, 3}; - auto const gather_map_col = - fixed_width_column_wrapper(scatter_map.begin(), scatter_map.end()).release(); - - auto const gathered_table = - cudf::gather(cudf::table_view{std::vector{struct_column->view()}}, - gather_map_col->view()); - - auto const gathered_struct_col = gathered_table->get_column(0); - auto const gathered_struct_col_view = cudf::structs_column_view{gathered_struct_col}; - - // Verify that the gathered struct column's list member presents as if - // it had itself been gathered individually. 
- - auto const list_column_before_gathering = lists_column_exemplar().release(); - - auto const expected_gathered_list_column = - cudf::gather( - cudf::table_view{std::vector{list_column_before_gathering->view()}}, - gather_map_col->view()) - ->get_column(0); - - expect_columns_equivalent(expected_gathered_list_column.view(), gathered_struct_col.child(0)); -} - -TYPED_TEST(TypedStructScatterTest, TestGatherStructOfStructs) -{ - using namespace cudf::test; - - // Testing gather() on struct> - - auto const numeric_column_exemplar = []() { - return fixed_width_column_wrapper{ - {5, 10, 15, 20, 25, 30, 35, 45, 50, 55, 60, 65, 70, 75}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return !(i % 3); })}; - }; - - auto numeric_column = numeric_column_exemplar(); - auto structs_column = structs_column_wrapper{{numeric_column}}; - - auto const struct_of_structs_column = structs_column_wrapper{{structs_column}}.release(); - - // Gather to new struct column. - auto const scatter_map = std::vector{-1, 4, 3, 2, 1, 7, 3}; - auto const gather_map_col = - fixed_width_column_wrapper(scatter_map.begin(), scatter_map.end()).release(); + auto const structs_src = structs_column_wrapper{std::move(column_vector)}.release(); - auto const gathered_table = - cudf::gather(cudf::table_view{std::vector{struct_of_structs_column->view()}}, - gather_map_col->view()); + std::vector> column_vector_tgt; + column_vector_tgt.push_back(std::move(lists_column_tgt)); + auto const structs_tgt = structs_column_wrapper{std::move(column_vector_tgt)}.release(); - auto const gathered_struct_col = gathered_table->get_column(0); - auto const gathered_struct_col_view = cudf::structs_column_view{gathered_struct_col}; + std::vector> column_vector_expected; + column_vector_expected.push_back(std::move(lists_column_expected)); + auto const structs_expected = structs_column_wrapper{std::move(column_vector_expected)}.release(); - // Verify that the underlying numeric column presents as if - // it had 
itself been gathered individually. - - auto const numeric_column_before_gathering = numeric_column_exemplar().release(); - auto const expected_gathered_column = - cudf::gather( - cudf::table_view{std::vector{numeric_column_before_gathering->view()}}, - gather_map_col->view()) - ->get_column(0); - - expect_columns_equivalent(expected_gathered_column, gathered_struct_col.child(0).child(0).view()); -} - -TYPED_TEST(TypedStructScatterTest, TestGatherStructOfListOfStructs) -{ - using namespace cudf::test; - - // Testing gather() on struct> - - auto const numeric_column_exemplar = []() { - return fixed_width_column_wrapper{ - {5, 10, 15, 20, 25, 30, 35, 45, 50, 55, 60, 65, 70, 75}}; - }; - - auto numeric_column = numeric_column_exemplar(); - auto structs_column = structs_column_wrapper{{numeric_column}}.release(); - auto list_of_structs_column = cudf::make_lists_column( - 7, - fixed_width_column_wrapper{0, 2, 4, 6, 8, 10, 12, 14}.release(), - std::move(structs_column), - cudf::UNKNOWN_NULL_COUNT, - {}); - - std::vector> column_vector; - column_vector.push_back(std::move(list_of_structs_column)); - auto const struct_of_list_of_structs = structs_column_wrapper{std::move(column_vector)}.release(); - - // Gather to new struct column. - auto const scatter_map = std::vector{-1, 4, 3, 2, 1}; - auto const gather_map_col = - fixed_width_column_wrapper(scatter_map.begin(), scatter_map.end()).release(); - - auto const gathered_table = cudf::gather( - cudf::table_view{std::vector{struct_of_list_of_structs->view()}}, - gather_map_col->view()); - - auto const gathered_struct_col = gathered_table->get_column(0); - auto const gathered_struct_col_view = cudf::structs_column_view{gathered_struct_col}; - - // Construct expected gather result. 
- - auto expected_numeric_col = - fixed_width_column_wrapper{{70, 75, 50, 55, 35, 45, 25, 30, 15, 20}}; - auto expected_struct_col = structs_column_wrapper{{expected_numeric_col}}.release(); - auto expected_list_of_structs_column = - cudf::make_lists_column(5, - fixed_width_column_wrapper{0, 2, 4, 6, 8, 10}.release(), - std::move(expected_struct_col), - cudf::UNKNOWN_NULL_COUNT, - {}); - std::vector> expected_vector_of_columns; - expected_vector_of_columns.push_back(std::move(expected_list_of_structs_column)); - auto const expected_struct_of_list_of_struct = - structs_column_wrapper{std::move(expected_vector_of_columns)}.release(); - - expect_columns_equivalent(expected_struct_of_list_of_struct->view(), gathered_struct_col.view()); -} - -TYPED_TEST(TypedStructScatterTest, TestGatherStructOfStructsWithValidity) -{ - using namespace cudf::test; - - // Testing gather() on struct> - - // Factory to construct numeric column with configurable null-mask. - auto const numeric_column_exemplar = [](nvstd::function pred) { - return fixed_width_column_wrapper{ - {5, 10, 15, 20, 25, 30, 35, 45, 50, 55, 60, 65, 70, 75}, - cudf::detail::make_counting_transform_iterator(0, [=](auto i) { return pred(i); })}; - }; - - // Validity predicates. - auto const every_3rd_element_null = [](size_type i) { return !(i % 3); }; - auto const twelfth_element_null = [](size_type i) { return i != 11; }; - - // Construct struct-of-struct-of-numerics. - auto numeric_column = numeric_column_exemplar(every_3rd_element_null); - auto structs_column = structs_column_wrapper{ - {numeric_column}, cudf::detail::make_counting_transform_iterator(0, twelfth_element_null)}; - auto struct_of_structs_column = structs_column_wrapper{{structs_column}}.release(); - - // Gather to new struct column. 
- auto const scatter_map = std::vector{-1, 4, 3, 2, 1, 7, 3}; - auto const gather_map_col = - fixed_width_column_wrapper(scatter_map.begin(), scatter_map.end()).release(); - - auto const gathered_table = - cudf::gather(cudf::table_view{std::vector{struct_of_structs_column->view()}}, - gather_map_col->view()); - - auto const gathered_struct_col = gathered_table->get_column(0); - auto const gathered_struct_col_view = cudf::structs_column_view{gathered_struct_col}; - - // Verify that the underlying numeric column presents as if - // it had itself been gathered individually. - - auto const final_predicate = [=](size_type i) { - return every_3rd_element_null(i) && twelfth_element_null(i); - }; - auto const numeric_column_before_gathering = numeric_column_exemplar(final_predicate).release(); - auto const expected_gathered_column = - cudf::gather( - cudf::table_view{std::vector{numeric_column_before_gathering->view()}}, - gather_map_col->view()) - ->get_column(0); - - expect_columns_equivalent(expected_gathered_column, gathered_struct_col.child(0).child(0).view()); + // The first 2 elements of the target is not overwritten + auto const scatter_map = fixed_width_column_wrapper{-3, -2, -1, 5, 4, 3, 2}.release(); + test_scatter(structs_src, structs_tgt, structs_expected, scatter_map); } -#endif From eff5d682675b0cbe64178794dff95e9b94936e15 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 30 Mar 2021 16:26:30 -0600 Subject: [PATCH 19/32] Rewrite all tests --- cpp/tests/copying/scatter_struct_tests.cu | 206 +++++++++------------- 1 file changed, 84 insertions(+), 122 deletions(-) diff --git a/cpp/tests/copying/scatter_struct_tests.cu b/cpp/tests/copying/scatter_struct_tests.cu index f5dd80b8e1a..59752424670 100644 --- a/cpp/tests/copying/scatter_struct_tests.cu +++ b/cpp/tests/copying/scatter_struct_tests.cu @@ -28,7 +28,9 @@ #include using namespace cudf::test; +using bools_col = fixed_width_column_wrapper; using structs_col = structs_column_wrapper; +using strings_col
= strings_column_wrapper; template struct TypedStructScatterTest : public cudf::test::BaseFixture { @@ -63,7 +65,7 @@ TYPED_TEST(TypedStructScatterTest, EmptyInputTest) auto const structs_src = structs_col{{child_col_src}, std::vector{}}.release(); auto child_col_tgt = col_wrapper{}; - auto const structs_tgt = structs_column_wrapper{{child_col_tgt}, std::vector{}}.release(); + auto const structs_tgt = structs_col{{child_col_tgt}, std::vector{}}.release(); auto const scatter_map = fixed_width_column_wrapper{}.release(); test_scatter(structs_src, structs_tgt, structs_src, scatter_map); @@ -80,7 +82,7 @@ TYPED_TEST(TypedStructScatterTest, EmptyScatterMapTest) auto child_col_src = col_wrapper{{0, 1, 2, 3, null, XXX}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; })}; - auto const structs_src = structs_column_wrapper{ + auto const structs_src = structs_col{ {child_col_src}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 5; })}.release(); @@ -88,7 +90,7 @@ TYPED_TEST(TypedStructScatterTest, EmptyScatterMapTest) auto child_col_tgt = col_wrapper{{50, null, 70, XXX, 90, 100}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; - auto const structs_tgt = structs_column_wrapper{ + auto const structs_tgt = structs_col{ {child_col_tgt}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; })}.release(); @@ -106,7 +108,7 @@ TYPED_TEST(TypedStructScatterTest, ScatterAsCopyTest) auto child_col_src = col_wrapper{{0, 1, 2, 3, null, XXX}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; })}; - auto const structs_src = structs_column_wrapper{ + auto const structs_src = structs_col{ {child_col_src}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 5; })}.release(); @@ -114,7 +116,7 @@ TYPED_TEST(TypedStructScatterTest, ScatterAsCopyTest) auto child_col_tgt = col_wrapper{{50, null, 70, XXX, 90, 100}, 
cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; - auto const structs_tgt = structs_column_wrapper{ + auto const structs_tgt = structs_col{ {child_col_tgt}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; })}.release(); @@ -133,7 +135,7 @@ TYPED_TEST(TypedStructScatterTest, ScatterAsLeftShiftTest) auto child_col_src = col_wrapper{{0, 1, 2, 3, null, XXX}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; })}; - auto const structs_src = structs_column_wrapper{ + auto const structs_src = structs_col{ {child_col_src}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 5; })}.release(); @@ -141,7 +143,7 @@ TYPED_TEST(TypedStructScatterTest, ScatterAsLeftShiftTest) auto child_col_tgt = col_wrapper{{50, null, 70, XXX, 90, 100}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; - auto const structs_tgt = structs_column_wrapper{ + auto const structs_tgt = structs_col{ {child_col_tgt}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; })}.release(); @@ -149,7 +151,7 @@ TYPED_TEST(TypedStructScatterTest, ScatterAsLeftShiftTest) auto child_col_expected = col_wrapper{{2, 3, null, XXX, 0, 1}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 2; })}; - auto structs_expected = structs_column_wrapper{ + auto structs_expected = structs_col{ {child_col_expected}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; })}.release(); @@ -164,36 +166,40 @@ TYPED_TEST(TypedStructScatterTest, SimpleScatterTests) auto constexpr null = std::numeric_limits::max(); // Null child element auto constexpr XXX = std::numeric_limits::max(); // Null struct element + // Source data auto child_col_src = col_wrapper{{0, 1, 2, 3, null, XXX}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; })}; - auto const structs_src = structs_column_wrapper{ + auto 
const structs_src = structs_col{ {child_col_src}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 5; })}.release(); + // Target data auto child_col_tgt = col_wrapper{{50, null, 70, XXX, 90, 100}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; - auto const structs_tgt = structs_column_wrapper{ + auto const structs_tgt = structs_col{ {child_col_tgt}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; })}.release(); + // Expected data auto child_col_expected1 = col_wrapper{{1, null, 70, XXX, 0, 2}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; - auto const structs_expected1 = structs_column_wrapper{ + auto const structs_expected1 = structs_col{ {child_col_expected1}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; })}.release(); auto const scatter_map1 = fixed_width_column_wrapper{-2, 0, 5}.release(); test_scatter(structs_src, structs_tgt, structs_expected1, scatter_map1); + // Expected data auto child_col_expected2 = col_wrapper{{1, null, 70, 3, 0, 2}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; - auto const structs_expected2 = structs_column_wrapper{ + auto const structs_expected2 = structs_col{ {child_col_expected2}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; })}.release(); @@ -206,79 +212,54 @@ TYPED_TEST(TypedStructScatterTest, ComplexDataScatterTest) // Testing scatter() on struct. using col_wrapper = cudf::test::fixed_width_column_wrapper; - // 1. String "names" column. - auto const names_src = - std::vector{"Newton", "Washington", "Cherry", "Kiwi", "Lemon", "Tomato"}; - auto const names_validity_src = std::vector{1, 1, 1, 1, 1, 1}; + // Source data auto names_column_src = - strings_column_wrapper{names_src.begin(), names_src.end(), names_validity_src.begin()}; - - // 2. Numeric "ages" column. 
- auto const ages_src = std::vector{5, 10, 15, 20, 25, 30}; - auto const ages_validity_src = std::vector{1, 1, 1, 1, 0, 1}; - auto ages_column_src = col_wrapper{ages_src.begin(), ages_src.end(), ages_validity_src.begin()}; - - // 3. Boolean "is_human" column. - auto const is_human_src = {true, true, false, false, false, false}; - auto const is_human_validity_src = std::vector{1, 1, 1, 0, 1, 1}; - auto is_human_col_src = fixed_width_column_wrapper{ - is_human_src.begin(), is_human_src.end(), is_human_validity_src.begin()}; - - // Assemble struct column. - auto const struct_validity_src = std::vector{1, 1, 1, 1, 1, 0}; - auto structs_src = structs_column_wrapper{{names_column_src, ages_column_src, is_human_col_src}, - struct_validity_src.begin()} - .release(); - - // 1. String "names" column. - auto const names_tgt = std::vector{ - "String 0", "String 1", "String 2", "String 3", "String 4", "String 5"}; - auto const names_validity_tgt = std::vector{0, 1, 1, 1, 1, 1}; + strings_col{{"Newton", "Washington", "Cherry", "Kiwi", "Lemon", "Tomato"}, + cudf::detail::make_counting_transform_iterator(0, [](auto) { return true; })}; + auto ages_column_src = + col_wrapper{{5, 10, 15, 20, 25, 30}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; })}; + auto is_human_col_src = + bools_col{{true, true, false, false, false, false}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; })}; + + // Target data auto names_column_tgt = - strings_column_wrapper{names_tgt.begin(), names_tgt.end(), names_validity_tgt.begin()}; - - // 2. Numeric "ages" column. - auto const ages_tgt = std::vector{50, 60, 70, 80, 90, 100}; - auto const ages_validity_tgt = std::vector{1, 0, 1, 1, 1, 1}; - auto ages_column_tgt = col_wrapper{ages_tgt.begin(), ages_tgt.end(), ages_validity_tgt.begin()}; - - // 3. Boolean "is_human" column. 
- auto const is_human_tgt = {true, true, true, true, true, true}; - auto const is_human_validity_tgt = std::vector{1, 1, 1, 1, 1, 1}; - auto is_human_col_tgt = fixed_width_column_wrapper{ - is_human_tgt.begin(), is_human_tgt.end(), is_human_validity_tgt.begin()}; - - // Assemble struct column. - auto const struct_validity_tgt = std::vector{1, 1, 0, 1, 1, 1}; - auto structs_tgt = structs_column_wrapper{{names_column_tgt, ages_column_tgt, is_human_col_tgt}, - struct_validity_tgt.begin()} - .release(); - - // 1. String "names" column. - auto const names_expected = - std::vector{"String 0", "Lemon", "Kiwi", "Cherry", "Washington", "Newton"}; - auto const names_validity_expected = std::vector{0, 1, 1, 1, 1, 1}; - auto names_column_expected = strings_column_wrapper{ - names_expected.begin(), names_expected.end(), names_validity_expected.begin()}; - - // 2. Numeric "ages" column. - auto const ages_expected = std::vector{50, 25, 20, 15, 10, 5}; - auto const ages_validity_expected = std::vector{1, 0, 1, 1, 1, 1}; + strings_col{{"String 0", "String 1", "String 2", "String 3", "String 4", "String 5"}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 0; })}; + auto ages_column_tgt = + col_wrapper{{50, 60, 70, 80, 90, 100}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; + auto is_human_col_tgt = + bools_col{{true, true, true, true, true, true}, + cudf::detail::make_counting_transform_iterator(0, [](auto) { return true; })}; + + // Expected data + auto names_column_expected = + strings_col{{"String 0", "Lemon", "Kiwi", "Cherry", "Washington", "Newton"}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 0; })}; auto ages_column_expected = - col_wrapper{ages_expected.begin(), ages_expected.end(), ages_validity_expected.begin()}; - - // 3. Boolean "is_human" column. 
- auto const is_human_expected = {true, false, false, false, true, true}; - auto const is_human_validity_expected = std::vector{1, 1, 0, 1, 1, 1}; - auto is_human_col_expected = fixed_width_column_wrapper{ - is_human_expected.begin(), is_human_expected.end(), is_human_validity_expected.begin()}; + col_wrapper{{50, 25, 20, 15, 10, 5}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; + auto is_human_col_expected = + bools_col{{true, false, false, false, true, true}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 2; })}; - // Assemble struct column. - auto const struct_validity_expected = std::vector{1, 1, 1, 1, 1, 1}; - auto structs_expected = - structs_column_wrapper{{names_column_expected, ages_column_expected, is_human_col_expected}, - struct_validity_expected.begin()} - .release(); + auto const structs_src = structs_col{ + {names_column_src, ages_column_src, is_human_col_src}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { + return i != 5; + })}.release(); + auto const structs_tgt = structs_col{ + {names_column_tgt, ages_column_tgt, is_human_col_tgt}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { + return i != 2; + })}.release(); + auto const structs_expected = structs_col{ + {names_column_expected, ages_column_expected, is_human_col_expected}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { + return true; + })}.release(); // The first element of the target is not overwritten auto const scatter_map = fixed_width_column_wrapper{-1, 4, 3, 2, 1}.release(); @@ -289,47 +270,28 @@ TYPED_TEST(TypedStructScatterTest, ScatterStructOfListsTest) { // Testing gather() on struct> using col_wrapper = cudf::test::fixed_width_column_wrapper; - - auto lists_column_exemplar = []() { - return lists_column_wrapper{ - {{5}, {10, 15}, {20, 25, 30}, {35, 40, 45, 50}, {55, 60, 65}, {70, 75}, {80}, {}, {}}, - // Valid for elements 0, 3, 6,... 
- cudf::detail::make_counting_transform_iterator(0, [](auto i) { return !(i % 3); })}; - }; - - auto lists_column = std::make_unique(cudf::column(lists_column_exemplar(), 0)); - - auto lists_column_exemplar_tgt = []() { - return lists_column_wrapper{ - {{1}, {2, 3}, {4, 5, 6}, {7, 8}, {9}, {10, 11, 12, 13}, {}, {14}, {15, 16}}, - // Valid for elements 1, 3, 5, 7,... - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; })}; - }; - auto lists_column_tgt = - std::make_unique(cudf::column(lists_column_exemplar_tgt(), 0)); - - auto const validity_expected = std::vector{0, 1, 1, 0, 0, 1, 1, 0, 0}; - auto lists_column_exemplar_expected = [validity_expected]() { - return lists_column_wrapper{ - {{1}, {2, 3}, {80}, {70, 75}, {55, 60, 65}, {35, 40, 45, 50}, {5}, {10, 15}, {20, 25, 30}}, - validity_expected.begin()}; - }; - - auto lists_column_expected = - std::make_unique(cudf::column(lists_column_exemplar_expected(), 0)); - - // Assemble struct column. - std::vector> column_vector; - column_vector.push_back(std::move(lists_column)); - auto const structs_src = structs_column_wrapper{std::move(column_vector)}.release(); - - std::vector> column_vector_tgt; - column_vector_tgt.push_back(std::move(lists_column_tgt)); - auto const structs_tgt = structs_column_wrapper{std::move(column_vector_tgt)}.release(); - - std::vector> column_vector_expected; - column_vector_expected.push_back(std::move(lists_column_expected)); - auto const structs_expected = structs_column_wrapper{std::move(column_vector_expected)}.release(); + using lists_col = cudf::test::lists_column_wrapper; + + // Source data + auto lists_col_src = + lists_col{{{5}, {10, 15}, {20, 25, 30}, {35, 40, 45, 50}, {55, 60, 65}, {70, 75}, {80}, {}, {}}, + // Valid for elements 0, 3, 6,... 
+ cudf::detail::make_counting_transform_iterator(0, [](auto i) { return !(i % 3); })}; + auto const structs_src = structs_col{{lists_col_src}}.release(); + + // Target data + auto lists_col_tgt = + lists_col{{{1}, {2, 3}, {4, 5, 6}, {7, 8}, {9}, {10, 11, 12, 13}, {}, {14}, {15, 16}}, + // Valid for elements 1, 3, 5, 7,... + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; })}; + auto const structs_tgt = structs_col{{lists_col_tgt}}.release(); + + // Expected data + auto const validity_expected = std::vector{0, 1, 1, 0, 0, 1, 1, 0, 0}; + auto lists_col_expected = lists_col{ + {{1}, {2, 3}, {80}, {70, 75}, {55, 60, 65}, {35, 40, 45, 50}, {5}, {10, 15}, {20, 25, 30}}, + validity_expected.begin()}; + auto const structs_expected = structs_col{{lists_col_expected}}.release(); // The first 2 elements of the target is not overwritten auto const scatter_map = fixed_width_column_wrapper{-3, -2, -1, 5, 4, 3, 2}.release(); From 846816f611b449c8724b5601cd4db23b72f42cbc Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 30 Mar 2021 16:27:38 -0600 Subject: [PATCH 20/32] Minor cleanup for the tests --- cpp/tests/copying/scatter_struct_tests.cu | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/tests/copying/scatter_struct_tests.cu b/cpp/tests/copying/scatter_struct_tests.cu index 59752424670..8ca07ff1dd1 100644 --- a/cpp/tests/copying/scatter_struct_tests.cu +++ b/cpp/tests/copying/scatter_struct_tests.cu @@ -269,8 +269,7 @@ TYPED_TEST(TypedStructScatterTest, ComplexDataScatterTest) TYPED_TEST(TypedStructScatterTest, ScatterStructOfListsTest) { // Testing gather() on struct> - using col_wrapper = cudf::test::fixed_width_column_wrapper; - using lists_col = cudf::test::lists_column_wrapper; + using lists_col = cudf::test::lists_column_wrapper; // Source data auto lists_col_src = From 5aee3ec60480b917a2d4c2f4ccad55796fd093b3 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 30 Mar 2021 17:57:59 -0600 Subject: [PATCH 21/32] 
Cleanup `scatter.cuh` --- cpp/include/cudf/detail/scatter.cuh | 155 +++++++++------------------- 1 file changed, 51 insertions(+), 104 deletions(-) diff --git a/cpp/include/cudf/detail/scatter.cuh b/cpp/include/cudf/detail/scatter.cuh index dec3eaea7de..82ba5b70912 100644 --- a/cpp/include/cudf/detail/scatter.cuh +++ b/cpp/include/cudf/detail/scatter.cuh @@ -86,28 +86,33 @@ auto scatter_to_gather(MapIterator scatter_map_begin, return gather_map; } +/** + * @brief Create a complement map of `scatter_to_gather` map + * + * The output result of this mapping is firstly initialized as a identity-mapping (`output[i] = i`). + * Then, for each `i`, the value `output[scatter_map[i]]` is set to `gather_rows`, where + * `gather_rows` is an out-of-bound index to identify the pass-through entries when calling the + * `gather_bitmask()` function. + * + * The purpose of this map is to create a identity-mapping for the rows that are not touched by the + * `scatter_map`. + */ template -rmm::device_uvector scatter_to_gather_inv(MapIterator scatter_map_begin, - MapIterator scatter_map_end, - size_type gather_rows, - rmm::cuda_stream_view stream) +rmm::device_uvector scatter_to_gather_complement(MapIterator scatter_map_begin, + MapIterator scatter_map_end, + size_type gather_rows, + rmm::cuda_stream_view stream) { using MapValueType = typename thrust::iterator_traits::value_type; auto gather_map = rmm::device_uvector(gather_rows, stream); thrust::sequence(rmm::exec_policy(stream), gather_map.begin(), gather_map.end(), 0); - - // Convert scatter map to a gather map thrust::for_each( rmm::exec_policy(stream), thrust::make_counting_iterator(0), thrust::make_counting_iterator(std::distance(scatter_map_begin, scatter_map_end)), - [gather_rows, ptr = gather_map.begin(), scatter_map_begin = scatter_map_begin] __device__( - MapValueType idx) { - MapValueType row = *(scatter_map_begin + idx); - ptr[row] = gather_rows; - }); - + [gather_rows, out_ptr = gather_map.begin(), scatter_map_ptr = 
scatter_map_begin] __device__( + MapValueType idx) { out_ptr[scatter_map_ptr[idx]] = gather_rows; }); return gather_map; } @@ -251,10 +256,10 @@ struct column_scatterer_impl { "Scatter source and target are not of the same type."); auto const scatter_map_size = std::distance(scatter_map_begin, scatter_map_end); - if (scatter_map_size == 0) { return empty_like(source); } + if (scatter_map_size == 0) { return std::make_unique(target, stream, mr); } - structs_column_view structs_src(source); - structs_column_view structs_target(target); + structs_column_view const structs_src(source); + structs_column_view const structs_target(target); std::vector> output_struct_members(structs_src.num_children()); std::transform(structs_src.child_begin(), @@ -273,39 +278,15 @@ struct column_scatterer_impl { mr); }); - auto const nullable = std::any_of(structs_src.child_begin(), - structs_src.child_end(), - [](auto const& col) { return col.nullable(); }) or - std::any_of(structs_target.child_begin(), - structs_target.child_end(), - [](auto const& col) { return col.nullable(); }); - if (nullable) { + auto const child_nullable = std::any_of(structs_src.child_begin(), + structs_src.child_end(), + [](auto const& col) { return col.nullable(); }) or + std::any_of(structs_target.child_begin(), + structs_target.child_end(), + [](auto const& col) { return col.nullable(); }); + if (child_nullable) { auto const gather_map = scatter_to_gather(scatter_map_begin, scatter_map_end, source.size(), stream); - - int n = (int)std::distance(gather_map.begin(), gather_map.end()); - // thrust::host_vector h(gather_map.begin(), gather_map.end()); - printf("\n\n"); - // for (int i = 0; i < n; ++i) { printf("h: %d\n", gather_map.element(i, stream)); } - - printf("line %d \n\n", __LINE__); - cudf::test::print(*structs_src.child_begin()); - - printf("line %d \n\n", __LINE__); - cudf::test::print(*structs_target.child_begin()); - - printf("line %d \n\n", __LINE__); - 
cudf::test::print((*output_struct_members.begin())->view()); - - printf("map siE:%d\n", (int)std::distance(gather_map.begin(), gather_map.end())); - printf("\n\n"); - printf("num row: %d\n", source.size()); - printf("num c row: %d\n", structs_src.child_begin()->size()); - - printf("source null count: %d\n", (*structs_src.child_begin()).null_count()); - printf("target null count: %d\n", (*structs_target.child_begin()).null_count()); - printf("result null count: %d\n", (*output_struct_members.begin())->view().null_count()); - gather_bitmask(cudf::table_view{std::vector{structs_src.child_begin(), structs_src.child_end()}}, gather_map.begin(), @@ -313,48 +294,31 @@ struct column_scatterer_impl { gather_bitmask_op::PASSTHROUGH, stream, mr); - - printf("result null count again: %d\n", - (*output_struct_members.begin())->view().null_count()); - - printf("line %d \n\n", __LINE__); - cudf::test::print((*output_struct_members.begin())->view()); } + // Need to put the result column in a vector to call gather_bitmask std::vector> result; - result.emplace_back(cudf::make_structs_column( - source.size(), - std::move(output_struct_members), - 0, - rmm::device_buffer{0, stream, mr}, // Null mask will be fixed up in cudf::scatter(). 
- stream, - mr)); - - // Only gather bitmask from the target at the positions that have not been scatter onto - auto const gather_map = - scatter_to_gather_inv(scatter_map_begin, scatter_map_end, source.size(), stream); - int n = (int)std::distance(gather_map.begin(), gather_map.end()); - for (int i = 0; i < n; ++i) { printf("h n: %d\n", gather_map.element(i, stream)); } - gather_bitmask(table_view{std::vector{target}}, - gather_map.begin(), - result, - gather_bitmask_op::PASSTHROUGH, - stream, - mr); + result.emplace_back(cudf::make_structs_column(source.size(), + std::move(output_struct_members), + 0, + rmm::device_buffer{0, stream, mr}, + stream, + mr)); + + // Only gather bitmask from the target column at the rows that have not been scatter onto + // bitmask from the source column will be gather at the top level call to `scatter()` + if (target.nullable()) { + auto const gather_map = + scatter_to_gather_complement(scatter_map_begin, scatter_map_end, target.size(), stream); + gather_bitmask(table_view{std::vector{target}}, + gather_map.begin(), + result, + gather_bitmask_op::PASSTHROUGH, + stream, + mr); + } return std::move(result.front()); - - // std::vector> output_struct_members(structs_src.num_children()); - // for (auto& col : output_struct_members) { col->set_null_count(0); } - // - // return cudf::make_structs_column( - // source.size(), - // std::move(output_struct_members), - // target.null_count(), - // cudf::detail::copy_bitmask( - // target, stream, mr), // Null mask will be fixed up in cudf::scatter(). - // stream, - // mr); } }; @@ -422,13 +386,11 @@ std::unique_ptr
scatter( // Transform negative indices to index + target size auto updated_scatter_map_begin = thrust::make_transform_iterator(scatter_map_begin, index_converter{target.num_rows()}); - auto updated_scatter_map_end = thrust::make_transform_iterator(scatter_map_end, index_converter{target.num_rows()}); - auto result = std::vector>(target.num_columns()); - - auto scatter_functor = column_scatterer{}; + auto result = std::vector>(target.num_columns()); + auto const scatter_functor = column_scatterer{}; std::transform(source.begin(), source.end(), @@ -445,29 +407,14 @@ std::unique_ptr
scatter( mr); }); - printf("line %d \n\n", __LINE__); - auto const nullable = std::any_of(source.begin(), source.end(), [](auto const& col) { return col.nullable(); }) or std::any_of(target.begin(), target.end(), [](auto const& col) { return col.nullable(); }); if (nullable) { - printf("nullable\n"); - auto gather_map = scatter_to_gather( + auto const gather_map = scatter_to_gather( updated_scatter_map_begin, updated_scatter_map_end, target.num_rows(), stream); - - int n = (int)std::distance(gather_map.begin(), gather_map.end()); - // thrust::host_vector h(gather_map.begin(), gather_map.end()); - printf("\n\n"); - // for (int i = 0; i < n; ++i) { printf("gather map: %d\n", gather_map.element(i, stream)); } - printf("source null count: %d\n", (*source.begin()).null_count()); - printf("target null count: %d\n", (*target.begin()).null_count()); - printf("result null count: %d\n", (*result.begin())->null_count()); - gather_bitmask(source, gather_map.begin(), result, gather_bitmask_op::PASSTHROUGH, stream, mr); - - printf("result null count again: %d\n", (*result.begin())->null_count()); - } else - printf("no t nullable\n"); + } return std::make_unique
(std::move(result)); } } // namespace detail From 3d39a6c5bd3e47d85342972aa6371db6bcec767c Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 30 Mar 2021 18:52:25 -0600 Subject: [PATCH 22/32] Fix build error due to dependency order --- cpp/include/cudf/detail/scatter.cuh | 30 ++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/cpp/include/cudf/detail/scatter.cuh b/cpp/include/cudf/detail/scatter.cuh index 33954fc3e6f..536500e3139 100644 --- a/cpp/include/cudf/detail/scatter.cuh +++ b/cpp/include/cudf/detail/scatter.cuh @@ -237,6 +237,20 @@ struct column_scatterer_impl { } }; +struct column_scatterer { + template + std::unique_ptr operator()(column_view const& source, + MapIterator scatter_map_begin, + MapIterator scatter_map_end, + column_view const& target, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const + { + column_scatterer_impl scatterer{}; + return scatterer(source, scatter_map_begin, scatter_map_end, target, stream, mr); + } +}; + template <> struct column_scatterer_impl { template @@ -264,7 +278,7 @@ struct column_scatterer_impl { [&scatter_map_begin, &scatter_map_end, stream, mr](auto const& source_col, auto const& target_col) { return type_dispatcher(source_col.type(), - column_scatterer{}, + column_scatterer{}, source_col, scatter_map_begin, scatter_map_end, @@ -317,20 +331,6 @@ struct column_scatterer_impl { } }; -struct column_scatterer { - template - std::unique_ptr operator()(column_view const& source, - MapIterator scatter_map_begin, - MapIterator scatter_map_end, - column_view const& target, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const - { - column_scatterer_impl scatterer{}; - return scatterer(source, scatter_map_begin, scatter_map_end, target, stream, mr); - } -}; - /** * @brief Scatters the rows of the source table into a copy of the target table * according to a scatter map. 
From 635753d05d2cbd227e3ddb5a04a91a0663c8450b Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 30 Mar 2021 19:45:54 -0600 Subject: [PATCH 23/32] Cleanup header --- cpp/include/cudf/detail/scatter.cuh | 2 -- 1 file changed, 2 deletions(-) diff --git a/cpp/include/cudf/detail/scatter.cuh b/cpp/include/cudf/detail/scatter.cuh index 536500e3139..d2e0e66be92 100644 --- a/cpp/include/cudf/detail/scatter.cuh +++ b/cpp/include/cudf/detail/scatter.cuh @@ -35,8 +35,6 @@ #include -#include - namespace cudf { namespace detail { From baab2240c7e4bdefb9eb84c5ce2790446b7d90bf Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 30 Mar 2021 19:53:16 -0600 Subject: [PATCH 24/32] Rewrite comments --- cpp/include/cudf/detail/scatter.cuh | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cpp/include/cudf/detail/scatter.cuh b/cpp/include/cudf/detail/scatter.cuh index d2e0e66be92..e8b4520e69a 100644 --- a/cpp/include/cudf/detail/scatter.cuh +++ b/cpp/include/cudf/detail/scatter.cuh @@ -87,13 +87,13 @@ auto scatter_to_gather(MapIterator scatter_map_begin, /** * @brief Create a complement map of `scatter_to_gather` map * - * The output result of this mapping is firstly initialized as a identity-mapping (`output[i] = i`). - * Then, for each `i`, the value `output[scatter_map[i]]` is set to `gather_rows`, where + * The output result of this mapping is firstly initialized as an identity-mapping (`output[i] = + * i`). Then, for each `i`, the value `output[scatter_map[i]]` is set to `gather_rows`, where * `gather_rows` is an out-of-bound index to identify the pass-through entries when calling the * `gather_bitmask()` function. * - * The purpose of this map is to create a identity-mapping for the rows that are not touched by the - * `scatter_map`. + * Therefore, the purpose of this map is to create an identity-mapping for the rows that are not + * touched by the `scatter_map`. 
*/ template rmm::device_uvector scatter_to_gather_complement(MapIterator scatter_map_begin, @@ -303,7 +303,7 @@ struct column_scatterer_impl { mr); } - // Need to put the result column in a vector to call gather_bitmask + // Need to put the result column in a vector to call `gather_bitmask` std::vector> result; result.emplace_back(cudf::make_structs_column(source.size(), std::move(output_struct_members), @@ -312,8 +312,8 @@ struct column_scatterer_impl { stream, mr)); - // Only gather bitmask from the target column at the rows that have not been scatter onto - // bitmask from the source column will be gather at the top level call to `scatter()` + // Only gather bitmask from the target column for the rows that have not been scatter onto + // The bitmask from the source column will be gathered at the top level `scatter()` call if (target.nullable()) { auto const gather_map = scatter_to_gather_complement(scatter_map_begin, scatter_map_end, target.size(), stream); From 08ea7fb8d98981de28c89013ae1ce80b4d9b3a0b Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 30 Mar 2021 19:54:18 -0600 Subject: [PATCH 25/32] Fix types for typed tests --- cpp/tests/copying/scatter_struct_tests.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/tests/copying/scatter_struct_tests.cu b/cpp/tests/copying/scatter_struct_tests.cu index 8ca07ff1dd1..90b8be55306 100644 --- a/cpp/tests/copying/scatter_struct_tests.cu +++ b/cpp/tests/copying/scatter_struct_tests.cu @@ -41,7 +41,7 @@ using TestTypes = cudf::test::Concat; -TYPED_TEST_CASE(TypedStructScatterTest, int); +TYPED_TEST_CASE(TypedStructScatterTest, TestTypes); namespace { void test_scatter(std::unique_ptr const& structs_src, From b61a779b79599b6665cef5658e24188294b063a2 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 30 Mar 2021 20:12:27 -0600 Subject: [PATCH 26/32] Rewrite tests --- cpp/tests/copying/scatter_struct_tests.cu | 43 ++++++++++------------- 1 file changed, 19 insertions(+), 24 deletions(-) 
diff --git a/cpp/tests/copying/scatter_struct_tests.cu b/cpp/tests/copying/scatter_struct_tests.cu index 90b8be55306..a9bb1980d53 100644 --- a/cpp/tests/copying/scatter_struct_tests.cu +++ b/cpp/tests/copying/scatter_struct_tests.cu @@ -27,10 +27,13 @@ #include -using namespace cudf::test; -using bools_col = fixed_width_column_wrapper; -using structs_col = structs_column_wrapper; -using strings_col = strings_column_wrapper; +using bools_col = cudf::test::fixed_width_column_wrapper; +using int32s_col = cudf::test::fixed_width_column_wrapper; +using structs_col = cudf::test::structs_column_wrapper; +using strings_col = cudf::test::strings_column_wrapper; + +constexpr int32_t null{0}; // Mark for null child elements +constexpr int32_t XXX{0}; // Mark for null struct elements template struct TypedStructScatterTest : public cudf::test::BaseFixture { @@ -67,7 +70,7 @@ TYPED_TEST(TypedStructScatterTest, EmptyInputTest) auto child_col_tgt = col_wrapper{}; auto const structs_tgt = structs_col{{child_col_tgt}, std::vector{}}.release(); - auto const scatter_map = fixed_width_column_wrapper{}.release(); + auto const scatter_map = int32s_col{}.release(); test_scatter(structs_src, structs_tgt, structs_src, scatter_map); test_scatter(structs_src, structs_tgt, structs_tgt, scatter_map); } @@ -75,9 +78,7 @@ TYPED_TEST(TypedStructScatterTest, EmptyInputTest) // Test case when only the scatter map is empty TYPED_TEST(TypedStructScatterTest, EmptyScatterMapTest) { - using col_wrapper = cudf::test::fixed_width_column_wrapper; - auto constexpr null = std::numeric_limits::max(); // Null child element - auto constexpr XXX = std::numeric_limits::max(); // Null struct element + using col_wrapper = cudf::test::fixed_width_column_wrapper; auto child_col_src = col_wrapper{{0, 1, 2, 3, null, XXX}, @@ -95,15 +96,13 @@ TYPED_TEST(TypedStructScatterTest, EmptyScatterMapTest) return i != 3; })}.release(); - auto const scatter_map = fixed_width_column_wrapper{}.release(); + auto const scatter_map = 
int32s_col{}.release(); test_scatter(structs_src, structs_tgt, structs_tgt, scatter_map); } TYPED_TEST(TypedStructScatterTest, ScatterAsCopyTest) { - using col_wrapper = cudf::test::fixed_width_column_wrapper; - auto constexpr null = std::numeric_limits::max(); // Null child element - auto constexpr XXX = std::numeric_limits::max(); // Null struct element + using col_wrapper = cudf::test::fixed_width_column_wrapper; auto child_col_src = col_wrapper{{0, 1, 2, 3, null, XXX}, @@ -122,15 +121,13 @@ TYPED_TEST(TypedStructScatterTest, ScatterAsCopyTest) })}.release(); // Scatter as copy: the target should be the same as source - auto const scatter_map = fixed_width_column_wrapper{0, 1, 2, 3, 4, 5}.release(); + auto const scatter_map = int32s_col{0, 1, 2, 3, 4, 5}.release(); test_scatter(structs_src, structs_tgt, structs_src, scatter_map); } TYPED_TEST(TypedStructScatterTest, ScatterAsLeftShiftTest) { - using col_wrapper = cudf::test::fixed_width_column_wrapper; - auto constexpr null = std::numeric_limits::max(); // Null child element - auto constexpr XXX = std::numeric_limits::max(); // Null struct element + using col_wrapper = cudf::test::fixed_width_column_wrapper; auto child_col_src = col_wrapper{{0, 1, 2, 3, null, XXX}, @@ -156,15 +153,13 @@ TYPED_TEST(TypedStructScatterTest, ScatterAsLeftShiftTest) return i != 3; })}.release(); - auto const scatter_map = fixed_width_column_wrapper{-2, -1, 0, 1, 2, 3}.release(); + auto const scatter_map = int32s_col{-2, -1, 0, 1, 2, 3}.release(); test_scatter(structs_src, structs_tgt, structs_expected, scatter_map); } TYPED_TEST(TypedStructScatterTest, SimpleScatterTests) { - using col_wrapper = cudf::test::fixed_width_column_wrapper; - auto constexpr null = std::numeric_limits::max(); // Null child element - auto constexpr XXX = std::numeric_limits::max(); // Null struct element + using col_wrapper = cudf::test::fixed_width_column_wrapper; // Source data auto child_col_src = @@ -192,7 +187,7 @@ TYPED_TEST(TypedStructScatterTest, 
SimpleScatterTests) {child_col_expected1}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; })}.release(); - auto const scatter_map1 = fixed_width_column_wrapper{-2, 0, 5}.release(); + auto const scatter_map1 = int32s_col{-2, 0, 5}.release(); test_scatter(structs_src, structs_tgt, structs_expected1, scatter_map1); // Expected data @@ -203,7 +198,7 @@ TYPED_TEST(TypedStructScatterTest, SimpleScatterTests) {child_col_expected2}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; })}.release(); - auto const scatter_map2 = fixed_width_column_wrapper{-2, 0, 5, 3}.release(); + auto const scatter_map2 = int32s_col{-2, 0, 5, 3}.release(); test_scatter(structs_src, structs_tgt, structs_expected2, scatter_map2); } @@ -262,7 +257,7 @@ TYPED_TEST(TypedStructScatterTest, ComplexDataScatterTest) })}.release(); // The first element of the target is not overwritten - auto const scatter_map = fixed_width_column_wrapper{-1, 4, 3, 2, 1}.release(); + auto const scatter_map = int32s_col{-1, 4, 3, 2, 1}.release(); test_scatter(structs_src, structs_tgt, structs_expected, scatter_map); } @@ -293,6 +288,6 @@ TYPED_TEST(TypedStructScatterTest, ScatterStructOfListsTest) auto const structs_expected = structs_col{{lists_col_expected}}.release(); // The first 2 elements of the target is not overwritten - auto const scatter_map = fixed_width_column_wrapper{-3, -2, -1, 5, 4, 3, 2}.release(); + auto const scatter_map = int32s_col{-3, -2, -1, 5, 4, 3, 2}.release(); test_scatter(structs_src, structs_tgt, structs_expected, scatter_map); } From 580aa1f91ab8adbd25cbdd66fd2abd44dc208da9 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 31 Mar 2021 07:55:29 -0600 Subject: [PATCH 27/32] Rewrite `scatter_to_gather_complement` function --- cpp/include/cudf/detail/scatter.cuh | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/cpp/include/cudf/detail/scatter.cuh b/cpp/include/cudf/detail/scatter.cuh index 
e8b4520e69a..9482ea1c844 100644 --- a/cpp/include/cudf/detail/scatter.cuh +++ b/cpp/include/cudf/detail/scatter.cuh @@ -88,9 +88,10 @@ auto scatter_to_gather(MapIterator scatter_map_begin, * @brief Create a complement map of `scatter_to_gather` map * * The output result of this mapping is firstly initialized as an identity-mapping (`output[i] = - * i`). Then, for each `i`, the value `output[scatter_map[i]]` is set to `gather_rows`, where - * `gather_rows` is an out-of-bound index to identify the pass-through entries when calling the - * `gather_bitmask()` function. + * i`). Then, for each index `idx` from `scatter_map`, the value `output[idx]` is set to + * `gather_rows`, where `gather_rows` is the number of rows in the target column, which is also an + * out-of-bound index to identify the pass-through entries when calling the `gather_bitmask()` + * function. * * Therefore, the purpose of this map is to create an identity-mapping for the rows that are not * touched by the `scatter_map`. 
@@ -101,16 +102,14 @@ rmm::device_uvector scatter_to_gather_complement(MapIterator scatter_ size_type gather_rows, rmm::cuda_stream_view stream) { - using MapValueType = typename thrust::iterator_traits::value_type; - auto gather_map = rmm::device_uvector(gather_rows, stream); thrust::sequence(rmm::exec_policy(stream), gather_map.begin(), gather_map.end(), 0); - thrust::for_each( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(std::distance(scatter_map_begin, scatter_map_end)), - [gather_rows, out_ptr = gather_map.begin(), scatter_map_ptr = scatter_map_begin] __device__( - MapValueType idx) { out_ptr[scatter_map_ptr[idx]] = gather_rows; }); + thrust::for_each(rmm::exec_policy(stream), + scatter_map_begin, + scatter_map_end, + [gather_rows, out_ptr = gather_map.begin()] __device__(auto idx) { + out_ptr[idx] = gather_rows; + }); return gather_map; } From 1111ff532914e447e7bc7a1b5e752c1887ffde51 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 31 Mar 2021 12:51:24 -0600 Subject: [PATCH 28/32] Replace `gather_rows` by `numeric_limits::lowest()`, and replace `thrust::for_each` by `thrust::scatter` --- cpp/include/cudf/detail/scatter.cuh | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/cpp/include/cudf/detail/scatter.cuh b/cpp/include/cudf/detail/scatter.cuh index 9482ea1c844..5c39e239368 100644 --- a/cpp/include/cudf/detail/scatter.cuh +++ b/cpp/include/cudf/detail/scatter.cuh @@ -97,19 +97,23 @@ auto scatter_to_gather(MapIterator scatter_map_begin, * touched by the `scatter_map`. 
*/ template -rmm::device_uvector scatter_to_gather_complement(MapIterator scatter_map_begin, - MapIterator scatter_map_end, - size_type gather_rows, - rmm::cuda_stream_view stream) +auto scatter_to_gather_complement(MapIterator scatter_map_begin, + MapIterator scatter_map_end, + size_type gather_rows, + rmm::cuda_stream_view stream) { auto gather_map = rmm::device_uvector(gather_rows, stream); thrust::sequence(rmm::exec_policy(stream), gather_map.begin(), gather_map.end(), 0); - thrust::for_each(rmm::exec_policy(stream), - scatter_map_begin, - scatter_map_end, - [gather_rows, out_ptr = gather_map.begin()] __device__(auto idx) { - out_ptr[idx] = gather_rows; - }); + + auto const out_of_bounds_begin = + thrust::make_constant_iterator(std::numeric_limits::lowest()); + auto const out_of_bounds_end = + out_of_bounds_begin + thrust::distance(scatter_map_begin, scatter_map_end); + thrust::scatter(rmm::exec_policy(stream), + out_of_bounds_begin, + out_of_bounds_end, + scatter_map_begin, + gather_map.begin()); return gather_map; } @@ -311,7 +315,7 @@ struct column_scatterer_impl { stream, mr)); - // Only gather bitmask from the target column for the rows that have not been scatter onto + // Only gather bitmask from the target column for the rows that have not been scattered onto // The bitmask from the source column will be gathered at the top level `scatter()` call if (target.nullable()) { auto const gather_map = From 3f66dbd9fe4e92755d9a3133294a753171ee0ff6 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 31 Mar 2021 12:59:04 -0600 Subject: [PATCH 29/32] Replace `gather_rows` by `numeric_limits::lowest()` again, and rewrite comments --- cpp/include/cudf/detail/scatter.cuh | 34 +++++++++++++++-------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/cpp/include/cudf/detail/scatter.cuh b/cpp/include/cudf/detail/scatter.cuh index 5c39e239368..f6f16584b7a 100644 --- a/cpp/include/cudf/detail/scatter.cuh +++ b/cpp/include/cudf/detail/scatter.cuh @@ 
-45,10 +45,9 @@ namespace detail { * function using the PASSTHROUGH op since the resulting map may contain index * values outside the target's range. * - * First, the gather-map is initialized with invalid entries. - * The gather_rows is used since it should always be outside the target size. - * - * Then, the `output[scatter_map[i]] = i`. + * First, the gather-map is initialized with an invalid index. + * The value `numeric_limits::lowest()` is used since it should always be outside the target size. + * Then, `output[scatter_map[i]] = i` for each `i`. * * @tparam MapIterator Iterator type of the input scatter map. * @param scatter_map_begin Beginning of scatter map. @@ -65,13 +64,16 @@ auto scatter_to_gather(MapIterator scatter_map_begin, { using MapValueType = typename thrust::iterator_traits::value_type; - // The gather_map is initialized with gather_rows value to identify pass-through entries - // when calling the gather_bitmask() which applies a pass-through whenever it finds a + // The gather_map is initialized with `numeric_limits::lowest()` value to identify pass-through + // entries when calling the gather_bitmask() which applies a pass-through whenever it finds a // value outside the range of the target column. - // We'll use the gather_rows value for this since it should always be outside the valid range. + // We'll use the `numeric_limits::lowest()` value for this since it should always be outside the + // valid range. 
auto gather_map = rmm::device_uvector(gather_rows, stream); - thrust::uninitialized_fill( - rmm::exec_policy(stream), gather_map.begin(), gather_map.end(), gather_rows); + thrust::uninitialized_fill(rmm::exec_policy(stream), + gather_map.begin(), + gather_map.end(), + std::numeric_limits::lowest()); // Convert scatter map to a gather map thrust::scatter( @@ -87,14 +89,14 @@ auto scatter_to_gather(MapIterator scatter_map_begin, /** * @brief Create a complement map of `scatter_to_gather` map * - * The output result of this mapping is firstly initialized as an identity-mapping (`output[i] = - * i`). Then, for each index `idx` from `scatter_map`, the value `output[idx]` is set to - * `gather_rows`, where `gather_rows` is the number of rows in the target column, which is also an - * out-of-bound index to identify the pass-through entries when calling the `gather_bitmask()` - * function. - * - * Therefore, the purpose of this map is to create an identity-mapping for the rows that are not + * The purpose of this map is to create an identity-mapping for the rows that are not * touched by the `scatter_map`. + * + * The output result of this mapping is firstly initialized as an identity-mapping + * (i.e., `output[i] = i`). Then, for each value `idx` from `scatter_map`, the value `output[idx]` + * is set to `numeric_limits::lowest()`, which is an invalid, out-of-bound index to identify the + * pass-through entries when calling the `gather_bitmask()` function. 
+ * */ template <typename MapIterator> auto scatter_to_gather_complement(MapIterator scatter_map_begin, From 72facd75e6357fc9a6368b14df4d5e60c203cfb6 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 31 Mar 2021 13:18:51 -0600 Subject: [PATCH 30/32] Small optimization for `gather()`: Avoid calling to `gather_bitmask` if don't need to --- cpp/include/cudf/detail/gather.cuh | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/cpp/include/cudf/detail/gather.cuh b/cpp/include/cudf/detail/gather.cuh index e1aa7ba980c..e4ed6f10e42 100644 --- a/cpp/include/cudf/detail/gather.cuh +++ b/cpp/include/cudf/detail/gather.cuh @@ -657,11 +657,15 @@ std::unique_ptr<table>
gather( mr)); } - gather_bitmask_op const op = bounds_policy == out_of_bounds_policy::NULLIFY - ? gather_bitmask_op::NULLIFY - : gather_bitmask_op::DONT_CHECK; - - gather_bitmask(source_table, gather_map_begin, destination_columns, op, stream, mr); + auto const nullable = bounds_policy == out_of_bounds_policy::NULLIFY || + std::any_of(source_table.begin(), source_table.end(), [](auto const& col) { + return col.nullable(); + }); + if (nullable) { + auto const op = bounds_policy == out_of_bounds_policy::NULLIFY ? gather_bitmask_op::NULLIFY + : gather_bitmask_op::DONT_CHECK; + gather_bitmask(source_table, gather_map_begin, destination_columns, op, stream, mr); + } return std::make_unique<table>
(std::move(destination_columns)); } From d562bfc5aa1c12b179b2d88926f60cbb0b80c790 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 31 Mar 2021 13:26:56 -0600 Subject: [PATCH 31/32] Add comments --- cpp/include/cudf/detail/scatter.cuh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cpp/include/cudf/detail/scatter.cuh b/cpp/include/cudf/detail/scatter.cuh index f6f16584b7a..23f0c238913 100644 --- a/cpp/include/cudf/detail/scatter.cuh +++ b/cpp/include/cudf/detail/scatter.cuh @@ -290,6 +290,9 @@ struct column_scatterer_impl { mr); }); + // We still need to call `gather_bitmask` even when the source's children are not nullable, + // as if the target's children have null_masks, then those null_masks need to be updated after + // being scattered onto auto const child_nullable = std::any_of(structs_src.child_begin(), structs_src.child_end(), [](auto const& col) { return col.nullable(); }) or @@ -417,6 +420,8 @@ std::unique_ptr<table>
scatter( mr); }); + // We still need to call `gather_bitmask` even when the source columns are not nullable, + // as if the target has null_mask, then that null_mask needs to be updated after scattering auto const nullable = std::any_of(source.begin(), source.end(), [](auto const& col) { return col.nullable(); }) or std::any_of(target.begin(), target.end(), [](auto const& col) { return col.nullable(); }); From 871be28e1476cfa038ab49832278a65ab2725293 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 31 Mar 2021 13:36:11 -0600 Subject: [PATCH 32/32] Rewrite comments --- cpp/include/cudf/detail/scatter.cuh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/include/cudf/detail/scatter.cuh b/cpp/include/cudf/detail/scatter.cuh index 23f0c238913..c1e246122e5 100644 --- a/cpp/include/cudf/detail/scatter.cuh +++ b/cpp/include/cudf/detail/scatter.cuh @@ -291,7 +291,7 @@ struct column_scatterer_impl { }); // We still need to call `gather_bitmask` even when the source's children are not nullable, - // as if the target's children have null_masks, then those null_masks need to be updated after + // as if the target's children have null_masks, those null_masks need to be updated after // being scattered onto auto const child_nullable = std::any_of(structs_src.child_begin(), structs_src.child_end(), @@ -421,7 +421,7 @@ std::unique_ptr<table>
scatter( }); // We still need to call `gather_bitmask` even when the source columns are not nullable, - // as if the target has null_mask, then that null_mask needs to be updated after scattering + // as if the target has null_mask, that null_mask needs to be updated after scattering auto const nullable = std::any_of(source.begin(), source.end(), [](auto const& col) { return col.nullable(); }) or std::any_of(target.begin(), target.end(), [](auto const& col) { return col.nullable(); });