From f60b127648cb9447842e2873fb417d68778ff6b8 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 2 Apr 2021 12:53:13 -0600 Subject: [PATCH 01/26] Extract DictionarySearchTest from SearchTest --- cpp/tests/CMakeLists.txt | 5 +- cpp/tests/search/dictionary_search_test.cpp | 108 ++++++++++++++++++++ cpp/tests/search/search_test.cpp | 79 +------------- 3 files changed, 113 insertions(+), 79 deletions(-) create mode 100644 cpp/tests/search/dictionary_search_test.cpp diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 11ee7f6c458..d0d7b7f4be6 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -288,7 +288,10 @@ ConfigureTest(FILLING_TEST ################################################################################################### # - search test ----------------------------------------------------------------------------------- -ConfigureTest(SEARCH_TEST search/search_test.cpp) +ConfigureTest(SEARCH_TEST + search/search_test.cpp + search/dictionary_search_test.cpp + search/struct_search_test.cpp) ################################################################################################### # - reshape test ---------------------------------------------------------------------------------- diff --git a/cpp/tests/search/dictionary_search_test.cpp b/cpp/tests/search/dictionary_search_test.cpp new file mode 100644 index 00000000000..0f70776ef0f --- /dev/null +++ b/cpp/tests/search/dictionary_search_test.cpp @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include + +struct DictionarySearchTest : public cudf::test::BaseFixture { +}; + +using cudf::numeric_scalar; +using cudf::size_type; +using cudf::string_scalar; +using cudf::test::fixed_width_column_wrapper; + +TEST_F(DictionarySearchTest, search_dictionary) +{ + cudf::test::dictionary_column_wrapper input( + {"", "", "10", "10", "20", "20", "30", "40"}, {0, 0, 1, 1, 1, 1, 1, 1}); + cudf::test::dictionary_column_wrapper values( + {"", "08", "10", "11", "30", "32", "90"}, {0, 1, 1, 1, 1, 1, 1}); + + auto result = cudf::upper_bound({cudf::table_view{{input}}}, + {cudf::table_view{{values}}}, + {cudf::order::ASCENDING}, + {cudf::null_order::BEFORE}); + fixed_width_column_wrapper expect_upper{2, 2, 4, 4, 7, 7, 8}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect_upper); + + result = cudf::lower_bound({cudf::table_view{{input}}}, + {cudf::table_view{{values}}}, + {cudf::order::ASCENDING}, + {cudf::null_order::BEFORE}); + fixed_width_column_wrapper expect_lower{0, 2, 2, 4, 6, 7, 8}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect_lower); +} + +TEST_F(DictionarySearchTest, search_table_dictionary) +{ + fixed_width_column_wrapper column_0{{10, 10, 20, 20, 20, 20, 20, 20, 20, 50, 30}, + {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0}}; + fixed_width_column_wrapper column_1{{5.0, 6.0, .5, .5, .5, .5, .7, .7, .7, .7, .5}, + {1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1}}; + cudf::test::dictionary_column_wrapper column_2{ + {90, 95, 77, 78, 79, 76, 61, 62, 63, 41, 50}, {1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1}}; + cudf::table_view input({column_0, column_1, column_2}); + + fixed_width_column_wrapper values_0{{10, 40, 20}, {1, 0, 1}}; + fixed_width_column_wrapper values_1{{6., .5, .5}, {0, 1, 1}}; + cudf::test::dictionary_column_wrapper values_2{{95, 50, 77}, {1, 1, 0}}; + cudf::table_view values({values_0, values_1, values_2}); + + std::vector order_flags{ + {cudf::order::ASCENDING, cudf::order::ASCENDING, cudf::order::DESCENDING}}; + std::vector null_order_flags{ + {cudf::null_order::AFTER, cudf::null_order::AFTER, cudf::null_order::AFTER}}; + + auto result = cudf::lower_bound(input, values, order_flags, null_order_flags); + fixed_width_column_wrapper expect_lower{1, 10, 2}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect_lower); + + result = cudf::upper_bound(input, values, order_flags, null_order_flags); + fixed_width_column_wrapper expect_upper{2, 11, 6}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect_upper); +} + +TEST_F(DictionarySearchTest, contains_dictionary) +{ + cudf::test::dictionary_column_wrapper column( + {"00", "00", "17", "17", "23", "23", "29"}); + EXPECT_TRUE(cudf::contains(column, string_scalar{"23"})); + EXPECT_FALSE(cudf::contains(column, string_scalar{"28"})); + + cudf::test::dictionary_column_wrapper needles({"00", "17", "23", "27"}); + fixed_width_column_wrapper expect{1, 1, 1, 1, 1, 1, 0}; + auto result = cudf::contains(column, needles); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect); +} + +TEST_F(DictionarySearchTest, contains_nullable_dictionary) +{ + cudf::test::dictionary_column_wrapper column({0, 0, 17, 17, 23, 23, 29}, + {1, 0, 1, 1, 1, 1, 1}); + EXPECT_TRUE(cudf::contains(column, numeric_scalar{23})); + EXPECT_FALSE(cudf::contains(column, numeric_scalar{28})); + + cudf::test::dictionary_column_wrapper needles({0, 17, 23, 27}); + fixed_width_column_wrapper expect({1, 0, 1, 1, 1, 1, 0}, {1, 0, 1, 1, 1, 1, 1}); + auto result = cudf::contains(column, needles); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect); +} diff --git a/cpp/tests/search/search_test.cpp b/cpp/tests/search/search_test.cpp index f5136f321da..bf52c2609c4 100644 --- a/cpp/tests/search/search_test.cpp +++ b/cpp/tests/search/search_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -1618,83 +1618,6 @@ TEST_F(SearchTest, contains_nullable_column_false_string) ASSERT_EQ(result, expect); } -TEST_F(SearchTest, search_dictionary) -{ - cudf::test::dictionary_column_wrapper input( - {"", "", "10", "10", "20", "20", "30", "40"}, {0, 0, 1, 1, 1, 1, 1, 1}); - cudf::test::dictionary_column_wrapper values( - {"", "08", "10", "11", "30", "32", "90"}, {0, 1, 1, 1, 1, 1, 1}); - - auto result = cudf::upper_bound({cudf::table_view{{input}}}, - {cudf::table_view{{values}}}, - {cudf::order::ASCENDING}, - {cudf::null_order::BEFORE}); - fixed_width_column_wrapper expect_upper{2, 2, 4, 4, 7, 7, 8}; - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect_upper); - - result = cudf::lower_bound({cudf::table_view{{input}}}, - {cudf::table_view{{values}}}, - {cudf::order::ASCENDING}, - {cudf::null_order::BEFORE}); - fixed_width_column_wrapper expect_lower{0, 2, 2, 4, 6, 7, 8}; - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect_lower); -} - -TEST_F(SearchTest, search_table_dictionary) -{ - fixed_width_column_wrapper column_0{{10, 10, 20, 20, 20, 20, 20, 20, 20, 50, 30}, - {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0}}; - fixed_width_column_wrapper column_1{{5.0, 6.0, .5, .5, .5, .5, .7, .7, .7, .7, .5}, - {1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1}}; - cudf::test::dictionary_column_wrapper column_2{ - {90, 95, 77, 78, 79, 76, 61, 62, 63, 41, 50}, {1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1}}; - cudf::table_view input({column_0, column_1, column_2}); - - fixed_width_column_wrapper values_0{{10, 40, 20}, {1, 0, 1}}; - fixed_width_column_wrapper values_1{{6., .5, .5}, {0, 1, 1}}; - cudf::test::dictionary_column_wrapper values_2{{95, 50, 77}, {1, 1, 0}}; - cudf::table_view values({values_0, values_1, values_2}); - - std::vector order_flags{ - {cudf::order::ASCENDING, cudf::order::ASCENDING, cudf::order::DESCENDING}}; - std::vector null_order_flags{ - {cudf::null_order::AFTER, cudf::null_order::AFTER, cudf::null_order::AFTER}}; - - auto result = cudf::lower_bound(input, values, order_flags, null_order_flags); - fixed_width_column_wrapper expect_lower{1, 10, 2}; - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect_lower); - - result = cudf::upper_bound(input, values, order_flags, null_order_flags); - fixed_width_column_wrapper expect_upper{2, 11, 6}; - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect_upper); -} - -TEST_F(SearchTest, contains_dictionary) -{ - cudf::test::dictionary_column_wrapper column( - {"00", "00", "17", "17", "23", "23", "29"}); - EXPECT_TRUE(cudf::contains(column, string_scalar{"23"})); - EXPECT_FALSE(cudf::contains(column, string_scalar{"28"})); - - cudf::test::dictionary_column_wrapper needles({"00", "17", "23", "27"}); - fixed_width_column_wrapper expect{1, 1, 1, 1, 1, 1, 0}; - auto result = cudf::contains(column, needles); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect); -} - -TEST_F(SearchTest, contains_nullable_dictionary) -{ - cudf::test::dictionary_column_wrapper column({0, 0, 17, 17, 23, 23, 29}, - {1, 0, 1, 1, 1, 1, 1}); - EXPECT_TRUE(cudf::contains(column, numeric_scalar{23})); - EXPECT_FALSE(cudf::contains(column, numeric_scalar{28})); - - cudf::test::dictionary_column_wrapper needles({0, 17, 23, 27}); - fixed_width_column_wrapper expect({1, 0, 1, 1, 1, 1, 0}, {1, 0, 1, 1, 1, 1, 1}); - auto result = cudf::contains(column, needles); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect); -} - TEST_F(SearchTest, multi_contains_some) { using element_type = int64_t; From 611252bd387fff060246667b13acdd355bab0922 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 2 Apr 2021 12:53:23 -0600 Subject: [PATCH 02/26] Add file for StructSearchTest --- cpp/tests/search/struct_search_test.cpp | 108 ++++++++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 cpp/tests/search/struct_search_test.cpp diff --git a/cpp/tests/search/struct_search_test.cpp b/cpp/tests/search/struct_search_test.cpp new file mode 100644 index 00000000000..1766948b833 --- /dev/null +++ b/cpp/tests/search/struct_search_test.cpp @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include + +struct StructSearchTest : public cudf::test::BaseFixture { +}; + +using cudf::numeric_scalar; +using cudf::size_type; +using cudf::string_scalar; +using cudf::test::fixed_width_column_wrapper; + +TEST_F(StructSearchTest, search_dictionary) +{ + cudf::test::dictionary_column_wrapper input( + {"", "", "10", "10", "20", "20", "30", "40"}, {0, 0, 1, 1, 1, 1, 1, 1}); + cudf::test::dictionary_column_wrapper values( + {"", "08", "10", "11", "30", "32", "90"}, {0, 1, 1, 1, 1, 1, 1}); + + auto result = cudf::upper_bound({cudf::table_view{{input}}}, + {cudf::table_view{{values}}}, + {cudf::order::ASCENDING}, + {cudf::null_order::BEFORE}); + fixed_width_column_wrapper expect_upper{2, 2, 4, 4, 7, 7, 8}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect_upper); + + result = cudf::lower_bound({cudf::table_view{{input}}}, + {cudf::table_view{{values}}}, + {cudf::order::ASCENDING}, + {cudf::null_order::BEFORE}); + fixed_width_column_wrapper expect_lower{0, 2, 2, 4, 6, 7, 8}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect_lower); +} + +TEST_F(StructSearchTest, search_table_dictionary) +{ + fixed_width_column_wrapper column_0{{10, 10, 20, 20, 20, 20, 20, 20, 20, 50, 30}, + {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0}}; + fixed_width_column_wrapper column_1{{5.0, 6.0, .5, .5, .5, .5, .7, .7, .7, .7, .5}, + {1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1}}; + cudf::test::dictionary_column_wrapper column_2{ + {90, 95, 77, 78, 79, 76, 61, 62, 63, 41, 50}, {1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1}}; + cudf::table_view input({column_0, column_1, column_2}); + + fixed_width_column_wrapper values_0{{10, 40, 20}, {1, 0, 1}}; + fixed_width_column_wrapper values_1{{6., .5, .5}, {0, 1, 1}}; + cudf::test::dictionary_column_wrapper values_2{{95, 50, 77}, {1, 1, 0}}; + cudf::table_view values({values_0, values_1, values_2}); + + std::vector order_flags{ + {cudf::order::ASCENDING, cudf::order::ASCENDING, cudf::order::DESCENDING}}; + std::vector null_order_flags{ + {cudf::null_order::AFTER, cudf::null_order::AFTER, cudf::null_order::AFTER}}; + + auto result = cudf::lower_bound(input, values, order_flags, null_order_flags); + fixed_width_column_wrapper expect_lower{1, 10, 2}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect_lower); + + result = cudf::upper_bound(input, values, order_flags, null_order_flags); + fixed_width_column_wrapper expect_upper{2, 11, 6}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect_upper); +} + +TEST_F(StructSearchTest, contains_dictionary) +{ + cudf::test::dictionary_column_wrapper column( + {"00", "00", "17", "17", "23", "23", "29"}); + EXPECT_TRUE(cudf::contains(column, string_scalar{"23"})); + EXPECT_FALSE(cudf::contains(column, string_scalar{"28"})); + + cudf::test::dictionary_column_wrapper needles({"00", "17", "23", "27"}); + fixed_width_column_wrapper expect{1, 1, 1, 1, 1, 1, 0}; + auto result = cudf::contains(column, needles); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect); +} + +TEST_F(StructSearchTest, contains_nullable_dictionary) +{ + cudf::test::dictionary_column_wrapper column({0, 0, 17, 17, 23, 23, 29}, + {1, 0, 1, 1, 1, 1, 1}); + EXPECT_TRUE(cudf::contains(column, numeric_scalar{23})); + EXPECT_FALSE(cudf::contains(column, numeric_scalar{28})); + + cudf::test::dictionary_column_wrapper needles({0, 17, 23, 27}); + fixed_width_column_wrapper expect({1, 0, 1, 1, 1, 1, 0}, {1, 0, 1, 1, 1, 1, 1}); + auto result = cudf::contains(column, needles); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect); +} From aef67cc2c4907b2052cd538cfe4e95f185e9f89a Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 2 Apr 2021 13:02:24 -0600 Subject: [PATCH 03/26] Rename files --- cpp/tests/CMakeLists.txt | 6 +++--- ...ictionary_search_test.cpp => search_dictionary_test.cpp} | 1 - .../{struct_search_test.cpp => search_struct_test.cpp} | 4 +++- 3 files changed, 6 insertions(+), 5 deletions(-) rename cpp/tests/search/{dictionary_search_test.cpp => search_dictionary_test.cpp} (99%) rename cpp/tests/search/{struct_search_test.cpp => search_struct_test.cpp} (97%) diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index d0d7b7f4be6..ba525707550 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -289,9 +289,9 @@ ConfigureTest(FILLING_TEST ################################################################################################### # - search test ----------------------------------------------------------------------------------- ConfigureTest(SEARCH_TEST - search/search_test.cpp - search/dictionary_search_test.cpp - search/struct_search_test.cpp) + search/search_dictionary_test.cpp + search/search_struct_test.cpp + search/search_test.cpp) ################################################################################################### # - reshape test ---------------------------------------------------------------------------------- diff --git a/cpp/tests/search/dictionary_search_test.cpp b/cpp/tests/search/search_dictionary_test.cpp similarity index 99% rename from cpp/tests/search/dictionary_search_test.cpp rename to cpp/tests/search/search_dictionary_test.cpp index 0f70776ef0f..6b1caa5ed6f 100644 --- a/cpp/tests/search/dictionary_search_test.cpp +++ b/cpp/tests/search/search_dictionary_test.cpp @@ -19,7 +19,6 @@ #include #include -#include #include struct DictionarySearchTest : public cudf::test::BaseFixture { diff --git a/cpp/tests/search/struct_search_test.cpp b/cpp/tests/search/search_struct_test.cpp similarity index 97% rename from cpp/tests/search/struct_search_test.cpp rename to cpp/tests/search/search_struct_test.cpp index 1766948b833..1423e1ac36b 100644 --- a/cpp/tests/search/struct_search_test.cpp +++ b/cpp/tests/search/search_struct_test.cpp @@ -19,8 +19,10 @@ #include #include -#include +//#include +//#include #include +#include struct StructSearchTest : public cudf::test::BaseFixture { }; From 9e73bbb0da4dc6bc1406ecc41e6c1282d7dd2dde Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 2 Apr 2021 13:14:04 -0600 Subject: [PATCH 04/26] Remove test --- cpp/tests/search/search_struct_test.cpp | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/cpp/tests/search/search_struct_test.cpp b/cpp/tests/search/search_struct_test.cpp index 1423e1ac36b..14ab1e7c271 100644 --- a/cpp/tests/search/search_struct_test.cpp +++ b/cpp/tests/search/search_struct_test.cpp @@ -95,16 +95,3 @@ TEST_F(StructSearchTest, contains_dictionary) auto result = cudf::contains(column, needles); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect); } - -TEST_F(StructSearchTest, contains_nullable_dictionary) -{ - cudf::test::dictionary_column_wrapper column({0, 0, 17, 17, 23, 23, 29}, - {1, 0, 1, 1, 1, 1, 1}); - EXPECT_TRUE(cudf::contains(column, numeric_scalar{23})); - EXPECT_FALSE(cudf::contains(column, numeric_scalar{28})); - - cudf::test::dictionary_column_wrapper needles({0, 17, 23, 27}); - fixed_width_column_wrapper expect({1, 0, 1, 1, 1, 1, 0}, {1, 0, 1, 1, 1, 1, 1}); - auto result = cudf::contains(column, needles); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect); -} From 55bf49576e7f2e925210b2308ec9c61ef0c5079d Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 2 Apr 2021 14:18:30 -0600 Subject: [PATCH 05/26] Add one more test for StructSearchTest --- cpp/tests/search/search_struct_test.cpp | 273 +++++++++++++++++++++++- 1 file changed, 271 insertions(+), 2 deletions(-) diff --git a/cpp/tests/search/search_struct_test.cpp b/cpp/tests/search/search_struct_test.cpp index 14ab1e7c271..ca068af6cb9 100644 --- a/cpp/tests/search/search_struct_test.cpp +++ b/cpp/tests/search/search_struct_test.cpp @@ -19,11 +19,279 @@ #include #include -//#include -//#include +#include +#include #include #include +#include + +using bools_col = cudf::test::fixed_width_column_wrapper; +using int32s_col = cudf::test::fixed_width_column_wrapper; +using structs_col = cudf::test::structs_column_wrapper; +using strings_col = cudf::test::strings_column_wrapper; + +constexpr int32_t null{0}; // Mark for null child elements +constexpr int32_t XXX{0}; // Mark for null struct elements + +template +struct TypedStructSearchTest : public cudf::test::BaseFixture { +}; + +using TestTypes = cudf::test::Concat; + +TYPED_TEST_CASE(TypedStructSearchTest, TestTypes); + +namespace { +void test_search(std::unique_ptr const& t_col, + std::unique_ptr const& values_col, + int32s_col const& expected_lower_bound, + int32s_col const& expected_upper_bound, + std::vector const& column_orders = {cudf::order::ASCENDING}, + std::vector const& null_precedence = {cudf::null_order::BEFORE}) +{ + auto const t = cudf::table_view{std::vector{t_col->view()}}; + auto const values = cudf::table_view{std::vector{values_col->view()}}; + + auto const result_lower_bound = cudf::lower_bound(t, values, column_orders, null_precedence); + auto const result_upper_bound = cudf::upper_bound(t, values, column_orders, null_precedence); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_lower_bound, result_lower_bound->view()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_upper_bound, result_upper_bound->view()); +} +} // namespace + +// Test case when all input columns are empty +TYPED_TEST(TypedStructSearchTest, EmptyInputTest) +{ + using col_wrapper = cudf::test::fixed_width_column_wrapper; + + auto child_col_t = col_wrapper{}; + auto const structs_t = structs_col{{child_col_t}, std::vector{}}.release(); + + auto child_col_values = col_wrapper{}; + auto const structs_values = structs_col{{child_col_values}, std::vector{}}.release(); + + auto const expected = int32s_col{}; + test_search(structs_t, structs_values, expected, expected); +} + +// Test case when only the scatter map is empty +TYPED_TEST(TypedStructSearchTest, TrivialTest) +{ + using col_wrapper = cudf::test::fixed_width_column_wrapper; + + auto child_col_t = col_wrapper{10, 20, 30, 40, 50}; + auto const structs_t = structs_col{{child_col_t}}.release(); + + auto child_col_values1 = col_wrapper{0, 1, 2, 3, 4}; + auto const structs_values1 = structs_col{{child_col_values1}}.release(); + + auto child_col_values2 = col_wrapper{100, 101, 102, 103, 104}; + auto const structs_values2 = structs_col{{child_col_values2}}.release(); + + auto const expected1 = int32s_col{0, 0, 0, 0, 0}; + auto const expected2 = int32s_col{4, 4, 4, 4, 4}; + test_search(structs_t, structs_values1, expected1, expected1); + test_search(structs_t, structs_values2, expected2, expected2); +} + +#if 0 +TYPED_TEST(TypedStructSearchTest, ScatterAsCopyTest) +{ + using col_wrapper = cudf::test::fixed_width_column_wrapper; + + auto child_col_t = + col_wrapper{{0, 1, 2, 3, null, XXX}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; })}; + auto const structs_t = structs_col{ + {child_col_t}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { + return i != 5; + })}.release(); + + auto child_col_values = + col_wrapper{{50, null, 70, XXX, 90, 100}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; + auto const structs_values = structs_col{ + {child_col_values}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { + return i != 3; + })}.release(); + + // Scatter as copy: the target should be the same as source + auto const scatter_map = int32s_col{0, 1, 2, 3, 4, 5}.release(); + test_search(structs_t, structs_values, structs_t, scatter_map); +} + +TYPED_TEST(TypedStructSearchTest, ScatterAsLeftShiftTest) +{ + using col_wrapper = cudf::test::fixed_width_column_wrapper; + + auto child_col_t = + col_wrapper{{0, 1, 2, 3, null, XXX}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; })}; + auto const structs_t = structs_col{ + {child_col_t}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { + return i != 5; + })}.release(); + + auto child_col_values = + col_wrapper{{50, null, 70, XXX, 90, 100}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; + auto const structs_values = structs_col{ + {child_col_values}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { + return i != 3; + })}.release(); + + auto child_col_expected = + col_wrapper{{2, 3, null, XXX, 0, 1}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 2; })}; + auto structs_expected = structs_col{ + {child_col_expected}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { + return i != 3; + })}.release(); + + auto const scatter_map = int32s_col{-2, -1, 0, 1, 2, 3}.release(); + test_search(structs_t, structs_values, structs_expected, scatter_map); +} + +TYPED_TEST(TypedStructSearchTest, SimpleScatterTests) +{ + using col_wrapper = cudf::test::fixed_width_column_wrapper; + + // Source data + auto child_col_t = + col_wrapper{{0, 1, 2, 3, null, XXX}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; })}; + auto const structs_t = structs_col{ + {child_col_t}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { + return i != 5; + })}.release(); + + // Target data + auto child_col_values = + col_wrapper{{50, null, 70, XXX, 90, 100}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; + auto const structs_values = structs_col{ + {child_col_values}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { + return i != 3; + })}.release(); + + // Expected data + auto child_col_expected1 = + col_wrapper{{1, null, 70, XXX, 0, 2}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; + auto const structs_expected1 = structs_col{ + {child_col_expected1}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { + return i != 3; + })}.release(); + auto const scatter_map1 = int32s_col{-2, 0, 5}.release(); + test_search(structs_t, structs_values, structs_expected1, scatter_map1); + + // Expected data + auto child_col_expected2 = + col_wrapper{{1, null, 70, 3, 0, 2}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; + auto const structs_expected2 = structs_col{ + {child_col_expected2}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { + return true; + })}.release(); + auto const scatter_map2 = int32s_col{-2, 0, 5, 3}.release(); + test_search(structs_t, structs_values, structs_expected2, scatter_map2); +} + +TYPED_TEST(TypedStructSearchTest, ComplexDataScatterTest) +{ + // Testing scatter() on struct. + using col_wrapper = cudf::test::fixed_width_column_wrapper; + + // Source data + auto names_column_t = + strings_col{{"Newton", "Washington", "Cherry", "Kiwi", "Lemon", "Tomato"}, + cudf::detail::make_counting_transform_iterator(0, [](auto) { return true; })}; + auto ages_column_t = + col_wrapper{{5, 10, 15, 20, 25, 30}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; })}; + auto is_human_col_t = + bools_col{{true, true, false, false, false, false}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; })}; + + // Target data + auto names_column_values = + strings_col{{"String 0", "String 1", "String 2", "String 3", "String 4", "String 5"}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 0; })}; + auto ages_column_values = + col_wrapper{{50, 60, 70, 80, 90, 100}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; + auto is_human_col_values = + bools_col{{true, true, true, true, true, true}, + cudf::detail::make_counting_transform_iterator(0, [](auto) { return true; })}; + + // Expected data + auto names_column_expected = + strings_col{{"String 0", "Lemon", "Kiwi", "Cherry", "Washington", "Newton"}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 0; })}; + auto ages_column_expected = + col_wrapper{{50, 25, 20, 15, 10, 5}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; + auto is_human_col_expected = + bools_col{{true, false, false, false, true, true}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 2; })}; + + auto const structs_t = structs_col{ + {names_column_t, ages_column_t, is_human_col_t}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { + return i != 5; + })}.release(); + auto const structs_values = structs_col{ + {names_column_values, ages_column_values, is_human_col_values}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { + return i != 2; + })}.release(); + auto const structs_expected = structs_col{ + {names_column_expected, ages_column_expected, is_human_col_expected}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { + return true; + })}.release(); + + // The first element of the target is not overwritten + auto const scatter_map = int32s_col{-1, 4, 3, 2, 1}.release(); + test_search(structs_t, structs_values, structs_expected, scatter_map); +} + +TYPED_TEST(TypedStructSearchTest, ScatterStructOfListsTest) +{ + // Testing gather() on struct> + using lists_col = cudf::test::lists_column_wrapper; + + // Source data + auto lists_col_t = + lists_col{{{5}, {10, 15}, {20, 25, 30}, {35, 40, 45, 50}, {55, 60, 65}, {70, 75}, {80}, {}, {}}, + // Valid for elements 0, 3, 6,... + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return !(i % 3); })}; + auto const structs_t = structs_col{{lists_col_t}}.release(); + + // Target data + auto lists_col_values = + lists_col{{{1}, {2, 3}, {4, 5, 6}, {7, 8}, {9}, {10, 11, 12, 13}, {}, {14}, {15, 16}}, + // Valid for elements 1, 3, 5, 7,... + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; })}; + auto const structs_values = structs_col{{lists_col_values}}.release(); + + // Expected data + auto const validity_expected = std::vector{0, 1, 1, 0, 0, 1, 1, 0, 0}; + auto lists_col_expected = lists_col{ + {{1}, {2, 3}, {80}, {70, 75}, {55, 60, 65}, {35, 40, 45, 50}, {5}, {10, 15}, {20, 25, 30}}, + validity_expected.begin()}; + auto const structs_expected = structs_col{{lists_col_expected}}.release(); + + // The first 2 elements of the target is not overwritten + auto const scatter_map = int32s_col{-3, -2, -1, 5, 4, 3, 2}.release(); + test_search(structs_t, structs_values, structs_expected, scatter_map); +} + struct StructSearchTest : public cudf::test::BaseFixture { }; @@ -95,3 +363,4 @@ TEST_F(StructSearchTest, contains_dictionary) auto result = cudf::contains(column, needles); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect); } +#endif From a8d0a67a36d85d7610d5c11a0a602a14f707357b Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 2 Apr 2021 16:52:24 -0600 Subject: [PATCH 06/26] And TrivialInputTests for StructSearchTest --- cpp/tests/search/search_struct_test.cpp | 69 +++---------------------- 1 file changed, 6 insertions(+), 63 deletions(-) diff --git a/cpp/tests/search/search_struct_test.cpp b/cpp/tests/search/search_struct_test.cpp index ca068af6cb9..f8ecf9bd3f7 100644 --- a/cpp/tests/search/search_struct_test.cpp +++ b/cpp/tests/search/search_struct_test.cpp @@ -38,7 +38,7 @@ template struct TypedStructSearchTest : public cudf::test::BaseFixture { }; -using TestTypes = cudf::test::Concat; @@ -79,8 +79,7 @@ TYPED_TEST(TypedStructSearchTest, EmptyInputTest) test_search(structs_t, structs_values, expected, expected); } -// Test case when only the scatter map is empty -TYPED_TEST(TypedStructSearchTest, TrivialTest) +TYPED_TEST(TypedStructSearchTest, TrivialInputTests) { using col_wrapper = cudf::test::fixed_width_column_wrapper; @@ -94,70 +93,12 @@ TYPED_TEST(TypedStructSearchTest, TrivialTest) auto const structs_values2 = structs_col{{child_col_values2}}.release(); auto const expected1 = int32s_col{0, 0, 0, 0, 0}; - auto const expected2 = int32s_col{4, 4, 4, 4, 4}; + auto const expected2 = int32s_col{5, 5, 5, 5, 5}; test_search(structs_t, structs_values1, expected1, expected1); test_search(structs_t, structs_values2, expected2, expected2); } -#if 0 -TYPED_TEST(TypedStructSearchTest, ScatterAsCopyTest) -{ - using col_wrapper = cudf::test::fixed_width_column_wrapper; - - auto child_col_t = - col_wrapper{{0, 1, 2, 3, null, XXX}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; })}; - auto const structs_t = structs_col{ - {child_col_t}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { - return i != 5; - })}.release(); - - auto child_col_values = - col_wrapper{{50, null, 70, XXX, 90, 100}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; - auto const structs_values = structs_col{ - {child_col_values}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { - return i != 3; - })}.release(); - - // Scatter as copy: the target should be the same as source - auto const scatter_map = int32s_col{0, 1, 2, 3, 4, 5}.release(); - test_search(structs_t, structs_values, structs_t, scatter_map); -} - -TYPED_TEST(TypedStructSearchTest, ScatterAsLeftShiftTest) -{ - using col_wrapper = cudf::test::fixed_width_column_wrapper; - - auto child_col_t = - col_wrapper{{0, 1, 2, 3, null, XXX}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; })}; - auto const structs_t = structs_col{ - {child_col_t}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { - return i != 5; - })}.release(); - - auto child_col_values = - col_wrapper{{50, null, 70, XXX, 90, 100}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; - auto const structs_values = structs_col{ - {child_col_values}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { - return i != 3; - })}.release(); - - auto child_col_expected = - col_wrapper{{2, 3, null, XXX, 0, 1}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 2; })}; - auto structs_expected = structs_col{ - {child_col_expected}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { - return i != 3; - })}.release(); - - auto const scatter_map = int32s_col{-2, -1, 0, 1, 2, 3}.release(); - test_search(structs_t, structs_values, structs_expected, scatter_map); -} - -TYPED_TEST(TypedStructSearchTest, SimpleScatterTests) +TYPED_TEST(TypedStructSearchTest, SimpleInputWithNullsTests) { using col_wrapper = cudf::test::fixed_width_column_wrapper; @@ -201,6 +142,8 @@ TYPED_TEST(TypedStructSearchTest, SimpleScatterTests) auto const scatter_map2 = int32s_col{-2, 0, 5, 3}.release(); test_search(structs_t, structs_values, structs_expected2, scatter_map2); } +#if 0 + TYPED_TEST(TypedStructSearchTest, ComplexDataScatterTest) { From 1b063d0c63b4dfde742022d5707770becac53777 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 2 Apr 2021 16:52:40 -0600 Subject: [PATCH 07/26] Flatten columns for binary search --- cpp/src/search/search.cu | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/cpp/src/search/search.cu b/cpp/src/search/search.cu index 051d302c710..0ec5bff84b7 100644 --- a/cpp/src/search/search.cu +++ b/cpp/src/search/search.cu @@ -25,6 +25,7 @@ #include #include #include +#include #include @@ -100,13 +101,22 @@ std::unique_ptr search_ordered(table_view const& t, // This utility will ensure all corresponding dictionary columns have matching keys. // It will return any new dictionary columns created as well as updated table_views. - auto matched = dictionary::detail::match_dictionaries({t, values}, stream); - auto d_t = table_device_view::create(matched.second.front(), stream); - auto d_values = table_device_view::create(matched.second.back(), stream); + auto matched = dictionary::detail::match_dictionaries({t, values}, stream); + + // 0-table_view, 1-column_order, 2-null_precedence, 3-validity_columns + auto flattened_t = + structs::detail::flatten_nested_columns(matched.second.front(), column_order, null_precedence); + auto flattened_values = + structs::detail::flatten_nested_columns(matched.second.back(), column_order, null_precedence); + + auto d_t = table_device_view::create(std::get<0>(flattened_t), stream); + auto d_values = table_device_view::create(std::get<0>(flattened_values), stream); auto count_it = thrust::make_counting_iterator(0); - rmm::device_vector d_column_order(column_order.begin(), column_order.end()); - rmm::device_vector d_null_precedence(null_precedence.begin(), null_precedence.end()); + rmm::device_vector d_column_order(std::get<1>(flattened_t).begin(), + std::get<1>(flattened_t).end()); + rmm::device_vector d_null_precedence(std::get<2>(flattened_t).begin(), + std::get<2>(flattened_t).end()); if (has_nulls(t) or has_nulls(values)) { auto ineq_op = From a5984d0355249d9db915f5c16a8a8ee97f9dbe22 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 5 Apr 2021 07:57:33 -0600 Subject: [PATCH 08/26] Simplify binary search --- cpp/src/search/search.cu | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/src/search/search.cu b/cpp/src/search/search.cu index 0ec5bff84b7..ef6c32412a3 100644 --- a/cpp/src/search/search.cu +++ b/cpp/src/search/search.cu @@ -106,8 +106,7 @@ std::unique_ptr search_ordered(table_view const& t, // 0-table_view, 1-column_order, 2-null_precedence, 3-validity_columns auto flattened_t = structs::detail::flatten_nested_columns(matched.second.front(), column_order, null_precedence); - auto flattened_values = - structs::detail::flatten_nested_columns(matched.second.back(), column_order, null_precedence); + auto flattened_values = structs::detail::flatten_nested_columns(matched.second.back(), {}, {}); auto d_t = table_device_view::create(std::get<0>(flattened_t), stream); auto d_values = table_device_view::create(std::get<0>(flattened_values), stream); From 4fb33f957bcab1d150ace29fab5627606ae8be72 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 5 Apr 2021 07:58:02 -0600 Subject: [PATCH 09/26] Add more tests --- cpp/tests/search/search_struct_test.cpp | 293 ++++++++---------------- 1 file changed, 99 insertions(+), 194 deletions(-) diff --git a/cpp/tests/search/search_struct_test.cpp b/cpp/tests/search/search_struct_test.cpp index f8ecf9bd3f7..044efeae150 100644 --- a/cpp/tests/search/search_struct_test.cpp +++ b/cpp/tests/search/search_struct_test.cpp @@ -25,12 +25,14 @@ #include #include +#include using bools_col = cudf::test::fixed_width_column_wrapper; using int32s_col = cudf::test::fixed_width_column_wrapper; using structs_col = cudf::test::structs_column_wrapper; using strings_col = cudf::test::strings_column_wrapper; +constexpr bool print_all{true}; constexpr int32_t null{0}; // Mark for null child elements constexpr int32_t XXX{0}; // Mark for null struct elements @@ -46,21 +48,17 @@ using TestTypes = cudf::test::Concat const& t_col, - std::unique_ptr const& values_col, - int32s_col const& expected_lower_bound, - int32s_col const& expected_upper_bound, - std::vector const& column_orders = {cudf::order::ASCENDING}, - std::vector const& null_precedence = {cudf::null_order::BEFORE}) +auto search_bounds(std::unique_ptr const& t_col, + std::unique_ptr const& values_col, + std::vector const& column_orders = {cudf::order::ASCENDING}, + std::vector const& null_precedence = { + cudf::null_order::BEFORE}) { - auto const t = cudf::table_view{std::vector{t_col->view()}}; - auto const values = cudf::table_view{std::vector{values_col->view()}}; - - auto const result_lower_bound = cudf::lower_bound(t, values, column_orders, null_precedence); - auto const result_upper_bound = cudf::upper_bound(t, values, column_orders, null_precedence); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_lower_bound, result_lower_bound->view()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_upper_bound, result_upper_bound->view()); + auto const t = cudf::table_view{std::vector{t_col->view()}}; + auto const values = cudf::table_view{std::vector{values_col->view()}}; + auto result_lower_bound = cudf::lower_bound(t, values, column_orders, null_precedence); + auto result_upper_bound = cudf::upper_bound(t, values, column_orders, null_precedence); + return std::make_pair(std::move(result_lower_bound), std::move(result_upper_bound)); } } // namespace @@ -75,8 +73,10 @@ TYPED_TEST(TypedStructSearchTest, EmptyInputTest) auto child_col_values = col_wrapper{}; auto const structs_values = structs_col{{child_col_values}, std::vector{}}.release(); + auto const results = search_bounds(structs_t, structs_values); auto const expected = int32s_col{}; - test_search(structs_t, structs_values, expected, expected); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, results.first->view(), print_all); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, results.second->view(), print_all); } TYPED_TEST(TypedStructSearchTest, TrivialInputTests) @@ -92,218 +92,123 @@ TYPED_TEST(TypedStructSearchTest, TrivialInputTests) auto child_col_values2 = col_wrapper{100, 101, 102, 103, 104}; auto const structs_values2 = structs_col{{child_col_values2}}.release(); + auto const results1 = search_bounds(structs_t, structs_values1); auto const expected1 = int32s_col{0, 0, 0, 0, 0}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, results1.first->view(), print_all); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, results1.second->view(), print_all); + + auto const results2 = search_bounds(structs_t, structs_values2); auto const expected2 = int32s_col{5, 5, 5, 5, 5}; - test_search(structs_t, structs_values1, expected1, expected1); - test_search(structs_t, structs_values2, expected2, expected2); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, results2.first->view(), print_all); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, results2.second->view(), print_all); } TYPED_TEST(TypedStructSearchTest, SimpleInputWithNullsTests) { using col_wrapper = cudf::test::fixed_width_column_wrapper; - // Source data - auto child_col_t = - col_wrapper{{0, 1, 2, 3, null, XXX}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; })}; - auto const structs_t = structs_col{ - {child_col_t}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { - return i != 5; - })}.release(); - - // Target data auto child_col_values = - col_wrapper{{50, null, 70, XXX, 90, 100}, + col_wrapper{{1, null, 70, XXX, 2, 100}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; auto const structs_values = structs_col{ {child_col_values}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; })}.release(); - // Expected data - auto child_col_expected1 = - col_wrapper{{1, null, 70, XXX, 0, 2}, + // Sorted asc, nulls first + auto child_col_t1 = + col_wrapper{{XXX, null, 0, 1, 2, 2, 2, 2, 3, 3, 4}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; - auto const structs_expected1 = structs_col{ - {child_col_expected1}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { - return i != 3; + auto const structs_t1 = structs_col{ + {child_col_t1}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { + return i != 0; })}.release(); - auto const scatter_map1 = int32s_col{-2, 0, 5}.release(); - test_search(structs_t, structs_values, structs_expected1, scatter_map1); - // Expected data - auto child_col_expected2 = - col_wrapper{{1, null, 70, 3, 0, 2}, + auto results = + search_bounds(structs_t1, structs_values, {cudf::order::ASCENDING}, {cudf::null_order::BEFORE}); + auto expected_lower_bound = int32s_col{3, 1, 11, 0, 4, 11}; + auto expected_upper_bound = int32s_col{4, 2, 11, 1, 8, 11}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_lower_bound, results.first->view(), print_all); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_upper_bound, results.second->view(), print_all); + + // Sorted asc, nulls last + auto child_col_t2 = + col_wrapper{{0, 1, 2, 2, 2, 2, 3, 3, 4, null, XXX}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 9; })}; + auto const structs_t2 = structs_col{ + {child_col_t2}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { + return i != 10; + })}.release(); + results = + search_bounds(structs_t2, structs_values, {cudf::order::ASCENDING}, {cudf::null_order::AFTER}); + expected_lower_bound = int32s_col{1, 0, 10, 10, 2, 10}; + expected_upper_bound = int32s_col{1, 0, 10, 11, 6, 10}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_lower_bound, results.first->view(), print_all); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_upper_bound, results.second->view(), print_all); + + // Sorted dsc, nulls first + auto child_col_t3 = + col_wrapper{{XXX, null, 4, 3, 3, 2, 2, 2, 2, 1, 0}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; - auto const structs_expected2 = structs_col{ - {child_col_expected2}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { - return true; + auto const structs_t3 = structs_col{ + {child_col_t3}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { + return i != 0; + })}.release(); + results = + search_bounds(structs_t2, structs_values, {cudf::order::ASCENDING}, {cudf::null_order::AFTER}); + expected_lower_bound = int32s_col{1, 0, 10, 10, 2, 10}; + expected_upper_bound = int32s_col{1, 0, 10, 11, 6, 10}; + // CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_lower_bound, results.first->view(), print_all); + // CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_upper_bound, results.second->view(), print_all); + + // Sorted dsc, nulls last + auto child_col_t4 = + col_wrapper{{4, 3, 3, 2, 2, 2, 2, 1, 0, null, XXX}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 9; })}; + auto const structs_t4 = structs_col{ + {child_col_t4}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { + return i != 10; })}.release(); - auto const scatter_map2 = int32s_col{-2, 0, 5, 3}.release(); - test_search(structs_t, structs_values, structs_expected2, scatter_map2); + results = + search_bounds(structs_t2, structs_values, {cudf::order::ASCENDING}, {cudf::null_order::AFTER}); + expected_lower_bound = int32s_col{1, 0, 10, 10, 2, 10}; + expected_upper_bound = int32s_col{1, 0, 10, 11, 6, 10}; + // CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_lower_bound, results.first->view(), print_all); + // CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_upper_bound, results.second->view(), print_all); } -#if 0 - -TYPED_TEST(TypedStructSearchTest, ComplexDataScatterTest) +TYPED_TEST(TypedStructSearchTest, ComplexStructTest) { - // Testing scatter() on struct. + // Testing on struct. using col_wrapper = cudf::test::fixed_width_column_wrapper; - // Source data - auto names_column_t = - strings_col{{"Newton", "Washington", "Cherry", "Kiwi", "Lemon", "Tomato"}, - cudf::detail::make_counting_transform_iterator(0, [](auto) { return true; })}; - auto ages_column_t = - col_wrapper{{5, 10, 15, 20, 25, 30}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; })}; - auto is_human_col_t = - bools_col{{true, true, false, false, false, false}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; })}; - - // Target data - auto names_column_values = - strings_col{{"String 0", "String 1", "String 2", "String 3", "String 4", "String 5"}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 0; })}; + auto names_column_values = strings_col{nullptr, "Bagel", "Lemonade", "Donut", "Butter"}; auto ages_column_values = - col_wrapper{{50, 60, 70, 80, 90, 100}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; - auto is_human_col_values = - bools_col{{true, true, true, true, true, true}, - cudf::detail::make_counting_transform_iterator(0, [](auto) { return true; })}; - - // Expected data - auto names_column_expected = - strings_col{{"String 0", "Lemon", "Kiwi", "Cherry", "Washington", "Newton"}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 0; })}; - auto ages_column_expected = - col_wrapper{{50, 25, 20, 15, 10, 5}, + col_wrapper{{15, null, 10, 21, 17}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; - auto is_human_col_expected = - bools_col{{true, false, false, false, true, true}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 2; })}; - - auto const structs_t = structs_col{ - {names_column_t, ages_column_t, is_human_col_t}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { - return i != 5; - })}.release(); + auto is_human_col_values = bools_col{false, false, false, false, false}; auto const structs_values = structs_col{ {names_column_values, ages_column_values, is_human_col_values}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 2; })}.release(); - auto const structs_expected = structs_col{ - {names_column_expected, ages_column_expected, is_human_col_expected}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { - return true; - })}.release(); - - // The first element of the target is not overwritten - auto const scatter_map = int32s_col{-1, 4, 3, 2, 1}.release(); - test_search(structs_t, structs_values, structs_expected, scatter_map); -} - -TYPED_TEST(TypedStructSearchTest, ScatterStructOfListsTest) -{ - // Testing gather() on struct> - using lists_col = cudf::test::lists_column_wrapper; - - // Source data - auto lists_col_t = - lists_col{{{5}, {10, 15}, {20, 25, 30}, {35, 40, 45, 50}, {55, 60, 65}, {70, 75}, {80}, {}, {}}, - // Valid for elements 0, 3, 6,... - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return !(i % 3); })}; - auto const structs_t = structs_col{{lists_col_t}}.release(); - - // Target data - auto lists_col_values = - lists_col{{{1}, {2, 3}, {4, 5, 6}, {7, 8}, {9}, {10, 11, 12, 13}, {}, {14}, {15, 16}}, - // Valid for elements 1, 3, 5, 7,... - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; })}; - auto const structs_values = structs_col{{lists_col_values}}.release(); - - // Expected data - auto const validity_expected = std::vector{0, 1, 1, 0, 0, 1, 1, 0, 0}; - auto lists_col_expected = lists_col{ - {{1}, {2, 3}, {80}, {70, 75}, {55, 60, 65}, {35, 40, 45, 50}, {5}, {10, 15}, {20, 25, 30}}, - validity_expected.begin()}; - auto const structs_expected = structs_col{{lists_col_expected}}.release(); - - // The first 2 elements of the target is not overwritten - auto const scatter_map = int32s_col{-3, -2, -1, 5, 4, 3, 2}.release(); - test_search(structs_t, structs_values, structs_expected, scatter_map); -} - -struct StructSearchTest : public cudf::test::BaseFixture { -}; - -using cudf::numeric_scalar; -using cudf::size_type; -using cudf::string_scalar; -using cudf::test::fixed_width_column_wrapper; - -TEST_F(StructSearchTest, search_dictionary) -{ - cudf::test::dictionary_column_wrapper input( - {"", "", "10", "10", "20", "20", "30", "40"}, {0, 0, 1, 1, 1, 1, 1, 1}); - cudf::test::dictionary_column_wrapper values( - {"", "08", "10", "11", "30", "32", "90"}, {0, 1, 1, 1, 1, 1, 1}); - - auto result = cudf::upper_bound({cudf::table_view{{input}}}, - {cudf::table_view{{values}}}, - {cudf::order::ASCENDING}, - {cudf::null_order::BEFORE}); - fixed_width_column_wrapper expect_upper{2, 2, 4, 4, 7, 7, 8}; - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect_upper); - - result = cudf::lower_bound({cudf::table_view{{input}}}, - {cudf::table_view{{values}}}, - {cudf::order::ASCENDING}, - {cudf::null_order::BEFORE}); - fixed_width_column_wrapper expect_lower{0, 2, 2, 4, 6, 7, 8}; - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect_lower); -} -TEST_F(StructSearchTest, search_table_dictionary) -{ - fixed_width_column_wrapper column_0{{10, 10, 20, 20, 20, 20, 20, 20, 20, 50, 30}, - {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0}}; - fixed_width_column_wrapper column_1{{5.0, 6.0, .5, .5, .5, .5, .7, .7, .7, .7, .5}, - {1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1}}; - cudf::test::dictionary_column_wrapper column_2{ - {90, 95, 77, 78, 79, 76, 61, 62, 63, 41, 50}, {1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1}}; - cudf::table_view input({column_0, column_1, column_2}); - - fixed_width_column_wrapper values_0{{10, 40, 20}, {1, 0, 1}}; - fixed_width_column_wrapper values_1{{6., .5, .5}, {0, 1, 1}}; - cudf::test::dictionary_column_wrapper values_2{{95, 50, 77}, {1, 1, 0}}; - cudf::table_view values({values_0, values_1, values_2}); - - std::vector order_flags{ - {cudf::order::ASCENDING, cudf::order::ASCENDING, cudf::order::DESCENDING}}; - std::vector null_order_flags{ - {cudf::null_order::AFTER, cudf::null_order::AFTER, cudf::null_order::AFTER}}; - - auto result = cudf::lower_bound(input, values, order_flags, null_order_flags); - fixed_width_column_wrapper expect_lower{1, 10, 2}; - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect_lower); - - result = cudf::upper_bound(input, values, order_flags, null_order_flags); - fixed_width_column_wrapper expect_upper{2, 11, 6}; - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect_upper); -} + auto names_column_t = strings_col{"Cherry", "Kiwi", "Lemon", "Newton", "Tomato", "Washington"}; + auto ages_column_t = + col_wrapper{{5, 10, 15, 20, null, 30}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; })}; + auto is_human_col_t = bools_col{false, false, false, false, false, true}; -TEST_F(StructSearchTest, contains_dictionary) -{ - cudf::test::dictionary_column_wrapper column( - {"00", "00", "17", "17", "23", "23", "29"}); - EXPECT_TRUE(cudf::contains(column, string_scalar{"23"})); - EXPECT_FALSE(cudf::contains(column, string_scalar{"28"})); + auto const structs_t = structs_col{ + {names_column_t, ages_column_t, is_human_col_t}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { + return i != 5; + })}.release(); - cudf::test::dictionary_column_wrapper needles({"00", "17", "23", "27"}); - fixed_width_column_wrapper expect{1, 1, 1, 1, 1, 1, 0}; - auto result = cudf::contains(column, needles); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect); + auto results = + search_bounds(structs_t, structs_values, {cudf::order::ASCENDING}, {cudf::null_order::BEFORE}); + auto expected_lower_bound = int32s_col{3, 1, 11, 0, 4, 11}; + auto expected_upper_bound = int32s_col{4, 2, 11, 1, 8, 11}; + // CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_lower_bound, results.first->view(), print_all); + // CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_upper_bound, results.second->view(), print_all); } -#endif From aafea019cd5d24555434cadbdf0695f6ffa986d5 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 5 Apr 2021 11:47:16 -0600 Subject: [PATCH 10/26] Rewrite tests --- cpp/tests/search/search_struct_test.cpp | 32 ++++++++++++------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/cpp/tests/search/search_struct_test.cpp b/cpp/tests/search/search_struct_test.cpp index 044efeae150..d48cbebcaec 100644 --- a/cpp/tests/search/search_struct_test.cpp +++ b/cpp/tests/search/search_struct_test.cpp @@ -116,61 +116,61 @@ TYPED_TEST(TypedStructSearchTest, SimpleInputWithNullsTests) })}.release(); // Sorted asc, nulls first - auto child_col_t1 = + auto child_col_t = col_wrapper{{XXX, null, 0, 1, 2, 2, 2, 2, 3, 3, 4}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; - auto const structs_t1 = structs_col{ - {child_col_t1}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { + auto structs_t = structs_col{ + {child_col_t}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 0; })}.release(); auto results = - search_bounds(structs_t1, structs_values, {cudf::order::ASCENDING}, {cudf::null_order::BEFORE}); + search_bounds(structs_t, structs_values, {cudf::order::ASCENDING}, {cudf::null_order::BEFORE}); auto expected_lower_bound = int32s_col{3, 1, 11, 0, 4, 11}; auto expected_upper_bound = int32s_col{4, 2, 11, 1, 8, 11}; CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_lower_bound, results.first->view(), print_all); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_upper_bound, results.second->view(), print_all); // Sorted asc, nulls last - auto child_col_t2 = + child_col_t = col_wrapper{{0, 1, 2, 2, 2, 2, 3, 3, 4, null, XXX}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 9; })}; - auto const structs_t2 = structs_col{ - {child_col_t2}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { + structs_t = structs_col{ + {child_col_t}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 10; })}.release(); results = - search_bounds(structs_t2, structs_values, {cudf::order::ASCENDING}, {cudf::null_order::AFTER}); + search_bounds(structs_t, structs_values, {cudf::order::ASCENDING}, {cudf::null_order::AFTER}); expected_lower_bound = int32s_col{1, 0, 10, 10, 2, 10}; expected_upper_bound = int32s_col{1, 0, 10, 11, 6, 10}; CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_lower_bound, results.first->view(), print_all); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_upper_bound, results.second->view(), print_all); // Sorted dsc, nulls first - auto child_col_t3 = + child_col_t = col_wrapper{{XXX, null, 4, 3, 3, 2, 2, 2, 2, 1, 0}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; - auto const structs_t3 = structs_col{ - {child_col_t3}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { + structs_t = structs_col{ + {child_col_t}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 0; })}.release(); results = - search_bounds(structs_t2, structs_values, {cudf::order::ASCENDING}, {cudf::null_order::AFTER}); + search_bounds(structs_t, structs_values, {cudf::order::ASCENDING}, {cudf::null_order::AFTER}); expected_lower_bound = int32s_col{1, 0, 10, 10, 2, 10}; expected_upper_bound = int32s_col{1, 0, 10, 11, 6, 10}; // CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_lower_bound, results.first->view(), print_all); // CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_upper_bound, results.second->view(), print_all); // Sorted dsc, nulls last - auto child_col_t4 = + child_col_t = col_wrapper{{4, 3, 3, 2, 2, 2, 2, 1, 0, null, XXX}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 9; })}; - auto const structs_t4 = structs_col{ - {child_col_t4}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { + structs_t = structs_col{ + {child_col_t}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 10; })}.release(); results = - search_bounds(structs_t2, structs_values, {cudf::order::ASCENDING}, {cudf::null_order::AFTER}); + search_bounds(structs_t, structs_values, {cudf::order::ASCENDING}, {cudf::null_order::AFTER}); expected_lower_bound = int32s_col{1, 0, 10, 10, 2, 10}; expected_upper_bound = int32s_col{1, 0, 10, 11, 6, 10}; // CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_lower_bound, results.first->view(), print_all); From 30348ccf723ed5d26b42455e3e69e98d4d539cc8 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 5 Apr 2021 15:58:03 -0600 Subject: [PATCH 11/26] Fix SimpleInputWithNullsTests --- cpp/tests/search/search_struct_test.cpp | 26 ++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/cpp/tests/search/search_struct_test.cpp b/cpp/tests/search/search_struct_test.cpp index d48cbebcaec..b622de9f2b5 100644 --- a/cpp/tests/search/search_struct_test.cpp +++ b/cpp/tests/search/search_struct_test.cpp @@ -142,7 +142,7 @@ TYPED_TEST(TypedStructSearchTest, SimpleInputWithNullsTests) results = search_bounds(structs_t, structs_values, {cudf::order::ASCENDING}, {cudf::null_order::AFTER}); expected_lower_bound = int32s_col{1, 0, 10, 10, 2, 10}; - expected_upper_bound = int32s_col{1, 0, 10, 11, 6, 10}; + expected_upper_bound = int32s_col{2, 0, 10, 11, 6, 10}; CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_lower_bound, results.first->view(), print_all); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_upper_bound, results.second->view(), print_all); @@ -155,11 +155,11 @@ TYPED_TEST(TypedStructSearchTest, SimpleInputWithNullsTests) return i != 0; })}.release(); results = - search_bounds(structs_t, structs_values, {cudf::order::ASCENDING}, {cudf::null_order::AFTER}); - expected_lower_bound = int32s_col{1, 0, 10, 10, 2, 10}; - expected_upper_bound = int32s_col{1, 0, 10, 11, 6, 10}; - // CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_lower_bound, results.first->view(), print_all); - // CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_upper_bound, results.second->view(), print_all); + search_bounds(structs_t, structs_values, {cudf::order::DESCENDING}, {cudf::null_order::BEFORE}); + expected_lower_bound = int32s_col{9, 11, 0, 11, 5, 0}; + expected_upper_bound = int32s_col{10, 11, 0, 11, 9, 0}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_lower_bound, results.first->view(), print_all); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_upper_bound, results.second->view(), print_all); // Sorted dsc, nulls last child_col_t = @@ -170,11 +170,11 @@ TYPED_TEST(TypedStructSearchTest, SimpleInputWithNullsTests) return i != 10; })}.release(); results = - search_bounds(structs_t, structs_values, {cudf::order::ASCENDING}, {cudf::null_order::AFTER}); - expected_lower_bound = int32s_col{1, 0, 10, 10, 2, 10}; - expected_upper_bound = int32s_col{1, 0, 10, 11, 6, 10}; - // CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_lower_bound, results.first->view(), print_all); - // CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_upper_bound, results.second->view(), print_all); + search_bounds(structs_t, structs_values, {cudf::order::DESCENDING}, {cudf::null_order::AFTER}); + expected_lower_bound = int32s_col{7, 11, 0, 0, 3, 0}; + expected_upper_bound = int32s_col{8, 11, 0, 0, 7, 0}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_lower_bound, results.first->view(), print_all); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_upper_bound, results.second->view(), print_all); } TYPED_TEST(TypedStructSearchTest, ComplexStructTest) @@ -209,6 +209,6 @@ TYPED_TEST(TypedStructSearchTest, ComplexStructTest) search_bounds(structs_t, structs_values, {cudf::order::ASCENDING}, {cudf::null_order::BEFORE}); auto expected_lower_bound = int32s_col{3, 1, 11, 0, 4, 11}; auto expected_upper_bound = int32s_col{4, 2, 11, 1, 8, 11}; - // CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_lower_bound, results.first->view(), print_all); - // CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_upper_bound, results.second->view(), print_all); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_lower_bound, results.first->view(), print_all); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_upper_bound, results.second->view(), print_all); } From a842198a87d8583b2a705dea4b282b8a84a1daa1 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 5 Apr 2021 16:27:39 -0600 Subject: [PATCH 12/26] Finish ComplexStructTest --- cpp/tests/search/search_struct_test.cpp | 37 +++++++++++++------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/cpp/tests/search/search_struct_test.cpp b/cpp/tests/search/search_struct_test.cpp index b622de9f2b5..8ca2db5fd85 100644 --- a/cpp/tests/search/search_struct_test.cpp +++ b/cpp/tests/search/search_struct_test.cpp @@ -182,22 +182,12 @@ TYPED_TEST(TypedStructSearchTest, ComplexStructTest) // Testing on struct. using col_wrapper = cudf::test::fixed_width_column_wrapper; - auto names_column_values = strings_col{nullptr, "Bagel", "Lemonade", "Donut", "Butter"}; - auto ages_column_values = - col_wrapper{{15, null, 10, 21, 17}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; - auto is_human_col_values = bools_col{false, false, false, false, false}; - auto const structs_values = structs_col{ - {names_column_values, ages_column_values, is_human_col_values}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { - return i != 2; - })}.release(); - - auto names_column_t = strings_col{"Cherry", "Kiwi", "Lemon", "Newton", "Tomato", "Washington"}; + auto names_column_t = + strings_col{"Cherry", "Kiwi", "Lemon", "Newton", "Tomato", /*NULL*/ "Washington"}; auto ages_column_t = - col_wrapper{{5, 10, 15, 20, null, 30}, + col_wrapper{{5, 10, 15, 20, null, XXX}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; })}; - auto is_human_col_t = bools_col{false, false, false, false, false, true}; + auto is_human_col_t = bools_col{false, false, false, false, false, /*NULL*/ true}; auto const structs_t = structs_col{ {names_column_t, ages_column_t, is_human_col_t}, @@ -205,10 +195,21 @@ TYPED_TEST(TypedStructSearchTest, ComplexStructTest) return i != 5; })}.release(); - auto results = - search_bounds(structs_t, structs_values, {cudf::order::ASCENDING}, {cudf::null_order::BEFORE}); - auto expected_lower_bound = int32s_col{3, 1, 11, 0, 4, 11}; - auto expected_upper_bound = int32s_col{4, 2, 11, 1, 8, 11}; + auto names_column_values = strings_col{"Bagel", "Tomato", "Lemonade", /*NULL*/ "Donut", "Butter"}; + auto ages_column_values = + col_wrapper{{10, 15, null, XXX, 17}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; + auto is_human_col_values = bools_col{false, false, true, /*NULL*/ true, true}; + auto const structs_values = structs_col{ + {names_column_values, ages_column_values, is_human_col_values}, + cudf::detail::make_counting_transform_iterator(0, [](auto i) { + return i != 3; + })}.release(); + + auto const results = + search_bounds(structs_t, structs_values, {cudf::order::ASCENDING}, {cudf::null_order::AFTER}); + auto const expected_lower_bound = int32s_col{0, 4, 3, 5, 0}; + auto const expected_upper_bound = int32s_col{0, 5, 3, 6, 0}; CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_lower_bound, results.first->view(), print_all); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_upper_bound, results.second->view(), print_all); } From 02988f18077f0153e0220246b97845bb165ae13f Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 5 Apr 2021 18:09:30 -0600 Subject: [PATCH 13/26] Rename variables --- cpp/src/search/search.cu | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/cpp/src/search/search.cu b/cpp/src/search/search.cu index ef6c32412a3..2f875d62658 100644 --- a/cpp/src/search/search.cu +++ b/cpp/src/search/search.cu @@ -104,26 +104,26 @@ std::unique_ptr search_ordered(table_view const& t, auto matched = dictionary::detail::match_dictionaries({t, values}, stream); // 0-table_view, 1-column_order, 2-null_precedence, 3-validity_columns - auto flattened_t = + auto f_flattened = structs::detail::flatten_nested_columns(matched.second.front(), column_order, null_precedence); - auto flattened_values = structs::detail::flatten_nested_columns(matched.second.back(), {}, {}); + auto values_flattened = structs::detail::flatten_nested_columns(matched.second.back(), {}, {}); - auto d_t = table_device_view::create(std::get<0>(flattened_t), stream); - auto d_values = table_device_view::create(std::get<0>(flattened_values), stream); + auto t_d = table_device_view::create(std::get<0>(f_flattened), stream); + auto values_d = table_device_view::create(std::get<0>(values_flattened), stream); auto count_it = thrust::make_counting_iterator(0); - rmm::device_vector d_column_order(std::get<1>(flattened_t).begin(), - std::get<1>(flattened_t).end()); - rmm::device_vector d_null_precedence(std::get<2>(flattened_t).begin(), - std::get<2>(flattened_t).end()); + rmm::device_vector column_order_dv(std::get<1>(f_flattened).begin(), + std::get<1>(f_flattened).end()); + rmm::device_vector null_precedence_dv(std::get<2>(f_flattened).begin(), + std::get<2>(f_flattened).end()); if (has_nulls(t) or has_nulls(values)) { auto ineq_op = (find_first) ? row_lexicographic_comparator( - *d_t, *d_values, d_column_order.data().get(), d_null_precedence.data().get()) + *t_d, *values_d, column_order_dv.data().get(), null_precedence_dv.data().get()) : row_lexicographic_comparator( - *d_values, *d_t, d_column_order.data().get(), d_null_precedence.data().get()); + *values_d, *t_d, column_order_dv.data().get(), null_precedence_dv.data().get()); launch_search(count_it, count_it, @@ -137,9 +137,9 @@ std::unique_ptr search_ordered(table_view const& t, auto ineq_op = (find_first) ? row_lexicographic_comparator( - *d_t, *d_values, d_column_order.data().get(), d_null_precedence.data().get()) + *t_d, *values_d, column_order_dv.data().get(), null_precedence_dv.data().get()) : row_lexicographic_comparator( - *d_values, *d_t, d_column_order.data().get(), d_null_precedence.data().get()); + *values_d, *t_d, column_order_dv.data().get(), null_precedence_dv.data().get()); launch_search(count_it, count_it, From c7c10aa110069289ef5bbf65fea2dab97722d895 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 6 Apr 2021 07:24:05 -0600 Subject: [PATCH 14/26] Fix typo --- cpp/src/search/search.cu | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cpp/src/search/search.cu b/cpp/src/search/search.cu index 2f875d62658..8c9288fb0be 100644 --- a/cpp/src/search/search.cu +++ b/cpp/src/search/search.cu @@ -104,18 +104,18 @@ std::unique_ptr search_ordered(table_view const& t, auto matched = dictionary::detail::match_dictionaries({t, values}, stream); // 0-table_view, 1-column_order, 2-null_precedence, 3-validity_columns - auto f_flattened = + auto t_flattened = structs::detail::flatten_nested_columns(matched.second.front(), column_order, null_precedence); auto values_flattened = structs::detail::flatten_nested_columns(matched.second.back(), {}, {}); - auto t_d = table_device_view::create(std::get<0>(f_flattened), stream); + auto t_d = table_device_view::create(std::get<0>(t_flattened), stream); auto values_d = table_device_view::create(std::get<0>(values_flattened), stream); auto count_it = thrust::make_counting_iterator(0); - rmm::device_vector column_order_dv(std::get<1>(f_flattened).begin(), - std::get<1>(f_flattened).end()); - rmm::device_vector null_precedence_dv(std::get<2>(f_flattened).begin(), - std::get<2>(f_flattened).end()); + rmm::device_vector column_order_dv(std::get<1>(t_flattened).begin(), + std::get<1>(t_flattened).end()); + rmm::device_vector null_precedence_dv(std::get<2>(t_flattened).begin(), + std::get<2>(t_flattened).end()); if (has_nulls(t) or has_nulls(values)) { auto ineq_op = From dbbe480e77dbad65873fe6150503d76c303405ca Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 6 Apr 2021 13:22:42 -0600 Subject: [PATCH 15/26] Rewrite `search_ordered`, replacing `device_vector` by `device_uvector` along with several other improvements --- cpp/src/search/search.cu | 83 ++++++++++++++++++---------------------- 1 file changed, 38 insertions(+), 45 deletions(-) diff --git a/cpp/src/search/search.cu b/cpp/src/search/search.cu index 8c9288fb0be..1d876ab5d31 100644 --- a/cpp/src/search/search.cu +++ b/cpp/src/search/search.cu @@ -30,6 +30,7 @@ #include #include +#include #include #include @@ -77,15 +78,13 @@ std::unique_ptr search_ordered(table_view const& t, rmm::mr::device_memory_resource* mr) { // Allocate result column - std::unique_ptr result = make_numeric_column( + auto result = make_numeric_column( data_type{type_to_id()}, values.num_rows(), mask_state::UNALLOCATED, stream, mr); - - mutable_column_view result_view = result.get()->mutable_view(); + auto const result_out = result->mutable_view().data(); // Handle empty inputs if (t.num_rows() == 0) { - CUDA_TRY(cudaMemsetAsync( - result_view.data(), 0, values.num_rows() * sizeof(size_type), stream.value())); + CUDA_TRY(cudaMemsetAsync(result_out, 0, values.num_rows() * sizeof(size_type), stream.value())); return result; } @@ -101,54 +100,48 @@ std::unique_ptr search_ordered(table_view const& t, // This utility will ensure all corresponding dictionary columns have matching keys. // It will return any new dictionary columns created as well as updated table_views. - auto matched = dictionary::detail::match_dictionaries({t, values}, stream); + auto const matched = dictionary::detail::match_dictionaries({t, values}, stream); // 0-table_view, 1-column_order, 2-null_precedence, 3-validity_columns - auto t_flattened = + auto const t_flattened = structs::detail::flatten_nested_columns(matched.second.front(), column_order, null_precedence); - auto values_flattened = structs::detail::flatten_nested_columns(matched.second.back(), {}, {}); + auto const values_flattened = + structs::detail::flatten_nested_columns(matched.second.back(), {}, {}); + + auto const t_d = table_device_view::create(std::get<0>(t_flattened), stream); + auto const values_d = table_device_view::create(std::get<0>(values_flattened), stream); + + auto const column_order_flattened = std::get<1>(t_flattened); + auto const null_precedence_flattened = std::get<2>(t_flattened); + + rmm::device_uvector column_order_dv(column_order_flattened.size(), stream); + rmm::device_uvector null_precedence_dv(null_precedence_flattened.size(), stream); - auto t_d = table_device_view::create(std::get<0>(t_flattened), stream); - auto values_d = table_device_view::create(std::get<0>(values_flattened), stream); - auto count_it = thrust::make_counting_iterator(0); + CUDA_TRY(cudaMemcpyAsync(column_order_dv.data(), + column_order_flattened.data(), + sizeof(order) * column_order_flattened.size(), + cudaMemcpyDefault, + stream.value())); + CUDA_TRY(cudaMemcpyAsync(null_precedence_dv.data(), + null_precedence_flattened.data(), + sizeof(null_order) * null_precedence_flattened.size(), + cudaMemcpyDefault, + stream.value())); - rmm::device_vector column_order_dv(std::get<1>(t_flattened).begin(), - std::get<1>(t_flattened).end()); - rmm::device_vector null_precedence_dv(std::get<2>(t_flattened).begin(), - std::get<2>(t_flattened).end()); + auto const& lhs = find_first ? *t_d : *values_d; + auto const& rhs = find_first ? *values_d : *t_d; + auto const count_it = thrust::make_counting_iterator(0); if (has_nulls(t) or has_nulls(values)) { - auto ineq_op = - (find_first) - ? row_lexicographic_comparator( - *t_d, *values_d, column_order_dv.data().get(), null_precedence_dv.data().get()) - : row_lexicographic_comparator( - *values_d, *t_d, column_order_dv.data().get(), null_precedence_dv.data().get()); - - launch_search(count_it, - count_it, - t.num_rows(), - values.num_rows(), - result_view.data(), - ineq_op, - find_first, - stream); + auto const comp = row_lexicographic_comparator( + lhs, rhs, column_order_dv.data(), null_precedence_dv.data()); + launch_search( + count_it, count_it, t.num_rows(), values.num_rows(), result_out, comp, find_first, stream); } else { - auto ineq_op = - (find_first) - ? row_lexicographic_comparator( - *t_d, *values_d, column_order_dv.data().get(), null_precedence_dv.data().get()) - : row_lexicographic_comparator( - *values_d, *t_d, column_order_dv.data().get(), null_precedence_dv.data().get()); - - launch_search(count_it, - count_it, - t.num_rows(), - values.num_rows(), - result_view.data(), - ineq_op, - find_first, - stream); + auto const comp = row_lexicographic_comparator( + lhs, rhs, column_order_dv.data(), null_precedence_dv.data()); + launch_search( + count_it, count_it, t.num_rows(), values.num_rows(), result_out, comp, find_first, stream); } return result; From 304cfe8a1a96bb381387ce64eb61317eec77ad03 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 6 Apr 2021 13:25:40 -0600 Subject: [PATCH 16/26] Reorder variables' declaration --- cpp/src/search/search.cu | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/cpp/src/search/search.cu b/cpp/src/search/search.cu index 1d876ab5d31..26bc866abdc 100644 --- a/cpp/src/search/search.cu +++ b/cpp/src/search/search.cu @@ -110,13 +110,13 @@ std::unique_ptr search_ordered(table_view const& t, auto const t_d = table_device_view::create(std::get<0>(t_flattened), stream); auto const values_d = table_device_view::create(std::get<0>(values_flattened), stream); + auto const& lhs = find_first ? *t_d : *values_d; + auto const& rhs = find_first ? *values_d : *t_d; auto const column_order_flattened = std::get<1>(t_flattened); auto const null_precedence_flattened = std::get<2>(t_flattened); - rmm::device_uvector column_order_dv(column_order_flattened.size(), stream); rmm::device_uvector null_precedence_dv(null_precedence_flattened.size(), stream); - CUDA_TRY(cudaMemcpyAsync(column_order_dv.data(), column_order_flattened.data(), sizeof(order) * column_order_flattened.size(), @@ -128,8 +128,6 @@ std::unique_ptr search_ordered(table_view const& t, cudaMemcpyDefault, stream.value())); - auto const& lhs = find_first ? *t_d : *values_d; - auto const& rhs = find_first ? *values_d : *t_d; auto const count_it = thrust::make_counting_iterator(0); if (has_nulls(t) or has_nulls(values)) { From 36f478cd30b92f9e77214bf3b9f40863c03b492c Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 6 Apr 2021 13:26:56 -0600 Subject: [PATCH 17/26] Fix copyright year in header --- cpp/tests/search/search_struct_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/tests/search/search_struct_test.cpp b/cpp/tests/search/search_struct_test.cpp index 8ca2db5fd85..c6ed5b95fce 100644 --- a/cpp/tests/search/search_struct_test.cpp +++ b/cpp/tests/search/search_struct_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From 63655d9ee339ad28184724d4948a5b270dae20dc Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 7 Apr 2021 07:49:40 -0600 Subject: [PATCH 18/26] Simplify StructSearchTests --- cpp/tests/search/search_struct_test.cpp | 74 ++++++++----------------- 1 file changed, 22 insertions(+), 52 deletions(-) diff --git a/cpp/tests/search/search_struct_test.cpp b/cpp/tests/search/search_struct_test.cpp index c6ed5b95fce..2fdd5a287f0 100644 --- a/cpp/tests/search/search_struct_test.cpp +++ b/cpp/tests/search/search_struct_test.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -60,6 +61,9 @@ auto search_bounds(std::unique_ptr const& t_col, auto result_upper_bound = cudf::upper_bound(t, values, column_orders, null_precedence); return std::make_pair(std::move(result_lower_bound), std::move(result_upper_bound)); } + +auto null_at(cudf::size_type idx) { return cudf::test::iterator_with_null_at(idx); } + } // namespace // Test case when all input columns are empty @@ -107,22 +111,12 @@ TYPED_TEST(TypedStructSearchTest, SimpleInputWithNullsTests) { using col_wrapper = cudf::test::fixed_width_column_wrapper; - auto child_col_values = - col_wrapper{{1, null, 70, XXX, 2, 100}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; - auto const structs_values = structs_col{ - {child_col_values}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { - return i != 3; - })}.release(); + auto child_col_values = col_wrapper{{1, null, 70, XXX, 2, 100}, null_at(1)}; + auto const structs_values = structs_col{{child_col_values}, null_at(3)}.release(); // Sorted asc, nulls first - auto child_col_t = - col_wrapper{{XXX, null, 0, 1, 2, 2, 2, 2, 3, 3, 4}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; - auto structs_t = structs_col{ - {child_col_t}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { - return i != 0; - })}.release(); + auto child_col_t = col_wrapper{{XXX, null, 0, 1, 2, 2, 2, 2, 3, 3, 4}, null_at(1)}; + auto structs_t = structs_col{{child_col_t}, null_at(0)}.release(); auto results = search_bounds(structs_t, structs_values, {cudf::order::ASCENDING}, {cudf::null_order::BEFORE}); @@ -132,13 +126,8 @@ TYPED_TEST(TypedStructSearchTest, SimpleInputWithNullsTests) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_upper_bound, results.second->view(), print_all); // Sorted asc, nulls last - child_col_t = - col_wrapper{{0, 1, 2, 2, 2, 2, 3, 3, 4, null, XXX}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 9; })}; - structs_t = structs_col{ - {child_col_t}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { - return i != 10; - })}.release(); + child_col_t = col_wrapper{{0, 1, 2, 2, 2, 2, 3, 3, 4, null, XXX}, null_at(9)}; + structs_t = structs_col{{child_col_t}, null_at(10)}.release(); results = search_bounds(structs_t, structs_values, {cudf::order::ASCENDING}, {cudf::null_order::AFTER}); expected_lower_bound = int32s_col{1, 0, 10, 10, 2, 10}; @@ -147,13 +136,8 @@ TYPED_TEST(TypedStructSearchTest, SimpleInputWithNullsTests) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_upper_bound, results.second->view(), print_all); // Sorted dsc, nulls first - child_col_t = - col_wrapper{{XXX, null, 4, 3, 3, 2, 2, 2, 2, 1, 0}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; - structs_t = structs_col{ - {child_col_t}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { - return i != 0; - })}.release(); + child_col_t = col_wrapper{{XXX, null, 4, 3, 3, 2, 2, 2, 2, 1, 0}, null_at(1)}; + structs_t = structs_col{{child_col_t}, null_at(0)}.release(); results = search_bounds(structs_t, structs_values, {cudf::order::DESCENDING}, {cudf::null_order::BEFORE}); expected_lower_bound = int32s_col{9, 11, 0, 11, 5, 0}; @@ -162,13 +146,8 @@ TYPED_TEST(TypedStructSearchTest, SimpleInputWithNullsTests) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_upper_bound, results.second->view(), print_all); // Sorted dsc, nulls last - child_col_t = - col_wrapper{{4, 3, 3, 2, 2, 2, 2, 1, 0, null, XXX}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 9; })}; - structs_t = structs_col{ - {child_col_t}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { - return i != 10; - })}.release(); + child_col_t = col_wrapper{{4, 3, 3, 2, 2, 2, 2, 1, 0, null, XXX}, null_at(9)}; + structs_t = structs_col{{child_col_t}, null_at(10)}.release(); results = search_bounds(structs_t, structs_values, {cudf::order::DESCENDING}, {cudf::null_order::AFTER}); expected_lower_bound = int32s_col{7, 11, 0, 0, 3, 0}; @@ -184,27 +163,18 @@ TYPED_TEST(TypedStructSearchTest, ComplexStructTest) auto names_column_t = strings_col{"Cherry", "Kiwi", "Lemon", "Newton", "Tomato", /*NULL*/ "Washington"}; - auto ages_column_t = - col_wrapper{{5, 10, 15, 20, null, XXX}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; })}; + auto ages_column_t = col_wrapper{{5, 10, 15, 20, null, XXX}, null_at(4)}; auto is_human_col_t = bools_col{false, false, false, false, false, /*NULL*/ true}; - auto const structs_t = structs_col{ - {names_column_t, ages_column_t, is_human_col_t}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { - return i != 5; - })}.release(); + auto const structs_t = + structs_col{{names_column_t, ages_column_t, is_human_col_t}, null_at(5)}.release(); auto names_column_values = strings_col{"Bagel", "Tomato", "Lemonade", /*NULL*/ "Donut", "Butter"}; - auto ages_column_values = - col_wrapper{{10, 15, null, XXX, 17}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; - auto is_human_col_values = bools_col{false, false, true, /*NULL*/ true, true}; - auto const structs_values = structs_col{ - {names_column_values, ages_column_values, is_human_col_values}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { - return i != 3; - })}.release(); + auto ages_column_values = col_wrapper{{10, 15, null, XXX, 17}, null_at(1)}; + auto is_human_col_values = bools_col{false, false, true, /*NULL*/ true, true}; + auto const structs_values = + structs_col{{names_column_values, ages_column_values, is_human_col_values}, null_at(3)} + .release(); auto const results = search_bounds(structs_t, structs_values, {cudf::order::ASCENDING}, {cudf::null_order::AFTER}); From d17f09a7352c104048cd0b916b2680073f7da3ec Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 7 Apr 2021 12:39:20 -0600 Subject: [PATCH 19/26] Change copied variables into references. --- cpp/src/search/search.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/search/search.cu b/cpp/src/search/search.cu index 26bc866abdc..65ed4359910 100644 --- a/cpp/src/search/search.cu +++ b/cpp/src/search/search.cu @@ -113,8 +113,8 @@ std::unique_ptr search_ordered(table_view const& t, auto const& lhs = find_first ? *t_d : *values_d; auto const& rhs = find_first ? *values_d : *t_d; - auto const column_order_flattened = std::get<1>(t_flattened); - auto const null_precedence_flattened = std::get<2>(t_flattened); + auto const& column_order_flattened = std::get<1>(t_flattened); + auto const& null_precedence_flattened = std::get<2>(t_flattened); rmm::device_uvector column_order_dv(column_order_flattened.size(), stream); rmm::device_uvector null_precedence_dv(null_precedence_flattened.size(), stream); CUDA_TRY(cudaMemcpyAsync(column_order_dv.data(), From ede552ab837c4b6f8fbe700274d6b270d9cc5299 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 7 Apr 2021 21:02:27 -0600 Subject: [PATCH 20/26] Fix test for ComplexStructTest --- cpp/tests/search/search_struct_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/tests/search/search_struct_test.cpp b/cpp/tests/search/search_struct_test.cpp index 2fdd5a287f0..ceb955a19e1 100644 --- a/cpp/tests/search/search_struct_test.cpp +++ b/cpp/tests/search/search_struct_test.cpp @@ -170,7 +170,7 @@ TYPED_TEST(TypedStructSearchTest, ComplexStructTest) structs_col{{names_column_t, ages_column_t, is_human_col_t}, null_at(5)}.release(); auto names_column_values = strings_col{"Bagel", "Tomato", "Lemonade", /*NULL*/ "Donut", "Butter"}; - auto ages_column_values = col_wrapper{{10, 15, null, XXX, 17}, null_at(1)}; + auto ages_column_values = col_wrapper{{10, null, 15, XXX, 17}, null_at(1)}; auto is_human_col_values = bools_col{false, false, true, /*NULL*/ true, true}; auto const structs_values = structs_col{{names_column_values, ages_column_values, is_human_col_values}, null_at(3)} From 07a7ca06e570d868afcc77c2cbbae4abd6c3032d Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 8 Apr 2021 11:56:34 -0600 Subject: [PATCH 21/26] Use structure binding to simplify code --- cpp/src/search/search.cu | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/cpp/src/search/search.cu b/cpp/src/search/search.cu index 65ed4359910..83eabb32b57 100644 --- a/cpp/src/search/search.cu +++ b/cpp/src/search/search.cu @@ -100,21 +100,19 @@ std::unique_ptr search_ordered(table_view const& t, // This utility will ensure all corresponding dictionary columns have matching keys. // It will return any new dictionary columns created as well as updated table_views. - auto const matched = dictionary::detail::match_dictionaries({t, values}, stream); + auto const [_, t_vals_matched] = dictionary::detail::match_dictionaries({t, values}, stream); // 0-table_view, 1-column_order, 2-null_precedence, 3-validity_columns - auto const t_flattened = - structs::detail::flatten_nested_columns(matched.second.front(), column_order, null_precedence); - auto const values_flattened = - structs::detail::flatten_nested_columns(matched.second.back(), {}, {}); + auto const [t_flattened, column_order_flattened, null_precedence_flattened, t_validity] = + structs::detail::flatten_nested_columns(t_vals_matched.front(), column_order, null_precedence); + auto const [values_flattened, __, ___, ____] = + structs::detail::flatten_nested_columns(t_vals_matched.back(), {}, {}); - auto const t_d = table_device_view::create(std::get<0>(t_flattened), stream); - auto const values_d = table_device_view::create(std::get<0>(values_flattened), stream); + auto const t_d = table_device_view::create(t_flattened, stream); + auto const values_d = table_device_view::create(values_flattened, stream); auto const& lhs = find_first ? *t_d : *values_d; auto const& rhs = find_first ? *values_d : *t_d; - auto const& column_order_flattened = std::get<1>(t_flattened); - auto const& null_precedence_flattened = std::get<2>(t_flattened); rmm::device_uvector column_order_dv(column_order_flattened.size(), stream); rmm::device_uvector null_precedence_dv(null_precedence_flattened.size(), stream); CUDA_TRY(cudaMemcpyAsync(column_order_dv.data(), From 43139b87bc2f4de521297b177dd2ca2dd76f745e Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 8 Apr 2021 11:57:30 -0600 Subject: [PATCH 22/26] Remove redundant comment --- cpp/src/search/search.cu | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/src/search/search.cu b/cpp/src/search/search.cu index 83eabb32b57..86e88abc4d3 100644 --- a/cpp/src/search/search.cu +++ b/cpp/src/search/search.cu @@ -102,7 +102,6 @@ std::unique_ptr search_ordered(table_view const& t, // It will return any new dictionary columns created as well as updated table_views. auto const [_, t_vals_matched] = dictionary::detail::match_dictionaries({t, values}, stream); - // 0-table_view, 1-column_order, 2-null_precedence, 3-validity_columns auto const [t_flattened, column_order_flattened, null_precedence_flattened, t_validity] = structs::detail::flatten_nested_columns(t_vals_matched.front(), column_order, null_precedence); auto const [values_flattened, __, ___, ____] = From eead52ffc9db82a167c5af6d8525c2d1d51129ac Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 8 Apr 2021 11:59:43 -0600 Subject: [PATCH 23/26] Ignore variable in structure binding --- cpp/src/search/search.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/search/search.cu b/cpp/src/search/search.cu index 86e88abc4d3..330a59cd850 100644 --- a/cpp/src/search/search.cu +++ b/cpp/src/search/search.cu @@ -102,9 +102,9 @@ std::unique_ptr search_ordered(table_view const& t, // It will return any new dictionary columns created as well as updated table_views. auto const [_, t_vals_matched] = dictionary::detail::match_dictionaries({t, values}, stream); - auto const [t_flattened, column_order_flattened, null_precedence_flattened, t_validity] = + auto const [t_flattened, column_order_flattened, null_precedence_flattened, __] = structs::detail::flatten_nested_columns(t_vals_matched.front(), column_order, null_precedence); - auto const [values_flattened, __, ___, ____] = + auto const [values_flattened, ___, ____, _____] = structs::detail::flatten_nested_columns(t_vals_matched.back(), {}, {}); auto const t_d = table_device_view::create(t_flattened, stream); From fc3b252bea4302a191a736c76f9fd87019efcae3 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 8 Apr 2021 12:23:30 -0600 Subject: [PATCH 24/26] Add SlicedColumnInputTests --- cpp/tests/search/search_struct_test.cpp | 49 ++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 6 deletions(-) diff --git a/cpp/tests/search/search_struct_test.cpp b/cpp/tests/search/search_struct_test.cpp index ceb955a19e1..f7872fb153e 100644 --- a/cpp/tests/search/search_struct_test.cpp +++ b/cpp/tests/search/search_struct_test.cpp @@ -20,14 +20,10 @@ #include #include -#include #include #include #include -#include -#include - using bools_col = cudf::test::fixed_width_column_wrapper; using int32s_col = cudf::test::fixed_width_column_wrapper; using structs_col = cudf::test::structs_column_wrapper; @@ -49,19 +45,28 @@ using TestTypes = cudf::test::Concat const& t_col, +auto search_bounds(cudf::column_view const& t_col_view, std::unique_ptr const& values_col, std::vector const& column_orders = {cudf::order::ASCENDING}, std::vector const& null_precedence = { cudf::null_order::BEFORE}) { - auto const t = cudf::table_view{std::vector{t_col->view()}}; + auto const t = cudf::table_view{std::vector{t_col_view}}; auto const values = cudf::table_view{std::vector{values_col->view()}}; auto result_lower_bound = cudf::lower_bound(t, values, column_orders, null_precedence); auto result_upper_bound = cudf::upper_bound(t, values, column_orders, null_precedence); return std::make_pair(std::move(result_lower_bound), std::move(result_upper_bound)); } +auto search_bounds(std::unique_ptr const& t_col, + std::unique_ptr const& values_col, + std::vector const& column_orders = {cudf::order::ASCENDING}, + std::vector const& null_precedence = { + cudf::null_order::BEFORE}) +{ + return search_bounds(t_col->view(), values_col, column_orders, null_precedence); +} + auto null_at(cudf::size_type idx) { return cudf::test::iterator_with_null_at(idx); } } // namespace @@ -107,6 +112,38 @@ TYPED_TEST(TypedStructSearchTest, TrivialInputTests) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, results2.second->view(), print_all); } +TYPED_TEST(TypedStructSearchTest, SlicedColumnInputTests) +{ + using col_wrapper = cudf::test::fixed_width_column_wrapper; + + auto child_col_values = col_wrapper{0, 1, 2, 3, 4, 5}; + auto const structs_values = structs_col{child_col_values}.release(); + + auto child_col_t = col_wrapper{0, 1, 2, 2, 2, 2, 3, 3, 4, 4}; + auto const structs_t_original = structs_col{child_col_t}.release(); + + auto structs_t = cudf::slice(structs_t_original->view(), {0, 10})[0]; // the entire column t + auto results = search_bounds(structs_t, structs_values); + auto expected_lower_bound = int32s_col{0, 1, 2, 6, 8, 10}; + auto expected_upper_bound = int32s_col{1, 2, 6, 8, 10, 10}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_lower_bound, results.first->view(), print_all); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_upper_bound, results.second->view(), print_all); + + structs_t = cudf::slice(structs_t_original->view(), {0, 5})[0]; + results = search_bounds(structs_t, structs_values); + expected_lower_bound = int32s_col{0, 1, 2, 5, 5, 5}; + expected_upper_bound = int32s_col{1, 2, 5, 5, 5, 5}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_lower_bound, results.first->view(), print_all); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_upper_bound, results.second->view(), print_all); + + structs_t = cudf::slice(structs_t_original->view(), {5, 10})[0]; + results = search_bounds(structs_t, structs_values); + expected_lower_bound = int32s_col{0, 0, 0, 1, 3, 5}; + expected_upper_bound = int32s_col{0, 0, 1, 3, 5, 5}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_lower_bound, results.first->view(), print_all); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_upper_bound, results.second->view(), print_all); +} + TYPED_TEST(TypedStructSearchTest, SimpleInputWithNullsTests) { using col_wrapper = cudf::test::fixed_width_column_wrapper; From 661a5cd4a77259db08b1b6ee9b39eed4ebd74d25 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 9 Apr 2021 08:16:34 -0600 Subject: [PATCH 25/26] Disable debug printing --- cpp/tests/search/search_struct_test.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/tests/search/search_struct_test.cpp b/cpp/tests/search/search_struct_test.cpp index f7872fb153e..50c326269a0 100644 --- a/cpp/tests/search/search_struct_test.cpp +++ b/cpp/tests/search/search_struct_test.cpp @@ -29,9 +29,9 @@ using int32s_col = cudf::test::fixed_width_column_wrapper; using structs_col = cudf::test::structs_column_wrapper; using strings_col = cudf::test::strings_column_wrapper; -constexpr bool print_all{true}; -constexpr int32_t null{0}; // Mark for null child elements -constexpr int32_t XXX{0}; // Mark for null struct elements +constexpr bool print_all{false}; // For debugging only +constexpr int32_t null{0}; // Mark for null child elements +constexpr int32_t XXX{0}; // Mark for null struct elements template struct TypedStructSearchTest : public cudf::test::BaseFixture { From 4ba910a44d625f5fd7f16fdbcc1f66dcb9b3455c Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 19 Apr 2021 14:16:43 -0600 Subject: [PATCH 26/26] Use `make_device_uvector_async`, reverse structured binding, and re-organize input validity --- cpp/src/search/search.cu | 51 +++++++++++++++++----------------------- 1 file changed, 21 insertions(+), 30 deletions(-) diff --git a/cpp/src/search/search.cu b/cpp/src/search/search.cu index 330a59cd850..7c37b8e4d75 100644 --- a/cpp/src/search/search.cu +++ b/cpp/src/search/search.cu @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -77,6 +78,13 @@ std::unique_ptr search_ordered(table_view const& t, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + CUDF_EXPECTS( + column_order.empty() or static_cast(t.num_columns()) == column_order.size(), + "Mismatch between number of columns and column order."); + CUDF_EXPECTS( + null_precedence.empty() or static_cast(t.num_columns()) == null_precedence.size(), + "Mismatch between number of columns and null precedence."); + // Allocate result column auto result = make_numeric_column( data_type{type_to_id()}, values.num_rows(), mask_state::UNALLOCATED, stream, mr); @@ -88,45 +96,28 @@ std::unique_ptr search_ordered(table_view const& t, return result; } - if (not column_order.empty()) { - CUDF_EXPECTS(static_cast(t.num_columns()) == column_order.size(), - "Mismatch between number of columns and column order."); - } - - if (not null_precedence.empty()) { - CUDF_EXPECTS(static_cast(t.num_columns()) == null_precedence.size(), - "Mismatch between number of columns and null precedence."); - } - // This utility will ensure all corresponding dictionary columns have matching keys. // It will return any new dictionary columns created as well as updated table_views. - auto const [_, t_vals_matched] = dictionary::detail::match_dictionaries({t, values}, stream); + auto const matched = dictionary::detail::match_dictionaries({t, values}, stream); - auto const [t_flattened, column_order_flattened, null_precedence_flattened, __] = - structs::detail::flatten_nested_columns(t_vals_matched.front(), column_order, null_precedence); - auto const [values_flattened, ___, ____, _____] = - structs::detail::flatten_nested_columns(t_vals_matched.back(), {}, {}); + // 0-table_view, 1-column_order, 2-null_precedence, 3-validity_columns + auto const t_flattened = + structs::detail::flatten_nested_columns(matched.second.front(), column_order, null_precedence); + auto const values_flattened = + structs::detail::flatten_nested_columns(matched.second.back(), {}, {}); - auto const t_d = table_device_view::create(t_flattened, stream); - auto const values_d = table_device_view::create(values_flattened, stream); + auto const t_d = table_device_view::create(std::get<0>(t_flattened), stream); + auto const values_d = table_device_view::create(std::get<0>(values_flattened), stream); auto const& lhs = find_first ? *t_d : *values_d; auto const& rhs = find_first ? *values_d : *t_d; - rmm::device_uvector column_order_dv(column_order_flattened.size(), stream); - rmm::device_uvector null_precedence_dv(null_precedence_flattened.size(), stream); - CUDA_TRY(cudaMemcpyAsync(column_order_dv.data(), - column_order_flattened.data(), - sizeof(order) * column_order_flattened.size(), - cudaMemcpyDefault, - stream.value())); - CUDA_TRY(cudaMemcpyAsync(null_precedence_dv.data(), - null_precedence_flattened.data(), - sizeof(null_order) * null_precedence_flattened.size(), - cudaMemcpyDefault, - stream.value())); + auto const& column_order_flattened = std::get<1>(t_flattened); + auto const& null_precedence_flattened = std::get<2>(t_flattened); + auto const column_order_dv = detail::make_device_uvector_async(column_order_flattened, stream); + auto const null_precedence_dv = + detail::make_device_uvector_async(null_precedence_flattened, stream); auto const count_it = thrust::make_counting_iterator(0); - if (has_nulls(t) or has_nulls(values)) { auto const comp = row_lexicographic_comparator( lhs, rhs, column_order_dv.data(), null_precedence_dv.data());