From 933c974ddc0a29d002251e9558cdd41c9ff3cd2c Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Thu, 26 Aug 2021 19:28:51 +0530 Subject: [PATCH 01/78] First commit Many iterations already happened. I just realized late that I should commit --- cpp/benchmarks/CMakeLists.txt | 2 + .../compare/comparator_benchmark.cu | 92 ++++ cpp/include/cudf/sort2.cuh | 77 ++++ cpp/include/cudf/table/row_operator2.cuh | 420 ++++++++++++++++++ cpp/tests/CMakeLists.txt | 4 + cpp/tests/sort/sort2_test.cu | 66 +++ 6 files changed, 661 insertions(+) create mode 100644 cpp/benchmarks/compare/comparator_benchmark.cu create mode 100644 cpp/include/cudf/sort2.cuh create mode 100644 cpp/include/cudf/table/row_operator2.cuh create mode 100644 cpp/tests/sort/sort2_test.cu diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 56f17dc7090..9ec90bd6afa 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -113,6 +113,8 @@ ConfigureBench(ITERATOR_BENCH iterator/iterator_benchmark.cu) # - search benchmark ------------------------------------------------------------------------------ ConfigureBench(SEARCH_BENCH search/search_benchmark.cpp) +ConfigureBench(COMPARE_BENCH compare/comparator_benchmark.cu) + ################################################################################################### # - sort benchmark -------------------------------------------------------------------------------- ConfigureBench(SORT_BENCH diff --git a/cpp/benchmarks/compare/comparator_benchmark.cu b/cpp/benchmarks/compare/comparator_benchmark.cu new file mode 100644 index 00000000000..f012456ac39 --- /dev/null +++ b/cpp/benchmarks/compare/comparator_benchmark.cu @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +template +class Sort : public cudf::benchmark { +}; + +template +static void BM_sort(benchmark::State& state, bool nulls) +{ + using Type = int; + using column_wrapper = cudf::test::fixed_width_column_wrapper; + std::default_random_engine generator; + std::uniform_int_distribution distribution(0, 100); + + const cudf::size_type n_rows{(cudf::size_type)state.range(0)}; + const cudf::size_type n_cols{1}; + + // Create columns with values in the range [0,100) + std::vector columns; + columns.reserve(n_cols); + std::generate_n(std::back_inserter(columns), n_cols, [&, n_rows]() { + auto elements = cudf::detail::make_counting_transform_iterator( + 0, [&](auto row) { return distribution(generator); }); + if (!nulls) return column_wrapper(elements, elements + n_rows); + auto valids = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i % 100 == 0 ? 
false : true; }); + return column_wrapper(elements, elements + n_rows, valids); + }); + + std::vector> cols; + std::transform(columns.begin(), columns.end(), std::back_inserter(cols), [](column_wrapper& col) { + return col.release(); + }); + + // Lets add some nulls + std::vector struct_validity; + std::uniform_int_distribution bool_distribution(0, 1000); + std::generate_n(std::back_inserter(struct_validity), cols[0]->size(), [&]() { + return bool_distribution(generator); + }); + cudf::test::structs_column_wrapper struct_col(std::move(cols), struct_validity); + + // // Create table view + auto input = cudf::table_view({struct_col}); + // auto input = cudf::table_view({cols[0]->view()}); + + for (auto _ : state) { + cuda_event_timer raii(state, true, rmm::cuda_stream_default); + + // auto result = cudf::sorted_order(input); + auto result = cudf::detail::sorted_order2(input); + } +} + +#define SORT_BENCHMARK_DEFINE(name, stable, nulls) \ + BENCHMARK_TEMPLATE_DEFINE_F(Sort, name, stable) \ + (::benchmark::State & st) { BM_sort(st, nulls); } \ + BENCHMARK_REGISTER_F(Sort, name) \ + ->RangeMultiplier(8) \ + ->Ranges({{1 << 10, 1 << 26}, {1, 1}}) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +SORT_BENCHMARK_DEFINE(unstable, false, true) diff --git a/cpp/include/cudf/sort2.cuh b/cpp/include/cudf/sort2.cuh new file mode 100644 index 00000000000..96006550716 --- /dev/null +++ b/cpp/include/cudf/sort2.cuh @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +#include +#include + +namespace cudf { +namespace detail { + +/** + * @copydoc + * sorted_order(table_view&,std::vector,std::vector,rmm::mr::device_memory_resource*) + * + * @param stream CUDA stream used for device memory operations and kernel launches + */ +template +std::unique_ptr sorted_order2( + table_view input, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +{ + if (input.num_rows() == 0 or input.num_columns() == 0) { + return cudf::make_numeric_column(data_type(type_to_id()), 0); + } + + std::unique_ptr sorted_indices = cudf::make_numeric_column( + data_type(type_to_id()), input.num_rows(), mask_state::UNALLOCATED, stream, mr); + mutable_column_view mutable_indices_view = sorted_indices->mutable_view(); + thrust::sequence(rmm::exec_policy(stream), + mutable_indices_view.begin(), + mutable_indices_view.end(), + 0); + + auto device_table = table_device_view::create(input, stream); + auto const comparator = row_lexicographic_comparator2(*device_table, *device_table); + + thrust::sort(rmm::exec_policy(stream), + mutable_indices_view.begin(), + mutable_indices_view.end(), + comparator); + // protection for temporary d_column_order and d_null_precedence + stream.synchronize(); + + return sorted_indices; +} + +} // namespace detail +} // namespace cudf diff --git a/cpp/include/cudf/table/row_operator2.cuh b/cpp/include/cudf/table/row_operator2.cuh new file mode 100644 index 00000000000..9edb60cc112 --- /dev/null +++ b/cpp/include/cudf/table/row_operator2.cuh @@ -0,0 +1,420 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +namespace cudf { + +/** + * @brief Result type of the `element_relational_comparator2` function object. + * + * Indicates how two elements `a` and `b` compare with one and another. + * + * Equivalence is defined as `not (a +__device__ weak_ordering2 compare_elements2(Element lhs, Element rhs) +{ + if (lhs < rhs) { + return weak_ordering2::LESS; + } else if (rhs < lhs) { + return weak_ordering2::GREATER; + } + return weak_ordering2::EQUIVALENT; +} +} // namespace detail + +/* + * @brief A specialization for floating-point `Element` type relational comparison + * to derive the order of the elements with respect to `lhs`. Specialization is to + * handle `nan` in the order shown below. + * `[-Inf, -ve, 0, -0, +ve, +Inf, NaN, NaN, null] (for null_order::AFTER)` + * `[null, -Inf, -ve, 0, -0, +ve, +Inf, NaN, NaN] (for null_order::BEFORE)` + * + * @param[in] lhs first element + * @param[in] rhs second element + * @return weak_ordering2 Indicates the relationship between the elements in + * the `lhs` and `rhs` columns. 
+ */ +template ::value>* = nullptr> +__device__ weak_ordering2 relational_compare2(Element lhs, Element rhs) +{ + if (isnan(lhs) and isnan(rhs)) { + return weak_ordering2::EQUIVALENT; + } else if (isnan(rhs)) { + return weak_ordering2::LESS; + } else if (isnan(lhs)) { + return weak_ordering2::GREATER; + } + + return detail::compare_elements2(lhs, rhs); +} + +/** + * @brief Compare the nulls according to null order. + * + * @param lhs_is_null boolean representing if lhs is null + * @param rhs_is_null boolean representing if lhs is null + * @param null_precedence null order + * @return Indicates the relationship between null in lhs and rhs columns. + */ +inline __device__ auto null_compare2(bool lhs_is_null, bool rhs_is_null, null_order null_precedence) +{ + if (lhs_is_null and rhs_is_null) { // null ::value>* = nullptr> +__device__ weak_ordering2 relational_compare2(Element lhs, Element rhs) +{ + return detail::compare_elements2(lhs, rhs); +} + +/** + * @brief A specialization for floating-point `Element` type to check if + * `lhs` is equivalent to `rhs`. `nan == nan`. + * + * @param[in] lhs first element + * @param[in] rhs second element + * @return bool `true` if `lhs` == `rhs` else `false`. + */ +template ::value>* = nullptr> +__device__ bool equality_compare2(Element lhs, Element rhs) +{ + if (isnan(lhs) and isnan(rhs)) { return true; } + return lhs == rhs; +} + +/** + * @brief A specialization for non-floating-point `Element` type to check if + * `lhs` is equivalent to `rhs`. + * + * @param[in] lhs first element + * @param[in] rhs second element + * @return bool `true` if `lhs` == `rhs` else `false`. + */ +template ::value>* = nullptr> +__device__ bool equality_compare2(Element const lhs, Element const rhs) +{ + return lhs == rhs; +} + +/** + * @brief Performs an equality comparison between two elements in two columns. + * + * @tparam has_nulls Indicates the potential for null values in either column. 
+ */ +template +class element_equality_comparator2 { + public: + /** + * @brief Construct type-dispatched function object for comparing equality + * between two elements. + * + * @note `lhs` and `rhs` may be the same. + * + * @param lhs The column containing the first element + * @param rhs The column containing the second element (may be the same as lhs) + * @param nulls_are_equal Indicates if two null elements are treated as equivalent + */ + __host__ __device__ element_equality_comparator2(column_device_view lhs, + column_device_view rhs, + bool nulls_are_equal = true) + : lhs{lhs}, rhs{rhs}, nulls_are_equal{nulls_are_equal} + { + } + + /** + * @brief Compares the specified elements for equality. + * + * @param lhs_element_index The index of the first element + * @param rhs_element_index The index of the second element + * + */ + template ()>* = nullptr> + __device__ bool operator()(size_type lhs_element_index, + size_type rhs_element_index) const noexcept + { + if (has_nulls) { + bool const lhs_is_null{lhs.is_null(lhs_element_index)}; + bool const rhs_is_null{rhs.is_null(rhs_element_index)}; + if (lhs_is_null and rhs_is_null) { + return nulls_are_equal; + } else if (lhs_is_null != rhs_is_null) { + return false; + } + } + + return equality_compare2(lhs.element(lhs_element_index), + rhs.element(rhs_element_index)); + } + + template ()>* = nullptr> + __device__ bool operator()(size_type lhs_element_index, size_type rhs_element_index) + { + cudf_assert(false && "Attempted to compare elements of uncomparable types."); + return false; + } + + private: + column_device_view lhs; + column_device_view rhs; + bool nulls_are_equal; +}; + +template +class row_equality_comparator2 { + public: + row_equality_comparator2(table_device_view lhs, + table_device_view rhs, + bool nulls_are_equal = true) + : lhs{lhs}, rhs{rhs}, nulls_are_equal{nulls_are_equal} + { + CUDF_EXPECTS(lhs.num_columns() == rhs.num_columns(), "Mismatched number of columns."); + } + + __device__ bool 
operator()(size_type lhs_row_index, size_type rhs_row_index) const noexcept + { + auto equal_elements = [=](column_device_view l, column_device_view r) { + return cudf::type_dispatcher(l.type(), + element_equality_comparator2{l, r, nulls_are_equal}, + lhs_row_index, + rhs_row_index); + }; + + return thrust::equal(thrust::seq, lhs.begin(), lhs.end(), rhs.begin(), equal_elements); + } + + private: + table_device_view lhs; + table_device_view rhs; + bool nulls_are_equal; +}; + +/** + * @brief Performs a relational comparison between two elements in two columns. + * + * @tparam has_nulls Indicates the potential for null values in either column. + */ +template +class element_relational_comparator2 { + public: + /** + * @brief Construct type-dispatched function object for performing a + * relational comparison between two elements. + * + * @note `lhs` and `rhs` may be the same. + * + * @param lhs The column containing the first element + * @param rhs The column containing the second element (may be the same as lhs) + * @param null_precedence Indicates how null values are ordered with other + * values + */ + __host__ __device__ element_relational_comparator2(column_device_view lhs, + column_device_view rhs, + null_order null_precedence) + : lhs{lhs}, rhs{rhs}, null_precedence{null_precedence} + { + } + + /** + * @brief Performs a relational comparison between the specified elements + * + * @param lhs_element_index The index of the first element + * @param rhs_element_index The index of the second element + * @param null_precedence Indicates how null values are ordered with other + * values + * @return weak_ordering2 Indicates the relationship between the elements in + * the `lhs` and `rhs` columns. 
+ */ + template ()>* = nullptr> + __device__ weak_ordering2 operator()(size_type lhs_element_index, + size_type rhs_element_index) const noexcept + { + if (has_nulls) { + bool const lhs_is_null{lhs.is_null(lhs_element_index)}; + bool const rhs_is_null{rhs.is_null(rhs_element_index)}; + + if (lhs_is_null or rhs_is_null) { // atleast one is null + return null_compare2(lhs_is_null, rhs_is_null, null_precedence); + } + } + + return relational_compare2(lhs.element(lhs_element_index), + rhs.element(rhs_element_index)); + } + + template ()>* = nullptr> + __device__ weak_ordering2 operator()(size_type lhs_element_index, size_type rhs_element_index) + { + cudf_assert(false && "Attempted to compare elements of uncomparable types."); + return weak_ordering2::LESS; + } + + private: + column_device_view lhs; + column_device_view rhs; + null_order null_precedence; +}; + +/** + * @brief Computes whether one row is lexicographically *less* than another row. + * + * Lexicographic ordering is determined by: + * - Two rows are compared element by element. + * - The first mismatching element defines which row is lexicographically less + * or greater than the other. + * + * Lexicographic ordering is exactly equivalent to doing an alphabetical sort of + * two words, for example, `aac` would be *less* than (or precede) `abb`. The + * second letter in both words is the first non-equal letter, and `a < b`, thus + * `aac < abb`. + * + * @tparam has_nulls Indicates the potential for null values in either row. + */ +template +class row_lexicographic_comparator2 { + public: + /** + * @brief Construct a function object for performing a lexicographic + * comparison between the rows of two tables. + * + * @throws cudf::logic_error if `lhs.num_columns() != rhs.num_columns()` + * @throws cudf::logic_error if column types of `lhs` and `rhs` are not comparable. 
+ * + * @param lhs The first table + * @param rhs The second table (may be the same table as `lhs`) + * @param column_order Optional, device array the same length as a row that + * indicates the desired ascending/descending order of each column in a row. + * If `nullptr`, it is assumed all columns are sorted in ascending order. + * @param null_precedence Optional, device array the same length as a row + * and indicates how null values compare to all other for every column. If + * it is nullptr, then null precedence would be `null_order::BEFORE` for all + * columns. + */ + row_lexicographic_comparator2(table_device_view lhs, + table_device_view rhs, + order const* column_order = nullptr, + null_order const* null_precedence = nullptr) + : _lhs{lhs}, _rhs{rhs}, _column_order{column_order}, _null_precedence{null_precedence} + { + CUDF_EXPECTS(_lhs.num_columns() == _rhs.num_columns(), "Mismatched number of columns."); + // CUDF_EXPECTS(detail::is_relationally_comparable(_lhs, _rhs), + // "Attempted to compare elements of uncomparable types."); + } + + /** + * @brief Checks whether the row at `lhs_index` in the `lhs` table compares + * lexicographically less than the row at `rhs_index` in the `rhs` table. + * + * @param lhs_index The index of row in the `lhs` table to examine + * @param rhs_index The index of the row in the `rhs` table to examine + * @return `true` if row from the `lhs` table compares less than row in the + * `rhs` table + */ + __device__ bool operator()(size_type lhs_index, size_type rhs_index) const noexcept + { + for (size_type i = 0; i < _lhs.num_columns(); ++i) { + bool ascending = (_column_order == nullptr) or (_column_order[i] == order::ASCENDING); + + weak_ordering2 state{weak_ordering2::EQUIVALENT}; + null_order null_precedence = + _null_precedence == nullptr ? 
null_order::BEFORE : _null_precedence[i]; + + column_device_view lcol = _lhs.column(i); + column_device_view rcol = _rhs.column(i); + while (lcol.type().id() == type_id::STRUCT) { + bool const lhs_is_null{lcol.is_null(lhs_index)}; + bool const rhs_is_null{rcol.is_null(rhs_index)}; + + if (lhs_is_null or rhs_is_null) { // atleast one is null + state = null_compare2(lhs_is_null, rhs_is_null, null_precedence); + if (state != weak_ordering2::EQUIVALENT) break; + } + + lcol = lcol.children()[0]; + rcol = rcol.children()[0]; + } + + if (state == weak_ordering2::EQUIVALENT) { + auto comparator = element_relational_comparator2{lcol, rcol, null_precedence}; + state = cudf::type_dispatcher(lcol.type(), comparator, lhs_index, rhs_index); + } + + if (state == weak_ordering2::EQUIVALENT) { continue; } + + return state == (ascending ? weak_ordering2::LESS : weak_ordering2::GREATER); + } + return false; + } + + private: + table_device_view _lhs; + table_device_view _rhs; + null_order const* _null_precedence{}; + order const* _column_order{}; +}; // class row_lexicographic_comparator2 + +} // namespace cudf diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index c82826b8c60..de690a4af79 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -206,6 +206,10 @@ ConfigureTest(SORT_TEST sort/sort_test.cpp sort/rank_test.cpp) +ConfigureTest(SORT2_TEST + sort/sort2_test.cu +) + ################################################################################################### # - copying tests --------------------------------------------------------------------------------- ConfigureTest(COPYING_TEST diff --git a/cpp/tests/sort/sort2_test.cu b/cpp/tests/sort/sort2_test.cu new file mode 100644 index 00000000000..3ab67b0d0df --- /dev/null +++ b/cpp/tests/sort/sort2_test.cu @@ -0,0 +1,66 @@ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +struct NewRowOp : public 
cudf::test::BaseFixture { +}; + +#include + +TEST_F(NewRowOp, BasicTest) +{ + using Type = int; + using column_wrapper = cudf::test::fixed_width_column_wrapper; + std::default_random_engine generator; + std::uniform_int_distribution distribution(0, 100); + + const cudf::size_type n_rows{1 << 10}; + const cudf::size_type n_cols{1}; + + // Create columns with values in the range [0,100) + std::vector columns; + columns.reserve(n_cols); + std::generate_n(std::back_inserter(columns), n_cols, [&, n_rows]() { + auto elements = cudf::detail::make_counting_transform_iterator( + 0, [&](auto row) { return distribution(generator); }); + auto valids = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i % 3 == 0 ? false : true; }); + return column_wrapper(elements, elements + n_rows, valids); + }); + + std::vector> cols; + std::transform(columns.begin(), columns.end(), std::back_inserter(cols), [](column_wrapper& col) { + return col.release(); + }); + + // Lets add some nulls + std::vector struct_validity; + std::uniform_int_distribution bool_distribution(0, 10); + std::generate_n(std::back_inserter(struct_validity), cols[0]->size(), [&]() { + return bool_distribution(generator); + }); + cudf::test::structs_column_wrapper struct_col(std::move(cols), struct_validity); + + // cudf::test::print(struct_col); + + // // Create table view + auto input = cudf::table_view({struct_col}); + + auto result1 = cudf::sorted_order(input); + // cudf::test::print(result1->view()); + auto result2 = cudf::detail::sorted_order2(input); + // cudf::test::print(result2->view()); + cudf::test::expect_columns_equal(result1->view(), result2->view()); +} From a1636e52a925460d27e2b4bb7ec2bac8c3710f75 Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Sat, 28 Aug 2021 05:17:22 +0530 Subject: [PATCH 02/78] testing and profiling deep single hierarchy struct --- .../compare/comparator_benchmark.cu | 25 +++++++++++------- cpp/tests/sort/sort2_test.cu | 26 ++++++++++++------- 2 files 
changed, 31 insertions(+), 20 deletions(-) diff --git a/cpp/benchmarks/compare/comparator_benchmark.cu b/cpp/benchmarks/compare/comparator_benchmark.cu index f012456ac39..e4f690102b0 100644 --- a/cpp/benchmarks/compare/comparator_benchmark.cu +++ b/cpp/benchmarks/compare/comparator_benchmark.cu @@ -41,6 +41,7 @@ static void BM_sort(benchmark::State& state, bool nulls) std::uniform_int_distribution distribution(0, 100); const cudf::size_type n_rows{(cudf::size_type)state.range(0)}; + const cudf::size_type depth{(cudf::size_type)state.range(1)}; const cudf::size_type n_cols{1}; // Create columns with values in the range [0,100) @@ -51,7 +52,7 @@ static void BM_sort(benchmark::State& state, bool nulls) 0, [&](auto row) { return distribution(generator); }); if (!nulls) return column_wrapper(elements, elements + n_rows); auto valids = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return i % 100 == 0 ? false : true; }); + 0, [](auto i) { return i % 10 == 0 ? false : true; }); return column_wrapper(elements, elements + n_rows, valids); }); @@ -60,16 +61,20 @@ static void BM_sort(benchmark::State& state, bool nulls) return col.release(); }); - // Lets add some nulls - std::vector struct_validity; - std::uniform_int_distribution bool_distribution(0, 1000); - std::generate_n(std::back_inserter(struct_validity), cols[0]->size(), [&]() { - return bool_distribution(generator); - }); - cudf::test::structs_column_wrapper struct_col(std::move(cols), struct_validity); + std::vector> child_cols = std::move(cols); + // Lets add some layers + for (int i = 0; i < depth; i++) { + std::vector struct_validity; + std::uniform_int_distribution bool_distribution(0, 100 * (i + 1)); + std::generate_n( + std::back_inserter(struct_validity), n_rows, [&]() { return bool_distribution(generator); }); + cudf::test::structs_column_wrapper struct_col(std::move(child_cols), struct_validity); + child_cols = std::vector>{}; + child_cols.push_back(struct_col.release()); + } // // 
Create table view - auto input = cudf::table_view({struct_col}); + auto input = cudf::table(std::move(child_cols)); // auto input = cudf::table_view({cols[0]->view()}); for (auto _ : state) { @@ -85,7 +90,7 @@ static void BM_sort(benchmark::State& state, bool nulls) (::benchmark::State & st) { BM_sort(st, nulls); } \ BENCHMARK_REGISTER_F(Sort, name) \ ->RangeMultiplier(8) \ - ->Ranges({{1 << 10, 1 << 26}, {1, 1}}) \ + ->Ranges({{1 << 10, 1 << 26}, {1, 8}}) \ ->UseManualTime() \ ->Unit(benchmark::kMillisecond); diff --git a/cpp/tests/sort/sort2_test.cu b/cpp/tests/sort/sort2_test.cu index 3ab67b0d0df..44abf545a71 100644 --- a/cpp/tests/sort/sort2_test.cu +++ b/cpp/tests/sort/sort2_test.cu @@ -26,8 +26,9 @@ TEST_F(NewRowOp, BasicTest) std::default_random_engine generator; std::uniform_int_distribution distribution(0, 100); - const cudf::size_type n_rows{1 << 10}; + const cudf::size_type n_rows{1 << 6}; const cudf::size_type n_cols{1}; + const cudf::size_type depth{8}; // Create columns with values in the range [0,100) std::vector columns; @@ -45,18 +46,23 @@ TEST_F(NewRowOp, BasicTest) return col.release(); }); - // Lets add some nulls - std::vector struct_validity; - std::uniform_int_distribution bool_distribution(0, 10); - std::generate_n(std::back_inserter(struct_validity), cols[0]->size(), [&]() { - return bool_distribution(generator); - }); - cudf::test::structs_column_wrapper struct_col(std::move(cols), struct_validity); + std::vector> child_cols = std::move(cols); + // Lets add some layers + for (int i = 0; i < depth; i++) { + std::vector struct_validity; + std::uniform_int_distribution bool_distribution(0, 10 * (i + 1)); + std::generate_n( + std::back_inserter(struct_validity), n_rows, [&]() { return bool_distribution(generator); }); + cudf::test::structs_column_wrapper struct_col(std::move(child_cols), struct_validity); + child_cols = std::vector>{}; + child_cols.push_back(struct_col.release()); + } - // cudf::test::print(struct_col); + 
cudf::test::print(child_cols[0]->view()); // // Create table view - auto input = cudf::table_view({struct_col}); + // auto input = cudf::table_view({struct_col}); + auto input = cudf::table(std::move(child_cols)); auto result1 = cudf::sorted_order(input); // cudf::test::print(result1->view()); From 3d21daf53efe83e82f088c61ac4ff19d68d8a431 Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Fri, 14 Jan 2022 20:18:32 +0530 Subject: [PATCH 03/78] Make the sandboxed test compile again --- cpp/include/cudf/sort2.cuh | 2 +- cpp/tests/CMakeLists.txt | 4 +--- cpp/tests/sort/sort2_test.cu | 2 ++ 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/include/cudf/sort2.cuh b/cpp/include/cudf/sort2.cuh index 96006550716..6732e5a5db1 100644 --- a/cpp/include/cudf/sort2.cuh +++ b/cpp/include/cudf/sort2.cuh @@ -24,7 +24,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index f9f157dbcb4..23a158af4c2 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -205,9 +205,7 @@ endif() # * sort tests ------------------------------------------------------------------------------------ ConfigureTest(SORT_TEST sort/segmented_sort_tests.cpp sort/sort_test.cpp sort/rank_test.cpp) -ConfigureTest(SORT2_TEST - sort/sort2_test.cu -) +ConfigureTest(SORT2_TEST sort/sort2_test.cu) # ################################################################################################## # * copying tests --------------------------------------------------------------------------------- diff --git a/cpp/tests/sort/sort2_test.cu b/cpp/tests/sort/sort2_test.cu index 44abf545a71..649951d1d5b 100644 --- a/cpp/tests/sort/sort2_test.cu +++ b/cpp/tests/sort/sort2_test.cu @@ -70,3 +70,5 @@ TEST_F(NewRowOp, BasicTest) // cudf::test::print(result2->view()); cudf::test::expect_columns_equal(result1->view(), result2->view()); } + +CUDF_TEST_PROGRAM_MAIN() From 9f32e6b1eafe5db74ae6157cbec057eb1259b58d Mon Sep 17 
00:00:00 2001 From: Devavret Makkar Date: Sat, 15 Jan 2022 05:37:18 +0530 Subject: [PATCH 04/78] Update my row_comparator with nullate --- cpp/include/cudf/sort2.cuh | 7 +- cpp/include/cudf/table/row_operator3.cuh | 441 +++++++++++++++++++++++ 2 files changed, 446 insertions(+), 2 deletions(-) create mode 100644 cpp/include/cudf/table/row_operator3.cuh diff --git a/cpp/include/cudf/sort2.cuh b/cpp/include/cudf/sort2.cuh index 6732e5a5db1..1d1a5300787 100644 --- a/cpp/include/cudf/sort2.cuh +++ b/cpp/include/cudf/sort2.cuh @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -60,8 +61,10 @@ std::unique_ptr sorted_order2( mutable_indices_view.end(), 0); - auto device_table = table_device_view::create(input, stream); - auto const comparator = row_lexicographic_comparator2(*device_table, *device_table); + auto device_table = table_device_view::create(input, stream); + // auto const comparator = row_lexicographic_comparator2(*device_table, *device_table); + auto const comparator = + row_lexicographic_comparator3(nullate::DYNAMIC{true}, *device_table, *device_table); thrust::sort(rmm::exec_policy(stream), mutable_indices_view.begin(), diff --git a/cpp/include/cudf/table/row_operator3.cuh b/cpp/include/cudf/table/row_operator3.cuh new file mode 100644 index 00000000000..1e3c2f7d75b --- /dev/null +++ b/cpp/include/cudf/table/row_operator3.cuh @@ -0,0 +1,441 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +namespace cudf { + +/** + * @brief Result type of the `element_relational_comparator2` function object. + * + * Indicates how two elements `a` and `b` compare with one and another. + * + * Equivalence is defined as `not (a +__device__ weak_ordering3 compare_elements3(Element lhs, Element rhs) +{ + if (lhs < rhs) { + return weak_ordering3::LESS; + } else if (rhs < lhs) { + return weak_ordering3::GREATER; + } + return weak_ordering3::EQUIVALENT; +} +} // namespace detail + +/** + * @brief A specialization for floating-point `Element` type relational comparison + * to derive the order of the elements with respect to `lhs`. + * + * This Specialization handles `nan` in the following order: + * `[-Inf, -ve, 0, -0, +ve, +Inf, NaN, NaN, null] (for null_order::AFTER)` + * `[null, -Inf, -ve, 0, -0, +ve, +Inf, NaN, NaN] (for null_order::BEFORE)` + * + * @param lhs first element + * @param rhs second element + * @return Indicates the relationship between the elements in + * the `lhs` and `rhs` columns. + */ +template ::value>* = nullptr> +__device__ weak_ordering3 relational_compare3(Element lhs, Element rhs) +{ + if (isnan(lhs) and isnan(rhs)) { + return weak_ordering3::EQUIVALENT; + } else if (isnan(rhs)) { + return weak_ordering3::LESS; + } else if (isnan(lhs)) { + return weak_ordering3::GREATER; + } + + return detail::compare_elements3(lhs, rhs); +} + +/** + * @brief Compare the nulls according to null order. + * + * @param lhs_is_null boolean representing if lhs is null + * @param rhs_is_null boolean representing if lhs is null + * @param null_precedence null order + * @return Indicates the relationship between null in lhs and rhs columns. 
+ */ +inline __device__ auto null_compare3(bool lhs_is_null, bool rhs_is_null, null_order null_precedence) +{ + if (lhs_is_null and rhs_is_null) { // null ::value>* = nullptr> +__device__ weak_ordering3 relational_compare3(Element lhs, Element rhs) +{ + return detail::compare_elements3(lhs, rhs); +} + +/** + * @brief A specialization for floating-point `Element` type to check if + * `lhs` is equivalent to `rhs`. `nan == nan`. + * + * @param lhs first element + * @param rhs second element + * @return `true` if `lhs` == `rhs` else `false`. + */ +template ::value>* = nullptr> +__device__ bool equality_compare3(Element lhs, Element rhs) +{ + if (isnan(lhs) and isnan(rhs)) { return true; } + return lhs == rhs; +} + +/** + * @brief A specialization for non-floating-point `Element` type to check if + * `lhs` is equivalent to `rhs`. + * + * @param lhs first element + * @param rhs second element + * @return `true` if `lhs` == `rhs` else `false`. + */ +template ::value>* = nullptr> +__device__ bool equality_compare3(Element const lhs, Element const rhs) +{ + return lhs == rhs; +} + +/** + * @brief Performs an equality comparison between two elements in two columns. + * + * @tparam Nullate A cudf::nullate type describing how to check for nulls. + */ +template +class element_equality_comparator3 { + public: + /** + * @brief Construct type-dispatched function object for comparing equality + * between two elements. + * + * @note `lhs` and `rhs` may be the same. + * + * @param has_nulls Indicates if either input column contains nulls. 
+ * @param lhs The column containing the first element + * @param rhs The column containing the second element (may be the same as lhs) + * @param nulls_are_equal Indicates if two null elements are treated as equivalent + */ + __host__ __device__ + element_equality_comparator3(Nullate has_nulls, + column_device_view lhs, + column_device_view rhs, + null_equality nulls_are_equal = null_equality::EQUAL) + : lhs{lhs}, rhs{rhs}, nulls{has_nulls}, nulls_are_equal{nulls_are_equal} + { + } + + /** + * @brief Compares the specified elements for equality. + * + * @param lhs_element_index The index of the first element + * @param rhs_element_index The index of the second element + * @return True if both lhs and rhs element are both nulls and `nulls_are_equal` is true, or equal + */ + template ()>* = nullptr> + __device__ bool operator()(size_type lhs_element_index, + size_type rhs_element_index) const noexcept + { + if (nulls) { + bool const lhs_is_null{lhs.is_null(lhs_element_index)}; + bool const rhs_is_null{rhs.is_null(rhs_element_index)}; + if (lhs_is_null and rhs_is_null) { + return nulls_are_equal == null_equality::EQUAL; + } else if (lhs_is_null != rhs_is_null) { + return false; + } + } + + return equality_compare3(lhs.element(lhs_element_index), + rhs.element(rhs_element_index)); + } + + template ()>* = nullptr> + __device__ bool operator()(size_type lhs_element_index, size_type rhs_element_index) + { + cudf_assert(false && "Attempted to compare elements of uncomparable types."); + return false; + } + + private: + column_device_view lhs; + column_device_view rhs; + Nullate nulls; + null_equality nulls_are_equal; +}; + +template +class row_equality_comparator3 { + public: + row_equality_comparator3(Nullate has_nulls, + table_device_view lhs, + table_device_view rhs, + null_equality nulls_are_equal = true) + : lhs{lhs}, rhs{rhs}, nulls{has_nulls}, nulls_are_equal{nulls_are_equal} + { + CUDF_EXPECTS(lhs.num_columns() == rhs.num_columns(), "Mismatched number of 
columns."); + } + + __device__ bool operator()(size_type lhs_row_index, size_type rhs_row_index) const noexcept + { + auto equal_elements = [=](column_device_view l, column_device_view r) { + return cudf::type_dispatcher(l.type(), + element_equality_comparator3{nulls, l, r, nulls_are_equal}, + lhs_row_index, + rhs_row_index); + }; + + return thrust::equal(thrust::seq, lhs.begin(), lhs.end(), rhs.begin(), equal_elements); + } + + private: + table_device_view lhs; + table_device_view rhs; + Nullate nulls; + null_equality nulls_are_equal; +}; + +/** + * @brief Performs a relational comparison between two elements in two columns. + * + * @tparam Nullate A cudf::nullate type describing how to check for nulls. + */ +template +class element_relational_comparator3 { + public: + /** + * @brief Construct type-dispatched function object for performing a + * relational comparison between two elements. + * + * @note `lhs` and `rhs` may be the same. + * + * @param lhs The column containing the first element + * @param rhs The column containing the second element (may be the same as lhs) + * @param has_nulls Indicates if either input column contains nulls. + * @param null_precedence Indicates how null values are ordered with other values + */ + __host__ __device__ element_relational_comparator3(Nullate has_nulls, + column_device_view lhs, + column_device_view rhs, + null_order null_precedence) + : lhs{lhs}, rhs{rhs}, nulls{has_nulls}, null_precedence{null_precedence} + { + } + + __host__ __device__ element_relational_comparator3(Nullate has_nulls, + column_device_view lhs, + column_device_view rhs) + : lhs{lhs}, rhs{rhs}, nulls{has_nulls} + { + } + + /** + * @brief Performs a relational comparison between the specified elements + * + * @param lhs_element_index The index of the first element + * @param rhs_element_index The index of the second element + * @return Indicates the relationship between the elements in + * the `lhs` and `rhs` columns. 
+ */ + template ()>* = nullptr> + __device__ weak_ordering3 operator()(size_type lhs_element_index, + size_type rhs_element_index) const noexcept + { + if (nulls) { + bool const lhs_is_null{lhs.is_null(lhs_element_index)}; + bool const rhs_is_null{rhs.is_null(rhs_element_index)}; + + if (lhs_is_null or rhs_is_null) { // at least one is null + return null_compare3(lhs_is_null, rhs_is_null, null_precedence); + } + } + + return relational_compare3(lhs.element(lhs_element_index), + rhs.element(rhs_element_index)); + } + + template ()>* = nullptr> + __device__ weak_ordering3 operator()(size_type lhs_element_index, size_type rhs_element_index) + { + cudf_assert(false && "Attempted to compare elements of uncomparable types."); + return weak_ordering3::LESS; + } + + private: + column_device_view lhs; + column_device_view rhs; + Nullate nulls; + null_order null_precedence{}; +}; + +/** + * @brief Computes whether one row is lexicographically *less* than another row. + * + * Lexicographic ordering is determined by: + * - Two rows are compared element by element. + * - The first mismatching element defines which row is lexicographically less + * or greater than the other. + * + * Lexicographic ordering is exactly equivalent to doing an alphabetical sort of + * two words, for example, `aac` would be *less* than (or precede) `abb`. The + * second letter in both words is the first non-equal letter, and `a < b`, thus + * `aac < abb`. + * + * @tparam Nullate A cudf::nullate type describing how to check for nulls. + */ +template +class row_lexicographic_comparator3 { + public: + /** + * @brief Construct a function object for performing a lexicographic + * comparison between the rows of two tables. + * + * @throws cudf::logic_error if `lhs.num_columns() != rhs.num_columns()` + * @throws cudf::logic_error if column types of `lhs` and `rhs` are not comparable. 
+ * + * @param lhs The first table + * @param rhs The second table (may be the same table as `lhs`) + * @param has_nulls Indicates if either input table contains columns with nulls. + * @param column_order Optional, device array the same length as a row that + * indicates the desired ascending/descending order of each column in a row. + * If `nullptr`, it is assumed all columns are sorted in ascending order. + * @param null_precedence Optional, device array the same length as a row + * and indicates how null values compare to all other for every column. If + * it is nullptr, then null precedence would be `null_order::BEFORE` for all + * columns. + */ + row_lexicographic_comparator3(Nullate has_nulls, + table_device_view lhs, + table_device_view rhs, + order const* column_order = nullptr, + null_order const* null_precedence = nullptr) + : _lhs{lhs}, + _rhs{rhs}, + _nulls{has_nulls}, + _column_order{column_order}, + _null_precedence{null_precedence} + { + CUDF_EXPECTS(_lhs.num_columns() == _rhs.num_columns(), "Mismatched number of columns."); + // CUDF_EXPECTS(detail::is_relationally_comparable(_lhs, _rhs), + // "Attempted to compare elements of uncomparable types."); + } + + /** + * @brief Checks whether the row at `lhs_index` in the `lhs` table compares + * lexicographically less than the row at `rhs_index` in the `rhs` table. + * + * @param lhs_index The index of row in the `lhs` table to examine + * @param rhs_index The index of the row in the `rhs` table to examine + * @return `true` if row from the `lhs` table compares less than row in the + * `rhs` table + */ + __device__ bool operator()(size_type lhs_index, size_type rhs_index) const noexcept + { + for (size_type i = 0; i < _lhs.num_columns(); ++i) { + bool ascending = (_column_order == nullptr) or (_column_order[i] == order::ASCENDING); + + weak_ordering3 state{weak_ordering3::EQUIVALENT}; + null_order null_precedence = + _null_precedence == nullptr ? 
null_order::BEFORE : _null_precedence[i]; + + column_device_view lcol = _lhs.column(i); + column_device_view rcol = _rhs.column(i); + while (lcol.type().id() == type_id::STRUCT) { + bool const lhs_is_null{lcol.is_null(lhs_index)}; + bool const rhs_is_null{rcol.is_null(rhs_index)}; + + if (lhs_is_null or rhs_is_null) { // atleast one is null + state = null_compare3(lhs_is_null, rhs_is_null, null_precedence); + if (state != weak_ordering3::EQUIVALENT) break; + } + + lcol = lcol.children()[0]; + rcol = rcol.children()[0]; + } + + if (state == weak_ordering3::EQUIVALENT) { + auto comparator = element_relational_comparator3{_nulls, lcol, rcol, null_precedence}; + state = cudf::type_dispatcher(lcol.type(), comparator, lhs_index, rhs_index); + } + + if (state == weak_ordering3::EQUIVALENT) { continue; } + + return state == (ascending ? weak_ordering3::LESS : weak_ordering3::GREATER); + } + return false; + } + + private: + table_device_view _lhs; + table_device_view _rhs; + Nullate _nulls{}; + null_order const* _null_precedence{}; + order const* _column_order{}; +}; // class row_lexicographic_comparator3 + +} // namespace cudf From 022e2a49d0cc4b6cc65da972595e5b3c7f2d7e9a Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Tue, 25 Jan 2022 02:35:22 +0530 Subject: [PATCH 05/78] Basic verticalization utility and experimental namespace --- cpp/include/cudf/detail/structs/utilities.hpp | 5 + cpp/include/cudf/sort2.cuh | 8 +- cpp/include/cudf/table/row_operator3.cuh | 124 +++++++++--------- cpp/src/structs/utilities.cpp | 52 ++++++++ cpp/tests/sort/sort2_test.cu | 105 +++++++++++++++ 5 files changed, 230 insertions(+), 64 deletions(-) diff --git a/cpp/include/cudf/detail/structs/utilities.hpp b/cpp/include/cudf/detail/structs/utilities.hpp index 751b7c00e8a..23601548cbf 100644 --- a/cpp/include/cudf/detail/structs/utilities.hpp +++ b/cpp/include/cudf/detail/structs/utilities.hpp @@ -150,6 +150,11 @@ flattened_table flatten_nested_columns( std::vector const& null_precedence, 
column_nullability nullability = column_nullability::MATCH_INCOMING); +namespace experimental { +std::tuple> verticalize_nested_columns( + table_view input); +} + /** * @brief Unflatten columns flattened as by `flatten_nested_columns()`, * based on the provided `blueprint`. diff --git a/cpp/include/cudf/sort2.cuh b/cpp/include/cudf/sort2.cuh index 1d1a5300787..125d696d055 100644 --- a/cpp/include/cudf/sort2.cuh +++ b/cpp/include/cudf/sort2.cuh @@ -61,10 +61,12 @@ std::unique_ptr sorted_order2( mutable_indices_view.end(), 0); - auto device_table = table_device_view::create(input, stream); + auto [vertical_table, nullmasks] = + cudf::structs::detail::experimental::verticalize_nested_columns(input); + auto device_table = table_device_view::create(vertical_table, stream); // auto const comparator = row_lexicographic_comparator2(*device_table, *device_table); - auto const comparator = - row_lexicographic_comparator3(nullate::DYNAMIC{true}, *device_table, *device_table); + auto const comparator = experimental::row_lexicographic_comparator( + nullate::DYNAMIC{true}, *device_table, *device_table); thrust::sort(rmm::exec_policy(stream), mutable_indices_view.begin(), diff --git a/cpp/include/cudf/table/row_operator3.cuh b/cpp/include/cudf/table/row_operator3.cuh index 1e3c2f7d75b..71bbb59339e 100644 --- a/cpp/include/cudf/table/row_operator3.cuh +++ b/cpp/include/cudf/table/row_operator3.cuh @@ -31,6 +31,7 @@ #include namespace cudf { +namespace experimental { /** * @brief Result type of the `element_relational_comparator2` function object. 
@@ -40,7 +41,7 @@ namespace cudf { * Equivalence is defined as `not (a -__device__ weak_ordering3 compare_elements3(Element lhs, Element rhs) +__device__ weak_ordering compare_elements(Element lhs, Element rhs) { if (lhs < rhs) { - return weak_ordering3::LESS; + return weak_ordering::LESS; } else if (rhs < lhs) { - return weak_ordering3::GREATER; + return weak_ordering::GREATER; } - return weak_ordering3::EQUIVALENT; + return weak_ordering::EQUIVALENT; } } // namespace detail @@ -81,17 +82,17 @@ __device__ weak_ordering3 compare_elements3(Element lhs, Element rhs) * the `lhs` and `rhs` columns. */ template ::value>* = nullptr> -__device__ weak_ordering3 relational_compare3(Element lhs, Element rhs) +__device__ weak_ordering relational_compare(Element lhs, Element rhs) { if (isnan(lhs) and isnan(rhs)) { - return weak_ordering3::EQUIVALENT; + return weak_ordering::EQUIVALENT; } else if (isnan(rhs)) { - return weak_ordering3::LESS; + return weak_ordering::LESS; } else if (isnan(lhs)) { - return weak_ordering3::GREATER; + return weak_ordering::GREATER; } - return detail::compare_elements3(lhs, rhs); + return detail::compare_elements(lhs, rhs); } /** @@ -102,16 +103,16 @@ __device__ weak_ordering3 relational_compare3(Element lhs, Element rhs) * @param null_precedence null order * @return Indicates the relationship between null in lhs and rhs columns. 
*/ -inline __device__ auto null_compare3(bool lhs_is_null, bool rhs_is_null, null_order null_precedence) +inline __device__ auto null_compare(bool lhs_is_null, bool rhs_is_null, null_order null_precedence) { if (lhs_is_null and rhs_is_null) { // null ::value>* = nullptr> -__device__ weak_ordering3 relational_compare3(Element lhs, Element rhs) +__device__ weak_ordering relational_compare(Element lhs, Element rhs) { - return detail::compare_elements3(lhs, rhs); + return detail::compare_elements(lhs, rhs); } /** @@ -138,7 +139,7 @@ __device__ weak_ordering3 relational_compare3(Element lhs, Element rhs) * @return `true` if `lhs` == `rhs` else `false`. */ template ::value>* = nullptr> -__device__ bool equality_compare3(Element lhs, Element rhs) +__device__ bool equality_compare(Element lhs, Element rhs) { if (isnan(lhs) and isnan(rhs)) { return true; } return lhs == rhs; @@ -153,7 +154,7 @@ __device__ bool equality_compare3(Element lhs, Element rhs) * @return `true` if `lhs` == `rhs` else `false`. */ template ::value>* = nullptr> -__device__ bool equality_compare3(Element const lhs, Element const rhs) +__device__ bool equality_compare(Element const lhs, Element const rhs) { return lhs == rhs; } @@ -164,7 +165,7 @@ __device__ bool equality_compare3(Element const lhs, Element const rhs) * @tparam Nullate A cudf::nullate type describing how to check for nulls. 
*/ template -class element_equality_comparator3 { +class element_equality_comparator { public: /** * @brief Construct type-dispatched function object for comparing equality @@ -178,10 +179,10 @@ class element_equality_comparator3 { * @param nulls_are_equal Indicates if two null elements are treated as equivalent */ __host__ __device__ - element_equality_comparator3(Nullate has_nulls, - column_device_view lhs, - column_device_view rhs, - null_equality nulls_are_equal = null_equality::EQUAL) + element_equality_comparator(Nullate has_nulls, + column_device_view lhs, + column_device_view rhs, + null_equality nulls_are_equal = null_equality::EQUAL) : lhs{lhs}, rhs{rhs}, nulls{has_nulls}, nulls_are_equal{nulls_are_equal} { } @@ -208,8 +209,8 @@ class element_equality_comparator3 { } } - return equality_compare3(lhs.element(lhs_element_index), - rhs.element(rhs_element_index)); + return equality_compare(lhs.element(lhs_element_index), + rhs.element(rhs_element_index)); } template -class row_equality_comparator3 { +class row_equality_comparator { public: - row_equality_comparator3(Nullate has_nulls, - table_device_view lhs, - table_device_view rhs, - null_equality nulls_are_equal = true) + row_equality_comparator(Nullate has_nulls, + table_device_view lhs, + table_device_view rhs, + null_equality nulls_are_equal = true) : lhs{lhs}, rhs{rhs}, nulls{has_nulls}, nulls_are_equal{nulls_are_equal} { CUDF_EXPECTS(lhs.num_columns() == rhs.num_columns(), "Mismatched number of columns."); @@ -243,7 +244,7 @@ class row_equality_comparator3 { { auto equal_elements = [=](column_device_view l, column_device_view r) { return cudf::type_dispatcher(l.type(), - element_equality_comparator3{nulls, l, r, nulls_are_equal}, + element_equality_comparator{nulls, l, r, nulls_are_equal}, lhs_row_index, rhs_row_index); }; @@ -264,7 +265,7 @@ class row_equality_comparator3 { * @tparam Nullate A cudf::nullate type describing how to check for nulls. 
*/ template -class element_relational_comparator3 { +class element_relational_comparator { public: /** * @brief Construct type-dispatched function object for performing a @@ -277,17 +278,17 @@ class element_relational_comparator3 { * @param has_nulls Indicates if either input column contains nulls. * @param null_precedence Indicates how null values are ordered with other values */ - __host__ __device__ element_relational_comparator3(Nullate has_nulls, - column_device_view lhs, - column_device_view rhs, - null_order null_precedence) + __host__ __device__ element_relational_comparator(Nullate has_nulls, + column_device_view lhs, + column_device_view rhs, + null_order null_precedence) : lhs{lhs}, rhs{rhs}, nulls{has_nulls}, null_precedence{null_precedence} { } - __host__ __device__ element_relational_comparator3(Nullate has_nulls, - column_device_view lhs, - column_device_view rhs) + __host__ __device__ element_relational_comparator(Nullate has_nulls, + column_device_view lhs, + column_device_view rhs) : lhs{lhs}, rhs{rhs}, nulls{has_nulls} { } @@ -302,28 +303,28 @@ class element_relational_comparator3 { */ template ()>* = nullptr> - __device__ weak_ordering3 operator()(size_type lhs_element_index, - size_type rhs_element_index) const noexcept + __device__ weak_ordering operator()(size_type lhs_element_index, + size_type rhs_element_index) const noexcept { if (nulls) { bool const lhs_is_null{lhs.is_null(lhs_element_index)}; bool const rhs_is_null{rhs.is_null(rhs_element_index)}; if (lhs_is_null or rhs_is_null) { // at least one is null - return null_compare3(lhs_is_null, rhs_is_null, null_precedence); + return null_compare(lhs_is_null, rhs_is_null, null_precedence); } } - return relational_compare3(lhs.element(lhs_element_index), - rhs.element(rhs_element_index)); + return relational_compare(lhs.element(lhs_element_index), + rhs.element(rhs_element_index)); } template ()>* = nullptr> - __device__ weak_ordering3 operator()(size_type lhs_element_index, size_type 
rhs_element_index) + __device__ weak_ordering operator()(size_type lhs_element_index, size_type rhs_element_index) { cudf_assert(false && "Attempted to compare elements of uncomparable types."); - return weak_ordering3::LESS; + return weak_ordering::LESS; } private: @@ -349,7 +350,7 @@ class element_relational_comparator3 { * @tparam Nullate A cudf::nullate type describing how to check for nulls. */ template -class row_lexicographic_comparator3 { +class row_lexicographic_comparator { public: /** * @brief Construct a function object for performing a lexicographic @@ -369,11 +370,11 @@ class row_lexicographic_comparator3 { * it is nullptr, then null precedence would be `null_order::BEFORE` for all * columns. */ - row_lexicographic_comparator3(Nullate has_nulls, - table_device_view lhs, - table_device_view rhs, - order const* column_order = nullptr, - null_order const* null_precedence = nullptr) + row_lexicographic_comparator(Nullate has_nulls, + table_device_view lhs, + table_device_view rhs, + order const* column_order = nullptr, + null_order const* null_precedence = nullptr) : _lhs{lhs}, _rhs{rhs}, _nulls{has_nulls}, @@ -399,7 +400,7 @@ class row_lexicographic_comparator3 { for (size_type i = 0; i < _lhs.num_columns(); ++i) { bool ascending = (_column_order == nullptr) or (_column_order[i] == order::ASCENDING); - weak_ordering3 state{weak_ordering3::EQUIVALENT}; + weak_ordering state{weak_ordering::EQUIVALENT}; null_order null_precedence = _null_precedence == nullptr ? 
null_order::BEFORE : _null_precedence[i]; @@ -410,22 +411,22 @@ class row_lexicographic_comparator3 { bool const rhs_is_null{rcol.is_null(rhs_index)}; if (lhs_is_null or rhs_is_null) { // atleast one is null - state = null_compare3(lhs_is_null, rhs_is_null, null_precedence); - if (state != weak_ordering3::EQUIVALENT) break; + state = null_compare(lhs_is_null, rhs_is_null, null_precedence); + if (state != weak_ordering::EQUIVALENT) break; } lcol = lcol.children()[0]; rcol = rcol.children()[0]; } - if (state == weak_ordering3::EQUIVALENT) { - auto comparator = element_relational_comparator3{_nulls, lcol, rcol, null_precedence}; + if (state == weak_ordering::EQUIVALENT) { + auto comparator = element_relational_comparator{_nulls, lcol, rcol, null_precedence}; state = cudf::type_dispatcher(lcol.type(), comparator, lhs_index, rhs_index); } - if (state == weak_ordering3::EQUIVALENT) { continue; } + if (state == weak_ordering::EQUIVALENT) { continue; } - return state == (ascending ? weak_ordering3::LESS : weak_ordering3::GREATER); + return state == (ascending ? 
weak_ordering::LESS : weak_ordering::GREATER); } return false; } @@ -436,6 +437,7 @@ class row_lexicographic_comparator3 { Nullate _nulls{}; null_order const* _null_precedence{}; order const* _column_order{}; -}; // class row_lexicographic_comparator3 +}; // class row_lexicographic_comparator +} // namespace experimental } // namespace cudf diff --git a/cpp/src/structs/utilities.cpp b/cpp/src/structs/utilities.cpp index afea8a55b16..1fc0c4457f8 100644 --- a/cpp/src/structs/utilities.cpp +++ b/cpp/src/structs/utilities.cpp @@ -208,6 +208,58 @@ flattened_table flatten_nested_columns(table_view const& input, return table_flattener{input, column_order, null_precedence, nullability}(); } +namespace experimental { + +std::tuple> verticalize_nested_columns( + table_view input) +{ + auto [table, null_masks] = superimpose_parent_nulls(input); + std::vector verticalized_columns; + for (auto const& col : table) { + if (is_nested(col.type())) { + // convert and insert + std::vector r_verticalized_columns; + std::vector flattened; + // TODO: Here I added a bogus leaf column at the beginning to help in the while loop below. + // Refactor the while loop so that it can handle the last case. 
+ flattened.push_back(make_empty_column(type_id::INT32)->view()); + std::function recursive_child = [&](column_view const& c) { + flattened.push_back(c); + for (int child_idx = 0; child_idx < c.num_children(); ++child_idx) { + recursive_child(c.child(child_idx)); + } + }; + recursive_child(col); + int curr_col_idx = flattened.size() - 1; + column_view curr_col = flattened[curr_col_idx]; + while (curr_col_idx > 0) { + auto const& prev_col = flattened[curr_col_idx - 1]; + if (not is_nested(prev_col.type())) { + // We hit a column that's a leaf so seal this hierarchy + r_verticalized_columns.push_back(curr_col); + curr_col = prev_col; + } else { + curr_col = column_view(prev_col.type(), + prev_col.size(), + nullptr, + prev_col.null_mask(), + UNKNOWN_NULL_COUNT, + prev_col.offset(), + {curr_col}); + } + --curr_col_idx; + } + verticalized_columns.insert( + verticalized_columns.end(), r_verticalized_columns.rbegin(), r_verticalized_columns.rend()); + } else { + verticalized_columns.push_back(col); + } + } + return std::make_tuple(table_view(verticalized_columns), std::move(null_masks)); +} + +} // namespace experimental + namespace { using vector_of_columns = std::vector>; using column_index_t = typename vector_of_columns::size_type; diff --git a/cpp/tests/sort/sort2_test.cu b/cpp/tests/sort/sort2_test.cu index 649951d1d5b..346a4983560 100644 --- a/cpp/tests/sort/sort2_test.cu +++ b/cpp/tests/sort/sort2_test.cu @@ -71,4 +71,109 @@ TEST_F(NewRowOp, BasicTest) cudf::test::expect_columns_equal(result1->view(), result2->view()); } +TEST_F(NewRowOp, StructTwoChildTest) +{ + using Type = int; + using column_wrapper = cudf::test::fixed_width_column_wrapper; + std::default_random_engine generator; + std::uniform_int_distribution distribution(0, 100); + + const cudf::size_type n_rows{1 << 2}; + const cudf::size_type n_cols{2}; + + // Create columns with values in the range [0,100) + std::vector columns; + columns.reserve(n_cols); + std::generate_n(std::back_inserter(columns), 
n_cols, [&]() { + auto elements = cudf::detail::make_counting_transform_iterator( + 0, [&](auto row) { return distribution(generator); }); + return column_wrapper(elements, elements + n_rows); + }); + + std::vector> cols; + std::transform(columns.begin(), columns.end(), std::back_inserter(cols), [](column_wrapper& col) { + return col.release(); + }); + + auto make_struct = [&](std::vector> child_cols) { + cudf::test::structs_column_wrapper struct_col(std::move(child_cols)); + return struct_col.release(); + }; + + std::vector> s2_children; + s2_children.push_back(std::move(cols[0])); + s2_children.push_back(std::move(cols[1])); + auto s2 = make_struct(std::move(s2_children)); + + cudf::test::print(s2->view()); + + // // Create table view + // auto input = cudf::table_view({struct_col}); + auto input = cudf::table_view({s2->view()}); + + auto result1 = cudf::sorted_order(input); + cudf::test::print(result1->view()); + auto result2 = cudf::detail::sorted_order2(input); + cudf::test::print(result2->view()); + cudf::test::expect_columns_equal(result1->view(), result2->view()); +} + +TEST_F(NewRowOp, SampleStructTest) +{ + using Type = int; + using column_wrapper = cudf::test::fixed_width_column_wrapper; + std::default_random_engine generator; + std::uniform_int_distribution distribution(0, 100); + + const cudf::size_type n_rows{1 << 6}; + const cudf::size_type n_cols{3}; + + // Create columns with values in the range [0,100) + std::vector columns; + columns.reserve(n_cols); + std::generate_n(std::back_inserter(columns), n_cols, [&]() { + auto elements = cudf::detail::make_counting_transform_iterator( + 0, [&](auto row) { return distribution(generator); }); + auto valids = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i % 3 == 0 ? 
false : true; }); + return column_wrapper(elements, elements + n_rows, valids); + }); + + std::vector> cols; + std::transform(columns.begin(), columns.end(), std::back_inserter(cols), [](column_wrapper& col) { + return col.release(); + }); + + auto make_struct = [&](std::vector> child_cols, int nullfreq) { + std::vector struct_validity; + std::uniform_int_distribution bool_distribution(0, 10 * (nullfreq)); + std::generate_n( + std::back_inserter(struct_validity), n_rows, [&]() { return bool_distribution(generator); }); + cudf::test::structs_column_wrapper struct_col(std::move(child_cols), struct_validity); + return struct_col.release(); + }; + + std::vector> s2_children; + s2_children.push_back(std::move(cols[0])); + s2_children.push_back(std::move(cols[1])); + auto s2 = make_struct(std::move(s2_children), 1); + + std::vector> s1_children; + s1_children.push_back(std::move(s2)); + s1_children.push_back(std::move(cols[2])); + auto s1 = make_struct(std::move(s1_children), 2); + + cudf::test::print(s1->view()); + + // // Create table view + // auto input = cudf::table_view({struct_col}); + auto input = cudf::table_view({s1->view()}); + + auto result1 = cudf::sorted_order(input); + cudf::test::print(result1->view()); + auto result2 = cudf::detail::sorted_order2(input); + cudf::test::print(result2->view()); + cudf::test::expect_columns_equal(result1->view(), result2->view()); +} + CUDF_TEST_PROGRAM_MAIN() From 7fef64335d5581ebf36a490d9efcf4808c577b2c Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Thu, 27 Jan 2022 05:16:10 +0530 Subject: [PATCH 06/78] clean up most of row operators that I didn't change. 
--- cpp/include/cudf/table/row_operator3.cuh | 302 +---------------------- cpp/tests/sort/sort2_test.cu | 4 +- 2 files changed, 3 insertions(+), 303 deletions(-) diff --git a/cpp/include/cudf/table/row_operator3.cuh b/cpp/include/cudf/table/row_operator3.cuh index 71bbb59339e..47ab65ff0e1 100644 --- a/cpp/include/cudf/table/row_operator3.cuh +++ b/cpp/include/cudf/table/row_operator3.cuh @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -33,307 +34,6 @@ namespace cudf { namespace experimental { -/** - * @brief Result type of the `element_relational_comparator2` function object. - * - * Indicates how two elements `a` and `b` compare with one and another. - * - * Equivalence is defined as `not (a -__device__ weak_ordering compare_elements(Element lhs, Element rhs) -{ - if (lhs < rhs) { - return weak_ordering::LESS; - } else if (rhs < lhs) { - return weak_ordering::GREATER; - } - return weak_ordering::EQUIVALENT; -} -} // namespace detail - -/** - * @brief A specialization for floating-point `Element` type relational comparison - * to derive the order of the elements with respect to `lhs`. - * - * This Specialization handles `nan` in the following order: - * `[-Inf, -ve, 0, -0, +ve, +Inf, NaN, NaN, null] (for null_order::AFTER)` - * `[null, -Inf, -ve, 0, -0, +ve, +Inf, NaN, NaN] (for null_order::BEFORE)` - * - * @param lhs first element - * @param rhs second element - * @return Indicates the relationship between the elements in - * the `lhs` and `rhs` columns. - */ -template ::value>* = nullptr> -__device__ weak_ordering relational_compare(Element lhs, Element rhs) -{ - if (isnan(lhs) and isnan(rhs)) { - return weak_ordering::EQUIVALENT; - } else if (isnan(rhs)) { - return weak_ordering::LESS; - } else if (isnan(lhs)) { - return weak_ordering::GREATER; - } - - return detail::compare_elements(lhs, rhs); -} - -/** - * @brief Compare the nulls according to null order. 
- * - * @param lhs_is_null boolean representing if lhs is null - * @param rhs_is_null boolean representing if lhs is null - * @param null_precedence null order - * @return Indicates the relationship between null in lhs and rhs columns. - */ -inline __device__ auto null_compare(bool lhs_is_null, bool rhs_is_null, null_order null_precedence) -{ - if (lhs_is_null and rhs_is_null) { // null ::value>* = nullptr> -__device__ weak_ordering relational_compare(Element lhs, Element rhs) -{ - return detail::compare_elements(lhs, rhs); -} - -/** - * @brief A specialization for floating-point `Element` type to check if - * `lhs` is equivalent to `rhs`. `nan == nan`. - * - * @param lhs first element - * @param rhs second element - * @return `true` if `lhs` == `rhs` else `false`. - */ -template ::value>* = nullptr> -__device__ bool equality_compare(Element lhs, Element rhs) -{ - if (isnan(lhs) and isnan(rhs)) { return true; } - return lhs == rhs; -} - -/** - * @brief A specialization for non-floating-point `Element` type to check if - * `lhs` is equivalent to `rhs`. - * - * @param lhs first element - * @param rhs second element - * @return `true` if `lhs` == `rhs` else `false`. - */ -template ::value>* = nullptr> -__device__ bool equality_compare(Element const lhs, Element const rhs) -{ - return lhs == rhs; -} - -/** - * @brief Performs an equality comparison between two elements in two columns. - * - * @tparam Nullate A cudf::nullate type describing how to check for nulls. - */ -template -class element_equality_comparator { - public: - /** - * @brief Construct type-dispatched function object for comparing equality - * between two elements. - * - * @note `lhs` and `rhs` may be the same. - * - * @param has_nulls Indicates if either input column contains nulls. 
- * @param lhs The column containing the first element - * @param rhs The column containing the second element (may be the same as lhs) - * @param nulls_are_equal Indicates if two null elements are treated as equivalent - */ - __host__ __device__ - element_equality_comparator(Nullate has_nulls, - column_device_view lhs, - column_device_view rhs, - null_equality nulls_are_equal = null_equality::EQUAL) - : lhs{lhs}, rhs{rhs}, nulls{has_nulls}, nulls_are_equal{nulls_are_equal} - { - } - - /** - * @brief Compares the specified elements for equality. - * - * @param lhs_element_index The index of the first element - * @param rhs_element_index The index of the second element - * @return True if both lhs and rhs element are both nulls and `nulls_are_equal` is true, or equal - */ - template ()>* = nullptr> - __device__ bool operator()(size_type lhs_element_index, - size_type rhs_element_index) const noexcept - { - if (nulls) { - bool const lhs_is_null{lhs.is_null(lhs_element_index)}; - bool const rhs_is_null{rhs.is_null(rhs_element_index)}; - if (lhs_is_null and rhs_is_null) { - return nulls_are_equal == null_equality::EQUAL; - } else if (lhs_is_null != rhs_is_null) { - return false; - } - } - - return equality_compare(lhs.element(lhs_element_index), - rhs.element(rhs_element_index)); - } - - template ()>* = nullptr> - __device__ bool operator()(size_type lhs_element_index, size_type rhs_element_index) - { - cudf_assert(false && "Attempted to compare elements of uncomparable types."); - return false; - } - - private: - column_device_view lhs; - column_device_view rhs; - Nullate nulls; - null_equality nulls_are_equal; -}; - -template -class row_equality_comparator { - public: - row_equality_comparator(Nullate has_nulls, - table_device_view lhs, - table_device_view rhs, - null_equality nulls_are_equal = true) - : lhs{lhs}, rhs{rhs}, nulls{has_nulls}, nulls_are_equal{nulls_are_equal} - { - CUDF_EXPECTS(lhs.num_columns() == rhs.num_columns(), "Mismatched number of columns."); - 
} - - __device__ bool operator()(size_type lhs_row_index, size_type rhs_row_index) const noexcept - { - auto equal_elements = [=](column_device_view l, column_device_view r) { - return cudf::type_dispatcher(l.type(), - element_equality_comparator{nulls, l, r, nulls_are_equal}, - lhs_row_index, - rhs_row_index); - }; - - return thrust::equal(thrust::seq, lhs.begin(), lhs.end(), rhs.begin(), equal_elements); - } - - private: - table_device_view lhs; - table_device_view rhs; - Nullate nulls; - null_equality nulls_are_equal; -}; - -/** - * @brief Performs a relational comparison between two elements in two columns. - * - * @tparam Nullate A cudf::nullate type describing how to check for nulls. - */ -template -class element_relational_comparator { - public: - /** - * @brief Construct type-dispatched function object for performing a - * relational comparison between two elements. - * - * @note `lhs` and `rhs` may be the same. - * - * @param lhs The column containing the first element - * @param rhs The column containing the second element (may be the same as lhs) - * @param has_nulls Indicates if either input column contains nulls. - * @param null_precedence Indicates how null values are ordered with other values - */ - __host__ __device__ element_relational_comparator(Nullate has_nulls, - column_device_view lhs, - column_device_view rhs, - null_order null_precedence) - : lhs{lhs}, rhs{rhs}, nulls{has_nulls}, null_precedence{null_precedence} - { - } - - __host__ __device__ element_relational_comparator(Nullate has_nulls, - column_device_view lhs, - column_device_view rhs) - : lhs{lhs}, rhs{rhs}, nulls{has_nulls} - { - } - - /** - * @brief Performs a relational comparison between the specified elements - * - * @param lhs_element_index The index of the first element - * @param rhs_element_index The index of the second element - * @return Indicates the relationship between the elements in - * the `lhs` and `rhs` columns. 
- */ - template ()>* = nullptr> - __device__ weak_ordering operator()(size_type lhs_element_index, - size_type rhs_element_index) const noexcept - { - if (nulls) { - bool const lhs_is_null{lhs.is_null(lhs_element_index)}; - bool const rhs_is_null{rhs.is_null(rhs_element_index)}; - - if (lhs_is_null or rhs_is_null) { // at least one is null - return null_compare(lhs_is_null, rhs_is_null, null_precedence); - } - } - - return relational_compare(lhs.element(lhs_element_index), - rhs.element(rhs_element_index)); - } - - template ()>* = nullptr> - __device__ weak_ordering operator()(size_type lhs_element_index, size_type rhs_element_index) - { - cudf_assert(false && "Attempted to compare elements of uncomparable types."); - return weak_ordering::LESS; - } - - private: - column_device_view lhs; - column_device_view rhs; - Nullate nulls; - null_order null_precedence{}; -}; - /** * @brief Computes whether one row is lexicographically *less* than another row. * diff --git a/cpp/tests/sort/sort2_test.cu b/cpp/tests/sort/sort2_test.cu index 346a4983560..fb78e4a4629 100644 --- a/cpp/tests/sort/sort2_test.cu +++ b/cpp/tests/sort/sort2_test.cu @@ -123,7 +123,7 @@ TEST_F(NewRowOp, SampleStructTest) using Type = int; using column_wrapper = cudf::test::fixed_width_column_wrapper; std::default_random_engine generator; - std::uniform_int_distribution distribution(0, 100); + std::uniform_int_distribution distribution(0, 20); const cudf::size_type n_rows{1 << 6}; const cudf::size_type n_cols{3}; @@ -135,7 +135,7 @@ TEST_F(NewRowOp, SampleStructTest) auto elements = cudf::detail::make_counting_transform_iterator( 0, [&](auto row) { return distribution(generator); }); auto valids = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return i % 3 == 0 ? false : true; }); + 0, [](auto i) { return i % 5 == 0 ? 
false : true; }); return column_wrapper(elements, elements + n_rows, valids); }); From 930d8de8ca9b06322c9ef15849d0ec9b968fafcd Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Fri, 28 Jan 2022 04:03:56 +0530 Subject: [PATCH 07/78] Sliced column test --- cpp/tests/sort/sort2_test.cu | 38 ++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/cpp/tests/sort/sort2_test.cu b/cpp/tests/sort/sort2_test.cu index fb78e4a4629..bd24befc88e 100644 --- a/cpp/tests/sort/sort2_test.cu +++ b/cpp/tests/sort/sort2_test.cu @@ -14,12 +14,12 @@ #include -struct NewRowOp : public cudf::test::BaseFixture { +struct NewRowOpTest : public cudf::test::BaseFixture { }; #include -TEST_F(NewRowOp, BasicTest) +TEST_F(NewRowOpTest, DeepStruct) { using Type = int; using column_wrapper = cudf::test::fixed_width_column_wrapper; @@ -64,14 +64,16 @@ TEST_F(NewRowOp, BasicTest) // auto input = cudf::table_view({struct_col}); auto input = cudf::table(std::move(child_cols)); - auto result1 = cudf::sorted_order(input); - // cudf::test::print(result1->view()); - auto result2 = cudf::detail::sorted_order2(input); - // cudf::test::print(result2->view()); + auto sliced_input = cudf::slice(input, {7, input.num_rows() - 12}); + + auto result1 = cudf::sorted_order(sliced_input); + cudf::test::print(result1->view()); + auto result2 = cudf::detail::sorted_order2(sliced_input); + cudf::test::print(result2->view()); cudf::test::expect_columns_equal(result1->view(), result2->view()); } -TEST_F(NewRowOp, StructTwoChildTest) +TEST_F(NewRowOpTest, StructTwoChildTest) { using Type = int; using column_wrapper = cudf::test::fixed_width_column_wrapper; @@ -118,15 +120,15 @@ TEST_F(NewRowOp, StructTwoChildTest) cudf::test::expect_columns_equal(result1->view(), result2->view()); } -TEST_F(NewRowOp, SampleStructTest) +TEST_F(NewRowOpTest, SampleStructTest) { using Type = int; using column_wrapper = cudf::test::fixed_width_column_wrapper; std::default_random_engine generator; - 
std::uniform_int_distribution distribution(0, 20); + std::uniform_int_distribution distribution(0, 10); const cudf::size_type n_rows{1 << 6}; - const cudf::size_type n_cols{3}; + const cudf::size_type n_cols{6}; // Create columns with values in the range [0,100) std::vector columns; @@ -135,7 +137,7 @@ TEST_F(NewRowOp, SampleStructTest) auto elements = cudf::detail::make_counting_transform_iterator( 0, [&](auto row) { return distribution(generator); }); auto valids = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return i % 5 == 0 ? false : true; }); + 0, [](auto i) { return i % 7 == 0 ? false : true; }); return column_wrapper(elements, elements + n_rows, valids); }); @@ -165,9 +167,21 @@ TEST_F(NewRowOp, SampleStructTest) cudf::test::print(s1->view()); + std::vector> s22_children; + s22_children.push_back(std::move(cols[3])); + s22_children.push_back(std::move(cols[4])); + auto s22 = make_struct(std::move(s22_children), 1); + + std::vector> s12_children; + s12_children.push_back(std::move(cols[5])); + s12_children.push_back(std::move(s22)); + auto s12 = make_struct(std::move(s12_children), 2); + + cudf::test::print(s1->view()); + // // Create table view // auto input = cudf::table_view({struct_col}); - auto input = cudf::table_view({s1->view()}); + auto input = cudf::table_view({s1->view(), s12->view()}); auto result1 = cudf::sorted_order(input); cudf::test::print(result1->view()); From 0ecc4f87c20df2a03c1f1d4b869632058843ad58 Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Sat, 29 Jan 2022 00:37:59 +0530 Subject: [PATCH 08/78] column order and null precendence support --- cpp/include/cudf/detail/structs/utilities.hpp | 5 +- cpp/include/cudf/sort2.cuh | 22 +++-- cpp/src/structs/utilities.cpp | 24 ++++- cpp/tests/sort/sort2_test.cu | 94 ++++++++++--------- 4 files changed, 89 insertions(+), 56 deletions(-) diff --git a/cpp/include/cudf/detail/structs/utilities.hpp b/cpp/include/cudf/detail/structs/utilities.hpp index 23601548cbf..1f29a247121 
100644 --- a/cpp/include/cudf/detail/structs/utilities.hpp +++ b/cpp/include/cudf/detail/structs/utilities.hpp @@ -151,8 +151,9 @@ flattened_table flatten_nested_columns( column_nullability nullability = column_nullability::MATCH_INCOMING); namespace experimental { -std::tuple> verticalize_nested_columns( - table_view input); +flattened_table verticalize_nested_columns(table_view input, + std::vector const& column_order, + std::vector const& null_precedence); } /** diff --git a/cpp/include/cudf/sort2.cuh b/cpp/include/cudf/sort2.cuh index 125d696d055..6710615aa35 100644 --- a/cpp/include/cudf/sort2.cuh +++ b/cpp/include/cudf/sort2.cuh @@ -46,8 +46,10 @@ namespace detail { template std::unique_ptr sorted_order2( table_view input, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + std::vector const& column_order = {}, + std::vector const& null_precedence = {}, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { if (input.num_rows() == 0 or input.num_columns() == 0) { return cudf::make_numeric_column(data_type(type_to_id()), 0); @@ -61,12 +63,18 @@ std::unique_ptr sorted_order2( mutable_indices_view.end(), 0); - auto [vertical_table, nullmasks] = - cudf::structs::detail::experimental::verticalize_nested_columns(input); - auto device_table = table_device_view::create(vertical_table, stream); + auto verticalized = cudf::structs::detail::experimental::verticalize_nested_columns( + input, column_order, null_precedence); + auto device_table = table_device_view::create(verticalized.flattened_columns(), stream); + auto const d_column_order = make_device_uvector_async(verticalized.orders(), stream); + auto const d_null_precedence = make_device_uvector_async(verticalized.null_orders(), stream); + // auto const comparator = row_lexicographic_comparator2(*device_table, *device_table); - auto const 
comparator = experimental::row_lexicographic_comparator( - nullate::DYNAMIC{true}, *device_table, *device_table); + auto const comparator = experimental::row_lexicographic_comparator(nullate::DYNAMIC{true}, + *device_table, + *device_table, + d_column_order.data(), + d_null_precedence.data()); thrust::sort(rmm::exec_policy(stream), mutable_indices_view.begin(), diff --git a/cpp/src/structs/utilities.cpp b/cpp/src/structs/utilities.cpp index 1fc0c4457f8..42c167466c1 100644 --- a/cpp/src/structs/utilities.cpp +++ b/cpp/src/structs/utilities.cpp @@ -210,12 +210,16 @@ flattened_table flatten_nested_columns(table_view const& input, namespace experimental { -std::tuple> verticalize_nested_columns( - table_view input) +flattened_table verticalize_nested_columns(table_view input, + std::vector const& column_order, + std::vector const& null_precedence) { auto [table, null_masks] = superimpose_parent_nulls(input); std::vector verticalized_columns; - for (auto const& col : table) { + std::vector new_column_order; + std::vector new_null_precedence; + for (size_type col_idx = 0; col_idx < table.num_columns(); ++col_idx) { + auto const& col = table.column(col_idx); if (is_nested(col.type())) { // convert and insert std::vector r_verticalized_columns; @@ -251,11 +255,23 @@ std::tuple> verticalize_nested } verticalized_columns.insert( verticalized_columns.end(), r_verticalized_columns.rbegin(), r_verticalized_columns.rend()); + if (not column_order.empty()) { + new_column_order.insert( + new_column_order.end(), r_verticalized_columns.size(), column_order[col_idx]); + } + if (not null_precedence.empty()) { + new_null_precedence.insert( + new_null_precedence.end(), r_verticalized_columns.size(), null_precedence[col_idx]); + } } else { verticalized_columns.push_back(col); } } - return std::make_tuple(table_view(verticalized_columns), std::move(null_masks)); + return flattened_table(table_view(verticalized_columns), + new_column_order, + new_null_precedence, + {}, + 
std::move(null_masks)); } } // namespace experimental diff --git a/cpp/tests/sort/sort2_test.cu b/cpp/tests/sort/sort2_test.cu index bd24befc88e..98bd28fb181 100644 --- a/cpp/tests/sort/sort2_test.cu +++ b/cpp/tests/sort/sort2_test.cu @@ -19,26 +19,23 @@ struct NewRowOpTest : public cudf::test::BaseFixture { #include -TEST_F(NewRowOpTest, DeepStruct) +TEST_F(NewRowOpTest, BasicStructTwoChild) { using Type = int; using column_wrapper = cudf::test::fixed_width_column_wrapper; std::default_random_engine generator; std::uniform_int_distribution distribution(0, 100); - const cudf::size_type n_rows{1 << 6}; - const cudf::size_type n_cols{1}; - const cudf::size_type depth{8}; + const cudf::size_type n_rows{1 << 2}; + const cudf::size_type n_cols{2}; // Create columns with values in the range [0,100) std::vector columns; columns.reserve(n_cols); - std::generate_n(std::back_inserter(columns), n_cols, [&, n_rows]() { + std::generate_n(std::back_inserter(columns), n_cols, [&]() { auto elements = cudf::detail::make_counting_transform_iterator( 0, [&](auto row) { return distribution(generator); }); - auto valids = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return i % 3 == 0 ? 
false : true; }); - return column_wrapper(elements, elements + n_rows, valids); + return column_wrapper(elements, elements + n_rows); }); std::vector> cols; @@ -46,50 +43,49 @@ TEST_F(NewRowOpTest, DeepStruct) return col.release(); }); - std::vector> child_cols = std::move(cols); - // Lets add some layers - for (int i = 0; i < depth; i++) { - std::vector struct_validity; - std::uniform_int_distribution bool_distribution(0, 10 * (i + 1)); - std::generate_n( - std::back_inserter(struct_validity), n_rows, [&]() { return bool_distribution(generator); }); - cudf::test::structs_column_wrapper struct_col(std::move(child_cols), struct_validity); - child_cols = std::vector>{}; - child_cols.push_back(struct_col.release()); - } + auto make_struct = [&](std::vector> child_cols) { + cudf::test::structs_column_wrapper struct_col(std::move(child_cols)); + return struct_col.release(); + }; - cudf::test::print(child_cols[0]->view()); + std::vector> s2_children; + s2_children.push_back(std::move(cols[0])); + s2_children.push_back(std::move(cols[1])); + auto s2 = make_struct(std::move(s2_children)); + + cudf::test::print(s2->view()); // // Create table view // auto input = cudf::table_view({struct_col}); - auto input = cudf::table(std::move(child_cols)); - - auto sliced_input = cudf::slice(input, {7, input.num_rows() - 12}); + auto input = cudf::table_view({s2->view()}); - auto result1 = cudf::sorted_order(sliced_input); + auto result1 = cudf::sorted_order(input); cudf::test::print(result1->view()); - auto result2 = cudf::detail::sorted_order2(sliced_input); + auto result2 = cudf::detail::sorted_order2(input); cudf::test::print(result2->view()); cudf::test::expect_columns_equal(result1->view(), result2->view()); } -TEST_F(NewRowOpTest, StructTwoChildTest) +TEST_F(NewRowOpTest, DeepStruct) { using Type = int; using column_wrapper = cudf::test::fixed_width_column_wrapper; std::default_random_engine generator; std::uniform_int_distribution distribution(0, 100); - const cudf::size_type 
n_rows{1 << 2}; - const cudf::size_type n_cols{2}; + const cudf::size_type n_rows{1 << 6}; + const cudf::size_type n_cols{1}; + const cudf::size_type depth{8}; // Create columns with values in the range [0,100) std::vector columns; columns.reserve(n_cols); - std::generate_n(std::back_inserter(columns), n_cols, [&]() { + std::generate_n(std::back_inserter(columns), n_cols, [&, n_rows]() { auto elements = cudf::detail::make_counting_transform_iterator( 0, [&](auto row) { return distribution(generator); }); - return column_wrapper(elements, elements + n_rows); + auto valids = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i % 3 == 0 ? false : true; }); + return column_wrapper(elements, elements + n_rows, valids); }); std::vector> cols; @@ -97,25 +93,29 @@ TEST_F(NewRowOpTest, StructTwoChildTest) return col.release(); }); - auto make_struct = [&](std::vector> child_cols) { - cudf::test::structs_column_wrapper struct_col(std::move(child_cols)); - return struct_col.release(); - }; - - std::vector> s2_children; - s2_children.push_back(std::move(cols[0])); - s2_children.push_back(std::move(cols[1])); - auto s2 = make_struct(std::move(s2_children)); + std::vector> child_cols = std::move(cols); + // Lets add some layers + for (int i = 0; i < depth; i++) { + std::vector struct_validity; + std::uniform_int_distribution bool_distribution(0, 10 * (i + 1)); + std::generate_n( + std::back_inserter(struct_validity), n_rows, [&]() { return bool_distribution(generator); }); + cudf::test::structs_column_wrapper struct_col(std::move(child_cols), struct_validity); + child_cols = std::vector>{}; + child_cols.push_back(struct_col.release()); + } - cudf::test::print(s2->view()); + cudf::test::print(child_cols[0]->view()); // // Create table view // auto input = cudf::table_view({struct_col}); - auto input = cudf::table_view({s2->view()}); + auto input = cudf::table(std::move(child_cols)); - auto result1 = cudf::sorted_order(input); + auto sliced_input = 
cudf::slice(input, {7, input.num_rows() - 12}); + + auto result1 = cudf::sorted_order(sliced_input); cudf::test::print(result1->view()); - auto result2 = cudf::detail::sorted_order2(input); + auto result2 = cudf::detail::sorted_order2(sliced_input); cudf::test::print(result2->view()); cudf::test::expect_columns_equal(result1->view(), result2->view()); } @@ -188,6 +188,14 @@ TEST_F(NewRowOpTest, SampleStructTest) auto result2 = cudf::detail::sorted_order2(input); cudf::test::print(result2->view()); cudf::test::expect_columns_equal(result1->view(), result2->view()); + + std::vector col_order = {cudf::order::DESCENDING, cudf::order::ASCENDING}; + std::vector null_order = {cudf::null_order::BEFORE, cudf::null_order::AFTER}; + result1 = cudf::sorted_order(input, col_order, null_order); + result2 = cudf::detail::sorted_order2(input, col_order, null_order); + cudf::test::print(result1->view()); + cudf::test::print(result2->view()); + cudf::test::expect_columns_equal(result1->view(), result2->view()); } CUDF_TEST_PROGRAM_MAIN() From ff36d2dbe3e535625ee011c2ff8656e7a78b0bed Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Sat, 29 Jan 2022 04:37:46 +0530 Subject: [PATCH 09/78] Manually managed stack --- cpp/include/cudf/table/row_operator2.cuh | 79 ++++++++++++++++++++---- 1 file changed, 68 insertions(+), 11 deletions(-) diff --git a/cpp/include/cudf/table/row_operator2.cuh b/cpp/include/cudf/table/row_operator2.cuh index 9edb60cc112..1288cef70a9 100644 --- a/cpp/include/cudf/table/row_operator2.cuh +++ b/cpp/include/cudf/table/row_operator2.cuh @@ -319,6 +319,35 @@ class element_relational_comparator2 { null_order null_precedence; }; +template +struct device_stack { + __device__ device_stack(T* stack_storage, int capacity) + : stack(stack_storage), capacity(capacity), size(0) + { + } + __device__ void push(T const& val) + { + cudf_assert(size < capacity and "Stack overflow"); + stack[size++] = val; + } + __device__ T pop() + { + cudf_assert(size > 0 and "Stack 
underflow"); + return stack[--size]; + } + __device__ T top() + { + cudf_assert(size > 0 and "Stack underflow"); + return stack[size - 1]; + } + __device__ bool empty() { return size == 0; } + + private: + T* stack; + int capacity; + int size; +}; + /** * @brief Computes whether one row is lexicographically *less* than another row. * @@ -376,31 +405,59 @@ class row_lexicographic_comparator2 { */ __device__ bool operator()(size_type lhs_index, size_type rhs_index) const noexcept { + using stack_value_type = + thrust::tuple; + stack_value_type stack_storage[10]; + for (size_type i = 0; i < _lhs.num_columns(); ++i) { + device_stack stack(stack_storage, 9); bool ascending = (_column_order == nullptr) or (_column_order[i] == order::ASCENDING); weak_ordering2 state{weak_ordering2::EQUIVALENT}; null_order null_precedence = _null_precedence == nullptr ? null_order::BEFORE : _null_precedence[i]; - column_device_view lcol = _lhs.column(i); - column_device_view rcol = _rhs.column(i); - while (lcol.type().id() == type_id::STRUCT) { - bool const lhs_is_null{lcol.is_null(lhs_index)}; - bool const rhs_is_null{rcol.is_null(rhs_index)}; + column_device_view const* lcol = _lhs.begin() + i; + column_device_view const* rcol = _rhs.begin() + i; + size_t curr_child = 0; + + while (true) { + bool const lhs_is_null{lcol->is_null(lhs_index)}; + bool const rhs_is_null{rcol->is_null(rhs_index)}; if (lhs_is_null or rhs_is_null) { // atleast one is null state = null_compare2(lhs_is_null, rhs_is_null, null_precedence); if (state != weak_ordering2::EQUIVALENT) break; + } else if (lcol->type().id() != type_id::STRUCT) { + auto comparator = + element_relational_comparator2{*lcol, *rcol, null_precedence}; + state = cudf::type_dispatcher(lcol->type(), comparator, lhs_index, rhs_index); + if (state != weak_ordering2::EQUIVALENT) break; } - lcol = lcol.children()[0]; - rcol = rcol.children()[0]; - } + // Reaching here means the nullability was same and we need to continue comparing + if 
(lcol->type().id() == type_id::STRUCT) { + stack.push({lcol, rcol, 0}); + } else { + // unwind stack until we reach a struct level with children still left to compare + bool completed_comparison = false; + do { + if (stack.empty()) { + completed_comparison = true; + break; + } + thrust::tie(lcol, rcol, curr_child) = stack.pop(); + } while (lcol->num_child_columns() <= curr_child + 1); + if (completed_comparison) { break; } + stack.push({lcol, rcol, curr_child + 1}); + // break; + } + + // The top of the stack now is where we have to continue comparing from + thrust::tie(lcol, rcol, curr_child) = stack.top(); - if (state == weak_ordering2::EQUIVALENT) { - auto comparator = element_relational_comparator2{lcol, rcol, null_precedence}; - state = cudf::type_dispatcher(lcol.type(), comparator, lhs_index, rhs_index); + lcol = &lcol->children()[curr_child]; + rcol = &rcol->children()[curr_child]; } if (state == weak_ordering2::EQUIVALENT) { continue; } From cd0f93885efb17f643e269e4d73e9f52c216b30c Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Thu, 3 Feb 2022 01:01:19 +0530 Subject: [PATCH 10/78] New depth based method to avoid superimpose nulls sliced no longer works --- cpp/include/cudf/detail/structs/utilities.hpp | 7 +-- cpp/include/cudf/sort2.cuh | 6 ++- cpp/include/cudf/table/row_operator3.cuh | 19 +++++++- cpp/src/structs/utilities.cpp | 34 +++++++++----- cpp/tests/sort/sort2_test.cu | 45 +++++++++++++------ 5 files changed, 80 insertions(+), 31 deletions(-) diff --git a/cpp/include/cudf/detail/structs/utilities.hpp b/cpp/include/cudf/detail/structs/utilities.hpp index 1f29a247121..6b3a297cbaa 100644 --- a/cpp/include/cudf/detail/structs/utilities.hpp +++ b/cpp/include/cudf/detail/structs/utilities.hpp @@ -151,9 +151,10 @@ flattened_table flatten_nested_columns( column_nullability nullability = column_nullability::MATCH_INCOMING); namespace experimental { -flattened_table verticalize_nested_columns(table_view input, - std::vector const& column_order, - 
std::vector const& null_precedence); +std::tuple> verticalize_nested_columns( + table_view input, + std::vector const& column_order, + std::vector const& null_precedence); } /** diff --git a/cpp/include/cudf/sort2.cuh b/cpp/include/cudf/sort2.cuh index 6710615aa35..e612a5340d9 100644 --- a/cpp/include/cudf/sort2.cuh +++ b/cpp/include/cudf/sort2.cuh @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -51,6 +52,7 @@ std::unique_ptr sorted_order2( rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { + CUDF_FUNC_RANGE(); if (input.num_rows() == 0 or input.num_columns() == 0) { return cudf::make_numeric_column(data_type(type_to_id()), 0); } @@ -63,16 +65,18 @@ std::unique_ptr sorted_order2( mutable_indices_view.end(), 0); - auto verticalized = cudf::structs::detail::experimental::verticalize_nested_columns( + auto [verticalized, depths] = cudf::structs::detail::experimental::verticalize_nested_columns( input, column_order, null_precedence); auto device_table = table_device_view::create(verticalized.flattened_columns(), stream); auto const d_column_order = make_device_uvector_async(verticalized.orders(), stream); auto const d_null_precedence = make_device_uvector_async(verticalized.null_orders(), stream); + auto const d_depths = make_device_uvector_async(depths, stream); // auto const comparator = row_lexicographic_comparator2(*device_table, *device_table); auto const comparator = experimental::row_lexicographic_comparator(nullate::DYNAMIC{true}, *device_table, *device_table, + d_depths.data(), d_column_order.data(), d_null_precedence.data()); diff --git a/cpp/include/cudf/table/row_operator3.cuh b/cpp/include/cudf/table/row_operator3.cuh index 47ab65ff0e1..6f40ff0c414 100644 --- a/cpp/include/cudf/table/row_operator3.cuh +++ b/cpp/include/cudf/table/row_operator3.cuh @@ -73,11 +73,13 @@ class row_lexicographic_comparator { row_lexicographic_comparator(Nullate 
has_nulls, table_device_view lhs, table_device_view rhs, + int const* depth = nullptr, order const* column_order = nullptr, null_order const* null_precedence = nullptr) : _lhs{lhs}, _rhs{rhs}, _nulls{has_nulls}, + _depth{depth}, _column_order{column_order}, _null_precedence{null_precedence} { @@ -97,7 +99,15 @@ class row_lexicographic_comparator { */ __device__ bool operator()(size_type lhs_index, size_type rhs_index) const noexcept { + int last_null_depth = std::numeric_limits::max(); for (size_type i = 0; i < _lhs.num_columns(); ++i) { + if (_depth[i] > last_null_depth) { + continue; + } else { + last_null_depth = std::numeric_limits::max(); + } + + bool continue_to_next_col = false; bool ascending = (_column_order == nullptr) or (_column_order[i] == order::ASCENDING); weak_ordering state{weak_ordering::EQUIVALENT}; @@ -106,19 +116,25 @@ class row_lexicographic_comparator { column_device_view lcol = _lhs.column(i); column_device_view rcol = _rhs.column(i); + int depth = _depth[i]; while (lcol.type().id() == type_id::STRUCT) { bool const lhs_is_null{lcol.is_null(lhs_index)}; bool const rhs_is_null{rcol.is_null(rhs_index)}; if (lhs_is_null or rhs_is_null) { // atleast one is null state = null_compare(lhs_is_null, rhs_is_null, null_precedence); - if (state != weak_ordering::EQUIVALENT) break; + if (state == weak_ordering::EQUIVALENT) { continue_to_next_col = true; } + last_null_depth = depth; + break; } lcol = lcol.children()[0]; rcol = rcol.children()[0]; + ++depth; } + if (continue_to_next_col) { continue; } + if (state == weak_ordering::EQUIVALENT) { auto comparator = element_relational_comparator{_nulls, lcol, rcol, null_precedence}; state = cudf::type_dispatcher(lcol.type(), comparator, lhs_index, rhs_index); @@ -137,6 +153,7 @@ class row_lexicographic_comparator { Nullate _nulls{}; null_order const* _null_precedence{}; order const* _column_order{}; + int const* _depth; }; // class row_lexicographic_comparator } // namespace experimental diff --git 
a/cpp/src/structs/utilities.cpp b/cpp/src/structs/utilities.cpp index 42c167466c1..3ad9917db56 100644 --- a/cpp/src/structs/utilities.cpp +++ b/cpp/src/structs/utilities.cpp @@ -210,30 +210,37 @@ flattened_table flatten_nested_columns(table_view const& input, namespace experimental { -flattened_table verticalize_nested_columns(table_view input, - std::vector const& column_order, - std::vector const& null_precedence) +std::tuple> verticalize_nested_columns( + table_view input, + std::vector const& column_order, + std::vector const& null_precedence) { - auto [table, null_masks] = superimpose_parent_nulls(input); + // auto [table, null_masks] = superimpose_parent_nulls(input); + auto table = input; std::vector verticalized_columns; std::vector new_column_order; std::vector new_null_precedence; + std::vector verticalized_col_depths; for (size_type col_idx = 0; col_idx < table.num_columns(); ++col_idx) { auto const& col = table.column(col_idx); if (is_nested(col.type())) { // convert and insert std::vector r_verticalized_columns; + std::vector r_verticalized_col_depths; std::vector flattened; + std::vector depths; // TODO: Here I added a bogus leaf column at the beginning to help in the while loop below. // Refactor the while loop so that it can handle the last case. 
flattened.push_back(make_empty_column(type_id::INT32)->view()); - std::function recursive_child = [&](column_view const& c) { + std::function recursive_child = [&](column_view const& c, + int depth) { flattened.push_back(c); + depths.push_back(depth); for (int child_idx = 0; child_idx < c.num_children(); ++child_idx) { - recursive_child(c.child(child_idx)); + recursive_child(c.child(child_idx), depth + 1); } }; - recursive_child(col); + recursive_child(col, 0); int curr_col_idx = flattened.size() - 1; column_view curr_col = flattened[curr_col_idx]; while (curr_col_idx > 0) { @@ -241,6 +248,7 @@ flattened_table verticalize_nested_columns(table_view input, if (not is_nested(prev_col.type())) { // We hit a column that's a leaf so seal this hierarchy r_verticalized_columns.push_back(curr_col); + r_verticalized_col_depths.push_back(depths[curr_col_idx - 1]); curr_col = prev_col; } else { curr_col = column_view(prev_col.type(), @@ -255,6 +263,9 @@ flattened_table verticalize_nested_columns(table_view input, } verticalized_columns.insert( verticalized_columns.end(), r_verticalized_columns.rbegin(), r_verticalized_columns.rend()); + verticalized_col_depths.insert(verticalized_col_depths.end(), + r_verticalized_col_depths.rbegin(), + r_verticalized_col_depths.rend()); if (not column_order.empty()) { new_column_order.insert( new_column_order.end(), r_verticalized_columns.size(), column_order[col_idx]); @@ -267,11 +278,10 @@ flattened_table verticalize_nested_columns(table_view input, verticalized_columns.push_back(col); } } - return flattened_table(table_view(verticalized_columns), - new_column_order, - new_null_precedence, - {}, - std::move(null_masks)); + return std::make_tuple( + flattened_table( + table_view(verticalized_columns), new_column_order, new_null_precedence, {}, {}), + std::move(verticalized_col_depths)); } } // namespace experimental diff --git a/cpp/tests/sort/sort2_test.cu b/cpp/tests/sort/sort2_test.cu index 98bd28fb181..56ef562be39 100644 --- 
a/cpp/tests/sort/sort2_test.cu +++ b/cpp/tests/sort/sort2_test.cu @@ -1,6 +1,7 @@ #include #include #include +#include #include #include @@ -26,7 +27,7 @@ TEST_F(NewRowOpTest, BasicStructTwoChild) std::default_random_engine generator; std::uniform_int_distribution distribution(0, 100); - const cudf::size_type n_rows{1 << 2}; + const cudf::size_type n_rows{1 << 4}; const cudf::size_type n_cols{2}; // Create columns with values in the range [0,100) @@ -35,7 +36,9 @@ TEST_F(NewRowOpTest, BasicStructTwoChild) std::generate_n(std::back_inserter(columns), n_cols, [&]() { auto elements = cudf::detail::make_counting_transform_iterator( 0, [&](auto row) { return distribution(generator); }); - return column_wrapper(elements, elements + n_rows); + auto valids = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i % 4 == 0 ? false : true; }); + return column_wrapper(elements, elements + n_rows, valids); }); std::vector> cols; @@ -43,15 +46,25 @@ TEST_F(NewRowOpTest, BasicStructTwoChild) return col.release(); }); - auto make_struct = [&](std::vector> child_cols) { + auto make_struct = [&](std::vector> child_cols, int nullfreq) { + // std::vector struct_validity; + std::uniform_int_distribution bool_distribution(0, 10 * (nullfreq)); + // std::generate_n( + // std::back_inserter(struct_validity), n_rows, [&]() { return bool_distribution(generator); + // }); + auto null_iter = cudf::detail::make_counting_transform_iterator( + 0, [&](int i) { return bool_distribution(generator); }); + cudf::test::structs_column_wrapper struct_col(std::move(child_cols)); - return struct_col.release(); + auto struct_ = struct_col.release(); + struct_->set_null_mask(cudf::test::detail::make_null_mask(null_iter, null_iter + n_rows)); + return struct_; }; std::vector> s2_children; s2_children.push_back(std::move(cols[0])); s2_children.push_back(std::move(cols[1])); - auto s2 = make_struct(std::move(s2_children)); + auto s2 = make_struct(std::move(s2_children), 1); 
cudf::test::print(s2->view()); @@ -111,11 +124,11 @@ TEST_F(NewRowOpTest, DeepStruct) // auto input = cudf::table_view({struct_col}); auto input = cudf::table(std::move(child_cols)); - auto sliced_input = cudf::slice(input, {7, input.num_rows() - 12}); + // auto sliced_input = cudf::slice(input, {7, input.num_rows() - 12}); - auto result1 = cudf::sorted_order(sliced_input); + auto result1 = cudf::sorted_order(input); cudf::test::print(result1->view()); - auto result2 = cudf::detail::sorted_order2(sliced_input); + auto result2 = cudf::detail::sorted_order2(input); cudf::test::print(result2->view()); cudf::test::expect_columns_equal(result1->view(), result2->view()); } @@ -136,8 +149,9 @@ TEST_F(NewRowOpTest, SampleStructTest) std::generate_n(std::back_inserter(columns), n_cols, [&]() { auto elements = cudf::detail::make_counting_transform_iterator( 0, [&](auto row) { return distribution(generator); }); + int start = distribution(generator); auto valids = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return i % 7 == 0 ? false : true; }); + 0, [&](auto i) { return (i + start) % 7 == 0 ? 
false : true; }); return column_wrapper(elements, elements + n_rows, valids); }); @@ -149,10 +163,13 @@ TEST_F(NewRowOpTest, SampleStructTest) auto make_struct = [&](std::vector> child_cols, int nullfreq) { std::vector struct_validity; std::uniform_int_distribution bool_distribution(0, 10 * (nullfreq)); - std::generate_n( - std::back_inserter(struct_validity), n_rows, [&]() { return bool_distribution(generator); }); - cudf::test::structs_column_wrapper struct_col(std::move(child_cols), struct_validity); - return struct_col.release(); + auto null_iter = cudf::detail::make_counting_transform_iterator( + 0, [&](int i) { return bool_distribution(generator); }); + + cudf::test::structs_column_wrapper struct_col(std::move(child_cols)); + auto struct_ = struct_col.release(); + struct_->set_null_mask(cudf::test::detail::make_null_mask(null_iter, null_iter + n_rows)); + return struct_; }; std::vector> s2_children; @@ -177,7 +194,7 @@ TEST_F(NewRowOpTest, SampleStructTest) s12_children.push_back(std::move(s22)); auto s12 = make_struct(std::move(s12_children), 2); - cudf::test::print(s1->view()); + cudf::test::print(s12->view()); // // Create table view // auto input = cudf::table_view({struct_col}); From 7b8e060ab1bd646408154882c349de1b0f2502b0 Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Thu, 3 Feb 2022 01:06:06 +0530 Subject: [PATCH 11/78] Put sort2 impl in separate TU --- cpp/CMakeLists.txt | 1 + .../compare/comparator_benchmark.cu | 2 +- cpp/include/cudf/sort2.cuh | 43 +-------- cpp/src/sort/sort2.cu | 93 +++++++++++++++++++ cpp/tests/sort/sort2_test.cu | 8 +- 5 files changed, 102 insertions(+), 45 deletions(-) create mode 100644 cpp/src/sort/sort2.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 4db9f6de4d5..50d664a0661 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -391,6 +391,7 @@ add_library( src/sort/segmented_sort.cu src/sort/sort_column.cu src/sort/sort.cu + src/sort/sort2.cu src/sort/stable_sort_column.cu src/sort/stable_sort.cu 
src/stream_compaction/apply_boolean_mask.cu diff --git a/cpp/benchmarks/compare/comparator_benchmark.cu b/cpp/benchmarks/compare/comparator_benchmark.cu index e4f690102b0..8c9accc0a00 100644 --- a/cpp/benchmarks/compare/comparator_benchmark.cu +++ b/cpp/benchmarks/compare/comparator_benchmark.cu @@ -81,7 +81,7 @@ static void BM_sort(benchmark::State& state, bool nulls) cuda_event_timer raii(state, true, rmm::cuda_stream_default); // auto result = cudf::sorted_order(input); - auto result = cudf::detail::sorted_order2(input); + auto result = cudf::detail::experimental::sorted_order2(input); } } diff --git a/cpp/include/cudf/sort2.cuh b/cpp/include/cudf/sort2.cuh index e612a5340d9..f9feab8246b 100644 --- a/cpp/include/cudf/sort2.cuh +++ b/cpp/include/cudf/sort2.cuh @@ -37,6 +37,7 @@ namespace cudf { namespace detail { +namespace experimental { /** * @copydoc @@ -44,51 +45,13 @@ namespace detail { * * @param stream CUDA stream used for device memory operations and kernel launches */ -template std::unique_ptr sorted_order2( table_view input, std::vector const& column_order = {}, std::vector const& null_precedence = {}, rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) -{ - CUDF_FUNC_RANGE(); - if (input.num_rows() == 0 or input.num_columns() == 0) { - return cudf::make_numeric_column(data_type(type_to_id()), 0); - } - - std::unique_ptr sorted_indices = cudf::make_numeric_column( - data_type(type_to_id()), input.num_rows(), mask_state::UNALLOCATED, stream, mr); - mutable_column_view mutable_indices_view = sorted_indices->mutable_view(); - thrust::sequence(rmm::exec_policy(stream), - mutable_indices_view.begin(), - mutable_indices_view.end(), - 0); - - auto [verticalized, depths] = cudf::structs::detail::experimental::verticalize_nested_columns( - input, column_order, null_precedence); - auto device_table = table_device_view::create(verticalized.flattened_columns(), stream); - auto 
const d_column_order = make_device_uvector_async(verticalized.orders(), stream); - auto const d_null_precedence = make_device_uvector_async(verticalized.null_orders(), stream); - auto const d_depths = make_device_uvector_async(depths, stream); - - // auto const comparator = row_lexicographic_comparator2(*device_table, *device_table); - auto const comparator = experimental::row_lexicographic_comparator(nullate::DYNAMIC{true}, - *device_table, - *device_table, - d_depths.data(), - d_column_order.data(), - d_null_precedence.data()); - - thrust::sort(rmm::exec_policy(stream), - mutable_indices_view.begin(), - mutable_indices_view.end(), - comparator); - // protection for temporary d_column_order and d_null_precedence - stream.synchronize(); - - return sorted_indices; -} + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +} // namespace experimental } // namespace detail } // namespace cudf diff --git a/cpp/src/sort/sort2.cu b/cpp/src/sort/sort2.cu new file mode 100644 index 00000000000..81deb1789fa --- /dev/null +++ b/cpp/src/sort/sort2.cu @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +#include +#include + +namespace cudf { +namespace detail { +namespace experimental { + +/** + * @copydoc + * sorted_order(table_view&,std::vector,std::vector,rmm::mr::device_memory_resource*) + * + * @param stream CUDA stream used for device memory operations and kernel launches + */ +std::unique_ptr sorted_order2(table_view input, + std::vector const& column_order, + std::vector const& null_precedence, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + if (input.num_rows() == 0 or input.num_columns() == 0) { + return cudf::make_numeric_column(data_type(type_to_id()), 0); + } + + std::unique_ptr sorted_indices = cudf::make_numeric_column( + data_type(type_to_id()), input.num_rows(), mask_state::UNALLOCATED, stream, mr); + mutable_column_view mutable_indices_view = sorted_indices->mutable_view(); + thrust::sequence(rmm::exec_policy(stream), + mutable_indices_view.begin(), + mutable_indices_view.end(), + 0); + + auto [verticalized, depths] = cudf::structs::detail::experimental::verticalize_nested_columns( + input, column_order, null_precedence); + auto device_table = table_device_view::create(verticalized.flattened_columns(), stream); + auto const d_column_order = make_device_uvector_async(verticalized.orders(), stream); + auto const d_null_precedence = make_device_uvector_async(verticalized.null_orders(), stream); + auto const d_depths = make_device_uvector_async(depths, stream); + + // auto const comparator = row_lexicographic_comparator2(*device_table, *device_table); + auto const comparator = + cudf::experimental::row_lexicographic_comparator(nullate::DYNAMIC{true}, + *device_table, + *device_table, + d_depths.data(), + d_column_order.data(), + d_null_precedence.data()); + + thrust::sort(rmm::exec_policy(stream), + mutable_indices_view.begin(), + mutable_indices_view.end(), + 
comparator); + // protection for temporary d_column_order and d_null_precedence + stream.synchronize(); + + return sorted_indices; +} + +} // namespace experimental +} // namespace detail +} // namespace cudf diff --git a/cpp/tests/sort/sort2_test.cu b/cpp/tests/sort/sort2_test.cu index 56ef562be39..2c173e0a4e2 100644 --- a/cpp/tests/sort/sort2_test.cu +++ b/cpp/tests/sort/sort2_test.cu @@ -74,7 +74,7 @@ TEST_F(NewRowOpTest, BasicStructTwoChild) auto result1 = cudf::sorted_order(input); cudf::test::print(result1->view()); - auto result2 = cudf::detail::sorted_order2(input); + auto result2 = cudf::detail::experimental::sorted_order2(input); cudf::test::print(result2->view()); cudf::test::expect_columns_equal(result1->view(), result2->view()); } @@ -128,7 +128,7 @@ TEST_F(NewRowOpTest, DeepStruct) auto result1 = cudf::sorted_order(input); cudf::test::print(result1->view()); - auto result2 = cudf::detail::sorted_order2(input); + auto result2 = cudf::detail::experimental::sorted_order2(input); cudf::test::print(result2->view()); cudf::test::expect_columns_equal(result1->view(), result2->view()); } @@ -202,14 +202,14 @@ TEST_F(NewRowOpTest, SampleStructTest) auto result1 = cudf::sorted_order(input); cudf::test::print(result1->view()); - auto result2 = cudf::detail::sorted_order2(input); + auto result2 = cudf::detail::experimental::sorted_order2(input); cudf::test::print(result2->view()); cudf::test::expect_columns_equal(result1->view(), result2->view()); std::vector col_order = {cudf::order::DESCENDING, cudf::order::ASCENDING}; std::vector null_order = {cudf::null_order::BEFORE, cudf::null_order::AFTER}; result1 = cudf::sorted_order(input, col_order, null_order); - result2 = cudf::detail::sorted_order2(input, col_order, null_order); + result2 = cudf::detail::experimental::sorted_order2(input, col_order, null_order); cudf::test::print(result1->view()); cudf::test::print(result2->view()); cudf::test::expect_columns_equal(result1->view(), result2->view()); From 
d55c9c7e0b3b410ec2008046b282b1eca671ab34 Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Wed, 16 Feb 2022 03:13:51 +0530 Subject: [PATCH 12/78] Move verticalization code to row_comparator.cpp --- cpp/CMakeLists.txt | 1 + cpp/src/structs/utilities.cpp | 78 ------------------------ cpp/src/table/row_operators.cpp | 101 ++++++++++++++++++++++++++++++++ 3 files changed, 102 insertions(+), 78 deletions(-) create mode 100644 cpp/src/table/row_operators.cpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 9844dfb758f..808d7730d2a 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -459,6 +459,7 @@ add_library( src/structs/structs_column_factories.cu src/structs/structs_column_view.cpp src/structs/utilities.cpp + src/table/row_operators.cpp src/table/table.cpp src/table/table_device_view.cu src/table/table_view.cpp diff --git a/cpp/src/structs/utilities.cpp b/cpp/src/structs/utilities.cpp index 3ad9917db56..afea8a55b16 100644 --- a/cpp/src/structs/utilities.cpp +++ b/cpp/src/structs/utilities.cpp @@ -208,84 +208,6 @@ flattened_table flatten_nested_columns(table_view const& input, return table_flattener{input, column_order, null_precedence, nullability}(); } -namespace experimental { - -std::tuple> verticalize_nested_columns( - table_view input, - std::vector const& column_order, - std::vector const& null_precedence) -{ - // auto [table, null_masks] = superimpose_parent_nulls(input); - auto table = input; - std::vector verticalized_columns; - std::vector new_column_order; - std::vector new_null_precedence; - std::vector verticalized_col_depths; - for (size_type col_idx = 0; col_idx < table.num_columns(); ++col_idx) { - auto const& col = table.column(col_idx); - if (is_nested(col.type())) { - // convert and insert - std::vector r_verticalized_columns; - std::vector r_verticalized_col_depths; - std::vector flattened; - std::vector depths; - // TODO: Here I added a bogus leaf column at the beginning to help in the while loop below. 
- // Refactor the while loop so that it can handle the last case. - flattened.push_back(make_empty_column(type_id::INT32)->view()); - std::function recursive_child = [&](column_view const& c, - int depth) { - flattened.push_back(c); - depths.push_back(depth); - for (int child_idx = 0; child_idx < c.num_children(); ++child_idx) { - recursive_child(c.child(child_idx), depth + 1); - } - }; - recursive_child(col, 0); - int curr_col_idx = flattened.size() - 1; - column_view curr_col = flattened[curr_col_idx]; - while (curr_col_idx > 0) { - auto const& prev_col = flattened[curr_col_idx - 1]; - if (not is_nested(prev_col.type())) { - // We hit a column that's a leaf so seal this hierarchy - r_verticalized_columns.push_back(curr_col); - r_verticalized_col_depths.push_back(depths[curr_col_idx - 1]); - curr_col = prev_col; - } else { - curr_col = column_view(prev_col.type(), - prev_col.size(), - nullptr, - prev_col.null_mask(), - UNKNOWN_NULL_COUNT, - prev_col.offset(), - {curr_col}); - } - --curr_col_idx; - } - verticalized_columns.insert( - verticalized_columns.end(), r_verticalized_columns.rbegin(), r_verticalized_columns.rend()); - verticalized_col_depths.insert(verticalized_col_depths.end(), - r_verticalized_col_depths.rbegin(), - r_verticalized_col_depths.rend()); - if (not column_order.empty()) { - new_column_order.insert( - new_column_order.end(), r_verticalized_columns.size(), column_order[col_idx]); - } - if (not null_precedence.empty()) { - new_null_precedence.insert( - new_null_precedence.end(), r_verticalized_columns.size(), null_precedence[col_idx]); - } - } else { - verticalized_columns.push_back(col); - } - } - return std::make_tuple( - flattened_table( - table_view(verticalized_columns), new_column_order, new_null_precedence, {}, {}), - std::move(verticalized_col_depths)); -} - -} // namespace experimental - namespace { using vector_of_columns = std::vector>; using column_index_t = typename vector_of_columns::size_type; diff --git 
a/cpp/src/table/row_operators.cpp b/cpp/src/table/row_operators.cpp new file mode 100644 index 00000000000..32827372c26 --- /dev/null +++ b/cpp/src/table/row_operators.cpp @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +namespace cudf { +namespace structs::detail::experimental { + +std::tuple> verticalize_nested_columns( + table_view input, + std::vector const& column_order, + std::vector const& null_precedence) +{ + // auto [table, null_masks] = superimpose_parent_nulls(input); + + auto table = input; + std::vector verticalized_columns; + std::vector new_column_order; + std::vector new_null_precedence; + std::vector verticalized_col_depths; + for (size_type col_idx = 0; col_idx < table.num_columns(); ++col_idx) { + auto const& col = table.column(col_idx); + if (is_nested(col.type())) { + // convert and insert + std::vector r_verticalized_columns; + std::vector r_verticalized_col_depths; + std::vector flattened; + std::vector depths; + // TODO: Here I added a bogus leaf column at the beginning to help in the while loop below. + // Refactor the while loop so that it can handle the last case. 
+ flattened.push_back(make_empty_column(type_id::INT32)->view()); + std::function recursive_child = [&](column_view const& c, + int depth) { + flattened.push_back(c); + depths.push_back(depth); + for (int child_idx = 0; child_idx < c.num_children(); ++child_idx) { + recursive_child(c.child(child_idx), depth + 1); + } + }; + recursive_child(col, 0); + int curr_col_idx = flattened.size() - 1; + column_view curr_col = flattened[curr_col_idx]; + while (curr_col_idx > 0) { + auto const& prev_col = flattened[curr_col_idx - 1]; + if (not is_nested(prev_col.type())) { + // We hit a column that's a leaf so seal this hierarchy + r_verticalized_columns.push_back(curr_col); + r_verticalized_col_depths.push_back(depths[curr_col_idx - 1]); + curr_col = prev_col; + } else { + curr_col = column_view(prev_col.type(), + prev_col.size(), + nullptr, + prev_col.null_mask(), + UNKNOWN_NULL_COUNT, + prev_col.offset(), + {curr_col}); + } + --curr_col_idx; + } + verticalized_columns.insert( + verticalized_columns.end(), r_verticalized_columns.rbegin(), r_verticalized_columns.rend()); + verticalized_col_depths.insert(verticalized_col_depths.end(), + r_verticalized_col_depths.rbegin(), + r_verticalized_col_depths.rend()); + if (not column_order.empty()) { + new_column_order.insert( + new_column_order.end(), r_verticalized_columns.size(), column_order[col_idx]); + } + if (not null_precedence.empty()) { + new_null_precedence.insert( + new_null_precedence.end(), r_verticalized_columns.size(), null_precedence[col_idx]); + } + } else { + verticalized_columns.push_back(col); + } + } + return std::make_tuple( + structs::detail::flattened_table( + table_view(verticalized_columns), new_column_order, new_null_precedence, {}, {}), + std::move(verticalized_col_depths)); +} + +} // namespace structs::detail::experimental +} // namespace cudf From 3bd749ed8e91b5f1e19ed60cf0c75f01718ce9e4 Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Tue, 22 Feb 2022 13:18:07 +0530 Subject: [PATCH 13/78] Owning row 
lex operator --- cpp/CMakeLists.txt | 2 +- cpp/include/cudf/table/row_operator3.cuh | 43 +++++++++++++++++ cpp/src/sort/sort2.cu | 19 ++------ .../{row_operators.cpp => row_operators.cu} | 46 +++++++++++++++---- 4 files changed, 83 insertions(+), 27 deletions(-) rename cpp/src/table/{row_operators.cpp => row_operators.cu} (66%) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 808d7730d2a..5878456721f 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -459,7 +459,7 @@ add_library( src/structs/structs_column_factories.cu src/structs/structs_column_view.cpp src/structs/utilities.cpp - src/table/row_operators.cpp + src/table/row_operators.cu src/table/table.cpp src/table/table_device_view.cu src/table/table_view.cpp diff --git a/cpp/include/cudf/table/row_operator3.cuh b/cpp/include/cudf/table/row_operator3.cuh index 6f40ff0c414..1937b6c4928 100644 --- a/cpp/include/cudf/table/row_operator3.cuh +++ b/cpp/include/cudf/table/row_operator3.cuh @@ -156,5 +156,48 @@ class row_lexicographic_comparator { int const* _depth; }; // class row_lexicographic_comparator +struct row_lex_operator { + // Problems I see here: + // 1. What if lhs == rhs. We're doing duplicate work then. We didn't need to before. 
+ // Possible to have a table_view::operator==() so we can figure out internally + row_lex_operator(table_view const& lhs, + table_view const& rhs, + std::vector const& column_order, + std::vector const& null_precedence, + rmm::cuda_stream_view stream); + + template + row_lexicographic_comparator device_comparator() + { + // TODO: checks, if constexpr dynamic then based on + if constexpr (std::is_same_v) { + return row_lexicographic_comparator(Nullate{any_nulls}, + **d_lhs, + **d_rhs, + d_depths.data(), + d_column_order.data(), + d_null_precedence.data()); + } else { + return row_lexicographic_comparator(Nullate{}, + **d_lhs, + **d_rhs, + d_depths.data(), + d_column_order.data(), + d_null_precedence.data()); + } + } + + private: + using table_device_view_owner = + std::invoke_result_t; + + std::unique_ptr d_lhs; + std::unique_ptr d_rhs; + rmm::device_uvector d_column_order; + rmm::device_uvector d_null_precedence; + rmm::device_uvector d_depths; + bool any_nulls; +}; + } // namespace experimental } // namespace cudf diff --git a/cpp/src/sort/sort2.cu b/cpp/src/sort/sort2.cu index 81deb1789fa..1ed312d4987 100644 --- a/cpp/src/sort/sort2.cu +++ b/cpp/src/sort/sort2.cu @@ -62,26 +62,13 @@ std::unique_ptr sorted_order2(table_view input, mutable_indices_view.end(), 0); - auto [verticalized, depths] = cudf::structs::detail::experimental::verticalize_nested_columns( - input, column_order, null_precedence); - auto device_table = table_device_view::create(verticalized.flattened_columns(), stream); - auto const d_column_order = make_device_uvector_async(verticalized.orders(), stream); - auto const d_null_precedence = make_device_uvector_async(verticalized.null_orders(), stream); - auto const d_depths = make_device_uvector_async(depths, stream); - - // auto const comparator = row_lexicographic_comparator2(*device_table, *device_table); - auto const comparator = - cudf::experimental::row_lexicographic_comparator(nullate::DYNAMIC{true}, - *device_table, - *device_table, - 
d_depths.data(), - d_column_order.data(), - d_null_precedence.data()); + auto comp = + cudf::experimental::row_lex_operator(input, input, column_order, null_precedence, stream); thrust::sort(rmm::exec_policy(stream), mutable_indices_view.begin(), mutable_indices_view.end(), - comparator); + comp.device_comparator()); // protection for temporary d_column_order and d_null_precedence stream.synchronize(); diff --git a/cpp/src/table/row_operators.cpp b/cpp/src/table/row_operators.cu similarity index 66% rename from cpp/src/table/row_operators.cpp rename to cpp/src/table/row_operators.cu index 32827372c26..8ade2338296 100644 --- a/cpp/src/table/row_operators.cpp +++ b/cpp/src/table/row_operators.cu @@ -17,15 +17,16 @@ #include #include #include +#include +#include #include namespace cudf { -namespace structs::detail::experimental { +namespace experimental { -std::tuple> verticalize_nested_columns( - table_view input, - std::vector const& column_order, - std::vector const& null_precedence) +auto struct_lex_verticalize(table_view input, + std::vector const& column_order = {}, + std::vector const& null_precedence = {}) { // auto [table, null_masks] = superimpose_parent_nulls(input); @@ -91,11 +92,36 @@ std::tuple> verticalize_neste verticalized_columns.push_back(col); } } - return std::make_tuple( - structs::detail::flattened_table( - table_view(verticalized_columns), new_column_order, new_null_precedence, {}, {}), - std::move(verticalized_col_depths)); + return std::make_tuple(table_view(verticalized_columns), + std::move(new_column_order), + std::move(new_null_precedence), + std::move(verticalized_col_depths)); } -} // namespace structs::detail::experimental +row_lex_operator::row_lex_operator(table_view const& lhs, + table_view const& rhs, + std::vector const& column_order, + std::vector const& null_precedence, + rmm::cuda_stream_view stream) + : d_column_order(0, stream), + d_null_precedence(0, stream), + d_depths(0, stream), + any_nulls(has_nested_nulls(lhs) or 
has_nested_nulls(rhs)) +{ + auto [verticalized_lhs, new_column_order, new_null_precedence, verticalized_col_depths] = + struct_lex_verticalize(lhs, column_order, null_precedence); + table_view verticalized_rhs; + std::tie(verticalized_rhs, std::ignore, std::ignore, std::ignore) = struct_lex_verticalize(rhs); + + d_lhs = + std::make_unique(table_device_view::create(verticalized_lhs, stream)); + d_rhs = + std::make_unique(table_device_view::create(verticalized_rhs, stream)); + + d_column_order = detail::make_device_uvector_async(new_column_order, stream); + d_null_precedence = detail::make_device_uvector_async(new_null_precedence, stream); + d_depths = detail::make_device_uvector_async(verticalized_col_depths, stream); +} + +} // namespace experimental } // namespace cudf From 613d6640cb909cf5a6c9c7466f9a888bf6f097bf Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Wed, 23 Feb 2022 12:38:19 +0530 Subject: [PATCH 14/78] merge fixes --- cpp/benchmarks/compare/comparator_benchmark.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/benchmarks/compare/comparator_benchmark.cu b/cpp/benchmarks/compare/comparator_benchmark.cu index 8c9accc0a00..13513b55f64 100644 --- a/cpp/benchmarks/compare/comparator_benchmark.cu +++ b/cpp/benchmarks/compare/comparator_benchmark.cu @@ -24,7 +24,7 @@ #include #include -#include +#include #include #include From 2ef3ac7b76be7a9ede8752f1a90a50ca220994f4 Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Thu, 24 Feb 2022 11:43:06 +0530 Subject: [PATCH 15/78] Move struct logic out of main row loop and into element_relational_comparator --- cpp/include/cudf/table/row_operator3.cuh | 156 +++++++++++++++++++---- 1 file changed, 131 insertions(+), 25 deletions(-) diff --git a/cpp/include/cudf/table/row_operator3.cuh b/cpp/include/cudf/table/row_operator3.cuh index 1937b6c4928..a3b41fcf528 100644 --- a/cpp/include/cudf/table/row_operator3.cuh +++ b/cpp/include/cudf/table/row_operator3.cuh @@ -34,6 +34,128 @@ namespace cudf 
{ namespace experimental { +template +struct non_nested_id_to_type { + using type = std::conditional_t>; +}; + +/** + * @brief Performs a relational comparison between two elements in two columns. + * + * @tparam Nullate A cudf::nullate type describing how to check for nulls. + */ +template +class element_relational_comparator { + public: + /** + * @brief Construct type-dispatched function object for performing a + * relational comparison between two elements. + * + * @note `lhs` and `rhs` may be the same. + * + * @param lhs The column containing the first element + * @param rhs The column containing the second element (may be the same as lhs) + * @param has_nulls Indicates if either input column contains nulls. + * @param null_precedence Indicates how null values are ordered with other values + */ + __host__ __device__ element_relational_comparator(Nullate has_nulls, + column_device_view lhs, + column_device_view rhs, + null_order null_precedence, + int depth = 0) + : lhs{lhs}, rhs{rhs}, nulls{has_nulls}, null_precedence{null_precedence}, depth{depth} + { + } + + __host__ __device__ element_relational_comparator(Nullate has_nulls, + column_device_view lhs, + column_device_view rhs) + : lhs{lhs}, rhs{rhs}, nulls{has_nulls} + { + } + + /** + * @brief Performs a relational comparison between the specified elements + * + * @param lhs_element_index The index of the first element + * @param rhs_element_index The index of the second element + * @return Indicates the relationship between the elements in + * the `lhs` and `rhs` columns. 
+ */ + template ()>* = nullptr> + __device__ weak_ordering operator()(size_type lhs_element_index, + size_type rhs_element_index) const noexcept + { + if (nulls) { + bool const lhs_is_null{lhs.is_null(lhs_element_index)}; + bool const rhs_is_null{rhs.is_null(rhs_element_index)}; + + if (lhs_is_null or rhs_is_null) { // at least one is null + return null_compare(lhs_is_null, rhs_is_null, null_precedence); + } + } + + return relational_compare(lhs.element(lhs_element_index), + rhs.element(rhs_element_index)); + } + + template () and + not std::is_same_v)> + __device__ weak_ordering operator()(size_type lhs_element_index, size_type rhs_element_index) + { + cudf_assert(false && "Attempted to compare elements of uncomparable types."); + return weak_ordering::LESS; + } + + template () and + std::is_same_v)> + __device__ thrust::pair operator()(size_type lhs_element_index, + size_type rhs_element_index, + int depth) + { + weak_ordering state{weak_ordering::EQUIVALENT}; + int last_null_depth = std::numeric_limits::max(); + + column_device_view lcol = lhs; + column_device_view rcol = rhs; + while (lcol.type().id() == type_id::STRUCT) { + bool const lhs_is_null{lcol.is_null(lhs_element_index)}; + bool const rhs_is_null{rcol.is_null(rhs_element_index)}; + + if (lhs_is_null or rhs_is_null) { // atleast one is null + state = null_compare(lhs_is_null, rhs_is_null, null_precedence); + last_null_depth = depth; + if (state == weak_ordering::EQUIVALENT) { + return thrust::make_pair(state, last_null_depth); + } + break; + } + + lcol = lcol.children()[0]; + rcol = rcol.children()[0]; + ++depth; + } + + if (state == weak_ordering::EQUIVALENT) { + auto comparator = element_relational_comparator{nulls, lcol, rcol, null_precedence}; + state = cudf::type_dispatcher( + lcol.type(), comparator, lhs_element_index, rhs_element_index); + } + + return thrust::make_pair(state, last_null_depth); + } + + private: + column_device_view lhs; + column_device_view rhs; + Nullate nulls; + null_order 
null_precedence{}; + int depth{}; +}; + /** * @brief Computes whether one row is lexicographically *less* than another row. * @@ -107,37 +229,21 @@ class row_lexicographic_comparator { last_null_depth = std::numeric_limits::max(); } - bool continue_to_next_col = false; bool ascending = (_column_order == nullptr) or (_column_order[i] == order::ASCENDING); - weak_ordering state{weak_ordering::EQUIVALENT}; null_order null_precedence = _null_precedence == nullptr ? null_order::BEFORE : _null_precedence[i]; - column_device_view lcol = _lhs.column(i); - column_device_view rcol = _rhs.column(i); - int depth = _depth[i]; - while (lcol.type().id() == type_id::STRUCT) { - bool const lhs_is_null{lcol.is_null(lhs_index)}; - bool const rhs_is_null{rcol.is_null(rhs_index)}; - - if (lhs_is_null or rhs_is_null) { // atleast one is null - state = null_compare(lhs_is_null, rhs_is_null, null_precedence); - if (state == weak_ordering::EQUIVALENT) { continue_to_next_col = true; } - last_null_depth = depth; - break; - } - - lcol = lcol.children()[0]; - rcol = rcol.children()[0]; - ++depth; - } - - if (continue_to_next_col) { continue; } + auto comparator = + element_relational_comparator{_nulls, _lhs.column(i), _rhs.column(i), null_precedence}; - if (state == weak_ordering::EQUIVALENT) { - auto comparator = element_relational_comparator{_nulls, lcol, rcol, null_precedence}; - state = cudf::type_dispatcher(lcol.type(), comparator, lhs_index, rhs_index); + weak_ordering state; + if (is_nested(_lhs.column(i).type())) { + thrust::tie(state, last_null_depth) = + comparator.template operator()(lhs_index, rhs_index, _depth[i]); + } else { + state = cudf::type_dispatcher( + _lhs.column(i).type(), comparator, lhs_index, rhs_index); } if (state == weak_ordering::EQUIVALENT) { continue; } From 55774313c2e5d2d9328091132859da2e782fdca6 Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Thu, 24 Feb 2022 12:49:31 +0530 Subject: [PATCH 16/78] pushing even more logic into element_relational_comparator 
--- cpp/include/cudf/table/row_operator3.cuh | 38 +++++++++++------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/cpp/include/cudf/table/row_operator3.cuh b/cpp/include/cudf/table/row_operator3.cuh index a3b41fcf528..273f7863dc2 100644 --- a/cpp/include/cudf/table/row_operator3.cuh +++ b/cpp/include/cudf/table/row_operator3.cuh @@ -62,7 +62,7 @@ class element_relational_comparator { column_device_view lhs, column_device_view rhs, null_order null_precedence, - int depth = 0) + int depth = std::numeric_limits::max()) : lhs{lhs}, rhs{rhs}, nulls{has_nulls}, null_precedence{null_precedence}, depth{depth} { } @@ -84,37 +84,38 @@ class element_relational_comparator { */ template ()>* = nullptr> - __device__ weak_ordering operator()(size_type lhs_element_index, - size_type rhs_element_index) const noexcept + __device__ thrust::pair operator()(size_type lhs_element_index, + size_type rhs_element_index) const noexcept { if (nulls) { bool const lhs_is_null{lhs.is_null(lhs_element_index)}; bool const rhs_is_null{rhs.is_null(rhs_element_index)}; if (lhs_is_null or rhs_is_null) { // at least one is null - return null_compare(lhs_is_null, rhs_is_null, null_precedence); + return thrust::make_pair(null_compare(lhs_is_null, rhs_is_null, null_precedence), depth); } } - return relational_compare(lhs.element(lhs_element_index), - rhs.element(rhs_element_index)); + return thrust::make_pair(relational_compare(lhs.element(lhs_element_index), + rhs.element(rhs_element_index)), + std::numeric_limits::max()); } template () and not std::is_same_v)> - __device__ weak_ordering operator()(size_type lhs_element_index, size_type rhs_element_index) + __device__ thrust::pair operator()(size_type lhs_element_index, + size_type rhs_element_index) { cudf_assert(false && "Attempted to compare elements of uncomparable types."); - return weak_ordering::LESS; + return thrust::make_pair(weak_ordering::LESS, std::numeric_limits::max()); } template () and std::is_same_v)> __device__ 
thrust::pair operator()(size_type lhs_element_index, - size_type rhs_element_index, - int depth) + size_type rhs_element_index) { weak_ordering state{weak_ordering::EQUIVALENT}; int last_null_depth = std::numeric_limits::max(); @@ -141,7 +142,7 @@ class element_relational_comparator { if (state == weak_ordering::EQUIVALENT) { auto comparator = element_relational_comparator{nulls, lcol, rcol, null_precedence}; - state = cudf::type_dispatcher( + thrust::tie(state, last_null_depth) = cudf::type_dispatcher( lcol.type(), comparator, lhs_element_index, rhs_element_index); } @@ -153,7 +154,7 @@ class element_relational_comparator { column_device_view rhs; Nullate nulls; null_order null_precedence{}; - int depth{}; + int depth{std::numeric_limits::max()}; }; /** @@ -234,17 +235,12 @@ class row_lexicographic_comparator { null_order null_precedence = _null_precedence == nullptr ? null_order::BEFORE : _null_precedence[i]; - auto comparator = - element_relational_comparator{_nulls, _lhs.column(i), _rhs.column(i), null_precedence}; + auto comparator = element_relational_comparator{ + _nulls, _lhs.column(i), _rhs.column(i), null_precedence, _depth[i]}; weak_ordering state; - if (is_nested(_lhs.column(i).type())) { - thrust::tie(state, last_null_depth) = - comparator.template operator()(lhs_index, rhs_index, _depth[i]); - } else { - state = cudf::type_dispatcher( - _lhs.column(i).type(), comparator, lhs_index, rhs_index); - } + thrust::tie(state, last_null_depth) = + cudf::type_dispatcher(_lhs.column(i).type(), comparator, lhs_index, rhs_index); if (state == weak_ordering::EQUIVALENT) { continue; } From f037bc0150c6818aa8869864cd055cc0078f4730 Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Thu, 24 Feb 2022 13:29:19 +0530 Subject: [PATCH 17/78] More optimizations. 
early return and remove unnecessary statements --- cpp/include/cudf/table/row_operator3.cuh | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/cpp/include/cudf/table/row_operator3.cuh b/cpp/include/cudf/table/row_operator3.cuh index 273f7863dc2..70458c0d66e 100644 --- a/cpp/include/cudf/table/row_operator3.cuh +++ b/cpp/include/cudf/table/row_operator3.cuh @@ -118,7 +118,7 @@ class element_relational_comparator { size_type rhs_element_index) { weak_ordering state{weak_ordering::EQUIVALENT}; - int last_null_depth = std::numeric_limits::max(); + int last_null_depth; column_device_view lcol = lhs; column_device_view rcol = rhs; @@ -129,10 +129,7 @@ class element_relational_comparator { if (lhs_is_null or rhs_is_null) { // atleast one is null state = null_compare(lhs_is_null, rhs_is_null, null_precedence); last_null_depth = depth; - if (state == weak_ordering::EQUIVALENT) { - return thrust::make_pair(state, last_null_depth); - } - break; + return thrust::make_pair(state, last_null_depth); } lcol = lcol.children()[0]; @@ -224,11 +221,8 @@ class row_lexicographic_comparator { { int last_null_depth = std::numeric_limits::max(); for (size_type i = 0; i < _lhs.num_columns(); ++i) { - if (_depth[i] > last_null_depth) { - continue; - } else { - last_null_depth = std::numeric_limits::max(); - } + int depth = _depth == nullptr ? std::numeric_limits::max() : _depth[i]; + if (depth > last_null_depth) { continue; } bool ascending = (_column_order == nullptr) or (_column_order[i] == order::ASCENDING); @@ -236,7 +230,7 @@ class row_lexicographic_comparator { _null_precedence == nullptr ? 
null_order::BEFORE : _null_precedence[i]; auto comparator = element_relational_comparator{ - _nulls, _lhs.column(i), _rhs.column(i), null_precedence, _depth[i]}; + _nulls, _lhs.column(i), _rhs.column(i), null_precedence, depth}; weak_ordering state; thrust::tie(state, last_null_depth) = From 8c54a85214134c45827d9f0246e8ef2198fc46c3 Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Thu, 24 Feb 2022 15:51:58 +0530 Subject: [PATCH 18/78] review changes --- cpp/include/cudf/table/row_operator3.cuh | 27 +++++++++--------- cpp/src/sort/sort2.cu | 5 ++-- cpp/src/table/row_operators.cu | 35 ++++++++++++++++-------- 3 files changed, 38 insertions(+), 29 deletions(-) diff --git a/cpp/include/cudf/table/row_operator3.cuh b/cpp/include/cudf/table/row_operator3.cuh index 70458c0d66e..6c2e127b050 100644 --- a/cpp/include/cudf/table/row_operator3.cuh +++ b/cpp/include/cudf/table/row_operator3.cuh @@ -253,33 +253,32 @@ class row_lexicographic_comparator { }; // class row_lexicographic_comparator struct row_lex_operator { - // Problems I see here: - // 1. What if lhs == rhs. We're doing duplicate work then. We didn't need to before. - // Possible to have a table_view::operator==() so we can figure out internally row_lex_operator(table_view const& lhs, table_view const& rhs, - std::vector const& column_order, - std::vector const& null_precedence, + host_span column_order, + host_span null_precedence, + rmm::cuda_stream_view stream); + + row_lex_operator(table_view const& t, + host_span column_order, + host_span null_precedence, rmm::cuda_stream_view stream); template row_lexicographic_comparator device_comparator() { - // TODO: checks, if constexpr dynamic then based on + auto lhs = **d_lhs; + auto rhs = (d_rhs ? 
**d_rhs : **d_lhs); if constexpr (std::is_same_v) { return row_lexicographic_comparator(Nullate{any_nulls}, - **d_lhs, - **d_rhs, + lhs, + rhs, d_depths.data(), d_column_order.data(), d_null_precedence.data()); } else { - return row_lexicographic_comparator(Nullate{}, - **d_lhs, - **d_rhs, - d_depths.data(), - d_column_order.data(), - d_null_precedence.data()); + return row_lexicographic_comparator( + Nullate{}, lhs, rhs, d_depths.data(), d_column_order.data(), d_null_precedence.data()); } } diff --git a/cpp/src/sort/sort2.cu b/cpp/src/sort/sort2.cu index 1ed312d4987..0b25d003cac 100644 --- a/cpp/src/sort/sort2.cu +++ b/cpp/src/sort/sort2.cu @@ -62,14 +62,13 @@ std::unique_ptr sorted_order2(table_view input, mutable_indices_view.end(), 0); - auto comp = - cudf::experimental::row_lex_operator(input, input, column_order, null_precedence, stream); + auto comp = cudf::experimental::row_lex_operator(input, column_order, null_precedence, stream); thrust::sort(rmm::exec_policy(stream), mutable_indices_view.begin(), mutable_indices_view.end(), comp.device_comparator()); - // protection for temporary d_column_order and d_null_precedence + // protection for temporary owning comparison object stream.synchronize(); return sorted_indices; diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu index 8ade2338296..e4eaddeca9d 100644 --- a/cpp/src/table/row_operators.cu +++ b/cpp/src/table/row_operators.cu @@ -25,8 +25,8 @@ namespace cudf { namespace experimental { auto struct_lex_verticalize(table_view input, - std::vector const& column_order = {}, - std::vector const& null_precedence = {}) + host_span column_order = {}, + host_span null_precedence = {}) { // auto [table, null_masks] = superimpose_parent_nulls(input); @@ -98,30 +98,41 @@ auto struct_lex_verticalize(table_view input, std::move(verticalized_col_depths)); } -row_lex_operator::row_lex_operator(table_view const& lhs, - table_view const& rhs, - std::vector const& column_order, - std::vector const& 
null_precedence, +row_lex_operator::row_lex_operator(table_view const& t, + host_span column_order, + host_span null_precedence, rmm::cuda_stream_view stream) : d_column_order(0, stream), d_null_precedence(0, stream), d_depths(0, stream), - any_nulls(has_nested_nulls(lhs) or has_nested_nulls(rhs)) + any_nulls(has_nested_nulls(t)) { auto [verticalized_lhs, new_column_order, new_null_precedence, verticalized_col_depths] = - struct_lex_verticalize(lhs, column_order, null_precedence); - table_view verticalized_rhs; - std::tie(verticalized_rhs, std::ignore, std::ignore, std::ignore) = struct_lex_verticalize(rhs); + struct_lex_verticalize(t, column_order, null_precedence); d_lhs = std::make_unique(table_device_view::create(verticalized_lhs, stream)); - d_rhs = - std::make_unique(table_device_view::create(verticalized_rhs, stream)); d_column_order = detail::make_device_uvector_async(new_column_order, stream); d_null_precedence = detail::make_device_uvector_async(new_null_precedence, stream); d_depths = detail::make_device_uvector_async(verticalized_col_depths, stream); } +row_lex_operator::row_lex_operator(table_view const& lhs, + table_view const& rhs, + host_span column_order, + host_span null_precedence, + rmm::cuda_stream_view stream) + : row_lex_operator(lhs, column_order, null_precedence, stream) +{ + table_view verticalized_rhs; + std::tie(verticalized_rhs, std::ignore, std::ignore, std::ignore) = struct_lex_verticalize(rhs); + + d_rhs = + std::make_unique(table_device_view::create(verticalized_rhs, stream)); + + any_nulls |= has_nested_nulls(rhs); +} + } // namespace experimental } // namespace cudf From 9d24a8794e90b748df2842e9e2bb3631bfe6969c Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Thu, 24 Feb 2022 20:16:02 +0530 Subject: [PATCH 19/78] Checks to ensure tables can be compared --- cpp/src/table/row_operators.cu | 43 ++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/cpp/src/table/row_operators.cu 
b/cpp/src/table/row_operators.cu index e4eaddeca9d..98bff7f6983 100644 --- a/cpp/src/table/row_operators.cu +++ b/cpp/src/table/row_operators.cu @@ -24,6 +24,8 @@ namespace cudf { namespace experimental { +namespace { + auto struct_lex_verticalize(table_view input, host_span column_order = {}, host_span null_precedence = {}) @@ -98,6 +100,42 @@ auto struct_lex_verticalize(table_view input, std::move(verticalized_col_depths)); } +void check_lex_compatibility(table_view const& input) +{ + // Basically check if there's any LIST hiding anywhere in the table + std::function check_column = [&](column_view const& c) { + CUDF_EXPECTS(c.type().id() != type_id::LIST, + "Cannot lexicographic compare a table with a LIST column"); + for (int i = 0; i < c.num_children(); ++i) { + check_column(c.child(i)); + } + }; + for (column_view const& c : input) { + check_column(c); + } +} + +void check_shape_compatibility(table_view const& lhs, table_view const& rhs) +{ + std::function check_column = + [&](column_view const& l, column_view const& r) { + CUDF_EXPECTS(l.type().id() == r.type().id(), + "Cannot compare tables with different column types"); + CUDF_EXPECTS(l.num_children() == r.num_children(), "Mismatched number of children"); + for (size_type i = 0; i < l.num_children(); ++i) { + check_column(l.child(i), r.child(i)); + } + }; + + CUDF_EXPECTS(lhs.num_columns() == rhs.num_columns(), + "Cannot compare tables with different number of columns"); + for (size_type i = 0; i < lhs.num_columns(); ++i) { + check_column(lhs.column(i), rhs.column(i)); + } +} + +} // namespace + row_lex_operator::row_lex_operator(table_view const& t, host_span column_order, host_span null_precedence, @@ -107,6 +145,8 @@ row_lex_operator::row_lex_operator(table_view const& t, d_depths(0, stream), any_nulls(has_nested_nulls(t)) { + check_lex_compatibility(t); + auto [verticalized_lhs, new_column_order, new_null_precedence, verticalized_col_depths] = struct_lex_verticalize(t, column_order, null_precedence); @@ 
-125,6 +165,9 @@ row_lex_operator::row_lex_operator(table_view const& lhs, rmm::cuda_stream_view stream) : row_lex_operator(lhs, column_order, null_precedence, stream) { + check_lex_compatibility(rhs); + check_shape_compatibility(lhs, rhs); + table_view verticalized_rhs; std::tie(verticalized_rhs, std::ignore, std::ignore, std::ignore) = struct_lex_verticalize(rhs); From a664c810533b367304d8224e332a8aa603390285 Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Wed, 2 Mar 2022 13:56:33 +0530 Subject: [PATCH 20/78] Super basic list lex working --- cpp/include/cudf/table/row_operator3.cuh | 158 ++++++++- cpp/src/table/row_operators.cu | 434 ++++++++++++++++++++++- cpp/tests/sort/sort2_test.cu | 21 ++ 3 files changed, 597 insertions(+), 16 deletions(-) diff --git a/cpp/include/cudf/table/row_operator3.cuh b/cpp/include/cudf/table/row_operator3.cuh index 6c2e127b050..ab1fe9af01a 100644 --- a/cpp/include/cudf/table/row_operator3.cuh +++ b/cpp/include/cudf/table/row_operator3.cuh @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -39,6 +40,21 @@ struct non_nested_id_to_type { using type = std::conditional_t>; }; +inline size_type __device__ row_to_value_idx(size_type idx, column_device_view col) +{ + while (col.type().id() == type_id::LIST or col.type().id() == type_id::STRUCT) { + if (col.type().id() == type_id::STRUCT) { + idx += col.offset(); + col = col.child(0); + } else { + auto offset_col = col.child(lists_column_view::offsets_column_index); + idx = offset_col.element(idx + col.offset()); + col = col.child(lists_column_view::child_column_index); + } + } + return idx; +} + /** * @brief Performs a relational comparison between two elements in two columns. 
* @@ -62,8 +78,20 @@ class element_relational_comparator { column_device_view lhs, column_device_view rhs, null_order null_precedence, - int depth = std::numeric_limits::max()) - : lhs{lhs}, rhs{rhs}, nulls{has_nulls}, null_precedence{null_precedence}, depth{depth} + int depth = std::numeric_limits::max(), + size_type* dremel_offsets = nullptr, + uint8_t* rep_level = nullptr, + uint8_t* def_level = nullptr, + uint8_t max_def_level = 0) + : lhs{lhs}, + rhs{rhs}, + nulls{has_nulls}, + null_precedence{null_precedence}, + depth{depth}, + dremel_offsets{dremel_offsets}, + rep_level{rep_level}, + def_level{def_level}, + max_def_level{max_def_level} { } @@ -103,7 +131,7 @@ class element_relational_comparator { template () and - not std::is_same_v)> + not is_nested())> __device__ thrust::pair operator()(size_type lhs_element_index, size_type rhs_element_index) { @@ -146,12 +174,83 @@ class element_relational_comparator { return thrust::make_pair(state, last_null_depth); } + template () and + std::is_same_v)> + __device__ thrust::pair operator()(size_type lhs_element_index, + size_type rhs_element_index) + { + auto l_start = dremel_offsets[lhs_element_index]; + auto l_end = dremel_offsets[lhs_element_index + 1]; + auto r_start = dremel_offsets[rhs_element_index]; + auto r_end = dremel_offsets[rhs_element_index + 1]; + auto lc_start = row_to_value_idx(lhs_element_index, lhs); + auto rc_start = row_to_value_idx(rhs_element_index, rhs); + column_device_view lcol = lhs; + column_device_view rcol = rhs; + while (lcol.type().id() == type_id::LIST) { + lcol = lcol.child(lists_column_view::child_column_index); + rcol = rcol.child(lists_column_view::child_column_index); + } + printf("max_def_level: %d\n", max_def_level); + + printf("t: %d, lhs_element_index: %d, rhs_element_index: %d\n", + threadIdx.x, + lhs_element_index, + rhs_element_index); + printf("t: %d, l_start: %d, l_end: %d, r_start: %d, r_end: %d\n", + threadIdx.x, + l_start, + l_end, + r_start, + r_end); + weak_ordering 
state{weak_ordering::EQUIVALENT}; + for (int i = l_start, j = r_start, m = lc_start, n = rc_start; i < l_end and j < r_end; + ++i, ++j) { + printf("t: %d, i: %d, j: %d, m: %d, n: %d\n", threadIdx.x, i, j, m, n); + printf("t: %d, def_l: %d, def_r: %d, rep_l: %d, rep_r: %d\n", + threadIdx.x, + def_level[i], + def_level[j], + rep_level[i], + rep_level[j]); + if (def_level[i] != def_level[j]) { + state = (def_level[i] < def_level[j]) ? weak_ordering::LESS : weak_ordering::GREATER; + printf("t: %d, def, state: %d\n", threadIdx.x, state); + return thrust::make_pair(state, depth); + } + if (rep_level[i] != rep_level[j]) { + state = (rep_level[i] < rep_level[j]) ? weak_ordering::LESS : weak_ordering::GREATER; + printf("t: %d, rep, state: %d\n", threadIdx.x, state); + return thrust::make_pair(state, depth); + } + if (def_level[i] == max_def_level) { + auto comparator = element_relational_comparator{nulls, lcol, rcol, null_precedence}; + thrust::tie(state, depth) = + cudf::type_dispatcher(lcol.type(), comparator, m, n); + printf("t: %d, state: %d\n", threadIdx.x, state); + if (state != weak_ordering::EQUIVALENT) { return thrust::make_pair(state, depth); } + ++m; + ++n; + } + } + state = (l_end - l_start < r_end - r_start) ? weak_ordering::LESS + : (l_end - l_start > r_end - r_start) ? 
weak_ordering::GREATER + : weak_ordering::EQUIVALENT; + return thrust::make_pair(state, depth); + } + private: column_device_view lhs; column_device_view rhs; Nullate nulls; null_order null_precedence{}; int depth{std::numeric_limits::max()}; + size_type* dremel_offsets; + uint8_t* rep_level; + uint8_t* def_level; + uint8_t* max_def_levels; + uint8_t max_def_level{0}; }; /** @@ -195,13 +294,21 @@ class row_lexicographic_comparator { table_device_view rhs, int const* depth = nullptr, order const* column_order = nullptr, - null_order const* null_precedence = nullptr) + null_order const* null_precedence = nullptr, + size_type** dremel_offsets = nullptr, + uint8_t** rep_levels = nullptr, + uint8_t** def_levels = nullptr, + uint8_t* max_def_levels = nullptr) : _lhs{lhs}, _rhs{rhs}, _nulls{has_nulls}, _depth{depth}, _column_order{column_order}, - _null_precedence{null_precedence} + _null_precedence{null_precedence}, + _dremel_offsets{dremel_offsets}, + _rep_levels{rep_levels}, + _def_levels{def_levels}, + _max_def_levels{max_def_levels} { CUDF_EXPECTS(_lhs.num_columns() == _rhs.num_columns(), "Mismatched number of columns."); // CUDF_EXPECTS(detail::is_relationally_comparable(_lhs, _rhs), @@ -229,8 +336,15 @@ class row_lexicographic_comparator { null_order null_precedence = _null_precedence == nullptr ? 
null_order::BEFORE : _null_precedence[i]; - auto comparator = element_relational_comparator{ - _nulls, _lhs.column(i), _rhs.column(i), null_precedence, depth}; + auto comparator = element_relational_comparator{_nulls, + _lhs.column(i), + _rhs.column(i), + null_precedence, + depth, + _dremel_offsets[i], + _rep_levels[i], + _def_levels[i], + _max_def_levels[i]}; weak_ordering state; thrust::tie(state, last_null_depth) = @@ -250,8 +364,25 @@ class row_lexicographic_comparator { null_order const* _null_precedence{}; order const* _column_order{}; int const* _depth; + size_type** _dremel_offsets; + uint8_t** _rep_levels; + uint8_t** _def_levels; + uint8_t* _max_def_levels; }; // class row_lexicographic_comparator +/** + * @brief Dremel data that describes one nested type column + * + * @see get_dremel_data() + */ +struct dremel_data { + rmm::device_uvector dremel_offsets; + rmm::device_uvector rep_level; + rmm::device_uvector def_level; + + size_type leaf_data_size; +}; + struct row_lex_operator { row_lex_operator(table_view const& lhs, table_view const& rhs, @@ -275,7 +406,11 @@ struct row_lex_operator { rhs, d_depths.data(), d_column_order.data(), - d_null_precedence.data()); + d_null_precedence.data(), + d_dremel_offsets.data(), + d_rep_levels.data(), + d_def_levels.data(), + d_max_def_levels.data()); } else { return row_lexicographic_comparator( Nullate{}, lhs, rhs, d_depths.data(), d_column_order.data(), d_null_precedence.data()); @@ -291,6 +426,13 @@ struct row_lex_operator { rmm::device_uvector d_column_order; rmm::device_uvector d_null_precedence; rmm::device_uvector d_depths; + + // List related pre-computation + std::vector dremel_data; + rmm::device_uvector d_dremel_offsets; + rmm::device_uvector d_rep_levels; + rmm::device_uvector d_def_levels; + rmm::device_uvector d_max_def_levels; bool any_nulls; }; diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu index 98bff7f6983..9dccff23d68 100644 --- a/cpp/src/table/row_operators.cu +++ 
b/cpp/src/table/row_operators.cu @@ -14,6 +14,10 @@ * limitations under the License. */ +#include "cudf/detail/iterator.cuh" +#include "cudf/detail/utilities/cuda.cuh" +#include "thrust/gather.h" +#include "thrust/iterator/discard_iterator.h" #include #include #include @@ -21,6 +25,8 @@ #include #include +#include + namespace cudf { namespace experimental { @@ -100,6 +106,413 @@ auto struct_lex_verticalize(table_view input, std::move(verticalized_col_depths)); } +struct def_level_fn { + column_device_view const* parent_col; + uint8_t const* d_nullability; + uint8_t sub_level_start; + uint8_t curr_def_level; + + __device__ uint32_t operator()(size_type i) + { + uint32_t def = curr_def_level; + uint8_t l = sub_level_start; + bool is_col_struct = false; + auto col = *parent_col; + do { + // If col not nullable then it does not contribute to def levels + if (d_nullability[l]) { + if (not col.nullable() or bit_is_set(col.null_mask(), i)) { + ++def; + } else { // We have found the shallowest level at which this row is null + break; + } + } + is_col_struct = (col.type().id() == type_id::STRUCT); + if (is_col_struct) { + col = col.child(0); + ++l; + } + } while (is_col_struct); + return def; + } +}; + +dremel_data get_dremel_data(column_view h_col, + // TODO(cp): use device_span once it is converted to a single hd_vec + rmm::device_uvector const& d_nullability, + std::vector const& nullability, + rmm::cuda_stream_view stream) +{ + auto get_list_level = [](column_view col) { + while (col.type().id() == type_id::STRUCT) { + col = col.child(0); + } + return col; + }; + + auto get_empties = [&](column_view col, size_type start, size_type end) { + auto lcv = lists_column_view(get_list_level(col)); + rmm::device_uvector empties_idx(lcv.size(), stream); + rmm::device_uvector empties(lcv.size(), stream); + auto d_off = lcv.offsets().data(); + + auto empties_idx_end = + thrust::copy_if(rmm::exec_policy(stream), + thrust::make_counting_iterator(start), + 
thrust::make_counting_iterator(end), + empties_idx.begin(), + [d_off] __device__(auto i) { return d_off[i] == d_off[i + 1]; }); + auto empties_end = thrust::gather(rmm::exec_policy(stream), + empties_idx.begin(), + empties_idx_end, + lcv.offsets().begin(), + empties.begin()); + + auto empties_size = empties_end - empties.begin(); + return std::make_tuple(std::move(empties), std::move(empties_idx), empties_size); + }; + + auto curr_col = h_col; + std::vector nesting_levels; + std::vector def_at_level; + std::vector start_at_sub_level; + uint8_t curr_nesting_level_idx = 0; + + auto add_def_at_level = [&](column_view col) { + // Add up all def level contributions in this column all the way till the first list column + // appears in the hierarchy or until we get to leaf + uint32_t def = 0; + start_at_sub_level.push_back(curr_nesting_level_idx); + while (col.type().id() == type_id::STRUCT) { + def += (nullability[curr_nesting_level_idx]) ? 1 : 0; + col = col.child(0); + ++curr_nesting_level_idx; + } + // At the end of all those structs is either a list column or the leaf. Leaf column contributes + // at least one def level. It doesn't matter what the leaf contributes because it'll be at the + // end of the exclusive scan. + def += (nullability[curr_nesting_level_idx]) ? 2 : 1; + def_at_level.push_back(def); + ++curr_nesting_level_idx; + }; + while (cudf::is_nested(curr_col.type())) { + nesting_levels.push_back(curr_col); + add_def_at_level(curr_col); + while (curr_col.type().id() == type_id::STRUCT) { + // Go down the hierarchy until we get to the LIST or the leaf level + curr_col = curr_col.child(0); + } + if (curr_col.type().id() == type_id::LIST) { + curr_col = curr_col.child(lists_column_view::child_column_index); + if (not is_nested(curr_col.type())) { + // Special case: when the leaf data column is the immediate child of the list col then we + // want it to be included right away. 
Otherwise the struct containing it will be included in + // the next iteration of this loop. + nesting_levels.push_back(curr_col); + add_def_at_level(curr_col); + break; + } + } + } + + std::unique_ptr device_view_owners; + column_device_view* d_nesting_levels; + std::tie(device_view_owners, d_nesting_levels) = + contiguous_copy_column_device_views(nesting_levels, stream); + + thrust::exclusive_scan( + thrust::host, def_at_level.begin(), def_at_level.end(), def_at_level.begin()); + + // Sliced list column views only have offsets applied to top level. Get offsets for each level. + rmm::device_uvector d_column_offsets(nesting_levels.size(), stream); + rmm::device_uvector d_column_ends(nesting_levels.size(), stream); + + auto d_col = column_device_view::create(h_col, stream); + cudf::detail::device_single_thread( + [offset_at_level = d_column_offsets.data(), + end_idx_at_level = d_column_ends.data(), + col = *d_col] __device__() { + auto curr_col = col; + size_type off = curr_col.offset(); + size_type end = off + curr_col.size(); + size_type level = 0; + offset_at_level[level] = off; + end_idx_at_level[level] = end; + ++level; + // Apply offset recursively until we get to leaf data + // Skip doing the following for any structs we encounter in between. 
+ while (curr_col.type().id() == type_id::LIST or curr_col.type().id() == type_id::STRUCT) { + if (curr_col.type().id() == type_id::LIST) { + off = curr_col.child(lists_column_view::offsets_column_index).element(off); + end = curr_col.child(lists_column_view::offsets_column_index).element(end); + offset_at_level[level] = off; + end_idx_at_level[level] = end; + ++level; + curr_col = curr_col.child(lists_column_view::child_column_index); + } else { + curr_col = curr_col.child(0); + } + } + }, + stream); + + thrust::host_vector column_offsets = + cudf::detail::make_host_vector_async(d_column_offsets, stream); + thrust::host_vector column_ends = + cudf::detail::make_host_vector_async(d_column_ends, stream); + stream.synchronize(); + + size_t max_vals_size = 0; + for (size_t l = 0; l < column_offsets.size(); ++l) { + max_vals_size += column_ends[l] - column_offsets[l]; + } + + rmm::device_uvector rep_level(max_vals_size, stream); + rmm::device_uvector def_level(max_vals_size, stream); + + rmm::device_uvector temp_rep_vals(max_vals_size, stream); + rmm::device_uvector temp_def_vals(max_vals_size, stream); + rmm::device_uvector new_offsets(0, stream); + size_type curr_rep_values_size = 0; + { + // At this point, curr_col contains the leaf column. Max nesting level is + // nesting_levels.size(). + + // We are going to start by merging the last column in nesting_levels (the leaf, which is at the + // index `nesting_levels.size() - 1`) with the second-to-last (which is at + // `nesting_levels.size() - 2`). 
+ size_t level = nesting_levels.size() - 2; + curr_col = nesting_levels[level]; + auto lcv = lists_column_view(get_list_level(curr_col)); + auto offset_size_at_level = column_ends[level] - column_offsets[level] + 1; + + // Get empties at this level + rmm::device_uvector empties(0, stream); + rmm::device_uvector empties_idx(0, stream); + size_t empties_size; + std::tie(empties, empties_idx, empties_size) = + get_empties(nesting_levels[level], column_offsets[level], column_ends[level]); + + // Merge empty at deepest parent level with the rep, def level vals at leaf level + + auto input_parent_rep_it = thrust::make_constant_iterator(level); + auto input_parent_def_it = + thrust::make_transform_iterator(empties_idx.begin(), + def_level_fn{d_nesting_levels + level, + d_nullability.data(), + start_at_sub_level[level], + def_at_level[level]}); + + // `nesting_levels.size()` == no of list levels + leaf. Max repetition level = no of list levels + auto input_child_rep_it = thrust::make_constant_iterator(nesting_levels.size() - 1); + auto input_child_def_it = + thrust::make_transform_iterator(thrust::make_counting_iterator(column_offsets[level + 1]), + def_level_fn{d_nesting_levels + level + 1, + d_nullability.data(), + start_at_sub_level[level + 1], + def_at_level[level + 1]}); + + // Zip the input and output value iterators so that merge operation is done only once + auto input_parent_zip_it = + thrust::make_zip_iterator(thrust::make_tuple(input_parent_rep_it, input_parent_def_it)); + + auto input_child_zip_it = + thrust::make_zip_iterator(thrust::make_tuple(input_child_rep_it, input_child_def_it)); + + auto output_zip_it = + thrust::make_zip_iterator(thrust::make_tuple(rep_level.begin(), def_level.begin())); + + auto ends = thrust::merge_by_key(rmm::exec_policy(stream), + empties.begin(), + empties.begin() + empties_size, + thrust::make_counting_iterator(column_offsets[level + 1]), + thrust::make_counting_iterator(column_ends[level + 1]), + input_parent_zip_it, + 
input_child_zip_it, + thrust::make_discard_iterator(), + output_zip_it); + + curr_rep_values_size = ends.second - output_zip_it; + + // Scan to get distance by which each offset value is shifted due to the insertion of empties + auto scan_it = cudf::detail::make_counting_transform_iterator( + column_offsets[level], + [off = lcv.offsets().data(), size = lcv.offsets().size()] __device__( + auto i) -> int { return (i + 1 < size) && (off[i] == off[i + 1]); }); + rmm::device_uvector scan_out(offset_size_at_level, stream); + thrust::exclusive_scan( + rmm::exec_policy(stream), scan_it, scan_it + offset_size_at_level, scan_out.begin()); + + // Add scan output to existing offsets to get new offsets into merged rep level values + new_offsets = rmm::device_uvector(offset_size_at_level, stream); + thrust::for_each_n(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + offset_size_at_level, + [off = lcv.offsets().data() + column_offsets[level], + scan_out = scan_out.data(), + new_off = new_offsets.data()] __device__(auto i) { + new_off[i] = off[i] - off[0] + scan_out[i]; + }); + + // Set rep level values at level starts to appropriate rep level + auto scatter_it = thrust::make_constant_iterator(level); + thrust::scatter(rmm::exec_policy(stream), + scatter_it, + scatter_it + new_offsets.size() - 1, + new_offsets.begin(), + rep_level.begin()); + } + + // Having already merged the last two levels, we are now going to merge the result with the + // third-last level which is at index `nesting_levels.size() - 3`. 
+ for (int level = nesting_levels.size() - 3; level >= 0; level--) { + curr_col = nesting_levels[level]; + auto lcv = lists_column_view(get_list_level(curr_col)); + auto offset_size_at_level = column_ends[level] - column_offsets[level] + 1; + + // Get empties at this level + rmm::device_uvector empties(0, stream); + rmm::device_uvector empties_idx(0, stream); + size_t empties_size; + std::tie(empties, empties_idx, empties_size) = + get_empties(nesting_levels[level], column_offsets[level], column_ends[level]); + + auto offset_transformer = [new_child_offsets = new_offsets.data(), + child_start = column_offsets[level + 1]] __device__(auto x) { + return new_child_offsets[x - child_start]; // (x - child's offset) + }; + + // We will be reading from old rep_levels and writing again to rep_levels. Swap the current + // rep values into temp_rep_vals so it can become the input and rep_levels can again be output. + std::swap(temp_rep_vals, rep_level); + std::swap(temp_def_vals, def_level); + + // Merge empty at parent level with the rep, def level vals at current level + auto transformed_empties = thrust::make_transform_iterator(empties.begin(), offset_transformer); + + auto input_parent_rep_it = thrust::make_constant_iterator(level); + auto input_parent_def_it = + thrust::make_transform_iterator(empties_idx.begin(), + def_level_fn{d_nesting_levels + level, + d_nullability.data(), + start_at_sub_level[level], + def_at_level[level]}); + + // Zip the input and output value iterators so that merge operation is done only once + auto input_parent_zip_it = + thrust::make_zip_iterator(thrust::make_tuple(input_parent_rep_it, input_parent_def_it)); + + auto input_child_zip_it = + thrust::make_zip_iterator(thrust::make_tuple(temp_rep_vals.begin(), temp_def_vals.begin())); + + auto output_zip_it = + thrust::make_zip_iterator(thrust::make_tuple(rep_level.begin(), def_level.begin())); + + auto ends = thrust::merge_by_key(rmm::exec_policy(stream), + transformed_empties, + 
transformed_empties + empties_size, + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(curr_rep_values_size), + input_parent_zip_it, + input_child_zip_it, + thrust::make_discard_iterator(), + output_zip_it); + + curr_rep_values_size = ends.second - output_zip_it; + + // Scan to get distance by which each offset value is shifted due to the insertion of dremel + // level value for an empty list + auto scan_it = cudf::detail::make_counting_transform_iterator( + column_offsets[level], + [off = lcv.offsets().data(), size = lcv.offsets().size()] __device__( + auto i) -> int { return (i + 1 < size) && (off[i] == off[i + 1]); }); + rmm::device_uvector scan_out(offset_size_at_level, stream); + thrust::exclusive_scan( + rmm::exec_policy(stream), scan_it, scan_it + offset_size_at_level, scan_out.begin()); + + // Add scan output to existing offsets to get new offsets into merged rep level values + rmm::device_uvector temp_new_offsets(offset_size_at_level, stream); + thrust::for_each_n(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + offset_size_at_level, + [off = lcv.offsets().data() + column_offsets[level], + scan_out = scan_out.data(), + new_off = temp_new_offsets.data(), + offset_transformer] __device__(auto i) { + new_off[i] = offset_transformer(off[i]) + scan_out[i]; + }); + new_offsets = std::move(temp_new_offsets); + + // Set rep level values at level starts to appropriate rep level + auto scatter_it = thrust::make_constant_iterator(level); + thrust::scatter(rmm::exec_policy(stream), + scatter_it, + scatter_it + new_offsets.size() - 1, + new_offsets.begin(), + rep_level.begin()); + } + + size_t level_vals_size = new_offsets.back_element(stream); + rep_level.resize(level_vals_size, stream); + def_level.resize(level_vals_size, stream); + + stream.synchronize(); + + size_type leaf_data_size = column_ends.back() - column_offsets.back(); + + return dremel_data{ + std::move(new_offsets), std::move(rep_level), std::move(def_level),
leaf_data_size}; +} + +auto list_lex_preprocess(table_view table, rmm::cuda_stream_view stream) +{ + std::vector dremel_data; + std::vector max_def_levels; + for (auto const& col : table) { + if (col.type().id() == type_id::LIST) { + // Check nullability of the list + std::vector nullability; + auto cur_col = col; + uint8_t max_def_level = 0; + while (cur_col.type().id() == type_id::LIST) { + max_def_level += (cur_col.nullable() ? 2 : 1); + nullability.push_back(static_cast(cur_col.nullable())); + cur_col = cur_col.child(lists_column_view::child_column_index); + } + max_def_level += (cur_col.nullable() ? 1 : 0); + nullability.push_back(static_cast(cur_col.nullable())); + auto d_nullability = detail::make_device_uvector_async(nullability, stream); + dremel_data.push_back(get_dremel_data(col, d_nullability, nullability, stream)); + max_def_levels.push_back(max_def_level); + } + } + + std::vector dremel_offsets; + std::vector rep_levels; + std::vector def_levels; + for (size_type c = 0; c < table.num_columns(); ++c) { + auto const& col = table.column(c); + if (col.type().id() == type_id::LIST) { + dremel_offsets.push_back(dremel_data[c].dremel_offsets.data()); + rep_levels.push_back(dremel_data[c].rep_level.data()); + def_levels.push_back(dremel_data[c].def_level.data()); + } else { + dremel_offsets.push_back(nullptr); + rep_levels.push_back(nullptr); + def_levels.push_back(nullptr); + } + } + auto d_dremel_offsets = detail::make_device_uvector_async(dremel_offsets, stream); + auto d_rep_levels = detail::make_device_uvector_async(rep_levels, stream); + auto d_def_levels = detail::make_device_uvector_async(def_levels, stream); + auto d_max_def_levels = detail::make_device_uvector_async(max_def_levels, stream); + return std::make_tuple(std::move(dremel_data), + std::move(d_dremel_offsets), + std::move(d_rep_levels), + std::move(d_def_levels), + std::move(d_max_def_levels)); +} + void check_lex_compatibility(table_view const& input) { // Basically check if there's any 
LIST hiding anywhere in the table @@ -143,19 +556,24 @@ row_lex_operator::row_lex_operator(table_view const& t, : d_column_order(0, stream), d_null_precedence(0, stream), d_depths(0, stream), + d_dremel_offsets(0, stream), + d_rep_levels(0, stream), + d_def_levels(0, stream), + d_max_def_levels(0, stream), any_nulls(has_nested_nulls(t)) { - check_lex_compatibility(t); + // check_lex_compatibility(t); + std::tie(dremel_data, d_dremel_offsets, d_rep_levels, d_def_levels, d_max_def_levels) = + list_lex_preprocess(t, stream); - auto [verticalized_lhs, new_column_order, new_null_precedence, verticalized_col_depths] = - struct_lex_verticalize(t, column_order, null_precedence); + // auto [verticalized_lhs, new_column_order, new_null_precedence, verticalized_col_depths] = + // struct_lex_verticalize(t, column_order, null_precedence); - d_lhs = - std::make_unique(table_device_view::create(verticalized_lhs, stream)); + d_lhs = std::make_unique(table_device_view::create(t, stream)); - d_column_order = detail::make_device_uvector_async(new_column_order, stream); - d_null_precedence = detail::make_device_uvector_async(new_null_precedence, stream); - d_depths = detail::make_device_uvector_async(verticalized_col_depths, stream); + d_column_order = detail::make_device_uvector_async(column_order, stream); + d_null_precedence = detail::make_device_uvector_async(null_precedence, stream); + // d_depths = detail::make_device_uvector_async(verticalized_col_depths, stream); } row_lex_operator::row_lex_operator(table_view const& lhs, diff --git a/cpp/tests/sort/sort2_test.cu b/cpp/tests/sort/sort2_test.cu index 2c173e0a4e2..cb263bb079f 100644 --- a/cpp/tests/sort/sort2_test.cu +++ b/cpp/tests/sort/sort2_test.cu @@ -215,4 +215,25 @@ TEST_F(NewRowOpTest, SampleStructTest) cudf::test::expect_columns_equal(result1->view(), result2->view()); } +TEST_F(NewRowOpTest, List) +{ + using lcw = cudf::test::lists_column_wrapper; + lcw col{ + {{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, + {{1, 2, 3}, {}, 
{4, 5}, {}, {0, 6, 0}}, + {{1, 2, 3}, {}, {4, 5}, {0, 6, 0}}, + {{7, 8}, {}}, + lcw{lcw{}, lcw{}, lcw{}}, + lcw{lcw{}}, + lcw{lcw{}}, + lcw{lcw{}}, + lcw{lcw{}, lcw{}, lcw{}}, + lcw{lcw{}, lcw{}, lcw{}}, + }; + print(col); + + auto result = cudf::detail::experimental::sorted_order2(cudf::table_view({col})); + cudf::test::print(result->view()); +} + CUDF_TEST_PROGRAM_MAIN() From 1ebd8774dd7011241f98ac4a0b6f8510506c84af Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Wed, 2 Mar 2022 16:07:32 +0530 Subject: [PATCH 21/78] list test expansion and cleanups. --- cpp/include/cudf/{sort2.cuh => sort2.hpp} | 20 ++----------------- cpp/include/cudf/table/row_operator3.cuh | 6 ++++-- cpp/tests/CMakeLists.txt | 2 +- .../sort/{sort2_test.cu => sort2_test.cpp} | 14 ++++++------- 4 files changed, 13 insertions(+), 29 deletions(-) rename cpp/include/cudf/{sort2.cuh => sort2.hpp} (69%) rename cpp/tests/sort/{sort2_test.cu => sort2_test.cpp} (97%) diff --git a/cpp/include/cudf/sort2.cuh b/cpp/include/cudf/sort2.hpp similarity index 69% rename from cpp/include/cudf/sort2.cuh rename to cpp/include/cudf/sort2.hpp index f9feab8246b..154b08767fd 100644 --- a/cpp/include/cudf/sort2.cuh +++ b/cpp/include/cudf/sort2.hpp @@ -16,24 +16,8 @@ #pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include - -#include -#include +#include +#include namespace cudf { namespace detail { diff --git a/cpp/include/cudf/table/row_operator3.cuh b/cpp/include/cudf/table/row_operator3.cuh index ab1fe9af01a..c769dc0076a 100644 --- a/cpp/include/cudf/table/row_operator3.cuh +++ b/cpp/include/cudf/table/row_operator3.cuh @@ -228,8 +228,10 @@ class element_relational_comparator { auto comparator = element_relational_comparator{nulls, lcol, rcol, null_precedence}; thrust::tie(state, depth) = cudf::type_dispatcher(lcol.type(), comparator, m, n); - printf("t: %d, state: %d\n", threadIdx.x, state); - if (state != 
weak_ordering::EQUIVALENT) { return thrust::make_pair(state, depth); } + if (state != weak_ordering::EQUIVALENT) { + printf("t: %d, leaf, state: %d\n", threadIdx.x, state); + return thrust::make_pair(state, depth); + } ++m; ++n; } diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 2e8caef4ebd..a19ca08f6f1 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -212,7 +212,7 @@ endif() # * sort tests ------------------------------------------------------------------------------------ ConfigureTest(SORT_TEST sort/segmented_sort_tests.cpp sort/sort_test.cpp sort/rank_test.cpp) -ConfigureTest(SORT2_TEST sort/sort2_test.cu) +ConfigureTest(SORT2_TEST sort/sort2_test.cpp) # ################################################################################################## # * copying tests --------------------------------------------------------------------------------- diff --git a/cpp/tests/sort/sort2_test.cu b/cpp/tests/sort/sort2_test.cpp similarity index 97% rename from cpp/tests/sort/sort2_test.cu rename to cpp/tests/sort/sort2_test.cpp index cb263bb079f..a20ce7308f4 100644 --- a/cpp/tests/sort/sort2_test.cu +++ b/cpp/tests/sort/sort2_test.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -18,8 +19,6 @@ struct NewRowOpTest : public cudf::test::BaseFixture { }; -#include - TEST_F(NewRowOpTest, BasicStructTwoChild) { using Type = int; @@ -222,18 +221,17 @@ TEST_F(NewRowOpTest, List) {{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, {{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, {{1, 2, 3}, {}, {4, 5}, {0, 6, 0}}, + {{1, 2}, {3}, {4, 5}, {0, 6, 0}}, {{7, 8}, {}}, lcw{lcw{}, lcw{}, lcw{}}, lcw{lcw{}}, - lcw{lcw{}}, - lcw{lcw{}}, - lcw{lcw{}, lcw{}, lcw{}}, - lcw{lcw{}, lcw{}, lcw{}}, + {lcw{10}}, + lcw{}, }; - print(col); + auto expect = cudf::test::fixed_width_column_wrapper{8, 6, 5, 3, 0, 1, 2, 4, 7}; auto result = cudf::detail::experimental::sorted_order2(cudf::table_view({col})); - 
cudf::test::print(result->view()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expect, *result); } CUDF_TEST_PROGRAM_MAIN() From 3e6e9f41f390fff78c09cf93764d475793c29ee4 Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Wed, 2 Mar 2022 18:44:06 +0530 Subject: [PATCH 22/78] Make struct comp work again --- cpp/src/table/row_operators.cu | 39 +++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu index 9dccff23d68..3b9dad03194 100644 --- a/cpp/src/table/row_operators.cu +++ b/cpp/src/table/row_operators.cu @@ -45,7 +45,7 @@ auto struct_lex_verticalize(table_view input, std::vector verticalized_col_depths; for (size_type col_idx = 0; col_idx < table.num_columns(); ++col_idx) { auto const& col = table.column(col_idx); - if (is_nested(col.type())) { + if (col.type().id() == type_id::STRUCT) { // convert and insert std::vector r_verticalized_columns; std::vector r_verticalized_col_depths; @@ -484,18 +484,21 @@ auto list_lex_preprocess(table_view table, rmm::cuda_stream_view stream) auto d_nullability = detail::make_device_uvector_async(nullability, stream); dremel_data.push_back(get_dremel_data(col, d_nullability, nullability, stream)); max_def_levels.push_back(max_def_level); + } else { + max_def_levels.push_back(0); } } std::vector dremel_offsets; std::vector rep_levels; std::vector def_levels; - for (size_type c = 0; c < table.num_columns(); ++c) { - auto const& col = table.column(c); + size_type c = 0; + for (auto const& col : table) { if (col.type().id() == type_id::LIST) { dremel_offsets.push_back(dremel_data[c].dremel_offsets.data()); rep_levels.push_back(dremel_data[c].rep_level.data()); def_levels.push_back(dremel_data[c].def_level.data()); + ++c; } else { dremel_offsets.push_back(nullptr); rep_levels.push_back(nullptr); @@ -515,11 +518,16 @@ auto list_lex_preprocess(table_view table, rmm::cuda_stream_view stream) void check_lex_compatibility(table_view const& 
input) { - // Basically check if there's any LIST hiding anywhere in the table + // Basically check if there's any LIST of STRUCT or STRUCT of LIST hiding anywhere in the table std::function check_column = [&](column_view const& c) { - CUDF_EXPECTS(c.type().id() != type_id::LIST, - "Cannot lexicographic compare a table with a LIST column"); + if (c.type().id() == type_id::LIST) { + CUDF_EXPECTS(c.child(lists_column_view::child_column_index).type().id() != type_id::STRUCT, + "List of structs are not supported"); + } for (int i = 0; i < c.num_children(); ++i) { + if (c.type().id() == type_id::STRUCT) { + CUDF_EXPECTS(c.child(i).type().id() != type_id::LIST, "Struct of Lists are not supported"); + } check_column(c.child(i)); } }; @@ -562,18 +570,19 @@ row_lex_operator::row_lex_operator(table_view const& t, d_max_def_levels(0, stream), any_nulls(has_nested_nulls(t)) { - // check_lex_compatibility(t); - std::tie(dremel_data, d_dremel_offsets, d_rep_levels, d_def_levels, d_max_def_levels) = - list_lex_preprocess(t, stream); + check_lex_compatibility(t); + auto [verticalized_lhs, new_column_order, new_null_precedence, verticalized_col_depths] = + struct_lex_verticalize(t, column_order, null_precedence); - // auto [verticalized_lhs, new_column_order, new_null_precedence, verticalized_col_depths] = - // struct_lex_verticalize(t, column_order, null_precedence); + std::tie(dremel_data, d_dremel_offsets, d_rep_levels, d_def_levels, d_max_def_levels) = + list_lex_preprocess(verticalized_lhs, stream); - d_lhs = std::make_unique(table_device_view::create(t, stream)); + d_lhs = + std::make_unique(table_device_view::create(verticalized_lhs, stream)); - d_column_order = detail::make_device_uvector_async(column_order, stream); - d_null_precedence = detail::make_device_uvector_async(null_precedence, stream); - // d_depths = detail::make_device_uvector_async(verticalized_col_depths, stream); + d_column_order = detail::make_device_uvector_async(new_column_order, stream); + 
d_null_precedence = detail::make_device_uvector_async(new_null_precedence, stream); + d_depths = detail::make_device_uvector_async(verticalized_col_depths, stream); } row_lex_operator::row_lex_operator(table_view const& lhs, From facc0318568ec00564ce4ae66d764a652a9a7e2d Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Wed, 2 Mar 2022 22:41:02 +0530 Subject: [PATCH 23/78] List lex benchmark --- .../compare/comparator_benchmark.cu | 49 +++++++++++++++---- cpp/include/cudf/table/row_operator3.cuh | 44 ++++++++--------- 2 files changed, 62 insertions(+), 31 deletions(-) diff --git a/cpp/benchmarks/compare/comparator_benchmark.cu b/cpp/benchmarks/compare/comparator_benchmark.cu index 13513b55f64..c7aa5752929 100644 --- a/cpp/benchmarks/compare/comparator_benchmark.cu +++ b/cpp/benchmarks/compare/comparator_benchmark.cu @@ -33,7 +33,7 @@ class Sort : public cudf::benchmark { }; template -static void BM_sort(benchmark::State& state, bool nulls) +static void BM_struct_sort(benchmark::State& state, bool nulls) { using Type = int; using column_wrapper = cudf::test::fixed_width_column_wrapper; @@ -85,13 +85,44 @@ static void BM_sort(benchmark::State& state, bool nulls) } } -#define SORT_BENCHMARK_DEFINE(name, stable, nulls) \ - BENCHMARK_TEMPLATE_DEFINE_F(Sort, name, stable) \ - (::benchmark::State & st) { BM_sort(st, nulls); } \ - BENCHMARK_REGISTER_F(Sort, name) \ - ->RangeMultiplier(8) \ - ->Ranges({{1 << 10, 1 << 26}, {1, 8}}) \ - ->UseManualTime() \ +#define SORT_BENCHMARK_DEFINE(name, stable, nulls) \ + BENCHMARK_TEMPLATE_DEFINE_F(Sort, name, stable) \ + (::benchmark::State & st) { BM_struct_sort(st, nulls); } \ + BENCHMARK_REGISTER_F(Sort, name) \ + ->RangeMultiplier(8) \ + ->Ranges({{1 << 10, 1 << 26}, {1, 8}}) \ + ->UseManualTime() \ ->Unit(benchmark::kMillisecond); -SORT_BENCHMARK_DEFINE(unstable, false, true) +// SORT_BENCHMARK_DEFINE(unstable, false, true) + +template +static void BM_list_sort(benchmark::State& state, bool nulls) +{ + const size_t 
size{(size_t)state.range(0)}; + const cudf::size_type depth{(cudf::size_type)state.range(1)}; + + data_profile table_profile; + table_profile.set_distribution_params(cudf::type_id::LIST, distribution_id::UNIFORM, 0, 5); + table_profile.set_list_depth(depth); + table_profile.set_null_frequency(0); + auto const table = + create_random_table({cudf::type_id::LIST}, 1, table_size_bytes{size}, table_profile); + + for (auto _ : state) { + cuda_event_timer raii(state, true, rmm::cuda_stream_default); + + auto result = cudf::detail::experimental::sorted_order2(*table); + } +} + +#define LIST_SORT_BENCHMARK_DEFINE(name, stable, nulls) \ + BENCHMARK_TEMPLATE_DEFINE_F(Sort, name, stable) \ + (::benchmark::State & st) { BM_list_sort(st, nulls); } \ + BENCHMARK_REGISTER_F(Sort, name) \ + ->RangeMultiplier(256) \ + ->Ranges({{1 << 10, 1 << 27}, {1, 4}}) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +LIST_SORT_BENCHMARK_DEFINE(list, false, true) diff --git a/cpp/include/cudf/table/row_operator3.cuh b/cpp/include/cudf/table/row_operator3.cuh index c769dc0076a..af7cf24269d 100644 --- a/cpp/include/cudf/table/row_operator3.cuh +++ b/cpp/include/cudf/table/row_operator3.cuh @@ -192,36 +192,36 @@ class element_relational_comparator { lcol = lcol.child(lists_column_view::child_column_index); rcol = rcol.child(lists_column_view::child_column_index); } - printf("max_def_level: %d\n", max_def_level); - - printf("t: %d, lhs_element_index: %d, rhs_element_index: %d\n", - threadIdx.x, - lhs_element_index, - rhs_element_index); - printf("t: %d, l_start: %d, l_end: %d, r_start: %d, r_end: %d\n", - threadIdx.x, - l_start, - l_end, - r_start, - r_end); + // printf("max_def_level: %d\n", max_def_level); + + // printf("t: %d, lhs_element_index: %d, rhs_element_index: %d\n", + // threadIdx.x, + // lhs_element_index, + // rhs_element_index); + // printf("t: %d, l_start: %d, l_end: %d, r_start: %d, r_end: %d\n", + // threadIdx.x, + // l_start, + // l_end, + // r_start, + // r_end); 
weak_ordering state{weak_ordering::EQUIVALENT}; for (int i = l_start, j = r_start, m = lc_start, n = rc_start; i < l_end and j < r_end; ++i, ++j) { - printf("t: %d, i: %d, j: %d, m: %d, n: %d\n", threadIdx.x, i, j, m, n); - printf("t: %d, def_l: %d, def_r: %d, rep_l: %d, rep_r: %d\n", - threadIdx.x, - def_level[i], - def_level[j], - rep_level[i], - rep_level[j]); + // printf("t: %d, i: %d, j: %d, m: %d, n: %d\n", threadIdx.x, i, j, m, n); + // printf("t: %d, def_l: %d, def_r: %d, rep_l: %d, rep_r: %d\n", + // threadIdx.x, + // def_level[i], + // def_level[j], + // rep_level[i], + // rep_level[j]); if (def_level[i] != def_level[j]) { state = (def_level[i] < def_level[j]) ? weak_ordering::LESS : weak_ordering::GREATER; - printf("t: %d, def, state: %d\n", threadIdx.x, state); + // printf("t: %d, def, state: %d\n", threadIdx.x, state); return thrust::make_pair(state, depth); } if (rep_level[i] != rep_level[j]) { state = (rep_level[i] < rep_level[j]) ? weak_ordering::LESS : weak_ordering::GREATER; - printf("t: %d, rep, state: %d\n", threadIdx.x, state); + // printf("t: %d, rep, state: %d\n", threadIdx.x, state); return thrust::make_pair(state, depth); } if (def_level[i] == max_def_level) { @@ -229,7 +229,7 @@ class element_relational_comparator { thrust::tie(state, depth) = cudf::type_dispatcher(lcol.type(), comparator, m, n); if (state != weak_ordering::EQUIVALENT) { - printf("t: %d, leaf, state: %d\n", threadIdx.x, state); + // printf("t: %d, leaf, state: %d\n", threadIdx.x, state); return thrust::make_pair(state, depth); } ++m; From 11bcf1652169716cd95100ca1afd049fd13b5d7b Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Wed, 8 Jun 2022 00:30:35 +0530 Subject: [PATCH 24/78] Add back code from old lex comparator that had list flattening --- cpp/CMakeLists.txt | 1 + cpp/src/table/row_operators2.cu | 608 ++++++++++++++++++++++++++++++++ 2 files changed, 609 insertions(+) create mode 100644 cpp/src/table/row_operators2.cu diff --git a/cpp/CMakeLists.txt 
b/cpp/CMakeLists.txt index 438e80b3330..40083764cad 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -520,6 +520,7 @@ add_library( src/structs/structs_column_view.cpp src/structs/utilities.cpp src/table/row_operators.cu + src/table/row_operators2.cu src/table/table.cpp src/table/table_device_view.cu src/table/table_view.cpp diff --git a/cpp/src/table/row_operators2.cu b/cpp/src/table/row_operators2.cu new file mode 100644 index 00000000000..3b9dad03194 --- /dev/null +++ b/cpp/src/table/row_operators2.cu @@ -0,0 +1,608 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "cudf/detail/iterator.cuh" +#include "cudf/detail/utilities/cuda.cuh" +#include "thrust/gather.h" +#include "thrust/iterator/discard_iterator.h" +#include +#include +#include +#include +#include +#include + +#include + +namespace cudf { +namespace experimental { + +namespace { + +auto struct_lex_verticalize(table_view input, + host_span column_order = {}, + host_span null_precedence = {}) +{ + // auto [table, null_masks] = superimpose_parent_nulls(input); + + auto table = input; + std::vector verticalized_columns; + std::vector new_column_order; + std::vector new_null_precedence; + std::vector verticalized_col_depths; + for (size_type col_idx = 0; col_idx < table.num_columns(); ++col_idx) { + auto const& col = table.column(col_idx); + if (col.type().id() == type_id::STRUCT) { + // convert and insert + std::vector r_verticalized_columns; + std::vector r_verticalized_col_depths; + std::vector flattened; + std::vector depths; + // TODO: Here I added a bogus leaf column at the beginning to help in the while loop below. + // Refactor the while loop so that it can handle the last case. 
+ flattened.push_back(make_empty_column(type_id::INT32)->view()); + std::function recursive_child = [&](column_view const& c, + int depth) { + flattened.push_back(c); + depths.push_back(depth); + for (int child_idx = 0; child_idx < c.num_children(); ++child_idx) { + recursive_child(c.child(child_idx), depth + 1); + } + }; + recursive_child(col, 0); + int curr_col_idx = flattened.size() - 1; + column_view curr_col = flattened[curr_col_idx]; + while (curr_col_idx > 0) { + auto const& prev_col = flattened[curr_col_idx - 1]; + if (not is_nested(prev_col.type())) { + // We hit a column that's a leaf so seal this hierarchy + r_verticalized_columns.push_back(curr_col); + r_verticalized_col_depths.push_back(depths[curr_col_idx - 1]); + curr_col = prev_col; + } else { + curr_col = column_view(prev_col.type(), + prev_col.size(), + nullptr, + prev_col.null_mask(), + UNKNOWN_NULL_COUNT, + prev_col.offset(), + {curr_col}); + } + --curr_col_idx; + } + verticalized_columns.insert( + verticalized_columns.end(), r_verticalized_columns.rbegin(), r_verticalized_columns.rend()); + verticalized_col_depths.insert(verticalized_col_depths.end(), + r_verticalized_col_depths.rbegin(), + r_verticalized_col_depths.rend()); + if (not column_order.empty()) { + new_column_order.insert( + new_column_order.end(), r_verticalized_columns.size(), column_order[col_idx]); + } + if (not null_precedence.empty()) { + new_null_precedence.insert( + new_null_precedence.end(), r_verticalized_columns.size(), null_precedence[col_idx]); + } + } else { + verticalized_columns.push_back(col); + } + } + return std::make_tuple(table_view(verticalized_columns), + std::move(new_column_order), + std::move(new_null_precedence), + std::move(verticalized_col_depths)); +} + +struct def_level_fn { + column_device_view const* parent_col; + uint8_t const* d_nullability; + uint8_t sub_level_start; + uint8_t curr_def_level; + + __device__ uint32_t operator()(size_type i) + { + uint32_t def = curr_def_level; + uint8_t l = 
sub_level_start; + bool is_col_struct = false; + auto col = *parent_col; + do { + // If col not nullable then it does not contribute to def levels + if (d_nullability[l]) { + if (not col.nullable() or bit_is_set(col.null_mask(), i)) { + ++def; + } else { // We have found the shallowest level at which this row is null + break; + } + } + is_col_struct = (col.type().id() == type_id::STRUCT); + if (is_col_struct) { + col = col.child(0); + ++l; + } + } while (is_col_struct); + return def; + } +}; + +dremel_data get_dremel_data(column_view h_col, + // TODO(cp): use device_span once it is converted to a single hd_vec + rmm::device_uvector const& d_nullability, + std::vector const& nullability, + rmm::cuda_stream_view stream) +{ + auto get_list_level = [](column_view col) { + while (col.type().id() == type_id::STRUCT) { + col = col.child(0); + } + return col; + }; + + auto get_empties = [&](column_view col, size_type start, size_type end) { + auto lcv = lists_column_view(get_list_level(col)); + rmm::device_uvector empties_idx(lcv.size(), stream); + rmm::device_uvector empties(lcv.size(), stream); + auto d_off = lcv.offsets().data(); + + auto empties_idx_end = + thrust::copy_if(rmm::exec_policy(stream), + thrust::make_counting_iterator(start), + thrust::make_counting_iterator(end), + empties_idx.begin(), + [d_off] __device__(auto i) { return d_off[i] == d_off[i + 1]; }); + auto empties_end = thrust::gather(rmm::exec_policy(stream), + empties_idx.begin(), + empties_idx_end, + lcv.offsets().begin(), + empties.begin()); + + auto empties_size = empties_end - empties.begin(); + return std::make_tuple(std::move(empties), std::move(empties_idx), empties_size); + }; + + auto curr_col = h_col; + std::vector nesting_levels; + std::vector def_at_level; + std::vector start_at_sub_level; + uint8_t curr_nesting_level_idx = 0; + + auto add_def_at_level = [&](column_view col) { + // Add up all def level contributions in this column all the way till the first list column + // appears in the 
hierarchy or until we get to leaf + uint32_t def = 0; + start_at_sub_level.push_back(curr_nesting_level_idx); + while (col.type().id() == type_id::STRUCT) { + def += (nullability[curr_nesting_level_idx]) ? 1 : 0; + col = col.child(0); + ++curr_nesting_level_idx; + } + // At the end of all those structs is either a list column or the leaf. Leaf column contributes + // at least one def level. It doesn't matter what the leaf contributes because it'll be at the + // end of the exclusive scan. + def += (nullability[curr_nesting_level_idx]) ? 2 : 1; + def_at_level.push_back(def); + ++curr_nesting_level_idx; + }; + while (cudf::is_nested(curr_col.type())) { + nesting_levels.push_back(curr_col); + add_def_at_level(curr_col); + while (curr_col.type().id() == type_id::STRUCT) { + // Go down the hierarchy until we get to the LIST or the leaf level + curr_col = curr_col.child(0); + } + if (curr_col.type().id() == type_id::LIST) { + curr_col = curr_col.child(lists_column_view::child_column_index); + if (not is_nested(curr_col.type())) { + // Special case: when the leaf data column is the immediate child of the list col then we + // want it to be included right away. Otherwise the struct containing it will be included in + // the next iteration of this loop. + nesting_levels.push_back(curr_col); + add_def_at_level(curr_col); + break; + } + } + } + + std::unique_ptr device_view_owners; + column_device_view* d_nesting_levels; + std::tie(device_view_owners, d_nesting_levels) = + contiguous_copy_column_device_views(nesting_levels, stream); + + thrust::exclusive_scan( + thrust::host, def_at_level.begin(), def_at_level.end(), def_at_level.begin()); + + // Sliced list column views only have offsets applied to top level. Get offsets for each level. 
+ rmm::device_uvector d_column_offsets(nesting_levels.size(), stream); + rmm::device_uvector d_column_ends(nesting_levels.size(), stream); + + auto d_col = column_device_view::create(h_col, stream); + cudf::detail::device_single_thread( + [offset_at_level = d_column_offsets.data(), + end_idx_at_level = d_column_ends.data(), + col = *d_col] __device__() { + auto curr_col = col; + size_type off = curr_col.offset(); + size_type end = off + curr_col.size(); + size_type level = 0; + offset_at_level[level] = off; + end_idx_at_level[level] = end; + ++level; + // Apply offset recursively until we get to leaf data + // Skip doing the following for any structs we encounter in between. + while (curr_col.type().id() == type_id::LIST or curr_col.type().id() == type_id::STRUCT) { + if (curr_col.type().id() == type_id::LIST) { + off = curr_col.child(lists_column_view::offsets_column_index).element(off); + end = curr_col.child(lists_column_view::offsets_column_index).element(end); + offset_at_level[level] = off; + end_idx_at_level[level] = end; + ++level; + curr_col = curr_col.child(lists_column_view::child_column_index); + } else { + curr_col = curr_col.child(0); + } + } + }, + stream); + + thrust::host_vector column_offsets = + cudf::detail::make_host_vector_async(d_column_offsets, stream); + thrust::host_vector column_ends = + cudf::detail::make_host_vector_async(d_column_ends, stream); + stream.synchronize(); + + size_t max_vals_size = 0; + for (size_t l = 0; l < column_offsets.size(); ++l) { + max_vals_size += column_ends[l] - column_offsets[l]; + } + + rmm::device_uvector rep_level(max_vals_size, stream); + rmm::device_uvector def_level(max_vals_size, stream); + + rmm::device_uvector temp_rep_vals(max_vals_size, stream); + rmm::device_uvector temp_def_vals(max_vals_size, stream); + rmm::device_uvector new_offsets(0, stream); + size_type curr_rep_values_size = 0; + { + // At this point, curr_col contains the leaf column. Max nesting level is + // nesting_levels.size(). 
+ + // We are going to start by merging the last column in nesting_levels (the leaf, which is at the + // index `nesting_levels.size() - 1`) with the second-to-last (which is at + // `nesting_levels.size() - 2`). + size_t level = nesting_levels.size() - 2; + curr_col = nesting_levels[level]; + auto lcv = lists_column_view(get_list_level(curr_col)); + auto offset_size_at_level = column_ends[level] - column_offsets[level] + 1; + + // Get empties at this level + rmm::device_uvector empties(0, stream); + rmm::device_uvector empties_idx(0, stream); + size_t empties_size; + std::tie(empties, empties_idx, empties_size) = + get_empties(nesting_levels[level], column_offsets[level], column_ends[level]); + + // Merge empty at deepest parent level with the rep, def level vals at leaf level + + auto input_parent_rep_it = thrust::make_constant_iterator(level); + auto input_parent_def_it = + thrust::make_transform_iterator(empties_idx.begin(), + def_level_fn{d_nesting_levels + level, + d_nullability.data(), + start_at_sub_level[level], + def_at_level[level]}); + + // `nesting_levels.size()` == no of list levels + leaf. 
Max repetition level = no of list levels + auto input_child_rep_it = thrust::make_constant_iterator(nesting_levels.size() - 1); + auto input_child_def_it = + thrust::make_transform_iterator(thrust::make_counting_iterator(column_offsets[level + 1]), + def_level_fn{d_nesting_levels + level + 1, + d_nullability.data(), + start_at_sub_level[level + 1], + def_at_level[level + 1]}); + + // Zip the input and output value iterators so that merge operation is done only once + auto input_parent_zip_it = + thrust::make_zip_iterator(thrust::make_tuple(input_parent_rep_it, input_parent_def_it)); + + auto input_child_zip_it = + thrust::make_zip_iterator(thrust::make_tuple(input_child_rep_it, input_child_def_it)); + + auto output_zip_it = + thrust::make_zip_iterator(thrust::make_tuple(rep_level.begin(), def_level.begin())); + + auto ends = thrust::merge_by_key(rmm::exec_policy(stream), + empties.begin(), + empties.begin() + empties_size, + thrust::make_counting_iterator(column_offsets[level + 1]), + thrust::make_counting_iterator(column_ends[level + 1]), + input_parent_zip_it, + input_child_zip_it, + thrust::make_discard_iterator(), + output_zip_it); + + curr_rep_values_size = ends.second - output_zip_it; + + // Scan to get distance by which each offset value is shifted due to the insertion of empties + auto scan_it = cudf::detail::make_counting_transform_iterator( + column_offsets[level], + [off = lcv.offsets().data(), size = lcv.offsets().size()] __device__( + auto i) -> int { return (i + 1 < size) && (off[i] == off[i + 1]); }); + rmm::device_uvector scan_out(offset_size_at_level, stream); + thrust::exclusive_scan( + rmm::exec_policy(stream), scan_it, scan_it + offset_size_at_level, scan_out.begin()); + + // Add scan output to existing offsets to get new offsets into merged rep level values + new_offsets = rmm::device_uvector(offset_size_at_level, stream); + thrust::for_each_n(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + offset_size_at_level, + [off = 
lcv.offsets().data() + column_offsets[level], + scan_out = scan_out.data(), + new_off = new_offsets.data()] __device__(auto i) { + new_off[i] = off[i] - off[0] + scan_out[i]; + }); + + // Set rep level values at level starts to appropriate rep level + auto scatter_it = thrust::make_constant_iterator(level); + thrust::scatter(rmm::exec_policy(stream), + scatter_it, + scatter_it + new_offsets.size() - 1, + new_offsets.begin(), + rep_level.begin()); + } + + // Having already merged the last two levels, we are now going to merge the result with the + // third-last level which is at index `nesting_levels.size() - 3`. + for (int level = nesting_levels.size() - 3; level >= 0; level--) { + curr_col = nesting_levels[level]; + auto lcv = lists_column_view(get_list_level(curr_col)); + auto offset_size_at_level = column_ends[level] - column_offsets[level] + 1; + + // Get empties at this level + rmm::device_uvector empties(0, stream); + rmm::device_uvector empties_idx(0, stream); + size_t empties_size; + std::tie(empties, empties_idx, empties_size) = + get_empties(nesting_levels[level], column_offsets[level], column_ends[level]); + + auto offset_transformer = [new_child_offsets = new_offsets.data(), + child_start = column_offsets[level + 1]] __device__(auto x) { + return new_child_offsets[x - child_start]; // (x - child's offset) + }; + + // We will be reading from old rep_levels and writing again to rep_levels. Swap the current + // rep values into temp_rep_vals so it can become the input and rep_levels can again be output. 
+ std::swap(temp_rep_vals, rep_level); + std::swap(temp_def_vals, def_level); + + // Merge empty at parent level with the rep, def level vals at current level + auto transformed_empties = thrust::make_transform_iterator(empties.begin(), offset_transformer); + + auto input_parent_rep_it = thrust::make_constant_iterator(level); + auto input_parent_def_it = + thrust::make_transform_iterator(empties_idx.begin(), + def_level_fn{d_nesting_levels + level, + d_nullability.data(), + start_at_sub_level[level], + def_at_level[level]}); + + // Zip the input and output value iterators so that merge operation is done only once + auto input_parent_zip_it = + thrust::make_zip_iterator(thrust::make_tuple(input_parent_rep_it, input_parent_def_it)); + + auto input_child_zip_it = + thrust::make_zip_iterator(thrust::make_tuple(temp_rep_vals.begin(), temp_def_vals.begin())); + + auto output_zip_it = + thrust::make_zip_iterator(thrust::make_tuple(rep_level.begin(), def_level.begin())); + + auto ends = thrust::merge_by_key(rmm::exec_policy(stream), + transformed_empties, + transformed_empties + empties_size, + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(curr_rep_values_size), + input_parent_zip_it, + input_child_zip_it, + thrust::make_discard_iterator(), + output_zip_it); + + curr_rep_values_size = ends.second - output_zip_it; + + // Scan to get distance by which each offset value is shifted due to the insertion of dremel + // level value fof an empty list + auto scan_it = cudf::detail::make_counting_transform_iterator( + column_offsets[level], + [off = lcv.offsets().data(), size = lcv.offsets().size()] __device__( + auto i) -> int { return (i + 1 < size) && (off[i] == off[i + 1]); }); + rmm::device_uvector scan_out(offset_size_at_level, stream); + thrust::exclusive_scan( + rmm::exec_policy(stream), scan_it, scan_it + offset_size_at_level, scan_out.begin()); + + // Add scan output to existing offsets to get new offsets into merged rep level values + 
rmm::device_uvector temp_new_offsets(offset_size_at_level, stream); + thrust::for_each_n(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + offset_size_at_level, + [off = lcv.offsets().data() + column_offsets[level], + scan_out = scan_out.data(), + new_off = temp_new_offsets.data(), + offset_transformer] __device__(auto i) { + new_off[i] = offset_transformer(off[i]) + scan_out[i]; + }); + new_offsets = std::move(temp_new_offsets); + + // Set rep level values at level starts to appropriate rep level + auto scatter_it = thrust::make_constant_iterator(level); + thrust::scatter(rmm::exec_policy(stream), + scatter_it, + scatter_it + new_offsets.size() - 1, + new_offsets.begin(), + rep_level.begin()); + } + + size_t level_vals_size = new_offsets.back_element(stream); + rep_level.resize(level_vals_size, stream); + def_level.resize(level_vals_size, stream); + + stream.synchronize(); + + size_type leaf_data_size = column_ends.back() - column_offsets.back(); + + return dremel_data{ + std::move(new_offsets), std::move(rep_level), std::move(def_level), leaf_data_size}; +} + +auto list_lex_preprocess(table_view table, rmm::cuda_stream_view stream) +{ + std::vector dremel_data; + std::vector max_def_levels; + for (auto const& col : table) { + if (col.type().id() == type_id::LIST) { + // Check nullability of the list + std::vector nullability; + auto cur_col = col; + uint8_t max_def_level = 0; + while (cur_col.type().id() == type_id::LIST) { + max_def_level += (cur_col.nullable() ? 2 : 1); + nullability.push_back(static_cast(cur_col.nullable())); + cur_col = cur_col.child(lists_column_view::child_column_index); + } + max_def_level += (cur_col.nullable() ? 
1 : 0); + nullability.push_back(static_cast(cur_col.nullable())); + auto d_nullability = detail::make_device_uvector_async(nullability, stream); + dremel_data.push_back(get_dremel_data(col, d_nullability, nullability, stream)); + max_def_levels.push_back(max_def_level); + } else { + max_def_levels.push_back(0); + } + } + + std::vector dremel_offsets; + std::vector rep_levels; + std::vector def_levels; + size_type c = 0; + for (auto const& col : table) { + if (col.type().id() == type_id::LIST) { + dremel_offsets.push_back(dremel_data[c].dremel_offsets.data()); + rep_levels.push_back(dremel_data[c].rep_level.data()); + def_levels.push_back(dremel_data[c].def_level.data()); + ++c; + } else { + dremel_offsets.push_back(nullptr); + rep_levels.push_back(nullptr); + def_levels.push_back(nullptr); + } + } + auto d_dremel_offsets = detail::make_device_uvector_async(dremel_offsets, stream); + auto d_rep_levels = detail::make_device_uvector_async(rep_levels, stream); + auto d_def_levels = detail::make_device_uvector_async(def_levels, stream); + auto d_max_def_levels = detail::make_device_uvector_async(max_def_levels, stream); + return std::make_tuple(std::move(dremel_data), + std::move(d_dremel_offsets), + std::move(d_rep_levels), + std::move(d_def_levels), + std::move(d_max_def_levels)); +} + +void check_lex_compatibility(table_view const& input) +{ + // Basically check if there's any LIST of STRUCT or STRUCT of LIST hiding anywhere in the table + std::function check_column = [&](column_view const& c) { + if (c.type().id() == type_id::LIST) { + CUDF_EXPECTS(c.child(lists_column_view::child_column_index).type().id() != type_id::STRUCT, + "List of structs are not supported"); + } + for (int i = 0; i < c.num_children(); ++i) { + if (c.type().id() == type_id::STRUCT) { + CUDF_EXPECTS(c.child(i).type().id() != type_id::LIST, "Struct of Lists are not supported"); + } + check_column(c.child(i)); + } + }; + for (column_view const& c : input) { + check_column(c); + } +} + +void 
check_shape_compatibility(table_view const& lhs, table_view const& rhs) +{ + std::function check_column = + [&](column_view const& l, column_view const& r) { + CUDF_EXPECTS(l.type().id() == r.type().id(), + "Cannot compare tables with different column types"); + CUDF_EXPECTS(l.num_children() == r.num_children(), "Mismatched number of children"); + for (size_type i = 0; i < l.num_children(); ++i) { + check_column(l.child(i), r.child(i)); + } + }; + + CUDF_EXPECTS(lhs.num_columns() == rhs.num_columns(), + "Cannot compare tables with different number of columns"); + for (size_type i = 0; i < lhs.num_columns(); ++i) { + check_column(lhs.column(i), rhs.column(i)); + } +} + +} // namespace + +row_lex_operator::row_lex_operator(table_view const& t, + host_span column_order, + host_span null_precedence, + rmm::cuda_stream_view stream) + : d_column_order(0, stream), + d_null_precedence(0, stream), + d_depths(0, stream), + d_dremel_offsets(0, stream), + d_rep_levels(0, stream), + d_def_levels(0, stream), + d_max_def_levels(0, stream), + any_nulls(has_nested_nulls(t)) +{ + check_lex_compatibility(t); + auto [verticalized_lhs, new_column_order, new_null_precedence, verticalized_col_depths] = + struct_lex_verticalize(t, column_order, null_precedence); + + std::tie(dremel_data, d_dremel_offsets, d_rep_levels, d_def_levels, d_max_def_levels) = + list_lex_preprocess(verticalized_lhs, stream); + + d_lhs = + std::make_unique(table_device_view::create(verticalized_lhs, stream)); + + d_column_order = detail::make_device_uvector_async(new_column_order, stream); + d_null_precedence = detail::make_device_uvector_async(new_null_precedence, stream); + d_depths = detail::make_device_uvector_async(verticalized_col_depths, stream); +} + +row_lex_operator::row_lex_operator(table_view const& lhs, + table_view const& rhs, + host_span column_order, + host_span null_precedence, + rmm::cuda_stream_view stream) + : row_lex_operator(lhs, column_order, null_precedence, stream) +{ + 
check_lex_compatibility(rhs); + check_shape_compatibility(lhs, rhs); + + table_view verticalized_rhs; + std::tie(verticalized_rhs, std::ignore, std::ignore, std::ignore) = struct_lex_verticalize(rhs); + + d_rhs = + std::make_unique(table_device_view::create(verticalized_rhs, stream)); + + any_nulls |= has_nested_nulls(rhs); +} + +} // namespace experimental +} // namespace cudf From 53f4418ff755c45dd53545dc4d54c2cd02f0edd4 Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Tue, 14 Jun 2022 22:44:43 +0530 Subject: [PATCH 25/78] Move list lex code to experimental header --- .../cudf/table/experimental/row_operators.cuh | 184 +++++++- cpp/src/table/row_operators.cu | 429 +++++++++++++++++- 2 files changed, 593 insertions(+), 20 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index cee117247b3..620251661f0 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -71,6 +71,21 @@ struct dispatch_void_if_nested { using type = std::conditional_t>; }; +inline size_type __device__ row_to_value_idx(size_type idx, column_device_view col) +{ + while (col.type().id() == type_id::LIST or col.type().id() == type_id::STRUCT) { + if (col.type().id() == type_id::STRUCT) { + idx += col.offset(); + col = col.child(0); + } else { + auto offset_col = col.child(lists_column_view::offsets_column_index); + idx = offset_col.element(idx + col.offset()); + col = col.child(lists_column_view::child_column_index); + } + } + return idx; +} + namespace row { enum class lhs_index_type : size_type {}; @@ -246,19 +261,28 @@ class device_row_comparator { * `null_order::BEFORE` for all columns. * @param comparator Physical element relational comparison functor. 
*/ - device_row_comparator(Nullate check_nulls, - table_device_view lhs, - table_device_view rhs, - std::optional> depth = std::nullopt, - std::optional> column_order = std::nullopt, - std::optional> null_precedence = std::nullopt, - PhysicalElementComparator comparator = {}) noexcept + device_row_comparator( + Nullate check_nulls, + table_device_view lhs, + table_device_view rhs, + std::optional> depth = std::nullopt, + std::optional> column_order = std::nullopt, + std::optional> null_precedence = std::nullopt, + PhysicalElementComparator comparator = {}, + std::optional> dremel_offsets = std::nullopt, + std::optional> rep_levels = std::nullopt, + std::optional> def_levels = std::nullopt, + std::optional> max_def_levels = std::nullopt) noexcept : _lhs{lhs}, _rhs{rhs}, _check_nulls{check_nulls}, _depth{depth}, _column_order{column_order}, _null_precedence{null_precedence}, + _dremel_offsets{dremel_offsets}, + _rep_levels{rep_levels}, + _def_levels{def_levels}, + _max_def_levels{max_def_levels}, _comparator{comparator} { } @@ -287,12 +311,20 @@ class device_row_comparator { column_device_view rhs, null_order null_precedence = null_order::BEFORE, int depth = 0, - PhysicalElementComparator comparator = {}) + PhysicalElementComparator comparator = {}, + size_type* dremel_offsets = nullptr, + uint8_t* rep_level = nullptr, + uint8_t* def_level = nullptr, + uint8_t max_def_level = 0) : _lhs{lhs}, _rhs{rhs}, _check_nulls{check_nulls}, _null_precedence{null_precedence}, _depth{depth}, + dremel_offsets{dremel_offsets}, + rep_level{rep_level}, + def_level{def_level}, + max_def_level{max_def_level}, _comparator{comparator} { } @@ -366,12 +398,61 @@ class device_row_comparator { rhs_element_index); } + template () and + std::is_same_v)> + __device__ cuda::std::pair operator()(size_type lhs_element_index, + size_type rhs_element_index) + { + auto l_start = dremel_offsets[lhs_element_index]; + auto l_end = dremel_offsets[lhs_element_index + 1]; + auto r_start = 
dremel_offsets[rhs_element_index]; + auto r_end = dremel_offsets[rhs_element_index + 1]; + auto lc_start = row_to_value_idx(lhs_element_index, _lhs); + auto rc_start = row_to_value_idx(rhs_element_index, _rhs); + column_device_view lcol = _lhs; + column_device_view rcol = _rhs; + while (lcol.type().id() == type_id::LIST) { + lcol = lcol.child(lists_column_view::child_column_index); + rcol = rcol.child(lists_column_view::child_column_index); + } + weak_ordering state{weak_ordering::EQUIVALENT}; + for (int i = l_start, j = r_start, m = lc_start, n = rc_start; i < l_end and j < r_end; + ++i, ++j) { + if (def_level[i] != def_level[j]) { + state = (def_level[i] < def_level[j]) ? weak_ordering::LESS : weak_ordering::GREATER; + return cuda::std::pair(state, _depth); + } + if (rep_level[i] != rep_level[j]) { + state = (rep_level[i] < rep_level[j]) ? weak_ordering::LESS : weak_ordering::GREATER; + return cuda::std::pair(state, _depth); + } + if (def_level[i] == max_def_level) { + auto comparator = element_comparator{_check_nulls, lcol, rcol, _null_precedence}; + int last_null_depth = _depth; + cuda::std::tie(state, last_null_depth) = + cudf::type_dispatcher(lcol.type(), comparator, m, n); + if (state != weak_ordering::EQUIVALENT) { return cuda::std::pair(state, _depth); } + ++m; + ++n; + } + } + state = (l_end - l_start < r_end - r_start) ? weak_ordering::LESS + : (l_end - l_start > r_end - r_start) ? weak_ordering::GREATER + : weak_ordering::EQUIVALENT; + return cuda::std::pair(state, _depth); + } + private: column_device_view const _lhs; column_device_view const _rhs; Nullate const _check_nulls; null_order const _null_precedence; int const _depth; + size_type* dremel_offsets; + uint8_t* rep_level; + uint8_t* def_level; + uint8_t max_def_level{0}; PhysicalElementComparator const _comparator; }; @@ -399,13 +480,12 @@ class device_row_comparator { null_order const null_precedence = _null_precedence.has_value() ? 
(*_null_precedence)[i] : null_order::BEFORE; + auto element_comp = element_comparator{ + _check_nulls, _lhs.column(i), _rhs.column(i), null_precedence, depth, _comparator}; + weak_ordering state; - cuda::std::tie(state, last_null_depth) = cudf::type_dispatcher( - _lhs.column(i).type(), - element_comparator{ - _check_nulls, _lhs.column(i), _rhs.column(i), null_precedence, depth, _comparator}, - lhs_index, - rhs_index); + cuda::std::tie(state, last_null_depth) = + cudf::type_dispatcher(_lhs.column(i).type(), element_comp, lhs_index, rhs_index); if (state == weak_ordering::EQUIVALENT) { continue; } @@ -424,6 +504,12 @@ class device_row_comparator { std::optional> const _column_order; std::optional> const _null_precedence; PhysicalElementComparator const _comparator; + + // List related members + std::optional> _dremel_offsets; + std::optional> _rep_levels; + std::optional> _def_levels; + std::optional> _max_def_levels; }; // class device_row_comparator /** @@ -493,6 +579,19 @@ struct less_equivalent_comparator } }; +/** + * @brief Dremel data that describes one nested type column + * + * @see get_dremel_data() + */ +struct dremel_data { + rmm::device_uvector dremel_offsets; + rmm::device_uvector rep_level; + rmm::device_uvector def_level; + + size_type leaf_data_size; +}; + /** * @brief Preprocessed table for use with lexicographical comparison * @@ -530,11 +629,20 @@ struct preprocessed_table { preprocessed_table(table_device_view_owner&& table, rmm::device_uvector&& column_order, rmm::device_uvector&& null_precedence, - rmm::device_uvector&& depths) + rmm::device_uvector&& depths, + std::vector&& dremel_data, + rmm::device_uvector&& dremel_offsets, + rmm::device_uvector&& rep_levels, + rmm::device_uvector&& def_levels, + rmm::device_uvector&& max_def_levels) : _t(std::move(table)), _column_order(std::move(column_order)), _null_precedence(std::move(null_precedence)), - _depths(std::move(depths)){}; + _depths(std::move(depths)), + 
_dremel_offsets(std::move(dremel_offsets)), + _rep_levels(std::move(rep_levels)), + _def_levels(std::move(def_levels)), + _max_def_levels(std::move(max_def_levels)){}; /** * @brief Implicit conversion operator to a `table_device_view` of the preprocessed table. @@ -583,11 +691,42 @@ struct preprocessed_table { return _depths.size() ? std::optional>(_depths) : std::nullopt; } + [[nodiscard]] std::optional> dremel_offsets() const + { + return _dremel_offsets.size() ? std::optional>(_dremel_offsets) + : std::nullopt; + } + + [[nodiscard]] std::optional> rep_levels() const + { + return _rep_levels.size() ? std::optional>(_rep_levels) + : std::nullopt; + } + + [[nodiscard]] std::optional> def_levels() const + { + return _def_levels.size() ? std::optional>(_def_levels) + : std::nullopt; + } + + [[nodiscard]] std::optional> max_def_levels() const + { + return _max_def_levels.size() ? std::optional>(_max_def_levels) + : std::nullopt; + } + private: table_device_view_owner const _t; rmm::device_uvector const _column_order; rmm::device_uvector const _null_precedence; rmm::device_uvector const _depths; + + // List related pre-computation + std::vector _dremel_data; + rmm::device_uvector _dremel_offsets; + rmm::device_uvector _rep_levels; + rmm::device_uvector _def_levels; + rmm::device_uvector _max_def_levels; }; /** @@ -657,8 +796,17 @@ class self_comparator { typename PhysicalElementComparator = sorting_physical_element_comparator> auto less(Nullate nullate = {}, PhysicalElementComparator comparator = {}) const noexcept { - return less_comparator{device_row_comparator{ - nullate, *d_t, *d_t, d_t->depths(), d_t->column_order(), d_t->null_precedence(), comparator}}; + return less_comparator{device_row_comparator{nullate, + *d_t, + *d_t, + d_t->depths(), + d_t->column_order(), + d_t->null_precedence(), + comparator, + d_t->dremel_offsets(), + d_t->rep_levels(), + d_t->def_levels(), + d_t->max_def_levels()}}; } /// @copydoc less() diff --git a/cpp/src/table/row_operators.cu 
b/cpp/src/table/row_operators.cu index f6ca9a04e6d..e81919cee0c 100644 --- a/cpp/src/table/row_operators.cu +++ b/cpp/src/table/row_operators.cu @@ -14,6 +14,10 @@ * limitations under the License. */ +#include "cudf/detail/utilities/cuda.cuh" +#include "rmm/exec_policy.hpp" +#include "thrust/gather.h" +#include "thrust/iterator/discard_iterator.h" #include #include #include @@ -253,6 +257,417 @@ auto decompose_structs(table_view table, std::move(verticalized_col_depths)); } +struct def_level_fn { + column_device_view const* parent_col; + uint8_t const* d_nullability; + uint8_t sub_level_start; + uint8_t curr_def_level; + + __device__ uint32_t operator()(size_type i) + { + uint32_t def = curr_def_level; + uint8_t l = sub_level_start; + bool is_col_struct = false; + auto col = *parent_col; + do { + // If col not nullable then it does not contribute to def levels + if (d_nullability[l]) { + if (not col.nullable() or bit_is_set(col.null_mask(), i)) { + ++def; + } else { // We have found the shallowest level at which this row is null + break; + } + } + is_col_struct = (col.type().id() == type_id::STRUCT); + if (is_col_struct) { + col = col.child(0); + ++l; + } + } while (is_col_struct); + return def; + } +}; + +row::lexicographic::dremel_data get_dremel_data( + column_view h_col, + // TODO(cp): use device_span once it is converted to a single hd_vec + rmm::device_uvector const& d_nullability, + std::vector const& nullability, + rmm::cuda_stream_view stream) +{ + auto get_list_level = [](column_view col) { + while (col.type().id() == type_id::STRUCT) { + col = col.child(0); + } + return col; + }; + + auto get_empties = [&](column_view col, size_type start, size_type end) { + auto lcv = lists_column_view(get_list_level(col)); + rmm::device_uvector empties_idx(lcv.size(), stream); + rmm::device_uvector empties(lcv.size(), stream); + auto d_off = lcv.offsets().data(); + + auto empties_idx_end = + thrust::copy_if(rmm::exec_policy(stream), + 
thrust::make_counting_iterator(start), + thrust::make_counting_iterator(end), + empties_idx.begin(), + [d_off] __device__(auto i) { return d_off[i] == d_off[i + 1]; }); + auto empties_end = thrust::gather(rmm::exec_policy(stream), + empties_idx.begin(), + empties_idx_end, + lcv.offsets().begin(), + empties.begin()); + + auto empties_size = empties_end - empties.begin(); + return std::make_tuple(std::move(empties), std::move(empties_idx), empties_size); + }; + + auto curr_col = h_col; + std::vector nesting_levels; + std::vector def_at_level; + std::vector start_at_sub_level; + uint8_t curr_nesting_level_idx = 0; + + auto add_def_at_level = [&](column_view col) { + // Add up all def level contributions in this column all the way till the first list column + // appears in the hierarchy or until we get to leaf + uint32_t def = 0; + start_at_sub_level.push_back(curr_nesting_level_idx); + while (col.type().id() == type_id::STRUCT) { + def += (nullability[curr_nesting_level_idx]) ? 1 : 0; + col = col.child(0); + ++curr_nesting_level_idx; + } + // At the end of all those structs is either a list column or the leaf. Leaf column contributes + // at least one def level. It doesn't matter what the leaf contributes because it'll be at the + // end of the exclusive scan. + def += (nullability[curr_nesting_level_idx]) ? 2 : 1; + def_at_level.push_back(def); + ++curr_nesting_level_idx; + }; + while (cudf::is_nested(curr_col.type())) { + nesting_levels.push_back(curr_col); + add_def_at_level(curr_col); + while (curr_col.type().id() == type_id::STRUCT) { + // Go down the hierarchy until we get to the LIST or the leaf level + curr_col = curr_col.child(0); + } + if (curr_col.type().id() == type_id::LIST) { + curr_col = curr_col.child(lists_column_view::child_column_index); + if (not is_nested(curr_col.type())) { + // Special case: when the leaf data column is the immediate child of the list col then we + // want it to be included right away. 
Otherwise the struct containing it will be included in + // the next iteration of this loop. + nesting_levels.push_back(curr_col); + add_def_at_level(curr_col); + break; + } + } + } + + std::unique_ptr device_view_owners; + column_device_view* d_nesting_levels; + std::tie(device_view_owners, d_nesting_levels) = + contiguous_copy_column_device_views(nesting_levels, stream); + + thrust::exclusive_scan( + thrust::host, def_at_level.begin(), def_at_level.end(), def_at_level.begin()); + + // Sliced list column views only have offsets applied to top level. Get offsets for each level. + rmm::device_uvector d_column_offsets(nesting_levels.size(), stream); + rmm::device_uvector d_column_ends(nesting_levels.size(), stream); + + auto d_col = column_device_view::create(h_col, stream); + cudf::detail::device_single_thread( + [offset_at_level = d_column_offsets.data(), + end_idx_at_level = d_column_ends.data(), + col = *d_col] __device__() { + auto curr_col = col; + size_type off = curr_col.offset(); + size_type end = off + curr_col.size(); + size_type level = 0; + offset_at_level[level] = off; + end_idx_at_level[level] = end; + ++level; + // Apply offset recursively until we get to leaf data + // Skip doing the following for any structs we encounter in between. 
+ while (curr_col.type().id() == type_id::LIST or curr_col.type().id() == type_id::STRUCT) { + if (curr_col.type().id() == type_id::LIST) { + off = curr_col.child(lists_column_view::offsets_column_index).element(off); + end = curr_col.child(lists_column_view::offsets_column_index).element(end); + offset_at_level[level] = off; + end_idx_at_level[level] = end; + ++level; + curr_col = curr_col.child(lists_column_view::child_column_index); + } else { + curr_col = curr_col.child(0); + } + } + }, + stream); + + thrust::host_vector column_offsets = + cudf::detail::make_host_vector_async(d_column_offsets, stream); + thrust::host_vector column_ends = + cudf::detail::make_host_vector_async(d_column_ends, stream); + stream.synchronize(); + + size_t max_vals_size = 0; + for (size_t l = 0; l < column_offsets.size(); ++l) { + max_vals_size += column_ends[l] - column_offsets[l]; + } + + rmm::device_uvector rep_level(max_vals_size, stream); + rmm::device_uvector def_level(max_vals_size, stream); + + rmm::device_uvector temp_rep_vals(max_vals_size, stream); + rmm::device_uvector temp_def_vals(max_vals_size, stream); + rmm::device_uvector new_offsets(0, stream); + size_type curr_rep_values_size = 0; + { + // At this point, curr_col contains the leaf column. Max nesting level is + // nesting_levels.size(). + + // We are going to start by merging the last column in nesting_levels (the leaf, which is at the + // index `nesting_levels.size() - 1`) with the second-to-last (which is at + // `nesting_levels.size() - 2`). 
+ size_t level = nesting_levels.size() - 2; + curr_col = nesting_levels[level]; + auto lcv = lists_column_view(get_list_level(curr_col)); + auto offset_size_at_level = column_ends[level] - column_offsets[level] + 1; + + // Get empties at this level + rmm::device_uvector empties(0, stream); + rmm::device_uvector empties_idx(0, stream); + size_t empties_size; + std::tie(empties, empties_idx, empties_size) = + get_empties(nesting_levels[level], column_offsets[level], column_ends[level]); + + // Merge empty at deepest parent level with the rep, def level vals at leaf level + + auto input_parent_rep_it = thrust::make_constant_iterator(level); + auto input_parent_def_it = + thrust::make_transform_iterator(empties_idx.begin(), + def_level_fn{d_nesting_levels + level, + d_nullability.data(), + start_at_sub_level[level], + def_at_level[level]}); + + // `nesting_levels.size()` == no of list levels + leaf. Max repetition level = no of list levels + auto input_child_rep_it = thrust::make_constant_iterator(nesting_levels.size() - 1); + auto input_child_def_it = + thrust::make_transform_iterator(thrust::make_counting_iterator(column_offsets[level + 1]), + def_level_fn{d_nesting_levels + level + 1, + d_nullability.data(), + start_at_sub_level[level + 1], + def_at_level[level + 1]}); + + // Zip the input and output value iterators so that merge operation is done only once + auto input_parent_zip_it = + thrust::make_zip_iterator(thrust::make_tuple(input_parent_rep_it, input_parent_def_it)); + + auto input_child_zip_it = + thrust::make_zip_iterator(thrust::make_tuple(input_child_rep_it, input_child_def_it)); + + auto output_zip_it = + thrust::make_zip_iterator(thrust::make_tuple(rep_level.begin(), def_level.begin())); + + auto ends = thrust::merge_by_key(rmm::exec_policy(stream), + empties.begin(), + empties.begin() + empties_size, + thrust::make_counting_iterator(column_offsets[level + 1]), + thrust::make_counting_iterator(column_ends[level + 1]), + input_parent_zip_it, + 
input_child_zip_it, + thrust::make_discard_iterator(), + output_zip_it); + + curr_rep_values_size = ends.second - output_zip_it; + + // Scan to get distance by which each offset value is shifted due to the insertion of empties + auto scan_it = cudf::detail::make_counting_transform_iterator( + column_offsets[level], + [off = lcv.offsets().data(), size = lcv.offsets().size()] __device__( + auto i) -> int { return (i + 1 < size) && (off[i] == off[i + 1]); }); + rmm::device_uvector scan_out(offset_size_at_level, stream); + thrust::exclusive_scan( + rmm::exec_policy(stream), scan_it, scan_it + offset_size_at_level, scan_out.begin()); + + // Add scan output to existing offsets to get new offsets into merged rep level values + new_offsets = rmm::device_uvector(offset_size_at_level, stream); + thrust::for_each_n(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + offset_size_at_level, + [off = lcv.offsets().data() + column_offsets[level], + scan_out = scan_out.data(), + new_off = new_offsets.data()] __device__(auto i) { + new_off[i] = off[i] - off[0] + scan_out[i]; + }); + + // Set rep level values at level starts to appropriate rep level + auto scatter_it = thrust::make_constant_iterator(level); + thrust::scatter(rmm::exec_policy(stream), + scatter_it, + scatter_it + new_offsets.size() - 1, + new_offsets.begin(), + rep_level.begin()); + } + + // Having already merged the last two levels, we are now going to merge the result with the + // third-last level which is at index `nesting_levels.size() - 3`. 
+ for (int level = nesting_levels.size() - 3; level >= 0; level--) { + curr_col = nesting_levels[level]; + auto lcv = lists_column_view(get_list_level(curr_col)); + auto offset_size_at_level = column_ends[level] - column_offsets[level] + 1; + + // Get empties at this level + rmm::device_uvector empties(0, stream); + rmm::device_uvector empties_idx(0, stream); + size_t empties_size; + std::tie(empties, empties_idx, empties_size) = + get_empties(nesting_levels[level], column_offsets[level], column_ends[level]); + + auto offset_transformer = [new_child_offsets = new_offsets.data(), + child_start = column_offsets[level + 1]] __device__(auto x) { + return new_child_offsets[x - child_start]; // (x - child's offset) + }; + + // We will be reading from old rep_levels and writing again to rep_levels. Swap the current + // rep values into temp_rep_vals so it can become the input and rep_levels can again be output. + std::swap(temp_rep_vals, rep_level); + std::swap(temp_def_vals, def_level); + + // Merge empty at parent level with the rep, def level vals at current level + auto transformed_empties = thrust::make_transform_iterator(empties.begin(), offset_transformer); + + auto input_parent_rep_it = thrust::make_constant_iterator(level); + auto input_parent_def_it = + thrust::make_transform_iterator(empties_idx.begin(), + def_level_fn{d_nesting_levels + level, + d_nullability.data(), + start_at_sub_level[level], + def_at_level[level]}); + + // Zip the input and output value iterators so that merge operation is done only once + auto input_parent_zip_it = + thrust::make_zip_iterator(thrust::make_tuple(input_parent_rep_it, input_parent_def_it)); + + auto input_child_zip_it = + thrust::make_zip_iterator(thrust::make_tuple(temp_rep_vals.begin(), temp_def_vals.begin())); + + auto output_zip_it = + thrust::make_zip_iterator(thrust::make_tuple(rep_level.begin(), def_level.begin())); + + auto ends = thrust::merge_by_key(rmm::exec_policy(stream), + transformed_empties, + 
transformed_empties + empties_size, + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(curr_rep_values_size), + input_parent_zip_it, + input_child_zip_it, + thrust::make_discard_iterator(), + output_zip_it); + + curr_rep_values_size = ends.second - output_zip_it; + + // Scan to get distance by which each offset value is shifted due to the insertion of dremel + // level value fof an empty list + auto scan_it = cudf::detail::make_counting_transform_iterator( + column_offsets[level], + [off = lcv.offsets().data(), size = lcv.offsets().size()] __device__( + auto i) -> int { return (i + 1 < size) && (off[i] == off[i + 1]); }); + rmm::device_uvector scan_out(offset_size_at_level, stream); + thrust::exclusive_scan( + rmm::exec_policy(stream), scan_it, scan_it + offset_size_at_level, scan_out.begin()); + + // Add scan output to existing offsets to get new offsets into merged rep level values + rmm::device_uvector temp_new_offsets(offset_size_at_level, stream); + thrust::for_each_n(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + offset_size_at_level, + [off = lcv.offsets().data() + column_offsets[level], + scan_out = scan_out.data(), + new_off = temp_new_offsets.data(), + offset_transformer] __device__(auto i) { + new_off[i] = offset_transformer(off[i]) + scan_out[i]; + }); + new_offsets = std::move(temp_new_offsets); + + // Set rep level values at level starts to appropriate rep level + auto scatter_it = thrust::make_constant_iterator(level); + thrust::scatter(rmm::exec_policy(stream), + scatter_it, + scatter_it + new_offsets.size() - 1, + new_offsets.begin(), + rep_level.begin()); + } + + size_t level_vals_size = new_offsets.back_element(stream); + rep_level.resize(level_vals_size, stream); + def_level.resize(level_vals_size, stream); + + stream.synchronize(); + + size_type leaf_data_size = column_ends.back() - column_offsets.back(); + + return row::lexicographic::dremel_data{ + std::move(new_offsets), std::move(rep_level), 
std::move(def_level), leaf_data_size}; +} + +auto list_lex_preprocess(table_view table, rmm::cuda_stream_view stream) +{ + std::vector dremel_data; + std::vector max_def_levels; + for (auto const& col : table) { + if (col.type().id() == type_id::LIST) { + // Check nullability of the list + std::vector nullability; + auto cur_col = col; + uint8_t max_def_level = 0; + while (cur_col.type().id() == type_id::LIST) { + max_def_level += (cur_col.nullable() ? 2 : 1); + nullability.push_back(static_cast(cur_col.nullable())); + cur_col = cur_col.child(lists_column_view::child_column_index); + } + max_def_level += (cur_col.nullable() ? 1 : 0); + nullability.push_back(static_cast(cur_col.nullable())); + auto d_nullability = detail::make_device_uvector_async(nullability, stream); + dremel_data.push_back(get_dremel_data(col, d_nullability, nullability, stream)); + max_def_levels.push_back(max_def_level); + } else { + max_def_levels.push_back(0); + } + } + + std::vector dremel_offsets; + std::vector rep_levels; + std::vector def_levels; + size_type c = 0; + for (auto const& col : table) { + if (col.type().id() == type_id::LIST) { + dremel_offsets.push_back(dremel_data[c].dremel_offsets.data()); + rep_levels.push_back(dremel_data[c].rep_level.data()); + def_levels.push_back(dremel_data[c].def_level.data()); + ++c; + } else { + dremel_offsets.push_back(nullptr); + rep_levels.push_back(nullptr); + def_levels.push_back(nullptr); + } + } + auto d_dremel_offsets = detail::make_device_uvector_async(dremel_offsets, stream); + auto d_rep_levels = detail::make_device_uvector_async(rep_levels, stream); + auto d_def_levels = detail::make_device_uvector_async(def_levels, stream); + auto d_max_def_levels = detail::make_device_uvector_async(max_def_levels, stream); + return std::make_tuple(std::move(dremel_data), + std::move(d_dremel_offsets), + std::move(d_rep_levels), + std::move(d_def_levels), + std::move(d_max_def_levels)); +} + using column_checker_fn_t = std::function; /** @@ -329,13 
+744,23 @@ std::shared_ptr preprocessed_table::create( auto [verticalized_lhs, new_column_order, new_null_precedence, verticalized_col_depths] = decompose_structs(t, column_order, null_precedence); + auto [dremel_data, d_dremel_offsets, d_rep_levels, d_def_levels, d_max_def_levels] = + list_lex_preprocess(verticalized_lhs, stream); + auto d_t = table_device_view::create(verticalized_lhs, stream); auto d_column_order = detail::make_device_uvector_async(new_column_order, stream); auto d_null_precedence = detail::make_device_uvector_async(new_null_precedence, stream); auto d_depths = detail::make_device_uvector_async(verticalized_col_depths, stream); - return std::shared_ptr(new preprocessed_table( - std::move(d_t), std::move(d_column_order), std::move(d_null_precedence), std::move(d_depths))); + return std::shared_ptr(new preprocessed_table(std::move(d_t), + std::move(d_column_order), + std::move(d_null_precedence), + std::move(d_depths), + std::move(dremel_data), + std::move(d_dremel_offsets), + std::move(d_rep_levels), + std::move(d_def_levels), + std::move(d_max_def_levels))); } two_table_comparator::two_table_comparator(table_view const& left, From 0e528f088c008b2daa76df01c7d56f94a7c23490 Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Wed, 15 Jun 2022 02:23:59 +0530 Subject: [PATCH 26/78] get list lex working on code ported to exp header --- .../cudf/table/experimental/row_operators.cuh | 13 +++++++++++-- cpp/src/table/row_operators.cu | 9 ++++++--- cpp/tests/sort/sort2_test.cpp | 2 +- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 620251661f0..2aee96b169f 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -480,8 +480,16 @@ class device_row_comparator { null_order const null_precedence = _null_precedence.has_value() ? 
(*_null_precedence)[i] : null_order::BEFORE; - auto element_comp = element_comparator{ - _check_nulls, _lhs.column(i), _rhs.column(i), null_precedence, depth, _comparator}; + auto element_comp = element_comparator{_check_nulls, + _lhs.column(i), + _rhs.column(i), + null_precedence, + depth, + _comparator, + (*_dremel_offsets)[i], + (*_rep_levels)[i], + (*_def_levels)[i], + (*_max_def_levels)[i]}; weak_ordering state; cuda::std::tie(state, last_null_depth) = @@ -691,6 +699,7 @@ struct preprocessed_table { return _depths.size() ? std::optional>(_depths) : std::nullopt; } + // TODO: span of spans? [[nodiscard]] std::optional> dremel_offsets() const { return _dremel_offsets.size() ? std::optional>(_dremel_offsets) diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu index e81919cee0c..689b68f819f 100644 --- a/cpp/src/table/row_operators.cu +++ b/cpp/src/table/row_operators.cu @@ -677,10 +677,13 @@ using column_checker_fn_t = std::function; */ void check_lex_compatibility(table_view const& input) { - // Basically check if there's any LIST hiding anywhere in the table + // Basically check if there's any LIST of STRUCT or STRUCT of LIST hiding anywhere in the table column_checker_fn_t check_column = [&](column_view const& c) { - CUDF_EXPECTS(c.type().id() != type_id::LIST, - "Cannot lexicographic compare a table with a LIST column"); + if (c.type().id() == type_id::LIST) { + CUDF_EXPECTS(c.child(lists_column_view::child_column_index).type().id() != type_id::STRUCT, + "Cannot lexicographic compare a table with a LIST of STRUCT column"); + } + // TODO: more copying of logic from row_operators2.cu if (not is_nested(c.type())) { CUDF_EXPECTS(is_relationally_comparable(c.type()), "Cannot lexicographic compare a table with a column of type " + diff --git a/cpp/tests/sort/sort2_test.cpp b/cpp/tests/sort/sort2_test.cpp index a20ce7308f4..f71d91375c5 100644 --- a/cpp/tests/sort/sort2_test.cpp +++ b/cpp/tests/sort/sort2_test.cpp @@ -230,7 +230,7 @@ 
TEST_F(NewRowOpTest, List) }; auto expect = cudf::test::fixed_width_column_wrapper{8, 6, 5, 3, 0, 1, 2, 4, 7}; - auto result = cudf::detail::experimental::sorted_order2(cudf::table_view({col})); + auto result = cudf::sorted_order(cudf::table_view({col})); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expect, *result); } From f545af0ac3ecf973a69dee830fd20785379c0000 Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Thu, 16 Jun 2022 02:22:38 +0530 Subject: [PATCH 27/78] Add null handling --- .../cudf/table/experimental/row_operators.cuh | 29 +++++++++- cpp/tests/sort/sort2_test.cpp | 20 ------- cpp/tests/sort/sort_test.cpp | 57 +++++++++++++++---- 3 files changed, 75 insertions(+), 31 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 2aee96b169f..fc4c3d7bc24 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -416,15 +416,36 @@ class device_row_comparator { lcol = lcol.child(lists_column_view::child_column_index); rcol = rcol.child(lists_column_view::child_column_index); } + printf("max_def_level: %d\n", max_def_level); + + printf("t: %d, lhs_element_index: %d, rhs_element_index: %d\n", + threadIdx.x, + lhs_element_index, + rhs_element_index); + printf("t: %d, l_start: %d, l_end: %d, r_start: %d, r_end: %d\n", + threadIdx.x, + l_start, + l_end, + r_start, + r_end); weak_ordering state{weak_ordering::EQUIVALENT}; for (int i = l_start, j = r_start, m = lc_start, n = rc_start; i < l_end and j < r_end; ++i, ++j) { + printf("t: %d, i: %d, j: %d, m: %d, n: %d\n", threadIdx.x, i, j, m, n); + printf("t: %d, def_l: %d, def_r: %d, rep_l: %d, rep_r: %d\n", + threadIdx.x, + def_level[i], + def_level[j], + rep_level[i], + rep_level[j]); if (def_level[i] != def_level[j]) { state = (def_level[i] < def_level[j]) ? 
weak_ordering::LESS : weak_ordering::GREATER; + printf("t: %d, def, state: %d\n", threadIdx.x, state); return cuda::std::pair(state, _depth); } if (rep_level[i] != rep_level[j]) { state = (rep_level[i] < rep_level[j]) ? weak_ordering::LESS : weak_ordering::GREATER; + printf("t: %d, rep, state: %d\n", threadIdx.x, state); return cuda::std::pair(state, _depth); } if (def_level[i] == max_def_level) { @@ -432,7 +453,13 @@ class device_row_comparator { int last_null_depth = _depth; cuda::std::tie(state, last_null_depth) = cudf::type_dispatcher(lcol.type(), comparator, m, n); - if (state != weak_ordering::EQUIVALENT) { return cuda::std::pair(state, _depth); } + if (state != weak_ordering::EQUIVALENT) { + printf("t: %d, leaf, state: %d\n", threadIdx.x, state); + return cuda::std::pair(state, _depth); + } + ++m; + ++n; + } else if (lcol.nullable() and def_level[i] == max_def_level - 1) { ++m; ++n; } diff --git a/cpp/tests/sort/sort2_test.cpp b/cpp/tests/sort/sort2_test.cpp index f71d91375c5..f67791e9291 100644 --- a/cpp/tests/sort/sort2_test.cpp +++ b/cpp/tests/sort/sort2_test.cpp @@ -214,24 +214,4 @@ TEST_F(NewRowOpTest, SampleStructTest) cudf::test::expect_columns_equal(result1->view(), result2->view()); } -TEST_F(NewRowOpTest, List) -{ - using lcw = cudf::test::lists_column_wrapper; - lcw col{ - {{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, - {{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, - {{1, 2, 3}, {}, {4, 5}, {0, 6, 0}}, - {{1, 2}, {3}, {4, 5}, {0, 6, 0}}, - {{7, 8}, {}}, - lcw{lcw{}, lcw{}, lcw{}}, - lcw{lcw{}}, - {lcw{10}}, - lcw{}, - }; - - auto expect = cudf::test::fixed_width_column_wrapper{8, 6, 5, 3, 0, 1, 2, 4, 7}; - auto result = cudf::sorted_order(cudf::table_view({col})); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expect, *result); -} - CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/sort/sort_test.cpp b/cpp/tests/sort/sort_test.cpp index 1dd7e21b821..c2c1467ba29 100644 --- a/cpp/tests/sort/sort_test.cpp +++ b/cpp/tests/sort/sort_test.cpp @@ -17,6 +17,7 @@ #include #include 
#include +#include #include #include @@ -740,16 +741,52 @@ TYPED_TEST(Sort, ZeroSizedColumns) TYPED_TEST(Sort, WithListColumn) { - using T = int; - lists_column_wrapper lc{{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}; - CUDF_EXPECT_THROW_MESSAGE(cudf::sort(table_view({lc})), - "Cannot lexicographic compare a table with a LIST column"); - - std::vector> child_cols; - child_cols.push_back(lc.release()); - structs_column_wrapper sc{std::move(child_cols), {1, 0, 1}}; - CUDF_EXPECT_THROW_MESSAGE(cudf::sort(table_view({sc})), - "Cannot lexicographic compare a table with a LIST column"); + using T = TypeParam; + if (std::is_same_v) { GTEST_SKIP(); } + + using lcw = cudf::test::lists_column_wrapper; + lcw col{ + {{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, + {{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, + {{1, 2, 3}, {}, {4, 5}, {0, 6, 0}}, + {{1, 2}, {3}, {4, 5}, {0, 6, 0}}, + {{7, 8}, {}}, + lcw{lcw{}, lcw{}, lcw{}}, + lcw{lcw{}}, + {lcw{10}}, + lcw{}, + }; + + auto expect = cudf::test::fixed_width_column_wrapper{8, 6, 5, 3, 0, 1, 2, 4, 7}; + auto result = cudf::sorted_order(cudf::table_view({col})); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expect, *result); +} + +TYPED_TEST(Sort, WithNullableListColumn) +{ + using T = TypeParam; + if (std::is_same_v) { GTEST_SKIP(); } + + using lcw = cudf::test::lists_column_wrapper; + lcw col{ + {{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, // 0 + {{{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, cudf::test::iterators::nulls_at({3})}, // 1 + {{1, 2, 3}, {}, {4, 5}, {0, 6, 0}}, // 2 + {{1, 2}, {3}, {4, 5}, {0, 6, 0}}, // 3 + {{1, 2}, {3}, {4, 5}, {{0, 6, 0}, cudf::test::iterators::nulls_at({0})}}, // 4 + {{7, 8}, {}}, // 5 + lcw{lcw{}, lcw{}, lcw{}}, // 6 + lcw{lcw{}}, // 7 + {lcw{10}}, // 8 + lcw{}, // 9 + {{1, 2}, {3}, {4, 5}, {{0, 6, 0}, cudf::test::iterators::nulls_at({0, 2})}}, // 10 + {{1, 2}, {3}, {4, 5}, {{0, 7}, cudf::test::iterators::nulls_at({0})}}, // 11 + }; + + auto expect = + cudf::test::fixed_width_column_wrapper{9, 7, 6, 10, 4, 11, 3, 1, 0, 2, 5, 8}; + auto result 
= cudf::sorted_order(cudf::table_view({col})); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expect, *result); } struct SortByKey : public BaseFixture { From a4190a0faf2105e4dc651cf704f379da48470569 Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Thu, 16 Jun 2022 15:37:31 +0530 Subject: [PATCH 28/78] handle empty lists --- .../cudf/table/experimental/row_operators.cuh | 25 +- cpp/src/table/row_operators.cu | 374 +----------------- cpp/tests/sort/sort_test.cpp | 19 + 3 files changed, 35 insertions(+), 383 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index fc4c3d7bc24..5f9fede1e4f 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -16,6 +16,8 @@ #pragma once +#include "io/parquet/parquet_gpu.hpp" + #include #include #include @@ -78,9 +80,9 @@ inline size_type __device__ row_to_value_idx(size_type idx, column_device_view c idx += col.offset(); col = col.child(0); } else { - auto offset_col = col.child(lists_column_view::offsets_column_index); - idx = offset_col.element(idx + col.offset()); - col = col.child(lists_column_view::child_column_index); + detail::lists_column_device_view lcol(col); + idx = lcol.offset_at(idx); + col = lcol.child(); } } return idx; @@ -614,19 +616,6 @@ struct less_equivalent_comparator } }; -/** - * @brief Dremel data that describes one nested type column - * - * @see get_dremel_data() - */ -struct dremel_data { - rmm::device_uvector dremel_offsets; - rmm::device_uvector rep_level; - rmm::device_uvector def_level; - - size_type leaf_data_size; -}; - /** * @brief Preprocessed table for use with lexicographical comparison * @@ -665,7 +654,7 @@ struct preprocessed_table { rmm::device_uvector&& column_order, rmm::device_uvector&& null_precedence, rmm::device_uvector&& depths, - std::vector&& dremel_data, + std::vector&& dremel_data, rmm::device_uvector&& dremel_offsets, 
rmm::device_uvector&& rep_levels, rmm::device_uvector&& def_levels, @@ -758,7 +747,7 @@ struct preprocessed_table { rmm::device_uvector const _depths; // List related pre-computation - std::vector _dremel_data; + std::vector _dremel_data; rmm::device_uvector _dremel_offsets; rmm::device_uvector _rep_levels; rmm::device_uvector _def_levels; diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu index 689b68f819f..14f366134bb 100644 --- a/cpp/src/table/row_operators.cu +++ b/cpp/src/table/row_operators.cu @@ -14,14 +14,11 @@ * limitations under the License. */ -#include "cudf/detail/utilities/cuda.cuh" -#include "rmm/exec_policy.hpp" -#include "thrust/gather.h" -#include "thrust/iterator/discard_iterator.h" #include #include #include #include +#include #include #include #include @@ -30,6 +27,11 @@ #include +#include + +#include +#include + namespace cudf { namespace experimental { @@ -257,368 +259,9 @@ auto decompose_structs(table_view table, std::move(verticalized_col_depths)); } -struct def_level_fn { - column_device_view const* parent_col; - uint8_t const* d_nullability; - uint8_t sub_level_start; - uint8_t curr_def_level; - - __device__ uint32_t operator()(size_type i) - { - uint32_t def = curr_def_level; - uint8_t l = sub_level_start; - bool is_col_struct = false; - auto col = *parent_col; - do { - // If col not nullable then it does not contribute to def levels - if (d_nullability[l]) { - if (not col.nullable() or bit_is_set(col.null_mask(), i)) { - ++def; - } else { // We have found the shallowest level at which this row is null - break; - } - } - is_col_struct = (col.type().id() == type_id::STRUCT); - if (is_col_struct) { - col = col.child(0); - ++l; - } - } while (is_col_struct); - return def; - } -}; - -row::lexicographic::dremel_data get_dremel_data( - column_view h_col, - // TODO(cp): use device_span once it is converted to a single hd_vec - rmm::device_uvector const& d_nullability, - std::vector const& nullability, - 
rmm::cuda_stream_view stream) -{ - auto get_list_level = [](column_view col) { - while (col.type().id() == type_id::STRUCT) { - col = col.child(0); - } - return col; - }; - - auto get_empties = [&](column_view col, size_type start, size_type end) { - auto lcv = lists_column_view(get_list_level(col)); - rmm::device_uvector empties_idx(lcv.size(), stream); - rmm::device_uvector empties(lcv.size(), stream); - auto d_off = lcv.offsets().data(); - - auto empties_idx_end = - thrust::copy_if(rmm::exec_policy(stream), - thrust::make_counting_iterator(start), - thrust::make_counting_iterator(end), - empties_idx.begin(), - [d_off] __device__(auto i) { return d_off[i] == d_off[i + 1]; }); - auto empties_end = thrust::gather(rmm::exec_policy(stream), - empties_idx.begin(), - empties_idx_end, - lcv.offsets().begin(), - empties.begin()); - - auto empties_size = empties_end - empties.begin(); - return std::make_tuple(std::move(empties), std::move(empties_idx), empties_size); - }; - - auto curr_col = h_col; - std::vector nesting_levels; - std::vector def_at_level; - std::vector start_at_sub_level; - uint8_t curr_nesting_level_idx = 0; - - auto add_def_at_level = [&](column_view col) { - // Add up all def level contributions in this column all the way till the first list column - // appears in the hierarchy or until we get to leaf - uint32_t def = 0; - start_at_sub_level.push_back(curr_nesting_level_idx); - while (col.type().id() == type_id::STRUCT) { - def += (nullability[curr_nesting_level_idx]) ? 1 : 0; - col = col.child(0); - ++curr_nesting_level_idx; - } - // At the end of all those structs is either a list column or the leaf. Leaf column contributes - // at least one def level. It doesn't matter what the leaf contributes because it'll be at the - // end of the exclusive scan. - def += (nullability[curr_nesting_level_idx]) ? 
2 : 1; - def_at_level.push_back(def); - ++curr_nesting_level_idx; - }; - while (cudf::is_nested(curr_col.type())) { - nesting_levels.push_back(curr_col); - add_def_at_level(curr_col); - while (curr_col.type().id() == type_id::STRUCT) { - // Go down the hierarchy until we get to the LIST or the leaf level - curr_col = curr_col.child(0); - } - if (curr_col.type().id() == type_id::LIST) { - curr_col = curr_col.child(lists_column_view::child_column_index); - if (not is_nested(curr_col.type())) { - // Special case: when the leaf data column is the immediate child of the list col then we - // want it to be included right away. Otherwise the struct containing it will be included in - // the next iteration of this loop. - nesting_levels.push_back(curr_col); - add_def_at_level(curr_col); - break; - } - } - } - - std::unique_ptr device_view_owners; - column_device_view* d_nesting_levels; - std::tie(device_view_owners, d_nesting_levels) = - contiguous_copy_column_device_views(nesting_levels, stream); - - thrust::exclusive_scan( - thrust::host, def_at_level.begin(), def_at_level.end(), def_at_level.begin()); - - // Sliced list column views only have offsets applied to top level. Get offsets for each level. - rmm::device_uvector d_column_offsets(nesting_levels.size(), stream); - rmm::device_uvector d_column_ends(nesting_levels.size(), stream); - - auto d_col = column_device_view::create(h_col, stream); - cudf::detail::device_single_thread( - [offset_at_level = d_column_offsets.data(), - end_idx_at_level = d_column_ends.data(), - col = *d_col] __device__() { - auto curr_col = col; - size_type off = curr_col.offset(); - size_type end = off + curr_col.size(); - size_type level = 0; - offset_at_level[level] = off; - end_idx_at_level[level] = end; - ++level; - // Apply offset recursively until we get to leaf data - // Skip doing the following for any structs we encounter in between. 
- while (curr_col.type().id() == type_id::LIST or curr_col.type().id() == type_id::STRUCT) { - if (curr_col.type().id() == type_id::LIST) { - off = curr_col.child(lists_column_view::offsets_column_index).element(off); - end = curr_col.child(lists_column_view::offsets_column_index).element(end); - offset_at_level[level] = off; - end_idx_at_level[level] = end; - ++level; - curr_col = curr_col.child(lists_column_view::child_column_index); - } else { - curr_col = curr_col.child(0); - } - } - }, - stream); - - thrust::host_vector column_offsets = - cudf::detail::make_host_vector_async(d_column_offsets, stream); - thrust::host_vector column_ends = - cudf::detail::make_host_vector_async(d_column_ends, stream); - stream.synchronize(); - - size_t max_vals_size = 0; - for (size_t l = 0; l < column_offsets.size(); ++l) { - max_vals_size += column_ends[l] - column_offsets[l]; - } - - rmm::device_uvector rep_level(max_vals_size, stream); - rmm::device_uvector def_level(max_vals_size, stream); - - rmm::device_uvector temp_rep_vals(max_vals_size, stream); - rmm::device_uvector temp_def_vals(max_vals_size, stream); - rmm::device_uvector new_offsets(0, stream); - size_type curr_rep_values_size = 0; - { - // At this point, curr_col contains the leaf column. Max nesting level is - // nesting_levels.size(). - - // We are going to start by merging the last column in nesting_levels (the leaf, which is at the - // index `nesting_levels.size() - 1`) with the second-to-last (which is at - // `nesting_levels.size() - 2`). 
- size_t level = nesting_levels.size() - 2; - curr_col = nesting_levels[level]; - auto lcv = lists_column_view(get_list_level(curr_col)); - auto offset_size_at_level = column_ends[level] - column_offsets[level] + 1; - - // Get empties at this level - rmm::device_uvector empties(0, stream); - rmm::device_uvector empties_idx(0, stream); - size_t empties_size; - std::tie(empties, empties_idx, empties_size) = - get_empties(nesting_levels[level], column_offsets[level], column_ends[level]); - - // Merge empty at deepest parent level with the rep, def level vals at leaf level - - auto input_parent_rep_it = thrust::make_constant_iterator(level); - auto input_parent_def_it = - thrust::make_transform_iterator(empties_idx.begin(), - def_level_fn{d_nesting_levels + level, - d_nullability.data(), - start_at_sub_level[level], - def_at_level[level]}); - - // `nesting_levels.size()` == no of list levels + leaf. Max repetition level = no of list levels - auto input_child_rep_it = thrust::make_constant_iterator(nesting_levels.size() - 1); - auto input_child_def_it = - thrust::make_transform_iterator(thrust::make_counting_iterator(column_offsets[level + 1]), - def_level_fn{d_nesting_levels + level + 1, - d_nullability.data(), - start_at_sub_level[level + 1], - def_at_level[level + 1]}); - - // Zip the input and output value iterators so that merge operation is done only once - auto input_parent_zip_it = - thrust::make_zip_iterator(thrust::make_tuple(input_parent_rep_it, input_parent_def_it)); - - auto input_child_zip_it = - thrust::make_zip_iterator(thrust::make_tuple(input_child_rep_it, input_child_def_it)); - - auto output_zip_it = - thrust::make_zip_iterator(thrust::make_tuple(rep_level.begin(), def_level.begin())); - - auto ends = thrust::merge_by_key(rmm::exec_policy(stream), - empties.begin(), - empties.begin() + empties_size, - thrust::make_counting_iterator(column_offsets[level + 1]), - thrust::make_counting_iterator(column_ends[level + 1]), - input_parent_zip_it, - 
input_child_zip_it, - thrust::make_discard_iterator(), - output_zip_it); - - curr_rep_values_size = ends.second - output_zip_it; - - // Scan to get distance by which each offset value is shifted due to the insertion of empties - auto scan_it = cudf::detail::make_counting_transform_iterator( - column_offsets[level], - [off = lcv.offsets().data(), size = lcv.offsets().size()] __device__( - auto i) -> int { return (i + 1 < size) && (off[i] == off[i + 1]); }); - rmm::device_uvector scan_out(offset_size_at_level, stream); - thrust::exclusive_scan( - rmm::exec_policy(stream), scan_it, scan_it + offset_size_at_level, scan_out.begin()); - - // Add scan output to existing offsets to get new offsets into merged rep level values - new_offsets = rmm::device_uvector(offset_size_at_level, stream); - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - offset_size_at_level, - [off = lcv.offsets().data() + column_offsets[level], - scan_out = scan_out.data(), - new_off = new_offsets.data()] __device__(auto i) { - new_off[i] = off[i] - off[0] + scan_out[i]; - }); - - // Set rep level values at level starts to appropriate rep level - auto scatter_it = thrust::make_constant_iterator(level); - thrust::scatter(rmm::exec_policy(stream), - scatter_it, - scatter_it + new_offsets.size() - 1, - new_offsets.begin(), - rep_level.begin()); - } - - // Having already merged the last two levels, we are now going to merge the result with the - // third-last level which is at index `nesting_levels.size() - 3`. 
- for (int level = nesting_levels.size() - 3; level >= 0; level--) { - curr_col = nesting_levels[level]; - auto lcv = lists_column_view(get_list_level(curr_col)); - auto offset_size_at_level = column_ends[level] - column_offsets[level] + 1; - - // Get empties at this level - rmm::device_uvector empties(0, stream); - rmm::device_uvector empties_idx(0, stream); - size_t empties_size; - std::tie(empties, empties_idx, empties_size) = - get_empties(nesting_levels[level], column_offsets[level], column_ends[level]); - - auto offset_transformer = [new_child_offsets = new_offsets.data(), - child_start = column_offsets[level + 1]] __device__(auto x) { - return new_child_offsets[x - child_start]; // (x - child's offset) - }; - - // We will be reading from old rep_levels and writing again to rep_levels. Swap the current - // rep values into temp_rep_vals so it can become the input and rep_levels can again be output. - std::swap(temp_rep_vals, rep_level); - std::swap(temp_def_vals, def_level); - - // Merge empty at parent level with the rep, def level vals at current level - auto transformed_empties = thrust::make_transform_iterator(empties.begin(), offset_transformer); - - auto input_parent_rep_it = thrust::make_constant_iterator(level); - auto input_parent_def_it = - thrust::make_transform_iterator(empties_idx.begin(), - def_level_fn{d_nesting_levels + level, - d_nullability.data(), - start_at_sub_level[level], - def_at_level[level]}); - - // Zip the input and output value iterators so that merge operation is done only once - auto input_parent_zip_it = - thrust::make_zip_iterator(thrust::make_tuple(input_parent_rep_it, input_parent_def_it)); - - auto input_child_zip_it = - thrust::make_zip_iterator(thrust::make_tuple(temp_rep_vals.begin(), temp_def_vals.begin())); - - auto output_zip_it = - thrust::make_zip_iterator(thrust::make_tuple(rep_level.begin(), def_level.begin())); - - auto ends = thrust::merge_by_key(rmm::exec_policy(stream), - transformed_empties, - 
transformed_empties + empties_size, - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(curr_rep_values_size), - input_parent_zip_it, - input_child_zip_it, - thrust::make_discard_iterator(), - output_zip_it); - - curr_rep_values_size = ends.second - output_zip_it; - - // Scan to get distance by which each offset value is shifted due to the insertion of dremel - // level value fof an empty list - auto scan_it = cudf::detail::make_counting_transform_iterator( - column_offsets[level], - [off = lcv.offsets().data(), size = lcv.offsets().size()] __device__( - auto i) -> int { return (i + 1 < size) && (off[i] == off[i + 1]); }); - rmm::device_uvector scan_out(offset_size_at_level, stream); - thrust::exclusive_scan( - rmm::exec_policy(stream), scan_it, scan_it + offset_size_at_level, scan_out.begin()); - - // Add scan output to existing offsets to get new offsets into merged rep level values - rmm::device_uvector temp_new_offsets(offset_size_at_level, stream); - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - offset_size_at_level, - [off = lcv.offsets().data() + column_offsets[level], - scan_out = scan_out.data(), - new_off = temp_new_offsets.data(), - offset_transformer] __device__(auto i) { - new_off[i] = offset_transformer(off[i]) + scan_out[i]; - }); - new_offsets = std::move(temp_new_offsets); - - // Set rep level values at level starts to appropriate rep level - auto scatter_it = thrust::make_constant_iterator(level); - thrust::scatter(rmm::exec_policy(stream), - scatter_it, - scatter_it + new_offsets.size() - 1, - new_offsets.begin(), - rep_level.begin()); - } - - size_t level_vals_size = new_offsets.back_element(stream); - rep_level.resize(level_vals_size, stream); - def_level.resize(level_vals_size, stream); - - stream.synchronize(); - - size_type leaf_data_size = column_ends.back() - column_offsets.back(); - - return row::lexicographic::dremel_data{ - std::move(new_offsets), std::move(rep_level), 
std::move(def_level), leaf_data_size}; -} - auto list_lex_preprocess(table_view table, rmm::cuda_stream_view stream) { - std::vector dremel_data; + std::vector dremel_data; std::vector max_def_levels; for (auto const& col : table) { if (col.type().id() == type_id::LIST) { @@ -634,7 +277,8 @@ auto list_lex_preprocess(table_view table, rmm::cuda_stream_view stream) max_def_level += (cur_col.nullable() ? 1 : 0); nullability.push_back(static_cast(cur_col.nullable())); auto d_nullability = detail::make_device_uvector_async(nullability, stream); - dremel_data.push_back(get_dremel_data(col, d_nullability, nullability, stream)); + dremel_data.push_back( + io::parquet::gpu::get_dremel_data(col, d_nullability, nullability, stream)); max_def_levels.push_back(max_def_level); } else { max_def_levels.push_back(0); diff --git a/cpp/tests/sort/sort_test.cpp b/cpp/tests/sort/sort_test.cpp index c2c1467ba29..5f7b4688513 100644 --- a/cpp/tests/sort/sort_test.cpp +++ b/cpp/tests/sort/sort_test.cpp @@ -789,6 +789,25 @@ TYPED_TEST(Sort, WithNullableListColumn) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expect, *result); } +TYPED_TEST(Sort, WithEmptyListColumn) +{ + using T = TypeParam; + if (std::is_same_v) { GTEST_SKIP(); } + + auto L1 = cudf::make_lists_column(0, + cudf::make_empty_column(cudf::data_type(cudf::type_id::INT32)), + cudf::make_empty_column(cudf::data_type{cudf::type_id::INT64}), + 0, + {}); + auto L0 = cudf::make_lists_column( + 3, cudf::test::fixed_width_column_wrapper{0, 0, 0, 0}.release(), std::move(L1), 0, {}); + + auto expect = cudf::test::fixed_width_column_wrapper{0, 1, 2}; + auto result = cudf::sorted_order(cudf::table_view({*L0})); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expect, *result); +} +// TODO: Sliced list test + struct SortByKey : public BaseFixture { }; From 9362b8d3bba50fa0f222023757fe2c52a65f0ed0 Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Thu, 16 Jun 2022 16:17:45 +0530 Subject: [PATCH 29/78] Add sliced list test --- cpp/tests/sort/sort_test.cpp | 28 
++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/cpp/tests/sort/sort_test.cpp b/cpp/tests/sort/sort_test.cpp index 5f7b4688513..ca64a3fdad4 100644 --- a/cpp/tests/sort/sort_test.cpp +++ b/cpp/tests/sort/sort_test.cpp @@ -789,6 +789,34 @@ TYPED_TEST(Sort, WithNullableListColumn) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expect, *result); } +TYPED_TEST(Sort, WithSlicedListColumn) +{ + using T = TypeParam; + if (std::is_same_v) { GTEST_SKIP(); } + + using lcw = cudf::test::lists_column_wrapper; + lcw col{ + {{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, // + {{{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, cudf::test::iterators::nulls_at({3})}, // 0 + {{1, 2, 3}, {}, {4, 5}, {0, 6, 0}}, // 1 + {{1, 2}, {3}, {4, 5}, {0, 6, 0}}, // 2 + {{1, 2}, {3}, {4, 5}, {{0, 6, 0}, cudf::test::iterators::nulls_at({0})}}, // 3 + {{7, 8}, {}}, // 4 + lcw{lcw{}, lcw{}, lcw{}}, // 5 + lcw{lcw{}}, // 6 + {lcw{10}}, // 7 + lcw{}, // 8 + {{1, 2}, {3}, {4, 5}, {{0, 6, 0}, cudf::test::iterators::nulls_at({0, 2})}}, // 9 + {{1, 2}, {3}, {4, 5}, {{0, 7}, cudf::test::iterators::nulls_at({0})}}, // + }; + + auto sliced_col = cudf::slice(col, {1, 10}); + + auto expect = cudf::test::fixed_width_column_wrapper{8, 6, 5, 3, 2, 0, 1, 4, 7}; + auto result = cudf::sorted_order(cudf::table_view({sliced_col})); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expect, *result); +} + TYPED_TEST(Sort, WithEmptyListColumn) { using T = TypeParam; From a7ec09ba9bbd958e031e68a12ee1c0fb84e94b5e Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Fri, 17 Jun 2022 00:19:55 +0530 Subject: [PATCH 30/78] Use progressive slicing to get leaf column --- .../cudf/table/experimental/row_operators.cuh | 38 +++++-------------- 1 file changed, 9 insertions(+), 29 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 5f9fede1e4f..fa2f075e015 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ 
b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -73,21 +73,6 @@ struct dispatch_void_if_nested { using type = std::conditional_t>; }; -inline size_type __device__ row_to_value_idx(size_type idx, column_device_view col) -{ - while (col.type().id() == type_id::LIST or col.type().id() == type_id::STRUCT) { - if (col.type().id() == type_id::STRUCT) { - idx += col.offset(); - col = col.child(0); - } else { - detail::lists_column_device_view lcol(col); - idx = lcol.offset_at(idx); - col = lcol.child(); - } - } - return idx; -} - namespace row { enum class lhs_index_type : size_type {}; @@ -410,13 +395,11 @@ class device_row_comparator { auto l_end = dremel_offsets[lhs_element_index + 1]; auto r_start = dremel_offsets[rhs_element_index]; auto r_end = dremel_offsets[rhs_element_index + 1]; - auto lc_start = row_to_value_idx(lhs_element_index, _lhs); - auto rc_start = row_to_value_idx(rhs_element_index, _rhs); - column_device_view lcol = _lhs; - column_device_view rcol = _rhs; + column_device_view lcol = _lhs.slice(lhs_element_index, 1); + column_device_view rcol = _rhs.slice(rhs_element_index, 1); while (lcol.type().id() == type_id::LIST) { - lcol = lcol.child(lists_column_view::child_column_index); - rcol = rcol.child(lists_column_view::child_column_index); + lcol = detail::lists_column_device_view(lcol).get_sliced_child(); + rcol = detail::lists_column_device_view(rcol).get_sliced_child(); } printf("max_def_level: %d\n", max_def_level); @@ -431,9 +414,8 @@ class device_row_comparator { r_start, r_end); weak_ordering state{weak_ordering::EQUIVALENT}; - for (int i = l_start, j = r_start, m = lc_start, n = rc_start; i < l_end and j < r_end; - ++i, ++j) { - printf("t: %d, i: %d, j: %d, m: %d, n: %d\n", threadIdx.x, i, j, m, n); + for (int i = l_start, j = r_start, k = 0; i < l_end and j < r_end; ++i, ++j) { + printf("t: %d, i: %d, j: %d, k: %d\n", threadIdx.x, i, j, k); printf("t: %d, def_l: %d, def_r: %d, rep_l: %d, rep_r: %d\n", threadIdx.x, def_level[i], @@ 
-454,16 +436,14 @@ class device_row_comparator { auto comparator = element_comparator{_check_nulls, lcol, rcol, _null_precedence}; int last_null_depth = _depth; cuda::std::tie(state, last_null_depth) = - cudf::type_dispatcher(lcol.type(), comparator, m, n); + cudf::type_dispatcher(lcol.type(), comparator, k, k); if (state != weak_ordering::EQUIVALENT) { printf("t: %d, leaf, state: %d\n", threadIdx.x, state); return cuda::std::pair(state, _depth); } - ++m; - ++n; + ++k; } else if (lcol.nullable() and def_level[i] == max_def_level - 1) { - ++m; - ++n; + ++k; } } state = (l_end - l_start < r_end - r_start) ? weak_ordering::LESS From d6ef82215cbdf4cac66443dcf7cb208d299abe57 Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Fri, 17 Jun 2022 02:16:03 +0530 Subject: [PATCH 31/78] Clean up old experiment files --- cpp/CMakeLists.txt | 2 - cpp/include/cudf/detail/structs/utilities.hpp | 7 - cpp/include/cudf/sort2.hpp | 41 -- cpp/include/cudf/table/row_operator2.cuh | 477 -------------- cpp/include/cudf/table/row_operator3.cuh | 442 ------------- cpp/src/sort/sort2.cu | 79 --- cpp/src/table/row_operators.cu | 13 +- cpp/src/table/row_operators2.cu | 608 ------------------ cpp/tests/CMakeLists.txt | 2 - cpp/tests/sort/sort2_test.cpp | 217 ------- 10 files changed, 9 insertions(+), 1879 deletions(-) delete mode 100644 cpp/include/cudf/sort2.hpp delete mode 100644 cpp/include/cudf/table/row_operator2.cuh delete mode 100644 cpp/include/cudf/table/row_operator3.cuh delete mode 100644 cpp/src/sort/sort2.cu delete mode 100644 cpp/src/table/row_operators2.cu delete mode 100644 cpp/tests/sort/sort2_test.cpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 40083764cad..fc21c2def4d 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -453,7 +453,6 @@ add_library( src/sort/segmented_sort.cu src/sort/sort_column.cu src/sort/sort.cu - src/sort/sort2.cu src/sort/stable_sort_column.cu src/sort/stable_sort.cu src/stream_compaction/apply_boolean_mask.cu @@ -520,7 +519,6 @@ 
add_library( src/structs/structs_column_view.cpp src/structs/utilities.cpp src/table/row_operators.cu - src/table/row_operators2.cu src/table/table.cpp src/table/table_device_view.cu src/table/table_view.cpp diff --git a/cpp/include/cudf/detail/structs/utilities.hpp b/cpp/include/cudf/detail/structs/utilities.hpp index f0f537823b2..45d4c3b5ae4 100644 --- a/cpp/include/cudf/detail/structs/utilities.hpp +++ b/cpp/include/cudf/detail/structs/utilities.hpp @@ -150,13 +150,6 @@ flattened_table flatten_nested_columns( std::vector const& null_precedence, column_nullability nullability = column_nullability::MATCH_INCOMING); -namespace experimental { -std::tuple> verticalize_nested_columns( - table_view input, - std::vector const& column_order, - std::vector const& null_precedence); -} - /** * @brief Unflatten columns flattened as by `flatten_nested_columns()`, * based on the provided `blueprint`. diff --git a/cpp/include/cudf/sort2.hpp b/cpp/include/cudf/sort2.hpp deleted file mode 100644 index 154b08767fd..00000000000 --- a/cpp/include/cudf/sort2.hpp +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include - -namespace cudf { -namespace detail { -namespace experimental { - -/** - * @copydoc - * sorted_order(table_view&,std::vector,std::vector,rmm::mr::device_memory_resource*) - * - * @param stream CUDA stream used for device memory operations and kernel launches - */ -std::unique_ptr sorted_order2( - table_view input, - std::vector const& column_order = {}, - std::vector const& null_precedence = {}, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -} // namespace experimental -} // namespace detail -} // namespace cudf diff --git a/cpp/include/cudf/table/row_operator2.cuh b/cpp/include/cudf/table/row_operator2.cuh deleted file mode 100644 index 1288cef70a9..00000000000 --- a/cpp/include/cudf/table/row_operator2.cuh +++ /dev/null @@ -1,477 +0,0 @@ -/* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include - -namespace cudf { - -/** - * @brief Result type of the `element_relational_comparator2` function object. - * - * Indicates how two elements `a` and `b` compare with one and another. 
- * - * Equivalence is defined as `not (a -__device__ weak_ordering2 compare_elements2(Element lhs, Element rhs) -{ - if (lhs < rhs) { - return weak_ordering2::LESS; - } else if (rhs < lhs) { - return weak_ordering2::GREATER; - } - return weak_ordering2::EQUIVALENT; -} -} // namespace detail - -/* - * @brief A specialization for floating-point `Element` type relational comparison - * to derive the order of the elements with respect to `lhs`. Specialization is to - * handle `nan` in the order shown below. - * `[-Inf, -ve, 0, -0, +ve, +Inf, NaN, NaN, null] (for null_order::AFTER)` - * `[null, -Inf, -ve, 0, -0, +ve, +Inf, NaN, NaN] (for null_order::BEFORE)` - * - * @param[in] lhs first element - * @param[in] rhs second element - * @return weak_ordering2 Indicates the relationship between the elements in - * the `lhs` and `rhs` columns. - */ -template ::value>* = nullptr> -__device__ weak_ordering2 relational_compare2(Element lhs, Element rhs) -{ - if (isnan(lhs) and isnan(rhs)) { - return weak_ordering2::EQUIVALENT; - } else if (isnan(rhs)) { - return weak_ordering2::LESS; - } else if (isnan(lhs)) { - return weak_ordering2::GREATER; - } - - return detail::compare_elements2(lhs, rhs); -} - -/** - * @brief Compare the nulls according to null order. - * - * @param lhs_is_null boolean representing if lhs is null - * @param rhs_is_null boolean representing if lhs is null - * @param null_precedence null order - * @return Indicates the relationship between null in lhs and rhs columns. - */ -inline __device__ auto null_compare2(bool lhs_is_null, bool rhs_is_null, null_order null_precedence) -{ - if (lhs_is_null and rhs_is_null) { // null ::value>* = nullptr> -__device__ weak_ordering2 relational_compare2(Element lhs, Element rhs) -{ - return detail::compare_elements2(lhs, rhs); -} - -/** - * @brief A specialization for floating-point `Element` type to check if - * `lhs` is equivalent to `rhs`. `nan == nan`. 
- * - * @param[in] lhs first element - * @param[in] rhs second element - * @return bool `true` if `lhs` == `rhs` else `false`. - */ -template ::value>* = nullptr> -__device__ bool equality_compare2(Element lhs, Element rhs) -{ - if (isnan(lhs) and isnan(rhs)) { return true; } - return lhs == rhs; -} - -/** - * @brief A specialization for non-floating-point `Element` type to check if - * `lhs` is equivalent to `rhs`. - * - * @param[in] lhs first element - * @param[in] rhs second element - * @return bool `true` if `lhs` == `rhs` else `false`. - */ -template ::value>* = nullptr> -__device__ bool equality_compare2(Element const lhs, Element const rhs) -{ - return lhs == rhs; -} - -/** - * @brief Performs an equality comparison between two elements in two columns. - * - * @tparam has_nulls Indicates the potential for null values in either column. - */ -template -class element_equality_comparator2 { - public: - /** - * @brief Construct type-dispatched function object for comparing equality - * between two elements. - * - * @note `lhs` and `rhs` may be the same. - * - * @param lhs The column containing the first element - * @param rhs The column containing the second element (may be the same as lhs) - * @param nulls_are_equal Indicates if two null elements are treated as equivalent - */ - __host__ __device__ element_equality_comparator2(column_device_view lhs, - column_device_view rhs, - bool nulls_are_equal = true) - : lhs{lhs}, rhs{rhs}, nulls_are_equal{nulls_are_equal} - { - } - - /** - * @brief Compares the specified elements for equality. 
- * - * @param lhs_element_index The index of the first element - * @param rhs_element_index The index of the second element - * - */ - template ()>* = nullptr> - __device__ bool operator()(size_type lhs_element_index, - size_type rhs_element_index) const noexcept - { - if (has_nulls) { - bool const lhs_is_null{lhs.is_null(lhs_element_index)}; - bool const rhs_is_null{rhs.is_null(rhs_element_index)}; - if (lhs_is_null and rhs_is_null) { - return nulls_are_equal; - } else if (lhs_is_null != rhs_is_null) { - return false; - } - } - - return equality_compare2(lhs.element(lhs_element_index), - rhs.element(rhs_element_index)); - } - - template ()>* = nullptr> - __device__ bool operator()(size_type lhs_element_index, size_type rhs_element_index) - { - cudf_assert(false && "Attempted to compare elements of uncomparable types."); - return false; - } - - private: - column_device_view lhs; - column_device_view rhs; - bool nulls_are_equal; -}; - -template -class row_equality_comparator2 { - public: - row_equality_comparator2(table_device_view lhs, - table_device_view rhs, - bool nulls_are_equal = true) - : lhs{lhs}, rhs{rhs}, nulls_are_equal{nulls_are_equal} - { - CUDF_EXPECTS(lhs.num_columns() == rhs.num_columns(), "Mismatched number of columns."); - } - - __device__ bool operator()(size_type lhs_row_index, size_type rhs_row_index) const noexcept - { - auto equal_elements = [=](column_device_view l, column_device_view r) { - return cudf::type_dispatcher(l.type(), - element_equality_comparator2{l, r, nulls_are_equal}, - lhs_row_index, - rhs_row_index); - }; - - return thrust::equal(thrust::seq, lhs.begin(), lhs.end(), rhs.begin(), equal_elements); - } - - private: - table_device_view lhs; - table_device_view rhs; - bool nulls_are_equal; -}; - -/** - * @brief Performs a relational comparison between two elements in two columns. - * - * @tparam has_nulls Indicates the potential for null values in either column. 
- */ -template -class element_relational_comparator2 { - public: - /** - * @brief Construct type-dispatched function object for performing a - * relational comparison between two elements. - * - * @note `lhs` and `rhs` may be the same. - * - * @param lhs The column containing the first element - * @param rhs The column containing the second element (may be the same as lhs) - * @param null_precedence Indicates how null values are ordered with other - * values - */ - __host__ __device__ element_relational_comparator2(column_device_view lhs, - column_device_view rhs, - null_order null_precedence) - : lhs{lhs}, rhs{rhs}, null_precedence{null_precedence} - { - } - - /** - * @brief Performs a relational comparison between the specified elements - * - * @param lhs_element_index The index of the first element - * @param rhs_element_index The index of the second element - * @param null_precedence Indicates how null values are ordered with other - * values - * @return weak_ordering2 Indicates the relationship between the elements in - * the `lhs` and `rhs` columns. 
- */ - template ()>* = nullptr> - __device__ weak_ordering2 operator()(size_type lhs_element_index, - size_type rhs_element_index) const noexcept - { - if (has_nulls) { - bool const lhs_is_null{lhs.is_null(lhs_element_index)}; - bool const rhs_is_null{rhs.is_null(rhs_element_index)}; - - if (lhs_is_null or rhs_is_null) { // atleast one is null - return null_compare2(lhs_is_null, rhs_is_null, null_precedence); - } - } - - return relational_compare2(lhs.element(lhs_element_index), - rhs.element(rhs_element_index)); - } - - template ()>* = nullptr> - __device__ weak_ordering2 operator()(size_type lhs_element_index, size_type rhs_element_index) - { - cudf_assert(false && "Attempted to compare elements of uncomparable types."); - return weak_ordering2::LESS; - } - - private: - column_device_view lhs; - column_device_view rhs; - null_order null_precedence; -}; - -template -struct device_stack { - __device__ device_stack(T* stack_storage, int capacity) - : stack(stack_storage), capacity(capacity), size(0) - { - } - __device__ void push(T const& val) - { - cudf_assert(size < capacity and "Stack overflow"); - stack[size++] = val; - } - __device__ T pop() - { - cudf_assert(size > 0 and "Stack underflow"); - return stack[--size]; - } - __device__ T top() - { - cudf_assert(size > 0 and "Stack underflow"); - return stack[size - 1]; - } - __device__ bool empty() { return size == 0; } - - private: - T* stack; - int capacity; - int size; -}; - -/** - * @brief Computes whether one row is lexicographically *less* than another row. - * - * Lexicographic ordering is determined by: - * - Two rows are compared element by element. - * - The first mismatching element defines which row is lexicographically less - * or greater than the other. - * - * Lexicographic ordering is exactly equivalent to doing an alphabetical sort of - * two words, for example, `aac` would be *less* than (or precede) `abb`. 
The - * second letter in both words is the first non-equal letter, and `a < b`, thus - * `aac < abb`. - * - * @tparam has_nulls Indicates the potential for null values in either row. - */ -template -class row_lexicographic_comparator2 { - public: - /** - * @brief Construct a function object for performing a lexicographic - * comparison between the rows of two tables. - * - * @throws cudf::logic_error if `lhs.num_columns() != rhs.num_columns()` - * @throws cudf::logic_error if column types of `lhs` and `rhs` are not comparable. - * - * @param lhs The first table - * @param rhs The second table (may be the same table as `lhs`) - * @param column_order Optional, device array the same length as a row that - * indicates the desired ascending/descending order of each column in a row. - * If `nullptr`, it is assumed all columns are sorted in ascending order. - * @param null_precedence Optional, device array the same length as a row - * and indicates how null values compare to all other for every column. If - * it is nullptr, then null precedence would be `null_order::BEFORE` for all - * columns. - */ - row_lexicographic_comparator2(table_device_view lhs, - table_device_view rhs, - order const* column_order = nullptr, - null_order const* null_precedence = nullptr) - : _lhs{lhs}, _rhs{rhs}, _column_order{column_order}, _null_precedence{null_precedence} - { - CUDF_EXPECTS(_lhs.num_columns() == _rhs.num_columns(), "Mismatched number of columns."); - // CUDF_EXPECTS(detail::is_relationally_comparable(_lhs, _rhs), - // "Attempted to compare elements of uncomparable types."); - } - - /** - * @brief Checks whether the row at `lhs_index` in the `lhs` table compares - * lexicographically less than the row at `rhs_index` in the `rhs` table. 
- * - * @param lhs_index The index of row in the `lhs` table to examine - * @param rhs_index The index of the row in the `rhs` table to examine - * @return `true` if row from the `lhs` table compares less than row in the - * `rhs` table - */ - __device__ bool operator()(size_type lhs_index, size_type rhs_index) const noexcept - { - using stack_value_type = - thrust::tuple; - stack_value_type stack_storage[10]; - - for (size_type i = 0; i < _lhs.num_columns(); ++i) { - device_stack stack(stack_storage, 9); - bool ascending = (_column_order == nullptr) or (_column_order[i] == order::ASCENDING); - - weak_ordering2 state{weak_ordering2::EQUIVALENT}; - null_order null_precedence = - _null_precedence == nullptr ? null_order::BEFORE : _null_precedence[i]; - - column_device_view const* lcol = _lhs.begin() + i; - column_device_view const* rcol = _rhs.begin() + i; - size_t curr_child = 0; - - while (true) { - bool const lhs_is_null{lcol->is_null(lhs_index)}; - bool const rhs_is_null{rcol->is_null(rhs_index)}; - - if (lhs_is_null or rhs_is_null) { // atleast one is null - state = null_compare2(lhs_is_null, rhs_is_null, null_precedence); - if (state != weak_ordering2::EQUIVALENT) break; - } else if (lcol->type().id() != type_id::STRUCT) { - auto comparator = - element_relational_comparator2{*lcol, *rcol, null_precedence}; - state = cudf::type_dispatcher(lcol->type(), comparator, lhs_index, rhs_index); - if (state != weak_ordering2::EQUIVALENT) break; - } - - // Reaching here means the nullability was same and we need to continue comparing - if (lcol->type().id() == type_id::STRUCT) { - stack.push({lcol, rcol, 0}); - } else { - // unwind stack until we reach a struct level with children still left to compare - bool completed_comparison = false; - do { - if (stack.empty()) { - completed_comparison = true; - break; - } - thrust::tie(lcol, rcol, curr_child) = stack.pop(); - } while (lcol->num_child_columns() <= curr_child + 1); - if (completed_comparison) { break; } - 
stack.push({lcol, rcol, curr_child + 1}); - // break; - } - - // The top of the stack now is where we have to continue comparing from - thrust::tie(lcol, rcol, curr_child) = stack.top(); - - lcol = &lcol->children()[curr_child]; - rcol = &rcol->children()[curr_child]; - } - - if (state == weak_ordering2::EQUIVALENT) { continue; } - - return state == (ascending ? weak_ordering2::LESS : weak_ordering2::GREATER); - } - return false; - } - - private: - table_device_view _lhs; - table_device_view _rhs; - null_order const* _null_precedence{}; - order const* _column_order{}; -}; // class row_lexicographic_comparator2 - -} // namespace cudf diff --git a/cpp/include/cudf/table/row_operator3.cuh b/cpp/include/cudf/table/row_operator3.cuh deleted file mode 100644 index af7cf24269d..00000000000 --- a/cpp/include/cudf/table/row_operator3.cuh +++ /dev/null @@ -1,442 +0,0 @@ -/* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include - -namespace cudf { -namespace experimental { - -template -struct non_nested_id_to_type { - using type = std::conditional_t>; -}; - -inline size_type __device__ row_to_value_idx(size_type idx, column_device_view col) -{ - while (col.type().id() == type_id::LIST or col.type().id() == type_id::STRUCT) { - if (col.type().id() == type_id::STRUCT) { - idx += col.offset(); - col = col.child(0); - } else { - auto offset_col = col.child(lists_column_view::offsets_column_index); - idx = offset_col.element(idx + col.offset()); - col = col.child(lists_column_view::child_column_index); - } - } - return idx; -} - -/** - * @brief Performs a relational comparison between two elements in two columns. - * - * @tparam Nullate A cudf::nullate type describing how to check for nulls. - */ -template -class element_relational_comparator { - public: - /** - * @brief Construct type-dispatched function object for performing a - * relational comparison between two elements. - * - * @note `lhs` and `rhs` may be the same. - * - * @param lhs The column containing the first element - * @param rhs The column containing the second element (may be the same as lhs) - * @param has_nulls Indicates if either input column contains nulls. 
- * @param null_precedence Indicates how null values are ordered with other values - */ - __host__ __device__ element_relational_comparator(Nullate has_nulls, - column_device_view lhs, - column_device_view rhs, - null_order null_precedence, - int depth = std::numeric_limits::max(), - size_type* dremel_offsets = nullptr, - uint8_t* rep_level = nullptr, - uint8_t* def_level = nullptr, - uint8_t max_def_level = 0) - : lhs{lhs}, - rhs{rhs}, - nulls{has_nulls}, - null_precedence{null_precedence}, - depth{depth}, - dremel_offsets{dremel_offsets}, - rep_level{rep_level}, - def_level{def_level}, - max_def_level{max_def_level} - { - } - - __host__ __device__ element_relational_comparator(Nullate has_nulls, - column_device_view lhs, - column_device_view rhs) - : lhs{lhs}, rhs{rhs}, nulls{has_nulls} - { - } - - /** - * @brief Performs a relational comparison between the specified elements - * - * @param lhs_element_index The index of the first element - * @param rhs_element_index The index of the second element - * @return Indicates the relationship between the elements in - * the `lhs` and `rhs` columns. 
- */ - template ()>* = nullptr> - __device__ thrust::pair operator()(size_type lhs_element_index, - size_type rhs_element_index) const noexcept - { - if (nulls) { - bool const lhs_is_null{lhs.is_null(lhs_element_index)}; - bool const rhs_is_null{rhs.is_null(rhs_element_index)}; - - if (lhs_is_null or rhs_is_null) { // at least one is null - return thrust::make_pair(null_compare(lhs_is_null, rhs_is_null, null_precedence), depth); - } - } - - return thrust::make_pair(relational_compare(lhs.element(lhs_element_index), - rhs.element(rhs_element_index)), - std::numeric_limits::max()); - } - - template () and - not is_nested())> - __device__ thrust::pair operator()(size_type lhs_element_index, - size_type rhs_element_index) - { - cudf_assert(false && "Attempted to compare elements of uncomparable types."); - return thrust::make_pair(weak_ordering::LESS, std::numeric_limits::max()); - } - - template () and - std::is_same_v)> - __device__ thrust::pair operator()(size_type lhs_element_index, - size_type rhs_element_index) - { - weak_ordering state{weak_ordering::EQUIVALENT}; - int last_null_depth; - - column_device_view lcol = lhs; - column_device_view rcol = rhs; - while (lcol.type().id() == type_id::STRUCT) { - bool const lhs_is_null{lcol.is_null(lhs_element_index)}; - bool const rhs_is_null{rcol.is_null(rhs_element_index)}; - - if (lhs_is_null or rhs_is_null) { // atleast one is null - state = null_compare(lhs_is_null, rhs_is_null, null_precedence); - last_null_depth = depth; - return thrust::make_pair(state, last_null_depth); - } - - lcol = lcol.children()[0]; - rcol = rcol.children()[0]; - ++depth; - } - - if (state == weak_ordering::EQUIVALENT) { - auto comparator = element_relational_comparator{nulls, lcol, rcol, null_precedence}; - thrust::tie(state, last_null_depth) = cudf::type_dispatcher( - lcol.type(), comparator, lhs_element_index, rhs_element_index); - } - - return thrust::make_pair(state, last_null_depth); - } - - template () and - std::is_same_v)> - 
__device__ thrust::pair operator()(size_type lhs_element_index, - size_type rhs_element_index) - { - auto l_start = dremel_offsets[lhs_element_index]; - auto l_end = dremel_offsets[lhs_element_index + 1]; - auto r_start = dremel_offsets[rhs_element_index]; - auto r_end = dremel_offsets[rhs_element_index + 1]; - auto lc_start = row_to_value_idx(lhs_element_index, lhs); - auto rc_start = row_to_value_idx(rhs_element_index, rhs); - column_device_view lcol = lhs; - column_device_view rcol = rhs; - while (lcol.type().id() == type_id::LIST) { - lcol = lcol.child(lists_column_view::child_column_index); - rcol = rcol.child(lists_column_view::child_column_index); - } - // printf("max_def_level: %d\n", max_def_level); - - // printf("t: %d, lhs_element_index: %d, rhs_element_index: %d\n", - // threadIdx.x, - // lhs_element_index, - // rhs_element_index); - // printf("t: %d, l_start: %d, l_end: %d, r_start: %d, r_end: %d\n", - // threadIdx.x, - // l_start, - // l_end, - // r_start, - // r_end); - weak_ordering state{weak_ordering::EQUIVALENT}; - for (int i = l_start, j = r_start, m = lc_start, n = rc_start; i < l_end and j < r_end; - ++i, ++j) { - // printf("t: %d, i: %d, j: %d, m: %d, n: %d\n", threadIdx.x, i, j, m, n); - // printf("t: %d, def_l: %d, def_r: %d, rep_l: %d, rep_r: %d\n", - // threadIdx.x, - // def_level[i], - // def_level[j], - // rep_level[i], - // rep_level[j]); - if (def_level[i] != def_level[j]) { - state = (def_level[i] < def_level[j]) ? weak_ordering::LESS : weak_ordering::GREATER; - // printf("t: %d, def, state: %d\n", threadIdx.x, state); - return thrust::make_pair(state, depth); - } - if (rep_level[i] != rep_level[j]) { - state = (rep_level[i] < rep_level[j]) ? 
weak_ordering::LESS : weak_ordering::GREATER; - // printf("t: %d, rep, state: %d\n", threadIdx.x, state); - return thrust::make_pair(state, depth); - } - if (def_level[i] == max_def_level) { - auto comparator = element_relational_comparator{nulls, lcol, rcol, null_precedence}; - thrust::tie(state, depth) = - cudf::type_dispatcher(lcol.type(), comparator, m, n); - if (state != weak_ordering::EQUIVALENT) { - // printf("t: %d, leaf, state: %d\n", threadIdx.x, state); - return thrust::make_pair(state, depth); - } - ++m; - ++n; - } - } - state = (l_end - l_start < r_end - r_start) ? weak_ordering::LESS - : (l_end - l_start > r_end - r_start) ? weak_ordering::GREATER - : weak_ordering::EQUIVALENT; - return thrust::make_pair(state, depth); - } - - private: - column_device_view lhs; - column_device_view rhs; - Nullate nulls; - null_order null_precedence{}; - int depth{std::numeric_limits::max()}; - size_type* dremel_offsets; - uint8_t* rep_level; - uint8_t* def_level; - uint8_t* max_def_levels; - uint8_t max_def_level{0}; -}; - -/** - * @brief Computes whether one row is lexicographically *less* than another row. - * - * Lexicographic ordering is determined by: - * - Two rows are compared element by element. - * - The first mismatching element defines which row is lexicographically less - * or greater than the other. - * - * Lexicographic ordering is exactly equivalent to doing an alphabetical sort of - * two words, for example, `aac` would be *less* than (or precede) `abb`. The - * second letter in both words is the first non-equal letter, and `a < b`, thus - * `aac < abb`. - * - * @tparam Nullate A cudf::nullate type describing how to check for nulls. - */ -template -class row_lexicographic_comparator { - public: - /** - * @brief Construct a function object for performing a lexicographic - * comparison between the rows of two tables. 
- * - * @throws cudf::logic_error if `lhs.num_columns() != rhs.num_columns()` - * @throws cudf::logic_error if column types of `lhs` and `rhs` are not comparable. - * - * @param lhs The first table - * @param rhs The second table (may be the same table as `lhs`) - * @param has_nulls Indicates if either input table contains columns with nulls. - * @param column_order Optional, device array the same length as a row that - * indicates the desired ascending/descending order of each column in a row. - * If `nullptr`, it is assumed all columns are sorted in ascending order. - * @param null_precedence Optional, device array the same length as a row - * and indicates how null values compare to all other for every column. If - * it is nullptr, then null precedence would be `null_order::BEFORE` for all - * columns. - */ - row_lexicographic_comparator(Nullate has_nulls, - table_device_view lhs, - table_device_view rhs, - int const* depth = nullptr, - order const* column_order = nullptr, - null_order const* null_precedence = nullptr, - size_type** dremel_offsets = nullptr, - uint8_t** rep_levels = nullptr, - uint8_t** def_levels = nullptr, - uint8_t* max_def_levels = nullptr) - : _lhs{lhs}, - _rhs{rhs}, - _nulls{has_nulls}, - _depth{depth}, - _column_order{column_order}, - _null_precedence{null_precedence}, - _dremel_offsets{dremel_offsets}, - _rep_levels{rep_levels}, - _def_levels{def_levels}, - _max_def_levels{max_def_levels} - { - CUDF_EXPECTS(_lhs.num_columns() == _rhs.num_columns(), "Mismatched number of columns."); - // CUDF_EXPECTS(detail::is_relationally_comparable(_lhs, _rhs), - // "Attempted to compare elements of uncomparable types."); - } - - /** - * @brief Checks whether the row at `lhs_index` in the `lhs` table compares - * lexicographically less than the row at `rhs_index` in the `rhs` table. 
- * - * @param lhs_index The index of row in the `lhs` table to examine - * @param rhs_index The index of the row in the `rhs` table to examine - * @return `true` if row from the `lhs` table compares less than row in the - * `rhs` table - */ - __device__ bool operator()(size_type lhs_index, size_type rhs_index) const noexcept - { - int last_null_depth = std::numeric_limits::max(); - for (size_type i = 0; i < _lhs.num_columns(); ++i) { - int depth = _depth == nullptr ? std::numeric_limits::max() : _depth[i]; - if (depth > last_null_depth) { continue; } - - bool ascending = (_column_order == nullptr) or (_column_order[i] == order::ASCENDING); - - null_order null_precedence = - _null_precedence == nullptr ? null_order::BEFORE : _null_precedence[i]; - - auto comparator = element_relational_comparator{_nulls, - _lhs.column(i), - _rhs.column(i), - null_precedence, - depth, - _dremel_offsets[i], - _rep_levels[i], - _def_levels[i], - _max_def_levels[i]}; - - weak_ordering state; - thrust::tie(state, last_null_depth) = - cudf::type_dispatcher(_lhs.column(i).type(), comparator, lhs_index, rhs_index); - - if (state == weak_ordering::EQUIVALENT) { continue; } - - return state == (ascending ? 
weak_ordering::LESS : weak_ordering::GREATER); - } - return false; - } - - private: - table_device_view _lhs; - table_device_view _rhs; - Nullate _nulls{}; - null_order const* _null_precedence{}; - order const* _column_order{}; - int const* _depth; - size_type** _dremel_offsets; - uint8_t** _rep_levels; - uint8_t** _def_levels; - uint8_t* _max_def_levels; -}; // class row_lexicographic_comparator - -/** - * @brief Dremel data that describes one nested type column - * - * @see get_dremel_data() - */ -struct dremel_data { - rmm::device_uvector dremel_offsets; - rmm::device_uvector rep_level; - rmm::device_uvector def_level; - - size_type leaf_data_size; -}; - -struct row_lex_operator { - row_lex_operator(table_view const& lhs, - table_view const& rhs, - host_span column_order, - host_span null_precedence, - rmm::cuda_stream_view stream); - - row_lex_operator(table_view const& t, - host_span column_order, - host_span null_precedence, - rmm::cuda_stream_view stream); - - template - row_lexicographic_comparator device_comparator() - { - auto lhs = **d_lhs; - auto rhs = (d_rhs ? 
**d_rhs : **d_lhs); - if constexpr (std::is_same_v) { - return row_lexicographic_comparator(Nullate{any_nulls}, - lhs, - rhs, - d_depths.data(), - d_column_order.data(), - d_null_precedence.data(), - d_dremel_offsets.data(), - d_rep_levels.data(), - d_def_levels.data(), - d_max_def_levels.data()); - } else { - return row_lexicographic_comparator( - Nullate{}, lhs, rhs, d_depths.data(), d_column_order.data(), d_null_precedence.data()); - } - } - - private: - using table_device_view_owner = - std::invoke_result_t; - - std::unique_ptr d_lhs; - std::unique_ptr d_rhs; - rmm::device_uvector d_column_order; - rmm::device_uvector d_null_precedence; - rmm::device_uvector d_depths; - - // List related pre-computation - std::vector dremel_data; - rmm::device_uvector d_dremel_offsets; - rmm::device_uvector d_rep_levels; - rmm::device_uvector d_def_levels; - rmm::device_uvector d_max_def_levels; - bool any_nulls; -}; - -} // namespace experimental -} // namespace cudf diff --git a/cpp/src/sort/sort2.cu b/cpp/src/sort/sort2.cu deleted file mode 100644 index 0b25d003cac..00000000000 --- a/cpp/src/sort/sort2.cu +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include - -#include -#include - -namespace cudf { -namespace detail { -namespace experimental { - -/** - * @copydoc - * sorted_order(table_view&,std::vector,std::vector,rmm::mr::device_memory_resource*) - * - * @param stream CUDA stream used for device memory operations and kernel launches - */ -std::unique_ptr sorted_order2(table_view input, - std::vector const& column_order, - std::vector const& null_precedence, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - if (input.num_rows() == 0 or input.num_columns() == 0) { - return cudf::make_numeric_column(data_type(type_to_id()), 0); - } - - std::unique_ptr sorted_indices = cudf::make_numeric_column( - data_type(type_to_id()), input.num_rows(), mask_state::UNALLOCATED, stream, mr); - mutable_column_view mutable_indices_view = sorted_indices->mutable_view(); - thrust::sequence(rmm::exec_policy(stream), - mutable_indices_view.begin(), - mutable_indices_view.end(), - 0); - - auto comp = cudf::experimental::row_lex_operator(input, column_order, null_precedence, stream); - - thrust::sort(rmm::exec_policy(stream), - mutable_indices_view.begin(), - mutable_indices_view.end(), - comp.device_comparator()); - // protection for temporary owning comparison object - stream.synchronize(); - - return sorted_indices; -} - -} // namespace experimental -} // namespace detail -} // namespace cudf diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu index 14f366134bb..6017720b733 100644 --- a/cpp/src/table/row_operators.cu +++ b/cpp/src/table/row_operators.cu @@ -324,8 +324,16 @@ void check_lex_compatibility(table_view const& input) // Basically check if there's any LIST of STRUCT or STRUCT of LIST hiding anywhere in the table column_checker_fn_t check_column = [&](column_view const& c) { if (c.type().id() == type_id::LIST) { - 
CUDF_EXPECTS(c.child(lists_column_view::child_column_index).type().id() != type_id::STRUCT, + auto const& list_col = lists_column_view(c); + CUDF_EXPECTS(list_col.child().type().id() != type_id::STRUCT, "Cannot lexicographic compare a table with a LIST of STRUCT column"); + check_column(list_col.child()); + } else if (c.type().id() == type_id::STRUCT) { + for (auto child = c.child_begin(); child < c.child_end(); ++child) { + CUDF_EXPECTS(child->type().id() != type_id::LIST, + "Cannot lexicographic compare a table with a STRUCT of LIST column"); + check_column(*child); + } } // TODO: more copying of logic from row_operators2.cu if (not is_nested(c.type())) { @@ -333,9 +341,6 @@ void check_lex_compatibility(table_view const& input) "Cannot lexicographic compare a table with a column of type " + jit::get_type_name(c.type())); } - for (auto child = c.child_begin(); child < c.child_end(); ++child) { - check_column(*child); - } }; for (column_view const& c : input) { check_column(c); diff --git a/cpp/src/table/row_operators2.cu b/cpp/src/table/row_operators2.cu deleted file mode 100644 index 3b9dad03194..00000000000 --- a/cpp/src/table/row_operators2.cu +++ /dev/null @@ -1,608 +0,0 @@ -/* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "cudf/detail/iterator.cuh" -#include "cudf/detail/utilities/cuda.cuh" -#include "thrust/gather.h" -#include "thrust/iterator/discard_iterator.h" -#include -#include -#include -#include -#include -#include - -#include - -namespace cudf { -namespace experimental { - -namespace { - -auto struct_lex_verticalize(table_view input, - host_span column_order = {}, - host_span null_precedence = {}) -{ - // auto [table, null_masks] = superimpose_parent_nulls(input); - - auto table = input; - std::vector verticalized_columns; - std::vector new_column_order; - std::vector new_null_precedence; - std::vector verticalized_col_depths; - for (size_type col_idx = 0; col_idx < table.num_columns(); ++col_idx) { - auto const& col = table.column(col_idx); - if (col.type().id() == type_id::STRUCT) { - // convert and insert - std::vector r_verticalized_columns; - std::vector r_verticalized_col_depths; - std::vector flattened; - std::vector depths; - // TODO: Here I added a bogus leaf column at the beginning to help in the while loop below. - // Refactor the while loop so that it can handle the last case. 
- flattened.push_back(make_empty_column(type_id::INT32)->view()); - std::function recursive_child = [&](column_view const& c, - int depth) { - flattened.push_back(c); - depths.push_back(depth); - for (int child_idx = 0; child_idx < c.num_children(); ++child_idx) { - recursive_child(c.child(child_idx), depth + 1); - } - }; - recursive_child(col, 0); - int curr_col_idx = flattened.size() - 1; - column_view curr_col = flattened[curr_col_idx]; - while (curr_col_idx > 0) { - auto const& prev_col = flattened[curr_col_idx - 1]; - if (not is_nested(prev_col.type())) { - // We hit a column that's a leaf so seal this hierarchy - r_verticalized_columns.push_back(curr_col); - r_verticalized_col_depths.push_back(depths[curr_col_idx - 1]); - curr_col = prev_col; - } else { - curr_col = column_view(prev_col.type(), - prev_col.size(), - nullptr, - prev_col.null_mask(), - UNKNOWN_NULL_COUNT, - prev_col.offset(), - {curr_col}); - } - --curr_col_idx; - } - verticalized_columns.insert( - verticalized_columns.end(), r_verticalized_columns.rbegin(), r_verticalized_columns.rend()); - verticalized_col_depths.insert(verticalized_col_depths.end(), - r_verticalized_col_depths.rbegin(), - r_verticalized_col_depths.rend()); - if (not column_order.empty()) { - new_column_order.insert( - new_column_order.end(), r_verticalized_columns.size(), column_order[col_idx]); - } - if (not null_precedence.empty()) { - new_null_precedence.insert( - new_null_precedence.end(), r_verticalized_columns.size(), null_precedence[col_idx]); - } - } else { - verticalized_columns.push_back(col); - } - } - return std::make_tuple(table_view(verticalized_columns), - std::move(new_column_order), - std::move(new_null_precedence), - std::move(verticalized_col_depths)); -} - -struct def_level_fn { - column_device_view const* parent_col; - uint8_t const* d_nullability; - uint8_t sub_level_start; - uint8_t curr_def_level; - - __device__ uint32_t operator()(size_type i) - { - uint32_t def = curr_def_level; - uint8_t l = 
sub_level_start; - bool is_col_struct = false; - auto col = *parent_col; - do { - // If col not nullable then it does not contribute to def levels - if (d_nullability[l]) { - if (not col.nullable() or bit_is_set(col.null_mask(), i)) { - ++def; - } else { // We have found the shallowest level at which this row is null - break; - } - } - is_col_struct = (col.type().id() == type_id::STRUCT); - if (is_col_struct) { - col = col.child(0); - ++l; - } - } while (is_col_struct); - return def; - } -}; - -dremel_data get_dremel_data(column_view h_col, - // TODO(cp): use device_span once it is converted to a single hd_vec - rmm::device_uvector const& d_nullability, - std::vector const& nullability, - rmm::cuda_stream_view stream) -{ - auto get_list_level = [](column_view col) { - while (col.type().id() == type_id::STRUCT) { - col = col.child(0); - } - return col; - }; - - auto get_empties = [&](column_view col, size_type start, size_type end) { - auto lcv = lists_column_view(get_list_level(col)); - rmm::device_uvector empties_idx(lcv.size(), stream); - rmm::device_uvector empties(lcv.size(), stream); - auto d_off = lcv.offsets().data(); - - auto empties_idx_end = - thrust::copy_if(rmm::exec_policy(stream), - thrust::make_counting_iterator(start), - thrust::make_counting_iterator(end), - empties_idx.begin(), - [d_off] __device__(auto i) { return d_off[i] == d_off[i + 1]; }); - auto empties_end = thrust::gather(rmm::exec_policy(stream), - empties_idx.begin(), - empties_idx_end, - lcv.offsets().begin(), - empties.begin()); - - auto empties_size = empties_end - empties.begin(); - return std::make_tuple(std::move(empties), std::move(empties_idx), empties_size); - }; - - auto curr_col = h_col; - std::vector nesting_levels; - std::vector def_at_level; - std::vector start_at_sub_level; - uint8_t curr_nesting_level_idx = 0; - - auto add_def_at_level = [&](column_view col) { - // Add up all def level contributions in this column all the way till the first list column - // appears in the 
hierarchy or until we get to leaf - uint32_t def = 0; - start_at_sub_level.push_back(curr_nesting_level_idx); - while (col.type().id() == type_id::STRUCT) { - def += (nullability[curr_nesting_level_idx]) ? 1 : 0; - col = col.child(0); - ++curr_nesting_level_idx; - } - // At the end of all those structs is either a list column or the leaf. Leaf column contributes - // at least one def level. It doesn't matter what the leaf contributes because it'll be at the - // end of the exclusive scan. - def += (nullability[curr_nesting_level_idx]) ? 2 : 1; - def_at_level.push_back(def); - ++curr_nesting_level_idx; - }; - while (cudf::is_nested(curr_col.type())) { - nesting_levels.push_back(curr_col); - add_def_at_level(curr_col); - while (curr_col.type().id() == type_id::STRUCT) { - // Go down the hierarchy until we get to the LIST or the leaf level - curr_col = curr_col.child(0); - } - if (curr_col.type().id() == type_id::LIST) { - curr_col = curr_col.child(lists_column_view::child_column_index); - if (not is_nested(curr_col.type())) { - // Special case: when the leaf data column is the immediate child of the list col then we - // want it to be included right away. Otherwise the struct containing it will be included in - // the next iteration of this loop. - nesting_levels.push_back(curr_col); - add_def_at_level(curr_col); - break; - } - } - } - - std::unique_ptr device_view_owners; - column_device_view* d_nesting_levels; - std::tie(device_view_owners, d_nesting_levels) = - contiguous_copy_column_device_views(nesting_levels, stream); - - thrust::exclusive_scan( - thrust::host, def_at_level.begin(), def_at_level.end(), def_at_level.begin()); - - // Sliced list column views only have offsets applied to top level. Get offsets for each level. 
- rmm::device_uvector d_column_offsets(nesting_levels.size(), stream); - rmm::device_uvector d_column_ends(nesting_levels.size(), stream); - - auto d_col = column_device_view::create(h_col, stream); - cudf::detail::device_single_thread( - [offset_at_level = d_column_offsets.data(), - end_idx_at_level = d_column_ends.data(), - col = *d_col] __device__() { - auto curr_col = col; - size_type off = curr_col.offset(); - size_type end = off + curr_col.size(); - size_type level = 0; - offset_at_level[level] = off; - end_idx_at_level[level] = end; - ++level; - // Apply offset recursively until we get to leaf data - // Skip doing the following for any structs we encounter in between. - while (curr_col.type().id() == type_id::LIST or curr_col.type().id() == type_id::STRUCT) { - if (curr_col.type().id() == type_id::LIST) { - off = curr_col.child(lists_column_view::offsets_column_index).element(off); - end = curr_col.child(lists_column_view::offsets_column_index).element(end); - offset_at_level[level] = off; - end_idx_at_level[level] = end; - ++level; - curr_col = curr_col.child(lists_column_view::child_column_index); - } else { - curr_col = curr_col.child(0); - } - } - }, - stream); - - thrust::host_vector column_offsets = - cudf::detail::make_host_vector_async(d_column_offsets, stream); - thrust::host_vector column_ends = - cudf::detail::make_host_vector_async(d_column_ends, stream); - stream.synchronize(); - - size_t max_vals_size = 0; - for (size_t l = 0; l < column_offsets.size(); ++l) { - max_vals_size += column_ends[l] - column_offsets[l]; - } - - rmm::device_uvector rep_level(max_vals_size, stream); - rmm::device_uvector def_level(max_vals_size, stream); - - rmm::device_uvector temp_rep_vals(max_vals_size, stream); - rmm::device_uvector temp_def_vals(max_vals_size, stream); - rmm::device_uvector new_offsets(0, stream); - size_type curr_rep_values_size = 0; - { - // At this point, curr_col contains the leaf column. Max nesting level is - // nesting_levels.size(). 
- - // We are going to start by merging the last column in nesting_levels (the leaf, which is at the - // index `nesting_levels.size() - 1`) with the second-to-last (which is at - // `nesting_levels.size() - 2`). - size_t level = nesting_levels.size() - 2; - curr_col = nesting_levels[level]; - auto lcv = lists_column_view(get_list_level(curr_col)); - auto offset_size_at_level = column_ends[level] - column_offsets[level] + 1; - - // Get empties at this level - rmm::device_uvector empties(0, stream); - rmm::device_uvector empties_idx(0, stream); - size_t empties_size; - std::tie(empties, empties_idx, empties_size) = - get_empties(nesting_levels[level], column_offsets[level], column_ends[level]); - - // Merge empty at deepest parent level with the rep, def level vals at leaf level - - auto input_parent_rep_it = thrust::make_constant_iterator(level); - auto input_parent_def_it = - thrust::make_transform_iterator(empties_idx.begin(), - def_level_fn{d_nesting_levels + level, - d_nullability.data(), - start_at_sub_level[level], - def_at_level[level]}); - - // `nesting_levels.size()` == no of list levels + leaf. 
Max repetition level = no of list levels - auto input_child_rep_it = thrust::make_constant_iterator(nesting_levels.size() - 1); - auto input_child_def_it = - thrust::make_transform_iterator(thrust::make_counting_iterator(column_offsets[level + 1]), - def_level_fn{d_nesting_levels + level + 1, - d_nullability.data(), - start_at_sub_level[level + 1], - def_at_level[level + 1]}); - - // Zip the input and output value iterators so that merge operation is done only once - auto input_parent_zip_it = - thrust::make_zip_iterator(thrust::make_tuple(input_parent_rep_it, input_parent_def_it)); - - auto input_child_zip_it = - thrust::make_zip_iterator(thrust::make_tuple(input_child_rep_it, input_child_def_it)); - - auto output_zip_it = - thrust::make_zip_iterator(thrust::make_tuple(rep_level.begin(), def_level.begin())); - - auto ends = thrust::merge_by_key(rmm::exec_policy(stream), - empties.begin(), - empties.begin() + empties_size, - thrust::make_counting_iterator(column_offsets[level + 1]), - thrust::make_counting_iterator(column_ends[level + 1]), - input_parent_zip_it, - input_child_zip_it, - thrust::make_discard_iterator(), - output_zip_it); - - curr_rep_values_size = ends.second - output_zip_it; - - // Scan to get distance by which each offset value is shifted due to the insertion of empties - auto scan_it = cudf::detail::make_counting_transform_iterator( - column_offsets[level], - [off = lcv.offsets().data(), size = lcv.offsets().size()] __device__( - auto i) -> int { return (i + 1 < size) && (off[i] == off[i + 1]); }); - rmm::device_uvector scan_out(offset_size_at_level, stream); - thrust::exclusive_scan( - rmm::exec_policy(stream), scan_it, scan_it + offset_size_at_level, scan_out.begin()); - - // Add scan output to existing offsets to get new offsets into merged rep level values - new_offsets = rmm::device_uvector(offset_size_at_level, stream); - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - offset_size_at_level, - [off = 
lcv.offsets().data() + column_offsets[level], - scan_out = scan_out.data(), - new_off = new_offsets.data()] __device__(auto i) { - new_off[i] = off[i] - off[0] + scan_out[i]; - }); - - // Set rep level values at level starts to appropriate rep level - auto scatter_it = thrust::make_constant_iterator(level); - thrust::scatter(rmm::exec_policy(stream), - scatter_it, - scatter_it + new_offsets.size() - 1, - new_offsets.begin(), - rep_level.begin()); - } - - // Having already merged the last two levels, we are now going to merge the result with the - // third-last level which is at index `nesting_levels.size() - 3`. - for (int level = nesting_levels.size() - 3; level >= 0; level--) { - curr_col = nesting_levels[level]; - auto lcv = lists_column_view(get_list_level(curr_col)); - auto offset_size_at_level = column_ends[level] - column_offsets[level] + 1; - - // Get empties at this level - rmm::device_uvector empties(0, stream); - rmm::device_uvector empties_idx(0, stream); - size_t empties_size; - std::tie(empties, empties_idx, empties_size) = - get_empties(nesting_levels[level], column_offsets[level], column_ends[level]); - - auto offset_transformer = [new_child_offsets = new_offsets.data(), - child_start = column_offsets[level + 1]] __device__(auto x) { - return new_child_offsets[x - child_start]; // (x - child's offset) - }; - - // We will be reading from old rep_levels and writing again to rep_levels. Swap the current - // rep values into temp_rep_vals so it can become the input and rep_levels can again be output. 
- std::swap(temp_rep_vals, rep_level); - std::swap(temp_def_vals, def_level); - - // Merge empty at parent level with the rep, def level vals at current level - auto transformed_empties = thrust::make_transform_iterator(empties.begin(), offset_transformer); - - auto input_parent_rep_it = thrust::make_constant_iterator(level); - auto input_parent_def_it = - thrust::make_transform_iterator(empties_idx.begin(), - def_level_fn{d_nesting_levels + level, - d_nullability.data(), - start_at_sub_level[level], - def_at_level[level]}); - - // Zip the input and output value iterators so that merge operation is done only once - auto input_parent_zip_it = - thrust::make_zip_iterator(thrust::make_tuple(input_parent_rep_it, input_parent_def_it)); - - auto input_child_zip_it = - thrust::make_zip_iterator(thrust::make_tuple(temp_rep_vals.begin(), temp_def_vals.begin())); - - auto output_zip_it = - thrust::make_zip_iterator(thrust::make_tuple(rep_level.begin(), def_level.begin())); - - auto ends = thrust::merge_by_key(rmm::exec_policy(stream), - transformed_empties, - transformed_empties + empties_size, - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(curr_rep_values_size), - input_parent_zip_it, - input_child_zip_it, - thrust::make_discard_iterator(), - output_zip_it); - - curr_rep_values_size = ends.second - output_zip_it; - - // Scan to get distance by which each offset value is shifted due to the insertion of dremel - // level value fof an empty list - auto scan_it = cudf::detail::make_counting_transform_iterator( - column_offsets[level], - [off = lcv.offsets().data(), size = lcv.offsets().size()] __device__( - auto i) -> int { return (i + 1 < size) && (off[i] == off[i + 1]); }); - rmm::device_uvector scan_out(offset_size_at_level, stream); - thrust::exclusive_scan( - rmm::exec_policy(stream), scan_it, scan_it + offset_size_at_level, scan_out.begin()); - - // Add scan output to existing offsets to get new offsets into merged rep level values - 
rmm::device_uvector temp_new_offsets(offset_size_at_level, stream); - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - offset_size_at_level, - [off = lcv.offsets().data() + column_offsets[level], - scan_out = scan_out.data(), - new_off = temp_new_offsets.data(), - offset_transformer] __device__(auto i) { - new_off[i] = offset_transformer(off[i]) + scan_out[i]; - }); - new_offsets = std::move(temp_new_offsets); - - // Set rep level values at level starts to appropriate rep level - auto scatter_it = thrust::make_constant_iterator(level); - thrust::scatter(rmm::exec_policy(stream), - scatter_it, - scatter_it + new_offsets.size() - 1, - new_offsets.begin(), - rep_level.begin()); - } - - size_t level_vals_size = new_offsets.back_element(stream); - rep_level.resize(level_vals_size, stream); - def_level.resize(level_vals_size, stream); - - stream.synchronize(); - - size_type leaf_data_size = column_ends.back() - column_offsets.back(); - - return dremel_data{ - std::move(new_offsets), std::move(rep_level), std::move(def_level), leaf_data_size}; -} - -auto list_lex_preprocess(table_view table, rmm::cuda_stream_view stream) -{ - std::vector dremel_data; - std::vector max_def_levels; - for (auto const& col : table) { - if (col.type().id() == type_id::LIST) { - // Check nullability of the list - std::vector nullability; - auto cur_col = col; - uint8_t max_def_level = 0; - while (cur_col.type().id() == type_id::LIST) { - max_def_level += (cur_col.nullable() ? 2 : 1); - nullability.push_back(static_cast(cur_col.nullable())); - cur_col = cur_col.child(lists_column_view::child_column_index); - } - max_def_level += (cur_col.nullable() ? 
1 : 0); - nullability.push_back(static_cast(cur_col.nullable())); - auto d_nullability = detail::make_device_uvector_async(nullability, stream); - dremel_data.push_back(get_dremel_data(col, d_nullability, nullability, stream)); - max_def_levels.push_back(max_def_level); - } else { - max_def_levels.push_back(0); - } - } - - std::vector dremel_offsets; - std::vector rep_levels; - std::vector def_levels; - size_type c = 0; - for (auto const& col : table) { - if (col.type().id() == type_id::LIST) { - dremel_offsets.push_back(dremel_data[c].dremel_offsets.data()); - rep_levels.push_back(dremel_data[c].rep_level.data()); - def_levels.push_back(dremel_data[c].def_level.data()); - ++c; - } else { - dremel_offsets.push_back(nullptr); - rep_levels.push_back(nullptr); - def_levels.push_back(nullptr); - } - } - auto d_dremel_offsets = detail::make_device_uvector_async(dremel_offsets, stream); - auto d_rep_levels = detail::make_device_uvector_async(rep_levels, stream); - auto d_def_levels = detail::make_device_uvector_async(def_levels, stream); - auto d_max_def_levels = detail::make_device_uvector_async(max_def_levels, stream); - return std::make_tuple(std::move(dremel_data), - std::move(d_dremel_offsets), - std::move(d_rep_levels), - std::move(d_def_levels), - std::move(d_max_def_levels)); -} - -void check_lex_compatibility(table_view const& input) -{ - // Basically check if there's any LIST of STRUCT or STRUCT of LIST hiding anywhere in the table - std::function check_column = [&](column_view const& c) { - if (c.type().id() == type_id::LIST) { - CUDF_EXPECTS(c.child(lists_column_view::child_column_index).type().id() != type_id::STRUCT, - "List of structs are not supported"); - } - for (int i = 0; i < c.num_children(); ++i) { - if (c.type().id() == type_id::STRUCT) { - CUDF_EXPECTS(c.child(i).type().id() != type_id::LIST, "Struct of Lists are not supported"); - } - check_column(c.child(i)); - } - }; - for (column_view const& c : input) { - check_column(c); - } -} - -void 
check_shape_compatibility(table_view const& lhs, table_view const& rhs) -{ - std::function check_column = - [&](column_view const& l, column_view const& r) { - CUDF_EXPECTS(l.type().id() == r.type().id(), - "Cannot compare tables with different column types"); - CUDF_EXPECTS(l.num_children() == r.num_children(), "Mismatched number of children"); - for (size_type i = 0; i < l.num_children(); ++i) { - check_column(l.child(i), r.child(i)); - } - }; - - CUDF_EXPECTS(lhs.num_columns() == rhs.num_columns(), - "Cannot compare tables with different number of columns"); - for (size_type i = 0; i < lhs.num_columns(); ++i) { - check_column(lhs.column(i), rhs.column(i)); - } -} - -} // namespace - -row_lex_operator::row_lex_operator(table_view const& t, - host_span column_order, - host_span null_precedence, - rmm::cuda_stream_view stream) - : d_column_order(0, stream), - d_null_precedence(0, stream), - d_depths(0, stream), - d_dremel_offsets(0, stream), - d_rep_levels(0, stream), - d_def_levels(0, stream), - d_max_def_levels(0, stream), - any_nulls(has_nested_nulls(t)) -{ - check_lex_compatibility(t); - auto [verticalized_lhs, new_column_order, new_null_precedence, verticalized_col_depths] = - struct_lex_verticalize(t, column_order, null_precedence); - - std::tie(dremel_data, d_dremel_offsets, d_rep_levels, d_def_levels, d_max_def_levels) = - list_lex_preprocess(verticalized_lhs, stream); - - d_lhs = - std::make_unique(table_device_view::create(verticalized_lhs, stream)); - - d_column_order = detail::make_device_uvector_async(new_column_order, stream); - d_null_precedence = detail::make_device_uvector_async(new_null_precedence, stream); - d_depths = detail::make_device_uvector_async(verticalized_col_depths, stream); -} - -row_lex_operator::row_lex_operator(table_view const& lhs, - table_view const& rhs, - host_span column_order, - host_span null_precedence, - rmm::cuda_stream_view stream) - : row_lex_operator(lhs, column_order, null_precedence, stream) -{ - 
check_lex_compatibility(rhs); - check_shape_compatibility(lhs, rhs); - - table_view verticalized_rhs; - std::tie(verticalized_rhs, std::ignore, std::ignore, std::ignore) = struct_lex_verticalize(rhs); - - d_rhs = - std::make_unique(table_device_view::create(verticalized_rhs, stream)); - - any_nulls |= has_nested_nulls(rhs); -} - -} // namespace experimental -} // namespace cudf diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index b9da1efa027..816c5a1c59c 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -235,8 +235,6 @@ ConfigureTest( sort/rank_test.cpp ) -ConfigureTest(SORT2_TEST sort/sort2_test.cpp) - # ################################################################################################## # * copying tests --------------------------------------------------------------------------------- ConfigureTest( diff --git a/cpp/tests/sort/sort2_test.cpp b/cpp/tests/sort/sort2_test.cpp deleted file mode 100644 index f67791e9291..00000000000 --- a/cpp/tests/sort/sort2_test.cpp +++ /dev/null @@ -1,217 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -struct NewRowOpTest : public cudf::test::BaseFixture { -}; - -TEST_F(NewRowOpTest, BasicStructTwoChild) -{ - using Type = int; - using column_wrapper = cudf::test::fixed_width_column_wrapper; - std::default_random_engine generator; - std::uniform_int_distribution distribution(0, 100); - - const cudf::size_type n_rows{1 << 4}; - const cudf::size_type n_cols{2}; - - // Create columns with values in the range [0,100) - std::vector columns; - columns.reserve(n_cols); - std::generate_n(std::back_inserter(columns), n_cols, [&]() { - auto elements = cudf::detail::make_counting_transform_iterator( - 0, [&](auto row) { return distribution(generator); }); - auto valids = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return i % 4 == 0 ? 
false : true; }); - return column_wrapper(elements, elements + n_rows, valids); - }); - - std::vector> cols; - std::transform(columns.begin(), columns.end(), std::back_inserter(cols), [](column_wrapper& col) { - return col.release(); - }); - - auto make_struct = [&](std::vector> child_cols, int nullfreq) { - // std::vector struct_validity; - std::uniform_int_distribution bool_distribution(0, 10 * (nullfreq)); - // std::generate_n( - // std::back_inserter(struct_validity), n_rows, [&]() { return bool_distribution(generator); - // }); - auto null_iter = cudf::detail::make_counting_transform_iterator( - 0, [&](int i) { return bool_distribution(generator); }); - - cudf::test::structs_column_wrapper struct_col(std::move(child_cols)); - auto struct_ = struct_col.release(); - struct_->set_null_mask(cudf::test::detail::make_null_mask(null_iter, null_iter + n_rows)); - return struct_; - }; - - std::vector> s2_children; - s2_children.push_back(std::move(cols[0])); - s2_children.push_back(std::move(cols[1])); - auto s2 = make_struct(std::move(s2_children), 1); - - cudf::test::print(s2->view()); - - // // Create table view - // auto input = cudf::table_view({struct_col}); - auto input = cudf::table_view({s2->view()}); - - auto result1 = cudf::sorted_order(input); - cudf::test::print(result1->view()); - auto result2 = cudf::detail::experimental::sorted_order2(input); - cudf::test::print(result2->view()); - cudf::test::expect_columns_equal(result1->view(), result2->view()); -} - -TEST_F(NewRowOpTest, DeepStruct) -{ - using Type = int; - using column_wrapper = cudf::test::fixed_width_column_wrapper; - std::default_random_engine generator; - std::uniform_int_distribution distribution(0, 100); - - const cudf::size_type n_rows{1 << 6}; - const cudf::size_type n_cols{1}; - const cudf::size_type depth{8}; - - // Create columns with values in the range [0,100) - std::vector columns; - columns.reserve(n_cols); - std::generate_n(std::back_inserter(columns), n_cols, [&, n_rows]() { - auto 
elements = cudf::detail::make_counting_transform_iterator( - 0, [&](auto row) { return distribution(generator); }); - auto valids = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return i % 3 == 0 ? false : true; }); - return column_wrapper(elements, elements + n_rows, valids); - }); - - std::vector> cols; - std::transform(columns.begin(), columns.end(), std::back_inserter(cols), [](column_wrapper& col) { - return col.release(); - }); - - std::vector> child_cols = std::move(cols); - // Lets add some layers - for (int i = 0; i < depth; i++) { - std::vector struct_validity; - std::uniform_int_distribution bool_distribution(0, 10 * (i + 1)); - std::generate_n( - std::back_inserter(struct_validity), n_rows, [&]() { return bool_distribution(generator); }); - cudf::test::structs_column_wrapper struct_col(std::move(child_cols), struct_validity); - child_cols = std::vector>{}; - child_cols.push_back(struct_col.release()); - } - - cudf::test::print(child_cols[0]->view()); - - // // Create table view - // auto input = cudf::table_view({struct_col}); - auto input = cudf::table(std::move(child_cols)); - - // auto sliced_input = cudf::slice(input, {7, input.num_rows() - 12}); - - auto result1 = cudf::sorted_order(input); - cudf::test::print(result1->view()); - auto result2 = cudf::detail::experimental::sorted_order2(input); - cudf::test::print(result2->view()); - cudf::test::expect_columns_equal(result1->view(), result2->view()); -} - -TEST_F(NewRowOpTest, SampleStructTest) -{ - using Type = int; - using column_wrapper = cudf::test::fixed_width_column_wrapper; - std::default_random_engine generator; - std::uniform_int_distribution distribution(0, 10); - - const cudf::size_type n_rows{1 << 6}; - const cudf::size_type n_cols{6}; - - // Create columns with values in the range [0,100) - std::vector columns; - columns.reserve(n_cols); - std::generate_n(std::back_inserter(columns), n_cols, [&]() { - auto elements = cudf::detail::make_counting_transform_iterator( - 
0, [&](auto row) { return distribution(generator); }); - int start = distribution(generator); - auto valids = cudf::detail::make_counting_transform_iterator( - 0, [&](auto i) { return (i + start) % 7 == 0 ? false : true; }); - return column_wrapper(elements, elements + n_rows, valids); - }); - - std::vector> cols; - std::transform(columns.begin(), columns.end(), std::back_inserter(cols), [](column_wrapper& col) { - return col.release(); - }); - - auto make_struct = [&](std::vector> child_cols, int nullfreq) { - std::vector struct_validity; - std::uniform_int_distribution bool_distribution(0, 10 * (nullfreq)); - auto null_iter = cudf::detail::make_counting_transform_iterator( - 0, [&](int i) { return bool_distribution(generator); }); - - cudf::test::structs_column_wrapper struct_col(std::move(child_cols)); - auto struct_ = struct_col.release(); - struct_->set_null_mask(cudf::test::detail::make_null_mask(null_iter, null_iter + n_rows)); - return struct_; - }; - - std::vector> s2_children; - s2_children.push_back(std::move(cols[0])); - s2_children.push_back(std::move(cols[1])); - auto s2 = make_struct(std::move(s2_children), 1); - - std::vector> s1_children; - s1_children.push_back(std::move(s2)); - s1_children.push_back(std::move(cols[2])); - auto s1 = make_struct(std::move(s1_children), 2); - - cudf::test::print(s1->view()); - - std::vector> s22_children; - s22_children.push_back(std::move(cols[3])); - s22_children.push_back(std::move(cols[4])); - auto s22 = make_struct(std::move(s22_children), 1); - - std::vector> s12_children; - s12_children.push_back(std::move(cols[5])); - s12_children.push_back(std::move(s22)); - auto s12 = make_struct(std::move(s12_children), 2); - - cudf::test::print(s12->view()); - - // // Create table view - // auto input = cudf::table_view({struct_col}); - auto input = cudf::table_view({s1->view(), s12->view()}); - - auto result1 = cudf::sorted_order(input); - cudf::test::print(result1->view()); - auto result2 = 
cudf::detail::experimental::sorted_order2(input); - cudf::test::print(result2->view()); - cudf::test::expect_columns_equal(result1->view(), result2->view()); - - std::vector col_order = {cudf::order::DESCENDING, cudf::order::ASCENDING}; - std::vector null_order = {cudf::null_order::BEFORE, cudf::null_order::AFTER}; - result1 = cudf::sorted_order(input, col_order, null_order); - result2 = cudf::detail::experimental::sorted_order2(input, col_order, null_order); - cudf::test::print(result1->view()); - cudf::test::print(result2->view()); - cudf::test::expect_columns_equal(result1->view(), result2->view()); -} - -CUDF_TEST_PROGRAM_MAIN() From 5f0d36ebd345643c79c689b5bd0ddf57f2a62415 Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Fri, 17 Jun 2022 16:58:37 +0530 Subject: [PATCH 32/78] Turn dremel data raw pointers to spans --- .../cudf/table/experimental/row_operators.cuh | 109 +++++++++--------- cpp/src/table/row_operators.cu | 19 ++- cpp/tests/sort/sort_test.cpp | 1 - 3 files changed, 64 insertions(+), 65 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index fa2f075e015..afc27927715 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -252,14 +252,14 @@ class device_row_comparator { Nullate check_nulls, table_device_view lhs, table_device_view rhs, - std::optional> depth = std::nullopt, - std::optional> column_order = std::nullopt, - std::optional> null_precedence = std::nullopt, - PhysicalElementComparator comparator = {}, - std::optional> dremel_offsets = std::nullopt, - std::optional> rep_levels = std::nullopt, - std::optional> def_levels = std::nullopt, - std::optional> max_def_levels = std::nullopt) noexcept + std::optional> depth = std::nullopt, + std::optional> column_order = std::nullopt, + std::optional> null_precedence = std::nullopt, + PhysicalElementComparator comparator = {}, + 
std::optional const>> dremel_offsets = std::nullopt, + std::optional const>> rep_levels = std::nullopt, + std::optional const>> def_levels = std::nullopt, + std::optional> max_def_levels = std::nullopt) noexcept : _lhs{lhs}, _rhs{rhs}, _check_nulls{check_nulls}, @@ -296,22 +296,22 @@ class device_row_comparator { __device__ element_comparator(Nullate check_nulls, column_device_view lhs, column_device_view rhs, - null_order null_precedence = null_order::BEFORE, - int depth = 0, - PhysicalElementComparator comparator = {}, - size_type* dremel_offsets = nullptr, - uint8_t* rep_level = nullptr, - uint8_t* def_level = nullptr, - uint8_t max_def_level = 0) + null_order null_precedence = null_order::BEFORE, + int depth = 0, + PhysicalElementComparator comparator = {}, + device_span dremel_offsets = {}, + device_span rep_level = {}, + device_span def_level = {}, + uint8_t max_def_level = 0) : _lhs{lhs}, _rhs{rhs}, _check_nulls{check_nulls}, _null_precedence{null_precedence}, _depth{depth}, - dremel_offsets{dremel_offsets}, - rep_level{rep_level}, - def_level{def_level}, - max_def_level{max_def_level}, + _dremel_offsets{dremel_offsets}, + _rep_level{rep_level}, + _def_level{def_level}, + _max_def_level{max_def_level}, _comparator{comparator} { } @@ -391,17 +391,17 @@ class device_row_comparator { __device__ cuda::std::pair operator()(size_type lhs_element_index, size_type rhs_element_index) { - auto l_start = dremel_offsets[lhs_element_index]; - auto l_end = dremel_offsets[lhs_element_index + 1]; - auto r_start = dremel_offsets[rhs_element_index]; - auto r_end = dremel_offsets[rhs_element_index + 1]; + auto l_start = _dremel_offsets[lhs_element_index]; + auto l_end = _dremel_offsets[lhs_element_index + 1]; + auto r_start = _dremel_offsets[rhs_element_index]; + auto r_end = _dremel_offsets[rhs_element_index + 1]; column_device_view lcol = _lhs.slice(lhs_element_index, 1); column_device_view rcol = _rhs.slice(rhs_element_index, 1); while (lcol.type().id() == type_id::LIST) { 
lcol = detail::lists_column_device_view(lcol).get_sliced_child(); rcol = detail::lists_column_device_view(rcol).get_sliced_child(); } - printf("max_def_level: %d\n", max_def_level); + printf("max_def_level: %d\n", _max_def_level); printf("t: %d, lhs_element_index: %d, rhs_element_index: %d\n", threadIdx.x, @@ -418,21 +418,21 @@ class device_row_comparator { printf("t: %d, i: %d, j: %d, k: %d\n", threadIdx.x, i, j, k); printf("t: %d, def_l: %d, def_r: %d, rep_l: %d, rep_r: %d\n", threadIdx.x, - def_level[i], - def_level[j], - rep_level[i], - rep_level[j]); - if (def_level[i] != def_level[j]) { - state = (def_level[i] < def_level[j]) ? weak_ordering::LESS : weak_ordering::GREATER; + _def_level[i], + _def_level[j], + _rep_level[i], + _rep_level[j]); + if (_def_level[i] != _def_level[j]) { + state = (_def_level[i] < _def_level[j]) ? weak_ordering::LESS : weak_ordering::GREATER; printf("t: %d, def, state: %d\n", threadIdx.x, state); return cuda::std::pair(state, _depth); } - if (rep_level[i] != rep_level[j]) { - state = (rep_level[i] < rep_level[j]) ? weak_ordering::LESS : weak_ordering::GREATER; + if (_rep_level[i] != _rep_level[j]) { + state = (_rep_level[i] < _rep_level[j]) ? 
weak_ordering::LESS : weak_ordering::GREATER; printf("t: %d, rep, state: %d\n", threadIdx.x, state); return cuda::std::pair(state, _depth); } - if (def_level[i] == max_def_level) { + if (_def_level[i] == _max_def_level) { auto comparator = element_comparator{_check_nulls, lcol, rcol, _null_precedence}; int last_null_depth = _depth; cuda::std::tie(state, last_null_depth) = @@ -442,7 +442,7 @@ class device_row_comparator { return cuda::std::pair(state, _depth); } ++k; - } else if (lcol.nullable() and def_level[i] == max_def_level - 1) { + } else if (lcol.nullable() and _def_level[i] == _max_def_level - 1) { ++k; } } @@ -458,10 +458,10 @@ class device_row_comparator { Nullate const _check_nulls; null_order const _null_precedence; int const _depth; - size_type* dremel_offsets; - uint8_t* rep_level; - uint8_t* def_level; - uint8_t max_def_level{0}; + device_span _dremel_offsets; + device_span _rep_level; + device_span _def_level; + uint8_t _max_def_level{0}; PhysicalElementComparator const _comparator; }; @@ -523,9 +523,9 @@ class device_row_comparator { PhysicalElementComparator const _comparator; // List related members - std::optional> _dremel_offsets; - std::optional> _rep_levels; - std::optional> _def_levels; + std::optional const>> _dremel_offsets; + std::optional const>> _rep_levels; + std::optional const>> _def_levels; std::optional> _max_def_levels; }; // class device_row_comparator @@ -635,9 +635,9 @@ struct preprocessed_table { rmm::device_uvector&& null_precedence, rmm::device_uvector&& depths, std::vector&& dremel_data, - rmm::device_uvector&& dremel_offsets, - rmm::device_uvector&& rep_levels, - rmm::device_uvector&& def_levels, + rmm::device_uvector>&& dremel_offsets, + rmm::device_uvector>&& rep_levels, + rmm::device_uvector>&& def_levels, rmm::device_uvector&& max_def_levels) : _t(std::move(table)), _column_order(std::move(column_order)), @@ -696,21 +696,22 @@ struct preprocessed_table { } // TODO: span of spans? 
- [[nodiscard]] std::optional> dremel_offsets() const + [[nodiscard]] std::optional const>> dremel_offsets() const { - return _dremel_offsets.size() ? std::optional>(_dremel_offsets) - : std::nullopt; + return _dremel_offsets.size() + ? std::optional const>>(_dremel_offsets) + : std::nullopt; } - [[nodiscard]] std::optional> rep_levels() const + [[nodiscard]] std::optional const>> rep_levels() const { - return _rep_levels.size() ? std::optional>(_rep_levels) + return _rep_levels.size() ? std::optional const>>(_rep_levels) : std::nullopt; } - [[nodiscard]] std::optional> def_levels() const + [[nodiscard]] std::optional const>> def_levels() const { - return _def_levels.size() ? std::optional>(_def_levels) + return _def_levels.size() ? std::optional const>>(_def_levels) : std::nullopt; } @@ -728,9 +729,9 @@ struct preprocessed_table { // List related pre-computation std::vector _dremel_data; - rmm::device_uvector _dremel_offsets; - rmm::device_uvector _rep_levels; - rmm::device_uvector _def_levels; + rmm::device_uvector> _dremel_offsets; + rmm::device_uvector> _rep_levels; + rmm::device_uvector> _def_levels; rmm::device_uvector _max_def_levels; }; diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu index 6017720b733..f5eec534a66 100644 --- a/cpp/src/table/row_operators.cu +++ b/cpp/src/table/row_operators.cu @@ -285,20 +285,20 @@ auto list_lex_preprocess(table_view table, rmm::cuda_stream_view stream) } } - std::vector dremel_offsets; - std::vector rep_levels; - std::vector def_levels; + std::vector> dremel_offsets; + std::vector> rep_levels; + std::vector> def_levels; size_type c = 0; for (auto const& col : table) { if (col.type().id() == type_id::LIST) { - dremel_offsets.push_back(dremel_data[c].dremel_offsets.data()); - rep_levels.push_back(dremel_data[c].rep_level.data()); - def_levels.push_back(dremel_data[c].def_level.data()); + dremel_offsets.emplace_back(dremel_data[c].dremel_offsets); + 
rep_levels.emplace_back(dremel_data[c].rep_level); + def_levels.emplace_back(dremel_data[c].def_level); ++c; } else { - dremel_offsets.push_back(nullptr); - rep_levels.push_back(nullptr); - def_levels.push_back(nullptr); + dremel_offsets.emplace_back(); + rep_levels.emplace_back(); + def_levels.emplace_back(); } } auto d_dremel_offsets = detail::make_device_uvector_async(dremel_offsets, stream); @@ -335,7 +335,6 @@ void check_lex_compatibility(table_view const& input) check_column(*child); } } - // TODO: more copying of logic from row_operators2.cu if (not is_nested(c.type())) { CUDF_EXPECTS(is_relationally_comparable(c.type()), "Cannot lexicographic compare a table with a column of type " + diff --git a/cpp/tests/sort/sort_test.cpp b/cpp/tests/sort/sort_test.cpp index ca64a3fdad4..9a31db82114 100644 --- a/cpp/tests/sort/sort_test.cpp +++ b/cpp/tests/sort/sort_test.cpp @@ -834,7 +834,6 @@ TYPED_TEST(Sort, WithEmptyListColumn) auto result = cudf::sorted_order(cudf::table_view({*L0})); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expect, *result); } -// TODO: Sliced list test struct SortByKey : public BaseFixture { }; From 941b80830b236e8e2deabee425d0b3d4bba4c877 Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Tue, 21 Jun 2022 01:07:04 +0530 Subject: [PATCH 33/78] Replace bench with nvbench and fix destroyed dremel data issue --- cpp/benchmarks/CMakeLists.txt | 4 +- .../compare/comparator_benchmark.cu | 128 ------------------ cpp/benchmarks/sort/sort_lists.cpp | 49 +++++++ .../cudf/table/experimental/row_operators.cuh | 1 + 4 files changed, 51 insertions(+), 131 deletions(-) delete mode 100644 cpp/benchmarks/compare/comparator_benchmark.cu create mode 100644 cpp/benchmarks/sort/sort_lists.cpp diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 01ad5756d4e..89edfd4466f 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -161,12 +161,10 @@ ConfigureBench(ITERATOR_BENCH iterator/iterator.cu) # * search benchmark 
------------------------------------------------------------------------------ ConfigureBench(SEARCH_BENCH search/search.cpp) -ConfigureBench(COMPARE_BENCH compare/comparator_benchmark.cu) - # ################################################################################################## # * sort benchmark -------------------------------------------------------------------------------- ConfigureBench(SORT_BENCH sort/rank.cpp sort/sort.cpp sort/sort_strings.cpp) -ConfigureNVBench(SORT_NVBENCH sort/sort_structs.cpp) +ConfigureNVBench(SORT_NVBENCH sort/sort_lists.cpp sort/sort_structs.cpp) # ################################################################################################## # * quantiles benchmark diff --git a/cpp/benchmarks/compare/comparator_benchmark.cu b/cpp/benchmarks/compare/comparator_benchmark.cu deleted file mode 100644 index c7aa5752929..00000000000 --- a/cpp/benchmarks/compare/comparator_benchmark.cu +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -template -class Sort : public cudf::benchmark { -}; - -template -static void BM_struct_sort(benchmark::State& state, bool nulls) -{ - using Type = int; - using column_wrapper = cudf::test::fixed_width_column_wrapper; - std::default_random_engine generator; - std::uniform_int_distribution distribution(0, 100); - - const cudf::size_type n_rows{(cudf::size_type)state.range(0)}; - const cudf::size_type depth{(cudf::size_type)state.range(1)}; - const cudf::size_type n_cols{1}; - - // Create columns with values in the range [0,100) - std::vector columns; - columns.reserve(n_cols); - std::generate_n(std::back_inserter(columns), n_cols, [&, n_rows]() { - auto elements = cudf::detail::make_counting_transform_iterator( - 0, [&](auto row) { return distribution(generator); }); - if (!nulls) return column_wrapper(elements, elements + n_rows); - auto valids = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return i % 10 == 0 ? 
false : true; }); - return column_wrapper(elements, elements + n_rows, valids); - }); - - std::vector> cols; - std::transform(columns.begin(), columns.end(), std::back_inserter(cols), [](column_wrapper& col) { - return col.release(); - }); - - std::vector> child_cols = std::move(cols); - // Lets add some layers - for (int i = 0; i < depth; i++) { - std::vector struct_validity; - std::uniform_int_distribution bool_distribution(0, 100 * (i + 1)); - std::generate_n( - std::back_inserter(struct_validity), n_rows, [&]() { return bool_distribution(generator); }); - cudf::test::structs_column_wrapper struct_col(std::move(child_cols), struct_validity); - child_cols = std::vector>{}; - child_cols.push_back(struct_col.release()); - } - - // // Create table view - auto input = cudf::table(std::move(child_cols)); - // auto input = cudf::table_view({cols[0]->view()}); - - for (auto _ : state) { - cuda_event_timer raii(state, true, rmm::cuda_stream_default); - - // auto result = cudf::sorted_order(input); - auto result = cudf::detail::experimental::sorted_order2(input); - } -} - -#define SORT_BENCHMARK_DEFINE(name, stable, nulls) \ - BENCHMARK_TEMPLATE_DEFINE_F(Sort, name, stable) \ - (::benchmark::State & st) { BM_struct_sort(st, nulls); } \ - BENCHMARK_REGISTER_F(Sort, name) \ - ->RangeMultiplier(8) \ - ->Ranges({{1 << 10, 1 << 26}, {1, 8}}) \ - ->UseManualTime() \ - ->Unit(benchmark::kMillisecond); - -// SORT_BENCHMARK_DEFINE(unstable, false, true) - -template -static void BM_list_sort(benchmark::State& state, bool nulls) -{ - const size_t size{(size_t)state.range(0)}; - const cudf::size_type depth{(cudf::size_type)state.range(1)}; - - data_profile table_profile; - table_profile.set_distribution_params(cudf::type_id::LIST, distribution_id::UNIFORM, 0, 5); - table_profile.set_list_depth(depth); - table_profile.set_null_frequency(0); - auto const table = - create_random_table({cudf::type_id::LIST}, 1, table_size_bytes{size}, table_profile); - - for (auto _ : state) { - 
cuda_event_timer raii(state, true, rmm::cuda_stream_default); - - auto result = cudf::detail::experimental::sorted_order2(*table); - } -} - -#define LIST_SORT_BENCHMARK_DEFINE(name, stable, nulls) \ - BENCHMARK_TEMPLATE_DEFINE_F(Sort, name, stable) \ - (::benchmark::State & st) { BM_list_sort(st, nulls); } \ - BENCHMARK_REGISTER_F(Sort, name) \ - ->RangeMultiplier(256) \ - ->Ranges({{1 << 10, 1 << 27}, {1, 4}}) \ - ->UseManualTime() \ - ->Unit(benchmark::kMillisecond); - -LIST_SORT_BENCHMARK_DEFINE(list, false, true) diff --git a/cpp/benchmarks/sort/sort_lists.cpp b/cpp/benchmarks/sort/sort_lists.cpp new file mode 100644 index 00000000000..3ebeae207c2 --- /dev/null +++ b/cpp/benchmarks/sort/sort_lists.cpp @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include + +#include + +void nvbench_sort_lists(nvbench::state& state) +{ + cudf::rmm_pool_raii pool_raii; + + const size_t size_bytes(state.get_int64("size_bytes")); + const cudf::size_type depth{static_cast(state.get_int64("depth"))}; + const double null_frequency{state.get_float64("null_frequency")}; + + data_profile table_profile; + table_profile.set_distribution_params(cudf::type_id::LIST, distribution_id::UNIFORM, 0, 5); + table_profile.set_list_depth(depth); + table_profile.set_null_frequency(null_frequency); + auto const table = + create_random_table({cudf::type_id::LIST}, table_size_bytes{size_bytes}, table_profile); + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + rmm::cuda_stream_view stream_view{launch.get_stream()}; + cudf::detail::sorted_order(*table, {}, {}, stream_view, rmm::mr::get_current_device_resource()); + }); +} + +NVBENCH_BENCH(nvbench_sort_lists) + .set_name("sort_list") + .add_int64_power_of_two_axis("size_bytes", {17}) + .add_int64_axis("depth", {1}) + .add_float64_axis("null_frequency", {0}); diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index afc27927715..2ac68aba782 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -643,6 +643,7 @@ struct preprocessed_table { _column_order(std::move(column_order)), _null_precedence(std::move(null_precedence)), _depths(std::move(depths)), + _dremel_data(std::move(dremel_data)), _dremel_offsets(std::move(dremel_offsets)), _rep_levels(std::move(rep_levels)), _def_levels(std::move(def_levels)), From 2bd2b6bd525b0d4f121e18ac76c878ab970435c7 Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Wed, 22 Jun 2022 15:16:39 +0530 Subject: [PATCH 34/78] More benchmark iterations --- cpp/benchmarks/sort/sort_lists.cpp | 6 +++--- .../cudf/table/experimental/row_operators.cuh | 19 +++++++++++-------- 2 files 
changed, 14 insertions(+), 11 deletions(-) diff --git a/cpp/benchmarks/sort/sort_lists.cpp b/cpp/benchmarks/sort/sort_lists.cpp index 3ebeae207c2..183cb89d22d 100644 --- a/cpp/benchmarks/sort/sort_lists.cpp +++ b/cpp/benchmarks/sort/sort_lists.cpp @@ -44,6 +44,6 @@ void nvbench_sort_lists(nvbench::state& state) NVBENCH_BENCH(nvbench_sort_lists) .set_name("sort_list") - .add_int64_power_of_two_axis("size_bytes", {17}) - .add_int64_axis("depth", {1}) - .add_float64_axis("null_frequency", {0}); + .add_int64_power_of_two_axis("size_bytes", {10, 18, 24, 28}) + .add_int64_axis("depth", {1, 4}) + .add_float64_axis("null_frequency", {0, 0.2}); diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 2ac68aba782..527ada0d3b3 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -205,6 +205,9 @@ struct sorting_physical_element_comparator { } }; +// #define PRINTF(...) printf(__VA_ARGS__) +#define PRINTF(...) + /** * @brief Computes the lexicographic comparison between 2 rows. 
* @@ -401,13 +404,13 @@ class device_row_comparator { lcol = detail::lists_column_device_view(lcol).get_sliced_child(); rcol = detail::lists_column_device_view(rcol).get_sliced_child(); } - printf("max_def_level: %d\n", _max_def_level); + PRINTF("max_def_level: %d\n", _max_def_level); - printf("t: %d, lhs_element_index: %d, rhs_element_index: %d\n", + PRINTF("t: %d, lhs_element_index: %d, rhs_element_index: %d\n", threadIdx.x, lhs_element_index, rhs_element_index); - printf("t: %d, l_start: %d, l_end: %d, r_start: %d, r_end: %d\n", + PRINTF("t: %d, l_start: %d, l_end: %d, r_start: %d, r_end: %d\n", threadIdx.x, l_start, l_end, @@ -415,8 +418,8 @@ class device_row_comparator { r_end); weak_ordering state{weak_ordering::EQUIVALENT}; for (int i = l_start, j = r_start, k = 0; i < l_end and j < r_end; ++i, ++j) { - printf("t: %d, i: %d, j: %d, k: %d\n", threadIdx.x, i, j, k); - printf("t: %d, def_l: %d, def_r: %d, rep_l: %d, rep_r: %d\n", + PRINTF("t: %d, i: %d, j: %d, k: %d\n", threadIdx.x, i, j, k); + PRINTF("t: %d, def_l: %d, def_r: %d, rep_l: %d, rep_r: %d\n", threadIdx.x, _def_level[i], _def_level[j], @@ -424,12 +427,12 @@ class device_row_comparator { _rep_level[j]); if (_def_level[i] != _def_level[j]) { state = (_def_level[i] < _def_level[j]) ? weak_ordering::LESS : weak_ordering::GREATER; - printf("t: %d, def, state: %d\n", threadIdx.x, state); + PRINTF("t: %d, def, state: %d\n", threadIdx.x, state); return cuda::std::pair(state, _depth); } if (_rep_level[i] != _rep_level[j]) { state = (_rep_level[i] < _rep_level[j]) ? 
weak_ordering::LESS : weak_ordering::GREATER; - printf("t: %d, rep, state: %d\n", threadIdx.x, state); + PRINTF("t: %d, rep, state: %d\n", threadIdx.x, state); return cuda::std::pair(state, _depth); } if (_def_level[i] == _max_def_level) { @@ -438,7 +441,7 @@ class device_row_comparator { cuda::std::tie(state, last_null_depth) = cudf::type_dispatcher(lcol.type(), comparator, k, k); if (state != weak_ordering::EQUIVALENT) { - printf("t: %d, leaf, state: %d\n", threadIdx.x, state); + PRINTF("t: %d, leaf, state: %d\n", threadIdx.x, state); return cuda::std::pair(state, _depth); } ++k; From 4be403c92d1a14791e53f7d349479d903df5ec82 Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Wed, 29 Jun 2022 01:29:18 +0530 Subject: [PATCH 35/78] merge pointers to dremel data into a view class --- .../cudf/table/experimental/row_operators.cuh | 161 ++++++++---------- cpp/src/table/row_operators.cu | 69 ++++---- 2 files changed, 106 insertions(+), 124 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 527ada0d3b3..c23f93620b9 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -205,6 +205,14 @@ struct sorting_physical_element_comparator { } }; +struct dremel_device_view { + size_type* offsets; + uint8_t* rep_levels; + uint8_t* def_levels; + size_type leaf_data_size; + uint8_t max_def_level; +}; + // #define PRINTF(...) printf(__VA_ARGS__) #define PRINTF(...) 
@@ -255,24 +263,18 @@ class device_row_comparator { Nullate check_nulls, table_device_view lhs, table_device_view rhs, - std::optional> depth = std::nullopt, - std::optional> column_order = std::nullopt, - std::optional> null_precedence = std::nullopt, - PhysicalElementComparator comparator = {}, - std::optional const>> dremel_offsets = std::nullopt, - std::optional const>> rep_levels = std::nullopt, - std::optional const>> def_levels = std::nullopt, - std::optional> max_def_levels = std::nullopt) noexcept + std::optional> depth = std::nullopt, + std::optional> column_order = std::nullopt, + std::optional> null_precedence = std::nullopt, + std::optional> dremel_device_views = std::nullopt, + PhysicalElementComparator comparator = {}) noexcept : _lhs{lhs}, _rhs{rhs}, _check_nulls{check_nulls}, _depth{depth}, _column_order{column_order}, _null_precedence{null_precedence}, - _dremel_offsets{dremel_offsets}, - _rep_levels{rep_levels}, - _def_levels{def_levels}, - _max_def_levels{max_def_levels}, + _dremel_device_views{dremel_device_views}, _comparator{comparator} { } @@ -302,19 +304,13 @@ class device_row_comparator { null_order null_precedence = null_order::BEFORE, int depth = 0, PhysicalElementComparator comparator = {}, - device_span dremel_offsets = {}, - device_span rep_level = {}, - device_span def_level = {}, - uint8_t max_def_level = 0) + dremel_device_view dremel_device_view = {}) : _lhs{lhs}, _rhs{rhs}, _check_nulls{check_nulls}, _null_precedence{null_precedence}, _depth{depth}, - _dremel_offsets{dremel_offsets}, - _rep_level{rep_level}, - _def_level{def_level}, - _max_def_level{max_def_level}, + _dremel_device_view{dremel_device_view}, _comparator{comparator} { } @@ -394,17 +390,24 @@ class device_row_comparator { __device__ cuda::std::pair operator()(size_type lhs_element_index, size_type rhs_element_index) { - auto l_start = _dremel_offsets[lhs_element_index]; - auto l_end = _dremel_offsets[lhs_element_index + 1]; - auto r_start = 
_dremel_offsets[rhs_element_index]; - auto r_end = _dremel_offsets[rhs_element_index + 1]; + auto const l_offsets = _dremel_device_view.offsets; + auto const r_offsets = _dremel_device_view.offsets; + auto l_start = l_offsets[lhs_element_index]; + auto l_end = l_offsets[lhs_element_index + 1]; + auto r_start = r_offsets[rhs_element_index]; + auto r_end = r_offsets[rhs_element_index + 1]; column_device_view lcol = _lhs.slice(lhs_element_index, 1); column_device_view rcol = _rhs.slice(rhs_element_index, 1); while (lcol.type().id() == type_id::LIST) { lcol = detail::lists_column_device_view(lcol).get_sliced_child(); rcol = detail::lists_column_device_view(rcol).get_sliced_child(); } - PRINTF("max_def_level: %d\n", _max_def_level); + auto const l_max_def_level = _dremel_device_view.max_def_level; + auto const l_def_levels = _dremel_device_view.def_levels; + auto const r_def_levels = _dremel_device_view.def_levels; + auto const l_rep_levels = _dremel_device_view.rep_levels; + auto const r_rep_levels = _dremel_device_view.rep_levels; + PRINTF("max_def_level: %d\n", l_max_def_level); PRINTF("t: %d, lhs_element_index: %d, rhs_element_index: %d\n", threadIdx.x, @@ -421,21 +424,23 @@ class device_row_comparator { PRINTF("t: %d, i: %d, j: %d, k: %d\n", threadIdx.x, i, j, k); PRINTF("t: %d, def_l: %d, def_r: %d, rep_l: %d, rep_r: %d\n", threadIdx.x, - _def_level[i], - _def_level[j], - _rep_level[i], - _rep_level[j]); - if (_def_level[i] != _def_level[j]) { - state = (_def_level[i] < _def_level[j]) ? weak_ordering::LESS : weak_ordering::GREATER; + l_def_levels[i], + r_def_levels[j], + l_rep_levels[i], + r_rep_levels[j]); + if (l_def_levels[i] != r_def_levels[j]) { + state = + (l_def_levels[i] < r_def_levels[j]) ? weak_ordering::LESS : weak_ordering::GREATER; PRINTF("t: %d, def, state: %d\n", threadIdx.x, state); return cuda::std::pair(state, _depth); } - if (_rep_level[i] != _rep_level[j]) { - state = (_rep_level[i] < _rep_level[j]) ? 
weak_ordering::LESS : weak_ordering::GREATER; + if (l_rep_levels[i] != r_rep_levels[j]) { + state = + (l_rep_levels[i] < r_rep_levels[j]) ? weak_ordering::LESS : weak_ordering::GREATER; PRINTF("t: %d, rep, state: %d\n", threadIdx.x, state); return cuda::std::pair(state, _depth); } - if (_def_level[i] == _max_def_level) { + if (l_def_levels[i] == l_max_def_level) { auto comparator = element_comparator{_check_nulls, lcol, rcol, _null_precedence}; int last_null_depth = _depth; cuda::std::tie(state, last_null_depth) = @@ -445,7 +450,7 @@ class device_row_comparator { return cuda::std::pair(state, _depth); } ++k; - } else if (lcol.nullable() and _def_level[i] == _max_def_level - 1) { + } else if (lcol.nullable() and l_def_levels[i] == l_max_def_level - 1) { ++k; } } @@ -461,10 +466,7 @@ class device_row_comparator { Nullate const _check_nulls; null_order const _null_precedence; int const _depth; - device_span _dremel_offsets; - device_span _rep_level; - device_span _def_level; - uint8_t _max_def_level{0}; + dremel_device_view _dremel_device_view; PhysicalElementComparator const _comparator; }; @@ -492,16 +494,14 @@ class device_row_comparator { null_order const null_precedence = _null_precedence.has_value() ? (*_null_precedence)[i] : null_order::BEFORE; - auto element_comp = element_comparator{_check_nulls, - _lhs.column(i), - _rhs.column(i), - null_precedence, - depth, - _comparator, - (*_dremel_offsets)[i], - (*_rep_levels)[i], - (*_def_levels)[i], - (*_max_def_levels)[i]}; + auto element_comp = element_comparator{ + _check_nulls, + _lhs.column(i), + _rhs.column(i), + null_precedence, + depth, + _comparator, + (_dremel_device_views ? 
(*_dremel_device_views)[i] : dremel_device_view{})}; weak_ordering state; cuda::std::tie(state, last_null_depth) = @@ -526,10 +526,7 @@ class device_row_comparator { PhysicalElementComparator const _comparator; // List related members - std::optional const>> _dremel_offsets; - std::optional const>> _rep_levels; - std::optional const>> _def_levels; - std::optional> _max_def_levels; + std::optional> _dremel_device_views; }; // class device_row_comparator /** @@ -638,19 +635,13 @@ struct preprocessed_table { rmm::device_uvector&& null_precedence, rmm::device_uvector&& depths, std::vector&& dremel_data, - rmm::device_uvector>&& dremel_offsets, - rmm::device_uvector>&& rep_levels, - rmm::device_uvector>&& def_levels, - rmm::device_uvector&& max_def_levels) + rmm::device_uvector&& dremel_device_views) : _t(std::move(table)), _column_order(std::move(column_order)), _null_precedence(std::move(null_precedence)), _depths(std::move(depths)), _dremel_data(std::move(dremel_data)), - _dremel_offsets(std::move(dremel_offsets)), - _rep_levels(std::move(rep_levels)), - _def_levels(std::move(def_levels)), - _max_def_levels(std::move(max_def_levels)){}; + _dremel_device_views(std::move(dremel_device_views)){}; /** * @brief Implicit conversion operator to a `table_device_view` of the preprocessed table. @@ -700,31 +691,13 @@ struct preprocessed_table { } // TODO: span of spans? - [[nodiscard]] std::optional const>> dremel_offsets() const + [[nodiscard]] std::optional> dremel_device_views() const { - return _dremel_offsets.size() - ? std::optional const>>(_dremel_offsets) + return _dremel_device_views.size() + ? std::optional>(_dremel_device_views) : std::nullopt; } - [[nodiscard]] std::optional const>> rep_levels() const - { - return _rep_levels.size() ? std::optional const>>(_rep_levels) - : std::nullopt; - } - - [[nodiscard]] std::optional const>> def_levels() const - { - return _def_levels.size() ? 
std::optional const>>(_def_levels) - : std::nullopt; - } - - [[nodiscard]] std::optional> max_def_levels() const - { - return _max_def_levels.size() ? std::optional>(_max_def_levels) - : std::nullopt; - } - private: table_device_view_owner const _t; rmm::device_uvector const _column_order; @@ -733,10 +706,7 @@ struct preprocessed_table { // List related pre-computation std::vector _dremel_data; - rmm::device_uvector> _dremel_offsets; - rmm::device_uvector> _rep_levels; - rmm::device_uvector> _def_levels; - rmm::device_uvector _max_def_levels; + rmm::device_uvector _dremel_device_views; }; /** @@ -812,11 +782,8 @@ class self_comparator { d_t->depths(), d_t->column_order(), d_t->null_precedence(), - comparator, - d_t->dremel_offsets(), - d_t->rep_levels(), - d_t->def_levels(), - d_t->max_def_levels()}}; + d_t->dremel_device_views(), + comparator}}; } /// @copydoc less() @@ -825,8 +792,14 @@ class self_comparator { auto less_equivalent(Nullate nullate = {}, PhysicalElementComparator comparator = {}) const noexcept { - return less_equivalent_comparator{device_row_comparator{ - nullate, *d_t, *d_t, d_t->depths(), d_t->column_order(), d_t->null_precedence(), comparator}}; + return less_equivalent_comparator{device_row_comparator{nullate, + *d_t, + *d_t, + d_t->depths(), + d_t->column_order(), + d_t->null_precedence(), + std::nullopt, + comparator}}; } private: @@ -954,6 +927,7 @@ class two_table_comparator { d_left_table->depths(), d_left_table->column_order(), d_left_table->null_precedence(), + std::nullopt, comparator}}}; } @@ -970,6 +944,7 @@ class two_table_comparator { d_left_table->depths(), d_left_table->column_order(), d_left_table->null_precedence(), + std::nullopt, comparator}}}; } diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu index f5eec534a66..155f6a0e926 100644 --- a/cpp/src/table/row_operators.cu +++ b/cpp/src/table/row_operators.cu @@ -280,36 +280,45 @@ auto list_lex_preprocess(table_view table, rmm::cuda_stream_view stream) 
dremel_data.push_back( io::parquet::gpu::get_dremel_data(col, d_nullability, nullability, stream)); max_def_levels.push_back(max_def_level); - } else { - max_def_levels.push_back(0); + // } else { + // max_def_levels.push_back(0); } } - std::vector> dremel_offsets; - std::vector> rep_levels; - std::vector> def_levels; + // std::vector> dremel_offsets; + // std::vector> rep_levels; + // std::vector> def_levels; + std::vector dremel_device_views; size_type c = 0; for (auto const& col : table) { if (col.type().id() == type_id::LIST) { - dremel_offsets.emplace_back(dremel_data[c].dremel_offsets); - rep_levels.emplace_back(dremel_data[c].rep_level); - def_levels.emplace_back(dremel_data[c].def_level); + // dremel_offsets.emplace_back(dremel_data[c].dremel_offsets); + // rep_levels.emplace_back(dremel_data[c].rep_level); + // def_levels.emplace_back(dremel_data[c].def_level); + dremel_device_views.push_back( + row::lexicographic::dremel_device_view{dremel_data[c].dremel_offsets.data(), + dremel_data[c].rep_level.data(), + dremel_data[c].def_level.data(), + dremel_data[c].leaf_data_size, + max_def_levels[c]}); ++c; } else { - dremel_offsets.emplace_back(); - rep_levels.emplace_back(); - def_levels.emplace_back(); + // dremel_offsets.emplace_back(); + // rep_levels.emplace_back(); + // def_levels.emplace_back(); + dremel_device_views.emplace_back(); } } - auto d_dremel_offsets = detail::make_device_uvector_async(dremel_offsets, stream); - auto d_rep_levels = detail::make_device_uvector_async(rep_levels, stream); - auto d_def_levels = detail::make_device_uvector_async(def_levels, stream); - auto d_max_def_levels = detail::make_device_uvector_async(max_def_levels, stream); - return std::make_tuple(std::move(dremel_data), - std::move(d_dremel_offsets), - std::move(d_rep_levels), - std::move(d_def_levels), - std::move(d_max_def_levels)); + // auto d_dremel_offsets = detail::make_device_uvector_async(dremel_offsets, stream); + // auto d_rep_levels = 
detail::make_device_uvector_async(rep_levels, stream); + // auto d_def_levels = detail::make_device_uvector_async(def_levels, stream); + // auto d_max_def_levels = detail::make_device_uvector_async(max_def_levels, stream); + auto d_dremel_device_views = detail::make_device_uvector_async(dremel_device_views, stream); + return std::make_tuple(std::move(dremel_data), std::move(d_dremel_device_views)); + // std::move(d_dremel_offsets), + // std::move(d_rep_levels), + // std::move(d_def_levels), + // std::move(d_max_def_levels)); } using column_checker_fn_t = std::function; @@ -395,23 +404,21 @@ std::shared_ptr preprocessed_table::create( auto [verticalized_lhs, new_column_order, new_null_precedence, verticalized_col_depths] = decompose_structs(t, column_order, null_precedence); - auto [dremel_data, d_dremel_offsets, d_rep_levels, d_def_levels, d_max_def_levels] = - list_lex_preprocess(verticalized_lhs, stream); + // auto [dremel_data, d_dremel_offsets, d_rep_levels, d_def_levels, d_max_def_levels] = + auto [dremel_data, d_dremel_device_views] = list_lex_preprocess(verticalized_lhs, stream); auto d_t = table_device_view::create(verticalized_lhs, stream); auto d_column_order = detail::make_device_uvector_async(new_column_order, stream); auto d_null_precedence = detail::make_device_uvector_async(new_null_precedence, stream); auto d_depths = detail::make_device_uvector_async(verticalized_col_depths, stream); - return std::shared_ptr(new preprocessed_table(std::move(d_t), - std::move(d_column_order), - std::move(d_null_precedence), - std::move(d_depths), - std::move(dremel_data), - std::move(d_dremel_offsets), - std::move(d_rep_levels), - std::move(d_def_levels), - std::move(d_max_def_levels))); + return std::shared_ptr( + new preprocessed_table(std::move(d_t), + std::move(d_column_order), + std::move(d_null_precedence), + std::move(d_depths), + std::move(dremel_data), + std::move(d_dremel_device_views))); } two_table_comparator::two_table_comparator(table_view const& left, 
From 8cbd70c6dfc7a1b1456ba23766c573643f0345ce Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Wed, 29 Jun 2022 14:40:53 +0530 Subject: [PATCH 36/78] Allow lhs and rhs dremel data --- .../cudf/table/experimental/row_operators.cuh | 59 +++++++++++-------- 1 file changed, 35 insertions(+), 24 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index c23f93620b9..9c58c46a469 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -263,18 +263,20 @@ class device_row_comparator { Nullate check_nulls, table_device_view lhs, table_device_view rhs, - std::optional> depth = std::nullopt, - std::optional> column_order = std::nullopt, - std::optional> null_precedence = std::nullopt, - std::optional> dremel_device_views = std::nullopt, - PhysicalElementComparator comparator = {}) noexcept + std::optional> depth = std::nullopt, + std::optional> column_order = std::nullopt, + std::optional> null_precedence = std::nullopt, + std::optional> l_dremel_device_views = std::nullopt, + std::optional> r_dremel_device_views = std::nullopt, + PhysicalElementComparator comparator = {}) noexcept : _lhs{lhs}, _rhs{rhs}, _check_nulls{check_nulls}, _depth{depth}, _column_order{column_order}, _null_precedence{null_precedence}, - _dremel_device_views{dremel_device_views}, + _l_dremel_device_views(l_dremel_device_views), + _r_dremel_device_views(r_dremel_device_views), _comparator{comparator} { } @@ -301,16 +303,18 @@ class device_row_comparator { __device__ element_comparator(Nullate check_nulls, column_device_view lhs, column_device_view rhs, - null_order null_precedence = null_order::BEFORE, - int depth = 0, - PhysicalElementComparator comparator = {}, - dremel_device_view dremel_device_view = {}) + null_order null_precedence = null_order::BEFORE, + int depth = 0, + PhysicalElementComparator comparator = {}, + dremel_device_view 
l_dremel_device_view = {}, + dremel_device_view r_dremel_device_view = {}) : _lhs{lhs}, _rhs{rhs}, _check_nulls{check_nulls}, _null_precedence{null_precedence}, _depth{depth}, - _dremel_device_view{dremel_device_view}, + _l_dremel_device_view{l_dremel_device_view}, + _r_dremel_device_view{r_dremel_device_view}, _comparator{comparator} { } @@ -390,8 +394,8 @@ class device_row_comparator { __device__ cuda::std::pair operator()(size_type lhs_element_index, size_type rhs_element_index) { - auto const l_offsets = _dremel_device_view.offsets; - auto const r_offsets = _dremel_device_view.offsets; + auto const l_offsets = _l_dremel_device_view.offsets; + auto const r_offsets = _r_dremel_device_view.offsets; auto l_start = l_offsets[lhs_element_index]; auto l_end = l_offsets[lhs_element_index + 1]; auto r_start = r_offsets[rhs_element_index]; @@ -402,11 +406,11 @@ class device_row_comparator { lcol = detail::lists_column_device_view(lcol).get_sliced_child(); rcol = detail::lists_column_device_view(rcol).get_sliced_child(); } - auto const l_max_def_level = _dremel_device_view.max_def_level; - auto const l_def_levels = _dremel_device_view.def_levels; - auto const r_def_levels = _dremel_device_view.def_levels; - auto const l_rep_levels = _dremel_device_view.rep_levels; - auto const r_rep_levels = _dremel_device_view.rep_levels; + auto const l_max_def_level = _l_dremel_device_view.max_def_level; + auto const l_def_levels = _l_dremel_device_view.def_levels; + auto const r_def_levels = _r_dremel_device_view.def_levels; + auto const l_rep_levels = _l_dremel_device_view.rep_levels; + auto const r_rep_levels = _r_dremel_device_view.rep_levels; PRINTF("max_def_level: %d\n", l_max_def_level); PRINTF("t: %d, lhs_element_index: %d, rhs_element_index: %d\n", @@ -466,7 +470,8 @@ class device_row_comparator { Nullate const _check_nulls; null_order const _null_precedence; int const _depth; - dremel_device_view _dremel_device_view; + dremel_device_view _l_dremel_device_view; + 
dremel_device_view _r_dremel_device_view; PhysicalElementComparator const _comparator; }; @@ -501,7 +506,8 @@ class device_row_comparator { null_precedence, depth, _comparator, - (_dremel_device_views ? (*_dremel_device_views)[i] : dremel_device_view{})}; + (_l_dremel_device_views ? (*_l_dremel_device_views)[i] : dremel_device_view{}), + (_r_dremel_device_views ? (*_r_dremel_device_views)[i] : dremel_device_view{})}; weak_ordering state; cuda::std::tie(state, last_null_depth) = @@ -526,7 +532,8 @@ class device_row_comparator { PhysicalElementComparator const _comparator; // List related members - std::optional> _dremel_device_views; + std::optional> _l_dremel_device_views; + std::optional> _r_dremel_device_views; }; // class device_row_comparator /** @@ -783,6 +790,7 @@ class self_comparator { d_t->column_order(), d_t->null_precedence(), d_t->dremel_device_views(), + d_t->dremel_device_views(), comparator}}; } @@ -798,7 +806,8 @@ class self_comparator { d_t->depths(), d_t->column_order(), d_t->null_precedence(), - std::nullopt, + d_t->dremel_device_views(), + d_t->dremel_device_views(), comparator}}; } @@ -927,7 +936,8 @@ class two_table_comparator { d_left_table->depths(), d_left_table->column_order(), d_left_table->null_precedence(), - std::nullopt, + d_left_table->dremel_device_views(), + d_right_table->dremel_device_views(), comparator}}}; } @@ -944,7 +954,8 @@ class two_table_comparator { d_left_table->depths(), d_left_table->column_order(), d_left_table->null_precedence(), - std::nullopt, + d_left_table->dremel_device_views(), + d_right_table->dremel_device_views(), comparator}}}; } From 3f4396834fd4fec9d2eb285cfe2bb06a4722ea5a Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Sat, 16 Jul 2022 00:37:26 +0530 Subject: [PATCH 37/78] reduce test verbosity --- cpp/tests/sort/sort_test.cpp | 50 +++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/cpp/tests/sort/sort_test.cpp b/cpp/tests/sort/sort_test.cpp index 
9a31db82114..4092597d8e3 100644 --- a/cpp/tests/sort/sort_test.cpp +++ b/cpp/tests/sort/sort_test.cpp @@ -768,19 +768,20 @@ TYPED_TEST(Sort, WithNullableListColumn) if (std::is_same_v) { GTEST_SKIP(); } using lcw = cudf::test::lists_column_wrapper; + using cudf::test::iterators::nulls_at; lcw col{ - {{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, // 0 - {{{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, cudf::test::iterators::nulls_at({3})}, // 1 - {{1, 2, 3}, {}, {4, 5}, {0, 6, 0}}, // 2 - {{1, 2}, {3}, {4, 5}, {0, 6, 0}}, // 3 - {{1, 2}, {3}, {4, 5}, {{0, 6, 0}, cudf::test::iterators::nulls_at({0})}}, // 4 - {{7, 8}, {}}, // 5 - lcw{lcw{}, lcw{}, lcw{}}, // 6 - lcw{lcw{}}, // 7 - {lcw{10}}, // 8 - lcw{}, // 9 - {{1, 2}, {3}, {4, 5}, {{0, 6, 0}, cudf::test::iterators::nulls_at({0, 2})}}, // 10 - {{1, 2}, {3}, {4, 5}, {{0, 7}, cudf::test::iterators::nulls_at({0})}}, // 11 + {{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, // 0 + {{{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, nulls_at({3})}, // 1 + {{1, 2, 3}, {}, {4, 5}, {0, 6, 0}}, // 2 + {{1, 2}, {3}, {4, 5}, {0, 6, 0}}, // 3 + {{1, 2}, {3}, {4, 5}, {{0, 6, 0}, nulls_at({0})}}, // 4 + {{7, 8}, {}}, // 5 + lcw{lcw{}, lcw{}, lcw{}}, // 6 + lcw{lcw{}}, // 7 + {lcw{10}}, // 8 + lcw{}, // 9 + {{1, 2}, {3}, {4, 5}, {{0, 6, 0}, nulls_at({0, 2})}}, // 10 + {{1, 2}, {3}, {4, 5}, {{0, 7}, nulls_at({0})}}, // 11 }; auto expect = @@ -795,19 +796,20 @@ TYPED_TEST(Sort, WithSlicedListColumn) if (std::is_same_v) { GTEST_SKIP(); } using lcw = cudf::test::lists_column_wrapper; + using cudf::test::iterators::nulls_at; lcw col{ - {{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, // - {{{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, cudf::test::iterators::nulls_at({3})}, // 0 - {{1, 2, 3}, {}, {4, 5}, {0, 6, 0}}, // 1 - {{1, 2}, {3}, {4, 5}, {0, 6, 0}}, // 2 - {{1, 2}, {3}, {4, 5}, {{0, 6, 0}, cudf::test::iterators::nulls_at({0})}}, // 3 - {{7, 8}, {}}, // 4 - lcw{lcw{}, lcw{}, lcw{}}, // 5 - lcw{lcw{}}, // 6 - {lcw{10}}, // 7 - lcw{}, // 8 - {{1, 2}, {3}, {4, 5}, {{0, 6, 0}, 
cudf::test::iterators::nulls_at({0, 2})}}, // 9 - {{1, 2}, {3}, {4, 5}, {{0, 7}, cudf::test::iterators::nulls_at({0})}}, // + {{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, // + {{{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, nulls_at({3})}, // 0 + {{1, 2, 3}, {}, {4, 5}, {0, 6, 0}}, // 1 + {{1, 2}, {3}, {4, 5}, {0, 6, 0}}, // 2 + {{1, 2}, {3}, {4, 5}, {{0, 6, 0}, nulls_at({0})}}, // 3 + {{7, 8}, {}}, // 4 + lcw{lcw{}, lcw{}, lcw{}}, // 5 + lcw{lcw{}}, // 6 + {lcw{10}}, // 7 + lcw{}, // 8 + {{1, 2}, {3}, {4, 5}, {{0, 6, 0}, nulls_at({0, 2})}}, // 9 + {{1, 2}, {3}, {4, 5}, {{0, 7}, nulls_at({0})}}, // }; auto sliced_col = cudf::slice(col, {1, 10}); From 4b233dce80e38fd5cb7611352a00fccdf5614690 Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Mon, 18 Jul 2022 15:14:11 +0530 Subject: [PATCH 38/78] Remove debug prints --- .../cudf/table/experimental/row_operators.cuh | 29 +------------------ 1 file changed, 1 insertion(+), 28 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index b4813e437ba..4cfbd95fd5d 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -214,9 +214,6 @@ struct dremel_device_view { uint8_t max_def_level; }; -// #define PRINTF(...) printf(__VA_ARGS__) -#define PRINTF(...) - /** * @brief Computes the lexicographic comparison between 2 rows. 
* @@ -412,37 +409,16 @@ class device_row_comparator { auto const r_def_levels = _r_dremel_device_view.def_levels; auto const l_rep_levels = _l_dremel_device_view.rep_levels; auto const r_rep_levels = _r_dremel_device_view.rep_levels; - PRINTF("max_def_level: %d\n", l_max_def_level); - - PRINTF("t: %d, lhs_element_index: %d, rhs_element_index: %d\n", - threadIdx.x, - lhs_element_index, - rhs_element_index); - PRINTF("t: %d, l_start: %d, l_end: %d, r_start: %d, r_end: %d\n", - threadIdx.x, - l_start, - l_end, - r_start, - r_end); weak_ordering state{weak_ordering::EQUIVALENT}; for (int i = l_start, j = r_start, k = 0; i < l_end and j < r_end; ++i, ++j) { - PRINTF("t: %d, i: %d, j: %d, k: %d\n", threadIdx.x, i, j, k); - PRINTF("t: %d, def_l: %d, def_r: %d, rep_l: %d, rep_r: %d\n", - threadIdx.x, - l_def_levels[i], - r_def_levels[j], - l_rep_levels[i], - r_rep_levels[j]); if (l_def_levels[i] != r_def_levels[j]) { state = (l_def_levels[i] < r_def_levels[j]) ? weak_ordering::LESS : weak_ordering::GREATER; - PRINTF("t: %d, def, state: %d\n", threadIdx.x, state); return cuda::std::pair(state, _depth); } if (l_rep_levels[i] != r_rep_levels[j]) { state = (l_rep_levels[i] < r_rep_levels[j]) ? 
weak_ordering::LESS : weak_ordering::GREATER; - PRINTF("t: %d, rep, state: %d\n", threadIdx.x, state); return cuda::std::pair(state, _depth); } if (l_def_levels[i] == l_max_def_level) { @@ -450,10 +426,7 @@ class device_row_comparator { int last_null_depth = _depth; cuda::std::tie(state, last_null_depth) = cudf::type_dispatcher(lcol.type(), comparator, k, k); - if (state != weak_ordering::EQUIVALENT) { - PRINTF("t: %d, leaf, state: %d\n", threadIdx.x, state); - return cuda::std::pair(state, _depth); - } + if (state != weak_ordering::EQUIVALENT) { return cuda::std::pair(state, _depth); } ++k; } else if (lcol.nullable() and l_def_levels[i] == l_max_def_level - 1) { ++k; From 6be60783e7f7b3da74d9316abcf4bd31faeab523 Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Mon, 18 Jul 2022 17:01:46 +0530 Subject: [PATCH 39/78] rename linked column header --- .../cudf/detail/utilities/{column.hpp => linked_column.hpp} | 0 cpp/src/table/row_operators.cu | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename cpp/include/cudf/detail/utilities/{column.hpp => linked_column.hpp} (100%) diff --git a/cpp/include/cudf/detail/utilities/column.hpp b/cpp/include/cudf/detail/utilities/linked_column.hpp similarity index 100% rename from cpp/include/cudf/detail/utilities/column.hpp rename to cpp/include/cudf/detail/utilities/linked_column.hpp diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu index 155f6a0e926..c52b8e97e4b 100644 --- a/cpp/src/table/row_operators.cu +++ b/cpp/src/table/row_operators.cu @@ -17,8 +17,8 @@ #include #include #include -#include #include +#include #include #include #include From 8e4c870d9417d2b99b07b184909f0095e4a4f2c5 Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Tue, 19 Jul 2022 01:37:26 +0530 Subject: [PATCH 40/78] Move dremel specific code out into separate files --- cpp/CMakeLists.txt | 1 + cpp/include/cudf/detail/utilities/dremel.cuh | 70 +++ .../cudf/table/experimental/row_operators.cuh | 60 +-
cpp/src/column/dremel.cu | 539 ++++++++++++++++++ cpp/src/io/parquet/page_enc.cu | 511 ----------------- cpp/src/io/parquet/parquet_gpu.hpp | 37 -- cpp/src/io/parquet/writer_impl.cu | 12 +- cpp/src/table/row_operators.cu | 18 +- 8 files changed, 652 insertions(+), 596 deletions(-) create mode 100644 cpp/include/cudf/detail/utilities/dremel.cuh create mode 100644 cpp/src/column/dremel.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 86bfdc1444b..d3bd3bff1d4 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -236,6 +236,7 @@ add_library( src/column/column_factories.cpp src/column/column_factories.cu src/column/column_view.cpp + src/column/dremel.cu src/comms/ipc/ipc.cpp src/copying/concatenate.cu src/copying/contiguous_split.cu diff --git a/cpp/include/cudf/detail/utilities/dremel.cuh b/cpp/include/cudf/detail/utilities/dremel.cuh new file mode 100644 index 00000000000..07c33628762 --- /dev/null +++ b/cpp/include/cudf/detail/utilities/dremel.cuh @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +#include + +namespace cudf::detail { + +/** + * @brief Dremel data that describes one nested type column + * + * @see get_dremel_data() + */ +struct dremel_data { + rmm::device_uvector dremel_offsets; + rmm::device_uvector rep_level; + rmm::device_uvector def_level; + + size_type leaf_data_size; +}; + +struct dremel_device_view { + size_type* offsets; + uint8_t* rep_levels; + uint8_t* def_levels; + size_type leaf_data_size; + uint8_t max_def_level; +}; + +/** + * @brief Get the dremel offsets and repetition and definition levels for a LIST column + * + * Dremel offsets are the per row offsets into the repetition and definition level arrays for a + * column. + * Example: + * ``` + * col = {{1, 2, 3}, { }, {5, 6}} + * dremel_offsets = { 0, 3, 4, 6} + * rep_level = { 0, 1, 1, 0, 0, 1} + * def_level = { 1, 1, 1, 0, 1, 1} + * ``` + * @param col Column of LIST type + * @param level_nullability Pre-determined nullability at each list level. Empty means infer from + * `col` + * @param stream CUDA stream used for device memory operations and kernel launches. 
+ * + * @return A struct containing dremel data + */ +dremel_data get_dremel_data(column_view h_col, + rmm::device_uvector const& d_nullability, + std::vector const& nullability, + rmm::cuda_stream_view stream); + +} // namespace cudf::detail diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 4cfbd95fd5d..342fc9f0f2d 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -16,13 +16,12 @@ #pragma once -#include "io/parquet/parquet_gpu.hpp" - #include #include #include #include #include +#include #include #include #include @@ -206,14 +205,6 @@ struct sorting_physical_element_comparator { } }; -struct dremel_device_view { - size_type* offsets; - uint8_t* rep_levels; - uint8_t* def_levels; - size_type leaf_data_size; - uint8_t max_def_level; -}; - /** * @brief Computes the lexicographic comparison between 2 rows. * @@ -261,12 +252,14 @@ class device_row_comparator { Nullate check_nulls, table_device_view lhs, table_device_view rhs, - std::optional> depth = std::nullopt, - std::optional> column_order = std::nullopt, - std::optional> null_precedence = std::nullopt, - std::optional> l_dremel_device_views = std::nullopt, - std::optional> r_dremel_device_views = std::nullopt, - PhysicalElementComparator comparator = {}) noexcept + std::optional> depth = std::nullopt, + std::optional> column_order = std::nullopt, + std::optional> null_precedence = std::nullopt, + std::optional> l_dremel_device_views = + std::nullopt, + std::optional> r_dremel_device_views = + std::nullopt, + PhysicalElementComparator comparator = {}) noexcept : _lhs{lhs}, _rhs{rhs}, _check_nulls{check_nulls}, @@ -301,11 +294,11 @@ class device_row_comparator { __device__ element_comparator(Nullate check_nulls, column_device_view lhs, column_device_view rhs, - null_order null_precedence = null_order::BEFORE, - int depth = 0, - PhysicalElementComparator 
comparator = {}, - dremel_device_view l_dremel_device_view = {}, - dremel_device_view r_dremel_device_view = {}) + null_order null_precedence = null_order::BEFORE, + int depth = 0, + PhysicalElementComparator comparator = {}, + detail::dremel_device_view l_dremel_device_view = {}, + detail::dremel_device_view r_dremel_device_view = {}) : _lhs{lhs}, _rhs{rhs}, _check_nulls{check_nulls}, @@ -444,8 +437,8 @@ class device_row_comparator { Nullate const _check_nulls; null_order const _null_precedence; int const _depth; - dremel_device_view _l_dremel_device_view; - dremel_device_view _r_dremel_device_view; + detail::dremel_device_view _l_dremel_device_view; + detail::dremel_device_view _r_dremel_device_view; PhysicalElementComparator const _comparator; }; @@ -480,8 +473,8 @@ class device_row_comparator { null_precedence, depth, _comparator, - (_l_dremel_device_views ? (*_l_dremel_device_views)[i] : dremel_device_view{}), - (_r_dremel_device_views ? (*_r_dremel_device_views)[i] : dremel_device_view{})}; + (_l_dremel_device_views ? (*_l_dremel_device_views)[i] : detail::dremel_device_view{}), + (_r_dremel_device_views ? 
(*_r_dremel_device_views)[i] : detail::dremel_device_view{})}; weak_ordering state; cuda::std::tie(state, last_null_depth) = @@ -506,8 +499,8 @@ class device_row_comparator { PhysicalElementComparator const _comparator; // List related members - std::optional> _l_dremel_device_views; - std::optional> _r_dremel_device_views; + std::optional> _l_dremel_device_views; + std::optional> _r_dremel_device_views; }; // class device_row_comparator /** @@ -615,8 +608,8 @@ struct preprocessed_table { rmm::device_uvector&& column_order, rmm::device_uvector&& null_precedence, rmm::device_uvector&& depths, - std::vector&& dremel_data, - rmm::device_uvector&& dremel_device_views) + std::vector&& dremel_data, + rmm::device_uvector&& dremel_device_views) : _t(std::move(table)), _column_order(std::move(column_order)), _null_precedence(std::move(null_precedence)), @@ -672,10 +665,11 @@ struct preprocessed_table { } // TODO: span of spans? - [[nodiscard]] std::optional> dremel_device_views() const + [[nodiscard]] std::optional> dremel_device_views() + const { return _dremel_device_views.size() - ? std::optional>(_dremel_device_views) + ? std::optional>(_dremel_device_views) : std::nullopt; } @@ -686,8 +680,8 @@ struct preprocessed_table { rmm::device_uvector const _depths; // List related pre-computation - std::vector _dremel_data; - rmm::device_uvector _dremel_device_views; + std::vector _dremel_data; + rmm::device_uvector _dremel_device_views; }; /** diff --git a/cpp/src/column/dremel.cu b/cpp/src/column/dremel.cu new file mode 100644 index 00000000000..cc34481cfac --- /dev/null +++ b/cpp/src/column/dremel.cu @@ -0,0 +1,539 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace cudf::detail { + +/** + * @brief Functor to get definition level value for a nested struct column until the leaf level or + * the first list level. + * + */ +struct def_level_fn { + column_device_view const* parent_col; + uint8_t const* d_nullability; + uint8_t sub_level_start; + uint8_t curr_def_level; + + __device__ uint32_t operator()(size_type i) + { + uint32_t def = curr_def_level; + uint8_t l = sub_level_start; + bool is_col_struct = false; + auto col = *parent_col; + do { + // If col not nullable then it does not contribute to def levels + if (d_nullability[l]) { + if (not col.nullable() or bit_is_set(col.null_mask(), i)) { + ++def; + } else { // We have found the shallowest level at which this row is null + break; + } + } + is_col_struct = (col.type().id() == type_id::STRUCT); + if (is_col_struct) { + col = col.child(0); + ++l; + } + } while (is_col_struct); + return def; + } +}; + +/** + * @brief Get the dremel offsets and repetition and definition levels for a LIST column + * + * The repetition and definition level values are ideally computed using a recursive call over a + * nested structure but in order to better utilize GPU resources, this function calculates them + * with a bottom up merge method. 
+ * + * Given a LIST column of type `List>` like so: + * ``` + * col = { + * [], + * [[], [1, 2, 3], [4, 5]], + * [[]] + * } + * ``` + * We can represent it in cudf format with two level of offsets like this: + * ``` + * Level 0 offsets = {0, 0, 3, 5, 6} + * Level 1 offsets = {0, 0, 3, 5, 5} + * Values = {1, 2, 3, 4, 5} + * ``` + * The desired result of this function is the repetition and definition level values that + * correspond to the data values: + * ``` + * col = {[], [[], [1, 2, 3], [4, 5]], [[]]} + * def = { 0 1, 2, 2, 2, 2, 2, 1 } + * rep = { 0, 0, 0, 2, 2, 1, 2, 0 } + * ``` + * + * Since repetition and definition levels arrays contain a value for each empty list, the size of + * the rep/def level array can be given by + * ``` + * rep_level.size() = size of leaf column + number of empty lists in level 0 + * + number of empty lists in level 1 ... + * ``` + * + * We start with finding the empty lists in the penultimate level and merging it with the indices + * of the leaf level. The values for the merge are the definition and repetition levels + * ``` + * empties at level 1 = {0, 5} + * def values at 1 = {1, 1} + * rep values at 1 = {1, 1} + * indices at leaf = {0, 1, 2, 3, 4} + * def values at leaf = {2, 2, 2, 2, 2} + * rep values at leaf = {2, 2, 2, 2, 2} + * ``` + * + * merged def values = {1, 2, 2, 2, 2, 2, 1} + * merged rep values = {1, 2, 2, 2, 2, 2, 1} + * + * The size of the rep/def values is now larger than the leaf values and the offsets need to be + * adjusted in order to point to the correct start indices. We do this with an exclusive scan over + * the indices of offsets of empty lists and adding to existing offsets. + * ``` + * Level 1 new offsets = {0, 1, 4, 6, 7} + * ``` + * Repetition values at the beginning of a list need to be decremented. We use the new offsets to + * scatter the rep value. 
+ * ``` + * merged rep values = {1, 2, 2, 2, 2, 2, 1} + * scatter (1, new offsets) + * new offsets = {0, 1, 4, 6, 7} + * new rep values = {1, 1, 2, 2, 1, 2, 1} + * ``` + * + * Similarly we merge up all the way till level 0 offsets + * + * STRUCT COLUMNS : + * In case of struct columns, we don't have to merge struct levels with their children because a + * struct is the same size as its children. e.g. for a column `struct`, if the row `i` + * is null, then the children columns `int` and `float` are also null at `i`. They also have the + * null entry represented in their respective null masks. So for any case of strictly struct based + * nesting, we can get the definition levels merely by iterating over the nesting for the same row. + * + * In case struct and lists are intermixed, the definition levels of all the contiguous struct + * levels can be constructed using the aforementioned iterative method. Only when we reach a list + * level, we need to do a merge with the subsequent level. + * + * So, for a column like `struct>`, we are going to merge between the levels `struct>`, we are going to merge between `list` and `struct`. + * + * In general, one nesting level is the list level and any struct level that precedes it. + * + * A few more examples to visualize the partitioning of column hierarchy into nesting levels: + * (L is list, S is struct, i is integer(leaf data level), angle brackets omitted) + * ``` + * 1. LSi = L Si + * - | -- + * + * 2. LLSi = L L Si + * - | - | -- + * + * 3. SSLi = SSL i + * --- | - + * + * 4. 
LLSLSSi = L L SL SSi + * - | - | -- | --- + * ``` + */ +dremel_data get_dremel_data(column_view h_col, + // TODO(cp): use device_span once it is converted to a single hd_vec + rmm::device_uvector const& d_nullability, + std::vector const& nullability, + rmm::cuda_stream_view stream) +{ + auto get_list_level = [](column_view col) { + while (col.type().id() == type_id::STRUCT) { + col = col.child(0); + } + return col; + }; + + auto get_empties = [&](column_view col, size_type start, size_type end) { + auto lcv = lists_column_view(get_list_level(col)); + rmm::device_uvector empties_idx(lcv.size(), stream); + rmm::device_uvector empties(lcv.size(), stream); + auto d_off = lcv.offsets().data(); + + auto empties_idx_end = + thrust::copy_if(rmm::exec_policy(stream), + thrust::make_counting_iterator(start), + thrust::make_counting_iterator(end), + empties_idx.begin(), + [d_off] __device__(auto i) { return d_off[i] == d_off[i + 1]; }); + auto empties_end = thrust::gather(rmm::exec_policy(stream), + empties_idx.begin(), + empties_idx_end, + lcv.offsets().begin(), + empties.begin()); + + auto empties_size = empties_end - empties.begin(); + return std::make_tuple(std::move(empties), std::move(empties_idx), empties_size); + }; + + // Check if there are empty lists with empty offsets in this column + bool has_empty_list_offsets = false; + { + auto curr_col = h_col; + while (is_nested(curr_col.type())) { + if (curr_col.type().id() == type_id::LIST) { + auto lcv = lists_column_view(curr_col); + if (lcv.offsets().size() == 0) { + has_empty_list_offsets = true; + break; + } + curr_col = lcv.child(); + } else if (curr_col.type().id() == type_id::STRUCT) { + curr_col = curr_col.child(0); + } + } + } + std::unique_ptr empty_list_offset_col; + if (has_empty_list_offsets) { + empty_list_offset_col = make_fixed_width_column(data_type(type_id::INT32), 1); + cudaMemsetAsync(empty_list_offset_col->mutable_view().head(), 0, sizeof(size_type), stream); + std::function normalize_col = 
[&](column_view const& col) { + auto children = [&]() -> std::vector { + if (col.type().id() == type_id::LIST) { + auto lcol = lists_column_view(col); + auto offset_col = + lcol.offsets().head() == nullptr ? empty_list_offset_col->view() : lcol.offsets(); + return {offset_col, normalize_col(lcol.child())}; + } else if (col.type().id() == type_id::STRUCT) { + return {normalize_col(col.child(0))}; + } else { + return {col.child_begin(), col.child_end()}; + } + }(); + return column_view(col.type(), + col.size(), + col.head(), + col.null_mask(), + UNKNOWN_NULL_COUNT, + col.offset(), + std::move(children)); + }; + h_col = normalize_col(h_col); + } + + auto curr_col = h_col; + std::vector nesting_levels; + std::vector def_at_level; + std::vector start_at_sub_level; + uint8_t curr_nesting_level_idx = 0; + + auto add_def_at_level = [&](column_view col) { + // Add up all def level contributions in this column all the way till the first list column + // appears in the hierarchy or until we get to leaf + uint32_t def = 0; + start_at_sub_level.push_back(curr_nesting_level_idx); + while (col.type().id() == type_id::STRUCT) { + def += (nullability[curr_nesting_level_idx]) ? 1 : 0; + col = col.child(0); + ++curr_nesting_level_idx; + } + // At the end of all those structs is either a list column or the leaf. Leaf column contributes + // at least one def level. It doesn't matter what the leaf contributes because it'll be at the + // end of the exclusive scan. + def += (nullability[curr_nesting_level_idx]) ? 
2 : 1; + def_at_level.push_back(def); + ++curr_nesting_level_idx; + }; + while (cudf::is_nested(curr_col.type())) { + nesting_levels.push_back(curr_col); + add_def_at_level(curr_col); + while (curr_col.type().id() == type_id::STRUCT) { + // Go down the hierarchy until we get to the LIST or the leaf level + curr_col = curr_col.child(0); + } + if (curr_col.type().id() == type_id::LIST) { + curr_col = curr_col.child(lists_column_view::child_column_index); + if (not is_nested(curr_col.type())) { + // Special case: when the leaf data column is the immediate child of the list col then we + // want it to be included right away. Otherwise the struct containing it will be included in + // the next iteration of this loop. + nesting_levels.push_back(curr_col); + add_def_at_level(curr_col); + break; + } + } + } + + auto [device_view_owners, d_nesting_levels] = + contiguous_copy_column_device_views(nesting_levels, stream); + + thrust::exclusive_scan( + thrust::host, def_at_level.begin(), def_at_level.end(), def_at_level.begin()); + + // Sliced list column views only have offsets applied to top level. Get offsets for each level. + rmm::device_uvector d_column_offsets(nesting_levels.size(), stream); + rmm::device_uvector d_column_ends(nesting_levels.size(), stream); + + auto d_col = column_device_view::create(h_col, stream); + cudf::detail::device_single_thread( + [offset_at_level = d_column_offsets.data(), + end_idx_at_level = d_column_ends.data(), + col = *d_col] __device__() { + auto curr_col = col; + size_type off = curr_col.offset(); + size_type end = off + curr_col.size(); + size_type level = 0; + offset_at_level[level] = off; + end_idx_at_level[level] = end; + ++level; + // Apply offset recursively until we get to leaf data + // Skip doing the following for any structs we encounter in between. 
+ while (curr_col.type().id() == type_id::LIST or curr_col.type().id() == type_id::STRUCT) { + if (curr_col.type().id() == type_id::LIST) { + off = curr_col.child(lists_column_view::offsets_column_index).element(off); + end = curr_col.child(lists_column_view::offsets_column_index).element(end); + offset_at_level[level] = off; + end_idx_at_level[level] = end; + ++level; + curr_col = curr_col.child(lists_column_view::child_column_index); + } else { + curr_col = curr_col.child(0); + } + } + }, + stream); + + thrust::host_vector column_offsets = + cudf::detail::make_host_vector_async(d_column_offsets, stream); + thrust::host_vector column_ends = + cudf::detail::make_host_vector_async(d_column_ends, stream); + stream.synchronize(); + + size_t max_vals_size = 0; + for (size_t l = 0; l < column_offsets.size(); ++l) { + max_vals_size += column_ends[l] - column_offsets[l]; + } + + rmm::device_uvector rep_level(max_vals_size, stream); + rmm::device_uvector def_level(max_vals_size, stream); + + rmm::device_uvector temp_rep_vals(max_vals_size, stream); + rmm::device_uvector temp_def_vals(max_vals_size, stream); + rmm::device_uvector new_offsets(0, stream); + size_type curr_rep_values_size = 0; + { + // At this point, curr_col contains the leaf column. Max nesting level is + // nesting_levels.size(). + + // We are going to start by merging the last column in nesting_levels (the leaf, which is at the + // index `nesting_levels.size() - 1`) with the second-to-last (which is at + // `nesting_levels.size() - 2`). 
+ size_t level = nesting_levels.size() - 2; + curr_col = nesting_levels[level]; + auto lcv = lists_column_view(get_list_level(curr_col)); + auto offset_size_at_level = column_ends[level] - column_offsets[level] + 1; + + // Get empties at this level + auto [empties, empties_idx, empties_size] = + get_empties(nesting_levels[level], column_offsets[level], column_ends[level]); + + // Merge empty at deepest parent level with the rep, def level vals at leaf level + + auto input_parent_rep_it = thrust::make_constant_iterator(level); + auto input_parent_def_it = + thrust::make_transform_iterator(empties_idx.begin(), + def_level_fn{d_nesting_levels + level, + d_nullability.data(), + start_at_sub_level[level], + def_at_level[level]}); + + // `nesting_levels.size()` == no of list levels + leaf. Max repetition level = no of list levels + auto input_child_rep_it = thrust::make_constant_iterator(nesting_levels.size() - 1); + auto input_child_def_it = + thrust::make_transform_iterator(thrust::make_counting_iterator(column_offsets[level + 1]), + def_level_fn{d_nesting_levels + level + 1, + d_nullability.data(), + start_at_sub_level[level + 1], + def_at_level[level + 1]}); + + // Zip the input and output value iterators so that merge operation is done only once + auto input_parent_zip_it = + thrust::make_zip_iterator(thrust::make_tuple(input_parent_rep_it, input_parent_def_it)); + + auto input_child_zip_it = + thrust::make_zip_iterator(thrust::make_tuple(input_child_rep_it, input_child_def_it)); + + auto output_zip_it = + thrust::make_zip_iterator(thrust::make_tuple(rep_level.begin(), def_level.begin())); + + auto ends = thrust::merge_by_key(rmm::exec_policy(stream), + empties.begin(), + empties.begin() + empties_size, + thrust::make_counting_iterator(column_offsets[level + 1]), + thrust::make_counting_iterator(column_ends[level + 1]), + input_parent_zip_it, + input_child_zip_it, + thrust::make_discard_iterator(), + output_zip_it); + + curr_rep_values_size = ends.second - 
output_zip_it; + + // Scan to get distance by which each offset value is shifted due to the insertion of empties + auto scan_it = cudf::detail::make_counting_transform_iterator( + column_offsets[level], + [off = lcv.offsets().data(), size = lcv.offsets().size()] __device__( + auto i) -> int { return (i + 1 < size) && (off[i] == off[i + 1]); }); + rmm::device_uvector scan_out(offset_size_at_level, stream); + thrust::exclusive_scan( + rmm::exec_policy(stream), scan_it, scan_it + offset_size_at_level, scan_out.begin()); + + // Add scan output to existing offsets to get new offsets into merged rep level values + new_offsets = rmm::device_uvector(offset_size_at_level, stream); + thrust::for_each_n(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + offset_size_at_level, + [off = lcv.offsets().data() + column_offsets[level], + scan_out = scan_out.data(), + new_off = new_offsets.data()] __device__(auto i) { + new_off[i] = off[i] - off[0] + scan_out[i]; + }); + + // Set rep level values at level starts to appropriate rep level + auto scatter_it = thrust::make_constant_iterator(level); + thrust::scatter(rmm::exec_policy(stream), + scatter_it, + scatter_it + new_offsets.size() - 1, + new_offsets.begin(), + rep_level.begin()); + } + + // Having already merged the last two levels, we are now going to merge the result with the + // third-last level which is at index `nesting_levels.size() - 3`. 
+ for (int level = nesting_levels.size() - 3; level >= 0; level--) { + curr_col = nesting_levels[level]; + auto lcv = lists_column_view(get_list_level(curr_col)); + auto offset_size_at_level = column_ends[level] - column_offsets[level] + 1; + + // Get empties at this level + auto [empties, empties_idx, empties_size] = + get_empties(nesting_levels[level], column_offsets[level], column_ends[level]); + + auto offset_transformer = [new_child_offsets = new_offsets.data(), + child_start = column_offsets[level + 1]] __device__(auto x) { + return new_child_offsets[x - child_start]; // (x - child's offset) + }; + + // We will be reading from old rep_levels and writing again to rep_levels. Swap the current + // rep values into temp_rep_vals so it can become the input and rep_levels can again be output. + std::swap(temp_rep_vals, rep_level); + std::swap(temp_def_vals, def_level); + + // Merge empty at parent level with the rep, def level vals at current level + auto transformed_empties = thrust::make_transform_iterator(empties.begin(), offset_transformer); + + auto input_parent_rep_it = thrust::make_constant_iterator(level); + auto input_parent_def_it = + thrust::make_transform_iterator(empties_idx.begin(), + def_level_fn{d_nesting_levels + level, + d_nullability.data(), + start_at_sub_level[level], + def_at_level[level]}); + + // Zip the input and output value iterators so that merge operation is done only once + auto input_parent_zip_it = + thrust::make_zip_iterator(thrust::make_tuple(input_parent_rep_it, input_parent_def_it)); + + auto input_child_zip_it = + thrust::make_zip_iterator(thrust::make_tuple(temp_rep_vals.begin(), temp_def_vals.begin())); + + auto output_zip_it = + thrust::make_zip_iterator(thrust::make_tuple(rep_level.begin(), def_level.begin())); + + auto ends = thrust::merge_by_key(rmm::exec_policy(stream), + transformed_empties, + transformed_empties + empties_size, + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(curr_rep_values_size), 
+ input_parent_zip_it, + input_child_zip_it, + thrust::make_discard_iterator(), + output_zip_it); + + curr_rep_values_size = ends.second - output_zip_it; + + // Scan to get distance by which each offset value is shifted due to the insertion of dremel + // level value fof an empty list + auto scan_it = cudf::detail::make_counting_transform_iterator( + column_offsets[level], + [off = lcv.offsets().data(), size = lcv.offsets().size()] __device__( + auto i) -> int { return (i + 1 < size) && (off[i] == off[i + 1]); }); + rmm::device_uvector scan_out(offset_size_at_level, stream); + thrust::exclusive_scan( + rmm::exec_policy(stream), scan_it, scan_it + offset_size_at_level, scan_out.begin()); + + // Add scan output to existing offsets to get new offsets into merged rep level values + rmm::device_uvector temp_new_offsets(offset_size_at_level, stream); + thrust::for_each_n(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + offset_size_at_level, + [off = lcv.offsets().data() + column_offsets[level], + scan_out = scan_out.data(), + new_off = temp_new_offsets.data(), + offset_transformer] __device__(auto i) { + new_off[i] = offset_transformer(off[i]) + scan_out[i]; + }); + new_offsets = std::move(temp_new_offsets); + + // Set rep level values at level starts to appropriate rep level + auto scatter_it = thrust::make_constant_iterator(level); + thrust::scatter(rmm::exec_policy(stream), + scatter_it, + scatter_it + new_offsets.size() - 1, + new_offsets.begin(), + rep_level.begin()); + } + + size_t level_vals_size = new_offsets.back_element(stream); + rep_level.resize(level_vals_size, stream); + def_level.resize(level_vals_size, stream); + + stream.synchronize(); + + size_type leaf_data_size = column_ends.back() - column_offsets.back(); + + return dremel_data{ + std::move(new_offsets), std::move(rep_level), std::move(def_level), leaf_data_size}; +} + +} // namespace cudf::detail diff --git a/cpp/src/io/parquet/page_enc.cu b/cpp/src/io/parquet/page_enc.cu index 
277dc4846de..ad2a530b184 100644 --- a/cpp/src/io/parquet/page_enc.cu +++ b/cpp/src/io/parquet/page_enc.cu @@ -17,26 +17,15 @@ #include -#include #include -#include #include -#include #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include #include #include #include @@ -1460,506 +1449,6 @@ __global__ void __launch_bounds__(1024) } } -/** - * @brief Functor to get definition level value for a nested struct column until the leaf level or - * the first list level. - * - */ -struct def_level_fn { - column_device_view const* parent_col; - uint8_t const* d_nullability; - uint8_t sub_level_start; - uint8_t curr_def_level; - - __device__ uint32_t operator()(size_type i) - { - uint32_t def = curr_def_level; - uint8_t l = sub_level_start; - bool is_col_struct = false; - auto col = *parent_col; - do { - // If col not nullable then it does not contribute to def levels - if (d_nullability[l]) { - if (not col.nullable() or bit_is_set(col.null_mask(), i)) { - ++def; - } else { // We have found the shallowest level at which this row is null - break; - } - } - is_col_struct = (col.type().id() == type_id::STRUCT); - if (is_col_struct) { - col = col.child(0); - ++l; - } - } while (is_col_struct); - return def; - } -}; - -/** - * @brief Get the dremel offsets and repetition and definition levels for a LIST column - * - * The repetition and definition level values are ideally computed using a recursive call over a - * nested structure but in order to better utilize GPU resources, this function calculates them - * with a bottom up merge method. 
- * - * Given a LIST column of type `List<List<int>>` like so: - * ``` - * col = { - * [], - * [[], [1, 2, 3], [4, 5]], - * [[]] - * } - * ``` - * We can represent it in cudf format with two level of offsets like this: - * ``` - * Level 0 offsets = {0, 0, 3, 5, 6} - * Level 1 offsets = {0, 0, 3, 5, 5} - * Values = {1, 2, 3, 4, 5} - * ``` - * The desired result of this function is the repetition and definition level values that - * correspond to the data values: - * ``` - * col = {[], [[], [1, 2, 3], [4, 5]], [[]]} - * def = { 0 1, 2, 2, 2, 2, 2, 1 } - * rep = { 0, 0, 0, 2, 2, 1, 2, 0 } - * ``` - * - * Since repetition and definition levels arrays contain a value for each empty list, the size of - * the rep/def level array can be given by - * ``` - * rep_level.size() = size of leaf column + number of empty lists in level 0 - * + number of empty lists in level 1 ... - * ``` - * - * We start with finding the empty lists in the penultimate level and merging it with the indices - * of the leaf level. The values for the merge are the definition and repetition levels - * ``` - * empties at level 1 = {0, 5} - * def values at 1 = {1, 1} - * rep values at 1 = {1, 1} - * indices at leaf = {0, 1, 2, 3, 4} - * def values at leaf = {2, 2, 2, 2, 2} - * rep values at leaf = {2, 2, 2, 2, 2} - * ``` - * - * merged def values = {1, 2, 2, 2, 2, 2, 1} - * merged rep values = {1, 2, 2, 2, 2, 2, 1} - * - * The size of the rep/def values is now larger than the leaf values and the offsets need to be - * adjusted in order to point to the correct start indices. We do this with an exclusive scan over - * the indices of offsets of empty lists and adding to existing offsets. - * ``` - * Level 1 new offsets = {0, 1, 4, 6, 7} - * ``` - * Repetition values at the beginning of a list need to be decremented. We use the new offsets to - * scatter the rep value.
- * ``` - * merged rep values = {1, 2, 2, 2, 2, 2, 1} - * scatter (1, new offsets) - * new offsets = {0, 1, 4, 6, 7} - * new rep values = {1, 1, 2, 2, 1, 2, 1} - * ``` - * - * Similarly we merge up all the way till level 0 offsets - * - * STRUCT COLUMNS : - * In case of struct columns, we don't have to merge struct levels with their children because a - * struct is the same size as its children. e.g. for a column `struct<int, float>`, if the row `i` - * is null, then the children columns `int` and `float` are also null at `i`. They also have the - * null entry represented in their respective null masks. So for any case of strictly struct based - * nesting, we can get the definition levels merely by iterating over the nesting for the same row. - * - * In case struct and lists are intermixed, the definition levels of all the contiguous struct - * levels can be constructed using the aforementioned iterative method. Only when we reach a list - * level, we need to do a merge with the subsequent level. - * - * So, for a column like `struct<list<int>>`, we are going to merge between the levels `struct<list` - * and `int`. - * In case of `list<struct<int>>`, we are going to merge between `list` and `struct<int>`. - * - * In general, one nesting level is the list level and any struct level that precedes it. - * - * A few more examples to visualize the partitioning of column hierarchy into nesting levels: - * (L is list, S is struct, i is integer(leaf data level), angle brackets omitted) - * ``` - * 1. LSi = L Si - * - | -- - * - * 2. LLSi = L L Si - * - | - | -- - * - * 3. SSLi = SSL i - * --- | - - * - * 4.
LLSLSSi = L L SL SSi - * - | - | -- | --- - * ``` - */ -dremel_data get_dremel_data(column_view h_col, - // TODO(cp): use device_span once it is converted to a single hd_vec - rmm::device_uvector const& d_nullability, - std::vector const& nullability, - rmm::cuda_stream_view stream) -{ - auto get_list_level = [](column_view col) { - while (col.type().id() == type_id::STRUCT) { - col = col.child(0); - } - return col; - }; - - auto get_empties = [&](column_view col, size_type start, size_type end) { - auto lcv = lists_column_view(get_list_level(col)); - rmm::device_uvector empties_idx(lcv.size(), stream); - rmm::device_uvector empties(lcv.size(), stream); - auto d_off = lcv.offsets().data(); - - auto empties_idx_end = - thrust::copy_if(rmm::exec_policy(stream), - thrust::make_counting_iterator(start), - thrust::make_counting_iterator(end), - empties_idx.begin(), - [d_off] __device__(auto i) { return d_off[i] == d_off[i + 1]; }); - auto empties_end = thrust::gather(rmm::exec_policy(stream), - empties_idx.begin(), - empties_idx_end, - lcv.offsets().begin(), - empties.begin()); - - auto empties_size = empties_end - empties.begin(); - return std::make_tuple(std::move(empties), std::move(empties_idx), empties_size); - }; - - // Check if there are empty lists with empty offsets in this column - bool has_empty_list_offsets = false; - { - auto curr_col = h_col; - while (is_nested(curr_col.type())) { - if (curr_col.type().id() == type_id::LIST) { - auto lcv = lists_column_view(curr_col); - if (lcv.offsets().size() == 0) { - has_empty_list_offsets = true; - break; - } - curr_col = lcv.child(); - } else if (curr_col.type().id() == type_id::STRUCT) { - curr_col = curr_col.child(0); - } - } - } - std::unique_ptr empty_list_offset_col; - if (has_empty_list_offsets) { - empty_list_offset_col = make_fixed_width_column(data_type(type_id::INT32), 1); - cudaMemsetAsync(empty_list_offset_col->mutable_view().head(), 0, sizeof(size_type), stream); - std::function normalize_col = 
[&](column_view const& col) { - auto children = [&]() -> std::vector { - if (col.type().id() == type_id::LIST) { - auto lcol = lists_column_view(col); - auto offset_col = - lcol.offsets().head() == nullptr ? empty_list_offset_col->view() : lcol.offsets(); - return {offset_col, normalize_col(lcol.child())}; - } else if (col.type().id() == type_id::STRUCT) { - return {normalize_col(col.child(0))}; - } else { - return {col.child_begin(), col.child_end()}; - } - }(); - return column_view(col.type(), - col.size(), - col.head(), - col.null_mask(), - UNKNOWN_NULL_COUNT, - col.offset(), - std::move(children)); - }; - h_col = normalize_col(h_col); - } - - auto curr_col = h_col; - std::vector nesting_levels; - std::vector def_at_level; - std::vector start_at_sub_level; - uint8_t curr_nesting_level_idx = 0; - - auto add_def_at_level = [&](column_view col) { - // Add up all def level contributions in this column all the way till the first list column - // appears in the hierarchy or until we get to leaf - uint32_t def = 0; - start_at_sub_level.push_back(curr_nesting_level_idx); - while (col.type().id() == type_id::STRUCT) { - def += (nullability[curr_nesting_level_idx]) ? 1 : 0; - col = col.child(0); - ++curr_nesting_level_idx; - } - // At the end of all those structs is either a list column or the leaf. Leaf column contributes - // at least one def level. It doesn't matter what the leaf contributes because it'll be at the - // end of the exclusive scan. - def += (nullability[curr_nesting_level_idx]) ? 
2 : 1; - def_at_level.push_back(def); - ++curr_nesting_level_idx; - }; - while (cudf::is_nested(curr_col.type())) { - nesting_levels.push_back(curr_col); - add_def_at_level(curr_col); - while (curr_col.type().id() == type_id::STRUCT) { - // Go down the hierarchy until we get to the LIST or the leaf level - curr_col = curr_col.child(0); - } - if (curr_col.type().id() == type_id::LIST) { - curr_col = curr_col.child(lists_column_view::child_column_index); - if (not is_nested(curr_col.type())) { - // Special case: when the leaf data column is the immediate child of the list col then we - // want it to be included right away. Otherwise the struct containing it will be included in - // the next iteration of this loop. - nesting_levels.push_back(curr_col); - add_def_at_level(curr_col); - break; - } - } - } - - auto [device_view_owners, d_nesting_levels] = - contiguous_copy_column_device_views(nesting_levels, stream); - - thrust::exclusive_scan( - thrust::host, def_at_level.begin(), def_at_level.end(), def_at_level.begin()); - - // Sliced list column views only have offsets applied to top level. Get offsets for each level. - rmm::device_uvector d_column_offsets(nesting_levels.size(), stream); - rmm::device_uvector d_column_ends(nesting_levels.size(), stream); - - auto d_col = column_device_view::create(h_col, stream); - cudf::detail::device_single_thread( - [offset_at_level = d_column_offsets.data(), - end_idx_at_level = d_column_ends.data(), - col = *d_col] __device__() { - auto curr_col = col; - size_type off = curr_col.offset(); - size_type end = off + curr_col.size(); - size_type level = 0; - offset_at_level[level] = off; - end_idx_at_level[level] = end; - ++level; - // Apply offset recursively until we get to leaf data - // Skip doing the following for any structs we encounter in between. 
- while (curr_col.type().id() == type_id::LIST or curr_col.type().id() == type_id::STRUCT) { - if (curr_col.type().id() == type_id::LIST) { - off = curr_col.child(lists_column_view::offsets_column_index).element(off); - end = curr_col.child(lists_column_view::offsets_column_index).element(end); - offset_at_level[level] = off; - end_idx_at_level[level] = end; - ++level; - curr_col = curr_col.child(lists_column_view::child_column_index); - } else { - curr_col = curr_col.child(0); - } - } - }, - stream); - - thrust::host_vector column_offsets = - cudf::detail::make_host_vector_async(d_column_offsets, stream); - thrust::host_vector column_ends = - cudf::detail::make_host_vector_async(d_column_ends, stream); - stream.synchronize(); - - size_t max_vals_size = 0; - for (size_t l = 0; l < column_offsets.size(); ++l) { - max_vals_size += column_ends[l] - column_offsets[l]; - } - - rmm::device_uvector rep_level(max_vals_size, stream); - rmm::device_uvector def_level(max_vals_size, stream); - - rmm::device_uvector temp_rep_vals(max_vals_size, stream); - rmm::device_uvector temp_def_vals(max_vals_size, stream); - rmm::device_uvector new_offsets(0, stream); - size_type curr_rep_values_size = 0; - { - // At this point, curr_col contains the leaf column. Max nesting level is - // nesting_levels.size(). - - // We are going to start by merging the last column in nesting_levels (the leaf, which is at the - // index `nesting_levels.size() - 1`) with the second-to-last (which is at - // `nesting_levels.size() - 2`). 
- size_t level = nesting_levels.size() - 2; - curr_col = nesting_levels[level]; - auto lcv = lists_column_view(get_list_level(curr_col)); - auto offset_size_at_level = column_ends[level] - column_offsets[level] + 1; - - // Get empties at this level - auto [empties, empties_idx, empties_size] = - get_empties(nesting_levels[level], column_offsets[level], column_ends[level]); - - // Merge empty at deepest parent level with the rep, def level vals at leaf level - - auto input_parent_rep_it = thrust::make_constant_iterator(level); - auto input_parent_def_it = - thrust::make_transform_iterator(empties_idx.begin(), - def_level_fn{d_nesting_levels + level, - d_nullability.data(), - start_at_sub_level[level], - def_at_level[level]}); - - // `nesting_levels.size()` == no of list levels + leaf. Max repetition level = no of list levels - auto input_child_rep_it = thrust::make_constant_iterator(nesting_levels.size() - 1); - auto input_child_def_it = - thrust::make_transform_iterator(thrust::make_counting_iterator(column_offsets[level + 1]), - def_level_fn{d_nesting_levels + level + 1, - d_nullability.data(), - start_at_sub_level[level + 1], - def_at_level[level + 1]}); - - // Zip the input and output value iterators so that merge operation is done only once - auto input_parent_zip_it = - thrust::make_zip_iterator(thrust::make_tuple(input_parent_rep_it, input_parent_def_it)); - - auto input_child_zip_it = - thrust::make_zip_iterator(thrust::make_tuple(input_child_rep_it, input_child_def_it)); - - auto output_zip_it = - thrust::make_zip_iterator(thrust::make_tuple(rep_level.begin(), def_level.begin())); - - auto ends = thrust::merge_by_key(rmm::exec_policy(stream), - empties.begin(), - empties.begin() + empties_size, - thrust::make_counting_iterator(column_offsets[level + 1]), - thrust::make_counting_iterator(column_ends[level + 1]), - input_parent_zip_it, - input_child_zip_it, - thrust::make_discard_iterator(), - output_zip_it); - - curr_rep_values_size = ends.second - 
output_zip_it; - - // Scan to get distance by which each offset value is shifted due to the insertion of empties - auto scan_it = cudf::detail::make_counting_transform_iterator( - column_offsets[level], - [off = lcv.offsets().data(), size = lcv.offsets().size()] __device__( - auto i) -> int { return (i + 1 < size) && (off[i] == off[i + 1]); }); - rmm::device_uvector scan_out(offset_size_at_level, stream); - thrust::exclusive_scan( - rmm::exec_policy(stream), scan_it, scan_it + offset_size_at_level, scan_out.begin()); - - // Add scan output to existing offsets to get new offsets into merged rep level values - new_offsets = rmm::device_uvector(offset_size_at_level, stream); - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - offset_size_at_level, - [off = lcv.offsets().data() + column_offsets[level], - scan_out = scan_out.data(), - new_off = new_offsets.data()] __device__(auto i) { - new_off[i] = off[i] - off[0] + scan_out[i]; - }); - - // Set rep level values at level starts to appropriate rep level - auto scatter_it = thrust::make_constant_iterator(level); - thrust::scatter(rmm::exec_policy(stream), - scatter_it, - scatter_it + new_offsets.size() - 1, - new_offsets.begin(), - rep_level.begin()); - } - - // Having already merged the last two levels, we are now going to merge the result with the - // third-last level which is at index `nesting_levels.size() - 3`. 
- for (int level = nesting_levels.size() - 3; level >= 0; level--) { - curr_col = nesting_levels[level]; - auto lcv = lists_column_view(get_list_level(curr_col)); - auto offset_size_at_level = column_ends[level] - column_offsets[level] + 1; - - // Get empties at this level - auto [empties, empties_idx, empties_size] = - get_empties(nesting_levels[level], column_offsets[level], column_ends[level]); - - auto offset_transformer = [new_child_offsets = new_offsets.data(), - child_start = column_offsets[level + 1]] __device__(auto x) { - return new_child_offsets[x - child_start]; // (x - child's offset) - }; - - // We will be reading from old rep_levels and writing again to rep_levels. Swap the current - // rep values into temp_rep_vals so it can become the input and rep_levels can again be output. - std::swap(temp_rep_vals, rep_level); - std::swap(temp_def_vals, def_level); - - // Merge empty at parent level with the rep, def level vals at current level - auto transformed_empties = thrust::make_transform_iterator(empties.begin(), offset_transformer); - - auto input_parent_rep_it = thrust::make_constant_iterator(level); - auto input_parent_def_it = - thrust::make_transform_iterator(empties_idx.begin(), - def_level_fn{d_nesting_levels + level, - d_nullability.data(), - start_at_sub_level[level], - def_at_level[level]}); - - // Zip the input and output value iterators so that merge operation is done only once - auto input_parent_zip_it = - thrust::make_zip_iterator(thrust::make_tuple(input_parent_rep_it, input_parent_def_it)); - - auto input_child_zip_it = - thrust::make_zip_iterator(thrust::make_tuple(temp_rep_vals.begin(), temp_def_vals.begin())); - - auto output_zip_it = - thrust::make_zip_iterator(thrust::make_tuple(rep_level.begin(), def_level.begin())); - - auto ends = thrust::merge_by_key(rmm::exec_policy(stream), - transformed_empties, - transformed_empties + empties_size, - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(curr_rep_values_size), 
- input_parent_zip_it, - input_child_zip_it, - thrust::make_discard_iterator(), - output_zip_it); - - curr_rep_values_size = ends.second - output_zip_it; - - // Scan to get distance by which each offset value is shifted due to the insertion of dremel - // level value fof an empty list - auto scan_it = cudf::detail::make_counting_transform_iterator( - column_offsets[level], - [off = lcv.offsets().data(), size = lcv.offsets().size()] __device__( - auto i) -> int { return (i + 1 < size) && (off[i] == off[i + 1]); }); - rmm::device_uvector scan_out(offset_size_at_level, stream); - thrust::exclusive_scan( - rmm::exec_policy(stream), scan_it, scan_it + offset_size_at_level, scan_out.begin()); - - // Add scan output to existing offsets to get new offsets into merged rep level values - rmm::device_uvector temp_new_offsets(offset_size_at_level, stream); - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - offset_size_at_level, - [off = lcv.offsets().data() + column_offsets[level], - scan_out = scan_out.data(), - new_off = temp_new_offsets.data(), - offset_transformer] __device__(auto i) { - new_off[i] = offset_transformer(off[i]) + scan_out[i]; - }); - new_offsets = std::move(temp_new_offsets); - - // Set rep level values at level starts to appropriate rep level - auto scatter_it = thrust::make_constant_iterator(level); - thrust::scatter(rmm::exec_policy(stream), - scatter_it, - scatter_it + new_offsets.size() - 1, - new_offsets.begin(), - rep_level.begin()); - } - - size_t level_vals_size = new_offsets.back_element(stream); - rep_level.resize(level_vals_size, stream); - def_level.resize(level_vals_size, stream); - - stream.synchronize(); - - size_type leaf_data_size = column_ends.back() - column_offsets.back(); - - return dremel_data{ - std::move(new_offsets), std::move(rep_level), std::move(def_level), leaf_data_size}; -} - void InitPageFragments(device_2dspan frag, device_span col_desc, device_span partitions, diff --git 
a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index 1de6be38b3d..b08bdac9535 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -448,43 +448,6 @@ void DecodePageData(hostdevice_vector& pages, size_t min_row, rmm::cuda_stream_view stream); -/** - * @brief Dremel data that describes one nested type column - * - * @see get_dremel_data() - */ -struct dremel_data { - rmm::device_uvector dremel_offsets; - rmm::device_uvector rep_level; - rmm::device_uvector def_level; - - size_type leaf_data_size; -}; - -/** - * @brief Get the dremel offsets and repetition and definition levels for a LIST column - * - * Dremel offsets are the per row offsets into the repetition and definition level arrays for a - * column. - * Example: - * ``` - * col = {{1, 2, 3}, { }, {5, 6}} - * dremel_offsets = { 0, 3, 4, 6} - * rep_level = { 0, 1, 1, 0, 0, 1} - * def_level = { 1, 1, 1, 0, 1, 1} - * ``` - * @param col Column of LIST type - * @param level_nullability Pre-determined nullability at each list level. Empty means infer from - * `col` - * @param stream CUDA stream used for device memory operations and kernel launches. 
- * - * @return A struct containing dremel data - */ -dremel_data get_dremel_data(column_view h_col, - rmm::device_uvector const& d_nullability, - std::vector const& nullability, - rmm::cuda_stream_view stream); - /** * @brief Launches kernel for initializing encoder page fragments * diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 70f4201c04b..725ffc7dbe9 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -775,11 +776,12 @@ parquet_column_view::parquet_column_view(schema_tree_node const& schema_node, // size of the leaf column // Calculate row offset into dremel data (repetition/definition values) and the respective // definition and repetition levels - gpu::dremel_data dremel = gpu::get_dremel_data(cudf_col, _d_nullability, _nullability, stream); - _dremel_offsets = std::move(dremel.dremel_offsets); - _rep_level = std::move(dremel.rep_level); - _def_level = std::move(dremel.def_level); - _data_count = dremel.leaf_data_size; // Needed for knowing what size dictionary to allocate + ::cudf::detail::dremel_data dremel = + get_dremel_data(cudf_col, _d_nullability, _nullability, stream); + _dremel_offsets = std::move(dremel.dremel_offsets); + _rep_level = std::move(dremel.rep_level); + _def_level = std::move(dremel.def_level); + _data_count = dremel.leaf_data_size; // Needed for knowing what size dictionary to allocate stream.synchronize(); } else { diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu index c52b8e97e4b..d31e1df7d95 100644 --- a/cpp/src/table/row_operators.cu +++ b/cpp/src/table/row_operators.cu @@ -261,7 +261,7 @@ auto decompose_structs(table_view table, auto list_lex_preprocess(table_view table, rmm::cuda_stream_view stream) { - std::vector dremel_data; + std::vector dremel_data; std::vector max_def_levels; for (auto const& col : table) { if (col.type().id() == 
type_id::LIST) { @@ -277,8 +277,7 @@ auto list_lex_preprocess(table_view table, rmm::cuda_stream_view stream) max_def_level += (cur_col.nullable() ? 1 : 0); nullability.push_back(static_cast(cur_col.nullable())); auto d_nullability = detail::make_device_uvector_async(nullability, stream); - dremel_data.push_back( - io::parquet::gpu::get_dremel_data(col, d_nullability, nullability, stream)); + dremel_data.push_back(detail::get_dremel_data(col, d_nullability, nullability, stream)); max_def_levels.push_back(max_def_level); // } else { // max_def_levels.push_back(0); @@ -288,19 +287,18 @@ auto list_lex_preprocess(table_view table, rmm::cuda_stream_view stream) // std::vector> dremel_offsets; // std::vector> rep_levels; // std::vector> def_levels; - std::vector dremel_device_views; + std::vector dremel_device_views; size_type c = 0; for (auto const& col : table) { if (col.type().id() == type_id::LIST) { // dremel_offsets.emplace_back(dremel_data[c].dremel_offsets); // rep_levels.emplace_back(dremel_data[c].rep_level); // def_levels.emplace_back(dremel_data[c].def_level); - dremel_device_views.push_back( - row::lexicographic::dremel_device_view{dremel_data[c].dremel_offsets.data(), - dremel_data[c].rep_level.data(), - dremel_data[c].def_level.data(), - dremel_data[c].leaf_data_size, - max_def_levels[c]}); + dremel_device_views.push_back(detail::dremel_device_view{dremel_data[c].dremel_offsets.data(), + dremel_data[c].rep_level.data(), + dremel_data[c].def_level.data(), + dremel_data[c].leaf_data_size, + max_def_levels[c]}); ++c; } else { // dremel_offsets.emplace_back(); From ee1393681f94d3af560e6927622bc0ac2abcaca3 Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Tue, 19 Jul 2022 01:44:34 +0530 Subject: [PATCH 41/78] pass _comparator to elem comt --- cpp/include/cudf/table/experimental/row_operators.cuh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh 
b/cpp/include/cudf/table/experimental/row_operators.cuh index 342fc9f0f2d..72b00d7b83e 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -415,7 +415,8 @@ class device_row_comparator { return cuda::std::pair(state, _depth); } if (l_def_levels[i] == l_max_def_level) { - auto comparator = element_comparator{_check_nulls, lcol, rcol, _null_precedence}; + auto comparator = + element_comparator{_check_nulls, lcol, rcol, _null_precedence, _depth, _comparator}; int last_null_depth = _depth; cuda::std::tie(state, last_null_depth) = cudf::type_dispatcher(lcol.type(), comparator, k, k); From 499a5bd4203f7f9a0673d6e50a2cb9a214e3dff2 Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Tue, 19 Jul 2022 01:55:06 +0530 Subject: [PATCH 42/78] Remove lines that deal with dremel data as separate variables --- cpp/src/table/row_operators.cu | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu index d31e1df7d95..5ba994af323 100644 --- a/cpp/src/table/row_operators.cu +++ b/cpp/src/table/row_operators.cu @@ -279,21 +279,13 @@ auto list_lex_preprocess(table_view table, rmm::cuda_stream_view stream) auto d_nullability = detail::make_device_uvector_async(nullability, stream); dremel_data.push_back(detail::get_dremel_data(col, d_nullability, nullability, stream)); max_def_levels.push_back(max_def_level); - // } else { - // max_def_levels.push_back(0); } } - // std::vector> dremel_offsets; - // std::vector> rep_levels; - // std::vector> def_levels; std::vector dremel_device_views; size_type c = 0; for (auto const& col : table) { if (col.type().id() == type_id::LIST) { - // dremel_offsets.emplace_back(dremel_data[c].dremel_offsets); - // rep_levels.emplace_back(dremel_data[c].rep_level); - // def_levels.emplace_back(dremel_data[c].def_level); 
dremel_device_views.push_back(detail::dremel_device_view{dremel_data[c].dremel_offsets.data(), dremel_data[c].rep_level.data(), dremel_data[c].def_level.data(), @@ -301,22 +293,11 @@ auto list_lex_preprocess(table_view table, rmm::cuda_stream_view stream) max_def_levels[c]}); ++c; } else { - // dremel_offsets.emplace_back(); - // rep_levels.emplace_back(); - // def_levels.emplace_back(); dremel_device_views.emplace_back(); } } - // auto d_dremel_offsets = detail::make_device_uvector_async(dremel_offsets, stream); - // auto d_rep_levels = detail::make_device_uvector_async(rep_levels, stream); - // auto d_def_levels = detail::make_device_uvector_async(def_levels, stream); - // auto d_max_def_levels = detail::make_device_uvector_async(max_def_levels, stream); auto d_dremel_device_views = detail::make_device_uvector_async(dremel_device_views, stream); return std::make_tuple(std::move(dremel_data), std::move(d_dremel_device_views)); - // std::move(d_dremel_offsets), - // std::move(d_rep_levels), - // std::move(d_def_levels), - // std::move(d_max_def_levels)); } using column_checker_fn_t = std::function; @@ -402,7 +383,6 @@ std::shared_ptr preprocessed_table::create( auto [verticalized_lhs, new_column_order, new_null_precedence, verticalized_col_depths] = decompose_structs(t, column_order, null_precedence); - // auto [dremel_data, d_dremel_offsets, d_rep_levels, d_def_levels, d_max_def_levels] = auto [dremel_data, d_dremel_device_views] = list_lex_preprocess(verticalized_lhs, stream); auto d_t = table_device_view::create(verticalized_lhs, stream); From 0c3c12e7bf0208ef03b7b391cd62d4e6110d70a5 Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Wed, 20 Jul 2022 02:58:25 +0530 Subject: [PATCH 43/78] remove requirement to pass d_nullability and allow dremel_device_view to be constructed from dremel_data --- .../utilities/{dremel.cuh => dremel.hpp} | 22 +++++++++++------- .../cudf/table/experimental/row_operators.cuh | 2 +- cpp/src/column/dremel.cu | 23 +++++++++++-------- 
cpp/src/io/parquet/writer_impl.cu | 13 +++++------ cpp/src/table/row_operators.cu | 16 +++---------- 5 files changed, 38 insertions(+), 38 deletions(-) rename cpp/include/cudf/detail/utilities/{dremel.cuh => dremel.hpp} (91%) diff --git a/cpp/include/cudf/detail/utilities/dremel.cuh b/cpp/include/cudf/detail/utilities/dremel.hpp similarity index 91% rename from cpp/include/cudf/detail/utilities/dremel.cuh rename to cpp/include/cudf/detail/utilities/dremel.hpp index 07c33628762..536cfc72b62 100644 --- a/cpp/include/cudf/detail/utilities/dremel.cuh +++ b/cpp/include/cudf/detail/utilities/dremel.hpp @@ -22,6 +22,14 @@ namespace cudf::detail { +struct dremel_device_view { + size_type* offsets; + uint8_t* rep_levels; + uint8_t* def_levels; + size_type leaf_data_size; + uint8_t max_def_level; +}; + /** * @brief Dremel data that describes one nested type column * @@ -33,14 +41,13 @@ struct dremel_data { rmm::device_uvector def_level; size_type leaf_data_size; -}; - -struct dremel_device_view { - size_type* offsets; - uint8_t* rep_levels; - uint8_t* def_levels; - size_type leaf_data_size; uint8_t max_def_level; + + operator dremel_device_view() + { + return dremel_device_view{ + dremel_offsets.data(), rep_level.data(), def_level.data(), leaf_data_size, max_def_level}; + } }; /** @@ -63,7 +70,6 @@ struct dremel_device_view { * @return A struct containing dremel data */ dremel_data get_dremel_data(column_view h_col, - rmm::device_uvector const& d_nullability, std::vector const& nullability, rmm::cuda_stream_view stream); diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 72b00d7b83e..cff48224308 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/cpp/src/column/dremel.cu b/cpp/src/column/dremel.cu index 
cc34481cfac..72586f172c7 100644 --- a/cpp/src/column/dremel.cu +++ b/cpp/src/column/dremel.cu @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include @@ -173,8 +173,6 @@ struct def_level_fn { * ``` */ dremel_data get_dremel_data(column_view h_col, - // TODO(cp): use device_span once it is converted to a single hd_vec - rmm::device_uvector const& d_nullability, std::vector const& nullability, rmm::cuda_stream_view stream) { @@ -268,10 +266,10 @@ dremel_data get_dremel_data(column_view h_col, col = col.child(0); ++curr_nesting_level_idx; } - // At the end of all those structs is either a list column or the leaf. Leaf column contributes - // at least one def level. It doesn't matter what the leaf contributes because it'll be at the - // end of the exclusive scan. - def += (nullability[curr_nesting_level_idx]) ? 2 : 1; + // At the end of all those structs is either a list column or the leaf. List column contributes + // at least one def level. Leaf contributes 1 level only if it is nullable. + def += + (col.type().id() == type_id::LIST ? 1 : 0) + (nullability[curr_nesting_level_idx] ? 1 : 0); def_at_level.push_back(def); ++curr_nesting_level_idx; }; @@ -298,8 +296,10 @@ dremel_data get_dremel_data(column_view h_col, auto [device_view_owners, d_nesting_levels] = contiguous_copy_column_device_views(nesting_levels, stream); + auto max_def_level = def_at_level.back(); thrust::exclusive_scan( thrust::host, def_at_level.begin(), def_at_level.end(), def_at_level.begin()); + max_def_level += def_at_level.back(); // Sliced list column views only have offsets applied to top level. Get offsets for each level. 
rmm::device_uvector d_column_offsets(nesting_levels.size(), stream); @@ -345,6 +345,8 @@ dremel_data get_dremel_data(column_view h_col, max_vals_size += column_ends[l] - column_offsets[l]; } + auto d_nullability = cudf::detail::make_device_uvector_async(nullability, stream); + rmm::device_uvector rep_level(max_vals_size, stream); rmm::device_uvector def_level(max_vals_size, stream); @@ -532,8 +534,11 @@ dremel_data get_dremel_data(column_view h_col, size_type leaf_data_size = column_ends.back() - column_offsets.back(); - return dremel_data{ - std::move(new_offsets), std::move(rep_level), std::move(def_level), leaf_data_size}; + return dremel_data{std::move(new_offsets), + std::move(rep_level), + std::move(def_level), + leaf_data_size, + max_def_level}; } } // namespace cudf::detail diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 725ffc7dbe9..0d01051ef78 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include @@ -776,12 +776,11 @@ parquet_column_view::parquet_column_view(schema_tree_node const& schema_node, // size of the leaf column // Calculate row offset into dremel data (repetition/definition values) and the respective // definition and repetition levels - ::cudf::detail::dremel_data dremel = - get_dremel_data(cudf_col, _d_nullability, _nullability, stream); - _dremel_offsets = std::move(dremel.dremel_offsets); - _rep_level = std::move(dremel.rep_level); - _def_level = std::move(dremel.def_level); - _data_count = dremel.leaf_data_size; // Needed for knowing what size dictionary to allocate + cudf::detail::dremel_data dremel = get_dremel_data(cudf_col, _nullability, stream); + _dremel_offsets = std::move(dremel.dremel_offsets); + _rep_level = std::move(dremel.rep_level); + _def_level = std::move(dremel.def_level); + _data_count = dremel.leaf_data_size; // Needed for knowing what size dictionary to 
allocate stream.synchronize(); } else { diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu index 5ba994af323..1c59a300a53 100644 --- a/cpp/src/table/row_operators.cu +++ b/cpp/src/table/row_operators.cu @@ -262,23 +262,17 @@ auto decompose_structs(table_view table, auto list_lex_preprocess(table_view table, rmm::cuda_stream_view stream) { std::vector dremel_data; - std::vector max_def_levels; for (auto const& col : table) { if (col.type().id() == type_id::LIST) { // Check nullability of the list std::vector nullability; - auto cur_col = col; - uint8_t max_def_level = 0; + auto cur_col = col; while (cur_col.type().id() == type_id::LIST) { - max_def_level += (cur_col.nullable() ? 2 : 1); nullability.push_back(static_cast(cur_col.nullable())); cur_col = cur_col.child(lists_column_view::child_column_index); } - max_def_level += (cur_col.nullable() ? 1 : 0); nullability.push_back(static_cast(cur_col.nullable())); - auto d_nullability = detail::make_device_uvector_async(nullability, stream); - dremel_data.push_back(detail::get_dremel_data(col, d_nullability, nullability, stream)); - max_def_levels.push_back(max_def_level); + dremel_data.push_back(detail::get_dremel_data(col, nullability, stream)); } } @@ -286,11 +280,7 @@ auto list_lex_preprocess(table_view table, rmm::cuda_stream_view stream) size_type c = 0; for (auto const& col : table) { if (col.type().id() == type_id::LIST) { - dremel_device_views.push_back(detail::dremel_device_view{dremel_data[c].dremel_offsets.data(), - dremel_data[c].rep_level.data(), - dremel_data[c].def_level.data(), - dremel_data[c].leaf_data_size, - max_def_levels[c]}); + dremel_device_views.push_back(dremel_data[c]); ++c; } else { dremel_device_views.emplace_back(); From b62d0a2b70f6726cb6388edbdc947df8653bd40c Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Thu, 21 Jul 2022 03:11:44 +0530 Subject: [PATCH 44/78] Let get_dremel_data work without nullability --- cpp/include/cudf/detail/utilities/dremel.hpp | 2 
+- cpp/src/column/dremel.cu | 11 ++++++++++- cpp/src/table/row_operators.cu | 10 +--------- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/cpp/include/cudf/detail/utilities/dremel.hpp b/cpp/include/cudf/detail/utilities/dremel.hpp index 536cfc72b62..e58143e5990 100644 --- a/cpp/include/cudf/detail/utilities/dremel.hpp +++ b/cpp/include/cudf/detail/utilities/dremel.hpp @@ -70,7 +70,7 @@ struct dremel_data { * @return A struct containing dremel data */ dremel_data get_dremel_data(column_view h_col, - std::vector const& nullability, + std::vector nullability, rmm::cuda_stream_view stream); } // namespace cudf::detail diff --git a/cpp/src/column/dremel.cu b/cpp/src/column/dremel.cu index 72586f172c7..fed98307d64 100644 --- a/cpp/src/column/dremel.cu +++ b/cpp/src/column/dremel.cu @@ -173,7 +173,7 @@ struct def_level_fn { * ``` */ dremel_data get_dremel_data(column_view h_col, - std::vector const& nullability, + std::vector nullability, rmm::cuda_stream_view stream) { auto get_list_level = [](column_view col) { @@ -256,6 +256,15 @@ dremel_data get_dremel_data(column_view h_col, std::vector start_at_sub_level; uint8_t curr_nesting_level_idx = 0; + if (nullability.empty()) { + while (is_nested(curr_col.type())) { + nullability.push_back(curr_col.nullable()); + curr_col = curr_col.type().id() == type_id::LIST ? 
curr_col.child(1) : curr_col.child(0); + } + nullability.push_back(curr_col.nullable()); + } + curr_col = h_col; + auto add_def_at_level = [&](column_view col) { // Add up all def level contributions in this column all the way till the first list column // appears in the hierarchy or until we get to leaf diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu index 1c59a300a53..8a8737019d2 100644 --- a/cpp/src/table/row_operators.cu +++ b/cpp/src/table/row_operators.cu @@ -264,15 +264,7 @@ auto list_lex_preprocess(table_view table, rmm::cuda_stream_view stream) std::vector dremel_data; for (auto const& col : table) { if (col.type().id() == type_id::LIST) { - // Check nullability of the list - std::vector nullability; - auto cur_col = col; - while (cur_col.type().id() == type_id::LIST) { - nullability.push_back(static_cast(cur_col.nullable())); - cur_col = cur_col.child(lists_column_view::child_column_index); - } - nullability.push_back(static_cast(cur_col.nullable())); - dremel_data.push_back(detail::get_dremel_data(col, nullability, stream)); + dremel_data.push_back(detail::get_dremel_data(col, {}, stream)); } } From 229ebe3c7010d3ca75d379aa83fb041a913a6a96 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 26 Jul 2022 15:43:44 -0700 Subject: [PATCH 45/78] Update meta.yaml. 
--- conda/recipes/libcudf/meta.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index 6dd85312349..cad6b6ede4e 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -113,7 +113,8 @@ outputs: - test -f $PREFIX/include/cudf/detail/transpose.hpp - test -f $PREFIX/include/cudf/detail/unary.hpp - test -f $PREFIX/include/cudf/detail/utilities/alignment.hpp - - test -f $PREFIX/include/cudf/detail/utilities/column.hpp + - test -f $PREFIX/include/cudf/detail/utilities/dremel.hpp + - test -f $PREFIX/include/cudf/detail/utilities/linked_column.hpp - test -f $PREFIX/include/cudf/detail/utilities/int_fastdiv.h - test -f $PREFIX/include/cudf/detail/utilities/integer_utils.hpp - test -f $PREFIX/include/cudf/detail/utilities/vector_factories.hpp From 926e7ab1d346af75470aa6c9e07bceb1e28ef0c2 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 26 Jul 2022 17:00:09 -0700 Subject: [PATCH 46/78] Consolidate and augment descriptions of Dremel encoding. --- cpp/include/cudf/detail/utilities/dremel.hpp | 113 +++++++++++++++++++ cpp/src/column/dremel.cu | 101 +---------------- 2 files changed, 114 insertions(+), 100 deletions(-) diff --git a/cpp/include/cudf/detail/utilities/dremel.hpp b/cpp/include/cudf/detail/utilities/dremel.hpp index e58143e5990..0104d00e493 100644 --- a/cpp/include/cudf/detail/utilities/dremel.hpp +++ b/cpp/include/cudf/detail/utilities/dremel.hpp @@ -53,6 +53,21 @@ struct dremel_data { /** * @brief Get the dremel offsets and repetition and definition levels for a LIST column * + * Dremel is a query system created by Google for ad hoc data analysis. The Dremel engine is + * described in depth in the paper "Dremel: Interactive Analysis of Web-Scale + * Datasets" (https://research.google/pubs/pub36632/). 
One of the key components of Dremel + * is an encoding that converts record-like data into a columnar store for efficient memory + * accesses. The Parquet file format uses Dremel encoding to handle nested data, so libcudf + * requires some facilities for working with this encoding. Furthermore, libcudf leverages + * Dremel encoding as a means for performing lexicographic comparisons of nested columns. + * + * Dremel encoding is built around two concepts, the repetition and definition levels. + * Since describing them thoroughly is out of scope for this docstring, here are a couple of + * blogs that provide useful background: + * - http://www.goldsborough.me/distributed-systems/2019/05/18/21-09-00-a_look_at_dremel/ + * - https://akshays-blog.medium.com/wrapping-head-around-repetition-and-definition-levels-in-dremel-powering-bigquery-c1a33c9695da + * The remainder of this documentation assumes familiarity with the Dremel concepts. + * * Dremel offsets are the per row offsets into the repetition and definition level arrays for a * column. * Example: @@ -62,6 +77,104 @@ struct dremel_data { * rep_level = { 0, 1, 1, 0, 0, 1} * def_level = { 1, 1, 1, 0, 1, 1} * ``` + * + * The repetition and definition level values are ideally computed using a recursive call over a + * nested structure but in order to better utilize GPU resources, this function calculates them + * with a bottom up merge method. 
+ * + * Given a LIST column of type `List<List<int>>` like so: + * ``` + * col = { + * [], + * [[], [1, 2, 3], [4, 5]], + * [[]] + * } + * ``` + * We can represent it in cudf format with two level of offsets like this: + * ``` + * Level 0 offsets = {0, 0, 3, 5, 6} + * Level 1 offsets = {0, 0, 3, 5, 5} + * Values = {1, 2, 3, 4, 5} + * ``` + * The desired result of this function is the repetition and definition level values that + * correspond to the data values: + * ``` + * col = {[], [[], [1, 2, 3], [4, 5]], [[]]} + * def = { 0 1, 2, 2, 2, 2, 2, 1 } + * rep = { 0, 0, 0, 2, 2, 1, 2, 0 } + * ``` + * + * Since repetition and definition levels arrays contain a value for each empty list, the size of + * the rep/def level array can be given by + * ``` + * rep_level.size() = size of leaf column + number of empty lists in level 0 + * + number of empty lists in level 1 ... + * ``` + * + * We start with finding the empty lists in the penultimate level and merging it with the indices + * of the leaf level. The values for the merge are the definition and repetition levels + * ``` + * empties at level 1 = {0, 5} + * def values at 1 = {1, 1} + * rep values at 1 = {1, 1} + * indices at leaf = {0, 1, 2, 3, 4} + * def values at leaf = {2, 2, 2, 2, 2} + * rep values at leaf = {2, 2, 2, 2, 2} + * ``` + * + * merged def values = {1, 2, 2, 2, 2, 2, 1} + * merged rep values = {1, 2, 2, 2, 2, 2, 1} + * + * The size of the rep/def values is now larger than the leaf values and the offsets need to be + * adjusted in order to point to the correct start indices. We do this with an exclusive scan over + * the indices of offsets of empty lists and adding to existing offsets. + * ``` + * Level 1 new offsets = {0, 1, 4, 6, 7} + * ``` + * Repetition values at the beginning of a list need to be decremented. We use the new offsets to + * scatter the rep value. 
+ * ``` + * merged rep values = {1, 2, 2, 2, 2, 2, 1} + * scatter (1, new offsets) + * new offsets = {0, 1, 4, 6, 7} + * new rep values = {1, 1, 2, 2, 1, 2, 1} + * ``` + * + * Similarly we merge up all the way till level 0 offsets + * + * STRUCT COLUMNS : + * In case of struct columns, we don't have to merge struct levels with their children because a + * struct is the same size as its children. e.g. for a column `struct<int, float>`, if the row `i` + * is null, then the children columns `int` and `float` are also null at `i`. They also have the + * null entry represented in their respective null masks. So for any case of strictly struct based + * nesting, we can get the definition levels merely by iterating over the nesting for the same row. + * + * In case struct and lists are intermixed, the definition levels of all the contiguous struct + * levels can be constructed using the aforementioned iterative method. Only when we reach a list + * level, we need to do a merge with the subsequent level. + * + * So, for a column like `struct<list<int>>`, we are going to merge between the levels `struct<list` + * and `int`. + * For a column like `list<struct<int>>`, we are going to merge between `list` and `struct<int>`. + * + * In general, one nesting level is the list level and any struct level that precedes it. + * + * A few more examples to visualize the partitioning of column hierarchy into nesting levels: + * (L is list, S is struct, i is integer(leaf data level), angle brackets omitted) + * ``` + * 1. LSi = L Si + * - | -- + * + * 2. LLSi = L L Si + * - | - | -- + * + * 3. SSLi = SSL i + * --- | - + * + * 4. LLSLSSi = L L SL SSi + * - | - | -- | --- + * ``` + * * @param col Column of LIST type * @param level_nullability Pre-determined nullability at each list level. 
Empty means infer from * `col` diff --git a/cpp/src/column/dremel.cu b/cpp/src/column/dremel.cu index fed98307d64..83a50d9a217 100644 --- a/cpp/src/column/dremel.cu +++ b/cpp/src/column/dremel.cu @@ -72,106 +72,7 @@ struct def_level_fn { } }; -/** - * @brief Get the dremel offsets and repetition and definition levels for a LIST column - * - * The repetition and definition level values are ideally computed using a recursive call over a - * nested structure but in order to better utilize GPU resources, this function calculates them - * with a bottom up merge method. - * - * Given a LIST column of type `List<List<int>>` like so: - * ``` - * col = { - * [], - * [[], [1, 2, 3], [4, 5]], - * [[]] - * } - * ``` - * We can represent it in cudf format with two level of offsets like this: - * ``` - * Level 0 offsets = {0, 0, 3, 5, 6} - * Level 1 offsets = {0, 0, 3, 5, 5} - * Values = {1, 2, 3, 4, 5} - * ``` - * The desired result of this function is the repetition and definition level values that - * correspond to the data values: - * ``` - * col = {[], [[], [1, 2, 3], [4, 5]], [[]]} - * def = { 0 1, 2, 2, 2, 2, 2, 1 } - * rep = { 0, 0, 0, 2, 2, 1, 2, 0 } - * ``` - * - * Since repetition and definition levels arrays contain a value for each empty list, the size of - * the rep/def level array can be given by - * ``` - * rep_level.size() = size of leaf column + number of empty lists in level 0 - * + number of empty lists in level 1 ... - * ``` - * - * We start with finding the empty lists in the penultimate level and merging it with the indices - * of the leaf level. 
The values for the merge are the definition and repetition levels - * ``` - * empties at level 1 = {0, 5} - * def values at 1 = {1, 1} - * rep values at 1 = {1, 1} - * indices at leaf = {0, 1, 2, 3, 4} - * def values at leaf = {2, 2, 2, 2, 2} - * rep values at leaf = {2, 2, 2, 2, 2} - * ``` - * - * merged def values = {1, 2, 2, 2, 2, 2, 1} - * merged rep values = {1, 2, 2, 2, 2, 2, 1} - * - * The size of the rep/def values is now larger than the leaf values and the offsets need to be - * adjusted in order to point to the correct start indices. We do this with an exclusive scan over - * the indices of offsets of empty lists and adding to existing offsets. - * ``` - * Level 1 new offsets = {0, 1, 4, 6, 7} - * ``` - * Repetition values at the beginning of a list need to be decremented. We use the new offsets to - * scatter the rep value. - * ``` - * merged rep values = {1, 2, 2, 2, 2, 2, 1} - * scatter (1, new offsets) - * new offsets = {0, 1, 4, 6, 7} - * new rep values = {1, 1, 2, 2, 1, 2, 1} - * ``` - * - * Similarly we merge up all the way till level 0 offsets - * - * STRUCT COLUMNS : - * In case of struct columns, we don't have to merge struct levels with their children because a - * struct is the same size as its children. e.g. for a column `struct<int, float>`, if the row `i` - * is null, then the children columns `int` and `float` are also null at `i`. They also have the - * null entry represented in their respective null masks. So for any case of strictly struct based - * nesting, we can get the definition levels merely by iterating over the nesting for the same row. - * - * In case struct and lists are intermixed, the definition levels of all the contiguous struct - * levels can be constructed using the aforementioned iterative method. Only when we reach a list - * level, we need to do a merge with the subsequent level. 
- * - * So, for a column like `struct<list<int>>`, we are going to merge between the levels `struct<list` - * and `int`. - * For a column like `list<struct<int>>`, we are going to merge between `list` and `struct<int>`. 
- * - * In general, one nesting level is the list level and any struct level that precedes it. - * - * A few more examples to visualize the partitioning of column hierarchy into nesting levels: - * (L is list, S is struct, i is integer(leaf data level), angle brackets omitted) - * ``` - * 1. LSi = L Si - * - | -- - * - * 2. LLSi = L L Si - * - | - | -- - * - * 3. SSLi = SSL i - * --- | - - * - * 4. LLSLSSi = L L SL SSi - * - | - | -- | --- - * ``` - */ + dremel_data get_dremel_data(column_view h_col, std::vector nullability, rmm::cuda_stream_view stream) From d1cea06cc6b8006fde3eb8dd7c82b55c1527fc35 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 26 Jul 2022 17:42:27 -0700 Subject: [PATCH 47/78] Fix style. --- cpp/include/cudf/detail/utilities/dremel.hpp | 5 +++-- cpp/src/column/dremel.cu | 1 - 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/include/cudf/detail/utilities/dremel.hpp b/cpp/include/cudf/detail/utilities/dremel.hpp index 0104d00e493..2e44f9801e4 100644 --- a/cpp/include/cudf/detail/utilities/dremel.hpp +++ b/cpp/include/cudf/detail/utilities/dremel.hpp @@ -64,8 +64,9 @@ struct dremel_data { * Dremel encoding is built around two concepts, the repetition and definition levels. * Since describing them thoroughly is out of scope for this docstring, here are a couple of * blogs that provide useful background: - * - http://www.goldsborough.me/distributed-systems/2019/05/18/21-09-00-a_look_at_dremel/ - * - https://akshays-blog.medium.com/wrapping-head-around-repetition-and-definition-levels-in-dremel-powering-bigquery-c1a33c9695da + * http://www.goldsborough.me/distributed-systems/2019/05/18/21-09-00-a_look_at_dremel/ + * https://akshays-blog.medium.com/wrapping-head-around-repetition-and-definition-levels-in-dremel-powering-bigquery-c1a33c9695da + * * The remainder of this documentation assumes familiarity with the Dremel concepts. 
* * Dremel offsets are the per row offsets into the repetition and definition level arrays for a diff --git a/cpp/src/column/dremel.cu b/cpp/src/column/dremel.cu index 83a50d9a217..08d88436347 100644 --- a/cpp/src/column/dremel.cu +++ b/cpp/src/column/dremel.cu @@ -72,7 +72,6 @@ struct def_level_fn { } }; - dremel_data get_dremel_data(column_view h_col, std::vector nullability, rmm::cuda_stream_view stream) From 6030b7ba1fe80720937b27f73bc961c0e1c48f06 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 27 Jul 2022 11:32:32 -0700 Subject: [PATCH 48/78] Remove unnecessary optionals around dremel_device_view. --- .../cudf/table/experimental/row_operators.cuh | 71 ++++++++----------- 1 file changed, 31 insertions(+), 40 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index cff48224308..7db0cf7a207 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -248,26 +248,23 @@ class device_row_comparator { * `null_order::BEFORE` for all columns. * @param comparator Physical element relational comparison functor. 
*/ - device_row_comparator( - Nullate check_nulls, - table_device_view lhs, - table_device_view rhs, - std::optional> depth = std::nullopt, - std::optional> column_order = std::nullopt, - std::optional> null_precedence = std::nullopt, - std::optional> l_dremel_device_views = - std::nullopt, - std::optional> r_dremel_device_views = - std::nullopt, - PhysicalElementComparator comparator = {}) noexcept + device_row_comparator(Nullate check_nulls, + table_device_view lhs, + table_device_view rhs, + device_span l_dremel_device_views, + device_span r_dremel_device_views, + std::optional> depth = std::nullopt, + std::optional> column_order = std::nullopt, + std::optional> null_precedence = std::nullopt, + PhysicalElementComparator comparator = {}) noexcept : _lhs{lhs}, _rhs{rhs}, + _l_dremel_device_views(l_dremel_device_views), + _r_dremel_device_views(r_dremel_device_views), _check_nulls{check_nulls}, _depth{depth}, _column_order{column_order}, _null_precedence{null_precedence}, - _l_dremel_device_views(l_dremel_device_views), - _r_dremel_device_views(r_dremel_device_views), _comparator{comparator} { } @@ -467,15 +464,14 @@ class device_row_comparator { null_order const null_precedence = _null_precedence.has_value() ? (*_null_precedence)[i] : null_order::BEFORE; - auto element_comp = element_comparator{ - _check_nulls, - _lhs.column(i), - _rhs.column(i), - null_precedence, - depth, - _comparator, - (_l_dremel_device_views ? (*_l_dremel_device_views)[i] : detail::dremel_device_view{}), - (_r_dremel_device_views ? 
(*_r_dremel_device_views)[i] : detail::dremel_device_view{})}; + auto element_comp = element_comparator{_check_nulls, + _lhs.column(i), + _rhs.column(i), + null_precedence, + depth, + _comparator, + _l_dremel_device_views[i], + _r_dremel_device_views[i]}; weak_ordering state; cuda::std::tie(state, last_null_depth) = @@ -493,15 +489,13 @@ class device_row_comparator { private: table_device_view const _lhs; table_device_view const _rhs; + device_span _l_dremel_device_views; + device_span _r_dremel_device_views; Nullate const _check_nulls; std::optional> const _depth; std::optional> const _column_order; std::optional> const _null_precedence; PhysicalElementComparator const _comparator; - - // List related members - std::optional> _l_dremel_device_views; - std::optional> _r_dremel_device_views; }; // class device_row_comparator /** @@ -666,12 +660,9 @@ struct preprocessed_table { } // TODO: span of spans? - [[nodiscard]] std::optional> dremel_device_views() - const + [[nodiscard]] device_span dremel_device_views() const { - return _dremel_device_views.size() - ? 
std::optional>(_dremel_device_views) - : std::nullopt; + return device_span(_dremel_device_views); } private: @@ -755,11 +746,11 @@ class self_comparator { return less_comparator{device_row_comparator{nullate, *d_t, *d_t, + d_t->dremel_device_views(), + d_t->dremel_device_views(), d_t->depths(), d_t->column_order(), d_t->null_precedence(), - d_t->dremel_device_views(), - d_t->dremel_device_views(), comparator}}; } @@ -772,11 +763,11 @@ class self_comparator { return less_equivalent_comparator{device_row_comparator{nullate, *d_t, *d_t, + d_t->dremel_device_views(), + d_t->dremel_device_views(), d_t->depths(), d_t->column_order(), d_t->null_precedence(), - d_t->dremel_device_views(), - d_t->dremel_device_views(), comparator}}; } @@ -902,11 +893,11 @@ class two_table_comparator { strong_index_comparator_adapter{device_row_comparator{nullate, *d_left_table, *d_right_table, + d_left_table->dremel_device_views(), + d_right_table->dremel_device_views(), d_left_table->depths(), d_left_table->column_order(), d_left_table->null_precedence(), - d_left_table->dremel_device_views(), - d_right_table->dremel_device_views(), comparator}}}; } @@ -920,11 +911,11 @@ class two_table_comparator { strong_index_comparator_adapter{device_row_comparator{nullate, *d_left_table, *d_right_table, + d_left_table->dremel_device_views(), + d_right_table->dremel_device_views(), d_left_table->depths(), d_left_table->column_order(), d_left_table->null_precedence(), - d_left_table->dremel_device_views(), - d_right_table->dremel_device_views(), comparator}}}; } From fbb9dd386e3738367a820703a87119b25792b3e9 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 27 Jul 2022 11:43:42 -0700 Subject: [PATCH 49/78] Simplify list_lex_preprocess. 
--- cpp/src/table/row_operators.cu | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu index 8a8737019d2..19d5af2cd1f 100644 --- a/cpp/src/table/row_operators.cu +++ b/cpp/src/table/row_operators.cu @@ -262,18 +262,11 @@ auto decompose_structs(table_view table, auto list_lex_preprocess(table_view table, rmm::cuda_stream_view stream) { std::vector dremel_data; - for (auto const& col : table) { - if (col.type().id() == type_id::LIST) { - dremel_data.push_back(detail::get_dremel_data(col, {}, stream)); - } - } - std::vector dremel_device_views; - size_type c = 0; for (auto const& col : table) { if (col.type().id() == type_id::LIST) { - dremel_device_views.push_back(dremel_data[c]); - ++c; + dremel_data.push_back(detail::get_dremel_data(col, {}, stream)); + dremel_device_views.push_back(dremel_data.back()); } else { dremel_device_views.emplace_back(); } From 25c22f9defaaa1502c58477b5e3ab5dc7aa21520 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 27 Jul 2022 12:02:42 -0700 Subject: [PATCH 50/78] Add some extra comments and docstrings. --- cpp/include/cudf/detail/utilities/dremel.hpp | 7 +++++- .../cudf/table/experimental/row_operators.cuh | 23 ++++++++++++++++++- cpp/src/table/row_operators.cu | 4 ++++ 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/cpp/include/cudf/detail/utilities/dremel.hpp b/cpp/include/cudf/detail/utilities/dremel.hpp index 2e44f9801e4..34ff81d634e 100644 --- a/cpp/include/cudf/detail/utilities/dremel.hpp +++ b/cpp/include/cudf/detail/utilities/dremel.hpp @@ -22,6 +22,11 @@ namespace cudf::detail { +/** + * @brief Device view for `dremel_data`. + * + * @see the `dremel_data` struct for more info. 
+ */ struct dremel_device_view { size_type* offsets; uint8_t* rep_levels; @@ -33,7 +38,7 @@ struct dremel_device_view { /** * @brief Dremel data that describes one nested type column * - * @see get_dremel_data() + * @see get_dremel_data() for more info. */ struct dremel_data { rmm::device_uvector dremel_offsets; diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 7db0cf7a207..21268b7b4ec 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -599,6 +599,28 @@ struct preprocessed_table { friend class self_comparator; ///< Allow self_comparator to access private members friend class two_table_comparator; ///< Allow two_table_comparator to access private members + /** + * @brief Construct a preprocessed table for use with lexicographical comparison + * + * Sets up the table for use with lexicographical comparison. The resulting preprocessed table can + * be passed to the constructor of `lexicographic::self_comparator` to avoid preprocessing again. + * + * @param table The table to preprocess + * @param column_order Optional, host array the same length as a row that indicates the desired + * ascending/descending order of each column in a row. If empty, it is assumed all columns are + * sorted in ascending order. + * @param null_precedence Optional, device array the same length as a row and indicates how null + * values compare to all other for every column. If it is nullptr, then null precedence would be + * `null_order::BEFORE` for all columns. + * @param depths The depths of each column resulting from decomposing struct columns. + * @param dremel_data The dremel data for each list column. The length of this object is the + * number of list columns in the table. + * @param dremel_device_views Device views into the dremel_data structs contained in the + * `dremel_data` parameter. 
For columns that are not list columns, this uvector will should + * contain an empty `dremel_device_view`. As such, this uvector has as many elements as there are + * columns in the table (unlike the `dremel_data` parameter, which is only as long as the number + * of list columns). + */ preprocessed_table(table_device_view_owner&& table, rmm::device_uvector&& column_order, rmm::device_uvector&& null_precedence, @@ -659,7 +681,6 @@ struct preprocessed_table { return _depths.size() ? std::optional>(_depths) : std::nullopt; } - // TODO: span of spans? [[nodiscard]] device_span dremel_device_views() const { return device_span(_dremel_device_views); diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu index 19d5af2cd1f..84fa0ab0057 100644 --- a/cpp/src/table/row_operators.cu +++ b/cpp/src/table/row_operators.cu @@ -259,6 +259,10 @@ auto decompose_structs(table_view table, std::move(verticalized_col_depths)); } +/* + * This helper function generates dremel data for any list-type columns in a + * table. This data is necessary for lexicographic comparisons. + */ auto list_lex_preprocess(table_view table, rmm::cuda_stream_view stream) { std::vector dremel_data; From 5305349660154deb1b28278f925ba9fb7bcd7123 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 27 Jul 2022 15:31:50 -0700 Subject: [PATCH 51/78] Add extensive comments explaining the list comparison algorithm. 
--- .../cudf/table/experimental/row_operators.cuh | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 21268b7b4ec..6d34f4f0d39 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -336,6 +336,9 @@ class device_row_comparator { template () and + // TODO: This also needs to account for list_view not, + // or alternatively just remove the `and` since we can + // always override with more specific templates. not std::is_same_v)> __device__ cuda::std::pair operator()(size_type const, size_type const) const noexcept @@ -400,17 +403,34 @@ class device_row_comparator { auto const l_rep_levels = _l_dremel_device_view.rep_levels; auto const r_rep_levels = _r_dremel_device_view.rep_levels; weak_ordering state{weak_ordering::EQUIVALENT}; + + // Loop over each element in the encoding. Note that this includes nulls + // and empty lists, so not every index i/j correspondings to an actual + // element in the child column. The index k is used to keep track of the + // current child element that we're actually comparing. for (int i = l_start, j = r_start, k = 0; i < l_end and j < r_end; ++i, ++j) { + // First early exit: the definition levels do not match. if (l_def_levels[i] != r_def_levels[j]) { state = (l_def_levels[i] < r_def_levels[j]) ? weak_ordering::LESS : weak_ordering::GREATER; return cuda::std::pair(state, _depth); } + + // Second early exit: the repetition levels do not match. if (l_rep_levels[i] != r_rep_levels[j]) { state = (l_rep_levels[i] < r_rep_levels[j]) ? weak_ordering::LESS : weak_ordering::GREATER; return cuda::std::pair(state, _depth); } + + // Third early exit: This case has two branches. + // 1) If we are at the maximum definition level, then we actually have + // an underlying element to compare, not just an empty list or a + // null. 
Therefore, we access the kth element of each list and + // compare the values. + // 2) If we are 1 - the maximum definition level and the column is + // nullable, we know that we are looking at a element in the child + // column. In this case we simply skip to the next element. if (l_def_levels[i] == l_max_def_level) { auto comparator = element_comparator{_check_nulls, lcol, rcol, _null_precedence, _depth, _comparator}; @@ -423,6 +443,14 @@ class device_row_comparator { ++k; } } + + // If we have reached this stage, we know that definition levels, + // repetition levels, and actual elements are identical in both list + // columns up to the `min(l_end - l_start, r_end - r_start)` element of + // the dremel encoding. However, two lists can only compare equivalent if + // they are of the same length. Otherwise, the shorter of the two is less + // than the longer. This final check determines the appropriate resulting + // ordering by checking how many total elements each list is composed of. state = (l_end - l_start < r_end - r_start) ? weak_ordering::LESS : (l_end - l_start > r_end - r_start) ? weak_ordering::GREATER : weak_ordering::EQUIVALENT; From 9b5f8c1255017168ec1988ea0c98fa27577214f8 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 27 Jul 2022 15:41:40 -0700 Subject: [PATCH 52/78] Reorder declarations for improved readability and logical consistency. 
--- .../cudf/table/experimental/row_operators.cuh | 40 ++++++++++++------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 6d34f4f0d39..9d5df4b5f05 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -385,29 +385,41 @@ class device_row_comparator { __device__ cuda::std::pair operator()(size_type lhs_element_index, size_type rhs_element_index) { - auto const l_offsets = _l_dremel_device_view.offsets; - auto const r_offsets = _r_dremel_device_view.offsets; - auto l_start = l_offsets[lhs_element_index]; - auto l_end = l_offsets[lhs_element_index + 1]; - auto r_start = r_offsets[rhs_element_index]; - auto r_end = r_offsets[rhs_element_index + 1]; + // These are all the values from the Dremel encoding. + auto const l_max_def_level = _l_dremel_device_view.max_def_level; + auto const l_def_levels = _l_dremel_device_view.def_levels; + auto const r_def_levels = _r_dremel_device_view.def_levels; + auto const l_rep_levels = _l_dremel_device_view.rep_levels; + auto const r_rep_levels = _r_dremel_device_view.rep_levels; + + // Traverse the nested list hierarchy to get a column device view + // pointing to the underlying child data. 
column_device_view lcol = _lhs.slice(lhs_element_index, 1); column_device_view rcol = _rhs.slice(rhs_element_index, 1); while (lcol.type().id() == type_id::LIST) { lcol = detail::lists_column_device_view(lcol).get_sliced_child(); rcol = detail::lists_column_device_view(rcol).get_sliced_child(); } - auto const l_max_def_level = _l_dremel_device_view.max_def_level; - auto const l_def_levels = _l_dremel_device_view.def_levels; - auto const r_def_levels = _r_dremel_device_view.def_levels; - auto const l_rep_levels = _l_dremel_device_view.rep_levels; - auto const r_rep_levels = _r_dremel_device_view.rep_levels; - weak_ordering state{weak_ordering::EQUIVALENT}; + + // These start and end values indicate the start and end points of all + // the elements of the lists in the current list element + // (`[lhs|rhs]_element_index`) that we are comparing. + auto const l_offsets = _l_dremel_device_view.offsets; + auto const r_offsets = _r_dremel_device_view.offsets; + auto l_start = l_offsets[lhs_element_index]; + auto l_end = l_offsets[lhs_element_index + 1]; + auto r_start = r_offsets[rhs_element_index]; + auto r_end = r_offsets[rhs_element_index + 1]; + + // This comparator will be used when we actually need to compare elements + auto comparator = + element_comparator{_check_nulls, lcol, rcol, _null_precedence, _depth, _comparator}; // Loop over each element in the encoding. Note that this includes nulls // and empty lists, so not every index i/j correspondings to an actual // element in the child column. The index k is used to keep track of the // current child element that we're actually comparing. + weak_ordering state{weak_ordering::EQUIVALENT}; for (int i = l_start, j = r_start, k = 0; i < l_end and j < r_end; ++i, ++j) { // First early exit: the definition levels do not match. if (l_def_levels[i] != r_def_levels[j]) { @@ -432,8 +444,6 @@ class device_row_comparator { // nullable, we know that we are looking at a element in the child // column. 
In this case we simply skip to the next element. if (l_def_levels[i] == l_max_def_level) { - auto comparator = - element_comparator{_check_nulls, lcol, rcol, _null_precedence, _depth, _comparator}; int last_null_depth = _depth; cuda::std::tie(state, last_null_depth) = cudf::type_dispatcher(lcol.type(), comparator, k, k); @@ -447,7 +457,7 @@ class device_row_comparator { // If we have reached this stage, we know that definition levels, // repetition levels, and actual elements are identical in both list // columns up to the `min(l_end - l_start, r_end - r_start)` element of - // the dremel encoding. However, two lists can only compare equivalent if + // the Dremel encoding. However, two lists can only compare equivalent if // they are of the same length. Otherwise, the shorter of the two is less // than the longer. This final check determines the appropriate resulting // ordering by checking how many total elements each list is composed of. From b334d19003a81cbbe9665a883d070ab4f087a87f Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 27 Jul 2022 16:10:01 -0700 Subject: [PATCH 53/78] Address open PR comments. --- .../cudf/table/experimental/row_operators.cuh | 50 ++++++++++--------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 9d5df4b5f05..8beca11e072 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -336,17 +336,17 @@ class device_row_comparator { template () and - // TODO: This also needs to account for list_view not, - // or alternatively just remove the `and` since we can - // always override with more specific templates. 
- not std::is_same_v)> + not std::is_same_v and + not std::is_same_v)> __device__ cuda::std::pair operator()(size_type const, size_type const) const noexcept { CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types."); } - template )> + template () and + std::is_same_v)> __device__ cuda::std::pair operator()( size_type const lhs_element_index, size_type const rhs_element_index) const noexcept { @@ -420,18 +420,22 @@ class device_row_comparator { // element in the child column. The index k is used to keep track of the // current child element that we're actually comparing. weak_ordering state{weak_ordering::EQUIVALENT}; - for (int i = l_start, j = r_start, k = 0; i < l_end and j < r_end; ++i, ++j) { + for (int left_dremel_index = l_start, right_dremel_index = r_start, element_index = 0; + left_dremel_index < l_end and right_dremel_index < r_end; + ++left_dremel_index, ++right_dremel_index) { // First early exit: the definition levels do not match. - if (l_def_levels[i] != r_def_levels[j]) { - state = - (l_def_levels[i] < r_def_levels[j]) ? weak_ordering::LESS : weak_ordering::GREATER; + if (l_def_levels[left_dremel_index] != r_def_levels[right_dremel_index]) { + state = (l_def_levels[left_dremel_index] < r_def_levels[right_dremel_index]) + ? weak_ordering::LESS + : weak_ordering::GREATER; return cuda::std::pair(state, _depth); } // Second early exit: the repetition levels do not match. - if (l_rep_levels[i] != r_rep_levels[j]) { - state = - (l_rep_levels[i] < r_rep_levels[j]) ? weak_ordering::LESS : weak_ordering::GREATER; + if (l_rep_levels[left_dremel_index] != r_rep_levels[right_dremel_index]) { + state = (l_rep_levels[left_dremel_index] < r_rep_levels[right_dremel_index]) + ? 
weak_ordering::LESS + : weak_ordering::GREATER; return cuda::std::pair(state, _depth); } @@ -443,14 +447,14 @@ class device_row_comparator { // 2) If we are 1 - the maximum definition level and the column is // nullable, we know that we are looking at a element in the child // column. In this case we simply skip to the next element. - if (l_def_levels[i] == l_max_def_level) { - int last_null_depth = _depth; - cuda::std::tie(state, last_null_depth) = - cudf::type_dispatcher(lcol.type(), comparator, k, k); + if (l_def_levels[left_dremel_index] == l_max_def_level) { + int last_null_depth = _depth; + cuda::std::tie(state, last_null_depth) = cudf::type_dispatcher( + lcol.type(), comparator, element_index, element_index); if (state != weak_ordering::EQUIVALENT) { return cuda::std::pair(state, _depth); } - ++k; - } else if (lcol.nullable() and l_def_levels[i] == l_max_def_level - 1) { - ++k; + ++element_index; + } else if (lcol.nullable() and l_def_levels[left_dremel_index] == l_max_def_level - 1) { + ++element_index; } } @@ -473,8 +477,8 @@ class device_row_comparator { Nullate const _check_nulls; null_order const _null_precedence; int const _depth; - detail::dremel_device_view _l_dremel_device_view; - detail::dremel_device_view _r_dremel_device_view; + detail::dremel_device_view const _l_dremel_device_view; + detail::dremel_device_view const _r_dremel_device_view; PhysicalElementComparator const _comparator; }; @@ -527,8 +531,8 @@ class device_row_comparator { private: table_device_view const _lhs; table_device_view const _rhs; - device_span _l_dremel_device_views; - device_span _r_dremel_device_views; + device_span const _l_dremel_device_views; + device_span const _r_dremel_device_views; Nullate const _check_nulls; std::optional> const _depth; std::optional> const _column_order; From 1e31ac1cd90be941729ae25315fd6a43296e8e06 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 27 Jul 2022 16:49:32 -0700 Subject: [PATCH 54/78] Enable previously disabled test. 
--- cpp/tests/rolling/collect_ops_test.cpp | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/cpp/tests/rolling/collect_ops_test.cpp b/cpp/tests/rolling/collect_ops_test.cpp index a0af8f150e3..9dc13b2f9f7 100644 --- a/cpp/tests/rolling/collect_ops_test.cpp +++ b/cpp/tests/rolling/collect_ops_test.cpp @@ -2275,10 +2275,23 @@ TEST_F(CollectSetTest, ListTypeRollingWindow) auto const prev_column = fixed_width_column_wrapper{1, 2, 2, 2, 2}; auto const foll_column = fixed_width_column_wrapper{1, 1, 1, 1, 0}; - EXPECT_THROW(rolling_collect_set(input_column, - prev_column, - foll_column, - 1, - *make_collect_set_aggregation()), - cudf::logic_error); + auto const expected = [] { + auto data = fixed_width_column_wrapper{1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 6, 4, 5, + 6, 7, 8, 9, 6, 7, 8, 9, 10, 7, 8, 9, 10}; + auto inner_offsets = + fixed_width_column_wrapper{0, 3, 5, 8, 10, 11, 13, 14, 17, 18, 21, 22, 25, 26}; + auto outer_offsets = fixed_width_column_wrapper{0, 2, 5, 8, 11, 13}; + + auto inner_list = cudf::make_lists_column(13, inner_offsets.release(), data.release(), 0, {}); + + return cudf::make_lists_column(5, outer_offsets.release(), std::move(inner_list), 0, {}); + }(); + + auto const result = rolling_collect_set(input_column, + prev_column, + foll_column, + 1, + *make_collect_set_aggregation()); + + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected->view(), result->view()); } From 7c77616c1e59075d7ab27ce892998f9fe4feacdf Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 27 Jul 2022 16:53:02 -0700 Subject: [PATCH 55/78] Clean up comment. 
--- cpp/include/cudf/table/experimental/row_operators.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 8beca11e072..a81a7afdfc1 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -734,7 +734,7 @@ struct preprocessed_table { rmm::device_uvector const _null_precedence; rmm::device_uvector const _depths; - // List related pre-computation + // Dremel encoding of list columns used for the comparison algorithm std::vector _dremel_data; rmm::device_uvector _dremel_device_views; }; From 1f6b050182a72afa14ded25f53bff29f017ace95 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 28 Jul 2022 11:09:32 -0700 Subject: [PATCH 56/78] Address first round of PR comments. --- cpp/benchmarks/sort/sort_lists.cpp | 2 +- cpp/include/cudf/detail/utilities/dremel.hpp | 24 ++++++++++++-------- cpp/src/column/dremel.cu | 2 +- cpp/src/table/row_operators.cu | 3 +++ 4 files changed, 20 insertions(+), 11 deletions(-) diff --git a/cpp/benchmarks/sort/sort_lists.cpp b/cpp/benchmarks/sort/sort_lists.cpp index 183cb89d22d..c6344c8ba6b 100644 --- a/cpp/benchmarks/sort/sort_lists.cpp +++ b/cpp/benchmarks/sort/sort_lists.cpp @@ -27,7 +27,7 @@ void nvbench_sort_lists(nvbench::state& state) const size_t size_bytes(state.get_int64("size_bytes")); const cudf::size_type depth{static_cast(state.get_int64("depth"))}; - const double null_frequency{state.get_float64("null_frequency")}; + auto const null_frequency{state.get_float64("null_frequency")}; data_profile table_profile; table_profile.set_distribution_params(cudf::type_id::LIST, distribution_id::UNIFORM, 0, 5); diff --git a/cpp/include/cudf/detail/utilities/dremel.hpp b/cpp/include/cudf/detail/utilities/dremel.hpp index 34ff81d634e..cf355d41ef8 100644 --- a/cpp/include/cudf/detail/utilities/dremel.hpp +++ 
b/cpp/include/cudf/detail/utilities/dremel.hpp @@ -28,11 +28,15 @@ namespace cudf::detail { * @see the `dremel_data` struct for more info. */ struct dremel_device_view { - size_type* offsets; - uint8_t* rep_levels; - uint8_t* def_levels; - size_type leaf_data_size; - uint8_t max_def_level; + // TODO: These elements are default initializable to support default + // initialization of the object. This is currently exploited to create views + // that will never actually be used. We should consider whether this + // represents a serious issue that should be worked around more robustly. + size_type const* offsets{}; + uint8_t const* rep_levels{}; + uint8_t const* def_levels{}; + size_type const leaf_data_size{}; + uint8_t const max_def_level{}; }; /** @@ -45,10 +49,10 @@ struct dremel_data { rmm::device_uvector rep_level; rmm::device_uvector def_level; - size_type leaf_data_size; - uint8_t max_def_level; + size_type const leaf_data_size; + uint8_t const max_def_level; - operator dremel_device_view() + operator dremel_device_view() const { return dremel_device_view{ dremel_offsets.data(), rep_level.data(), def_level.data(), leaf_data_size, max_def_level}; @@ -69,8 +73,10 @@ struct dremel_data { * Dremel encoding is built around two concepts, the repetition and definition levels. * Since describing them thoroughly is out of scope for this docstring, here are a couple of * blogs that provide useful background: + * * http://www.goldsborough.me/distributed-systems/2019/05/18/21-09-00-a_look_at_dremel/ * https://akshays-blog.medium.com/wrapping-head-around-repetition-and-definition-levels-in-dremel-powering-bigquery-c1a33c9695da + * https://blog.twitter.com/engineering/en_us/a/2013/dremel-made-simple-with-parquet * * The remainder of this documentation assumes familiarity with the Dremel concepts. 
* @@ -188,7 +194,7 @@ struct dremel_data { * * @return A struct containing dremel data */ -dremel_data get_dremel_data(column_view h_col, +dremel_data get_dremel_data(column_view const& h_col, std::vector nullability, rmm::cuda_stream_view stream); diff --git a/cpp/src/column/dremel.cu b/cpp/src/column/dremel.cu index 08d88436347..dd3d288d664 100644 --- a/cpp/src/column/dremel.cu +++ b/cpp/src/column/dremel.cu @@ -72,7 +72,7 @@ struct def_level_fn { } }; -dremel_data get_dremel_data(column_view h_col, +dremel_data get_dremel_data(column_view const& h_col, std::vector nullability, rmm::cuda_stream_view stream) { diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu index 84fa0ab0057..9f9b914e738 100644 --- a/cpp/src/table/row_operators.cu +++ b/cpp/src/table/row_operators.cu @@ -272,6 +272,9 @@ auto list_lex_preprocess(table_view table, rmm::cuda_stream_view stream) dremel_data.push_back(detail::get_dremel_data(col, {}, stream)); dremel_device_views.push_back(dremel_data.back()); } else { + // TODO: Note that this constructs a device view that is in an invalid + // state, i.e. dereferencing any of its pointer members will lead to a + // seg fault. We may instead wish to create a vector of optionals. dremel_device_views.emplace_back(); } } From c35a39a06246fe1abb9f203898278dcc2924f054 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 28 Jul 2022 14:44:55 -0700 Subject: [PATCH 57/78] Move dremel files to lists/detail. 
--- conda/recipes/libcudf/meta.yaml | 2 +- cpp/CMakeLists.txt | 2 +- .../cudf/{detail/utilities => lists/detail}/dremel.hpp | 0 cpp/include/cudf/table/experimental/row_operators.cuh | 2 +- cpp/src/io/parquet/writer_impl.cu | 2 +- cpp/src/{column => lists}/dremel.cu | 4 ++-- 6 files changed, 6 insertions(+), 6 deletions(-) rename cpp/include/cudf/{detail/utilities => lists/detail}/dremel.hpp (100%) rename cpp/src/{column => lists}/dremel.cu (99%) diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index cad6b6ede4e..8128ace0b78 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -113,7 +113,6 @@ outputs: - test -f $PREFIX/include/cudf/detail/transpose.hpp - test -f $PREFIX/include/cudf/detail/unary.hpp - test -f $PREFIX/include/cudf/detail/utilities/alignment.hpp - - test -f $PREFIX/include/cudf/detail/utilities/dremel.hpp - test -f $PREFIX/include/cudf/detail/utilities/linked_column.hpp - test -f $PREFIX/include/cudf/detail/utilities/int_fastdiv.h - test -f $PREFIX/include/cudf/detail/utilities/integer_utils.hpp @@ -167,6 +166,7 @@ outputs: - test -f $PREFIX/include/cudf/lists/detail/concatenate.hpp - test -f $PREFIX/include/cudf/lists/detail/contains.hpp - test -f $PREFIX/include/cudf/lists/detail/copying.hpp + - test -f $PREFIX/include/cudf/lists/detail/dremel.hpp - test -f $PREFIX/include/cudf/lists/detail/extract.hpp - test -f $PREFIX/include/cudf/lists/detail/interleave_columns.hpp - test -f $PREFIX/include/cudf/lists/detail/scatter_helper.cuh diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index b2dffef89ea..5de11835a47 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -237,7 +237,6 @@ add_library( src/column/column_factories.cpp src/column/column_factories.cu src/column/column_view.cpp - src/column/dremel.cu src/copying/concatenate.cu src/copying/contiguous_split.cu src/copying/copy.cpp @@ -379,6 +378,7 @@ add_library( src/lists/copying/segmented_gather.cu 
src/lists/copying/scatter_helper.cu src/lists/count_elements.cu + src/lists/dremel.cu src/lists/explode.cu src/lists/extract.cu src/lists/interleave_columns.cu diff --git a/cpp/include/cudf/detail/utilities/dremel.hpp b/cpp/include/cudf/lists/detail/dremel.hpp similarity index 100% rename from cpp/include/cudf/detail/utilities/dremel.hpp rename to cpp/include/cudf/lists/detail/dremel.hpp diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index a81a7afdfc1..57b0f1463ad 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -21,8 +21,8 @@ #include #include #include -#include #include +#include #include #include #include diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 2626eb7eecd..d69769d2fae 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -30,9 +30,9 @@ #include #include -#include #include #include +#include #include #include #include diff --git a/cpp/src/column/dremel.cu b/cpp/src/lists/dremel.cu similarity index 99% rename from cpp/src/column/dremel.cu rename to cpp/src/lists/dremel.cu index dd3d288d664..671f5a7ccd9 100644 --- a/cpp/src/column/dremel.cu +++ b/cpp/src/lists/dremel.cu @@ -18,8 +18,8 @@ #include #include #include -#include #include +#include #include #include @@ -72,7 +72,7 @@ struct def_level_fn { } }; -dremel_data get_dremel_data(column_view const& h_col, +dremel_data get_dremel_data(column_view h_col, std::vector nullability, rmm::cuda_stream_view stream) { From b520e38856eff938b920442ee4165c96a7ee1822 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 28 Jul 2022 14:46:58 -0700 Subject: [PATCH 58/78] Fix header. 
--- cpp/include/cudf/lists/detail/dremel.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cudf/lists/detail/dremel.hpp b/cpp/include/cudf/lists/detail/dremel.hpp index cf355d41ef8..325e37ff831 100644 --- a/cpp/include/cudf/lists/detail/dremel.hpp +++ b/cpp/include/cudf/lists/detail/dremel.hpp @@ -194,7 +194,7 @@ struct dremel_data { * * @return A struct containing dremel data */ -dremel_data get_dremel_data(column_view const& h_col, +dremel_data get_dremel_data(column_view h_col, std::vector nullability, rmm::cuda_stream_view stream); From e8ebcc4ce3746086bf46adf23c0a5fe7568628b0 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 4 Aug 2022 11:33:37 -0700 Subject: [PATCH 59/78] Try separating out primitive comparison. --- .../cudf/table/experimental/row_operators.cuh | 78 +++++++++++++++---- 1 file changed, 63 insertions(+), 15 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index b225fe6c76d..166fd82ec97 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -205,6 +205,62 @@ struct sorting_physical_element_comparator { } }; +template +struct primitive_element_comparator { + // (const cudf::column_device_view, const cudf::column_device_view, const cudf::nullate::DYNAMIC, + // const cudf::null_order, const int, const + // cudf::experimental::row::lexicographic::sorting_physical_element_comparator) + + __device__ primitive_element_comparator(Nullate const check_nulls, + column_device_view const lhs, + column_device_view const rhs, + null_order const null_precedence = null_order::BEFORE, + int const depth = 0, + PhysicalElementComparator const comparator = {}) + : _lhs{lhs}, + _rhs{rhs}, + _check_nulls{check_nulls}, + _null_precedence{null_precedence}, + _depth{depth}, + _comparator{comparator} + { + } + + template ())> + __device__ cuda::std::pair operator()( + 
size_type const lhs_element_index, size_type const rhs_element_index) const noexcept + { + if (_check_nulls) { + bool const lhs_is_null{_lhs.is_null(lhs_element_index)}; + bool const rhs_is_null{_rhs.is_null(rhs_element_index)}; + + if (lhs_is_null or rhs_is_null) { // at least one is null + return cuda::std::pair(null_compare(lhs_is_null, rhs_is_null, _null_precedence), _depth); + } + } + + return cuda::std::pair(_comparator(_lhs.element(lhs_element_index), + _rhs.element(rhs_element_index)), + std::numeric_limits::max()); + } + + template ())> + __device__ cuda::std::pair operator()(size_type const, + size_type const) const noexcept + { + CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types."); + } + + column_device_view const _lhs; + column_device_view const _rhs; + Nullate const _check_nulls; + null_order const _null_precedence; + int const _depth; + PhysicalElementComparator const _comparator; +}; + /** * @brief Computes the lexicographic comparison between 2 rows. 
* @@ -320,18 +376,9 @@ class device_row_comparator { __device__ cuda::std::pair operator()( size_type const lhs_element_index, size_type const rhs_element_index) const noexcept { - if (_check_nulls) { - bool const lhs_is_null{_lhs.is_null(lhs_element_index)}; - bool const rhs_is_null{_rhs.is_null(rhs_element_index)}; - - if (lhs_is_null or rhs_is_null) { // at least one is null - return cuda::std::pair(null_compare(lhs_is_null, rhs_is_null, _null_precedence), _depth); - } - } - - return cuda::std::pair(_comparator(_lhs.element(lhs_element_index), - _rhs.element(rhs_element_index)), - std::numeric_limits::max()); + return primitive_element_comparator{ + _check_nulls, _lhs, _rhs, _null_precedence, _depth, _comparator} + .template operator()(lhs_element_index, rhs_element_index); } template ( lcol.type(), - element_comparator{_check_nulls, lcol, rcol, _null_precedence, depth, _comparator}, + primitive_element_comparator{ + _check_nulls, _lhs, _rhs, _null_precedence, _depth, _comparator}, lhs_element_index, rhs_element_index); } @@ -412,8 +460,8 @@ class device_row_comparator { auto r_end = r_offsets[rhs_element_index + 1]; // This comparator will be used when we actually need to compare elements - auto comparator = - element_comparator{_check_nulls, lcol, rcol, _null_precedence, _depth, _comparator}; + auto comparator = primitive_element_comparator{ + _check_nulls, _lhs, _rhs, _null_precedence, _depth, _comparator}; // Loop over each element in the encoding. Note that this includes nulls // and empty lists, so not every index i/j correspondings to an actual From 77c57bf9a861606648ee80259cb6fa33211dc284 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 4 Aug 2022 11:33:48 -0700 Subject: [PATCH 60/78] Revert "Try separating out primitive comparison." This reverts commit e8ebcc4ce3746086bf46adf23c0a5fe7568628b0. 
--- .../cudf/table/experimental/row_operators.cuh | 78 ++++--------------- 1 file changed, 15 insertions(+), 63 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 166fd82ec97..b225fe6c76d 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -205,62 +205,6 @@ struct sorting_physical_element_comparator { } }; -template -struct primitive_element_comparator { - // (const cudf::column_device_view, const cudf::column_device_view, const cudf::nullate::DYNAMIC, - // const cudf::null_order, const int, const - // cudf::experimental::row::lexicographic::sorting_physical_element_comparator) - - __device__ primitive_element_comparator(Nullate const check_nulls, - column_device_view const lhs, - column_device_view const rhs, - null_order const null_precedence = null_order::BEFORE, - int const depth = 0, - PhysicalElementComparator const comparator = {}) - : _lhs{lhs}, - _rhs{rhs}, - _check_nulls{check_nulls}, - _null_precedence{null_precedence}, - _depth{depth}, - _comparator{comparator} - { - } - - template ())> - __device__ cuda::std::pair operator()( - size_type const lhs_element_index, size_type const rhs_element_index) const noexcept - { - if (_check_nulls) { - bool const lhs_is_null{_lhs.is_null(lhs_element_index)}; - bool const rhs_is_null{_rhs.is_null(rhs_element_index)}; - - if (lhs_is_null or rhs_is_null) { // at least one is null - return cuda::std::pair(null_compare(lhs_is_null, rhs_is_null, _null_precedence), _depth); - } - } - - return cuda::std::pair(_comparator(_lhs.element(lhs_element_index), - _rhs.element(rhs_element_index)), - std::numeric_limits::max()); - } - - template ())> - __device__ cuda::std::pair operator()(size_type const, - size_type const) const noexcept - { - CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types."); - } - - column_device_view const _lhs; - 
column_device_view const _rhs; - Nullate const _check_nulls; - null_order const _null_precedence; - int const _depth; - PhysicalElementComparator const _comparator; -}; - /** * @brief Computes the lexicographic comparison between 2 rows. * @@ -376,9 +320,18 @@ class device_row_comparator { __device__ cuda::std::pair operator()( size_type const lhs_element_index, size_type const rhs_element_index) const noexcept { - return primitive_element_comparator{ - _check_nulls, _lhs, _rhs, _null_precedence, _depth, _comparator} - .template operator()(lhs_element_index, rhs_element_index); + if (_check_nulls) { + bool const lhs_is_null{_lhs.is_null(lhs_element_index)}; + bool const rhs_is_null{_rhs.is_null(rhs_element_index)}; + + if (lhs_is_null or rhs_is_null) { // at least one is null + return cuda::std::pair(null_compare(lhs_is_null, rhs_is_null, _null_precedence), _depth); + } + } + + return cuda::std::pair(_comparator(_lhs.element(lhs_element_index), + _rhs.element(rhs_element_index)), + std::numeric_limits::max()); } template ( lcol.type(), - primitive_element_comparator{ - _check_nulls, _lhs, _rhs, _null_precedence, _depth, _comparator}, + element_comparator{_check_nulls, lcol, rcol, _null_precedence, depth, _comparator}, lhs_element_index, rhs_element_index); } @@ -460,8 +412,8 @@ class device_row_comparator { auto r_end = r_offsets[rhs_element_index + 1]; // This comparator will be used when we actually need to compare elements - auto comparator = primitive_element_comparator{ - _check_nulls, _lhs, _rhs, _null_precedence, _depth, _comparator}; + auto comparator = + element_comparator{_check_nulls, lcol, rcol, _null_precedence, _depth, _comparator}; // Loop over each element in the encoding. 
Note that this includes nulls // and empty lists, so not every index i/j correspondings to an actual From 46f234fd10c71c987de503d00ee3eafa4597225a Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 4 Aug 2022 14:07:14 -0700 Subject: [PATCH 61/78] Address most simple review comments. --- .../cudf/table/experimental/row_operators.cuh | 38 +++++++++---------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index b225fe6c76d..281c3034019 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -259,8 +259,8 @@ class device_row_comparator { PhysicalElementComparator comparator = {}) noexcept : _lhs{lhs}, _rhs{rhs}, - _l_dremel_device_views(l_dremel_device_views), - _r_dremel_device_views(r_dremel_device_views), + _l_dremel(l_dremel_device_views), + _r_dremel(r_dremel_device_views), _check_nulls{check_nulls}, _depth{depth}, _column_order{column_order}, @@ -336,8 +336,7 @@ class device_row_comparator { template () and - not std::is_same_v and - not std::is_same_v)> + not cudf::is_nested())> __device__ cuda::std::pair operator()(size_type const, size_type const) const noexcept { @@ -411,14 +410,14 @@ class device_row_comparator { auto r_start = r_offsets[rhs_element_index]; auto r_end = r_offsets[rhs_element_index + 1]; - // This comparator will be used when we actually need to compare elements + // This comparator will be used to compare leaf (non-nested) data types. auto comparator = element_comparator{_check_nulls, lcol, rcol, _null_precedence, _depth, _comparator}; // Loop over each element in the encoding. Note that this includes nulls - // and empty lists, so not every index i/j correspondings to an actual - // element in the child column. The index k is used to keep track of the - // current child element that we're actually comparing. 
+ // and empty lists, so not every index corresponds to an actual element + // in the child column. The element_index is used to keep track of the current + // child element that we're actually comparing. weak_ordering state{weak_ordering::EQUIVALENT}; for (int left_dremel_index = l_start, right_dremel_index = r_start, element_index = 0; left_dremel_index < l_end and right_dremel_index < r_end; @@ -442,11 +441,11 @@ class device_row_comparator { // Third early exit: This case has two branches. // 1) If we are at the maximum definition level, then we actually have // an underlying element to compare, not just an empty list or a - // null. Therefore, we access the kth element of each list and - // compare the values. + // null. Therefore, we access the element_index element of each list + // and compare the values. // 2) If we are 1 - the maximum definition level and the column is - // nullable, we know that we are looking at a element in the child - // column. In this case we simply skip to the next element. + // nullable, the current element must be a null in the leaf data. + // In this case we ignore the null and skip to the next element. if (l_def_levels[left_dremel_index] == l_max_def_level) { int last_null_depth = _depth; cuda::std::tie(state, last_null_depth) = cudf::type_dispatcher( @@ -465,10 +464,7 @@ class device_row_comparator { // they are of the same length. Otherwise, the shorter of the two is less // than the longer. This final check determines the appropriate resulting // ordering by checking how many total elements each list is composed of. - state = (l_end - l_start < r_end - r_start) ? weak_ordering::LESS - : (l_end - l_start > r_end - r_start) ? 
weak_ordering::GREATER - : weak_ordering::EQUIVALENT; - return cuda::std::pair(state, _depth); + return cuda::std::pair(detail::compare_elements(l_end - l_start, r_end - r_start), _depth); } private: @@ -512,8 +508,8 @@ class device_row_comparator { null_precedence, depth, _comparator, - _l_dremel_device_views[i], - _r_dremel_device_views[i]}; + _l_dremel[i], + _r_dremel[i]}; weak_ordering state; cuda::std::tie(state, last_null_depth) = @@ -531,8 +527,8 @@ class device_row_comparator { private: table_device_view const _lhs; table_device_view const _rhs; - device_span const _l_dremel_device_views; - device_span const _r_dremel_device_views; + device_span const _l_dremel; + device_span const _r_dremel; Nullate const _check_nulls; std::optional> const _depth; std::optional> const _column_order; @@ -648,7 +644,7 @@ struct preprocessed_table { * be passed to the constructor of `lexicographic::self_comparator` to avoid preprocessing again. * * @param table The table to preprocess - * @param column_order Optional, host array the same length as a row that indicates the desired + * @param column_order Optional, device array the same length as a row that indicates the desired * ascending/descending order of each column in a row. If empty, it is assumed all columns are * sorted in ascending order. * @param null_precedence Optional, device array the same length as a row and indicates how null From b32205ddb7a269c084b3ff30b98465b401eaabcf Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 30 Aug 2022 10:14:50 -0700 Subject: [PATCH 62/78] Update benchmark for new data generation API. 
--- cpp/benchmarks/sort/sort_lists.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/benchmarks/sort/sort_lists.cpp b/cpp/benchmarks/sort/sort_lists.cpp index c6344c8ba6b..dac865de479 100644 --- a/cpp/benchmarks/sort/sort_lists.cpp +++ b/cpp/benchmarks/sort/sort_lists.cpp @@ -32,7 +32,7 @@ void nvbench_sort_lists(nvbench::state& state) data_profile table_profile; table_profile.set_distribution_params(cudf::type_id::LIST, distribution_id::UNIFORM, 0, 5); table_profile.set_list_depth(depth); - table_profile.set_null_frequency(null_frequency); + table_profile.set_null_probability(null_frequency); auto const table = create_random_table({cudf::type_id::LIST}, table_size_bytes{size_bytes}, table_profile); From d578c8bbd733b538a983b85428f2f0a6e3973174 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 31 Aug 2022 09:38:29 -0700 Subject: [PATCH 63/78] Add method to check for nested columns in a table_view. --- cpp/include/cudf/table/table_view.hpp | 8 ++++++++ cpp/src/table/table_view.cpp | 9 +++++++++ 2 files changed, 17 insertions(+) diff --git a/cpp/include/cudf/table/table_view.hpp b/cpp/include/cudf/table/table_view.hpp index 4d0aee292f6..e3168efd3ee 100644 --- a/cpp/include/cudf/table/table_view.hpp +++ b/cpp/include/cudf/table/table_view.hpp @@ -336,6 +336,14 @@ inline bool has_nested_nulls(table_view const& input) */ std::vector get_nullable_columns(table_view const& table); +/** + * @brief The function to collect all nested columns in a given table. 
+ * + * @param table The input table + * @return A vector containing all nested columns in the input table + */ +std::vector get_nested_columns(table_view const& table); + /** * @brief Checks if two `table_view`s have columns of same types * diff --git a/cpp/src/table/table_view.cpp b/cpp/src/table/table_view.cpp index a413c8fe65b..c10de3a300f 100644 --- a/cpp/src/table/table_view.cpp +++ b/cpp/src/table/table_view.cpp @@ -114,6 +114,15 @@ std::vector get_nullable_columns(table_view const& table) return result; } +std::vector get_nested_columns(table_view const& table) +{ + std::vector result; + for (auto const& col : table) { + if (is_nested(col.type())) { result.push_back(col); } + } + return result; +} + namespace detail { template From 0671246c1166bb614c8e0087fca11b87b6f7eb46 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 31 Aug 2022 10:26:57 -0700 Subject: [PATCH 64/78] Template comparator on the presence of nested columns and propagate parameter. --- .../cudf/table/experimental/row_operators.cuh | 97 +++++++++++-------- .../binaryop/compiled/struct_binary_ops.cuh | 14 ++- cpp/src/search/search_ordered.cu | 56 +++++++---- cpp/src/sort/sort_impl.cuh | 36 ++++--- .../table/experimental_row_operator_tests.cu | 48 ++++++--- 5 files changed, 161 insertions(+), 90 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index c654278298d..7796f295bb4 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -231,7 +231,8 @@ struct sorting_physical_element_comparator { * rather than logical elements, defaults to `NaN` aware relational comparator that evaluates `NaN` * as greater than all other values. 
*/ -template class device_row_comparator { friend class self_comparator; ///< Allow self_comparator to access private members @@ -804,36 +805,42 @@ class self_comparator { * @param comparator Physical element relational comparison functor. * @return A binary callable object. */ - template auto less(Nullate nullate = {}, PhysicalElementComparator comparator = {}) const noexcept { - return less_comparator{device_row_comparator{nullate, - *d_t, - *d_t, - d_t->dremel_device_views(), - d_t->dremel_device_views(), - d_t->depths(), - d_t->column_order(), - d_t->null_precedence(), - comparator}}; + return less_comparator{ + device_row_comparator{ + nullate, + *d_t, + *d_t, + d_t->dremel_device_views(), + d_t->dremel_device_views(), + d_t->depths(), + d_t->column_order(), + d_t->null_precedence(), + comparator}}; } /// @copydoc less() - template auto less_equivalent(Nullate nullate = {}, PhysicalElementComparator comparator = {}) const noexcept { - return less_equivalent_comparator{device_row_comparator{nullate, - *d_t, - *d_t, - d_t->dremel_device_views(), - d_t->dremel_device_views(), - d_t->depths(), - d_t->column_order(), - d_t->null_precedence(), - comparator}}; + return less_equivalent_comparator{ + device_row_comparator{ + nullate, + *d_t, + *d_t, + d_t->dremel_device_views(), + d_t->dremel_device_views(), + d_t->depths(), + d_t->column_order(), + d_t->null_precedence(), + comparator}}; } private: @@ -950,38 +957,42 @@ class two_table_comparator { * @param comparator Physical element relational comparison functor. * @return A binary callable object. 
*/ - template auto less(Nullate nullate = {}, PhysicalElementComparator comparator = {}) const noexcept { - return less_comparator{ - strong_index_comparator_adapter{device_row_comparator{nullate, - *d_left_table, - *d_right_table, - d_left_table->dremel_device_views(), - d_right_table->dremel_device_views(), - d_left_table->depths(), - d_left_table->column_order(), - d_left_table->null_precedence(), - comparator}}}; + return less_comparator{strong_index_comparator_adapter{ + device_row_comparator{ + nullate, + *d_left_table, + *d_right_table, + d_left_table->dremel_device_views(), + d_right_table->dremel_device_views(), + d_left_table->depths(), + d_left_table->column_order(), + d_left_table->null_precedence(), + comparator}}}; } /// @copydoc less() - template auto less_equivalent(Nullate nullate = {}, PhysicalElementComparator comparator = {}) const noexcept { - return less_equivalent_comparator{ - strong_index_comparator_adapter{device_row_comparator{nullate, - *d_left_table, - *d_right_table, - d_left_table->dremel_device_views(), - d_right_table->dremel_device_views(), - d_left_table->depths(), - d_left_table->column_order(), - d_left_table->null_precedence(), - comparator}}}; + return less_equivalent_comparator{strong_index_comparator_adapter{ + device_row_comparator{ + nullate, + *d_left_table, + *d_right_table, + d_left_table->dremel_device_views(), + d_right_table->dremel_device_views(), + d_left_table->depths(), + d_left_table->column_order(), + d_left_table->null_precedence(), + comparator}}}; } private: diff --git a/cpp/src/binaryop/compiled/struct_binary_ops.cuh b/cpp/src/binaryop/compiled/struct_binary_ops.cuh index 804b931fa5b..d9f5f225e1e 100644 --- a/cpp/src/binaryop/compiled/struct_binary_ops.cuh +++ b/cpp/src/binaryop/compiled/struct_binary_ops.cuh @@ -93,9 +93,17 @@ void apply_struct_binary_op(mutable_column_view& out, out.end(), device_comparison_functor{optional_iter, is_lhs_scalar, is_rhs_scalar, device_comparator}); }; - is_any_v - ? 
tabulate_device_operator(table_comparator.less_equivalent(comparator_nulls, comparator)) - : tabulate_device_operator(table_comparator.less(comparator_nulls, comparator)); + if (cudf::get_nested_columns(tlhs).size() > 0 || cudf::get_nested_columns(trhs).size() > 0) { + is_any_v + ? tabulate_device_operator( + table_comparator.less_equivalent(comparator_nulls, comparator)) + : tabulate_device_operator(table_comparator.less(comparator_nulls, comparator)); + } else { + is_any_v + ? tabulate_device_operator( + table_comparator.less_equivalent(comparator_nulls, comparator)) + : tabulate_device_operator(table_comparator.less(comparator_nulls, comparator)); + } } template search_ordered(table_view const& haystack, auto const comparator = cudf::experimental::row::lexicographic::two_table_comparator( matched_haystack, matched_needles, column_order, null_precedence, stream); - auto const has_nulls = has_nested_nulls(matched_haystack) or has_nested_nulls(matched_needles); - auto const d_comparator = comparator.less(nullate::DYNAMIC{has_nulls}); + auto const has_nulls = has_nested_nulls(matched_haystack) or has_nested_nulls(matched_needles); auto const haystack_it = cudf::experimental::row::lhs_iterator(0); auto const needles_it = cudf::experimental::row::rhs_iterator(0); - if (find_first) { - thrust::lower_bound(rmm::exec_policy(stream), - haystack_it, - haystack_it + haystack.num_rows(), - needles_it, - needles_it + needles.num_rows(), - out_it, - d_comparator); + if (cudf::get_nested_columns(haystack).size() > 0 || + cudf::get_nested_columns(needles).size() > 0) { + auto const d_comparator = comparator.less(nullate::DYNAMIC{has_nulls}); + if (find_first) { + thrust::lower_bound(rmm::exec_policy(stream), + haystack_it, + haystack_it + haystack.num_rows(), + needles_it, + needles_it + needles.num_rows(), + out_it, + d_comparator); + } else { + thrust::upper_bound(rmm::exec_policy(stream), + haystack_it, + haystack_it + haystack.num_rows(), + needles_it, + needles_it + 
needles.num_rows(), + out_it, + d_comparator); + } } else { - thrust::upper_bound(rmm::exec_policy(stream), - haystack_it, - haystack_it + haystack.num_rows(), - needles_it, - needles_it + needles.num_rows(), - out_it, - d_comparator); + auto const d_comparator = comparator.less(nullate::DYNAMIC{has_nulls}); + if (find_first) { + thrust::lower_bound(rmm::exec_policy(stream), + haystack_it, + haystack_it + haystack.num_rows(), + needles_it, + needles_it + needles.num_rows(), + out_it, + d_comparator); + } else { + thrust::upper_bound(rmm::exec_policy(stream), + haystack_it, + haystack_it + haystack.num_rows(), + needles_it, + needles_it + needles.num_rows(), + out_it, + d_comparator); + } } return result; } diff --git a/cpp/src/sort/sort_impl.cuh b/cpp/src/sort/sort_impl.cuh index f98fda307b8..6fd8ec4c4cc 100644 --- a/cpp/src/sort/sort_impl.cuh +++ b/cpp/src/sort/sort_impl.cuh @@ -127,18 +127,32 @@ std::unique_ptr sorted_order(table_view input, auto comp = experimental::row::lexicographic::self_comparator(input, column_order, null_precedence, stream); - auto comparator = comp.less(nullate::DYNAMIC{has_nested_nulls(input)}); - - if (stable) { - thrust::stable_sort(rmm::exec_policy(stream), - mutable_indices_view.begin(), - mutable_indices_view.end(), - comparator); + if (cudf::get_nested_columns(input).size() > 0) { + auto comparator = comp.less(nullate::DYNAMIC{has_nested_nulls(input)}); + if (stable) { + thrust::stable_sort(rmm::exec_policy(stream), + mutable_indices_view.begin(), + mutable_indices_view.end(), + comparator); + } else { + thrust::sort(rmm::exec_policy(stream), + mutable_indices_view.begin(), + mutable_indices_view.end(), + comparator); + } } else { - thrust::sort(rmm::exec_policy(stream), - mutable_indices_view.begin(), - mutable_indices_view.end(), - comparator); + auto comparator = comp.less(nullate::DYNAMIC{has_nested_nulls(input)}); + if (stable) { + thrust::stable_sort(rmm::exec_policy(stream), + mutable_indices_view.begin(), + 
mutable_indices_view.end(), + comparator); + } else { + thrust::sort(rmm::exec_policy(stream), + mutable_indices_view.begin(), + mutable_indices_view.end(), + comparator); + } } // protection for temporary d_column_order and d_null_precedence stream.synchronize(); diff --git a/cpp/tests/table/experimental_row_operator_tests.cu b/cpp/tests/table/experimental_row_operator_tests.cu index db5a064b1c2..c70eed152df 100644 --- a/cpp/tests/table/experimental_row_operator_tests.cu +++ b/cpp/tests/table/experimental_row_operator_tests.cu @@ -54,17 +54,25 @@ auto self_comparison(cudf::table_view input, rmm::cuda_stream_view stream{cudf::default_stream_value}; auto const table_comparator = lexicographic::self_comparator{input, column_order, {}, stream}; - auto const less_comparator = table_comparator.less(cudf::nullate::NO{}, comparator); auto output = cudf::make_numeric_column( cudf::data_type(cudf::type_id::BOOL8), input.num_rows(), cudf::mask_state::UNALLOCATED); - thrust::transform(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(input.num_rows()), - thrust::make_counting_iterator(0), - output->mutable_view().data(), - less_comparator); + if (cudf::get_nested_columns(input).size() > 0) { + thrust::transform(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(input.num_rows()), + thrust::make_counting_iterator(0), + output->mutable_view().data(), + table_comparator.less(cudf::nullate::NO{}, comparator)); + } else { + thrust::transform(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(input.num_rows()), + thrust::make_counting_iterator(0), + output->mutable_view().data(), + table_comparator.less(cudf::nullate::NO{}, comparator)); + } return output; } @@ -78,19 +86,27 @@ auto two_table_comparison(cudf::table_view lhs, auto const table_comparator = lexicographic::two_table_comparator{lhs, rhs, column_order, {}, stream}; - auto const 
less_comparator = table_comparator.less(cudf::nullate::NO{}, comparator); - auto const lhs_it = cudf::experimental::row::lhs_iterator(0); - auto const rhs_it = cudf::experimental::row::rhs_iterator(0); + auto const lhs_it = cudf::experimental::row::lhs_iterator(0); + auto const rhs_it = cudf::experimental::row::rhs_iterator(0); auto output = cudf::make_numeric_column( cudf::data_type(cudf::type_id::BOOL8), lhs.num_rows(), cudf::mask_state::UNALLOCATED); - thrust::transform(rmm::exec_policy(stream), - lhs_it, - lhs_it + lhs.num_rows(), - rhs_it, - output->mutable_view().data(), - less_comparator); + if (cudf::get_nested_columns(lhs).size() > 0 || cudf::get_nested_columns(rhs).size() > 0) { + thrust::transform(rmm::exec_policy(stream), + lhs_it, + lhs_it + lhs.num_rows(), + rhs_it, + output->mutable_view().data(), + table_comparator.less(cudf::nullate::NO{}, comparator)); + } else { + thrust::transform(rmm::exec_policy(stream), + lhs_it, + lhs_it + lhs.num_rows(), + rhs_it, + output->mutable_view().data(), + table_comparator.less(cudf::nullate::NO{}, comparator)); + } return output; } From 8c0ae93c79c13e1e4ebfac047ca3e8ba804355e4 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 31 Aug 2022 13:41:58 -0700 Subject: [PATCH 65/78] Only define the list/struct overloads in the specialization that could contain nested columns. 
--- cpp/include/cudf/table/experimental/row_operators.cuh | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 7796f295bb4..3cc95dabf73 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -343,7 +343,7 @@ class device_row_comparator { template () and - not cudf::is_nested())> + (not has_nested_nulls or not cudf::is_nested()))> __device__ cuda::std::pair operator()(size_type const, size_type const) const noexcept { @@ -351,8 +351,7 @@ class device_row_comparator { } template () and - std::is_same_v)> + CUDF_ENABLE_IF(has_nested_nulls and std::is_same_v)> __device__ cuda::std::pair operator()( size_type const lhs_element_index, size_type const rhs_element_index) const noexcept { @@ -386,8 +385,7 @@ class device_row_comparator { } template () and - std::is_same_v)> + CUDF_ENABLE_IF(has_nested_nulls and std::is_same_v)> __device__ cuda::std::pair operator()(size_type lhs_element_index, size_type rhs_element_index) { From d285df9fc8c1dbdb0d3487178be2ce960c4bd41a Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 31 Aug 2022 17:38:53 -0700 Subject: [PATCH 66/78] Move the specialization to a completely separate class. --- .../cudf/table/experimental/row_operators.cuh | 346 +++++++++++++++--- 1 file changed, 294 insertions(+), 52 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 3cc95dabf73..74afa07e086 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -231,10 +231,9 @@ struct sorting_physical_element_comparator { * rather than logical elements, defaults to `NaN` aware relational comparator that evaluates `NaN` * as greater than all other values. 
*/ -template -class device_row_comparator { +class device_row_comparator_primitive { friend class self_comparator; ///< Allow self_comparator to access private members friend class two_table_comparator; ///< Allow two_table_comparator to access private members @@ -255,15 +254,16 @@ class device_row_comparator { * `null_order::BEFORE` for all columns. * @param comparator Physical element relational comparison functor. */ - device_row_comparator(Nullate check_nulls, - table_device_view lhs, - table_device_view rhs, - device_span l_dremel_device_views, - device_span r_dremel_device_views, - std::optional> depth = std::nullopt, - std::optional> column_order = std::nullopt, - std::optional> null_precedence = std::nullopt, - PhysicalElementComparator comparator = {}) noexcept + device_row_comparator_primitive( + Nullate check_nulls, + table_device_view lhs, + table_device_view rhs, + device_span l_dremel_device_views, + device_span r_dremel_device_views, + std::optional> depth = std::nullopt, + std::optional> column_order = std::nullopt, + std::optional> null_precedence = std::nullopt, + PhysicalElementComparator comparator = {}) noexcept : _lhs{lhs}, _rhs{rhs}, _l_dremel(l_dremel_device_views), @@ -342,16 +342,202 @@ class device_row_comparator { } template () and - (not has_nested_nulls or not cudf::is_nested()))> + CUDF_ENABLE_IF(not cudf::is_relationally_comparable())> __device__ cuda::std::pair operator()(size_type const, size_type const) const noexcept { CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types."); } + private: + column_device_view const _lhs; + column_device_view const _rhs; + Nullate const _check_nulls; + null_order const _null_precedence; + int const _depth; + detail::dremel_device_view const _l_dremel_device_view; + detail::dremel_device_view const _r_dremel_device_view; + PhysicalElementComparator const _comparator; + }; + + public: + /** + * @brief Checks whether the row at `lhs_index` in the `lhs` table compares + * 
lexicographically less, greater, or equivalent to the row at `rhs_index` in the `rhs` table. + * + * @param lhs_index The index of the row in the `lhs` table to examine + * @param rhs_index The index of the row in the `rhs` table to examine + * @return weak ordering comparison of the row in the `lhs` table relative to the row in the `rhs` + * table + */ + __device__ constexpr weak_ordering operator()(size_type const lhs_index, + size_type const rhs_index) const noexcept + { + int last_null_depth = std::numeric_limits::max(); + for (size_type i = 0; i < _lhs.num_columns(); ++i) { + int const depth = _depth.has_value() ? (*_depth)[i] : 0; + if (depth > last_null_depth) { continue; } + + bool const ascending = + _column_order.has_value() ? (*_column_order)[i] == order::ASCENDING : true; + + null_order const null_precedence = + _null_precedence.has_value() ? (*_null_precedence)[i] : null_order::BEFORE; + + auto element_comp = element_comparator{_check_nulls, + _lhs.column(i), + _rhs.column(i), + null_precedence, + depth, + _comparator, + _l_dremel[i], + _r_dremel[i]}; + + weak_ordering state; + cuda::std::tie(state, last_null_depth) = + cudf::type_dispatcher(_lhs.column(i).type(), element_comp, lhs_index, rhs_index); + + if (state == weak_ordering::EQUIVALENT) { continue; } + + return ascending + ? state + : (state == weak_ordering::GREATER ? 
weak_ordering::LESS : weak_ordering::GREATER); + } + return weak_ordering::EQUIVALENT; + } + + private: + table_device_view const _lhs; + table_device_view const _rhs; + device_span const _l_dremel; + device_span const _r_dremel; + Nullate const _check_nulls; + std::optional> const _depth; + std::optional> const _column_order; + std::optional> const _null_precedence; + PhysicalElementComparator const _comparator; +}; // class device_row_comparator + +template +class device_row_comparator_nested { + friend class self_comparator; ///< Allow self_comparator to access private members + friend class two_table_comparator; ///< Allow two_table_comparator to access private members + + /** + * @brief Construct a function object for performing a lexicographic + * comparison between the rows of two tables. + * + * @param check_nulls Indicates if any input column contains nulls. + * @param lhs The first table + * @param rhs The second table (may be the same table as `lhs`) + * @param depth Optional, device array the same length as a row that contains starting depths of + * columns if they're nested, and 0 otherwise. + * @param column_order Optional, device array the same length as a row that indicates the desired + * ascending/descending order of each column in a row. If `nullopt`, it is assumed all columns are + * sorted in ascending order. + * @param null_precedence Optional, device array the same length as a row and indicates how null + * values compare to all other for every column. If `nullopt`, then null precedence would be + * `null_order::BEFORE` for all columns. + * @param comparator Physical element relational comparison functor. 
+ */ + device_row_comparator_nested( + Nullate check_nulls, + table_device_view lhs, + table_device_view rhs, + device_span l_dremel_device_views, + device_span r_dremel_device_views, + std::optional> depth = std::nullopt, + std::optional> column_order = std::nullopt, + std::optional> null_precedence = std::nullopt, + PhysicalElementComparator comparator = {}) noexcept + : _lhs{lhs}, + _rhs{rhs}, + _l_dremel(l_dremel_device_views), + _r_dremel(r_dremel_device_views), + _check_nulls{check_nulls}, + _depth{depth}, + _column_order{column_order}, + _null_precedence{null_precedence}, + _comparator{comparator} + { + } + + /** + * @brief Performs a relational comparison between two elements in two columns. + */ + class element_comparator { + public: + /** + * @brief Construct type-dispatched function object for performing a + * relational comparison between two elements. + * + * @note `lhs` and `rhs` may be the same. + * + * @param check_nulls Indicates if either input column contains nulls. + * @param lhs The column containing the first element + * @param rhs The column containing the second element (may be the same as lhs) + * @param null_precedence Indicates how null values are ordered with other values + * @param depth The depth of the column if part of a nested column @see + * preprocessed_table::depths + * @param comparator Physical element relational comparison functor. 
+ */ + __device__ element_comparator(Nullate check_nulls, + column_device_view lhs, + column_device_view rhs, + null_order null_precedence = null_order::BEFORE, + int depth = 0, + PhysicalElementComparator comparator = {}, + detail::dremel_device_view l_dremel_device_view = {}, + detail::dremel_device_view r_dremel_device_view = {}) + : _lhs{lhs}, + _rhs{rhs}, + _check_nulls{check_nulls}, + _null_precedence{null_precedence}, + _depth{depth}, + _l_dremel_device_view{l_dremel_device_view}, + _r_dremel_device_view{r_dremel_device_view}, + _comparator{comparator} + { + } + + /** + * @brief Performs a relational comparison between the specified elements + * + * @param lhs_element_index The index of the first element + * @param rhs_element_index The index of the second element + * @return Indicates the relationship between the elements in the `lhs` and `rhs` columns, along + * with the depth at which a null value was encountered. + */ template )> + CUDF_ENABLE_IF(cudf::is_relationally_comparable())> + __device__ cuda::std::pair operator()( + size_type const lhs_element_index, size_type const rhs_element_index) const noexcept + { + if (_check_nulls) { + bool const lhs_is_null{_lhs.is_null(lhs_element_index)}; + bool const rhs_is_null{_rhs.is_null(rhs_element_index)}; + + if (lhs_is_null or rhs_is_null) { // at least one is null + return cuda::std::pair(null_compare(lhs_is_null, rhs_is_null, _null_precedence), _depth); + } + } + + return cuda::std::pair(_comparator(_lhs.element(lhs_element_index), + _rhs.element(rhs_element_index)), + std::numeric_limits::max()); + } + + template () and + not cudf::is_nested())> + __device__ cuda::std::pair operator()(size_type const, + size_type const) const noexcept + { + CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types."); + } + + template )> __device__ cuda::std::pair operator()( size_type const lhs_element_index, size_type const rhs_element_index) const noexcept { @@ -384,8 +570,7 @@ class device_row_comparator { 
rhs_element_index); } - template )> + template )> __device__ cuda::std::pair operator()(size_type lhs_element_index, size_type rhs_element_index) { @@ -808,8 +993,20 @@ class self_comparator { typename PhysicalElementComparator = sorting_physical_element_comparator> auto less(Nullate nullate = {}, PhysicalElementComparator comparator = {}) const noexcept { - return less_comparator{ - device_row_comparator{ + if constexpr (has_nested_columns) { + return less_comparator{ + device_row_comparator_nested{nullate, + *d_t, + *d_t, + d_t->dremel_device_views(), + d_t->dremel_device_views(), + d_t->depths(), + d_t->column_order(), + d_t->null_precedence(), + comparator}}; + } + if constexpr (!has_nested_columns) { + return less_comparator{device_row_comparator_primitive{ nullate, *d_t, *d_t, @@ -819,6 +1016,7 @@ class self_comparator { d_t->column_order(), d_t->null_precedence(), comparator}}; + } } /// @copydoc less() @@ -828,17 +1026,31 @@ class self_comparator { auto less_equivalent(Nullate nullate = {}, PhysicalElementComparator comparator = {}) const noexcept { - return less_equivalent_comparator{ - device_row_comparator{ - nullate, - *d_t, - *d_t, - d_t->dremel_device_views(), - d_t->dremel_device_views(), - d_t->depths(), - d_t->column_order(), - d_t->null_precedence(), - comparator}}; + if constexpr (has_nested_columns) { + return less_equivalent_comparator{ + device_row_comparator_nested{nullate, + *d_t, + *d_t, + d_t->dremel_device_views(), + d_t->dremel_device_views(), + d_t->depths(), + d_t->column_order(), + d_t->null_precedence(), + comparator}}; + } + if constexpr (!has_nested_columns) { + return less_equivalent_comparator{ + device_row_comparator_primitive{ + nullate, + *d_t, + *d_t, + d_t->dremel_device_views(), + d_t->dremel_device_views(), + d_t->depths(), + d_t->column_order(), + d_t->null_precedence(), + comparator}}; + } } private: @@ -960,17 +1172,32 @@ class two_table_comparator { typename PhysicalElementComparator = 
sorting_physical_element_comparator> auto less(Nullate nullate = {}, PhysicalElementComparator comparator = {}) const noexcept { - return less_comparator{strong_index_comparator_adapter{ - device_row_comparator{ - nullate, - *d_left_table, - *d_right_table, - d_left_table->dremel_device_views(), - d_right_table->dremel_device_views(), - d_left_table->depths(), - d_left_table->column_order(), - d_left_table->null_precedence(), - comparator}}}; + if constexpr (has_nested_columns) { + return less_comparator{strong_index_comparator_adapter{ + device_row_comparator_nested{ + nullate, + *d_left_table, + *d_right_table, + d_left_table->dremel_device_views(), + d_right_table->dremel_device_views(), + d_left_table->depths(), + d_left_table->column_order(), + d_left_table->null_precedence(), + comparator}}}; + } + if constexpr (!has_nested_columns) { + return less_comparator{strong_index_comparator_adapter{ + device_row_comparator_primitive{ + nullate, + *d_left_table, + *d_right_table, + d_left_table->dremel_device_views(), + d_right_table->dremel_device_views(), + d_left_table->depths(), + d_left_table->column_order(), + d_left_table->null_precedence(), + comparator}}}; + } } /// @copydoc less() @@ -980,17 +1207,32 @@ class two_table_comparator { auto less_equivalent(Nullate nullate = {}, PhysicalElementComparator comparator = {}) const noexcept { - return less_equivalent_comparator{strong_index_comparator_adapter{ - device_row_comparator{ - nullate, - *d_left_table, - *d_right_table, - d_left_table->dremel_device_views(), - d_right_table->dremel_device_views(), - d_left_table->depths(), - d_left_table->column_order(), - d_left_table->null_precedence(), - comparator}}}; + if constexpr (has_nested_columns) { + return less_equivalent_comparator{strong_index_comparator_adapter{ + device_row_comparator_nested{ + nullate, + *d_left_table, + *d_right_table, + d_left_table->dremel_device_views(), + d_right_table->dremel_device_views(), + d_left_table->depths(), + 
d_left_table->column_order(), + d_left_table->null_precedence(), + comparator}}}; + } + if constexpr (!has_nested_columns) { + return less_equivalent_comparator{strong_index_comparator_adapter{ + device_row_comparator_primitive{ + nullate, + *d_left_table, + *d_right_table, + d_left_table->dremel_device_views(), + d_right_table->dremel_device_views(), + d_left_table->depths(), + d_left_table->column_order(), + d_left_table->null_precedence(), + comparator}}}; + } } private: From 31a9bfd600b0ab7019c894ec0b8fd0a1e8ba8854 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 31 Aug 2022 17:39:18 -0700 Subject: [PATCH 67/78] Revert "Move the specialization to a completely separate class." This reverts commit d285df9fc8c1dbdb0d3487178be2ce960c4bd41a. --- .../cudf/table/experimental/row_operators.cuh | 346 +++--------------- 1 file changed, 52 insertions(+), 294 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 74afa07e086..3cc95dabf73 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -231,196 +231,10 @@ struct sorting_physical_element_comparator { * rather than logical elements, defaults to `NaN` aware relational comparator that evaluates `NaN` * as greater than all other values. */ -template -class device_row_comparator_primitive { - friend class self_comparator; ///< Allow self_comparator to access private members - friend class two_table_comparator; ///< Allow two_table_comparator to access private members - - /** - * @brief Construct a function object for performing a lexicographic - * comparison between the rows of two tables. - * - * @param check_nulls Indicates if any input column contains nulls. 
- * @param lhs The first table - * @param rhs The second table (may be the same table as `lhs`) - * @param depth Optional, device array the same length as a row that contains starting depths of - * columns if they're nested, and 0 otherwise. - * @param column_order Optional, device array the same length as a row that indicates the desired - * ascending/descending order of each column in a row. If `nullopt`, it is assumed all columns are - * sorted in ascending order. - * @param null_precedence Optional, device array the same length as a row and indicates how null - * values compare to all other for every column. If `nullopt`, then null precedence would be - * `null_order::BEFORE` for all columns. - * @param comparator Physical element relational comparison functor. - */ - device_row_comparator_primitive( - Nullate check_nulls, - table_device_view lhs, - table_device_view rhs, - device_span l_dremel_device_views, - device_span r_dremel_device_views, - std::optional> depth = std::nullopt, - std::optional> column_order = std::nullopt, - std::optional> null_precedence = std::nullopt, - PhysicalElementComparator comparator = {}) noexcept - : _lhs{lhs}, - _rhs{rhs}, - _l_dremel(l_dremel_device_views), - _r_dremel(r_dremel_device_views), - _check_nulls{check_nulls}, - _depth{depth}, - _column_order{column_order}, - _null_precedence{null_precedence}, - _comparator{comparator} - { - } - - /** - * @brief Performs a relational comparison between two elements in two columns. - */ - class element_comparator { - public: - /** - * @brief Construct type-dispatched function object for performing a - * relational comparison between two elements. - * - * @note `lhs` and `rhs` may be the same. - * - * @param check_nulls Indicates if either input column contains nulls. 
- * @param lhs The column containing the first element - * @param rhs The column containing the second element (may be the same as lhs) - * @param null_precedence Indicates how null values are ordered with other values - * @param depth The depth of the column if part of a nested column @see - * preprocessed_table::depths - * @param comparator Physical element relational comparison functor. - */ - __device__ element_comparator(Nullate check_nulls, - column_device_view lhs, - column_device_view rhs, - null_order null_precedence = null_order::BEFORE, - int depth = 0, - PhysicalElementComparator comparator = {}, - detail::dremel_device_view l_dremel_device_view = {}, - detail::dremel_device_view r_dremel_device_view = {}) - : _lhs{lhs}, - _rhs{rhs}, - _check_nulls{check_nulls}, - _null_precedence{null_precedence}, - _depth{depth}, - _l_dremel_device_view{l_dremel_device_view}, - _r_dremel_device_view{r_dremel_device_view}, - _comparator{comparator} - { - } - - /** - * @brief Performs a relational comparison between the specified elements - * - * @param lhs_element_index The index of the first element - * @param rhs_element_index The index of the second element - * @return Indicates the relationship between the elements in the `lhs` and `rhs` columns, along - * with the depth at which a null value was encountered. 
- */ - template ())> - __device__ cuda::std::pair operator()( - size_type const lhs_element_index, size_type const rhs_element_index) const noexcept - { - if (_check_nulls) { - bool const lhs_is_null{_lhs.is_null(lhs_element_index)}; - bool const rhs_is_null{_rhs.is_null(rhs_element_index)}; - - if (lhs_is_null or rhs_is_null) { // at least one is null - return cuda::std::pair(null_compare(lhs_is_null, rhs_is_null, _null_precedence), _depth); - } - } - - return cuda::std::pair(_comparator(_lhs.element(lhs_element_index), - _rhs.element(rhs_element_index)), - std::numeric_limits::max()); - } - - template ())> - __device__ cuda::std::pair operator()(size_type const, - size_type const) const noexcept - { - CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types."); - } - - private: - column_device_view const _lhs; - column_device_view const _rhs; - Nullate const _check_nulls; - null_order const _null_precedence; - int const _depth; - detail::dremel_device_view const _l_dremel_device_view; - detail::dremel_device_view const _r_dremel_device_view; - PhysicalElementComparator const _comparator; - }; - - public: - /** - * @brief Checks whether the row at `lhs_index` in the `lhs` table compares - * lexicographically less, greater, or equivalent to the row at `rhs_index` in the `rhs` table. - * - * @param lhs_index The index of the row in the `lhs` table to examine - * @param rhs_index The index of the row in the `rhs` table to examine - * @return weak ordering comparison of the row in the `lhs` table relative to the row in the `rhs` - * table - */ - __device__ constexpr weak_ordering operator()(size_type const lhs_index, - size_type const rhs_index) const noexcept - { - int last_null_depth = std::numeric_limits::max(); - for (size_type i = 0; i < _lhs.num_columns(); ++i) { - int const depth = _depth.has_value() ? (*_depth)[i] : 0; - if (depth > last_null_depth) { continue; } - - bool const ascending = - _column_order.has_value() ? 
(*_column_order)[i] == order::ASCENDING : true; - - null_order const null_precedence = - _null_precedence.has_value() ? (*_null_precedence)[i] : null_order::BEFORE; - - auto element_comp = element_comparator{_check_nulls, - _lhs.column(i), - _rhs.column(i), - null_precedence, - depth, - _comparator, - _l_dremel[i], - _r_dremel[i]}; - - weak_ordering state; - cuda::std::tie(state, last_null_depth) = - cudf::type_dispatcher(_lhs.column(i).type(), element_comp, lhs_index, rhs_index); - - if (state == weak_ordering::EQUIVALENT) { continue; } - - return ascending - ? state - : (state == weak_ordering::GREATER ? weak_ordering::LESS : weak_ordering::GREATER); - } - return weak_ordering::EQUIVALENT; - } - - private: - table_device_view const _lhs; - table_device_view const _rhs; - device_span const _l_dremel; - device_span const _r_dremel; - Nullate const _check_nulls; - std::optional> const _depth; - std::optional> const _column_order; - std::optional> const _null_precedence; - PhysicalElementComparator const _comparator; -}; // class device_row_comparator - -template -class device_row_comparator_nested { +class device_row_comparator { friend class self_comparator; ///< Allow self_comparator to access private members friend class two_table_comparator; ///< Allow two_table_comparator to access private members @@ -441,16 +255,15 @@ class device_row_comparator_nested { * `null_order::BEFORE` for all columns. * @param comparator Physical element relational comparison functor. 
*/ - device_row_comparator_nested( - Nullate check_nulls, - table_device_view lhs, - table_device_view rhs, - device_span l_dremel_device_views, - device_span r_dremel_device_views, - std::optional> depth = std::nullopt, - std::optional> column_order = std::nullopt, - std::optional> null_precedence = std::nullopt, - PhysicalElementComparator comparator = {}) noexcept + device_row_comparator(Nullate check_nulls, + table_device_view lhs, + table_device_view rhs, + device_span l_dremel_device_views, + device_span r_dremel_device_views, + std::optional> depth = std::nullopt, + std::optional> column_order = std::nullopt, + std::optional> null_precedence = std::nullopt, + PhysicalElementComparator comparator = {}) noexcept : _lhs{lhs}, _rhs{rhs}, _l_dremel(l_dremel_device_views), @@ -530,14 +343,15 @@ class device_row_comparator_nested { template () and - not cudf::is_nested())> + (not has_nested_nulls or not cudf::is_nested()))> __device__ cuda::std::pair operator()(size_type const, size_type const) const noexcept { CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types."); } - template )> + template )> __device__ cuda::std::pair operator()( size_type const lhs_element_index, size_type const rhs_element_index) const noexcept { @@ -570,7 +384,8 @@ class device_row_comparator_nested { rhs_element_index); } - template )> + template )> __device__ cuda::std::pair operator()(size_type lhs_element_index, size_type rhs_element_index) { @@ -993,20 +808,8 @@ class self_comparator { typename PhysicalElementComparator = sorting_physical_element_comparator> auto less(Nullate nullate = {}, PhysicalElementComparator comparator = {}) const noexcept { - if constexpr (has_nested_columns) { - return less_comparator{ - device_row_comparator_nested{nullate, - *d_t, - *d_t, - d_t->dremel_device_views(), - d_t->dremel_device_views(), - d_t->depths(), - d_t->column_order(), - d_t->null_precedence(), - comparator}}; - } - if constexpr (!has_nested_columns) { - return 
less_comparator{device_row_comparator_primitive{ + return less_comparator{ + device_row_comparator{ nullate, *d_t, *d_t, @@ -1016,7 +819,6 @@ class self_comparator { d_t->column_order(), d_t->null_precedence(), comparator}}; - } } /// @copydoc less() @@ -1026,31 +828,17 @@ class self_comparator { auto less_equivalent(Nullate nullate = {}, PhysicalElementComparator comparator = {}) const noexcept { - if constexpr (has_nested_columns) { - return less_equivalent_comparator{ - device_row_comparator_nested{nullate, - *d_t, - *d_t, - d_t->dremel_device_views(), - d_t->dremel_device_views(), - d_t->depths(), - d_t->column_order(), - d_t->null_precedence(), - comparator}}; - } - if constexpr (!has_nested_columns) { - return less_equivalent_comparator{ - device_row_comparator_primitive{ - nullate, - *d_t, - *d_t, - d_t->dremel_device_views(), - d_t->dremel_device_views(), - d_t->depths(), - d_t->column_order(), - d_t->null_precedence(), - comparator}}; - } + return less_equivalent_comparator{ + device_row_comparator{ + nullate, + *d_t, + *d_t, + d_t->dremel_device_views(), + d_t->dremel_device_views(), + d_t->depths(), + d_t->column_order(), + d_t->null_precedence(), + comparator}}; } private: @@ -1172,32 +960,17 @@ class two_table_comparator { typename PhysicalElementComparator = sorting_physical_element_comparator> auto less(Nullate nullate = {}, PhysicalElementComparator comparator = {}) const noexcept { - if constexpr (has_nested_columns) { - return less_comparator{strong_index_comparator_adapter{ - device_row_comparator_nested{ - nullate, - *d_left_table, - *d_right_table, - d_left_table->dremel_device_views(), - d_right_table->dremel_device_views(), - d_left_table->depths(), - d_left_table->column_order(), - d_left_table->null_precedence(), - comparator}}}; - } - if constexpr (!has_nested_columns) { - return less_comparator{strong_index_comparator_adapter{ - device_row_comparator_primitive{ - nullate, - *d_left_table, - *d_right_table, - 
d_left_table->dremel_device_views(), - d_right_table->dremel_device_views(), - d_left_table->depths(), - d_left_table->column_order(), - d_left_table->null_precedence(), - comparator}}}; - } + return less_comparator{strong_index_comparator_adapter{ + device_row_comparator{ + nullate, + *d_left_table, + *d_right_table, + d_left_table->dremel_device_views(), + d_right_table->dremel_device_views(), + d_left_table->depths(), + d_left_table->column_order(), + d_left_table->null_precedence(), + comparator}}}; } /// @copydoc less() @@ -1207,32 +980,17 @@ class two_table_comparator { auto less_equivalent(Nullate nullate = {}, PhysicalElementComparator comparator = {}) const noexcept { - if constexpr (has_nested_columns) { - return less_equivalent_comparator{strong_index_comparator_adapter{ - device_row_comparator_nested{ - nullate, - *d_left_table, - *d_right_table, - d_left_table->dremel_device_views(), - d_right_table->dremel_device_views(), - d_left_table->depths(), - d_left_table->column_order(), - d_left_table->null_precedence(), - comparator}}}; - } - if constexpr (!has_nested_columns) { - return less_equivalent_comparator{strong_index_comparator_adapter{ - device_row_comparator_primitive{ - nullate, - *d_left_table, - *d_right_table, - d_left_table->dremel_device_views(), - d_right_table->dremel_device_views(), - d_left_table->depths(), - d_left_table->column_order(), - d_left_table->null_precedence(), - comparator}}}; - } + return less_equivalent_comparator{strong_index_comparator_adapter{ + device_row_comparator{ + nullate, + *d_left_table, + *d_right_table, + d_left_table->dremel_device_views(), + d_right_table->dremel_device_views(), + d_left_table->depths(), + d_left_table->column_order(), + d_left_table->null_precedence(), + comparator}}}; } private: From 36cc5f3a16207f28854c95b73f4cefde0ef9c878 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 31 Aug 2022 17:53:21 -0700 Subject: [PATCH 68/78] Fix typo. 
--- cpp/include/cudf/table/experimental/row_operators.cuh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 3cc95dabf73..4e1a0d7ea39 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -343,7 +343,7 @@ class device_row_comparator { template () and - (not has_nested_nulls or not cudf::is_nested()))> + (not has_nested_columns or not cudf::is_nested()))> __device__ cuda::std::pair operator()(size_type const, size_type const) const noexcept { @@ -351,7 +351,7 @@ class device_row_comparator { } template )> + CUDF_ENABLE_IF(has_nested_columns and std::is_same_v)> __device__ cuda::std::pair operator()( size_type const lhs_element_index, size_type const rhs_element_index) const noexcept { @@ -385,7 +385,7 @@ class device_row_comparator { } template )> + CUDF_ENABLE_IF(has_nested_columns and std::is_same_v)> __device__ cuda::std::pair operator()(size_type lhs_element_index, size_type rhs_element_index) { From 8dd293a984e867321553567c9807fd7124cb92db Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 1 Sep 2022 15:57:18 -0700 Subject: [PATCH 69/78] Convert the Dremel members of the preprocessed_table to optionals. 
--- .../cudf/table/experimental/row_operators.cuh | 21 +++++++++++++--- cpp/src/table/row_operators.cu | 24 ++++++++++++------- 2 files changed, 33 insertions(+), 12 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 4e1a0d7ea39..96bccea9b2a 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -677,6 +677,17 @@ struct preprocessed_table { _dremel_data(std::move(dremel_data)), _dremel_device_views(std::move(dremel_device_views)){}; + preprocessed_table(table_device_view_owner&& table, + rmm::device_uvector&& column_order, + rmm::device_uvector&& null_precedence, + rmm::device_uvector&& depths) + : _t(std::move(table)), + _column_order(std::move(column_order)), + _null_precedence(std::move(null_precedence)), + _depths(std::move(depths)), + _dremel_data{}, + _dremel_device_views{} {}; + /** * @brief Implicit conversion operator to a `table_device_view` of the preprocessed table. 
* @@ -726,7 +737,11 @@ struct preprocessed_table { [[nodiscard]] device_span dremel_device_views() const { - return device_span(_dremel_device_views); + if (_dremel_device_views.has_value()) { + return device_span(*_dremel_device_views); + } else { + return {}; + } } private: @@ -736,8 +751,8 @@ struct preprocessed_table { rmm::device_uvector const _depths; // Dremel encoding of list columns used for the comparison algorithm - std::vector _dremel_data; - rmm::device_uvector _dremel_device_views; + std::optional> _dremel_data; + std::optional> _dremel_device_views; }; /** diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu index 8ab9b65ecef..23a254a1e60 100644 --- a/cpp/src/table/row_operators.cu +++ b/cpp/src/table/row_operators.cu @@ -361,20 +361,26 @@ std::shared_ptr preprocessed_table::create( auto [verticalized_lhs, new_column_order, new_null_precedence, verticalized_col_depths] = decompose_structs(t, column_order, null_precedence); - auto [dremel_data, d_dremel_device_views] = list_lex_preprocess(verticalized_lhs, stream); - auto d_t = table_device_view::create(verticalized_lhs, stream); auto d_column_order = detail::make_device_uvector_async(new_column_order, stream); auto d_null_precedence = detail::make_device_uvector_async(new_null_precedence, stream); auto d_depths = detail::make_device_uvector_async(verticalized_col_depths, stream); - return std::shared_ptr( - new preprocessed_table(std::move(d_t), - std::move(d_column_order), - std::move(d_null_precedence), - std::move(d_depths), - std::move(dremel_data), - std::move(d_dremel_device_views))); + if (cudf::get_nested_columns(t).size() > 0) { + auto [dremel_data, d_dremel_device_view] = list_lex_preprocess(verticalized_lhs, stream); + return std::shared_ptr( + new preprocessed_table(std::move(d_t), + std::move(d_column_order), + std::move(d_null_precedence), + std::move(d_depths), + std::move(dremel_data), + std::move(d_dremel_device_view))); + } else { + return std::shared_ptr(new 
preprocessed_table(std::move(d_t), + std::move(d_column_order), + std::move(d_null_precedence), + std::move(d_depths))); + } } two_table_comparator::two_table_comparator(table_view const& left, From 7b0ae58a6ab607e8c088c80e51a7bc7c6ce47b99 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 1 Sep 2022 16:23:09 -0700 Subject: [PATCH 70/78] Propagate optionals down to the element comparator. --- .../cudf/table/experimental/row_operators.cuh | 77 +++++++++++-------- 1 file changed, 45 insertions(+), 32 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 96bccea9b2a..7b53c11a39b 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -255,15 +255,16 @@ class device_row_comparator { * `null_order::BEFORE` for all columns. * @param comparator Physical element relational comparison functor. */ - device_row_comparator(Nullate check_nulls, - table_device_view lhs, - table_device_view rhs, - device_span l_dremel_device_views, - device_span r_dremel_device_views, - std::optional> depth = std::nullopt, - std::optional> column_order = std::nullopt, - std::optional> null_precedence = std::nullopt, - PhysicalElementComparator comparator = {}) noexcept + device_row_comparator( + Nullate check_nulls, + table_device_view lhs, + table_device_view rhs, + std::optional> l_dremel_device_views, + std::optional> r_dremel_device_views, + std::optional> depth = std::nullopt, + std::optional> column_order = std::nullopt, + std::optional> null_precedence = std::nullopt, + PhysicalElementComparator comparator = {}) noexcept : _lhs{lhs}, _rhs{rhs}, _l_dremel(l_dremel_device_views), @@ -295,14 +296,15 @@ class device_row_comparator { * preprocessed_table::depths * @param comparator Physical element relational comparison functor. 
*/ - __device__ element_comparator(Nullate check_nulls, - column_device_view lhs, - column_device_view rhs, - null_order null_precedence = null_order::BEFORE, - int depth = 0, - PhysicalElementComparator comparator = {}, - detail::dremel_device_view l_dremel_device_view = {}, - detail::dremel_device_view r_dremel_device_view = {}) + __device__ element_comparator( + Nullate check_nulls, + column_device_view lhs, + column_device_view rhs, + null_order null_precedence = null_order::BEFORE, + int depth = 0, + PhysicalElementComparator comparator = {}, + thrust::optional l_dremel_device_view = {}, + thrust::optional r_dremel_device_view = {}) : _lhs{lhs}, _rhs{rhs}, _check_nulls{check_nulls}, @@ -390,11 +392,11 @@ class device_row_comparator { size_type rhs_element_index) { // These are all the values from the Dremel encoding. - auto const l_max_def_level = _l_dremel_device_view.max_def_level; - auto const l_def_levels = _l_dremel_device_view.def_levels; - auto const r_def_levels = _r_dremel_device_view.def_levels; - auto const l_rep_levels = _l_dremel_device_view.rep_levels; - auto const r_rep_levels = _r_dremel_device_view.rep_levels; + auto const l_max_def_level = _l_dremel_device_view->max_def_level; + auto const l_def_levels = _l_dremel_device_view->def_levels; + auto const r_def_levels = _r_dremel_device_view->def_levels; + auto const l_rep_levels = _l_dremel_device_view->rep_levels; + auto const r_rep_levels = _r_dremel_device_view->rep_levels; // Traverse the nested list hierarchy to get a column device view // pointing to the underlying child data. @@ -408,8 +410,8 @@ class device_row_comparator { // These start and end values indicate the start and end points of all // the elements of the lists in the current list element // (`[lhs|rhs]_element_index`) that we are comparing. 
- auto const l_offsets = _l_dremel_device_view.offsets; - auto const r_offsets = _r_dremel_device_view.offsets; + auto const l_offsets = _l_dremel_device_view->offsets; + auto const r_offsets = _r_dremel_device_view->offsets; auto l_start = l_offsets[lhs_element_index]; auto l_end = l_offsets[lhs_element_index + 1]; auto r_start = r_offsets[rhs_element_index]; @@ -478,8 +480,8 @@ class device_row_comparator { Nullate const _check_nulls; null_order const _null_precedence; int const _depth; - detail::dremel_device_view const _l_dremel_device_view; - detail::dremel_device_view const _r_dremel_device_view; + thrust::optional _l_dremel_device_view; + thrust::optional _r_dremel_device_view; PhysicalElementComparator const _comparator; }; @@ -507,14 +509,24 @@ class device_row_comparator { null_order const null_precedence = _null_precedence.has_value() ? (*_null_precedence)[i] : null_order::BEFORE; + auto [l_dremel_i, r_dremel_i] = [&]() { + if (_l_dremel.has_value() && _r_dremel.has_value()) { + return std::make_tuple( + thrust::optional{(*_l_dremel)[i]}, + thrust::optional{(*_r_dremel)[i]}); + } else { + return std::make_tuple(thrust::optional{}, + thrust::optional{}); + } + }(); auto element_comp = element_comparator{_check_nulls, _lhs.column(i), _rhs.column(i), null_precedence, depth, _comparator, - _l_dremel[i], - _r_dremel[i]}; + l_dremel_i, + r_dremel_i}; weak_ordering state; cuda::std::tie(state, last_null_depth) = @@ -532,8 +544,8 @@ class device_row_comparator { private: table_device_view const _lhs; table_device_view const _rhs; - device_span const _l_dremel; - device_span const _r_dremel; + std::optional const> _l_dremel; + std::optional const> _r_dremel; Nullate const _check_nulls; std::optional> const _depth; std::optional> const _column_order; @@ -735,10 +747,11 @@ struct preprocessed_table { return _depths.size() ? 
std::optional>(_depths) : std::nullopt; } - [[nodiscard]] device_span dremel_device_views() const + [[nodiscard]] std::optional> dremel_device_views() + const { if (_dremel_device_views.has_value()) { - return device_span(*_dremel_device_views); + return {device_span(*_dremel_device_views)}; } else { return {}; } From be3ab5e6cc2907326649ae1c6603bdba61c62df1 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 2 Sep 2022 14:19:32 -0700 Subject: [PATCH 71/78] Revert "Propagate optionals down to the element comparator." This reverts commit 7b0ae58a6ab607e8c088c80e51a7bc7c6ce47b99. --- .../cudf/table/experimental/row_operators.cuh | 77 ++++++++----------- 1 file changed, 32 insertions(+), 45 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 7b53c11a39b..96bccea9b2a 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -255,16 +255,15 @@ class device_row_comparator { * `null_order::BEFORE` for all columns. * @param comparator Physical element relational comparison functor. 
*/ - device_row_comparator( - Nullate check_nulls, - table_device_view lhs, - table_device_view rhs, - std::optional> l_dremel_device_views, - std::optional> r_dremel_device_views, - std::optional> depth = std::nullopt, - std::optional> column_order = std::nullopt, - std::optional> null_precedence = std::nullopt, - PhysicalElementComparator comparator = {}) noexcept + device_row_comparator(Nullate check_nulls, + table_device_view lhs, + table_device_view rhs, + device_span l_dremel_device_views, + device_span r_dremel_device_views, + std::optional> depth = std::nullopt, + std::optional> column_order = std::nullopt, + std::optional> null_precedence = std::nullopt, + PhysicalElementComparator comparator = {}) noexcept : _lhs{lhs}, _rhs{rhs}, _l_dremel(l_dremel_device_views), @@ -296,15 +295,14 @@ class device_row_comparator { * preprocessed_table::depths * @param comparator Physical element relational comparison functor. */ - __device__ element_comparator( - Nullate check_nulls, - column_device_view lhs, - column_device_view rhs, - null_order null_precedence = null_order::BEFORE, - int depth = 0, - PhysicalElementComparator comparator = {}, - thrust::optional l_dremel_device_view = {}, - thrust::optional r_dremel_device_view = {}) + __device__ element_comparator(Nullate check_nulls, + column_device_view lhs, + column_device_view rhs, + null_order null_precedence = null_order::BEFORE, + int depth = 0, + PhysicalElementComparator comparator = {}, + detail::dremel_device_view l_dremel_device_view = {}, + detail::dremel_device_view r_dremel_device_view = {}) : _lhs{lhs}, _rhs{rhs}, _check_nulls{check_nulls}, @@ -392,11 +390,11 @@ class device_row_comparator { size_type rhs_element_index) { // These are all the values from the Dremel encoding. 
- auto const l_max_def_level = _l_dremel_device_view->max_def_level; - auto const l_def_levels = _l_dremel_device_view->def_levels; - auto const r_def_levels = _r_dremel_device_view->def_levels; - auto const l_rep_levels = _l_dremel_device_view->rep_levels; - auto const r_rep_levels = _r_dremel_device_view->rep_levels; + auto const l_max_def_level = _l_dremel_device_view.max_def_level; + auto const l_def_levels = _l_dremel_device_view.def_levels; + auto const r_def_levels = _r_dremel_device_view.def_levels; + auto const l_rep_levels = _l_dremel_device_view.rep_levels; + auto const r_rep_levels = _r_dremel_device_view.rep_levels; // Traverse the nested list hierarchy to get a column device view // pointing to the underlying child data. @@ -410,8 +408,8 @@ class device_row_comparator { // These start and end values indicate the start and end points of all // the elements of the lists in the current list element // (`[lhs|rhs]_element_index`) that we are comparing. - auto const l_offsets = _l_dremel_device_view->offsets; - auto const r_offsets = _r_dremel_device_view->offsets; + auto const l_offsets = _l_dremel_device_view.offsets; + auto const r_offsets = _r_dremel_device_view.offsets; auto l_start = l_offsets[lhs_element_index]; auto l_end = l_offsets[lhs_element_index + 1]; auto r_start = r_offsets[rhs_element_index]; @@ -480,8 +478,8 @@ class device_row_comparator { Nullate const _check_nulls; null_order const _null_precedence; int const _depth; - thrust::optional _l_dremel_device_view; - thrust::optional _r_dremel_device_view; + detail::dremel_device_view const _l_dremel_device_view; + detail::dremel_device_view const _r_dremel_device_view; PhysicalElementComparator const _comparator; }; @@ -509,24 +507,14 @@ class device_row_comparator { null_order const null_precedence = _null_precedence.has_value() ? 
(*_null_precedence)[i] : null_order::BEFORE; - auto [l_dremel_i, r_dremel_i] = [&]() { - if (_l_dremel.has_value() && _r_dremel.has_value()) { - return std::make_tuple( - thrust::optional{(*_l_dremel)[i]}, - thrust::optional{(*_r_dremel)[i]}); - } else { - return std::make_tuple(thrust::optional{}, - thrust::optional{}); - } - }(); auto element_comp = element_comparator{_check_nulls, _lhs.column(i), _rhs.column(i), null_precedence, depth, _comparator, - l_dremel_i, - r_dremel_i}; + _l_dremel[i], + _r_dremel[i]}; weak_ordering state; cuda::std::tie(state, last_null_depth) = @@ -544,8 +532,8 @@ class device_row_comparator { private: table_device_view const _lhs; table_device_view const _rhs; - std::optional const> _l_dremel; - std::optional const> _r_dremel; + device_span const _l_dremel; + device_span const _r_dremel; Nullate const _check_nulls; std::optional> const _depth; std::optional> const _column_order; @@ -747,11 +735,10 @@ struct preprocessed_table { return _depths.size() ? std::optional>(_depths) : std::nullopt; } - [[nodiscard]] std::optional> dremel_device_views() - const + [[nodiscard]] device_span dremel_device_views() const { if (_dremel_device_views.has_value()) { - return {device_span(*_dremel_device_views)}; + return device_span(*_dremel_device_views); } else { return {}; } From 4a735d3b68b49a0dae4aed88e93c892c5b8cfc4e Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 2 Sep 2022 15:22:54 -0700 Subject: [PATCH 72/78] Stop storing empty dremel views for non-list columns and use a thrust::optional at the element_comparator level. 
--- cpp/include/cudf/lists/detail/dremel.hpp | 14 ++-- .../cudf/table/experimental/row_operators.cuh | 71 ++++++++++++++----- cpp/src/table/row_operators.cu | 5 -- 3 files changed, 57 insertions(+), 33 deletions(-) diff --git a/cpp/include/cudf/lists/detail/dremel.hpp b/cpp/include/cudf/lists/detail/dremel.hpp index 4ddad4177be..4e3aeec2499 100644 --- a/cpp/include/cudf/lists/detail/dremel.hpp +++ b/cpp/include/cudf/lists/detail/dremel.hpp @@ -28,15 +28,11 @@ namespace cudf::detail { * @see the `dremel_data` struct for more info. */ struct dremel_device_view { - // TODO: These elements are default initializable to support default - // initialization of the object. This is currently exploited to create views - // that will never actually be used. We should consider whether this - // represents a serious issue that should be worked around more robustly. - size_type const* offsets{}; - uint8_t const* rep_levels{}; - uint8_t const* def_levels{}; - size_type const leaf_data_size{}; - uint8_t const max_def_level{}; + size_type const* offsets; + uint8_t const* rep_levels; + uint8_t const* def_levels; + size_type const leaf_data_size; + uint8_t const max_def_level; }; /** diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 96bccea9b2a..babb74f3c78 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -211,6 +211,23 @@ struct sorting_physical_element_comparator { } }; +// The has_nested_columns template parameter of the device_row_comparator is +// necessary to help the compiler optimize our code. Without it, the list and +// struct view specializations are present in the code paths used for primitive +// types, and the compiler fails to inline this nearly as well resulting in a +// significant performance drop. 
As a result, there is some minor tension in +// the current design between the presence of this parameter and the way that +// the Dremel data is passed around, first as a +// std::optional> in the +// preprocessed_table/device_row_comparator (which is always valid when +// has_nested_columns and is otherwise invalid) that is then unpacked to a +// thrust::optional at the element_comparator level (which +// is always valid for a list column and otherwise invalid). We cannot use an +// additional template parameter for the element_comparator on a per-column +// basis because we cannot conditionally define dremel_device_view member +// variables without jumping through extra hoops with inheritance, so the +// thrust::optional member must be an optional rather than +// a raw dremel_device_view. /** * @brief Computes the lexicographic comparison between 2 rows. * @@ -295,14 +312,15 @@ class device_row_comparator { * preprocessed_table::depths * @param comparator Physical element relational comparison functor. */ - __device__ element_comparator(Nullate check_nulls, - column_device_view lhs, - column_device_view rhs, - null_order null_precedence = null_order::BEFORE, - int depth = 0, - PhysicalElementComparator comparator = {}, - detail::dremel_device_view l_dremel_device_view = {}, - detail::dremel_device_view r_dremel_device_view = {}) + __device__ element_comparator( + Nullate check_nulls, + column_device_view lhs, + column_device_view rhs, + null_order null_precedence = null_order::BEFORE, + int depth = 0, + PhysicalElementComparator comparator = {}, + thrust::optional l_dremel_device_view = {}, + thrust::optional r_dremel_device_view = {}) : _lhs{lhs}, _rhs{rhs}, _check_nulls{check_nulls}, @@ -390,11 +408,11 @@ class device_row_comparator { size_type rhs_element_index) { // These are all the values from the Dremel encoding. 
- auto const l_max_def_level = _l_dremel_device_view.max_def_level; - auto const l_def_levels = _l_dremel_device_view.def_levels; - auto const r_def_levels = _r_dremel_device_view.def_levels; - auto const l_rep_levels = _l_dremel_device_view.rep_levels; - auto const r_rep_levels = _r_dremel_device_view.rep_levels; + auto const l_max_def_level = _l_dremel_device_view->max_def_level; + auto const l_def_levels = _l_dremel_device_view->def_levels; + auto const r_def_levels = _r_dremel_device_view->def_levels; + auto const l_rep_levels = _l_dremel_device_view->rep_levels; + auto const r_rep_levels = _r_dremel_device_view->rep_levels; // Traverse the nested list hierarchy to get a column device view // pointing to the underlying child data. @@ -408,8 +426,8 @@ class device_row_comparator { // These start and end values indicate the start and end points of all // the elements of the lists in the current list element // (`[lhs|rhs]_element_index`) that we are comparing. - auto const l_offsets = _l_dremel_device_view.offsets; - auto const r_offsets = _r_dremel_device_view.offsets; + auto const l_offsets = _l_dremel_device_view->offsets; + auto const r_offsets = _r_dremel_device_view->offsets; auto l_start = l_offsets[lhs_element_index]; auto l_end = l_offsets[lhs_element_index + 1]; auto r_start = r_offsets[rhs_element_index]; @@ -478,8 +496,8 @@ class device_row_comparator { Nullate const _check_nulls; null_order const _null_precedence; int const _depth; - detail::dremel_device_view const _l_dremel_device_view; - detail::dremel_device_view const _r_dremel_device_view; + thrust::optional _l_dremel_device_view; + thrust::optional _r_dremel_device_view; PhysicalElementComparator const _comparator; }; @@ -497,6 +515,7 @@ class device_row_comparator { size_type const rhs_index) const noexcept { int last_null_depth = std::numeric_limits::max(); + size_type list_column_index{0}; for (size_type i = 0; i < _lhs.num_columns(); ++i) { int const depth = _depth.has_value() ? 
(*_depth)[i] : 0; if (depth > last_null_depth) { continue; } @@ -507,14 +526,28 @@ class device_row_comparator { null_order const null_precedence = _null_precedence.has_value() ? (*_null_precedence)[i] : null_order::BEFORE; + // TODO: At what point do we verify that the columns of lhs and rhs are + // all of the same types? I assume that it's already happened before + // here, otherwise the current code would be failing. + auto [l_dremel_i, r_dremel_i] = [&]() { + if (_lhs.column(i).type().id() == type_id::LIST) { + auto idx = list_column_index++; + return std::make_tuple( + thrust::optional(_l_dremel[idx]), + thrust::optional(_r_dremel[idx])); + } else { + return std::make_tuple(thrust::optional{}, + thrust::optional{}); + } + }(); auto element_comp = element_comparator{_check_nulls, _lhs.column(i), _rhs.column(i), null_precedence, depth, _comparator, - _l_dremel[i], - _r_dremel[i]}; + l_dremel_i, + r_dremel_i}; weak_ordering state; cuda::std::tie(state, last_null_depth) = diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu index 23a254a1e60..2881472c230 100644 --- a/cpp/src/table/row_operators.cu +++ b/cpp/src/table/row_operators.cu @@ -267,11 +267,6 @@ auto list_lex_preprocess(table_view table, rmm::cuda_stream_view stream) if (col.type().id() == type_id::LIST) { dremel_data.push_back(detail::get_dremel_data(col, {}, false, stream)); dremel_device_views.push_back(dremel_data.back()); - } else { - // TODO: Note that this constructs a device view that is in an invalid - // state, i.e. dereferencing any of its pointer members will lead to a - // seg fault. We may instead wish to create a vector of optionals. - dremel_device_views.emplace_back(); } } auto d_dremel_device_views = detail::make_device_uvector_async(dremel_device_views, stream); From ab5a264078d42f452f6d25d85eaeb808b1ba07a3 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 2 Sep 2022 15:27:47 -0700 Subject: [PATCH 73/78] Some cleanup. 
--- .../cudf/table/experimental/row_operators.cuh | 31 +++++++++---------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index babb74f3c78..d74e6ebeeac 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -211,6 +211,8 @@ struct sorting_physical_element_comparator { } }; +using optional_dremel_view = thrust::optional; + // The has_nested_columns template parameter of the device_row_comparator is // necessary to help the compiler optimize our code. Without it, the list and // struct view specializations are present in the code paths used for primitive @@ -312,15 +314,14 @@ class device_row_comparator { * preprocessed_table::depths * @param comparator Physical element relational comparison functor. */ - __device__ element_comparator( - Nullate check_nulls, - column_device_view lhs, - column_device_view rhs, - null_order null_precedence = null_order::BEFORE, - int depth = 0, - PhysicalElementComparator comparator = {}, - thrust::optional l_dremel_device_view = {}, - thrust::optional r_dremel_device_view = {}) + __device__ element_comparator(Nullate check_nulls, + column_device_view lhs, + column_device_view rhs, + null_order null_precedence = null_order::BEFORE, + int depth = 0, + PhysicalElementComparator comparator = {}, + optional_dremel_view l_dremel_device_view = {}, + optional_dremel_view r_dremel_device_view = {}) : _lhs{lhs}, _rhs{rhs}, _check_nulls{check_nulls}, @@ -496,8 +497,8 @@ class device_row_comparator { Nullate const _check_nulls; null_order const _null_precedence; int const _depth; - thrust::optional _l_dremel_device_view; - thrust::optional _r_dremel_device_view; + optional_dremel_view _l_dremel_device_view; + optional_dremel_view _r_dremel_device_view; PhysicalElementComparator const _comparator; }; @@ -532,12 +533,10 @@ class device_row_comparator 
{ auto [l_dremel_i, r_dremel_i] = [&]() { if (_lhs.column(i).type().id() == type_id::LIST) { auto idx = list_column_index++; - return std::make_tuple( - thrust::optional(_l_dremel[idx]), - thrust::optional(_r_dremel[idx])); + return std::make_tuple(optional_dremel_view(_l_dremel[idx]), + optional_dremel_view(_r_dremel[idx])); } else { - return std::make_tuple(thrust::optional{}, - thrust::optional{}); + return std::make_tuple(optional_dremel_view{}, optional_dremel_view{}); } }(); auto element_comp = element_comparator{_check_nulls, From d58ad8064d2c2812777f1dbb0ba9299dd5c9073d Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 2 Sep 2022 15:45:06 -0700 Subject: [PATCH 74/78] Remove unnecessary check. --- cpp/src/table/row_operators.cu | 2 -- 1 file changed, 2 deletions(-) diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu index 2881472c230..7246abe674a 100644 --- a/cpp/src/table/row_operators.cu +++ b/cpp/src/table/row_operators.cu @@ -291,8 +291,6 @@ void check_lex_compatibility(table_view const& input) check_column(list_col.child()); } else if (c.type().id() == type_id::STRUCT) { for (auto child = c.child_begin(); child < c.child_end(); ++child) { - CUDF_EXPECTS(child->type().id() != type_id::LIST, - "Cannot lexicographic compare a table with a STRUCT of LIST column"); check_column(*child); } } From 1f6b7ab43e1a08d7a9a637af58604adfbe39829d Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 7 Sep 2022 16:22:36 -0700 Subject: [PATCH 75/78] Address PR comments. 
--- .../cudf/table/experimental/row_operators.cuh | 25 ++++++++++--------- cpp/include/cudf/table/table_view.hpp | 16 ++++++------ .../binaryop/compiled/struct_binary_ops.cuh | 2 +- cpp/src/search/search_ordered.cu | 3 +-- cpp/src/sort/sort_impl.cuh | 2 +- cpp/src/table/row_operators.cu | 24 ++++++++++-------- cpp/src/table/table_view.cpp | 15 +++++------ .../table/experimental_row_operator_tests.cu | 4 +-- python/cudf/CMakeLists.txt | 2 +- 9 files changed, 46 insertions(+), 47 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index d74e6ebeeac..f3acea7114d 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -443,20 +443,20 @@ class device_row_comparator { // in the child column. The element_index is used to keep track of the current // child element that we're actually comparing. weak_ordering state{weak_ordering::EQUIVALENT}; - for (int left_dremel_index = l_start, right_dremel_index = r_start, element_index = 0; - left_dremel_index < l_end and right_dremel_index < r_end; - ++left_dremel_index, ++right_dremel_index) { + for (int l_dremel_index = l_start, r_dremel_index = r_start, element_index = 0; + l_dremel_index < l_end and r_dremel_index < r_end; + ++l_dremel_index, ++r_dremel_index) { // First early exit: the definition levels do not match. - if (l_def_levels[left_dremel_index] != r_def_levels[right_dremel_index]) { - state = (l_def_levels[left_dremel_index] < r_def_levels[right_dremel_index]) + if (l_def_levels[l_dremel_index] != r_def_levels[r_dremel_index]) { + state = (l_def_levels[l_dremel_index] < r_def_levels[r_dremel_index]) ? weak_ordering::LESS : weak_ordering::GREATER; return cuda::std::pair(state, _depth); } // Second early exit: the repetition levels do not match. 
- if (l_rep_levels[left_dremel_index] != r_rep_levels[right_dremel_index]) { - state = (l_rep_levels[left_dremel_index] < r_rep_levels[right_dremel_index]) + if (l_rep_levels[l_dremel_index] != r_rep_levels[r_dremel_index]) { + state = (l_rep_levels[l_dremel_index] < r_rep_levels[r_dremel_index]) ? weak_ordering::LESS : weak_ordering::GREATER; return cuda::std::pair(state, _depth); @@ -467,16 +467,17 @@ class device_row_comparator { // an underlying element to compare, not just an empty list or a // null. Therefore, we access the element_index element of each list // and compare the values. - // 2) If we are 1 - the maximum definition level and the column is - // nullable, the current element must be a null in the leaf data. - // In this case we ignore the null and skip to the next element. - if (l_def_levels[left_dremel_index] == l_max_def_level) { + // 2) If we are one level below the maximum definition level and the + // column is nullable, the current element must be a null in the + // leaf data. In this case we ignore the null and skip to the next + // element. + if (l_def_levels[l_dremel_index] == l_max_def_level) { int last_null_depth = _depth; cuda::std::tie(state, last_null_depth) = cudf::type_dispatcher( lcol.type(), comparator, element_index, element_index); if (state != weak_ordering::EQUIVALENT) { return cuda::std::pair(state, _depth); } ++element_index; - } else if (lcol.nullable() and l_def_levels[left_dremel_index] == l_max_def_level - 1) { + } else if (lcol.nullable() and l_def_levels[l_dremel_index] == l_max_def_level - 1) { ++element_index; } } diff --git a/cpp/include/cudf/table/table_view.hpp b/cpp/include/cudf/table/table_view.hpp index e3168efd3ee..fc1c93c9a46 100644 --- a/cpp/include/cudf/table/table_view.hpp +++ b/cpp/include/cudf/table/table_view.hpp @@ -166,6 +166,14 @@ class table_view_base { */ table_view_base& operator=(table_view_base&&) = default; }; + +/** + * @brief The function to collect all nested columns in a given table. 
+ * + * @param table The input table + * @return A vector containing all nested columns in the input table + */ +bool has_nested_columns(table_view const& table); } // namespace detail /** @@ -336,14 +344,6 @@ inline bool has_nested_nulls(table_view const& input) */ std::vector get_nullable_columns(table_view const& table); -/** - * @brief The function to collect all nested columns in a given table. - * - * @param table The input table - * @return A vector containing all nested columns in the input table - */ -std::vector get_nested_columns(table_view const& table); - /** * @brief Checks if two `table_view`s have columns of same types * diff --git a/cpp/src/binaryop/compiled/struct_binary_ops.cuh b/cpp/src/binaryop/compiled/struct_binary_ops.cuh index d9f5f225e1e..def9ebcef97 100644 --- a/cpp/src/binaryop/compiled/struct_binary_ops.cuh +++ b/cpp/src/binaryop/compiled/struct_binary_ops.cuh @@ -93,7 +93,7 @@ void apply_struct_binary_op(mutable_column_view& out, out.end(), device_comparison_functor{optional_iter, is_lhs_scalar, is_rhs_scalar, device_comparator}); }; - if (cudf::get_nested_columns(tlhs).size() > 0 || cudf::get_nested_columns(trhs).size() > 0) { + if (cudf::detail::has_nested_columns(tlhs) || cudf::detail::has_nested_columns(trhs)) { is_any_v ? 
tabulate_device_operator( table_comparator.less_equivalent(comparator_nulls, comparator)) diff --git a/cpp/src/search/search_ordered.cu b/cpp/src/search/search_ordered.cu index dbda063ca84..e661ca5beb3 100644 --- a/cpp/src/search/search_ordered.cu +++ b/cpp/src/search/search_ordered.cu @@ -73,8 +73,7 @@ std::unique_ptr search_ordered(table_view const& haystack, auto const haystack_it = cudf::experimental::row::lhs_iterator(0); auto const needles_it = cudf::experimental::row::rhs_iterator(0); - if (cudf::get_nested_columns(haystack).size() > 0 || - cudf::get_nested_columns(needles).size() > 0) { + if (cudf::detail::has_nested_columns(haystack) || cudf::get_nested_columns(needles).size() > 0) { auto const d_comparator = comparator.less(nullate::DYNAMIC{has_nulls}); if (find_first) { thrust::lower_bound(rmm::exec_policy(stream), diff --git a/cpp/src/sort/sort_impl.cuh b/cpp/src/sort/sort_impl.cuh index 6fd8ec4c4cc..97fc8ac14cb 100644 --- a/cpp/src/sort/sort_impl.cuh +++ b/cpp/src/sort/sort_impl.cuh @@ -127,7 +127,7 @@ std::unique_ptr sorted_order(table_view input, auto comp = experimental::row::lexicographic::self_comparator(input, column_order, null_precedence, stream); - if (cudf::get_nested_columns(input).size() > 0) { + if (cudf::detail::has_nested_columns(input)) { auto comparator = comp.less(nullate::DYNAMIC{has_nested_nulls(input)}); if (stable) { thrust::stable_sort(rmm::exec_policy(stream), diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu index 7246abe674a..26b270533a4 100644 --- a/cpp/src/table/row_operators.cu +++ b/cpp/src/table/row_operators.cu @@ -269,7 +269,7 @@ auto list_lex_preprocess(table_view table, rmm::cuda_stream_view stream) dremel_device_views.push_back(dremel_data.back()); } } - auto d_dremel_device_views = detail::make_device_uvector_async(dremel_device_views, stream); + auto d_dremel_device_views = detail::make_device_uvector_sync(dremel_device_views, stream); return std::make_tuple(std::move(dremel_data), 
std::move(d_dremel_device_views)); } @@ -359,16 +359,18 @@ std::shared_ptr preprocessed_table::create( auto d_null_precedence = detail::make_device_uvector_async(new_null_precedence, stream); auto d_depths = detail::make_device_uvector_async(verticalized_col_depths, stream); - if (cudf::get_nested_columns(t).size() > 0) { - auto [dremel_data, d_dremel_device_view] = list_lex_preprocess(verticalized_lhs, stream); - return std::shared_ptr( - new preprocessed_table(std::move(d_t), - std::move(d_column_order), - std::move(d_null_precedence), - std::move(d_depths), - std::move(dremel_data), - std::move(d_dremel_device_view))); - } else { + if (detail::has_nested_columns(t)) > 0) + { + auto [dremel_data, d_dremel_device_view] = list_lex_preprocess(verticalized_lhs, stream); + return std::shared_ptr( + new preprocessed_table(std::move(d_t), + std::move(d_column_order), + std::move(d_null_precedence), + std::move(d_depths), + std::move(dremel_data), + std::move(d_dremel_device_view))); + } + else { return std::shared_ptr(new preprocessed_table(std::move(d_t), std::move(d_column_order), std::move(d_null_precedence), diff --git a/cpp/src/table/table_view.cpp b/cpp/src/table/table_view.cpp index c10de3a300f..0d1cabfd4f6 100644 --- a/cpp/src/table/table_view.cpp +++ b/cpp/src/table/table_view.cpp @@ -114,15 +114,6 @@ std::vector get_nullable_columns(table_view const& table) return result; } -std::vector get_nested_columns(table_view const& table) -{ - std::vector result; - for (auto const& col : table) { - if (is_nested(col.type())) { result.push_back(col); } - } - return result; -} - namespace detail { template @@ -143,5 +134,11 @@ template bool is_relationally_comparable(table_view const& lhs, tabl template bool is_relationally_comparable(mutable_table_view const& lhs, mutable_table_view const& rhs); +bool has_nested_columns(table_view const& table) +{ + return std::any_of( + table.begin(), table.end(), [](column_view const& col) { return is_nested(col.type()); }); +} + } // 
namespace detail } // namespace cudf diff --git a/cpp/tests/table/experimental_row_operator_tests.cu b/cpp/tests/table/experimental_row_operator_tests.cu index c70eed152df..0566f55e46d 100644 --- a/cpp/tests/table/experimental_row_operator_tests.cu +++ b/cpp/tests/table/experimental_row_operator_tests.cu @@ -58,7 +58,7 @@ auto self_comparison(cudf::table_view input, auto output = cudf::make_numeric_column( cudf::data_type(cudf::type_id::BOOL8), input.num_rows(), cudf::mask_state::UNALLOCATED); - if (cudf::get_nested_columns(input).size() > 0) { + if (cudf::detail::has_nested_columns(input)) { thrust::transform(rmm::exec_policy(stream), thrust::make_counting_iterator(0), thrust::make_counting_iterator(input.num_rows()), @@ -92,7 +92,7 @@ auto two_table_comparison(cudf::table_view lhs, auto output = cudf::make_numeric_column( cudf::data_type(cudf::type_id::BOOL8), lhs.num_rows(), cudf::mask_state::UNALLOCATED); - if (cudf::get_nested_columns(lhs).size() > 0 || cudf::get_nested_columns(rhs).size() > 0) { + if (cudf::detail::has_nested_columns(lhs) || cudf::detail::has_nested_columns(rhs)) { thrust::transform(rmm::exec_policy(stream), lhs_it, lhs_it + lhs.num_rows(), diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index 72e1779401f..91c29661ea6 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -25,7 +25,7 @@ project( # language to be enabled here. The test project that is built in scikit-build to verify # various linking options for the python library is hardcoded to build with C, so until # that is fixed we need to keep C. - C CXX + C CXX CUDA ) option(FIND_CUDF_CPP "Search for existing CUDF C++ installations before defaulting to local files" From a1a965533c0668845c3bc8c55db4aec88dd4e977 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 7 Sep 2022 16:22:48 -0700 Subject: [PATCH 76/78] Revert "Remove unnecessary check." This reverts commit d58ad8064d2c2812777f1dbb0ba9299dd5c9073d. 
--- cpp/src/table/row_operators.cu | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu index 26b270533a4..84913fec1d9 100644 --- a/cpp/src/table/row_operators.cu +++ b/cpp/src/table/row_operators.cu @@ -291,6 +291,8 @@ void check_lex_compatibility(table_view const& input) check_column(list_col.child()); } else if (c.type().id() == type_id::STRUCT) { for (auto child = c.child_begin(); child < c.child_end(); ++child) { + CUDF_EXPECTS(child->type().id() != type_id::LIST, + "Cannot lexicographic compare a table with a STRUCT of LIST column"); check_column(*child); } } From f5cee47d2601976063447edeb7a4763be3aa092b Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 8 Sep 2022 10:14:21 -0700 Subject: [PATCH 77/78] Address remaining TODOs. --- cpp/include/cudf/table/table_view.hpp | 4 ++-- cpp/src/table/row_operators.cu | 22 ++++++++++------------ python/cudf/CMakeLists.txt | 2 +- 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/cpp/include/cudf/table/table_view.hpp b/cpp/include/cudf/table/table_view.hpp index fc1c93c9a46..8b520714b34 100644 --- a/cpp/include/cudf/table/table_view.hpp +++ b/cpp/include/cudf/table/table_view.hpp @@ -168,10 +168,10 @@ class table_view_base { }; /** - * @brief The function to collect all nested columns in a given table. + * @brief Determine if any nested columns exist in a given table. 
* * @param table The input table - * @return A vector containing all nested columns in the input table + * @return Whether nested columns exist in the input table */ bool has_nested_columns(table_view const& table); } // namespace detail diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu index 84913fec1d9..05e8860d63d 100644 --- a/cpp/src/table/row_operators.cu +++ b/cpp/src/table/row_operators.cu @@ -361,18 +361,16 @@ std::shared_ptr preprocessed_table::create( auto d_null_precedence = detail::make_device_uvector_async(new_null_precedence, stream); auto d_depths = detail::make_device_uvector_async(verticalized_col_depths, stream); - if (detail::has_nested_columns(t)) > 0) - { - auto [dremel_data, d_dremel_device_view] = list_lex_preprocess(verticalized_lhs, stream); - return std::shared_ptr( - new preprocessed_table(std::move(d_t), - std::move(d_column_order), - std::move(d_null_precedence), - std::move(d_depths), - std::move(dremel_data), - std::move(d_dremel_device_view))); - } - else { + if (detail::has_nested_columns(t)) { + auto [dremel_data, d_dremel_device_view] = list_lex_preprocess(verticalized_lhs, stream); + return std::shared_ptr( + new preprocessed_table(std::move(d_t), + std::move(d_column_order), + std::move(d_null_precedence), + std::move(d_depths), + std::move(dremel_data), + std::move(d_dremel_device_view))); + } else { return std::shared_ptr(new preprocessed_table(std::move(d_t), std::move(d_column_order), std::move(d_null_precedence), diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index 91c29661ea6..72e1779401f 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -25,7 +25,7 @@ project( # language to be enabled here. The test project that is built in scikit-build to verify # various linking options for the python library is hardcoded to build with C, so until # that is fixed we need to keep C. 
- C CXX CUDA + C CXX ) option(FIND_CUDF_CPP "Search for existing CUDF C++ installations before defaulting to local files" From f7b671ab1634f3286841d47744cac908c98f0a23 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 8 Sep 2022 14:09:03 -0700 Subject: [PATCH 78/78] Fix typo. --- cpp/src/search/search_ordered.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/search/search_ordered.cu b/cpp/src/search/search_ordered.cu index e661ca5beb3..8d3b0f97726 100644 --- a/cpp/src/search/search_ordered.cu +++ b/cpp/src/search/search_ordered.cu @@ -73,7 +73,7 @@ std::unique_ptr search_ordered(table_view const& haystack, auto const haystack_it = cudf::experimental::row::lhs_iterator(0); auto const needles_it = cudf::experimental::row::rhs_iterator(0); - if (cudf::detail::has_nested_columns(haystack) || cudf::get_nested_columns(needles).size() > 0) { + if (cudf::detail::has_nested_columns(haystack) || cudf::detail::has_nested_columns(needles)) { auto const d_comparator = comparator.less(nullate::DYNAMIC{has_nulls}); if (find_first) { thrust::lower_bound(rmm::exec_policy(stream),