From 4c272df8f2ee753f26e088bb4d95fff28d731bbf Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 23 Jun 2021 10:31:28 -0600 Subject: [PATCH 01/19] Add aggregation definitions --- cpp/include/cudf/aggregation.hpp | 70 ++++++++++++++++++++++++-------- 1 file changed, 53 insertions(+), 17 deletions(-) diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index 5fab284d506..995eae7a4a6 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -78,12 +78,14 @@ class aggregation { ROW_NUMBER, ///< get row-number of current index (relative to rolling window) COLLECT_LIST, ///< collect values into a list COLLECT_SET, ///< collect values into a list without duplicate entries - MERGE_LISTS, ///< merge multiple lists values into one list - MERGE_SETS, ///< merge multiple lists values into one list then drop duplicate entries LEAD, ///< window function, accesses row at specified offset following current row LAG, ///< window function, accesses row at specified offset preceding current row PTX, ///< PTX UDF based reduction - CUDA ///< CUDA UDF based reduction + CUDA, ///< CUDA UDF based reduction + MERGE_LISTS, ///< merge multiple lists values into one list + MERGE_SETS, ///< merge multiple lists values into one list then drop duplicate entries + MERGE_VARIANCE, ///< merge partial variance values + MERGE_STD ///< merge partial standard deviation values }; aggregation() = delete; @@ -271,6 +273,28 @@ std::unique_ptr make_collect_set_aggregation(null_policy null_handling = n null_equality nulls_equal = null_equality::EQUAL, nan_equality nans_equal = nan_equality::UNEQUAL); +/// Factory to create a LAG aggregation +template +std::unique_ptr make_lag_aggregation(size_type offset); + +/// Factory to create a LEAD aggregation +template +std::unique_ptr make_lead_aggregation(size_type offset); + +/** + * @brief Factory to create an aggregation base on UDF for PTX or CUDA + * + * @param[in] type: either udf_type::PTX or udf_type::CUDA + * @param[in] user_defined_aggregator A string containing the aggregator code + * @param[in] output_type expected output type + * + * @return aggregation unique pointer housing user_defined_aggregator string. + */ +template +std::unique_ptr make_udf_aggregation(udf_type type, + std::string const& user_defined_aggregator, + data_type output_type); + /** * @brief Factory to create a MERGE_LISTS aggregation. * @@ -308,27 +332,39 @@ template std::unique_ptr make_merge_sets_aggregation(null_equality nulls_equal = null_equality::EQUAL, nan_equality nans_equal = nan_equality::UNEQUAL); -/// Factory to create a LAG aggregation -template -std::unique_ptr make_lag_aggregation(size_type offset); - -/// Factory to create a LEAD aggregation +/** + * @brief Factory to create a MERGE_VARIANCE aggregation + * + * This aggregation is designed specificly to perform distributed computing of `VARIANCE` + * aggregation. The partial results input to this aggregation is generated by two groupby + * aggregations: `VARIANCE` and `COUNT_VALID`. + * + * In order to use this aggregation, the `aggregation_request` array input to `groupby::aggregate` + * must contain at least two requests: + * - A `COLLECT_LIST` request for collecting the partial results of `COUNT_VALID`, and + * - This `MERGE_VARIANCE` request, which must be given AFTER the `COLLECT_LIST` request above + * + * Since the partial results output from `VARIANCE` and `COUNT_VALID` do not contain nulls, the + * input values to those two aggregations must be non-nullable. 
+ * + * @param ddof Delta degrees of freedom. The divisor used in calculation of `variance` is + * `N - ddof`, where `N` is the population size. + */ template -std::unique_ptr make_lead_aggregation(size_type offset); +std::unique_ptr make_merge_variance_aggregation(size_type ddof = 1); /** - * @brief Factory to create an aggregation base on UDF for PTX or CUDA + * @brief Factory to create a MERGE_STD aggregation * - * @param[in] type: either udf_type::PTX or udf_type::CUDA - * @param[in] user_defined_aggregator A string containing the aggregator code - * @param[in] output_type expected output type + * This aggregation is designed specificly to perform distributed computing of `STD` + * aggregation. The partial results input to this aggregation and its usage are the same as of + * `MERGE_VARIANCE` aggregation. * - * @return aggregation unique pointer housing user_defined_aggregator string. + * @param ddof Delta degrees of freedom. The divisor used in calculation of `variance` is + * `N - ddof`, where `N` is the population size. */ template -std::unique_ptr make_udf_aggregation(udf_type type, - std::string const& user_defined_aggregator, - data_type output_type); +std::unique_ptr make_merge_std_aggregation(size_type ddof = 1); /** @} */ // end of group } // namespace cudf From d446076ebe63cbbeec7d82ecfa6ce32a568b309c Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 24 Jun 2021 15:26:00 -0600 Subject: [PATCH 02/19] Adding new aggregations: M2, MERGE_VARIANCES, MERGE_STDS --- cpp/CMakeLists.txt | 1 + cpp/include/cudf/aggregation.hpp | 90 ++++--- .../cudf/detail/aggregation/aggregation.hpp | 249 ++++++++++++------ cpp/src/aggregation/aggregation.cpp | 147 +++++++---- cpp/src/groupby/sort/aggregate.cpp | 122 ++++++++- cpp/src/groupby/sort/group_merge_variances.cu | 74 ++++++ cpp/src/groupby/sort/group_reductions.hpp | 65 +++++ cpp/src/groupby/sort/group_std.cu | 88 ++++++- 8 files changed, 657 insertions(+), 179 deletions(-) create mode 100644 cpp/src/groupby/sort/group_merge_variances.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 678f202d106..05c3028067e 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -199,6 +199,7 @@ add_library(cudf src/groupby/sort/aggregate.cpp src/groupby/sort/group_collect.cu src/groupby/sort/group_merge_lists.cu + src/groupby/sort/group_merge_variances.cu src/groupby/sort/group_count.cu src/groupby/sort/group_max.cu src/groupby/sort/group_min.cu diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index 995eae7a4a6..5913d39e020 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -57,35 +57,36 @@ class aggregation { * @brief Possible aggregation operations */ enum Kind { - SUM, ///< sum reduction - PRODUCT, ///< product reduction - MIN, ///< min reduction - MAX, ///< max reduction - COUNT_VALID, ///< count number of valid elements - COUNT_ALL, ///< count number of elements - ANY, ///< any reduction - ALL, ///< all reduction - SUM_OF_SQUARES, ///< sum of squares reduction - MEAN, ///< arithmetic mean reduction - VARIANCE, ///< groupwise variance - STD, ///< groupwise standard deviation - MEDIAN, ///< median reduction - QUANTILE, ///< compute specified quantile(s) - ARGMAX, ///< Index of max element - ARGMIN, ///< Index of min element - NUNIQUE, ///< count number of unique elements - NTH_ELEMENT, ///< get the nth element - ROW_NUMBER, ///< get row-number of current index (relative to rolling window) - COLLECT_LIST, ///< collect values into a list - COLLECT_SET, ///< collect 
values into a list without duplicate entries - LEAD, ///< window function, accesses row at specified offset following current row - LAG, ///< window function, accesses row at specified offset preceding current row - PTX, ///< PTX UDF based reduction - CUDA, ///< CUDA UDF based reduction - MERGE_LISTS, ///< merge multiple lists values into one list - MERGE_SETS, ///< merge multiple lists values into one list then drop duplicate entries - MERGE_VARIANCE, ///< merge partial variance values - MERGE_STD ///< merge partial standard deviation values + SUM, ///< sum reduction + PRODUCT, ///< product reduction + MIN, ///< min reduction + MAX, ///< max reduction + COUNT_VALID, ///< count number of valid elements + COUNT_ALL, ///< count number of elements + ANY, ///< any reduction + ALL, ///< all reduction + SUM_OF_SQUARES, ///< sum of squares reduction + MEAN, ///< arithmetic mean reduction + M2, ///< groupwise sum of squares of differences from the current mean + VARIANCE, ///< groupwise variance + STD, ///< groupwise standard deviation + MEDIAN, ///< median reduction + QUANTILE, ///< compute specified quantile(s) + ARGMAX, ///< Index of max element + ARGMIN, ///< Index of min element + NUNIQUE, ///< count number of unique elements + NTH_ELEMENT, ///< get the nth element + ROW_NUMBER, ///< get row-number of current index (relative to rolling window) + COLLECT_LIST, ///< collect values into a list + COLLECT_SET, ///< collect values into a list without duplicate entries + LEAD, ///< window function, accesses row at specified offset following current row + LAG, ///< window function, accesses row at specified offset preceding current row + PTX, ///< PTX UDF based reduction + CUDA, ///< CUDA UDF based reduction + MERGE_LISTS, ///< merge multiple lists values into one list + MERGE_SETS, ///< merge multiple lists values into one list then drop duplicate entries + MERGE_VARIANCES, ///< merge partial variance values + MERGE_STDS ///< merge partial standard deviation values }; aggregation() = delete; @@ -161,6 +162,16 @@ std::unique_ptr make_sum_of_squares_aggregation(); template std::unique_ptr make_mean_aggregation(); +/** + * @brief Factory to create a M2 aggregation + * + * A M2 aggregation is groupwise sum of squares of differences from the current mean. From this, + * a `VARIANCE` aggregation can be computed as `M2 / (N - ddof)`, where `N` is the population size + * and `ddof` is the delta degrees of freedom. + */ +template +std::unique_ptr make_m2_aggregation(); + /** * @brief Factory to create a VARIANCE aggregation * @@ -333,7 +344,7 @@ std::unique_ptr make_merge_sets_aggregation(null_equality nulls_equal = nu nan_equality nans_equal = nan_equality::UNEQUAL); /** - * @brief Factory to create a MERGE_VARIANCE aggregation + * @brief Factory to create a MERGE_VARIANCES aggregation * * This aggregation is designed specificly to perform distributed computing of `VARIANCE` * aggregation. 
The partial results input to this aggregation is generated by two groupby
  * aggregations: `VARIANCE` and `COUNT_VALID`.
  *
  * In order to use this aggregation, the `aggregation_request` array input to `groupby::aggregate`
  * must contain at least two requests:
- * - A `COLLECT_LIST` request for collecting the partial results of `COUNT_VALID`, and
- * - This `MERGE_VARIANCE` request, which must be given AFTER the `COLLECT_LIST` request above
+ * - A request for `COLLECT_LIST` aggregation to collect the partial results of `COUNT_VALID`
+ * - This `MERGE_VARIANCES` request, which must be given AFTER the request above so that it can
+ *   access the cached results generated by that request
+ *
+ * For a merging operation that is not a final merge (i.e., its outputs will be used as input to
+ * perform another `MERGE_VARIANCES` aggregation), a `SUM` aggregation must also be added to the
+ * same request for `COLLECT_LIST` above to produce the merged values for `COUNT_VALID`.
  *
  * Since the partial results output from `VARIANCE` and `COUNT_VALID` do not contain nulls, the
- * input values to those two aggregations must be non-nullable.
+ * input values columns to those two requests must be non-nullable.
  *
  * @param ddof Delta degrees of freedom. The divisor used in calculation of `variance` is
  * `N - ddof`, where `N` is the population size.
  */
 template <typename Base = aggregation>
-std::unique_ptr<Base> make_merge_variance_aggregation(size_type ddof = 1);
+std::unique_ptr<Base> make_merge_variances_aggregation(size_type ddof = 1);
 
 /**
- * @brief Factory to create a MERGE_STD aggregation
+ * @brief Factory to create a MERGE_STDS aggregation
  *
  * This aggregation is designed specificly to perform distributed computing of `STD`
  * aggregation. The partial results input to this aggregation and its usage are the same as of
- * `MERGE_VARIANCE` aggregation.
+ * `MERGE_VARIANCES` aggregation.
  *
  * @param ddof Delta degrees of freedom. The divisor used in calculation of `variance` is
 * `N - ddof`, where `N` is the population size. 
*/ template -std::unique_ptr make_merge_std_aggregation(size_type ddof = 1); +std::unique_ptr make_merge_stds_aggregation(size_type ddof = 1); /** @} */ // end of group } // namespace cudf diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp index 373d695a5b5..ab2ef947cba 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.hpp +++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp @@ -53,6 +53,8 @@ class simple_aggregations_collector { // Declares the interface for the simple data_type col_type, class sum_of_squares_aggregation const& agg); virtual std::vector> visit(data_type col_type, class mean_aggregation const& agg); + virtual std::vector> visit(data_type col_type, + class m2_aggregation const& agg); virtual std::vector> visit(data_type col_type, class var_aggregation const& agg); virtual std::vector> visit(data_type col_type, @@ -75,14 +77,18 @@ class simple_aggregations_collector { // Declares the interface for the simple data_type col_type, class collect_list_aggregation const& agg); virtual std::vector> visit(data_type col_type, class collect_set_aggregation const& agg); + virtual std::vector> visit(data_type col_type, + class lead_lag_aggregation const& agg); + virtual std::vector> visit(data_type col_type, + class udf_aggregation const& agg); virtual std::vector> visit(data_type col_type, class merge_lists_aggregation const& agg); virtual std::vector> visit(data_type col_type, class merge_sets_aggregation const& agg); + virtual std::vector> visit( + data_type col_type, class merge_variances_aggregation const& agg); virtual std::vector> visit(data_type col_type, - class lead_lag_aggregation const& agg); - virtual std::vector> visit(data_type col_type, - class udf_aggregation const& agg); + class merge_stds_aggregation const& agg); }; class aggregation_finalizer { // Declares the interface for the finalizer @@ -98,6 +104,7 @@ class aggregation_finalizer { // Declares the interface for the finalizer virtual void visit(class all_aggregation const& agg); virtual void visit(class sum_of_squares_aggregation const& agg); virtual void visit(class mean_aggregation const& agg); + virtual void visit(class m2_aggregation const& agg); virtual void visit(class var_aggregation const& agg); virtual void visit(class std_aggregation const& agg); virtual void visit(class median_aggregation const& agg); @@ -109,10 +116,12 @@ class aggregation_finalizer { // Declares the interface for the finalizer virtual void visit(class row_number_aggregation const& agg); virtual void visit(class collect_list_aggregation const& agg); virtual void visit(class collect_set_aggregation const& agg); - virtual void visit(class merge_lists_aggregation const& agg); - virtual void visit(class merge_sets_aggregation const& agg); virtual void visit(class lead_lag_aggregation const& agg); virtual void visit(class udf_aggregation const& agg); + virtual void visit(class merge_lists_aggregation const& agg); + virtual void visit(class merge_sets_aggregation const& agg); + virtual void visit(class merge_variances_aggregation const& agg); + virtual void visit(class merge_stds_aggregation const& agg); }; /** @@ -286,6 +295,25 @@ class mean_aggregation final : public rolling_aggregation { void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } }; +/** + * @brief Derived class for specifying a m2 aggregation + */ +class m2_aggregation : public aggregation { + public: + m2_aggregation() : aggregation{M2} {} + + 
std::unique_ptr clone() const override + { + return std::make_unique(*this); + } + std::vector> get_simple_aggregations( + data_type col_type, cudf::detail::simple_aggregations_collector& collector) const override + { + return collector.visit(col_type, *this); + } + void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } +}; + /** * @brief Derived class for specifying a standard deviation/variance aggregation */ @@ -305,8 +333,9 @@ class std_var_aggregation : public aggregation { protected: std_var_aggregation(aggregation::Kind k, size_type ddof) : aggregation(k), _ddof{ddof} { - CUDF_EXPECTS(k == aggregation::STD or k == aggregation::VARIANCE, - "std_var_aggregation can accept only STD, VARIANCE"); + CUDF_EXPECTS(k == aggregation::STD or k == aggregation::VARIANCE or + k == aggregation::MERGE_STDS or k == aggregation::MERGE_VARIANCES, + "std_var_aggregation can accept only STD, VARIANCE, MERGE_STDS, MERGE_VARIANCES"); } size_type hash_impl() const { return std::hash{}(_ddof); } @@ -634,7 +663,97 @@ class collect_set_aggregation final : public rolling_aggregation { }; /** - * @brief Derived aggregation class for specifying MERGE_LISTs aggregation + * @brief Derived aggregation class for specifying LEAD/LAG window aggregations + */ +class lead_lag_aggregation final : public rolling_aggregation { + public: + lead_lag_aggregation(Kind kind, size_type offset) + : aggregation{offset < 0 ? (kind == LAG ? LEAD : LAG) : kind}, row_offset{std::abs(offset)} + { + } + + bool is_equal(aggregation const& _other) const override + { + if (!this->aggregation::is_equal(_other)) { return false; } + auto const& other = dynamic_cast(_other); + return (row_offset == other.row_offset); + } + + size_t do_hash() const override { return this->aggregation::do_hash() ^ hash_impl(); } + + std::unique_ptr clone() const override + { + return std::make_unique(*this); + } + std::vector> get_simple_aggregations( + data_type col_type, cudf::detail::simple_aggregations_collector& collector) const override + { + return collector.visit(col_type, *this); + } + void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } + + size_type row_offset; + + private: + size_t hash_impl() const { return std::hash()(row_offset); } +}; + +/** + * @brief Derived class for specifying a custom aggregation + * specified in udf + */ +class udf_aggregation final : public rolling_aggregation { + public: + udf_aggregation(aggregation::Kind type, + std::string const& user_defined_aggregator, + data_type output_type) + : aggregation{type}, + _source{user_defined_aggregator}, + _operator_name{(type == aggregation::PTX) ? 
"rolling_udf_ptx" : "rolling_udf_cuda"}, + _function_name{"rolling_udf"}, + _output_type{output_type} + { + CUDF_EXPECTS(type == aggregation::PTX or type == aggregation::CUDA, + "udf_aggregation can accept only PTX, CUDA"); + } + + bool is_equal(aggregation const& _other) const override + { + if (!this->aggregation::is_equal(_other)) { return false; } + auto const& other = dynamic_cast(_other); + return (_source == other._source and _operator_name == other._operator_name and + _function_name == other._function_name and _output_type == other._output_type); + } + + size_t do_hash() const override { return this->aggregation::do_hash() ^ hash_impl(); } + + std::unique_ptr clone() const override + { + return std::make_unique(*this); + } + std::vector> get_simple_aggregations( + data_type col_type, cudf::detail::simple_aggregations_collector& collector) const override + { + return collector.visit(col_type, *this); + } + void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } + + std::string const _source; + std::string const _operator_name; + std::string const _function_name; + data_type _output_type; + + protected: + size_t hash_impl() const + { + return std::hash{}(_source) ^ std::hash{}(_operator_name) ^ + std::hash{}(_function_name) ^ + std::hash{}(static_cast(_output_type.id())); + } +}; + +/** + * @brief Derived aggregation class for specifying MERGE_LISTS aggregation */ class merge_lists_aggregation final : public aggregation { public: @@ -653,7 +772,7 @@ class merge_lists_aggregation final : public aggregation { }; /** - * @brief Derived aggregation class for specifying MERGE_SETs aggregation + * @brief Derived aggregation class for specifying MERGE_SETS aggregation */ class merge_sets_aggregation final : public aggregation { public: @@ -694,27 +813,18 @@ class merge_sets_aggregation final : public aggregation { }; /** - * @brief Derived aggregation class for specifying LEAD/LAG window aggregations + * @brief Derived aggregation class for specifying MERGE_VARIANCES aggregation */ -class lead_lag_aggregation final : public rolling_aggregation { +class merge_variances_aggregation final : public std_var_aggregation { public: - lead_lag_aggregation(Kind kind, size_type offset) - : aggregation{offset < 0 ? (kind == LAG ? 
LEAD : LAG) : kind}, row_offset{std::abs(offset)} + explicit merge_variances_aggregation(size_type ddof) + : std_var_aggregation{aggregation::MERGE_VARIANCES, ddof} { } - bool is_equal(aggregation const& _other) const override - { - if (!this->aggregation::is_equal(_other)) { return false; } - auto const& other = dynamic_cast(_other); - return (row_offset == other.row_offset); - } - - size_t do_hash() const override { return this->aggregation::do_hash() ^ hash_impl(); } - std::unique_ptr clone() const override { - return std::make_unique(*this); + return std::make_unique(*this); } std::vector> get_simple_aggregations( data_type col_type, cudf::detail::simple_aggregations_collector& collector) const override @@ -722,45 +832,21 @@ class lead_lag_aggregation final : public rolling_aggregation { return collector.visit(col_type, *this); } void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } - - size_type row_offset; - - private: - size_t hash_impl() const { return std::hash()(row_offset); } }; /** - * @brief Derived class for specifying a custom aggregation - * specified in udf + * @brief Derived aggregation class for specifying MERGE_STDS aggregation */ -class udf_aggregation final : public rolling_aggregation { +class merge_stds_aggregation final : public std_var_aggregation { public: - udf_aggregation(aggregation::Kind type, - std::string const& user_defined_aggregator, - data_type output_type) - : aggregation{type}, - _source{user_defined_aggregator}, - _operator_name{(type == aggregation::PTX) ? "rolling_udf_ptx" : "rolling_udf_cuda"}, - _function_name{"rolling_udf"}, - _output_type{output_type} + explicit merge_stds_aggregation(size_type ddof) + : std_var_aggregation{aggregation::MERGE_STDS, ddof} { - CUDF_EXPECTS(type == aggregation::PTX or type == aggregation::CUDA, - "udf_aggregation can accept only PTX, CUDA"); - } - - bool is_equal(aggregation const& _other) const override - { - if (!this->aggregation::is_equal(_other)) { return false; } - auto const& other = dynamic_cast(_other); - return (_source == other._source and _operator_name == other._operator_name and - _function_name == other._function_name and _output_type == other._output_type); } - size_t do_hash() const override { return this->aggregation::do_hash() ^ hash_impl(); } - std::unique_ptr clone() const override { - return std::make_unique(*this); + return std::make_unique(*this); } std::vector> get_simple_aggregations( data_type col_type, cudf::detail::simple_aggregations_collector& collector) const override @@ -768,19 +854,6 @@ class udf_aggregation final : public rolling_aggregation { return collector.visit(col_type, *this); } void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } - - std::string const _source; - std::string const _operator_name; - std::string const _function_name; - data_type _output_type; - - protected: - size_t hash_impl() const - { - return std::hash{}(_source) ^ std::hash{}(_operator_name) ^ - std::hash{}(_function_name) ^ - std::hash{}(static_cast(_output_type.id())); - } }; /** @@ -904,6 +977,12 @@ struct target_type_impl() && is_su using type = Source; }; +// Always use `double` for M2 +template +struct target_type_impl { + using type = double; +}; + // Always use `double` for VARIANCE template struct target_type_impl { @@ -970,6 +1049,18 @@ struct target_type_impl { using type = cudf::list_view; }; +// Always use Source for LEAD +template +struct target_type_impl { + using type = Source; +}; + +// Always use Source for LAG 
+template +struct target_type_impl { + using type = Source; +}; + // Always use list for MERGE_LISTS template struct target_type_impl { @@ -982,16 +1073,16 @@ struct target_type_impl { using type = cudf::list_view; }; -// Always use Source for LEAD -template -struct target_type_impl { - using type = Source; +// Always use `double` for MERGE_VARIANCES +template +struct target_type_impl { + using type = double; }; -// Always use Source for LAG -template -struct target_type_impl { - using type = Source; +// Always use `double` for MERGE_STDS +template +struct target_type_impl { + using type = double; }; /** @@ -1083,14 +1174,18 @@ CUDA_HOST_DEVICE_CALLABLE decltype(auto) aggregation_dispatcher(aggregation::Kin return f.template operator()(std::forward(args)...); case aggregation::COLLECT_SET: return f.template operator()(std::forward(args)...); - case aggregation::MERGE_LISTS: - return f.template operator()(std::forward(args)...); - case aggregation::MERGE_SETS: - return f.template operator()(std::forward(args)...); case aggregation::LEAD: return f.template operator()(std::forward(args)...); case aggregation::LAG: return f.template operator()(std::forward(args)...); + case aggregation::MERGE_LISTS: + return f.template operator()(std::forward(args)...); + case aggregation::MERGE_SETS: + return f.template operator()(std::forward(args)...); + case aggregation::MERGE_VARIANCES: + return f.template operator()(std::forward(args)...); + case aggregation::MERGE_STDS: + return f.template operator()(std::forward(args)...); default: { #ifndef __CUDA_ARCH__ CUDF_FAIL("Unsupported aggregation."); diff --git a/cpp/src/aggregation/aggregation.cpp b/cpp/src/aggregation/aggregation.cpp index f0fd865f685..26340bf6c41 100644 --- a/cpp/src/aggregation/aggregation.cpp +++ b/cpp/src/aggregation/aggregation.cpp @@ -88,6 +88,12 @@ std::vector> simple_aggregations_collector::visit( return visit(col_type, static_cast(agg)); } +std::vector> simple_aggregations_collector::visit( + data_type col_type, m2_aggregation const& agg) +{ + return visit(col_type, static_cast(agg)); +} + std::vector> simple_aggregations_collector::visit( data_type col_type, var_aggregation const& agg) { @@ -154,6 +160,18 @@ std::vector> simple_aggregations_collector::visit( return visit(col_type, static_cast(agg)); } +std::vector> simple_aggregations_collector::visit( + data_type col_type, lead_lag_aggregation const& agg) +{ + return visit(col_type, static_cast(agg)); +} + +std::vector> simple_aggregations_collector::visit( + data_type col_type, udf_aggregation const& agg) +{ + return visit(col_type, static_cast(agg)); +} + std::vector> simple_aggregations_collector::visit( data_type col_type, merge_lists_aggregation const& agg) { @@ -167,13 +185,13 @@ std::vector> simple_aggregations_collector::visit( } std::vector> simple_aggregations_collector::visit( - data_type col_type, lead_lag_aggregation const& agg) + data_type col_type, merge_variances_aggregation const& agg) { return visit(col_type, static_cast(agg)); } std::vector> simple_aggregations_collector::visit( - data_type col_type, udf_aggregation const& agg) + data_type col_type, merge_stds_aggregation const& agg) { return visit(col_type, static_cast(agg)); } @@ -227,6 +245,11 @@ void aggregation_finalizer::visit(mean_aggregation const& agg) visit(static_cast(agg)); } +void aggregation_finalizer::visit(m2_aggregation const& agg) +{ + visit(static_cast(agg)); +} + void aggregation_finalizer::visit(var_aggregation const& agg) { visit(static_cast(agg)); @@ -282,6 +305,16 @@ void 
aggregation_finalizer::visit(collect_set_aggregation const& agg) visit(static_cast(agg)); } +void aggregation_finalizer::visit(lead_lag_aggregation const& agg) +{ + visit(static_cast(agg)); +} + +void aggregation_finalizer::visit(udf_aggregation const& agg) +{ + visit(static_cast(agg)); +} + void aggregation_finalizer::visit(merge_lists_aggregation const& agg) { visit(static_cast(agg)); @@ -292,12 +325,12 @@ void aggregation_finalizer::visit(merge_sets_aggregation const& agg) visit(static_cast(agg)); } -void aggregation_finalizer::visit(lead_lag_aggregation const& agg) +void aggregation_finalizer::visit(merge_variances_aggregation const& agg) { visit(static_cast(agg)); } -void aggregation_finalizer::visit(udf_aggregation const& agg) +void aggregation_finalizer::visit(merge_stds_aggregation const& agg) { visit(static_cast(agg)); } @@ -311,7 +344,7 @@ std::vector> aggregation::get_simple_aggregations( } /// Factory to create a SUM aggregation -template +template std::unique_ptr make_sum_aggregation() { return std::make_unique(); @@ -320,7 +353,7 @@ template std::unique_ptr make_sum_aggregation(); template std::unique_ptr make_sum_aggregation(); /// Factory to create a PRODUCT aggregation -template +template std::unique_ptr make_product_aggregation() { return std::make_unique(); @@ -328,7 +361,7 @@ std::unique_ptr make_product_aggregation() template std::unique_ptr make_product_aggregation(); /// Factory to create a MIN aggregation -template +template std::unique_ptr make_min_aggregation() { return std::make_unique(); @@ -337,7 +370,7 @@ template std::unique_ptr make_min_aggregation(); template std::unique_ptr make_min_aggregation(); /// Factory to create a MAX aggregation -template +template std::unique_ptr make_max_aggregation() { return std::make_unique(); @@ -346,7 +379,7 @@ template std::unique_ptr make_max_aggregation(); template std::unique_ptr make_max_aggregation(); /// Factory to create a COUNT aggregation -template +template std::unique_ptr make_count_aggregation(null_policy null_handling) { auto kind = @@ -359,7 +392,7 @@ template std::unique_ptr make_count_aggregation +template std::unique_ptr make_any_aggregation() { return std::make_unique(); @@ -367,7 +400,7 @@ std::unique_ptr make_any_aggregation() template std::unique_ptr make_any_aggregation(); /// Factory to create a ALL aggregation -template +template std::unique_ptr make_all_aggregation() { return std::make_unique(); @@ -375,7 +408,7 @@ std::unique_ptr make_all_aggregation() template std::unique_ptr make_all_aggregation(); /// Factory to create a SUM_OF_SQUARES aggregation -template +template std::unique_ptr make_sum_of_squares_aggregation() { return std::make_unique(); @@ -383,7 +416,7 @@ std::unique_ptr make_sum_of_squares_aggregation() template std::unique_ptr make_sum_of_squares_aggregation(); /// Factory to create a MEAN aggregation -template +template std::unique_ptr make_mean_aggregation() { return std::make_unique(); @@ -391,8 +424,16 @@ std::unique_ptr make_mean_aggregation() template std::unique_ptr make_mean_aggregation(); template std::unique_ptr make_mean_aggregation(); +/// Factory to create a M2 aggregation +template +std::unique_ptr make_m2_aggregation() +{ + return std::make_unique(); +} +template std::unique_ptr make_m2_aggregation(); + /// Factory to create a VARIANCE aggregation -template +template std::unique_ptr make_variance_aggregation(size_type ddof) { return std::make_unique(ddof); @@ -400,7 +441,7 @@ std::unique_ptr make_variance_aggregation(size_type ddof) template std::unique_ptr 
make_variance_aggregation(size_type ddof); /// Factory to create a STD aggregation -template +template std::unique_ptr make_std_aggregation(size_type ddof) { return std::make_unique(ddof); @@ -408,7 +449,7 @@ std::unique_ptr make_std_aggregation(size_type ddof) template std::unique_ptr make_std_aggregation(size_type ddof); /// Factory to create a MEDIAN aggregation -template +template std::unique_ptr make_median_aggregation() { return std::make_unique(); @@ -416,7 +457,7 @@ std::unique_ptr make_median_aggregation() template std::unique_ptr make_median_aggregation(); /// Factory to create a QUANTILE aggregation -template +template std::unique_ptr make_quantile_aggregation(std::vector const& q, interpolation i) { return std::make_unique(q, i); @@ -425,7 +466,7 @@ template std::unique_ptr make_quantile_aggregation( std::vector const& q, interpolation i); /// Factory to create an ARGMAX aggregation -template +template std::unique_ptr make_argmax_aggregation() { return std::make_unique(); @@ -434,7 +475,7 @@ template std::unique_ptr make_argmax_aggregation(); template std::unique_ptr make_argmax_aggregation(); /// Factory to create an ARGMIN aggregation -template +template std::unique_ptr make_argmin_aggregation() { return std::make_unique(); @@ -443,7 +484,7 @@ template std::unique_ptr make_argmin_aggregation(); template std::unique_ptr make_argmin_aggregation(); /// Factory to create an NUNIQUE aggregation -template +template std::unique_ptr make_nunique_aggregation(null_policy null_handling) { return std::make_unique(null_handling); @@ -452,7 +493,7 @@ template std::unique_ptr make_nunique_aggregation( null_policy null_handling); /// Factory to create an NTH_ELEMENT aggregation -template +template std::unique_ptr make_nth_element_aggregation(size_type n, null_policy null_handling) { return std::make_unique(n, null_handling); @@ -461,7 +502,7 @@ template std::unique_ptr make_nth_element_aggregation( size_type n, null_policy null_handling); /// Factory to create a ROW_NUMBER aggregation -template +template std::unique_ptr make_row_number_aggregation() { return std::make_unique(); @@ -470,7 +511,7 @@ template std::unique_ptr make_row_number_aggregation() template std::unique_ptr make_row_number_aggregation(); /// Factory to create a COLLECT_LIST aggregation -template +template std::unique_ptr make_collect_list_aggregation(null_policy null_handling) { return std::make_unique(null_handling); @@ -481,7 +522,7 @@ template std::unique_ptr make_collect_list_aggregation +template std::unique_ptr make_collect_set_aggregation(null_policy null_handling, null_equality nulls_equal, nan_equality nans_equal) @@ -493,26 +534,8 @@ template std::unique_ptr make_collect_set_aggregation( template std::unique_ptr make_collect_set_aggregation( null_policy null_handling, null_equality nulls_equal, nan_equality nans_equal); -/// Factory to create a MERGE_LISTS aggregation -template -std::unique_ptr make_merge_lists_aggregation() -{ - return std::make_unique(); -} -template std::unique_ptr make_merge_lists_aggregation(); - -/// Factory to create a MERGE_SETS aggregation -template -std::unique_ptr make_merge_sets_aggregation(null_equality nulls_equal, - nan_equality nans_equal) -{ - return std::make_unique(nulls_equal, nans_equal); -} -template std::unique_ptr make_merge_sets_aggregation(null_equality, - nan_equality); - /// Factory to create a LAG aggregation -template +template std::unique_ptr make_lag_aggregation(size_type offset) { return std::make_unique(aggregation::LAG, offset); @@ -522,7 +545,7 @@ template 
std::unique_ptr make_lag_aggregation +template std::unique_ptr make_lead_aggregation(size_type offset) { return std::make_unique(aggregation::LEAD, offset); @@ -532,7 +555,7 @@ template std::unique_ptr make_lead_aggregation +template std::unique_ptr make_udf_aggregation(udf_type type, std::string const& user_defined_aggregator, data_type output_type) @@ -548,6 +571,40 @@ template std::unique_ptr make_udf_aggregation( template std::unique_ptr make_udf_aggregation( udf_type type, std::string const& user_defined_aggregator, data_type output_type); +/// Factory to create a MERGE_LISTS aggregation +template +std::unique_ptr make_merge_lists_aggregation() +{ + return std::make_unique(); +} +template std::unique_ptr make_merge_lists_aggregation(); + +/// Factory to create a MERGE_SETS aggregation +template +std::unique_ptr make_merge_sets_aggregation(null_equality nulls_equal, + nan_equality nans_equal) +{ + return std::make_unique(nulls_equal, nans_equal); +} +template std::unique_ptr make_merge_sets_aggregation(null_equality, + nan_equality); + +/// Factory to create a MERGE_VARIANCES aggregation +template +std::unique_ptr make_merge_variances_aggregation(size_type ddof) +{ + return std::make_unique(ddof); +} +template std::unique_ptr make_merge_variances_aggregation(size_type); + +/// Factory to create a MERGE__STD aggregation +template +std::unique_ptr make_merge_stds_aggregation(size_type ddof) +{ + return std::make_unique(ddof); +} +template std::unique_ptr make_merge_stds_aggregation(size_type); + namespace detail { namespace { struct target_type_functor { diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp index 5e202b9ef3f..473062dc266 100644 --- a/cpp/src/groupby/sort/aggregate.cpp +++ b/cpp/src/groupby/sort/aggregate.cpp @@ -239,25 +239,46 @@ void aggregate_result_functor::operator()(aggregation const& }; template <> -void aggregate_result_functor::operator()(aggregation const& agg) +void aggregate_result_functor::operator()(aggregation const& agg) { if (cache.has_result(col_idx, agg)) return; - auto var_agg = dynamic_cast(agg); - auto mean_agg = make_mean_aggregation(); - auto count_agg = make_count_aggregation(); + auto mean_agg = make_mean_aggregation(); operator()(*mean_agg); + auto const mean_result = cache.get_result(col_idx, *mean_agg); + + cache.add_result( + col_idx, + agg, + detail::group_m2(get_grouped_values(), mean_result, helper.group_labels(stream), stream, mr)); +}; + +template <> +void aggregate_result_functor::operator()(aggregation const& agg) +{ + if (cache.has_result(col_idx, agg)) return; + + auto const count_agg = make_count_aggregation(); operator()(*count_agg); - column_view mean_result = cache.get_result(col_idx, *mean_agg); - column_view group_sizes = cache.get_result(col_idx, *count_agg); - auto result = detail::group_var(get_grouped_values(), - mean_result, - group_sizes, - helper.group_labels(stream), - var_agg._ddof, - stream, - mr); + auto const group_sizes = cache.get_result(col_idx, *count_agg); + auto const& group_labels = helper.group_labels(stream); + auto const ddof = dynamic_cast(agg)._ddof; + + auto result = [&] { + if (auto m2_agg = make_m2_aggregation(); cache.has_result(col_idx, *m2_agg)) { + auto const group_m2 = cache.get_result(col_idx, *m2_agg); + return detail::group_var_from_m2(group_m2, group_sizes, group_labels, ddof, stream, mr); + } else { + auto mean_agg = make_mean_aggregation(); + operator()(*mean_agg); + auto const mean_result = cache.get_result(col_idx, *mean_agg); + + return detail::group_var( + 
get_grouped_values(), mean_result, group_sizes, group_labels, ddof, stream, mr); + } + }(); + cache.add_result(col_idx, agg, std::move(result)); }; @@ -474,6 +495,81 @@ void aggregate_result_functor::operator()(aggregation c mr)); }; +/** + * @brief Perform merging for the lists that correspond to the same key value. + * + * This aggregation is similar to `COLLECT_LIST` with the following differences: + * - It requires the input values to be a non-nullable lists column, and + * - The values (lists) corresponding to the same key will not result in a list of lists as output + * from `COLLECT_LIST`. Instead, those lists will result in a list generated by merging them + * together. + * + * In practice, this aggregation is used to merge the partial results of multiple (distributed) + * groupby `COLLECT_LIST` aggregations into a final `COLLECT_LIST` result. Those distributed + * aggregations were executed on different values columns partitioned from the original values + * column, then their results were (vertically) concatenated before given as the values column for + * this aggregation. + * + * TODO + */ +template <> +void aggregate_result_functor::operator()(aggregation const& agg) +{ + if (cache.has_result(col_idx, agg)) { return; } + + cache.add_result( + col_idx, + agg, + detail::group_merge_lists( + get_grouped_values(), helper.group_offsets(stream), helper.num_groups(stream), stream, mr)); +}; + +/** + * @brief Perform merging for the lists corresponding to the same key value, then dropping duplicate + * list entries. + * + * This aggregation is similar to `COLLECT_SET` with the following differences: + * - It requires the input values to be a non-nullable lists column, and + * - The values (lists) corresponding to the same key will result in a list generated by merging + * them together then dropping duplicate entries. + * + * In practice, this aggregation is used to merge the partial results of multiple (distributed) + * groupby `COLLECT_LIST` or `COLLECT_SET` aggregations into a final `COLLECT_SET` result. Those + * distributed aggregations were executed on different values columns partitioned from the original + * values column, then their results were (vertically) concatenated before given as the values + * column for this aggregation. + * + * Firstly, this aggregation performs `MERGE_LISTS` to concatenate the input lists (corresponding to + * the same key) into intermediate lists, then it calls `lists::drop_list_duplicates` on them to + * remove duplicate list entries. As such, the input (partial results) to this aggregation should be + * generated by (distributed) `COLLECT_LIST` aggregations, not `COLLECT_SET`, to avoid unnecessarily + * removing duplicate entries for the partial results. + * + * Since duplicate list entries will be removed, the parameters `null_equality` and `nan_equality` + * are needed for calling to `lists::drop_list_duplicates`. 
+ * + * TODO + */ +template <> +void aggregate_result_functor::operator()(aggregation const& agg) +{ + if (cache.has_result(col_idx, agg)) { return; } + + auto const merged_result = detail::group_merge_lists(get_grouped_values(), + helper.group_offsets(stream), + helper.num_groups(stream), + stream, + rmm::mr::get_current_device_resource()); + auto const merge_sets_agg = dynamic_cast(agg); + cache.add_result(col_idx, + agg, + lists::detail::drop_list_duplicates(lists_column_view(merged_result->view()), + merge_sets_agg._nulls_equal, + merge_sets_agg._nans_equal, + stream, + mr)); +}; + } // namespace detail // Sort-based groupby diff --git a/cpp/src/groupby/sort/group_merge_variances.cu b/cpp/src/groupby/sort/group_merge_variances.cu new file mode 100644 index 00000000000..329f22431b1 --- /dev/null +++ b/cpp/src/groupby/sort/group_merge_variances.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include + +#include + +namespace cudf { +namespace groupby { +namespace detail { +std::unique_ptr group_merge_variances(column_view const& values, + cudf::device_span group_offsets, + size_type num_groups, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + CUDF_EXPECTS(values.type().id() == type_id::LIST, + "Input to `group_merge_lists` must be a lists column."); + CUDF_EXPECTS(!values.nullable(), + "Input to `group_merge_lists` must be a non-nullable lists column."); + + auto offsets_column = make_numeric_column( + data_type(type_to_id()), num_groups + 1, mask_state::UNALLOCATED, stream, mr); + + // Generate offsets of the output lists column by gathering from the provided group offsets and + // the input list offsets. + // + // For example: + // values = [[2, 1], [], [4, -1, -2], [], [, 4, ]] + // list_offsets = [0, 2, 2, 5, 5 8] + // group_offsets = [0, 3, 5] + // + // then, the output offsets_column is [0, 5, 8]. + // + thrust::gather(rmm::exec_policy(stream), + group_offsets.begin(), + group_offsets.end(), + lists_column_view(values).offsets_begin(), + offsets_column->mutable_view().template begin()); + + // The child column of the output lists column is just copied from the input column. 
+ auto child_column = + std::make_unique(lists_column_view(values).get_sliced_child(stream), stream, mr); + + return make_lists_column(num_groups, + std::move(offsets_column), + std::move(child_column), + 0, + rmm::device_buffer{}, + stream, + mr); +} + +} // namespace detail +} // namespace groupby +} // namespace cudf diff --git a/cpp/src/groupby/sort/group_reductions.hpp b/cpp/src/groupby/sort/group_reductions.hpp index 3390af29330..78492f0dd9b 100644 --- a/cpp/src/groupby/sort/group_reductions.hpp +++ b/cpp/src/groupby/sort/group_reductions.hpp @@ -217,6 +217,47 @@ std::unique_ptr group_count_all(cudf::device_span group rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); +/** + * @brief Internal API to calculate groupwise sum of squares of differences from the current mean + * + * @code{.pseudo} + * values = [2, 1, 4, -1, -2, , 4, ] + * group_labels = [0, 0, 0, 1, 1, 2, 2, 3] + * group_means = [2.333333, -1.5, 4.0, ] + * + * group_m2 = [4.666666, 1.0, 0, ] + * @endcode + * + * @param values Grouped values to get M2 of + * @param group_means Pre-calculated groupwise MEAN + * @param group_labels ID of group corresponding value in @p values belongs to + * @param mr Device memory resource used to allocate the returned column's device memory + * @param stream CUDA stream used for device memory operations and kernel launches. + */ +std::unique_ptr group_m2(column_view const& values, + column_view const& group_means, + cudf::device_span group_labels, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + +/** + * @brief group_var + * @param values + * @param group_means + * @param group_sizes + * @param group_labels + * @param ddof + * @param stream + * @param mr + * @return + */ +std::unique_ptr group_var_from_m2(column_view const& group_m2, + column_view const& group_sizes, + cudf::device_span group_labels, + size_type ddof, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + /** * @brief Internal API to calculate groupwise variance * @@ -392,6 +433,30 @@ std::unique_ptr group_merge_lists(column_view const& values, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); +// todo +/** + * @brief Internal API to merge grouped variances into one variance value. + * + * @code{.pseudo} + * values = [[2, 1], [], [4, -1, -2], [], [, 4, ]] + * group_offsets = [0, 3, 5] + * num_groups = 2 + * + * group_merge_lists(...) = [[2, 1, 4, -1, -2], [, 4, ]] + * @endcode + * + * @param values Grouped values (lists column) to collect. + * @param group_offsets Offsets of groups' starting points within @p values. + * @param num_groups Number of groups. + * @param stream CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource used to allocate the returned column's device memory. 
+ */ +std::unique_ptr group_merge_variances(column_view const& values, + cudf::device_span group_offsets, + size_type num_groups, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + /** @endinternal * */ diff --git a/cpp/src/groupby/sort/group_std.cu b/cpp/src/groupby/sort/group_std.cu index 9ebb516ee14..c63db6c0c4e 100644 --- a/cpp/src/groupby/sort/group_std.cu +++ b/cpp/src/groupby/sort/group_std.cu @@ -49,16 +49,22 @@ struct var_transform { { if (d_values.is_null(i)) return 0.0; - ResultType x = static_cast(values_iter[i]); + auto const x = static_cast(values_iter[i]); + auto const group_idx = d_group_labels[i]; - size_type group_idx = d_group_labels[i]; - size_type group_size = d_group_sizes[group_idx]; + if (d_group_sizes) { // for variance/std + auto const group_size = d_group_sizes[group_idx]; - // prevent divide by zero error - if (group_size == 0 or group_size - ddof <= 0) return 0.0; + // prevent divide by zero error + if (group_size == 0 or group_size - ddof <= 0) return 0.0; - ResultType mean = d_means[group_idx]; - return (x - mean) * (x - mean) / (group_size - ddof); + auto const mean = d_means[group_idx]; + return (x - mean) * (x - mean) / (group_size - ddof); + } else { // for m2 + auto const mean = d_means[group_idx]; + auto const diff = x - mean; + return diff * diff; + } } }; @@ -85,6 +91,50 @@ void reduce_by_key_fn(column_device_view const& values, d_result); } +struct m2_functor { + template + std::enable_if_t::value, std::unique_ptr> operator()( + column_view const& values, + column_view const& group_means, + cudf::device_span group_labels, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { +// Running this in debug build causes a runtime error: +// `reduce_by_key failed on 2nd step: invalid device function` +#if !defined(__CUDACC_DEBUG__) + using ResultType = cudf::detail::target_type_t; + + auto result = make_numeric_column( + data_type(type_to_id()), values.size(), mask_state::UNINITIALIZED, stream, mr); + + auto const values_dv_ptr = column_device_view::create(values, stream); + auto const values_dv = *values_dv_ptr; + + auto d_means = group_means.data(); + auto d_result = result->mutable_view().data(); + + if (!cudf::is_dictionary(values.type())) { + auto const values_iter = values_dv.begin(); + reduce_by_key_fn(values_dv, values_iter, group_labels, d_means, nullptr, 0, d_result, stream); + } else { + auto values_iter = cudf::dictionary::detail::make_dictionary_iterator(*values_dv_ptr); + reduce_by_key_fn(values_dv, values_iter, group_labels, d_means, nullptr, 0, d_result, stream); + } + + return result; +#else + CUDF_FAIL("Groupby m2 aggregation is not supported in debug build"); +#endif + } + + template + std::enable_if_t::value, std::unique_ptr> operator()(Args&&...) + { + CUDF_FAIL("Only numeric types are supported in groupby m2 aggregation"); + } +}; + struct var_functor { template std::enable_if_t::value, std::unique_ptr> operator()( @@ -152,6 +202,30 @@ struct var_functor { } // namespace +std::unique_ptr group_m2(column_view const& values, + column_view const& group_means, + cudf::device_span group_labels, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto values_type = cudf::is_dictionary(values.type()) + ? 
dictionary_column_view(values).keys().type() + : values.type(); + + return type_dispatcher(values_type, m2_functor{}, values, group_means, group_labels, stream, mr); +} + +std::unique_ptr group_var_from_m2(column_view const& group_m2, + column_view const& group_sizes, + cudf::device_span group_labels, + size_type ddof, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + // TODO + return std::make_unique(); +} + std::unique_ptr group_var(column_view const& values, column_view const& group_means, column_view const& group_sizes, From ebc718a2a82bba9a6d5879d72ff1f011171daba4 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 25 Jun 2021 13:33:38 -0600 Subject: [PATCH 03/19] Remove `MERGE_VARIANCES` and `MERGE_STDS`, add `MERGE_M2` --- cpp/CMakeLists.txt | 4 +- cpp/include/cudf/aggregation.hpp | 101 +++++++----------- .../cudf/detail/aggregation/aggregation.hpp | 61 +++-------- cpp/src/aggregation/aggregation.cpp | 31 ++---- cpp/src/groupby/sort/aggregate.cpp | 70 ++---------- ...p_merge_variances.cu => group_merge_m2.cu} | 10 +- cpp/src/groupby/sort/group_reductions.hpp | 12 +-- 7 files changed, 76 insertions(+), 213 deletions(-) rename cpp/src/groupby/sort/{group_merge_variances.cu => group_merge_m2.cu} (86%) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 05c3028067e..c6d56f7075b 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -199,7 +199,7 @@ add_library(cudf src/groupby/sort/aggregate.cpp src/groupby/sort/group_collect.cu src/groupby/sort/group_merge_lists.cu - src/groupby/sort/group_merge_variances.cu + src/groupby/sort/group_merge_m2.cu src/groupby/sort/group_count.cu src/groupby/sort/group_max.cu src/groupby/sort/group_min.cu @@ -273,7 +273,7 @@ add_library(cudf src/join/join.cu src/join/semi_join.cu src/lists/contains.cu - src/lists/combine/concatenate_list_elements.cu + src/lists/combine/concatenate_list_elements.cu src/lists/combine/concatenate_rows.cu src/lists/copying/concatenate.cu src/lists/copying/copying.cu diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index 5913d39e020..8602b7086f1 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -57,36 +57,35 @@ class aggregation { * @brief Possible aggregation operations */ enum Kind { - SUM, ///< sum reduction - PRODUCT, ///< product reduction - MIN, ///< min reduction - MAX, ///< max reduction - COUNT_VALID, ///< count number of valid elements - COUNT_ALL, ///< count number of elements - ANY, ///< any reduction - ALL, ///< all reduction - SUM_OF_SQUARES, ///< sum of squares reduction - MEAN, ///< arithmetic mean reduction - M2, ///< groupwise sum of squares of differences from the current mean - VARIANCE, ///< groupwise variance - STD, ///< groupwise standard deviation - MEDIAN, ///< median reduction - QUANTILE, ///< compute specified quantile(s) - ARGMAX, ///< Index of max element - ARGMIN, ///< Index of min element - NUNIQUE, ///< count number of unique elements - NTH_ELEMENT, ///< get the nth element - ROW_NUMBER, ///< get row-number of current index (relative to rolling window) - COLLECT_LIST, ///< collect values into a list - COLLECT_SET, ///< collect values into a list without duplicate entries - LEAD, ///< window function, accesses row at specified offset following current row - LAG, ///< window function, accesses row at specified offset preceding current row - PTX, ///< PTX UDF based reduction - CUDA, ///< CUDA UDF based reduction - MERGE_LISTS, ///< merge multiple lists values into one list - MERGE_SETS, 
///< merge multiple lists values into one list then drop duplicate entries - MERGE_VARIANCES, ///< merge partial variance values - MERGE_STDS ///< merge partial standard deviation values + SUM, ///< sum reduction + PRODUCT, ///< product reduction + MIN, ///< min reduction + MAX, ///< max reduction + COUNT_VALID, ///< count number of valid elements + COUNT_ALL, ///< count number of elements + ANY, ///< any reduction + ALL, ///< all reduction + SUM_OF_SQUARES, ///< sum of squares reduction + MEAN, ///< arithmetic mean reduction + M2, ///< groupwise sum of squares of differences from the group mean + VARIANCE, ///< groupwise variance + STD, ///< groupwise standard deviation + MEDIAN, ///< median reduction + QUANTILE, ///< compute specified quantile(s) + ARGMAX, ///< Index of max element + ARGMIN, ///< Index of min element + NUNIQUE, ///< count number of unique elements + NTH_ELEMENT, ///< get the nth element + ROW_NUMBER, ///< get row-number of current index (relative to rolling window) + COLLECT_LIST, ///< collect values into a list + COLLECT_SET, ///< collect values into a list without duplicate entries + LEAD, ///< window function, accesses row at specified offset following current row + LAG, ///< window function, accesses row at specified offset preceding current row + PTX, ///< PTX UDF based reduction + CUDA, ///< CUDA UDF based reduction + MERGE_LISTS, ///< merge multiple lists values into one list + MERGE_SETS, ///< merge multiple lists values into one list then drop duplicate entries + MERGE_M2 ///< merge partial values of M2 aggregation }; aggregation() = delete; @@ -165,7 +164,7 @@ std::unique_ptr make_mean_aggregation(); /** * @brief Factory to create a M2 aggregation * - * A M2 aggregation is groupwise sum of squares of differences from the current mean. From this, + * A M2 aggregation is groupwise sum of squares of differences from the group mean. From this, * a `VARIANCE` aggregation can be computed as `M2 / (N - ddof)`, where `N` is the population size * and `ddof` is the delta degrees of freedom. */ @@ -344,43 +343,17 @@ std::unique_ptr make_merge_sets_aggregation(null_equality nulls_equal = nu nan_equality nans_equal = nan_equality::UNEQUAL); /** - * @brief Factory to create a MERGE_VARIANCES aggregation + * @brief Factory to create a MERGE_M2 aggregation * - * This aggregation is designed specificly to perform distributed computing of `VARIANCE` - * aggregation. The partial results input to this aggregation is generated by two groupby - * aggregations: `VARIANCE` and `COUNT_VALID`. + * This aggregation is designed specificly to perform distributed computing of `M2` + * aggregation. The partial results input to this aggregation is a structs column with children are + * columns generated by three groupby aggregations: `SUM`, `COUNT_VALID`, and `M2`. * - * In order to use this aggregation, the `aggregation_request` array input to `groupby::aggregate` - * must contain at least two requests: - * - A request for `COLLECT_LIST` aggregation to collect the partial results of `COUNT_VALID` - * - This `MERGE_VARIANCES` request, which must be given AFTER the request above so that it can - * access the cached results generated by that request - * - * For a merging operation that is not a final merge (i.e., its outputs will be used as input to - * perform another `MERGE_VARIANCES` aggregation), a `SUM` aggregation must also be added to the - * same request for `COLLECT_LIST` above to produce the merged values for `COUNT_VALID`. 
- * - * Since the partial results output from `VARIANCE` and `COUNT_VALID` do not contain nulls, the - * input values columns to those two requests must be non-nullable. - * - * @param ddof Delta degrees of freedom. The divisor used in calculation of `variance` is - * `N - ddof`, where `N` is the population size. - */ -template -std::unique_ptr make_merge_variances_aggregation(size_type ddof = 1); - -/** - * @brief Factory to create a MERGE_STDS aggregation - * - * This aggregation is designed specificly to perform distributed computing of `STD` - * aggregation. The partial results input to this aggregation and its usage are the same as of - * `MERGE_VARIANCES` aggregation. - * - * @param ddof Delta degrees of freedom. The divisor used in calculation of `variance` is - * `N - ddof`, where `N` is the population size. + * Since the partial results output from those aggregations do not contain nulls, the input structs + * column and its children must all be non-nullable. */ template -std::unique_ptr make_merge_stds_aggregation(size_type ddof = 1); +std::unique_ptr make_merge_m2_aggregation(); /** @} */ // end of group } // namespace cudf diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp index ab2ef947cba..d4176e1ab6c 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.hpp +++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp @@ -85,10 +85,8 @@ class simple_aggregations_collector { // Declares the interface for the simple class merge_lists_aggregation const& agg); virtual std::vector> visit(data_type col_type, class merge_sets_aggregation const& agg); - virtual std::vector> visit( - data_type col_type, class merge_variances_aggregation const& agg); virtual std::vector> visit(data_type col_type, - class merge_stds_aggregation const& agg); + class merge_m2_aggregation const& agg); }; class aggregation_finalizer { // Declares the interface for the finalizer @@ -120,8 +118,7 @@ class aggregation_finalizer { // Declares the interface for the finalizer virtual void visit(class udf_aggregation const& agg); virtual void visit(class merge_lists_aggregation const& agg); virtual void visit(class merge_sets_aggregation const& agg); - virtual void visit(class merge_variances_aggregation const& agg); - virtual void visit(class merge_stds_aggregation const& agg); + virtual void visit(class merge_m2_aggregation const& agg); }; /** @@ -333,9 +330,8 @@ class std_var_aggregation : public aggregation { protected: std_var_aggregation(aggregation::Kind k, size_type ddof) : aggregation(k), _ddof{ddof} { - CUDF_EXPECTS(k == aggregation::STD or k == aggregation::VARIANCE or - k == aggregation::MERGE_STDS or k == aggregation::MERGE_VARIANCES, - "std_var_aggregation can accept only STD, VARIANCE, MERGE_STDS, MERGE_VARIANCES"); + CUDF_EXPECTS(k == aggregation::STD or k == aggregation::VARIANCE, + "std_var_aggregation can accept only STD, VARIANCE"); } size_type hash_impl() const { return std::hash{}(_ddof); } @@ -813,40 +809,15 @@ class merge_sets_aggregation final : public aggregation { }; /** - * @brief Derived aggregation class for specifying MERGE_VARIANCES aggregation - */ -class merge_variances_aggregation final : public std_var_aggregation { - public: - explicit merge_variances_aggregation(size_type ddof) - : std_var_aggregation{aggregation::MERGE_VARIANCES, ddof} - { - } - - std::unique_ptr clone() const override - { - return std::make_unique(*this); - } - std::vector> get_simple_aggregations( - data_type col_type, 
cudf::detail::simple_aggregations_collector& collector) const override - { - return collector.visit(col_type, *this); - } - void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } -}; - -/** - * @brief Derived aggregation class for specifying MERGE_STDS aggregation + * @brief Derived aggregation class for specifying MERGE_M2 aggregation */ -class merge_stds_aggregation final : public std_var_aggregation { +class merge_m2_aggregation final : public aggregation { public: - explicit merge_stds_aggregation(size_type ddof) - : std_var_aggregation{aggregation::MERGE_STDS, ddof} - { - } + explicit merge_m2_aggregation() : aggregation{MERGE_M2} {} std::unique_ptr clone() const override { - return std::make_unique(*this); + return std::make_unique(*this); } std::vector> get_simple_aggregations( data_type col_type, cudf::detail::simple_aggregations_collector& collector) const override @@ -1073,15 +1044,9 @@ struct target_type_impl { using type = cudf::list_view; }; -// Always use `double` for MERGE_VARIANCES -template -struct target_type_impl { - using type = double; -}; - -// Always use `double` for MERGE_STDS +// Always use `double` for MERGE_M2 template -struct target_type_impl { +struct target_type_impl { using type = double; }; @@ -1182,10 +1147,8 @@ CUDA_HOST_DEVICE_CALLABLE decltype(auto) aggregation_dispatcher(aggregation::Kin return f.template operator()(std::forward(args)...); case aggregation::MERGE_SETS: return f.template operator()(std::forward(args)...); - case aggregation::MERGE_VARIANCES: - return f.template operator()(std::forward(args)...); - case aggregation::MERGE_STDS: - return f.template operator()(std::forward(args)...); + case aggregation::MERGE_M2: + return f.template operator()(std::forward(args)...); default: { #ifndef __CUDA_ARCH__ CUDF_FAIL("Unsupported aggregation."); diff --git a/cpp/src/aggregation/aggregation.cpp b/cpp/src/aggregation/aggregation.cpp index 26340bf6c41..53a55351f8e 100644 --- a/cpp/src/aggregation/aggregation.cpp +++ b/cpp/src/aggregation/aggregation.cpp @@ -185,13 +185,7 @@ std::vector> simple_aggregations_collector::visit( } std::vector> simple_aggregations_collector::visit( - data_type col_type, merge_variances_aggregation const& agg) -{ - return visit(col_type, static_cast(agg)); -} - -std::vector> simple_aggregations_collector::visit( - data_type col_type, merge_stds_aggregation const& agg) + data_type col_type, merge_m2_aggregation const& agg) { return visit(col_type, static_cast(agg)); } @@ -325,12 +319,7 @@ void aggregation_finalizer::visit(merge_sets_aggregation const& agg) visit(static_cast(agg)); } -void aggregation_finalizer::visit(merge_variances_aggregation const& agg) -{ - visit(static_cast(agg)); -} - -void aggregation_finalizer::visit(merge_stds_aggregation const& agg) +void aggregation_finalizer::visit(merge_m2_aggregation const& agg) { visit(static_cast(agg)); } @@ -589,21 +578,13 @@ std::unique_ptr make_merge_sets_aggregation(null_equality nulls_equal, template std::unique_ptr make_merge_sets_aggregation(null_equality, nan_equality); -/// Factory to create a MERGE_VARIANCES aggregation -template -std::unique_ptr make_merge_variances_aggregation(size_type ddof) -{ - return std::make_unique(ddof); -} -template std::unique_ptr make_merge_variances_aggregation(size_type); - -/// Factory to create a MERGE__STD aggregation +/// Factory to create a MERGE_M2 aggregation template -std::unique_ptr make_merge_stds_aggregation(size_type ddof) +std::unique_ptr make_merge_m2_aggregation() { - return 
std::make_unique(ddof); + return std::make_unique(); } -template std::unique_ptr make_merge_stds_aggregation(size_type); +template std::unique_ptr make_merge_m2_aggregation(); namespace detail { namespace { diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp index 473062dc266..8c12aa57f6d 100644 --- a/cpp/src/groupby/sort/aggregate.cpp +++ b/cpp/src/groupby/sort/aggregate.cpp @@ -496,80 +496,26 @@ void aggregate_result_functor::operator()(aggregation c }; /** - * @brief Perform merging for the lists that correspond to the same key value. - * - * This aggregation is similar to `COLLECT_LIST` with the following differences: - * - It requires the input values to be a non-nullable lists column, and - * - The values (lists) corresponding to the same key will not result in a list of lists as output - * from `COLLECT_LIST`. Instead, those lists will result in a list generated by merging them - * together. + * @brief Perform merging for the M2 values that correspond to the same key value. * - * In practice, this aggregation is used to merge the partial results of multiple (distributed) - * groupby `COLLECT_LIST` aggregations into a final `COLLECT_LIST` result. Those distributed - * aggregations were executed on different values columns partitioned from the original values - * column, then their results were (vertically) concatenated before given as the values column for - * this aggregation. - * - * TODO + * The partial results input to this aggregation is a structs column with children are + * columns generated by three groupby aggregations: `SUM`, `COUNT_VALID`, and `M2` performed on + * partitioned datasets. After distributedly computed, the results output from these aggregations + * are (vertically) concatenated before assembling into a structs column given as the values column + * for this aggregation. */ template <> -void aggregate_result_functor::operator()(aggregation const& agg) +void aggregate_result_functor::operator()(aggregation const& agg) { if (cache.has_result(col_idx, agg)) { return; } cache.add_result( col_idx, agg, - detail::group_merge_lists( + detail::group_merge_m2( get_grouped_values(), helper.group_offsets(stream), helper.num_groups(stream), stream, mr)); }; -/** - * @brief Perform merging for the lists corresponding to the same key value, then dropping duplicate - * list entries. - * - * This aggregation is similar to `COLLECT_SET` with the following differences: - * - It requires the input values to be a non-nullable lists column, and - * - The values (lists) corresponding to the same key will result in a list generated by merging - * them together then dropping duplicate entries. - * - * In practice, this aggregation is used to merge the partial results of multiple (distributed) - * groupby `COLLECT_LIST` or `COLLECT_SET` aggregations into a final `COLLECT_SET` result. Those - * distributed aggregations were executed on different values columns partitioned from the original - * values column, then their results were (vertically) concatenated before given as the values - * column for this aggregation. - * - * Firstly, this aggregation performs `MERGE_LISTS` to concatenate the input lists (corresponding to - * the same key) into intermediate lists, then it calls `lists::drop_list_duplicates` on them to - * remove duplicate list entries. 
As such, the input (partial results) to this aggregation should be - * generated by (distributed) `COLLECT_LIST` aggregations, not `COLLECT_SET`, to avoid unnecessarily - * removing duplicate entries for the partial results. - * - * Since duplicate list entries will be removed, the parameters `null_equality` and `nan_equality` - * are needed for calling to `lists::drop_list_duplicates`. - * - * TODO - */ -template <> -void aggregate_result_functor::operator()(aggregation const& agg) -{ - if (cache.has_result(col_idx, agg)) { return; } - - auto const merged_result = detail::group_merge_lists(get_grouped_values(), - helper.group_offsets(stream), - helper.num_groups(stream), - stream, - rmm::mr::get_current_device_resource()); - auto const merge_sets_agg = dynamic_cast(agg); - cache.add_result(col_idx, - agg, - lists::detail::drop_list_duplicates(lists_column_view(merged_result->view()), - merge_sets_agg._nulls_equal, - merge_sets_agg._nans_equal, - stream, - mr)); -}; - } // namespace detail // Sort-based groupby diff --git a/cpp/src/groupby/sort/group_merge_variances.cu b/cpp/src/groupby/sort/group_merge_m2.cu similarity index 86% rename from cpp/src/groupby/sort/group_merge_variances.cu rename to cpp/src/groupby/sort/group_merge_m2.cu index 329f22431b1..4ffc29c2a56 100644 --- a/cpp/src/groupby/sort/group_merge_variances.cu +++ b/cpp/src/groupby/sort/group_merge_m2.cu @@ -26,11 +26,11 @@ namespace cudf { namespace groupby { namespace detail { -std::unique_ptr group_merge_variances(column_view const& values, - cudf::device_span group_offsets, - size_type num_groups, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::unique_ptr group_merge_m2(column_view const& values, + cudf::device_span group_offsets, + size_type num_groups, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(values.type().id() == type_id::LIST, "Input to `group_merge_lists` must be a lists column."); diff --git a/cpp/src/groupby/sort/group_reductions.hpp b/cpp/src/groupby/sort/group_reductions.hpp index 78492f0dd9b..032973481a0 100644 --- a/cpp/src/groupby/sort/group_reductions.hpp +++ b/cpp/src/groupby/sort/group_reductions.hpp @@ -435,7 +435,7 @@ std::unique_ptr group_merge_lists(column_view const& values, // todo /** - * @brief Internal API to merge grouped variances into one variance value. + * @brief Internal API to merge grouped m2 values corresponding to the same key. * * @code{.pseudo} * values = [[2, 1], [], [4, -1, -2], [], [, 4, ]] @@ -451,11 +451,11 @@ std::unique_ptr group_merge_lists(column_view const& values, * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. 
*/ -std::unique_ptr group_merge_variances(column_view const& values, - cudf::device_span group_offsets, - size_type num_groups, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); +std::unique_ptr group_merge_m2(column_view const& values, + cudf::device_span group_offsets, + size_type num_groups, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** @endinternal * From 93b38c41bf965bf93bc4551d5fab655f1011d18a Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 30 Jun 2021 13:05:47 -0600 Subject: [PATCH 04/19] Finish implementation, no unit tests implemented yet --- cpp/CMakeLists.txt | 7 +- cpp/include/cudf/aggregation.hpp | 11 +- cpp/src/groupby/sort/aggregate.cpp | 44 ++++--- cpp/src/groupby/sort/group_m2.cu | 140 ++++++++++++++++++++++ cpp/src/groupby/sort/group_merge_m2.cu | 128 ++++++++++++++------ cpp/src/groupby/sort/group_reductions.hpp | 53 +++----- cpp/src/groupby/sort/group_std.cu | 88 ++------------ 7 files changed, 284 insertions(+), 187 deletions(-) create mode 100644 cpp/src/groupby/sort/group_m2.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index c6d56f7075b..adc62494382 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -194,15 +194,16 @@ add_library(cudf src/filling/sequence.cu src/groupby/groupby.cu src/groupby/hash/groupby.cu + src/groupby/sort/aggregate.cpp src/groupby/sort/group_argmax.cu src/groupby/sort/group_argmin.cu - src/groupby/sort/aggregate.cpp src/groupby/sort/group_collect.cu - src/groupby/sort/group_merge_lists.cu - src/groupby/sort/group_merge_m2.cu src/groupby/sort/group_count.cu + src/groupby/sort/group_m2.cu src/groupby/sort/group_max.cu src/groupby/sort/group_min.cu + src/groupby/sort/group_merge_lists.cu + src/groupby/sort/group_merge_m2.cu src/groupby/sort/group_nth_element.cu src/groupby/sort/group_nunique.cu src/groupby/sort/group_product.cu diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index 8602b7086f1..30da2976bda 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -164,9 +164,9 @@ std::unique_ptr make_mean_aggregation(); /** * @brief Factory to create a M2 aggregation * - * A M2 aggregation is groupwise sum of squares of differences from the group mean. From this, - * a `VARIANCE` aggregation can be computed as `M2 / (N - ddof)`, where `N` is the population size - * and `ddof` is the delta degrees of freedom. + * A M2 aggregation is groupwise sum of squares of differences from the group mean. It produces the + * intermediate values that are used to compute variance and standard deviation in distributed + * computing. */ template std::unique_ptr make_m2_aggregation(); @@ -347,10 +347,7 @@ std::unique_ptr make_merge_sets_aggregation(null_equality nulls_equal = nu * * This aggregation is designed specificly to perform distributed computing of `M2` * aggregation. The partial results input to this aggregation is a structs column with children are - * columns generated by three groupby aggregations: `SUM`, `COUNT_VALID`, and `M2`. - * - * Since the partial results output from those aggregations do not contain nulls, the input structs - * column and its children must all be non-nullable. + * columns generated by three groupby aggregations: `M2`, `COUNT_VALID`, and `MEAN`. 
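
// A minimal usage sketch, mirroring the call pattern of the unit tests added later in
// this series; the wrapper function `m2_per_key` and its signature are assumptions for
// illustration, not part of the patch.
#include <cudf/aggregation.hpp>
#include <cudf/column/column.hpp>
#include <cudf/groupby.hpp>
#include <cudf/table/table_view.hpp>

#include <memory>
#include <vector>

std::unique_ptr<cudf::column> m2_per_key(cudf::column_view const& keys,
                                         cudf::column_view const& vals)
{
  std::vector<cudf::groupby::aggregation_request> requests;
  requests.emplace_back(cudf::groupby::aggregation_request());
  requests[0].values = vals;
  requests[0].aggregations.emplace_back(cudf::make_m2_aggregation());

  auto gb_obj = cudf::groupby::groupby(cudf::table_view({keys}));
  auto result = gb_obj.aggregate(requests);  // result.first holds the unique keys
  return std::move(result.second[0].results[0]);
}
// For the distributed path, each partition would run COUNT_VALID, MEAN, and M2 this
// way, the partial outputs would be concatenated and assembled into a structs column,
// and a second groupby would request make_merge_m2_aggregation() on that column.
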
*/ template std::unique_ptr make_merge_m2_aggregation(); diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp index 8c12aa57f6d..78db3ad1f68 100644 --- a/cpp/src/groupby/sort/aggregate.cpp +++ b/cpp/src/groupby/sort/aggregate.cpp @@ -243,7 +243,7 @@ void aggregate_result_functor::operator()(aggregation const& ag { if (cache.has_result(col_idx, agg)) return; - auto mean_agg = make_mean_aggregation(); + auto const mean_agg = make_mean_aggregation(); operator()(*mean_agg); auto const mean_result = cache.get_result(col_idx, *mean_agg); @@ -258,27 +258,21 @@ void aggregate_result_functor::operator()(aggregation con { if (cache.has_result(col_idx, agg)) return; - auto const count_agg = make_count_aggregation(); + auto var_agg = dynamic_cast(agg); + auto mean_agg = make_mean_aggregation(); + auto count_agg = make_count_aggregation(); + operator()(*mean_agg); operator()(*count_agg); + column_view mean_result = cache.get_result(col_idx, *mean_agg); + column_view group_sizes = cache.get_result(col_idx, *count_agg); - auto const group_sizes = cache.get_result(col_idx, *count_agg); - auto const& group_labels = helper.group_labels(stream); - auto const ddof = dynamic_cast(agg)._ddof; - - auto result = [&] { - if (auto m2_agg = make_m2_aggregation(); cache.has_result(col_idx, *m2_agg)) { - auto const group_m2 = cache.get_result(col_idx, *m2_agg); - return detail::group_var_from_m2(group_m2, group_sizes, group_labels, ddof, stream, mr); - } else { - auto mean_agg = make_mean_aggregation(); - operator()(*mean_agg); - auto const mean_result = cache.get_result(col_idx, *mean_agg); - - return detail::group_var( - get_grouped_values(), mean_result, group_sizes, group_labels, ddof, stream, mr); - } - }(); - + auto result = detail::group_var(get_grouped_values(), + mean_result, + group_sizes, + helper.group_labels(stream), + var_agg._ddof, + stream, + mr); cache.add_result(col_idx, agg, std::move(result)); }; @@ -498,11 +492,15 @@ void aggregate_result_functor::operator()(aggregation c /** * @brief Perform merging for the M2 values that correspond to the same key value. * - * The partial results input to this aggregation is a structs column with children are - * columns generated by three groupby aggregations: `SUM`, `COUNT_VALID`, and `M2` performed on - * partitioned datasets. After distributedly computed, the results output from these aggregations + * The partial results input to this aggregation is a structs column with children are columns + * generated by three other groupby aggregations: `M2`, `MEAN`, `COUNT_VALID` performed + * on partitioned datasets. After distributedly computed, the results output from these aggregations * are (vertically) concatenated before assembling into a structs column given as the values column * for this aggregation. + * + * The values of M2 are merged following the parallel algorithm described here: + * https://www.wikiwand.com/en/Algorithms_for_calculating_variance#/Parallel_algorithm + * */ template <> void aggregate_result_functor::operator()(aggregation const& agg) diff --git a/cpp/src/groupby/sort/group_m2.cu b/cpp/src/groupby/sort/group_m2.cu new file mode 100644 index 00000000000..2de57a94923 --- /dev/null +++ b/cpp/src/groupby/sort/group_m2.cu @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "group_reductions.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +namespace cudf { +namespace groupby { +namespace detail { +namespace { + +template +struct m2_transform { + column_device_view const d_values; + Iterator const values_iter; + ResultType const* d_means; + size_type const* d_group_labels; + + __device__ ResultType operator()(size_type const idx) const noexcept + { + if (d_values.is_null(idx)) return 0.0; + + auto const x = static_cast(values_iter[idx]); + auto const group_idx = d_group_labels[idx]; + auto const mean = d_means[group_idx]; + auto const diff = x - mean; + return diff * diff; + } +}; + +template +void reduce_by_key_fn(column_device_view const& values, + Iterator values_iter, + cudf::device_span group_labels, + ResultType const* d_means, + ResultType* d_result, + rmm::cuda_stream_view stream) +{ + auto const var_iter = cudf::detail::make_counting_transform_iterator( + size_type{0}, + m2_transform{ + values, values_iter, d_means, group_labels.data()}); + + thrust::reduce_by_key(rmm::exec_policy(stream), + group_labels.begin(), + group_labels.end(), + var_iter, + thrust::make_discard_iterator(), + d_result); +} + +struct m2_functor { + template + std::enable_if_t::value, std::unique_ptr> operator()( + column_view const& values, + column_view const& group_means, + cudf::device_span group_labels, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + using ResultType = cudf::detail::target_type_t; + auto result = make_numeric_column( + data_type(type_to_id()), group_means.size(), mask_state::UNALLOCATED, stream, mr); + + auto const values_dv_ptr = column_device_view::create(values, stream); + auto const d_values = *values_dv_ptr; + auto const d_means = group_means.data(); + auto const d_result = result->mutable_view().data(); + + if (!cudf::is_dictionary(values.type())) { + auto const values_iter = d_values.begin(); + reduce_by_key_fn(d_values, values_iter, group_labels, d_means, d_result, stream); + } else { + auto const values_iter = + cudf::dictionary::detail::make_dictionary_iterator(*values_dv_ptr); + reduce_by_key_fn(d_values, values_iter, group_labels, d_means, d_result, stream); + } + + // M2 column values should have the same bitmask as means's. + if (group_means.nullable()) { + result->set_null_mask(cudf::detail::copy_bitmask(group_means, stream, mr), + group_means.null_count()); + } + + return result; + } + + template + std::enable_if_t::value, std::unique_ptr> operator()(Args&&...) + { + CUDF_FAIL("Only numeric types are supported in M2 groupby aggregation"); + } +}; + +} // namespace + +std::unique_ptr group_m2(column_view const& values, + column_view const& group_means, + cudf::device_span group_labels, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto values_type = cudf::is_dictionary(values.type()) + ? 
dictionary_column_view(values).keys().type() + : values.type(); + + return type_dispatcher(values_type, m2_functor{}, values, group_means, group_labels, stream, mr); +} + +} // namespace detail +} // namespace groupby +} // namespace cudf diff --git a/cpp/src/groupby/sort/group_merge_m2.cu b/cpp/src/groupby/sort/group_merge_m2.cu index 4ffc29c2a56..8176c2ea712 100644 --- a/cpp/src/groupby/sort/group_merge_m2.cu +++ b/cpp/src/groupby/sort/group_merge_m2.cu @@ -14,59 +14,113 @@ * limitations under the License. */ +#include #include -#include +#include +#include +#include +#include #include +#include #include #include -#include +#include +#include namespace cudf { namespace groupby { namespace detail { + +namespace { +template +struct accumulate_fn { + ResultType M2_a; + ResultType mean_a; + size_type n_a; + + void __device__ operator()(ResultType const M2_b, + ResultType const mean_b, + size_type const n_b) noexcept + { + if (n_b == 0) { return; } + + auto const n_ab = n_a + n_b; + auto const delta = mean_b - mean_a; + M2_a += + M2_b + (delta * delta) * static_cast(n_a) * static_cast(n_b) / n_ab; + mean_a = (mean_a * n_a + mean_b * n_b) / n_ab; + n_a = n_ab; + } +}; + +} // namespace + std::unique_ptr group_merge_m2(column_view const& values, cudf::device_span group_offsets, size_type num_groups, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - CUDF_EXPECTS(values.type().id() == type_id::LIST, - "Input to `group_merge_lists` must be a lists column."); - CUDF_EXPECTS(!values.nullable(), - "Input to `group_merge_lists` must be a non-nullable lists column."); - - auto offsets_column = make_numeric_column( - data_type(type_to_id()), num_groups + 1, mask_state::UNALLOCATED, stream, mr); - - // Generate offsets of the output lists column by gathering from the provided group offsets and - // the input list offsets. - // - // For example: - // values = [[2, 1], [], [4, -1, -2], [], [, 4, ]] - // list_offsets = [0, 2, 2, 5, 5 8] - // group_offsets = [0, 3, 5] - // - // then, the output offsets_column is [0, 5, 8]. - // - thrust::gather(rmm::exec_policy(stream), - group_offsets.begin(), - group_offsets.end(), - lists_column_view(values).offsets_begin(), - offsets_column->mutable_view().template begin()); - - // The child column of the output lists column is just copied from the input column. 
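
// A worked illustration of the merge step implemented by `accumulate_fn` above (the
// pairwise update of the parallel variance algorithm). The numbers are an assumed
// example, not taken from the patch: merging partial result A = (n = 2, mean = 1.5,
// M2 = 0.5), computed from {1, 2}, with B = (n = 1, mean = 4.0, M2 = 0.0), computed
// from {4}:
//   n_ab  = n_a + n_b                                 = 3
//   delta = mean_b - mean_a                           = 2.5
//   M2    = M2_a + M2_b + delta^2 * n_a * n_b / n_ab  = 0.5 + 0.0 + 6.25 * 2 / 3 ~= 4.666667
//   mean  = (mean_a * n_a + mean_b * n_b) / n_ab      = 7 / 3                    ~= 2.333333
// which equals the M2 and MEAN computed directly on the combined group {1, 2, 4}.
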
- auto child_column = - std::make_unique(lists_column_view(values).get_sliced_child(stream), stream, mr); - - return make_lists_column(num_groups, - std::move(offsets_column), - std::move(child_column), - 0, - rmm::device_buffer{}, - stream, - mr); + CUDF_EXPECTS(values.type().id() == type_id::STRUCT, + "Input to `group_merge_m2` must be a structs column."); + CUDF_EXPECTS(values.num_children() == 3, + "Input to `group_merge_m2` must be a structs column having 3 children columns."); + + using ResultType = id_to_type; + static_assert( + std::is_same_v, ResultType>); + CUDF_EXPECTS(values.child(0).type().id() == type_to_id() && + values.child(1).type().id() == type_to_id() && + values.child(2).type().id() == type_id::INT32, + "Input to `group_merge_m2` must be a structs column having children columns " + "containing tuples of groupwise (M2_value, mean, valid_count)."); + + auto result = make_numeric_column( + data_type(type_to_id()), num_groups, mask_state::UNALLOCATED, stream, mr); + + auto const M2_values = values.child(0); + auto const mean_values = values.child(1); + auto const count_valid = values.child(2); + auto const iter = thrust::make_counting_iterator(0); + auto validities = rmm::device_uvector(num_groups, stream); + + thrust::transform(rmm::exec_policy(stream), + iter, + iter + num_groups, + result->mutable_view().data(), + [d_M2 = M2_values.template begin(), + d_mean = mean_values.template begin(), + d_count = count_valid.template begin(), + d_offsets = group_offsets.begin(), + d_valid = validities.begin()] __device__(auto const group_idx) { + auto const start_idx = d_offsets[group_idx], + end_idx = d_offsets[group_idx + 1]; + + // Firstly, this stores (M2, mean, valid_count) of the first partial result. + // Then, merge all the following partial results into it. + auto accumulator = accumulate_fn{ + d_M2[start_idx], d_mean[start_idx], d_count[start_idx]}; + + for (auto idx = start_idx + 1; idx < end_idx; ++idx) { + auto const n_b = d_count[idx]; + auto const M2_b = n_b > 0 ? d_M2[idx] : ResultType{0}; + auto const mean_b = n_b > 0 ? d_mean[idx] : ResultType{0}; + accumulator(M2_b, mean_b, n_b); + } + + // If there are all nulls in the partial results (i.e., sum of valid counts is + // zero), then output a null. + d_valid[group_idx] = accumulator.n_a > 0; + return accumulator.n_a > 0 ? accumulator.M2_a : ResultType{0}; + }); + + auto [null_mask, null_count] = cudf::detail::valid_if( + validities.begin(), validities.end(), thrust::identity{}, stream, mr); + if (null_count > 0) { result->set_null_mask(null_mask, null_count); } + + return result; } } // namespace detail diff --git a/cpp/src/groupby/sort/group_reductions.hpp b/cpp/src/groupby/sort/group_reductions.hpp index 032973481a0..d3e043a0ac6 100644 --- a/cpp/src/groupby/sort/group_reductions.hpp +++ b/cpp/src/groupby/sort/group_reductions.hpp @@ -218,18 +218,19 @@ std::unique_ptr group_count_all(cudf::device_span group rmm::mr::device_memory_resource* mr); /** - * @brief Internal API to calculate groupwise sum of squares of differences from the current mean + * @brief Internal API to calculate groupwise sum of squares of differences from group means. * - * @code{.pseudo} - * values = [2, 1, 4, -1, -2, , 4, ] - * group_labels = [0, 0, 0, 1, 1, 2, 2, 3] - * group_means = [2.333333, -1.5, 4.0, ] + * If there are only nulls in the group, the output value of that group will be `0`. 
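
// Note: in the group_m2.cu implementation earlier in this patch, null rows contribute
// 0.0 to the reduction and the result column reuses the null mask of the pre-computed
// group means, so for an all-null group the stored value is 0 but the row itself is
// masked as null (the AllNullValuesInput unit test added later in this series checks
// exactly this behavior).
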
* - * group_m2 = [4.666666, 1.0, 0, ] + * @code{.pseudo} + * values = [2, 1, 4, -1, -2, , 4, ] + * group_labels = [0, 0, 0, 1, 1, 2, 2, 3] + * group_means = [2.333333, -1.5, 4.0, ] + * group_m2(...) = [4.666666, 1.0, 0.0, ] * @endcode * - * @param values Grouped values to get M2 of - * @param group_means Pre-calculated groupwise MEAN + * @param values Grouped values to compute M2 values + * @param group_means Pre-computed groupwise MEAN * @param group_labels ID of group corresponding value in @p values belongs to * @param mr Device memory resource used to allocate the returned column's device memory * @param stream CUDA stream used for device memory operations and kernel launches. @@ -240,24 +241,6 @@ std::unique_ptr group_m2(column_view const& values, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); -/** - * @brief group_var - * @param values - * @param group_means - * @param group_sizes - * @param group_labels - * @param ddof - * @param stream - * @param mr - * @return - */ -std::unique_ptr group_var_from_m2(column_view const& group_m2, - column_view const& group_sizes, - cudf::device_span group_labels, - size_type ddof, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); - /** * @brief Internal API to calculate groupwise variance * @@ -433,23 +416,21 @@ std::unique_ptr group_merge_lists(column_view const& values, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); -// todo /** - * @brief Internal API to merge grouped m2 values corresponding to the same key. + * @brief Internal API to merge grouped M2 values corresponding to the same key. * - * @code{.pseudo} - * values = [[2, 1], [], [4, -1, -2], [], [, 4, ]] - * group_offsets = [0, 3, 5] - * num_groups = 2 + * The values of M2 are merged following the parallel algorithm described here: + * https://www.wikiwand.com/en/Algorithms_for_calculating_variance#/Parallel_algorithm * - * group_merge_lists(...) = [[2, 1, 4, -1, -2], [, 4, ]] - * @endcode + * Merging M2 values require accessing to partial M2 values and also groupwise means and group valid + * counts. Thus, the input to this aggregation need to be a structs column containing tuples of + * groupwise `(M2_value, mean, valid_count)`. * - * @param values Grouped values (lists column) to collect. + * @param values Grouped values (tuples of groupwise `(M2_value, mean, valid_count)`) to merge M2. * @param group_offsets Offsets of groups' starting points within @p values. * @param num_groups Number of groups. + * @param mr Device memory resource used to allocate the returned column's device memory * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource used to allocate the returned column's device memory. 
*/ std::unique_ptr group_merge_m2(column_view const& values, cudf::device_span group_offsets, diff --git a/cpp/src/groupby/sort/group_std.cu b/cpp/src/groupby/sort/group_std.cu index c63db6c0c4e..9ebb516ee14 100644 --- a/cpp/src/groupby/sort/group_std.cu +++ b/cpp/src/groupby/sort/group_std.cu @@ -49,22 +49,16 @@ struct var_transform { { if (d_values.is_null(i)) return 0.0; - auto const x = static_cast(values_iter[i]); - auto const group_idx = d_group_labels[i]; + ResultType x = static_cast(values_iter[i]); - if (d_group_sizes) { // for variance/std - auto const group_size = d_group_sizes[group_idx]; + size_type group_idx = d_group_labels[i]; + size_type group_size = d_group_sizes[group_idx]; - // prevent divide by zero error - if (group_size == 0 or group_size - ddof <= 0) return 0.0; + // prevent divide by zero error + if (group_size == 0 or group_size - ddof <= 0) return 0.0; - auto const mean = d_means[group_idx]; - return (x - mean) * (x - mean) / (group_size - ddof); - } else { // for m2 - auto const mean = d_means[group_idx]; - auto const diff = x - mean; - return diff * diff; - } + ResultType mean = d_means[group_idx]; + return (x - mean) * (x - mean) / (group_size - ddof); } }; @@ -91,50 +85,6 @@ void reduce_by_key_fn(column_device_view const& values, d_result); } -struct m2_functor { - template - std::enable_if_t::value, std::unique_ptr> operator()( - column_view const& values, - column_view const& group_means, - cudf::device_span group_labels, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) - { -// Running this in debug build causes a runtime error: -// `reduce_by_key failed on 2nd step: invalid device function` -#if !defined(__CUDACC_DEBUG__) - using ResultType = cudf::detail::target_type_t; - - auto result = make_numeric_column( - data_type(type_to_id()), values.size(), mask_state::UNINITIALIZED, stream, mr); - - auto const values_dv_ptr = column_device_view::create(values, stream); - auto const values_dv = *values_dv_ptr; - - auto d_means = group_means.data(); - auto d_result = result->mutable_view().data(); - - if (!cudf::is_dictionary(values.type())) { - auto const values_iter = values_dv.begin(); - reduce_by_key_fn(values_dv, values_iter, group_labels, d_means, nullptr, 0, d_result, stream); - } else { - auto values_iter = cudf::dictionary::detail::make_dictionary_iterator(*values_dv_ptr); - reduce_by_key_fn(values_dv, values_iter, group_labels, d_means, nullptr, 0, d_result, stream); - } - - return result; -#else - CUDF_FAIL("Groupby m2 aggregation is not supported in debug build"); -#endif - } - - template - std::enable_if_t::value, std::unique_ptr> operator()(Args&&...) - { - CUDF_FAIL("Only numeric types are supported in groupby m2 aggregation"); - } -}; - struct var_functor { template std::enable_if_t::value, std::unique_ptr> operator()( @@ -202,30 +152,6 @@ struct var_functor { } // namespace -std::unique_ptr group_m2(column_view const& values, - column_view const& group_means, - cudf::device_span group_labels, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - auto values_type = cudf::is_dictionary(values.type()) - ? 
dictionary_column_view(values).keys().type() - : values.type(); - - return type_dispatcher(values_type, m2_functor{}, values, group_means, group_labels, stream, mr); -} - -std::unique_ptr group_var_from_m2(column_view const& group_m2, - column_view const& group_sizes, - cudf::device_span group_labels, - size_type ddof, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - // TODO - return std::make_unique(); -} - std::unique_ptr group_var(column_view const& values, column_view const& group_means, column_view const& group_sizes, From 8c74cb44eb3e0cd12ce76942defadebcf21ad982 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 1 Jul 2021 17:11:02 -0600 Subject: [PATCH 05/19] Finish unit tests for M2 aggregation --- .../cudf/detail/aggregation/aggregation.hpp | 1 + cpp/src/groupby/sort/aggregate.cpp | 1 - cpp/src/groupby/sort/group_m2.cu | 2 - cpp/tests/CMakeLists.txt | 2 + cpp/tests/groupby/m2_tests.cpp | 223 ++++++++++++++++++ 5 files changed, 226 insertions(+), 3 deletions(-) create mode 100644 cpp/tests/groupby/m2_tests.cpp diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp index d4176e1ab6c..c51eeb6bc7a 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.hpp +++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp @@ -1117,6 +1117,7 @@ CUDA_HOST_DEVICE_CALLABLE decltype(auto) aggregation_dispatcher(aggregation::Kin return f.template operator()(std::forward(args)...); case aggregation::MEAN: return f.template operator()(std::forward(args)...); + case aggregation::M2: return f.template operator()(std::forward(args)...); case aggregation::VARIANCE: return f.template operator()(std::forward(args)...); case aggregation::STD: diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp index 78db3ad1f68..59c40c24198 100644 --- a/cpp/src/groupby/sort/aggregate.cpp +++ b/cpp/src/groupby/sort/aggregate.cpp @@ -500,7 +500,6 @@ void aggregate_result_functor::operator()(aggregation c * * The values of M2 are merged following the parallel algorithm described here: * https://www.wikiwand.com/en/Algorithms_for_calculating_variance#/Parallel_algorithm - * */ template <> void aggregate_result_functor::operator()(aggregation const& agg) diff --git a/cpp/src/groupby/sort/group_m2.cu b/cpp/src/groupby/sort/group_m2.cu index 2de57a94923..533a8b93d2e 100644 --- a/cpp/src/groupby/sort/group_m2.cu +++ b/cpp/src/groupby/sort/group_m2.cu @@ -14,8 +14,6 @@ * limitations under the License. */ -#include "group_reductions.hpp" - #include #include #include diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 4360b418e95..bb7c3233bc3 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -62,11 +62,13 @@ ConfigureTest(GROUPBY_TEST groupby/count_tests.cpp groupby/groups_tests.cpp groupby/keys_tests.cpp + groupby/m2_tests.cpp groupby/min_tests.cpp groupby/max_scan_tests.cpp groupby/max_tests.cpp groupby/mean_tests.cpp groupby/median_tests.cpp + groupby/merge_m2_tests.cpp groupby/merge_lists_tests.cpp groupby/merge_sets_tests.cpp groupby/min_scan_tests.cpp diff --git a/cpp/tests/groupby/m2_tests.cpp b/cpp/tests/groupby/m2_tests.cpp new file mode 100644 index 00000000000..656a5af09df --- /dev/null +++ b/cpp/tests/groupby/m2_tests.cpp @@ -0,0 +1,223 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include +#include + +using namespace cudf::test::iterators; + +namespace { +constexpr bool print_all{false}; // For debugging +constexpr int32_t null{0}; // Mark for null elements +constexpr double NaN{std::numeric_limits::quiet_NaN()}; // Mark for NaN double elements + +template +using keys_col = cudf::test::fixed_width_column_wrapper; + +template +using vals_col = cudf::test::fixed_width_column_wrapper; + +template +using M2s_col = cudf::test::fixed_width_column_wrapper; + +auto compute_M2(cudf::column_view const& keys, cudf::column_view const& values) +{ + std::vector requests; + requests.emplace_back(cudf::groupby::aggregation_request()); + requests[0].values = values; + requests[0].aggregations.emplace_back(cudf::make_m2_aggregation()); + + auto gb_obj = cudf::groupby::groupby(cudf::table_view({keys})); + auto result = gb_obj.aggregate(requests); + return std::make_pair(std::move(result.first->release()[0]), + std::move(result.second[0].results[0])); +} +} // namespace + +template +struct GroupbyM2TypedTest : public cudf::test::BaseFixture { +}; + +using TestTypes = + cudf::test::Concat; +TYPED_TEST_SUITE(GroupbyM2TypedTest, TestTypes); + +TYPED_TEST(GroupbyM2TypedTest, EmptyInput) +{ + using T = TypeParam; + using R = cudf::detail::target_type_t; + + auto const keys = keys_col{}; + auto const vals = vals_col{}; + + auto const [out_keys, out_M2s] = compute_M2(keys, vals); + auto const expected_M2s = M2s_col{}; + + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(keys, *out_keys, print_all); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, *out_M2s, print_all); +} + +TYPED_TEST(GroupbyM2TypedTest, AllNullKeysInput) +{ + using T = TypeParam; + using R = cudf::detail::target_type_t; + + auto const keys = keys_col{{1, 2, 3}, all_nulls()}; + auto const vals = vals_col{3, 4, 5}; + + auto const [out_keys, out_M2s] = compute_M2(keys, vals); + auto const expected_keys = keys_col{}; + auto const expected_M2s = M2s_col{}; + + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *out_keys, print_all); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, *out_M2s, print_all); +} + +TYPED_TEST(GroupbyM2TypedTest, AllNullValuesInput) +{ + using T = TypeParam; + using R = cudf::detail::target_type_t; + + auto const keys = keys_col{1, 2, 3}; + auto const vals = vals_col{{3, 4, 5}, all_nulls()}; + + auto const [out_keys, out_M2s] = compute_M2(keys, vals); + auto const expected_M2s = M2s_col{{null, null, null}, all_nulls()}; + + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(keys, *out_keys, print_all); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, *out_M2s, print_all); +} + +TYPED_TEST(GroupbyM2TypedTest, SimpleInput) +{ + using T = TypeParam; + using R = cudf::detail::target_type_t; + + // key = 1: vals = [0, 3, 6] + // key = 2: vals = [1, 4, 5, 9] + // key = 3: vals = [2, 7, 8] + auto const keys = keys_col{1, 2, 3, 1, 2, 2, 1, 3, 3, 2}; + auto const vals = vals_col{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + + auto const [out_keys, out_M2s] = compute_M2(keys, vals); + auto const expected_keys = keys_col{1, 2, 3}; + auto const expected_M2s = 
M2s_col{18.0, 32.75, 20.0 + 2.0 / 3.0}; + + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *out_keys, print_all); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, *out_M2s, print_all); +} + +TYPED_TEST(GroupbyM2TypedTest, ValuesHaveNulls) +{ + using T = TypeParam; + using R = cudf::detail::target_type_t; + + auto const keys = keys_col{1, 2, 3, 4, 5, 2, 3, 2}; + auto const vals = vals_col{{0, null, 2, 3, null, 5, 6, 7}, nulls_at({1, 4})}; + + auto const [out_keys, out_M2s] = compute_M2(keys, vals); + auto const expected_keys = keys_col{1, 2, 3, 4, 5}; + auto const expected_M2s = M2s_col{{0.0, 2.0, 8.0, 0.0, 0.0 /*NULL*/}, null_at(4)}; + + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *out_keys, print_all); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, *out_M2s, print_all); +} + +TYPED_TEST(GroupbyM2TypedTest, KeysAndValuesHaveNulls) +{ + using T = TypeParam; + using R = cudf::detail::target_type_t; + + // key = 1: vals = [null, 3, 6] + // key = 2: vals = [1, 4, null, 9] + // key = 3: vals = [2, 8] + // key = 4: vals = [null] + auto const keys = keys_col{{1, 2, 3, 1, 2, 2, 1, null, 3, 2, 4}, null_at(7)}; + auto const vals = vals_col{{null, 1, 2, 3, 4, null, 6, 7, 8, 9, null}, nulls_at({0, 5, 10})}; + + auto const [out_keys, out_M2s] = compute_M2(keys, vals); + auto const expected_keys = keys_col{1, 2, 3, 4}; + auto const expected_M2s = M2s_col{{4.5, 32.0 + 2.0 / 3.0, 18.0, 0.0 /*NULL*/}, null_at(3)}; + + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *out_keys, print_all); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, *out_M2s, print_all); +} + +TYPED_TEST(GroupbyM2TypedTest, InputHaveNullsAndNaNs) +{ + using T = TypeParam; + using R = cudf::detail::target_type_t; + + // key = 1: vals = [0, 3, 6] + // key = 2: vals = [1, 4, NaN, 9] + // key = 3: vals = [null, 2, 8] + // key = 4: vals = [null, 10, NaN] + auto const keys = keys_col{{4, 3, 1, 2, 3, 1, 2, 2, 1, null, 3, 2, 4, 4}, null_at(9)}; + auto const vals = vals_col{ + {0.0 /*NULL*/, 0.0 /*NULL*/, 0.0, 1.0, 2.0, 3.0, 4.0, NaN, 6.0, 7.0, 8.0, 9.0, 10.0, NaN}, + nulls_at({0, 1})}; + + auto const [out_keys, out_M2s] = compute_M2(keys, vals); + auto const expected_keys = keys_col{1, 2, 3, 4}; + auto const expected_M2s = M2s_col{18.0, NaN, 18.0, NaN}; + + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *out_keys, print_all); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, *out_M2s, print_all); +} + +TYPED_TEST(GroupbyM2TypedTest, SlicedColumnsInput) +{ + using T = TypeParam; + using R = cudf::detail::target_type_t; + + // This test should compute M2 aggregation on the same dataset as the InputHaveNullsAndNaNs test. 
+ // i.e.: + // + // key = 1: vals = [0, 3, 6] + // key = 2: vals = [1, 4, NaN, 9] + // key = 3: vals = [null, 2, 8] + // key = 4: vals = [null, 10, NaN] + + auto const keys_original = keys_col{{ + 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, // discarded, don't care + 4, 3, 1, 2, 3, 1, 2, 2, 1, null, 3, 2, 4, 4, // used + 1, 2, 3, 4, 5, 1, 2, 3, 4, 5 // discarded, don't care + }, + null_at(19)}; + auto const vals_original = vals_col{ + { + 3.0, 2.0, 5.0, 4.0, 6.0, 9.0, 1.0, 0.0, 1.0, 7.0, // discarded, don't care + 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0, NaN, 6.0, 7.0, 8.0, 9.0, 10.0, NaN, // used + 9.0, 10.0, 11.0, 12.0, 0.0, 5.0, 1.0, 20.0, 19.0, 15.0 // discarded, don't care + }, + nulls_at({10, 11})}; + + auto const keys = cudf::slice(keys_original, {10, 24})[0]; + auto const vals = cudf::slice(vals_original, {10, 24})[0]; + + auto const [out_keys, out_M2s] = compute_M2(keys, vals); + auto const expected_keys = keys_col{1, 2, 3, 4}; + auto const expected_M2s = M2s_col{18.0, NaN, 18.0, NaN}; + + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *out_keys, print_all); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, *out_M2s, print_all); +} From 44a3b16e419e60a1db4d97d4f7b6eb8ef0e69dc7 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 2 Jul 2021 13:32:55 -0600 Subject: [PATCH 06/19] Rewrite doxygen --- cpp/include/cudf/aggregation.hpp | 10 ++++++---- cpp/src/groupby/sort/aggregate.cpp | 13 +++++++++---- cpp/src/groupby/sort/group_reductions.hpp | 8 ++++++-- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index 30da2976bda..e309c80a50a 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -309,7 +309,7 @@ std::unique_ptr make_udf_aggregation(udf_type type, * @brief Factory to create a MERGE_LISTS aggregation. * * Given a lists column, this aggregation merges all the lists corresponding to the same key value - * into one list. It is designed specificly to merge the partial results of multiple (distributed) + * into one list. It is designed specifically to merge the partial results of multiple (distributed) * groupby `COLLECT_LIST` aggregations into a final `COLLECT_LIST` result. As such, it requires the * input lists column to be non-nullable (the child column containing list entries is not subjected * to this requirement). @@ -324,7 +324,7 @@ std::unique_ptr make_merge_lists_aggregation(); * value into one list, then it drops all the duplicate entries in each lists, producing a lists * column containing non-repeated entries. * - * This aggregation is designed specificly to merge the partial results of multiple (distributed) + * This aggregation is designed specifically to merge the partial results of multiple (distributed) * groupby `COLLECT_LIST` or `COLLECT_SET` aggregations into a final `COLLECT_SET` result. As such, * it requires the input lists column to be non-nullable (the child column containing list entries * is not subjected to this requirement). @@ -345,9 +345,11 @@ std::unique_ptr make_merge_sets_aggregation(null_equality nulls_equal = nu /** * @brief Factory to create a MERGE_M2 aggregation * - * This aggregation is designed specificly to perform distributed computing of `M2` + * This aggregation is designed specifically to accommodate distributed computing of `M2` * aggregation. The partial results input to this aggregation is a structs column with children are - * columns generated by three groupby aggregations: `M2`, `COUNT_VALID`, and `MEAN`. 
+ * columns generated by three groupby aggregations: `COUNT_VALID`, `MEAN`, and `M2`. The output of + * this aggregation is also a structs column containing the merged values of all those aggregations, + * which are all required for recursively merging of `M2` values. */ template std::unique_ptr make_merge_m2_aggregation(); diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp index 59c40c24198..4e60d8d3f7d 100644 --- a/cpp/src/groupby/sort/aggregate.cpp +++ b/cpp/src/groupby/sort/aggregate.cpp @@ -493,10 +493,15 @@ void aggregate_result_functor::operator()(aggregation c * @brief Perform merging for the M2 values that correspond to the same key value. * * The partial results input to this aggregation is a structs column with children are columns - * generated by three other groupby aggregations: `M2`, `MEAN`, `COUNT_VALID` performed - * on partitioned datasets. After distributedly computed, the results output from these aggregations - * are (vertically) concatenated before assembling into a structs column given as the values column - * for this aggregation. + * generated by three other groupby aggregations: `COUNT_VALID`, `MEAN`, and `M2` that were + * performed on partitioned datasets. After distributedly computed, the results output from these + * aggregations are (vertically) concatenated before assembling into a structs column given as the + * values column for this aggregation. + * + * For recursive merging of `M2` values, the aggregations values of all input (`COUNT_VALID`, + * `MEAN`, and `M2`) are all merged and stored in the output of this aggregation. As such, the + * output will be a structs column containing children columns of merged `COUNT_VALID`, `MEAN`, and + * `M2` values. * * The values of M2 are merged following the parallel algorithm described here: * https://www.wikiwand.com/en/Algorithms_for_calculating_variance#/Parallel_algorithm diff --git a/cpp/src/groupby/sort/group_reductions.hpp b/cpp/src/groupby/sort/group_reductions.hpp index d3e043a0ac6..2b88317fcc3 100644 --- a/cpp/src/groupby/sort/group_reductions.hpp +++ b/cpp/src/groupby/sort/group_reductions.hpp @@ -424,9 +424,13 @@ std::unique_ptr group_merge_lists(column_view const& values, * * Merging M2 values require accessing to partial M2 values and also groupwise means and group valid * counts. Thus, the input to this aggregation need to be a structs column containing tuples of - * groupwise `(M2_value, mean, valid_count)`. + * groupwise `(valid_count, mean, M2_value)`. * - * @param values Grouped values (tuples of groupwise `(M2_value, mean, valid_count)`) to merge M2. + * This aggregation not only merges the partial results of `M2` but also merged all the partial + * results of input aggregations (`COUNT_VALID`, `MEAN`, and `M2`). As such, the output will be a + * structs column containing children columns of merged `COUNT_VALID`, `MEAN`, and `M2` values. + * + * @param values Grouped values (tuples of groupwise `(valid_count, mean, M2_value)`) to merge. * @param group_offsets Offsets of groups' starting points within @p values. * @param num_groups Number of groups. 
* @param mr Device memory resource used to allocate the returned column's device memory From 1d54ef56e07e6e35d851793c5e75282bd14dd82b Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 2 Jul 2021 13:33:22 -0600 Subject: [PATCH 07/19] Fix `MERGE_M2`implementation --- .../cudf/detail/aggregation/aggregation.hpp | 4 +- cpp/src/groupby/sort/group_merge_m2.cu | 143 +++++++++++++----- 2 files changed, 103 insertions(+), 44 deletions(-) diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp index c51eeb6bc7a..10d9d8c1b92 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.hpp +++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp @@ -1044,10 +1044,10 @@ struct target_type_impl { using type = cudf::list_view; }; -// Always use `double` for MERGE_M2 +// Always use struct for MERGE_M2 template struct target_type_impl { - using type = double; + using type = cudf::struct_view; }; /** diff --git a/cpp/src/groupby/sort/group_merge_m2.cu b/cpp/src/groupby/sort/group_merge_m2.cu index 8176c2ea712..3752b5b1d92 100644 --- a/cpp/src/groupby/sort/group_merge_m2.cu +++ b/cpp/src/groupby/sort/group_merge_m2.cu @@ -34,15 +34,20 @@ namespace groupby { namespace detail { namespace { +/** + * @brief Functor to accumulate (merge) all partial results corresponding to the same key into a + * final result storing in its member variables. It performs merging for the partial results of + * `COUNT_VALID`, `MEAN`, and `M2` at the same time. + */ template struct accumulate_fn { - ResultType M2_a; - ResultType mean_a; size_type n_a; + ResultType mean_a; + ResultType M2_a; - void __device__ operator()(ResultType const M2_b, + void __device__ operator()(size_type const n_b, ResultType const mean_b, - size_type const n_b) noexcept + ResultType const M2_b) noexcept { if (n_b == 0) { return; } @@ -55,6 +60,57 @@ struct accumulate_fn { } }; +/** + * @brief Functor to merge partial results of `COUNT_VALID`, `MEAN`, and `M2` aggregations + * for a given group (key) index. + */ +template +struct merge_fn { + size_type const* const d_offsets; + size_type const* const d_counts; + ResultType const* const d_means; + ResultType const* const d_M2s; + + auto __device__ operator()(size_type const group_idx) noexcept + { + auto const start_idx = d_offsets[group_idx], end_idx = d_offsets[group_idx + 1]; + + // This case should never happen, because all groups are non-empty due to the given input. + // Here just to make sure we cover this case. + if (start_idx == end_idx) { + return thrust::make_tuple(size_type{0}, ResultType{0}, ResultType{0}, int8_t{0}); + } + + // Firstly, this stores (valid_count, mean, M2) of the first partial result. + // Then, it accumulates (merges) the remaining partial results into it. + // Note that, if `n_a == 0` then `mean_a` and `M2_a` will be null. + // Thus, in such situations, we need to set zero for them before accumulating partial results. + auto const n_a = d_counts[start_idx]; + auto const mean_a = n_a > 0 ? d_means[start_idx] : ResultType{0}; + auto const M2_a = n_a > 0 ? d_M2s[start_idx] : ResultType{0}; + auto accumulator = accumulate_fn{n_a, mean_a, M2_a}; + + for (auto idx = start_idx + 1; idx < end_idx; ++idx) { + // if `n_b > 0` then we must have `d_means[idx] != null` and `d_M2s[idx] != null`. + // if `n_b == 0` then `mean_b` and `M2_b` will be null. + // In such situations, we need to set zero for them before merging (all zero partial results + // will not change the final output). 
+ auto const n_b = d_counts[idx]; + auto const mean_b = n_b > 0 ? d_means[idx] : ResultType{0}; + auto const M2_b = n_b > 0 ? d_M2s[idx] : ResultType{0}; + accumulator(n_b, mean_b, M2_b); + } + + // If there are all nulls in the partial results (i.e., sum of valid counts is + // zero), then the output is null. + auto const is_valid = int8_t{accumulator.n_a > 0}; + + return accumulator.n_a > 0 + ? thrust::make_tuple(accumulator.n_a, accumulator.mean_a, accumulator.M2_a, is_valid) + : thrust::make_tuple(size_type{0}, ResultType{0}, ResultType{0}, is_valid); + } +}; + } // namespace std::unique_ptr group_merge_m2(column_view const& values, @@ -71,54 +127,57 @@ std::unique_ptr group_merge_m2(column_view const& values, using ResultType = id_to_type; static_assert( std::is_same_v, ResultType>); - CUDF_EXPECTS(values.child(0).type().id() == type_to_id() && + CUDF_EXPECTS(values.child(0).type().id() == type_id::INT32 && values.child(1).type().id() == type_to_id() && - values.child(2).type().id() == type_id::INT32, + values.child(2).type().id() == type_to_id(), "Input to `group_merge_m2` must be a structs column having children columns " "containing tuples of groupwise (M2_value, mean, valid_count)."); - auto result = make_numeric_column( + auto result_counts = make_numeric_column( + data_type(type_to_id()), num_groups, mask_state::UNALLOCATED, stream, mr); + auto result_means = make_numeric_column( data_type(type_to_id()), num_groups, mask_state::UNALLOCATED, stream, mr); - - auto const M2_values = values.child(0); + auto result_M2s = make_numeric_column( + data_type(type_to_id()), num_groups, mask_state::UNALLOCATED, stream, mr); + auto validities = rmm::device_uvector(num_groups, stream); + + // Perform merging for all the aggregations. Their output (and their validity data) are written + // out concurrently through an output zip iterator. + using IteratorTuple = thrust::tuple; + using ZipIterator = thrust::zip_iterator; + auto const out_iter = + ZipIterator{thrust::make_tuple(result_counts->mutable_view().template data(), + result_means->mutable_view().template data(), + result_M2s->mutable_view().template data(), + validities.begin())}; + + auto const count_valid = values.child(0); auto const mean_values = values.child(1); - auto const count_valid = values.child(2); + auto const M2_values = values.child(2); auto const iter = thrust::make_counting_iterator(0); - auto validities = rmm::device_uvector(num_groups, stream); - - thrust::transform(rmm::exec_policy(stream), - iter, - iter + num_groups, - result->mutable_view().data(), - [d_M2 = M2_values.template begin(), - d_mean = mean_values.template begin(), - d_count = count_valid.template begin(), - d_offsets = group_offsets.begin(), - d_valid = validities.begin()] __device__(auto const group_idx) { - auto const start_idx = d_offsets[group_idx], - end_idx = d_offsets[group_idx + 1]; - - // Firstly, this stores (M2, mean, valid_count) of the first partial result. - // Then, merge all the following partial results into it. - auto accumulator = accumulate_fn{ - d_M2[start_idx], d_mean[start_idx], d_count[start_idx]}; - - for (auto idx = start_idx + 1; idx < end_idx; ++idx) { - auto const n_b = d_count[idx]; - auto const M2_b = n_b > 0 ? d_M2[idx] : ResultType{0}; - auto const mean_b = n_b > 0 ? d_mean[idx] : ResultType{0}; - accumulator(M2_b, mean_b, n_b); - } - - // If there are all nulls in the partial results (i.e., sum of valid counts is - // zero), then output a null. - d_valid[group_idx] = accumulator.n_a > 0; - return accumulator.n_a > 0 ? 
accumulator.M2_a : ResultType{0}; - }); + auto const fn = merge_fn{group_offsets.begin(), + count_valid.template begin(), + mean_values.template begin(), + M2_values.template begin()}; + thrust::transform(rmm::exec_policy(stream), iter, iter + num_groups, out_iter, fn); + + // Generate bitmask for the output. + // Only mean and M2 values can be nullable. auto [null_mask, null_count] = cudf::detail::valid_if( validities.begin(), validities.end(), thrust::identity{}, stream, mr); - if (null_count > 0) { result->set_null_mask(null_mask, null_count); } + if (null_count > 0) { + result_means->set_null_mask(null_mask, null_count); // copy null_mask + result_M2s->set_null_mask(std::move(null_mask), null_count); // take over null_mask + } + + // Output is a structs column containing the merged values of `COUNT_VALID`, `MEAN`, and `M2`. + std::vector> out_columns; + out_columns.emplace_back(std::move(result_counts)); + out_columns.emplace_back(std::move(result_means)); + out_columns.emplace_back(std::move(result_M2s)); + auto result = cudf::make_structs_column( + num_groups, std::move(out_columns), 0, rmm::device_buffer{0, stream, mr}, stream, mr); return result; } From 96dc79216948f2d57aea1e87e7b1dfa4697469a3 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 2 Jul 2021 13:33:35 -0600 Subject: [PATCH 08/19] Finish unit tests for `MERGE_M2` --- cpp/tests/groupby/merge_m2_tests.cpp | 360 +++++++++++++++++++++++++++ 1 file changed, 360 insertions(+) create mode 100644 cpp/tests/groupby/merge_m2_tests.cpp diff --git a/cpp/tests/groupby/merge_m2_tests.cpp b/cpp/tests/groupby/merge_m2_tests.cpp new file mode 100644 index 00000000000..36e1eb584cb --- /dev/null +++ b/cpp/tests/groupby/merge_m2_tests.cpp @@ -0,0 +1,360 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +using namespace cudf::test::iterators; + +namespace { +constexpr bool print_all{false}; // For debugging +constexpr int32_t null{0}; // Mark for null elements +constexpr double NaN{std::numeric_limits::quiet_NaN()}; // Mark for NaN double elements + +template +using keys_col = cudf::test::fixed_width_column_wrapper; + +template +using vals_col = cudf::test::fixed_width_column_wrapper; + +using counts_col = cudf::test::fixed_width_column_wrapper; + +template +using means_col = cudf::test::fixed_width_column_wrapper; + +template +using M2s_col = cudf::test::fixed_width_column_wrapper; + +using structs_col = cudf::test::structs_column_wrapper; +using vcol_views = std::vector; + +/** + * @brief Compute groupwise `COUNT_VALID`, `MEAN`, `M2` aggregations for the given values + * columns. + * @return A pair of unique keys column and a structs column containing the computed values of + * `COUNT_VALID`, `MEAN`, `M2`. 
+ */ +auto compute_partial_results(cudf::column_view const& keys, cudf::column_view const& values) +{ + std::vector requests; + requests.emplace_back(cudf::groupby::aggregation_request()); + requests[0].values = values; + requests[0].aggregations.emplace_back(cudf::make_count_aggregation()); + requests[0].aggregations.emplace_back(cudf::make_mean_aggregation()); + requests[0].aggregations.emplace_back(cudf::make_m2_aggregation()); + + auto gb_obj = cudf::groupby::groupby(cudf::table_view({keys})); + auto [out_keys, out_results] = gb_obj.aggregate(requests); + + auto const num_output_rows = out_keys->num_rows(); + return std::make_pair( + std::move(out_keys->release()[0]), + cudf::make_structs_column( + num_output_rows, std::move(out_results[0].results), 0, rmm::device_buffer{})); +} + +/** + * @brief Perform merging for partial results of M2 aggregations. + * + * @return A pair of unique keys column and a structs column containing the merged values of + * `COUNT_VALID`, `MEAN`, `M2`. + */ +auto merge_M2(vcol_views const& keys_cols, vcol_views const& values_cols) +{ + // Append all the keys and values together. + auto const keys = cudf::concatenate(keys_cols); + auto const values = cudf::concatenate(values_cols); + + std::vector requests; + requests.emplace_back(cudf::groupby::aggregation_request()); + requests[0].values = *values; + requests[0].aggregations.emplace_back(cudf::make_merge_m2_aggregation()); + + auto gb_obj = cudf::groupby::groupby(cudf::table_view({*keys})); + auto result = gb_obj.aggregate(requests); + return std::make_pair(std::move(result.first->release()[0]), + std::move(result.second[0].results[0])); +} +} // namespace + +template +struct GroupbyMergeM2TypedTest : public cudf::test::BaseFixture { +}; + +using TestTypes = + cudf::test::Concat; +TYPED_TEST_SUITE(GroupbyMergeM2TypedTest, TestTypes); + +TYPED_TEST(GroupbyMergeM2TypedTest, InvalidInput) +{ + using T = TypeParam; + + auto const keys = keys_col{1, 2, 3}; + + // The input column must be a structs column. + { + auto const values = keys_col{1, 2, 3}; + EXPECT_THROW(merge_M2({keys}, {values}), cudf::logic_error); + } + + // The input column must be a structs column having types (int32_t, double, double). 
+ { + auto vals1 = keys_col{1, 2, 3}; + auto vals2 = keys_col{1, 2, 3}; + auto vals3 = keys_col{1, 2, 3}; + auto const vals = structs_col{vals1, vals2, vals3}; + EXPECT_THROW(merge_M2({keys}, {vals}), cudf::logic_error); + } +} + +TYPED_TEST(GroupbyMergeM2TypedTest, EmptyInput) +{ + using T = TypeParam; + using M2_t = cudf::detail::target_type_t; + using mean_t = cudf::detail::target_type_t; + + auto const keys = keys_col{}; + auto vals_count = counts_col{}; + auto vals_mean = means_col{}; + auto vals_M2 = M2s_col{}; + auto const vals = structs_col{vals_count, vals_mean, vals_M2}; + + auto const [out_keys, out_vals] = merge_M2({keys}, {vals}); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(keys, *out_keys, print_all); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(vals, *out_vals, print_all); +} + +TYPED_TEST(GroupbyMergeM2TypedTest, SimpleInput) +{ + using T = TypeParam; + using R = cudf::detail::target_type_t; + + // Full dataset: + // + // keys = [1, 2, 3, 1, 2, 2, 1, 3, 3, 2] + // vals = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + // + // key = 1: vals = [0, 3, 6] + // key = 2: vals = [1, 4, 5, 9] + // key = 3: vals = [2, 7, 8] + + // Partitioned datasets: + auto const keys1 = keys_col{1, 2, 3}; + auto const keys2 = keys_col{1, 2, 2}; + auto const keys3 = keys_col{1, 3, 3, 2}; + + auto const vals1 = vals_col{0, 1, 2}; + auto const vals2 = vals_col{3, 4, 5}; + auto const vals3 = vals_col{6, 7, 8, 9}; + + // Compute partial results (`COUNT_VALID`, `MEAN`, `M2`) of each dataset. + // The partial results are also assembled into a structs column. + auto const [out1_keys, out1_vals] = compute_partial_results(keys1, vals1); + auto const [out2_keys, out2_vals] = compute_partial_results(keys2, vals2); + auto const [out3_keys, out3_vals] = compute_partial_results(keys3, vals3); + + // Merge the partial results to the final results. + // Merging can be done in just one merge step, or in multiple steps. + auto const [out4_keys, out4_vals] = + merge_M2(vcol_views{*out1_keys, *out2_keys}, vcol_views{*out1_vals, *out2_vals}); + auto const [final_keys, final_vals] = + merge_M2(vcol_views{*out3_keys, *out3_keys}, vcol_views{*out3_vals, *out4_vals}); + + // Get the final M2 values. + auto const out_M2s = final_vals->child(2); + + auto const expected_keys = keys_col{1, 2, 3}; + auto const expected_M2s = M2s_col{18.0, 32.75, 20.0 + 2.0 / 3.0}; + + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *final_keys, print_all); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, out_M2s, print_all); +} + +TYPED_TEST(GroupbyMergeM2TypedTest, InputHasNulls) +{ + using T = TypeParam; + using R = cudf::detail::target_type_t; + + // Full dataset: + // + // keys = [1, 2, 3, 1, 2, 2, 1, null, 3, 2, 4] + // vals = [null, 1, 2, 3, 4, null, 6, 7, 8, 9, null] + // + // key = 1: vals = [null, 3, 6] + // key = 2: vals = [1, 4, null, 9] + // key = 3: vals = [2, 8] + // key = 4: vals = [null] + + // Partitioned datasets: + auto const keys1 = keys_col{1, 2, 3, 1}; + auto const keys2 = keys_col{{2, 2, 1, null}, null_at(3)}; + auto const keys3 = keys_col{3, 2, 4}; + + auto const vals1 = vals_col{{null, 1, 2, 3}, null_at(0)}; + auto const vals2 = vals_col{{4, null, 6, 7}, null_at(1)}; + auto const vals3 = vals_col{{8, 9, null}, null_at(2)}; + + // Compute partial results (`COUNT_VALID`, `MEAN`, `M2`) of each dataset. + // The partial results are also assembled into a structs column. 
+ auto const [out1_keys, out1_vals] = compute_partial_results(keys1, vals1); + auto const [out2_keys, out2_vals] = compute_partial_results(keys2, vals2); + auto const [out3_keys, out3_vals] = compute_partial_results(keys3, vals3); + + // Merge the partial results to the final results. + // Merging can be done in just one merge step, or in multiple steps. + auto const [out4_keys, out4_vals] = + merge_M2(vcol_views{*out1_keys, *out2_keys}, vcol_views{*out1_vals, *out2_vals}); + auto const [final_keys, final_vals] = + merge_M2(vcol_views{*out3_keys, *out4_keys}, vcol_views{*out3_vals, *out4_vals}); + + // Get the final M2 values. + auto const out_M2s = final_vals->child(2); + + auto const expected_keys = keys_col{1, 2, 3, 4}; + auto const expected_M2s = M2s_col{{4.5, 32.0 + 2.0 / 3.0, 18.0, 0.0 /*NULL*/}, null_at(3)}; + + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *final_keys, print_all); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, out_M2s, print_all); +} + +TYPED_TEST(GroupbyMergeM2TypedTest, InputHaveNullsAndNaNs) +{ + using T = TypeParam; + using R = cudf::detail::target_type_t; + + // Full dataset: + // + // keys = [4, 3, 1, 2, 3, 1, 2, 2, 1, null, 3, 2, 4, 4] + // vals = [null, null, 0.0, 1.0, 2.0, 3.0, 4.0, NaN, 6.0, 7.0, 8.0, 9.0, 10.0, NaN] + // + // key = 1: vals = [0, 3, 6] + // key = 2: vals = [1, 4, NaN, 9] + // key = 3: vals = [null, 2, 8] + // key = 4: vals = [null, 10, NaN] + + // Partitioned datasets: + auto const keys1 = keys_col{4, 3, 1, 2}; + auto const keys2 = keys_col{3, 1, 2}; + auto const keys3 = keys_col{{2, 1, null}, null_at(2)}; + auto const keys4 = keys_col{3, 2, 4, 4}; + + auto const vals1 = vals_col{{0.0 /*NULL*/, 0.0 /*NULL*/, 0.0, 1.0}, nulls_at({0, 1})}; + auto const vals2 = vals_col{2.0, 3.0, 4.0}; + auto const vals3 = vals_col{NaN, 6.0, 7.0}; + auto const vals4 = vals_col{8.0, 9.0, 10.0, NaN}; + + // Compute partial results (`COUNT_VALID`, `MEAN`, `M2`) of each dataset. + // The partial results are also assembled into a structs column. + auto const [out1_keys, out1_vals] = compute_partial_results(keys1, vals1); + auto const [out2_keys, out2_vals] = compute_partial_results(keys2, vals2); + auto const [out3_keys, out3_vals] = compute_partial_results(keys3, vals3); + auto const [out4_keys, out4_vals] = compute_partial_results(keys4, vals4); + + // Merge the partial results to the final results. + // Merging can be done in just one merge step, or in multiple steps. + auto const [out5_keys, out5_vals] = + merge_M2(vcol_views{*out1_keys, *out2_keys}, vcol_views{*out1_vals, *out2_vals}); + auto const [out6_keys, out6_vals] = + merge_M2(vcol_views{*out3_keys, *out4_keys}, vcol_views{*out3_vals, *out4_vals}); + + auto const [final_keys, final_vals] = + merge_M2(vcol_views{*out5_keys, *out6_keys}, vcol_views{*out5_vals, *out6_vals}); + + // Get the final M2 values. + auto const out_M2s = final_vals->child(2); + + auto const expected_keys = keys_col{1, 2, 3, 4}; + auto const expected_M2s = M2s_col{18.0, NaN, 18.0, NaN}; + + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *final_keys, print_all); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, out_M2s, print_all); +} + +TYPED_TEST(GroupbyMergeM2TypedTest, SlicedColumnsInput) +{ + using T = TypeParam; + using R = cudf::detail::target_type_t; + + // This test should compute M2 aggregation on the same dataset as the InputHaveNullsAndNaNs test. 
+ // i.e.: + // + // keys = [4, 3, 1, 2, 3, 1, 2, 2, 1, null, 3, 2, 4, 4] + // vals = [null, null, 0.0, 1.0, 2.0, 3.0, 4.0, NaN, 6.0, 7.0, 8.0, 9.0, 10.0, NaN] + // + // key = 1: vals = [0, 3, 6] + // key = 2: vals = [1, 4, NaN, 9] + // key = 3: vals = [null, 2, 8] + // key = 4: vals = [null, 10, NaN] + + auto const keys_original = keys_col{{ + 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, // discarded, don't care + 4, 3, 1, 2, 3, 1, 2, 2, 1, null, 3, 2, 4, 4, // used + 1, 2, 3, 4, 5, 1, 2, 3, 4, 5 // discarded, don't care + }, + null_at(19)}; + auto const vals_original = vals_col{ + { + 3.0, 2.0, 5.0, 4.0, 6.0, 9.0, 1.0, 0.0, 1.0, 7.0, // discarded, don't care + 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0, NaN, 6.0, 7.0, 8.0, 9.0, 10.0, NaN, // used + 9.0, 10.0, 11.0, 12.0, 0.0, 5.0, 1.0, 20.0, 19.0, 15.0 // discarded, don't care + }, + nulls_at({10, 11})}; + + // Partitioned datasets, taken from the original dataset in the range [10, 24). + auto const keys1 = cudf::slice(keys_original, {10, 14})[0]; // {4, 3, 1, 2} + auto const keys2 = cudf::slice(keys_original, {14, 17})[0]; // {3, 1, 2} + auto const keys3 = cudf::slice(keys_original, {17, 20})[0]; // {2, 1, null} + auto const keys4 = cudf::slice(keys_original, {20, 24})[0]; // {3, 2, 4, 4} + + auto const vals1 = cudf::slice(vals_original, {10, 14})[0]; // {null, null, 0.0, 1.0} + auto const vals2 = cudf::slice(vals_original, {14, 17})[0]; // {2.0, 3.0, 4.0} + auto const vals3 = cudf::slice(vals_original, {17, 20})[0]; // {NaN, 6.0, 7.0} + auto const vals4 = cudf::slice(vals_original, {20, 24})[0]; // {8.0, 9.0, 10.0, NaN} + + // Compute partial results (`COUNT_VALID`, `MEAN`, `M2`) of each dataset. + // The partial results are also assembled into a structs column. + auto const [out1_keys, out1_vals] = compute_partial_results(keys1, vals1); + auto const [out2_keys, out2_vals] = compute_partial_results(keys2, vals2); + auto const [out3_keys, out3_vals] = compute_partial_results(keys3, vals3); + auto const [out4_keys, out4_vals] = compute_partial_results(keys4, vals4); + + // Merge the partial results to the final results. + // Merging can be done in just one merge step, or in multiple steps. + auto const [out5_keys, out5_vals] = + merge_M2(vcol_views{*out1_keys, *out2_keys}, vcol_views{*out1_vals, *out2_vals}); + auto const [out6_keys, out6_vals] = + merge_M2(vcol_views{*out3_keys, *out4_keys}, vcol_views{*out3_vals, *out4_vals}); + + auto const [final_keys, final_vals] = + merge_M2(vcol_views{*out5_keys, *out6_keys}, vcol_views{*out5_vals, *out6_vals}); + + // Get the final M2 values. + auto const out_M2s = final_vals->child(2); + + auto const expected_keys = keys_col{1, 2, 3, 4}; + auto const expected_M2s = M2s_col{18.0, NaN, 18.0, NaN}; + + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *final_keys, print_all); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, out_M2s, print_all); +} From 99ab9a15d927cb8f2596470ed522eb977b5bb712 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 2 Jul 2021 14:02:21 -0600 Subject: [PATCH 09/19] Fix copyright header --- cpp/src/groupby/sort/group_m2.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/groupby/sort/group_m2.cu b/cpp/src/groupby/sort/group_m2.cu index 533a8b93d2e..f71111bf027 100644 --- a/cpp/src/groupby/sort/group_m2.cu +++ b/cpp/src/groupby/sort/group_m2.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2021, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From 0ac08fe55631fe10c414dc29b2f9756094c07e22 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 2 Jul 2021 14:12:30 -0600 Subject: [PATCH 10/19] Rename functor --- cpp/src/groupby/sort/group_m2.cu | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/cpp/src/groupby/sort/group_m2.cu b/cpp/src/groupby/sort/group_m2.cu index f71111bf027..71260acc165 100644 --- a/cpp/src/groupby/sort/group_m2.cu +++ b/cpp/src/groupby/sort/group_m2.cu @@ -55,12 +55,12 @@ struct m2_transform { }; template -void reduce_by_key_fn(column_device_view const& values, - Iterator values_iter, - cudf::device_span group_labels, - ResultType const* d_means, - ResultType* d_result, - rmm::cuda_stream_view stream) +void compute_m2_fn(column_device_view const& values, + Iterator values_iter, + cudf::device_span group_labels, + ResultType const* d_means, + ResultType* d_result, + rmm::cuda_stream_view stream) { auto const var_iter = cudf::detail::make_counting_transform_iterator( size_type{0}, @@ -95,11 +95,11 @@ struct m2_functor { if (!cudf::is_dictionary(values.type())) { auto const values_iter = d_values.begin(); - reduce_by_key_fn(d_values, values_iter, group_labels, d_means, d_result, stream); + compute_m2_fn(d_values, values_iter, group_labels, d_means, d_result, stream); } else { auto const values_iter = cudf::dictionary::detail::make_dictionary_iterator(*values_dv_ptr); - reduce_by_key_fn(d_values, values_iter, group_labels, d_means, d_result, stream); + compute_m2_fn(d_values, values_iter, group_labels, d_means, d_result, stream); } // M2 column values should have the same bitmask as means's. From 7863b6f18081c283bb99be51f4412c065cdca3c6 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 6 Jul 2021 10:07:14 -0600 Subject: [PATCH 11/19] Rewrite the merge functor, adding `partial_result` struct to store intermediate merging result --- cpp/src/groupby/sort/group_merge_m2.cu | 109 +++++++++++++------------ 1 file changed, 58 insertions(+), 51 deletions(-) diff --git a/cpp/src/groupby/sort/group_merge_m2.cu b/cpp/src/groupby/sort/group_merge_m2.cu index 3752b5b1d92..120fe41a314 100644 --- a/cpp/src/groupby/sort/group_merge_m2.cu +++ b/cpp/src/groupby/sort/group_merge_m2.cu @@ -32,31 +32,38 @@ namespace cudf { namespace groupby { namespace detail { - namespace { +/** + * @brief Struct to store partial results for merging. + */ +template +struct partial_result { + size_type count; + result_type mean; + result_type M2; +}; + /** * @brief Functor to accumulate (merge) all partial results corresponding to the same key into a - * final result storing in its member variables. It performs merging for the partial results of + * final result storing in a member variable. It performs merging for the partial results of * `COUNT_VALID`, `MEAN`, and `M2` at the same time. 
*/ -template +template struct accumulate_fn { - size_type n_a; - ResultType mean_a; - ResultType M2_a; + partial_result merge_vals; - void __device__ operator()(size_type const n_b, - ResultType const mean_b, - ResultType const M2_b) noexcept + void __device__ operator()(partial_result const& partial_vals) noexcept { - if (n_b == 0) { return; } - - auto const n_ab = n_a + n_b; - auto const delta = mean_b - mean_a; - M2_a += - M2_b + (delta * delta) * static_cast(n_a) * static_cast(n_b) / n_ab; - mean_a = (mean_a * n_a + mean_b * n_b) / n_ab; - n_a = n_ab; + if (partial_vals.count == 0) { return; } + + auto const n_ab = merge_vals.count + partial_vals.count; + auto const delta = partial_vals.mean - merge_vals.mean; + merge_vals.M2 += partial_vals.M2 + (delta * delta) * + static_cast(merge_vals.count) * + static_cast(partial_vals.count) / n_ab; + merge_vals.mean = + (merge_vals.mean * merge_vals.count + partial_vals.mean * partial_vals.count) / n_ab; + merge_vals.count = n_ab; } }; @@ -64,50 +71,50 @@ struct accumulate_fn { * @brief Functor to merge partial results of `COUNT_VALID`, `MEAN`, and `M2` aggregations * for a given group (key) index. */ -template +template struct merge_fn { size_type const* const d_offsets; size_type const* const d_counts; - ResultType const* const d_means; - ResultType const* const d_M2s; + result_type const* const d_means; + result_type const* const d_M2s; auto __device__ operator()(size_type const group_idx) noexcept { auto const start_idx = d_offsets[group_idx], end_idx = d_offsets[group_idx + 1]; - // This case should never happen, because all groups are non-empty due to the given input. - // Here just to make sure we cover this case. + // This case should never happen, because all groups are non-empty as the results of + // aggregation. Here we just to make sure we cover this case. if (start_idx == end_idx) { - return thrust::make_tuple(size_type{0}, ResultType{0}, ResultType{0}, int8_t{0}); - } - - // Firstly, this stores (valid_count, mean, M2) of the first partial result. - // Then, it accumulates (merges) the remaining partial results into it. - // Note that, if `n_a == 0` then `mean_a` and `M2_a` will be null. - // Thus, in such situations, we need to set zero for them before accumulating partial results. - auto const n_a = d_counts[start_idx]; - auto const mean_a = n_a > 0 ? d_means[start_idx] : ResultType{0}; - auto const M2_a = n_a > 0 ? d_M2s[start_idx] : ResultType{0}; - auto accumulator = accumulate_fn{n_a, mean_a, M2_a}; - - for (auto idx = start_idx + 1; idx < end_idx; ++idx) { - // if `n_b > 0` then we must have `d_means[idx] != null` and `d_M2s[idx] != null`. - // if `n_b == 0` then `mean_b` and `M2_b` will be null. - // In such situations, we need to set zero for them before merging (all zero partial results - // will not change the final output). - auto const n_b = d_counts[idx]; - auto const mean_b = n_b > 0 ? d_means[idx] : ResultType{0}; - auto const M2_b = n_b > 0 ? d_M2s[idx] : ResultType{0}; - accumulator(n_b, mean_b, M2_b); + return thrust::make_tuple(size_type{0}, result_type{0}, result_type{0}, int8_t{0}); } - // If there are all nulls in the partial results (i.e., sum of valid counts is - // zero), then the output is null. - auto const is_valid = int8_t{accumulator.n_a > 0}; - - return accumulator.n_a > 0 - ? 
thrust::make_tuple(accumulator.n_a, accumulator.mean_a, accumulator.M2_a, is_valid) - : thrust::make_tuple(size_type{0}, ResultType{0}, ResultType{0}, is_valid); + // If `(n = d_counts[idx]) > 0` then `d_means[idx] != null` and `d_M2s[idx] != null`. + // Otherwise (`n == 0`), these value (mean and M2) will always be nulls. + // In such cases, reading `mean` and `M2` from memory will return garbage values. + // By setting these values to zero when `n == 0`, we can safely merge the all-zero tuple without + // affecting the final result. + auto get_partial_result = [&] __device__(size_type idx) { + { + auto const n = d_counts[idx]; + return n > 0 ? partial_result{n, d_means[idx], d_M2s[idx]} + : partial_result{size_type{0}, result_type{0}, result_type{0}}; + }; + }; + + // Firstly, store tuple(count, mean, M2) of the first partial result in an accumulator. + auto accumulator = accumulate_fn{get_partial_result(start_idx)}; + + // Then, accumulate (merge) the remaining partial results into that accumulator. + for (auto idx = start_idx + 1; idx < end_idx; ++idx) { accumulator(get_partial_result(idx)); } + + // Get the final result after merging. + auto const& merge_vals = accumulator.merge_vals; + + // If there are all nulls in the partial results (i.e., sum of all valid counts is + // zero), then the output is a null. + auto const is_valid = int8_t{merge_vals.count > 0}; + + return thrust::make_tuple(merge_vals.count, merge_vals.mean, merge_vals.M2, is_valid); } }; @@ -163,7 +170,7 @@ std::unique_ptr group_merge_m2(column_view const& values, thrust::transform(rmm::exec_policy(stream), iter, iter + num_groups, out_iter, fn); // Generate bitmask for the output. - // Only mean and M2 values can be nullable. + // Only mean and M2 values can be nullable. Count column must be non-nullable. auto [null_mask, null_count] = cudf::detail::valid_if( validities.begin(), validities.end(), thrust::identity{}, stream, mr); if (null_count > 0) { From 0ab2c18bcd6ad1db3c570082604cdbad6e4dc4cf Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 6 Jul 2021 10:07:22 -0600 Subject: [PATCH 12/19] Rewrite doxygen --- cpp/include/cudf/aggregation.hpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index e309c80a50a..9ab5739b0b6 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -164,9 +164,11 @@ std::unique_ptr make_mean_aggregation(); /** * @brief Factory to create a M2 aggregation * - * A M2 aggregation is groupwise sum of squares of differences from the group mean. It produces the - * intermediate values that are used to compute variance and standard deviation in distributed - * computing. + * A M2 aggregation is groupwise sum of squares of differences from the group mean. That is: + * `M2(group) = SUM((x - MEAN(group)) * (x - MEAN(group)), for all x in group)`. + * + * This aggregation produces the intermediate values that are used to compute variance and standard + * deviation in distributed computing. */ template std::unique_ptr make_m2_aggregation(); @@ -350,6 +352,10 @@ std::unique_ptr make_merge_sets_aggregation(null_equality nulls_equal = nu * columns generated by three groupby aggregations: `COUNT_VALID`, `MEAN`, and `M2`. The output of * this aggregation is also a structs column containing the merged values of all those aggregations, * which are all required for recursively merging of `M2` values. 
+ * + * The partial results of `M2` aggregation are expected to be all non-negative numbers, since they + * are expected to be output from `M2` aggregation. However, this will not be checked due to + * performance reason. */ template std::unique_ptr make_merge_m2_aggregation(); From 90d984a9166dba44d6cc79f579ed2b3b10f4aa16 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 6 Jul 2021 10:08:11 -0600 Subject: [PATCH 13/19] Add a unit test when the input column column is a structs column with number of children is not 3 --- cpp/tests/groupby/merge_m2_tests.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/cpp/tests/groupby/merge_m2_tests.cpp b/cpp/tests/groupby/merge_m2_tests.cpp index 36e1eb584cb..1af6f73f90b 100644 --- a/cpp/tests/groupby/merge_m2_tests.cpp +++ b/cpp/tests/groupby/merge_m2_tests.cpp @@ -118,6 +118,14 @@ TYPED_TEST(GroupbyMergeM2TypedTest, InvalidInput) EXPECT_THROW(merge_M2({keys}, {values}), cudf::logic_error); } + // The input column must be a structs column having 3 children. + { + auto vals1 = keys_col{1, 2, 3}; + auto vals2 = vals_col{1.0, 2.0, 3.0}; + auto const vals = structs_col{vals1, vals2}; + EXPECT_THROW(merge_M2({keys}, {vals}), cudf::logic_error); + } + // The input column must be a structs column having types (int32_t, double, double). { auto vals1 = keys_col{1, 2, 3}; From 9967ec27863e53a2ea5fb55292e7c1c91ba90bd9 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 6 Jul 2021 10:20:16 -0600 Subject: [PATCH 14/19] Add unit tests for the cases when the input values column has negative numbers --- cpp/tests/groupby/m2_tests.cpp | 23 +++++++++++-- cpp/tests/groupby/merge_m2_tests.cpp | 50 ++++++++++++++++++++++++++-- 2 files changed, 69 insertions(+), 4 deletions(-) diff --git a/cpp/tests/groupby/m2_tests.cpp b/cpp/tests/groupby/m2_tests.cpp index 656a5af09df..e9a3ed6119a 100644 --- a/cpp/tests/groupby/m2_tests.cpp +++ b/cpp/tests/groupby/m2_tests.cpp @@ -57,8 +57,8 @@ template struct GroupbyM2TypedTest : public cudf::test::BaseFixture { }; -using TestTypes = - cudf::test::Concat; +using TestTypes = cudf::test::Concat, + cudf::test::FloatingPointTypes>; TYPED_TEST_SUITE(GroupbyM2TypedTest, TestTypes); TYPED_TEST(GroupbyM2TypedTest, EmptyInput) @@ -126,6 +126,25 @@ TYPED_TEST(GroupbyM2TypedTest, SimpleInput) CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, *out_M2s, print_all); } +TYPED_TEST(GroupbyM2TypedTest, SimpleInputHavingNegativeValues) +{ + using T = TypeParam; + using R = cudf::detail::target_type_t; + + // key = 1: vals = [0, 3, -6] + // key = 2: vals = [1, -4, -5, 9] + // key = 3: vals = [-2, 7, -8] + auto const keys = keys_col{1, 2, 3, 1, 2, 2, 1, 3, 3, 2}; + auto const vals = vals_col{0, 1, -2, 3, -4, -5, -6, 7, -8, 9}; + + auto const [out_keys, out_M2s] = compute_M2(keys, vals); + auto const expected_keys = keys_col{1, 2, 3}; + auto const expected_M2s = M2s_col{42.0, 122.75, 114.0}; + + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *out_keys, print_all); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, *out_M2s, print_all); +} + TYPED_TEST(GroupbyM2TypedTest, ValuesHaveNulls) { using T = TypeParam; diff --git a/cpp/tests/groupby/merge_m2_tests.cpp b/cpp/tests/groupby/merge_m2_tests.cpp index 1af6f73f90b..92a17bfa7ba 100644 --- a/cpp/tests/groupby/merge_m2_tests.cpp +++ b/cpp/tests/groupby/merge_m2_tests.cpp @@ -102,8 +102,8 @@ template struct GroupbyMergeM2TypedTest : public cudf::test::BaseFixture { }; -using TestTypes = - cudf::test::Concat; +using TestTypes = cudf::test::Concat, + cudf::test::FloatingPointTypes>; 
TYPED_TEST_SUITE(GroupbyMergeM2TypedTest, TestTypes); TYPED_TEST(GroupbyMergeM2TypedTest, InvalidInput) @@ -199,6 +199,52 @@ TYPED_TEST(GroupbyMergeM2TypedTest, SimpleInput) CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, out_M2s, print_all); } +TYPED_TEST(GroupbyMergeM2TypedTest, SimpleInputHavingNegativeValues) +{ + using T = TypeParam; + using R = cudf::detail::target_type_t; + + // Full dataset: + // + // keys = [1, 2, 3, 1, 2, 2, 1, 3, 3, 2] + // vals = [0, 1, -2, 3, -4, -5, -6, 7, -8, 9] + // + // key = 1: vals = [0, 3, -6] + // key = 2: vals = [1, -4, -5, 9] + // key = 3: vals = [-2, 7, -8] + + // Partitioned datasets: + auto const keys1 = keys_col{1, 2, 3}; + auto const keys2 = keys_col{1, 2, 2}; + auto const keys3 = keys_col{1, 3, 3, 2}; + + auto const vals1 = vals_col{0, 1, -2}; + auto const vals2 = vals_col{3, -4, -5}; + auto const vals3 = vals_col{-6, 7, -8, 9}; + + // Compute partial results (`COUNT_VALID`, `MEAN`, `M2`) of each dataset. + // The partial results are also assembled into a structs column. + auto const [out1_keys, out1_vals] = compute_partial_results(keys1, vals1); + auto const [out2_keys, out2_vals] = compute_partial_results(keys2, vals2); + auto const [out3_keys, out3_vals] = compute_partial_results(keys3, vals3); + + // Merge the partial results to the final results. + // Merging can be done in just one merge step, or in multiple steps. + auto const [out4_keys, out4_vals] = + merge_M2(vcol_views{*out1_keys, *out2_keys}, vcol_views{*out1_vals, *out2_vals}); + auto const [final_keys, final_vals] = + merge_M2(vcol_views{*out3_keys, *out3_keys}, vcol_views{*out3_vals, *out4_vals}); + + // Get the final M2 values. + auto const out_M2s = final_vals->child(2); + + auto const expected_keys = keys_col{1, 2, 3}; + auto const expected_M2s = M2s_col{42.0, 122.75, 114.0}; + + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *final_keys, print_all); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, out_M2s, print_all); +} + TYPED_TEST(GroupbyMergeM2TypedTest, InputHasNulls) { using T = TypeParam; From f3d0a3ca1738b7254d8a71ac4985e02ad26b160c Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 6 Jul 2021 10:29:19 -0600 Subject: [PATCH 15/19] Fix comments --- cpp/tests/groupby/m2_tests.cpp | 19 ++++++++++--------- cpp/tests/groupby/merge_m2_tests.cpp | 19 ++++++++++--------- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/cpp/tests/groupby/m2_tests.cpp b/cpp/tests/groupby/m2_tests.cpp index e9a3ed6119a..7611dce2271 100644 --- a/cpp/tests/groupby/m2_tests.cpp +++ b/cpp/tests/groupby/m2_tests.cpp @@ -216,17 +216,18 @@ TYPED_TEST(GroupbyM2TypedTest, SlicedColumnsInput) // key = 3: vals = [null, 2, 8] // key = 4: vals = [null, 10, NaN] - auto const keys_original = keys_col{{ - 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, // discarded, don't care - 4, 3, 1, 2, 3, 1, 2, 2, 1, null, 3, 2, 4, 4, // used - 1, 2, 3, 4, 5, 1, 2, 3, 4, 5 // discarded, don't care - }, - null_at(19)}; + auto const keys_original = + keys_col{{ + 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, // will not use, don't care + 4, 3, 1, 2, 3, 1, 2, 2, 1, null, 3, 2, 4, 4, // use this + 1, 2, 3, 4, 5, 1, 2, 3, 4, 5 // will not use, don't care + }, + null_at(19)}; auto const vals_original = vals_col{ { - 3.0, 2.0, 5.0, 4.0, 6.0, 9.0, 1.0, 0.0, 1.0, 7.0, // discarded, don't care - 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0, NaN, 6.0, 7.0, 8.0, 9.0, 10.0, NaN, // used - 9.0, 10.0, 11.0, 12.0, 0.0, 5.0, 1.0, 20.0, 19.0, 15.0 // discarded, don't care + 3.0, 2.0, 5.0, 4.0, 6.0, 9.0, 1.0, 0.0, 1.0, 7.0, // will not use, don't care + 0.0, 
0.0, 0.0, 1.0, 2.0, 3.0, 4.0, NaN, 6.0, 7.0, 8.0, 9.0, 10.0, NaN, // use this + 9.0, 10.0, 11.0, 12.0, 0.0, 5.0, 1.0, 20.0, 19.0, 15.0 // will not use, don't care }, nulls_at({10, 11})}; diff --git a/cpp/tests/groupby/merge_m2_tests.cpp b/cpp/tests/groupby/merge_m2_tests.cpp index 92a17bfa7ba..d34bbf3d60f 100644 --- a/cpp/tests/groupby/merge_m2_tests.cpp +++ b/cpp/tests/groupby/merge_m2_tests.cpp @@ -361,17 +361,18 @@ TYPED_TEST(GroupbyMergeM2TypedTest, SlicedColumnsInput) // key = 3: vals = [null, 2, 8] // key = 4: vals = [null, 10, NaN] - auto const keys_original = keys_col{{ - 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, // discarded, don't care - 4, 3, 1, 2, 3, 1, 2, 2, 1, null, 3, 2, 4, 4, // used - 1, 2, 3, 4, 5, 1, 2, 3, 4, 5 // discarded, don't care - }, - null_at(19)}; + auto const keys_original = + keys_col{{ + 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, // will not use, don't care + 4, 3, 1, 2, 3, 1, 2, 2, 1, null, 3, 2, 4, 4, // use this + 1, 2, 3, 4, 5, 1, 2, 3, 4, 5 // will not use, don't care + }, + null_at(19)}; auto const vals_original = vals_col{ { - 3.0, 2.0, 5.0, 4.0, 6.0, 9.0, 1.0, 0.0, 1.0, 7.0, // discarded, don't care - 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0, NaN, 6.0, 7.0, 8.0, 9.0, 10.0, NaN, // used - 9.0, 10.0, 11.0, 12.0, 0.0, 5.0, 1.0, 20.0, 19.0, 15.0 // discarded, don't care + 3.0, 2.0, 5.0, 4.0, 6.0, 9.0, 1.0, 0.0, 1.0, 7.0, // will not use, don't care + 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0, NaN, 6.0, 7.0, 8.0, 9.0, 10.0, NaN, // use this + 9.0, 10.0, 11.0, 12.0, 0.0, 5.0, 1.0, 20.0, 19.0, 15.0 // will not use, don't care }, nulls_at({10, 11})}; From bbb961fd0c5c54b9be777392fecb79c91634980e Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 6 Jul 2021 10:52:21 -0600 Subject: [PATCH 16/19] Rewrite unit tests, separating multiple steps merging and one step merging --- cpp/tests/groupby/merge_m2_tests.cpp | 177 ++++++++++++++++++--------- 1 file changed, 121 insertions(+), 56 deletions(-) diff --git a/cpp/tests/groupby/merge_m2_tests.cpp b/cpp/tests/groupby/merge_m2_tests.cpp index d34bbf3d60f..2f5f5f3b351 100644 --- a/cpp/tests/groupby/merge_m2_tests.cpp +++ b/cpp/tests/groupby/merge_m2_tests.cpp @@ -176,6 +176,10 @@ TYPED_TEST(GroupbyMergeM2TypedTest, SimpleInput) auto const vals2 = vals_col{3, 4, 5}; auto const vals3 = vals_col{6, 7, 8, 9}; + // The expected results to validate. + auto const expected_keys = keys_col{1, 2, 3}; + auto const expected_M2s = M2s_col{18.0, 32.75, 20.0 + 2.0 / 3.0}; + // Compute partial results (`COUNT_VALID`, `MEAN`, `M2`) of each dataset. // The partial results are also assembled into a structs column. auto const [out1_keys, out1_vals] = compute_partial_results(keys1, vals1); @@ -184,19 +188,28 @@ TYPED_TEST(GroupbyMergeM2TypedTest, SimpleInput) // Merge the partial results to the final results. // Merging can be done in just one merge step, or in multiple steps. - auto const [out4_keys, out4_vals] = - merge_M2(vcol_views{*out1_keys, *out2_keys}, vcol_views{*out1_vals, *out2_vals}); - auto const [final_keys, final_vals] = - merge_M2(vcol_views{*out3_keys, *out3_keys}, vcol_views{*out3_vals, *out4_vals}); - // Get the final M2 values. 
- auto const out_M2s = final_vals->child(2); + // Multiple steps merging: + { + auto const [out4_keys, out4_vals] = + merge_M2(vcol_views{*out1_keys, *out2_keys}, vcol_views{*out1_vals, *out2_vals}); + auto const [final_keys, final_vals] = + merge_M2(vcol_views{*out3_keys, *out4_keys}, vcol_views{*out3_vals, *out4_vals}); + + auto const out_M2s = final_vals->child(2); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *final_keys, print_all); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, out_M2s, print_all); + } - auto const expected_keys = keys_col{1, 2, 3}; - auto const expected_M2s = M2s_col{18.0, 32.75, 20.0 + 2.0 / 3.0}; + // One step merging: + { + auto const [final_keys, final_vals] = merge_M2(vcol_views{*out1_keys, *out2_keys, *out3_keys}, + vcol_views{*out1_vals, *out2_vals, *out3_vals}); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *final_keys, print_all); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, out_M2s, print_all); + auto const out_M2s = final_vals->child(2); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *final_keys, print_all); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, out_M2s, print_all); + } } TYPED_TEST(GroupbyMergeM2TypedTest, SimpleInputHavingNegativeValues) @@ -222,6 +235,10 @@ TYPED_TEST(GroupbyMergeM2TypedTest, SimpleInputHavingNegativeValues) auto const vals2 = vals_col{3, -4, -5}; auto const vals3 = vals_col{-6, 7, -8, 9}; + // The expected results to validate. + auto const expected_keys = keys_col{1, 2, 3}; + auto const expected_M2s = M2s_col{42.0, 122.75, 114.0}; + // Compute partial results (`COUNT_VALID`, `MEAN`, `M2`) of each dataset. // The partial results are also assembled into a structs column. auto const [out1_keys, out1_vals] = compute_partial_results(keys1, vals1); @@ -230,19 +247,28 @@ TYPED_TEST(GroupbyMergeM2TypedTest, SimpleInputHavingNegativeValues) // Merge the partial results to the final results. // Merging can be done in just one merge step, or in multiple steps. - auto const [out4_keys, out4_vals] = - merge_M2(vcol_views{*out1_keys, *out2_keys}, vcol_views{*out1_vals, *out2_vals}); - auto const [final_keys, final_vals] = - merge_M2(vcol_views{*out3_keys, *out3_keys}, vcol_views{*out3_vals, *out4_vals}); - // Get the final M2 values. 
- auto const out_M2s = final_vals->child(2); + // Multiple steps merging: + { + auto const [out4_keys, out4_vals] = + merge_M2(vcol_views{*out1_keys, *out2_keys}, vcol_views{*out1_vals, *out2_vals}); + auto const [final_keys, final_vals] = + merge_M2(vcol_views{*out3_keys, *out4_keys}, vcol_views{*out3_vals, *out4_vals}); + + auto const out_M2s = final_vals->child(2); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *final_keys, print_all); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, out_M2s, print_all); + } - auto const expected_keys = keys_col{1, 2, 3}; - auto const expected_M2s = M2s_col{42.0, 122.75, 114.0}; + // One step merging: + { + auto const [final_keys, final_vals] = merge_M2(vcol_views{*out1_keys, *out2_keys, *out3_keys}, + vcol_views{*out1_vals, *out2_vals, *out3_vals}); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *final_keys, print_all); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, out_M2s, print_all); + auto const out_M2s = final_vals->child(2); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *final_keys, print_all); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, out_M2s, print_all); + } } TYPED_TEST(GroupbyMergeM2TypedTest, InputHasNulls) @@ -269,6 +295,10 @@ TYPED_TEST(GroupbyMergeM2TypedTest, InputHasNulls) auto const vals2 = vals_col{{4, null, 6, 7}, null_at(1)}; auto const vals3 = vals_col{{8, 9, null}, null_at(2)}; + // The expected results to validate. + auto const expected_keys = keys_col{1, 2, 3, 4}; + auto const expected_M2s = M2s_col{{4.5, 32.0 + 2.0 / 3.0, 18.0, 0.0 /*NULL*/}, null_at(3)}; + // Compute partial results (`COUNT_VALID`, `MEAN`, `M2`) of each dataset. // The partial results are also assembled into a structs column. auto const [out1_keys, out1_vals] = compute_partial_results(keys1, vals1); @@ -277,19 +307,28 @@ TYPED_TEST(GroupbyMergeM2TypedTest, InputHasNulls) // Merge the partial results to the final results. // Merging can be done in just one merge step, or in multiple steps. - auto const [out4_keys, out4_vals] = - merge_M2(vcol_views{*out1_keys, *out2_keys}, vcol_views{*out1_vals, *out2_vals}); - auto const [final_keys, final_vals] = - merge_M2(vcol_views{*out3_keys, *out4_keys}, vcol_views{*out3_vals, *out4_vals}); - // Get the final M2 values. 
- auto const out_M2s = final_vals->child(2); + // Multiple steps merging: + { + auto const [out4_keys, out4_vals] = + merge_M2(vcol_views{*out1_keys, *out2_keys}, vcol_views{*out1_vals, *out2_vals}); + auto const [final_keys, final_vals] = + merge_M2(vcol_views{*out3_keys, *out4_keys}, vcol_views{*out3_vals, *out4_vals}); + + auto const out_M2s = final_vals->child(2); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *final_keys, print_all); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, out_M2s, print_all); + } - auto const expected_keys = keys_col{1, 2, 3, 4}; - auto const expected_M2s = M2s_col{{4.5, 32.0 + 2.0 / 3.0, 18.0, 0.0 /*NULL*/}, null_at(3)}; + // One step merging: + { + auto const [final_keys, final_vals] = merge_M2(vcol_views{*out1_keys, *out2_keys, *out3_keys}, + vcol_views{*out1_vals, *out2_vals, *out3_vals}); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *final_keys, print_all); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, out_M2s, print_all); + auto const out_M2s = final_vals->child(2); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *final_keys, print_all); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, out_M2s, print_all); + } } TYPED_TEST(GroupbyMergeM2TypedTest, InputHaveNullsAndNaNs) @@ -318,6 +357,10 @@ TYPED_TEST(GroupbyMergeM2TypedTest, InputHaveNullsAndNaNs) auto const vals3 = vals_col{NaN, 6.0, 7.0}; auto const vals4 = vals_col{8.0, 9.0, 10.0, NaN}; + // The expected results to validate. + auto const expected_keys = keys_col{1, 2, 3, 4}; + auto const expected_M2s = M2s_col{18.0, NaN, 18.0, NaN}; + // Compute partial results (`COUNT_VALID`, `MEAN`, `M2`) of each dataset. // The partial results are also assembled into a structs column. auto const [out1_keys, out1_vals] = compute_partial_results(keys1, vals1); @@ -327,22 +370,31 @@ TYPED_TEST(GroupbyMergeM2TypedTest, InputHaveNullsAndNaNs) // Merge the partial results to the final results. // Merging can be done in just one merge step, or in multiple steps. - auto const [out5_keys, out5_vals] = - merge_M2(vcol_views{*out1_keys, *out2_keys}, vcol_views{*out1_vals, *out2_vals}); - auto const [out6_keys, out6_vals] = - merge_M2(vcol_views{*out3_keys, *out4_keys}, vcol_views{*out3_vals, *out4_vals}); - auto const [final_keys, final_vals] = - merge_M2(vcol_views{*out5_keys, *out6_keys}, vcol_views{*out5_vals, *out6_vals}); - - // Get the final M2 values. 
- auto const out_M2s = final_vals->child(2); + // Multiple steps merging: + { + auto const [out5_keys, out5_vals] = + merge_M2(vcol_views{*out1_keys, *out2_keys}, vcol_views{*out1_vals, *out2_vals}); + auto const [out6_keys, out6_vals] = + merge_M2(vcol_views{*out3_keys, *out4_keys}, vcol_views{*out3_vals, *out4_vals}); + auto const [final_keys, final_vals] = + merge_M2(vcol_views{*out5_keys, *out6_keys}, vcol_views{*out5_vals, *out6_vals}); + + auto const out_M2s = final_vals->child(2); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *final_keys, print_all); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, out_M2s, print_all); + } - auto const expected_keys = keys_col{1, 2, 3, 4}; - auto const expected_M2s = M2s_col{18.0, NaN, 18.0, NaN}; + // One step merging: + { + auto const [final_keys, final_vals] = + merge_M2(vcol_views{*out1_keys, *out2_keys, *out3_keys, *out4_keys}, + vcol_views{*out1_vals, *out2_vals, *out3_vals, *out4_vals}); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *final_keys, print_all); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, out_M2s, print_all); + auto const out_M2s = final_vals->child(2); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *final_keys, print_all); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, out_M2s, print_all); + } } TYPED_TEST(GroupbyMergeM2TypedTest, SlicedColumnsInput) @@ -387,6 +439,10 @@ TYPED_TEST(GroupbyMergeM2TypedTest, SlicedColumnsInput) auto const vals3 = cudf::slice(vals_original, {17, 20})[0]; // {NaN, 6.0, 7.0} auto const vals4 = cudf::slice(vals_original, {20, 24})[0]; // {8.0, 9.0, 10.0, NaN} + // The expected results to validate. + auto const expected_keys = keys_col{1, 2, 3, 4}; + auto const expected_M2s = M2s_col{18.0, NaN, 18.0, NaN}; + // Compute partial results (`COUNT_VALID`, `MEAN`, `M2`) of each dataset. // The partial results are also assembled into a structs column. auto const [out1_keys, out1_vals] = compute_partial_results(keys1, vals1); @@ -396,20 +452,29 @@ TYPED_TEST(GroupbyMergeM2TypedTest, SlicedColumnsInput) // Merge the partial results to the final results. // Merging can be done in just one merge step, or in multiple steps. - auto const [out5_keys, out5_vals] = - merge_M2(vcol_views{*out1_keys, *out2_keys}, vcol_views{*out1_vals, *out2_vals}); - auto const [out6_keys, out6_vals] = - merge_M2(vcol_views{*out3_keys, *out4_keys}, vcol_views{*out3_vals, *out4_vals}); - - auto const [final_keys, final_vals] = - merge_M2(vcol_views{*out5_keys, *out6_keys}, vcol_views{*out5_vals, *out6_vals}); - // Get the final M2 values. 
- auto const out_M2s = final_vals->child(2); + // Multiple steps merging: + { + auto const [out5_keys, out5_vals] = + merge_M2(vcol_views{*out1_keys, *out2_keys}, vcol_views{*out1_vals, *out2_vals}); + auto const [out6_keys, out6_vals] = + merge_M2(vcol_views{*out3_keys, *out4_keys}, vcol_views{*out3_vals, *out4_vals}); + auto const [final_keys, final_vals] = + merge_M2(vcol_views{*out5_keys, *out6_keys}, vcol_views{*out5_vals, *out6_vals}); + + auto const out_M2s = final_vals->child(2); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *final_keys, print_all); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, out_M2s, print_all); + } - auto const expected_keys = keys_col{1, 2, 3, 4}; - auto const expected_M2s = M2s_col{18.0, NaN, 18.0, NaN}; + // One step merging: + { + auto const [final_keys, final_vals] = + merge_M2(vcol_views{*out1_keys, *out2_keys, *out3_keys, *out4_keys}, + vcol_views{*out1_vals, *out2_vals, *out3_vals, *out4_vals}); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *final_keys, print_all); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, out_M2s, print_all); + auto const out_M2s = final_vals->child(2); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *final_keys, print_all); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, out_M2s, print_all); + } } From 26e17f6771e7699edaf35a120290991279c6880e Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 6 Jul 2021 11:08:31 -0600 Subject: [PATCH 17/19] Change `ResultType` to `result_type` to enforce name consistency --- cpp/src/groupby/sort/group_m2.cu | 15 +++++++----- cpp/src/groupby/sort/group_merge_m2.cu | 32 +++++++++++++------------- 2 files changed, 25 insertions(+), 22 deletions(-) diff --git a/cpp/src/groupby/sort/group_m2.cu b/cpp/src/groupby/sort/group_m2.cu index 71260acc165..a72f6c6f647 100644 --- a/cpp/src/groupby/sort/group_m2.cu +++ b/cpp/src/groupby/sort/group_m2.cu @@ -44,7 +44,7 @@ struct m2_transform { __device__ ResultType operator()(size_type const idx) const noexcept { - if (d_values.is_null(idx)) return 0.0; + if (d_values.is_null(idx)) { return 0.0; } auto const x = static_cast(values_iter[idx]); auto const group_idx = d_group_labels[idx]; @@ -84,14 +84,17 @@ struct m2_functor { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - using ResultType = cudf::detail::target_type_t; - auto result = make_numeric_column( - data_type(type_to_id()), group_means.size(), mask_state::UNALLOCATED, stream, mr); + using result_type = cudf::detail::target_type_t; + auto result = make_numeric_column(data_type(type_to_id()), + group_means.size(), + mask_state::UNALLOCATED, + stream, + mr); auto const values_dv_ptr = column_device_view::create(values, stream); auto const d_values = *values_dv_ptr; - auto const d_means = group_means.data(); - auto const d_result = result->mutable_view().data(); + auto const d_means = group_means.data(); + auto const d_result = result->mutable_view().data(); if (!cudf::is_dictionary(values.type())) { auto const values_iter = d_values.begin(); diff --git a/cpp/src/groupby/sort/group_merge_m2.cu b/cpp/src/groupby/sort/group_merge_m2.cu index 120fe41a314..1545f1830f2 100644 --- a/cpp/src/groupby/sort/group_merge_m2.cu +++ b/cpp/src/groupby/sort/group_merge_m2.cu @@ -131,42 +131,42 @@ std::unique_ptr group_merge_m2(column_view const& values, CUDF_EXPECTS(values.num_children() == 3, "Input to `group_merge_m2` must be a structs column having 3 children columns."); - using ResultType = id_to_type; + using result_type = id_to_type; static_assert( - 
std::is_same_v, ResultType>); + std::is_same_v, result_type>); CUDF_EXPECTS(values.child(0).type().id() == type_id::INT32 && - values.child(1).type().id() == type_to_id() && - values.child(2).type().id() == type_to_id(), + values.child(1).type().id() == type_to_id() && + values.child(2).type().id() == type_to_id(), "Input to `group_merge_m2` must be a structs column having children columns " "containing tuples of groupwise (M2_value, mean, valid_count)."); auto result_counts = make_numeric_column( data_type(type_to_id()), num_groups, mask_state::UNALLOCATED, stream, mr); auto result_means = make_numeric_column( - data_type(type_to_id()), num_groups, mask_state::UNALLOCATED, stream, mr); + data_type(type_to_id()), num_groups, mask_state::UNALLOCATED, stream, mr); auto result_M2s = make_numeric_column( - data_type(type_to_id()), num_groups, mask_state::UNALLOCATED, stream, mr); + data_type(type_to_id()), num_groups, mask_state::UNALLOCATED, stream, mr); auto validities = rmm::device_uvector(num_groups, stream); // Perform merging for all the aggregations. Their output (and their validity data) are written // out concurrently through an output zip iterator. - using IteratorTuple = thrust::tuple; - using ZipIterator = thrust::zip_iterator; + using iterator_tuple = thrust::tuple; + using output_iterator = thrust::zip_iterator; auto const out_iter = - ZipIterator{thrust::make_tuple(result_counts->mutable_view().template data(), - result_means->mutable_view().template data(), - result_M2s->mutable_view().template data(), - validities.begin())}; + output_iterator{thrust::make_tuple(result_counts->mutable_view().template data(), + result_means->mutable_view().template data(), + result_M2s->mutable_view().template data(), + validities.begin())}; auto const count_valid = values.child(0); auto const mean_values = values.child(1); auto const M2_values = values.child(2); auto const iter = thrust::make_counting_iterator(0); - auto const fn = merge_fn{group_offsets.begin(), - count_valid.template begin(), - mean_values.template begin(), - M2_values.template begin()}; + auto const fn = merge_fn{group_offsets.begin(), + count_valid.template begin(), + mean_values.template begin(), + M2_values.template begin()}; thrust::transform(rmm::exec_policy(stream), iter, iter + num_groups, out_iter, fn); // Generate bitmask for the output. 
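The merge step that `accumulate_fn` and `merge_fn` implement in the patches above is the standard parallel update for combining partial (count, mean, M2) tuples, as referenced in the doxygen of `group_merge_m2`. Below is a minimal host-side sketch of that update: plain standalone C++, not part of any of these patches, where the `partial` struct and `merge` function are illustrative names only and the numbers in `main` are taken from the SimpleInput unit test (key = 2, whose full value set {1, 4, 5, 9} is split across the three partitions as {1}, {4, 5} and {9}).

// Host-side sketch (not libcudf code) of the pairwise update performed by `accumulate_fn`.
// `partial` and `merge` are illustrative names, not part of the patch or the cudf API.
#include <cstdint>
#include <iostream>

struct partial {
  std::int64_t count;  // number of valid values
  double mean;         // mean of the valid values
  double M2;           // sum of squared deviations from the mean
};

partial merge(partial const& a, partial const& b)
{
  // Empty partial results contribute nothing (mirrors the n == 0 handling in merge_fn).
  if (b.count == 0) { return a; }
  if (a.count == 0) { return b; }

  auto const n     = a.count + b.count;
  auto const delta = b.mean - a.mean;
  // M2_ab = M2_a + M2_b + delta^2 * n_a * n_b / n_ab
  auto const M2 = a.M2 + b.M2 +
                  delta * delta * static_cast<double>(a.count) *
                    static_cast<double>(b.count) / static_cast<double>(n);
  auto const mean = (a.mean * a.count + b.mean * b.count) / static_cast<double>(n);
  return partial{n, mean, M2};
}

int main()
{
  // Key 2 of the SimpleInput test: the values {1, 4, 5, 9} arrive as the partial
  // results of the three partitions {1}, {4, 5} and {9}.
  partial const p1{1, 1.0, 0.0};
  partial const p2{2, 4.5, 0.5};
  partial const p3{1, 9.0, 0.0};

  auto const merged = merge(merge(p1, p2), p3);
  std::cout << merged.count << " " << merged.mean << " " << merged.M2 << "\n";
  // Prints: 4 4.75 32.75 -- the same M2 the test expects for key 2.
}

Applied groupwise on the device, this same update is what produces the merged COUNT_VALID/MEAN/M2 structs column returned by `group_merge_m2`.
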
From 18728460d7c1dd88a06e402ac3b77756565d847c Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 6 Jul 2021 21:05:41 -0600 Subject: [PATCH 18/19] Rewrite doxygen --- cpp/include/cudf/aggregation.hpp | 29 ++++++++++++----------- cpp/src/groupby/sort/group_merge_m2.cu | 2 +- cpp/src/groupby/sort/group_reductions.hpp | 14 +++++------ cpp/tests/groupby/merge_m2_tests.cpp | 7 +++--- 4 files changed, 26 insertions(+), 26 deletions(-) diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index 9ab5739b0b6..a2f59de54db 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -67,9 +67,9 @@ class aggregation { ALL, ///< all reduction SUM_OF_SQUARES, ///< sum of squares reduction MEAN, ///< arithmetic mean reduction - M2, ///< groupwise sum of squares of differences from the group mean - VARIANCE, ///< groupwise variance - STD, ///< groupwise standard deviation + M2, ///< sum of squares of differences from the mean + VARIANCE, ///< variance + STD, ///< standard deviation MEDIAN, ///< median reduction QUANTILE, ///< compute specified quantile(s) ARGMAX, ///< Index of max element @@ -164,11 +164,13 @@ std::unique_ptr make_mean_aggregation(); /** * @brief Factory to create a M2 aggregation * - * A M2 aggregation is groupwise sum of squares of differences from the group mean. That is: - * `M2(group) = SUM((x - MEAN(group)) * (x - MEAN(group)), for all x in group)`. + * A M2 aggregation is sum of squares of differences from the mean. That is: + * `M2 = SUM((x - MEAN) * (x - MEAN))`. * * This aggregation produces the intermediate values that are used to compute variance and standard - * deviation in distributed computing. + * deviation across multiple discrete sets. See + * `https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm` for more + * detail. */ template std::unique_ptr make_m2_aggregation(); @@ -347,15 +349,14 @@ std::unique_ptr make_merge_sets_aggregation(null_equality nulls_equal = nu /** * @brief Factory to create a MERGE_M2 aggregation * - * This aggregation is designed specifically to accommodate distributed computing of `M2` - * aggregation. The partial results input to this aggregation is a structs column with children are - * columns generated by three groupby aggregations: `COUNT_VALID`, `MEAN`, and `M2`. The output of - * this aggregation is also a structs column containing the merged values of all those aggregations, - * which are all required for recursively merging of `M2` values. + * Merges the results of `M2` aggregations on independent sets into a new `M2` value equivalent to + * if a single `M2` aggregation was done across all of the sets at once. This aggregation is only + * valid on structs whose members are the result of the `COUNT_VALID`, `MEAN`, and `M2` aggregations + * on the same sets. The output of this aggregation is a struct containing the merged `COUNT_VALID`, + * `MEAN`, and `M2` aggregations. * - * The partial results of `M2` aggregation are expected to be all non-negative numbers, since they - * are expected to be output from `M2` aggregation. However, this will not be checked due to - * performance reason. + * The input `M2` aggregation values are expected to be all non-negative numbers, since they + * were output from `M2` aggregation. 
*/ template std::unique_ptr make_merge_m2_aggregation(); diff --git a/cpp/src/groupby/sort/group_merge_m2.cu b/cpp/src/groupby/sort/group_merge_m2.cu index 1545f1830f2..15ada3e9d74 100644 --- a/cpp/src/groupby/sort/group_merge_m2.cu +++ b/cpp/src/groupby/sort/group_merge_m2.cu @@ -138,7 +138,7 @@ std::unique_ptr group_merge_m2(column_view const& values, values.child(1).type().id() == type_to_id() && values.child(2).type().id() == type_to_id(), "Input to `group_merge_m2` must be a structs column having children columns " - "containing tuples of groupwise (M2_value, mean, valid_count)."); + "containing tuples of (M2_value, mean, valid_count)."); auto result_counts = make_numeric_column( data_type(type_to_id()), num_groups, mask_state::UNALLOCATED, stream, mr); diff --git a/cpp/src/groupby/sort/group_reductions.hpp b/cpp/src/groupby/sort/group_reductions.hpp index 2b88317fcc3..2770162da2d 100644 --- a/cpp/src/groupby/sort/group_reductions.hpp +++ b/cpp/src/groupby/sort/group_reductions.hpp @@ -218,9 +218,9 @@ std::unique_ptr group_count_all(cudf::device_span group rmm::mr::device_memory_resource* mr); /** - * @brief Internal API to calculate groupwise sum of squares of differences from group means. + * @brief Internal API to calculate sum of squares of differences from means. * - * If there are only nulls in the group, the output value of that group will be `0`. + * If there are only nulls in the group, the output value of that group will be null. * * @code{.pseudo} * values = [2, 1, 4, -1, -2, , 4, ] @@ -420,17 +420,17 @@ std::unique_ptr group_merge_lists(column_view const& values, * @brief Internal API to merge grouped M2 values corresponding to the same key. * * The values of M2 are merged following the parallel algorithm described here: - * https://www.wikiwand.com/en/Algorithms_for_calculating_variance#/Parallel_algorithm + * `https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm` * - * Merging M2 values require accessing to partial M2 values and also groupwise means and group valid - * counts. Thus, the input to this aggregation need to be a structs column containing tuples of - * groupwise `(valid_count, mean, M2_value)`. + * Merging M2 values require accessing to partial M2 values, means, and valid counts. Thus, the + * input to this aggregation need to be a structs column containing tuples of 3 values + * `(valid_count, mean, M2)`. * * This aggregation not only merges the partial results of `M2` but also merged all the partial * results of input aggregations (`COUNT_VALID`, `MEAN`, and `M2`). As such, the output will be a * structs column containing children columns of merged `COUNT_VALID`, `MEAN`, and `M2` values. * - * @param values Grouped values (tuples of groupwise `(valid_count, mean, M2_value)`) to merge. + * @param values Grouped values (tuples of values `(valid_count, mean, M2)`) to merge. * @param group_offsets Offsets of groups' starting points within @p values. * @param num_groups Number of groups. * @param mr Device memory resource used to allocate the returned column's device memory diff --git a/cpp/tests/groupby/merge_m2_tests.cpp b/cpp/tests/groupby/merge_m2_tests.cpp index 2f5f5f3b351..63451f9612d 100644 --- a/cpp/tests/groupby/merge_m2_tests.cpp +++ b/cpp/tests/groupby/merge_m2_tests.cpp @@ -50,10 +50,9 @@ using structs_col = cudf::test::structs_column_wrapper; using vcol_views = std::vector; /** - * @brief Compute groupwise `COUNT_VALID`, `MEAN`, `M2` aggregations for the given values - * columns. 
+ * @brief Compute `COUNT_VALID`, `MEAN`, `M2` aggregations for the given values columns. * @return A pair of unique keys column and a structs column containing the computed values of - * `COUNT_VALID`, `MEAN`, `M2`. + * (`COUNT_VALID`, `MEAN`, `M2`). */ auto compute_partial_results(cudf::column_view const& keys, cudf::column_view const& values) { @@ -78,7 +77,7 @@ auto compute_partial_results(cudf::column_view const& keys, cudf::column_view co * @brief Perform merging for partial results of M2 aggregations. * * @return A pair of unique keys column and a structs column containing the merged values of - * `COUNT_VALID`, `MEAN`, `M2`. + * (`COUNT_VALID`, `MEAN`, `M2`). */ auto merge_M2(vcol_views const& keys_cols, vcol_views const& values_cols) { From a1d00b1a02ffab11cb4c7cc6997eafb7fd13eee8 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 6 Jul 2021 21:40:38 -0600 Subject: [PATCH 19/19] Fix formatting --- cpp/src/groupby/sort/group_merge_m2.cu | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/src/groupby/sort/group_merge_m2.cu b/cpp/src/groupby/sort/group_merge_m2.cu index 15ada3e9d74..4e2a5b68abc 100644 --- a/cpp/src/groupby/sort/group_merge_m2.cu +++ b/cpp/src/groupby/sort/group_merge_m2.cu @@ -105,7 +105,9 @@ struct merge_fn { auto accumulator = accumulate_fn{get_partial_result(start_idx)}; // Then, accumulate (merge) the remaining partial results into that accumulator. - for (auto idx = start_idx + 1; idx < end_idx; ++idx) { accumulator(get_partial_result(idx)); } + for (auto idx = start_idx + 1; idx < end_idx; ++idx) { + accumulator(get_partial_result(idx)); + } // Get the final result after merging. auto const& merge_vals = accumulator.merge_vals;
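
End to end, the pattern exercised by the tests is: compute COUNT_VALID, MEAN and M2 per partition, assemble the three result columns into a structs column, concatenate the per-partition keys and structs, and run a single MERGE_M2 groupby over the concatenation (see `compute_partial_results` and `merge_M2` in merge_m2_tests.cpp). Variance and standard deviation can then be derived from the merged tuple. The snippet below is a minimal host-side sketch of that last step, operating on scalar values rather than cudf columns and not using any libcudf API; the input numbers come from the merged result for key = 2 in the SimpleInput test, and ddof is the usual delta degrees of freedom (divisor N - ddof).

// Host-side sketch (not libcudf code): deriving variance and standard deviation
// from one merged (count, mean, M2) tuple produced by a MERGE_M2 aggregation.
#include <cmath>
#include <cstdio>

int main()
{
  // Merged (count, M2) for key = 2 in the SimpleInput test, i.e. values {1, 4, 5, 9}.
  int const count = 4;
  double const M2 = 32.75;

  int const ddof        = 1;                    // delta degrees of freedom
  double const variance = M2 / (count - ddof);  // 32.75 / 3 ~= 10.9167
  double const stddev   = std::sqrt(variance);  // ~= 3.3040

  std::printf("variance = %.4f, stddev = %.4f\n", variance, stddev);
  return 0;
}

Note that the merged mean is not needed to compute the final variance itself; it is carried in the output, together with the valid count, so that further rounds of merging remain possible.
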