From fdb6d34d0800e841de71215622cc59dae76671c1 Mon Sep 17 00:00:00 2001 From: Steve Bronder Date: Tue, 12 Sep 2023 17:37:46 -0400 Subject: [PATCH 1/9] Fixes adjoint accumulation for reverse mode where aliasing can occur. Creates a assignment op tag that is used by adjoint_results to do a += instead of a = into the adjoint matrix --- stan/math/opencl/kernel_generator.hpp | 2 +- .../kernel_generator/as_operation_cl.hpp | 18 ++-- .../kernel_generator/assignment_ops.hpp | 71 +++++++++++++++ stan/math/opencl/kernel_generator/load.hpp | 18 ++-- .../kernel_generator/multi_result_kernel.hpp | 40 +++++++-- .../opencl/kernel_generator/operation_cl.hpp | 21 ++++- stan/math/opencl/prim/normal_lccdf.hpp | 9 +- stan/math/opencl/rev.hpp | 1 + stan/math/opencl/rev/adjoint_results.hpp | 18 ++-- stan/math/opencl/rev/grad.hpp | 31 +++++++ .../kernel_generator/assignment_ops_test.cpp | 27 ++++++ test/unit/math/opencl/rev/add_test.cpp | 86 +++++-------------- test/unit/math/opencl/rev/grad_test.cpp | 37 ++++++++ 13 files changed, 275 insertions(+), 104 deletions(-) create mode 100644 stan/math/opencl/kernel_generator/assignment_ops.hpp create mode 100644 stan/math/opencl/rev/grad.hpp create mode 100644 test/unit/math/opencl/kernel_generator/assignment_ops_test.cpp create mode 100644 test/unit/math/opencl/rev/grad_test.cpp diff --git a/stan/math/opencl/kernel_generator.hpp b/stan/math/opencl/kernel_generator.hpp index a3cb839d776..fd78ce3f9a8 100644 --- a/stan/math/opencl/kernel_generator.hpp +++ b/stan/math/opencl/kernel_generator.hpp @@ -108,7 +108,7 @@ #include #include #include - +#include #include #include #include diff --git a/stan/math/opencl/kernel_generator/as_operation_cl.hpp b/stan/math/opencl/kernel_generator/as_operation_cl.hpp index c6e5e819174..a7cf2ac7264 100644 --- a/stan/math/opencl/kernel_generator/as_operation_cl.hpp +++ b/stan/math/opencl/kernel_generator/as_operation_cl.hpp @@ -2,6 +2,7 @@ #define STAN_MATH_OPENCL_KERNEL_GENERATOR_AS_OPERATION_CL_HPP #ifdef STAN_OPENCL +#include #include #include #include @@ -23,7 +24,7 @@ namespace math { * @param a an operation * @return operation */ -template >::value>> inline T_operation&& as_operation_cl(T_operation&& a) { @@ -37,7 +38,7 @@ inline T_operation&& as_operation_cl(T_operation&& a) { * @param a scalar * @return \c scalar_ wrapping the input */ -template , +template , require_not_same_t* = nullptr> inline scalar_ as_operation_cl(const T_scalar a) { return scalar_(a); @@ -50,6 +51,7 @@ inline scalar_ as_operation_cl(const T_scalar a) { * @param a scalar * @return \c scalar_ wrapping the input */ +template inline scalar_ as_operation_cl(const bool a) { return scalar_(a); } /** @@ -59,11 +61,11 @@ inline scalar_ as_operation_cl(const bool a) { return scalar_(a); } * @param a \c matrix_cl * @return \c load_ wrapping the input */ -template , is_arena_matrix_cl>> -inline load_ as_operation_cl(T_matrix_cl&& a) { - return load_(std::forward(a)); +inline load_ as_operation_cl(T_matrix_cl&& a) { + return load_(std::forward(a)); } /** @@ -74,11 +76,11 @@ inline load_ as_operation_cl(T_matrix_cl&& a) { * rvalue reference, the reference is removed, so that a variable of this type * actually stores the value. */ -template +template using as_operation_cl_t = std::conditional_t< std::is_lvalue_reference::value, - decltype(as_operation_cl(std::declval())), - std::remove_reference_t()))>>; + decltype(as_operation_cl(std::declval())), + std::remove_reference_t(std::declval()))>>; /** @}*/ } // namespace math diff --git a/stan/math/opencl/kernel_generator/assignment_ops.hpp b/stan/math/opencl/kernel_generator/assignment_ops.hpp new file mode 100644 index 00000000000..5e3919ced95 --- /dev/null +++ b/stan/math/opencl/kernel_generator/assignment_ops.hpp @@ -0,0 +1,71 @@ +#ifndef STAN_MATH_OPENCL_KERNEL_GENERATOR_ASSIGNMENT_OPS +#define STAN_MATH_OPENCL_KERNEL_GENERATOR_ASSIGNMENT_OPS +#ifdef STAN_OPENCL +#include + +namespace stan { +namespace math { + +/** + * Ops that decide the type of assignment for LHS operations + */ +enum class assignment_ops_cl {equals, plus_equals, minus_equals, divide_equals}; + +/** + * @param value A static constexpr const char* member for printing assignment ops + */ +template +struct assignment_op_str; + +template <> +struct assignment_op_str { + static constexpr const char* value = " = "; +}; + +template <> +struct assignment_op_str { + static constexpr const char* value = " += "; +}; + +template <> +struct assignment_op_str { + static constexpr const char* value = " *= "; +}; + +template <> +struct assignment_op_str { + static constexpr const char* value = " /= "; +}; + + +namespace internal { +template +struct has_assignment_op_str : std::false_type {}; + +template +struct has_assignment_op_str> : std::true_type {}; + +} // namespace internal + +/** + * @tparam T A type that does not have an `assignment_op` static constexpr member type + * @return A constexpr const char* equal to `" = "` + */ +template >::value>* = nullptr> +inline constexpr const char* assignment_op() noexcept { + return " = "; +} + +/** + * @tparam T A type that has an `assignment_op` static constexpr member type + * @return The types assignment op as a constexpr const char* + */ +template ::value>* = nullptr> +inline constexpr const char* assignment_op() noexcept { + return assignment_op_str::assignment_op>::value; +} + +} +} +#endif +#endif diff --git a/stan/math/opencl/kernel_generator/load.hpp b/stan/math/opencl/kernel_generator/load.hpp index 319557959b6..bd3eaafb1c3 100644 --- a/stan/math/opencl/kernel_generator/load.hpp +++ b/stan/math/opencl/kernel_generator/load.hpp @@ -4,6 +4,8 @@ #include #include +#include + #include #include #include @@ -23,17 +25,20 @@ namespace math { /** * Represents an access to a \c matrix_cl in kernel generator expressions * @tparam T \c matrix_cl + * @tparam AssignOp tells higher level operations whether the final operation should be an assignment or a type of compound assignment. */ -template +template class load_ - : public operation_cl_lhs, + : public operation_cl_lhs, typename std::remove_reference_t::type> { protected: T a_; public: + + static constexpr assignment_ops_cl assignment_op = AssignOp; using Scalar = typename std::remove_reference_t::type; - using base = operation_cl, Scalar>; + using base = operation_cl, Scalar>; using base::var_name_; static_assert(disjunction, is_arena_matrix_cl>::value, "load_: argument a must be a matrix_cl!"); @@ -51,9 +56,9 @@ class load_ * Creates a deep copy of this expression. * @return copy of \c *this */ - inline load_ deep_copy() & { return load_(a_); } - inline load_ deep_copy() const& { return load_(a_); } - inline load_ deep_copy() && { return load_(std::forward(a_)); } + inline load_ deep_copy() & { return load_(a_); } + inline load_ deep_copy() const& { return load_(a_); } + inline load_ deep_copy() && { return load_(std::forward(a_)); } /** * Generates kernel code for this expression. @@ -327,6 +332,7 @@ class load_ } } }; + /** @}*/ } // namespace math } // namespace stan diff --git a/stan/math/opencl/kernel_generator/multi_result_kernel.hpp b/stan/math/opencl/kernel_generator/multi_result_kernel.hpp index a82d033c5f3..3b9362ced06 100644 --- a/stan/math/opencl/kernel_generator/multi_result_kernel.hpp +++ b/stan/math/opencl/kernel_generator/multi_result_kernel.hpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -334,13 +335,34 @@ class results_cl { == sizeof...(T_expressions)>> void operator+=(const expressions_cl& exprs) { index_apply([this, &exprs](auto... Is) { - auto tmp = std::tuple_cat(make_assignment_pair( + auto tmp = std::tuple_cat(make_assignment_pair( std::get(results_), std::get(exprs.expressions_))...); index_apply::value>( [this, &tmp](auto... Is2) { assignment_impl(std::make_tuple(std::make_pair( - std::get(tmp).first, - std::get(tmp).first + std::get(tmp).second)...)); + std::get(tmp).first, std::get(tmp).second)...)); + }); + }); + } + + /** + * Incrementing \c results_ object by \c expressions_cl object + * executes the kernel that evaluates expressions and increments results by + * those expressions. + * @tparam T_expressions types of expressions + * @param exprs expressions + */ + template > + void operator-=(const expressions_cl& exprs) { + index_apply([this, &exprs](auto... Is) { + auto tmp = std::tuple_cat(make_assignment_pair( + std::get(results_), std::get(exprs.expressions_))...); + index_apply::value>( + [this, &tmp](auto... Is2) { + assignment_impl(std::make_tuple(std::make_pair( + std::get(tmp).first, std::get(tmp).second)...)); }); }); } @@ -426,7 +448,7 @@ class results_cl { + parts.reduction_2d + "}\n"; } - return src; + return src; } /** @@ -529,7 +551,7 @@ class results_cl { * @param expression expression * @return a tuple of pair of result and expression */ - template , conjunction, std::is_arithmetic, as_operation_cl_t>( - as_operation_cl(std::forward(result)), + std::pair, as_operation_cl_t>( + as_operation_cl(std::forward(result)), as_operation_cl(std::forward(expression)))); } @@ -548,7 +570,7 @@ class results_cl { * @param expression expression * @return a tuple of pair of result and expression */ - template >* = nullptr> static auto make_assignment_pair(T_result&& result, T_expression&& expression) { @@ -562,7 +584,7 @@ class results_cl { * @param pass bool scalar * @return an empty tuple */ - template >* = nullptr, require_integral_t* = nullptr> static std::tuple<> make_assignment_pair(T_check&& result, T_pass&& pass) { diff --git a/stan/math/opencl/kernel_generator/operation_cl.hpp b/stan/math/opencl/kernel_generator/operation_cl.hpp index ef2d4977c97..84f026dbdf6 100644 --- a/stan/math/opencl/kernel_generator/operation_cl.hpp +++ b/stan/math/opencl/kernel_generator/operation_cl.hpp @@ -4,6 +4,7 @@ #include #include +#include #include #include #include @@ -74,6 +75,24 @@ struct kernel_parts { } }; +std::ostream& operator<<(std::ostream& os, kernel_parts& parts) { + os << "args:" << std::endl; + os << parts.args.substr(0, parts.args.size() - 2) << std::endl; + os << "Decl:" << std::endl; + os << parts.declarations << std::endl; + os << "Init:" << std::endl; + os << parts.initialization << std::endl; + os << "body:" << std::endl; + os << parts.body << std::endl; + os << "body_suffix:" << std::endl; + os << parts.body_suffix << std::endl; + os << "reduction_1d:" << std::endl; + os << parts.reduction_1d << std::endl; + os << "reduction_2d:" << std::endl; + os << parts.reduction_2d << std::endl; + return os; +} + /** * Base for all kernel generator operations. * @tparam Derived derived type @@ -201,7 +220,7 @@ class operation_cl : public operation_cl_base { generated, generated_all, ng, row_index_name, col_index_name, false); kernel_parts out_parts = result.get_kernel_parts_lhs( generated, generated_all, ng, row_index_name, col_index_name); - out_parts.body += " = " + derived().var_name_ + ";\n"; + out_parts.body += assignment_op() + derived().var_name_ + ";\n"; parts += out_parts; return parts; } diff --git a/stan/math/opencl/prim/normal_lccdf.hpp b/stan/math/opencl/prim/normal_lccdf.hpp index e9e7f97c079..24a49a70f6f 100644 --- a/stan/math/opencl/prim/normal_lccdf.hpp +++ b/stan/math/opencl/prim/normal_lccdf.hpp @@ -82,13 +82,12 @@ return_type_t normal_lccdf( matrix_cl mu_deriv_cl; matrix_cl sigma_deriv_cl; - results(check_y_not_nan, check_mu_finite, check_sigma_positive, lccdf_cl, - y_deriv_cl, mu_deriv_cl, sigma_deriv_cl) - = expressions(y_not_nan_expr, mu_finite_expr, sigma_positive_expr, - lccdf_expr, calc_if::value>(y_deriv), + results(check_y_not_nan, check_mu_finite, check_sigma_positive) + = expressions(y_not_nan_expr, mu_finite_expr, sigma_positive_expr); + results(lccdf_cl, y_deriv_cl, mu_deriv_cl, sigma_deriv_cl) + = expressions(lccdf_expr, calc_if::value>(y_deriv), calc_if::value>(mu_deriv), calc_if::value>(sigma_deriv)); - T_partials_return lccdf = LOG_HALF + sum(from_matrix_cl(lccdf_cl)); auto ops_partials = make_partials_propagator(y_col, mu_col, sigma_col); diff --git a/stan/math/opencl/rev.hpp b/stan/math/opencl/rev.hpp index 7303a5d34e3..f48de0b806f 100644 --- a/stan/math/opencl/rev.hpp +++ b/stan/math/opencl/rev.hpp @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include diff --git a/stan/math/opencl/rev/adjoint_results.hpp b/stan/math/opencl/rev/adjoint_results.hpp index c42297b418d..1a3cd238ec1 100644 --- a/stan/math/opencl/rev/adjoint_results.hpp +++ b/stan/math/opencl/rev/adjoint_results.hpp @@ -41,22 +41,21 @@ class adjoint_results_cl : protected results_cl { index_apply([&](auto... Is) { auto scalars = std::tuple_cat(select_scalar_assignments( std::get(this->results_), std::get(exprs.expressions_))...); - auto nonscalars_tmp = std::tuple_cat(select_nonscalar_assignments( + auto nonscalars_tmp = std::tuple_cat(select_nonscalar_plusequals( std::get(this->results_), std::get(exprs.expressions_))...); index_apply::value>( [&](auto... Is_nonscal) { auto nonscalars = std::make_tuple(std::make_pair( - std::get(nonscalars_tmp).first, - std::get(nonscalars_tmp).first - + std::get(nonscalars_tmp).second)...); - + std::get(nonscalars_tmp).first, std::get(nonscalars_tmp).second)...); + + index_apply::value>( [&](auto... Is_scal) { // evaluate all expressions this->assignment_impl(std::tuple_cat( nonscalars, - this->make_assignment_pair( + this->template make_assignment_pair( std::get<2>(std::get(scalars)), sum_2d(std::get<1>(std::get(scalars))))...)); @@ -100,6 +99,7 @@ class adjoint_results_cl : protected results_cl { return std::make_tuple(); } + /** * Selects assignments that have non-scalar var results. * @tparam T_result type of result. This overload is used for non-scalar vars. @@ -112,9 +112,9 @@ class adjoint_results_cl : protected results_cl { template * = nullptr, require_st_var* = nullptr> - auto select_nonscalar_assignments(const T_result& result, + auto select_nonscalar_plusequals(T_result&& result, T_expression&& expression) { - return results_cl::make_assignment_pair( + return results_cl::template make_assignment_pair( result.adj(), std::forward(expression)); } /** @@ -130,7 +130,7 @@ class adjoint_results_cl : protected results_cl { typename T_result, typename T_expression, std::enable_if_t::value || !is_var>::value>* = nullptr> - auto select_nonscalar_assignments(T_result&& result, + auto select_nonscalar_plusequals(T_result&& result, T_expression&& expression) { return std::make_tuple(); } diff --git a/stan/math/opencl/rev/grad.hpp b/stan/math/opencl/rev/grad.hpp new file mode 100644 index 00000000000..46b0e7cb0d7 --- /dev/null +++ b/stan/math/opencl/rev/grad.hpp @@ -0,0 +1,31 @@ +#ifndef STAN_MATH_OPENCL_REV_FUN_GRAD_HPP +#define STAN_MATH_OPENCL_REV_FUN_GRAD_HPP +#ifdef STAN_OPENCL +#include + +namespace stan { +namespace math { + +/** + * Propagate chain rule to calculate gradients starting from + * the specified variable. Resizes the input vector to be the + * correct size. + * + * The grad() function does not itself recover any memory. use + * recover_memory() or + * recover_memory_nested() to recover memory. + * + * @param[in] v Value of function being differentiated + * @param[in] x Variables being differentiated with respect to + * @param[out] g Gradient, d/dx v, evaluated at x. + */ +inline void grad(var& v, var_value>& x, + Eigen::VectorXd& g) { + grad(v.vi_); + g = from_matrix_cl(x.adj()); +} + +} // namespace math +} // namespace stan +#endif +#endif diff --git a/test/unit/math/opencl/kernel_generator/assignment_ops_test.cpp b/test/unit/math/opencl/kernel_generator/assignment_ops_test.cpp new file mode 100644 index 00000000000..d9db2f2a7d2 --- /dev/null +++ b/test/unit/math/opencl/kernel_generator/assignment_ops_test.cpp @@ -0,0 +1,27 @@ +#ifdef STAN_OPENCL +#include +#include +#include +#include +#include +#include +#include +#include + +TEST(KernelGenerator, assign_ops) { + using stan::math::matrix_cl; + using stan::math::var_value; + using stan::math::var; + using stan::math::to_matrix_cl; + Eigen::MatrixXd A = Eigen::MatrixXd::Random(10, 10); + Eigen::MatrixXd B = Eigen::MatrixXd::Random(10, 10); + Eigen::MatrixXd C = Eigen::MatrixXd::Random(10, 10); + C += A + B; + matrix_cl A_cl = to_matrix_cl(A); + matrix_cl B_cl = to_matrix_cl(B); + matrix_cl C_cl = to_matrix_cl(C); + results(C_cl) += expressions(A_cl + B_cl); +} + + +#endif diff --git a/test/unit/math/opencl/rev/add_test.cpp b/test/unit/math/opencl/rev/add_test.cpp index 15c4d501b2f..e07c438f828 100644 --- a/test/unit/math/opencl/rev/add_test.cpp +++ b/test/unit/math/opencl/rev/add_test.cpp @@ -5,75 +5,31 @@ #include #include -auto add_functor - = [](const auto& a, const auto& b) { return stan::math::add(a, b).eval(); }; -TEST(OpenCLPrim, add_v_small_zero) { - stan::math::vector_d d1(3), d2(3); - d1 << 1, 2, 3; - d2 << 3, 2, 1; - stan::math::test::compare_cpu_opencl_prim_rev(add_functor, d1, d2); - stan::math::vector_d d0(0); - stan::math::test::compare_cpu_opencl_prim_rev(add_functor, d0, d0); - - double d3 = 3.0; - stan::math::test::compare_cpu_opencl_prim_rev(add_functor, d1, d3); - stan::math::test::compare_cpu_opencl_prim_rev(add_functor, d3, d1); - stan::math::test::compare_cpu_opencl_prim_rev(add_functor, d3, d0); - stan::math::test::compare_cpu_opencl_prim_rev(add_functor, d0, d3); -} - -TEST(OpenCLPrim, add_rv_small_zero) { - stan::math::row_vector_d d1(3), d2(3); - d1 << 1, 2, 3; - d2 << 3, 2, 1; - stan::math::test::compare_cpu_opencl_prim_rev(add_functor, d1, d2); - - stan::math::vector_d d0(0); - stan::math::test::compare_cpu_opencl_prim_rev(add_functor, d0, d0); - - double d3 = 3.0; - stan::math::test::compare_cpu_opencl_prim_rev(add_functor, d1, d3); - stan::math::test::compare_cpu_opencl_prim_rev(add_functor, d3, d1); - stan::math::test::compare_cpu_opencl_prim_rev(add_functor, d3, d0); - stan::math::test::compare_cpu_opencl_prim_rev(add_functor, d0, d3); -} - -TEST(OpenCLPrim, add_m_small_zero) { - stan::math::matrix_d d1(3, 3), d2(3, 3); +TEST(OpenCLPrim, add_aliasing) { + stan::math::matrix_d d1(3, 3); d1 << 1, 2, 3, 4, 5, 6, 7, 8, 9; - d2 << 10, 100, 1000, 0, -10, -12, 2, 4, 8; - stan::math::test::compare_cpu_opencl_prim_rev(add_functor, d1, d2); - - stan::math::matrix_d d0(0, 0); - stan::math::test::compare_cpu_opencl_prim_rev(add_functor, d0, d0); - - double d3 = 3.0; - stan::math::test::compare_cpu_opencl_prim_rev(add_functor, d1, d3); - stan::math::test::compare_cpu_opencl_prim_rev(add_functor, d3, d1); - stan::math::test::compare_cpu_opencl_prim_rev(add_functor, d3, d0); - stan::math::test::compare_cpu_opencl_prim_rev(add_functor, d0, d3); -} - -TEST(OpenCLPrim, add_rev_exceptions) { using stan::math::matrix_cl; - stan::math::vector_d vd1(2), vd2(3); - stan::math::var_value> vd11 = stan::math::to_matrix_cl(vd1); - stan::math::var_value> vd22 = stan::math::to_matrix_cl(vd2); - EXPECT_THROW(stan::math::add(vd11, vd22), std::invalid_argument); - - stan::math::row_vector_d rvd1(2), rvd2(3); - stan::math::var_value> rvd11 - = stan::math::to_matrix_cl(rvd1); - stan::math::var_value> rvd22 - = stan::math::to_matrix_cl(rvd2); - EXPECT_THROW(stan::math::add(rvd11, rvd22), std::invalid_argument); - - stan::math::matrix_d md1(2, 2), md2(3, 3); - stan::math::var_value> md11 = stan::math::to_matrix_cl(md1); - stan::math::var_value> md22 = stan::math::to_matrix_cl(md2); - EXPECT_THROW(stan::math::add(md11, md22), std::invalid_argument); + using stan::math::var_value; + using stan::math::var; + using varmat_cl = var_value>; + varmat_cl d11 = stan::math::to_matrix_cl(d1); + // Add the same matrix as the left and right hand side + var res = stan::math::sum(stan::math::add(d11, d11)); + res.grad(); + // Get back adjoints + Eigen::MatrixXd grad_res = stan::math::from_matrix_cl(d11.adj()); + stan::math::recover_memory(); + Eigen::Matrix d_host = d1; + // Same op as above but on the host + var res_host = stan::math::sum(stan::math::add(d_host, d_host)); + res_host.grad(); + Eigen::MatrixXd grad_res_host = d_host.adj(); + std::cout << "OpenCL Adjoints: " << std::endl; + std::cout << grad_res << std::endl; + std::cout << "CPU Adjoints: " << std::endl; + std::cout << grad_res_host << std::endl; } #endif diff --git a/test/unit/math/opencl/rev/grad_test.cpp b/test/unit/math/opencl/rev/grad_test.cpp new file mode 100644 index 00000000000..e24a70d26b9 --- /dev/null +++ b/test/unit/math/opencl/rev/grad_test.cpp @@ -0,0 +1,37 @@ +#ifdef STAN_OPENCL +#include +#include +#include +#include +#include + +TEST(OpenCLGradTest, exceptions) { + using stan::math::var_value; + using stan::math::var; + using stan::math::matrix_cl; + using stan::math::to_matrix_cl; + using varmat_cl = var_value>; + Eigen::VectorXd a(6); + a << 1, 2, 3 , 4, 5, 6; + varmat_cl a_cl(to_matrix_cl(a)); + varmat_cl b_cl(a_cl.block(0, 0, 3, 1)); + varmat_cl c_cl(a_cl.block(3, 0, 3, 1)); + Eigen::VectorXd ret_grads_cl(6); + using stan::math::add; + using stan::math::subtract; + var ret = stan::math::sum(add(b_cl, b_cl)); + stan::math::grad(ret, a_cl, ret_grads_cl); + std::cout << "opencl grads: \n" << ret_grads_cl << std::endl; + stan::math::recover_memory(); + Eigen::Matrix a_host = a; + Eigen::Matrix b_host = a_host.segment(0, 3); + Eigen::Matrix c_host = a_host.segment(3, 3); + var ret_host = stan::math::sum(add(b_host, b_host)); + Eigen::VectorXd ret_grads_host(6); + stan::math::grad(ret_host, a_host, ret_grads_host); + std::cout << "host grads: \n" << ret_grads_host << std::endl; + stan::math::recover_memory(); +} + + +#endif From bfcd2223b6e2a1993b2f10ce5e2a54e3578845bb Mon Sep 17 00:00:00 2001 From: Steve Bronder Date: Wed, 13 Sep 2023 12:27:22 -0400 Subject: [PATCH 2/9] adds tests for the compound assignments --- .../kernel_generator/as_operation_cl.hpp | 10 +-- .../kernel_generator/assignment_ops.hpp | 38 +++++------ stan/math/opencl/kernel_generator/load.hpp | 4 +- .../kernel_generator/multi_result_kernel.hpp | 66 ++++++++++++++----- stan/math/opencl/rev/adjoint_results.hpp | 14 ++-- .../kernel_generator/assignment_ops_test.cpp | 63 +++++++++++++++++- 6 files changed, 142 insertions(+), 53 deletions(-) diff --git a/stan/math/opencl/kernel_generator/as_operation_cl.hpp b/stan/math/opencl/kernel_generator/as_operation_cl.hpp index a7cf2ac7264..8ea743ccdd7 100644 --- a/stan/math/opencl/kernel_generator/as_operation_cl.hpp +++ b/stan/math/opencl/kernel_generator/as_operation_cl.hpp @@ -24,7 +24,7 @@ namespace math { * @param a an operation * @return operation */ -template >::value>> inline T_operation&& as_operation_cl(T_operation&& a) { @@ -38,7 +38,7 @@ inline T_operation&& as_operation_cl(T_operation&& a) { * @param a scalar * @return \c scalar_ wrapping the input */ -template , +template , require_not_same_t* = nullptr> inline scalar_ as_operation_cl(const T_scalar a) { return scalar_(a); @@ -51,7 +51,7 @@ inline scalar_ as_operation_cl(const T_scalar a) { * @param a scalar * @return \c scalar_ wrapping the input */ -template +template inline scalar_ as_operation_cl(const bool a) { return scalar_(a); } /** @@ -61,7 +61,7 @@ inline scalar_ as_operation_cl(const bool a) { return scalar_(a); } * @param a \c matrix_cl * @return \c load_ wrapping the input */ -template , is_arena_matrix_cl>> inline load_ as_operation_cl(T_matrix_cl&& a) { @@ -76,7 +76,7 @@ inline load_ as_operation_cl(T_matrix_cl&& a) { * rvalue reference, the reference is removed, so that a variable of this type * actually stores the value. */ -template +template using as_operation_cl_t = std::conditional_t< std::is_lvalue_reference::value, decltype(as_operation_cl(std::declval())), diff --git a/stan/math/opencl/kernel_generator/assignment_ops.hpp b/stan/math/opencl/kernel_generator/assignment_ops.hpp index 5e3919ced95..cca9b8e7a6f 100644 --- a/stan/math/opencl/kernel_generator/assignment_ops.hpp +++ b/stan/math/opencl/kernel_generator/assignment_ops.hpp @@ -9,60 +9,56 @@ namespace math { /** * Ops that decide the type of assignment for LHS operations */ -enum class assignment_ops_cl {equals, plus_equals, minus_equals, divide_equals}; +enum class assign_op_cl {equals, plus_equals, minus_equals, divide_equals, multiply_equals}; +namespace internal { /** * @param value A static constexpr const char* member for printing assignment ops */ -template -struct assignment_op_str; +template +struct assignment_op_str_impl; template <> -struct assignment_op_str { +struct assignment_op_str_impl { static constexpr const char* value = " = "; }; template <> -struct assignment_op_str { +struct assignment_op_str_impl { static constexpr const char* value = " += "; }; template <> -struct assignment_op_str { - static constexpr const char* value = " *= "; +struct assignment_op_str_impl { + static constexpr const char* value = " -= "; }; template <> -struct assignment_op_str { +struct assignment_op_str_impl { static constexpr const char* value = " /= "; }; +template <> +struct assignment_op_str_impl { + static constexpr const char* value = " *= "; +}; -namespace internal { template -struct has_assignment_op_str : std::false_type {}; +struct assignment_op_str : assignment_op_str_impl {}; template -struct has_assignment_op_str> : std::true_type {}; +struct assignment_op_str> : assignment_op_str_impl {}; } // namespace internal -/** - * @tparam T A type that does not have an `assignment_op` static constexpr member type - * @return A constexpr const char* equal to `" = "` - */ -template >::value>* = nullptr> -inline constexpr const char* assignment_op() noexcept { - return " = "; -} /** * @tparam T A type that has an `assignment_op` static constexpr member type * @return The types assignment op as a constexpr const char* */ -template ::value>* = nullptr> +template inline constexpr const char* assignment_op() noexcept { - return assignment_op_str::assignment_op>::value; + return internal::assignment_op_str>::value; } } diff --git a/stan/math/opencl/kernel_generator/load.hpp b/stan/math/opencl/kernel_generator/load.hpp index bd3eaafb1c3..018ecce6e50 100644 --- a/stan/math/opencl/kernel_generator/load.hpp +++ b/stan/math/opencl/kernel_generator/load.hpp @@ -27,7 +27,7 @@ namespace math { * @tparam T \c matrix_cl * @tparam AssignOp tells higher level operations whether the final operation should be an assignment or a type of compound assignment. */ -template +template class load_ : public operation_cl_lhs, typename std::remove_reference_t::type> { @@ -36,7 +36,7 @@ class load_ public: - static constexpr assignment_ops_cl assignment_op = AssignOp; + static constexpr assign_op_cl assignment_op = AssignOp; using Scalar = typename std::remove_reference_t::type; using base = operation_cl, Scalar>; using base::var_name_; diff --git a/stan/math/opencl/kernel_generator/multi_result_kernel.hpp b/stan/math/opencl/kernel_generator/multi_result_kernel.hpp index 3b9362ced06..23e8771f73a 100644 --- a/stan/math/opencl/kernel_generator/multi_result_kernel.hpp +++ b/stan/math/opencl/kernel_generator/multi_result_kernel.hpp @@ -323,19 +323,19 @@ class results_cl { }); } - /** + /** * Incrementing \c results_ object by \c expressions_cl object * executes the kernel that evaluates expressions and increments results by * those expressions. * @tparam T_expressions types of expressions * @param exprs expressions */ - template > - void operator+=(const expressions_cl& exprs) { + void compound_assignment_impl(const expressions_cl& exprs) { index_apply([this, &exprs](auto... Is) { - auto tmp = std::tuple_cat(make_assignment_pair( + auto tmp = std::tuple_cat(make_assignment_pair( std::get(results_), std::get(exprs.expressions_))...); index_apply::value>( [this, &tmp](auto... Is2) { @@ -345,6 +345,20 @@ class results_cl { }); } + /** + * Incrementing \c results_ object by \c expressions_cl object + * executes the kernel that evaluates expressions and increments results by + * those expressions. + * @tparam T_expressions types of expressions + * @param exprs expressions + */ + template > + void operator+=(const expressions_cl& exprs) { + compound_assignment_impl(exprs); + } + /** * Incrementing \c results_ object by \c expressions_cl object * executes the kernel that evaluates expressions and increments results by @@ -356,15 +370,35 @@ class results_cl { typename = std::enable_if_t> void operator-=(const expressions_cl& exprs) { - index_apply([this, &exprs](auto... Is) { - auto tmp = std::tuple_cat(make_assignment_pair( - std::get(results_), std::get(exprs.expressions_))...); - index_apply::value>( - [this, &tmp](auto... Is2) { - assignment_impl(std::make_tuple(std::make_pair( - std::get(tmp).first, std::get(tmp).second)...)); - }); - }); + compound_assignment_impl(exprs); + } + + /** + * Incrementing \c results_ object by \c expressions_cl object + * executes the kernel that evaluates expressions and increments results by + * those expressions. + * @tparam T_expressions types of expressions + * @param exprs expressions + */ + template > + void operator/=(const expressions_cl& exprs) { + compound_assignment_impl(exprs); + } + + /** + * Incrementing \c results_ object by \c expressions_cl object + * executes the kernel that evaluates expressions and increments results by + * those expressions. + * @tparam T_expressions types of expressions + * @param exprs expressions + */ + template > + void operator*=(const expressions_cl& exprs) { + compound_assignment_impl(exprs); } /** @@ -551,7 +585,7 @@ class results_cl { * @param expression expression * @return a tuple of pair of result and expression */ - template , conjunction, std::is_arithmetic>* = nullptr> static auto make_assignment_pair(T_result&& result, T_expression&& expression) { @@ -584,7 +618,7 @@ class results_cl { * @param pass bool scalar * @return an empty tuple */ - template >* = nullptr, require_integral_t* = nullptr> static std::tuple<> make_assignment_pair(T_check&& result, T_pass&& pass) { diff --git a/stan/math/opencl/rev/adjoint_results.hpp b/stan/math/opencl/rev/adjoint_results.hpp index 1a3cd238ec1..b9a9b1a1c9b 100644 --- a/stan/math/opencl/rev/adjoint_results.hpp +++ b/stan/math/opencl/rev/adjoint_results.hpp @@ -41,7 +41,7 @@ class adjoint_results_cl : protected results_cl { index_apply([&](auto... Is) { auto scalars = std::tuple_cat(select_scalar_assignments( std::get(this->results_), std::get(exprs.expressions_))...); - auto nonscalars_tmp = std::tuple_cat(select_nonscalar_plusequals( + auto nonscalars_tmp = std::tuple_cat(select_nonscalar_assignments( std::get(this->results_), std::get(exprs.expressions_))...); index_apply::value>( @@ -55,7 +55,7 @@ class adjoint_results_cl : protected results_cl { // evaluate all expressions this->assignment_impl(std::tuple_cat( nonscalars, - this->template make_assignment_pair( + this->template make_assignment_pair( std::get<2>(std::get(scalars)), sum_2d(std::get<1>(std::get(scalars))))...)); @@ -109,12 +109,12 @@ class adjoint_results_cl : protected results_cl { * @return pair of result and expression or empty tuple (if the result is * check or the expression is `calc_if`. */ - template * = nullptr, require_st_var* = nullptr> - auto select_nonscalar_plusequals(T_result&& result, + auto select_nonscalar_assignments(T_result&& result, T_expression&& expression) { - return results_cl::template make_assignment_pair( + return results_cl::template make_assignment_pair( result.adj(), std::forward(expression)); } /** @@ -126,11 +126,11 @@ class adjoint_results_cl : protected results_cl { * @param expression expression * @return empty tuple */ - template < + template ::value || !is_var>::value>* = nullptr> - auto select_nonscalar_plusequals(T_result&& result, + auto select_nonscalar_assignments(T_result&& result, T_expression&& expression) { return std::make_tuple(); } diff --git a/test/unit/math/opencl/kernel_generator/assignment_ops_test.cpp b/test/unit/math/opencl/kernel_generator/assignment_ops_test.cpp index d9db2f2a7d2..361e72a8630 100644 --- a/test/unit/math/opencl/kernel_generator/assignment_ops_test.cpp +++ b/test/unit/math/opencl/kernel_generator/assignment_ops_test.cpp @@ -8,20 +8,79 @@ #include #include -TEST(KernelGenerator, assign_ops) { +TEST(KernelGenerator, plus_equals) { using stan::math::matrix_cl; using stan::math::var_value; using stan::math::var; using stan::math::to_matrix_cl; + using stan::math::from_matrix_cl; Eigen::MatrixXd A = Eigen::MatrixXd::Random(10, 10); Eigen::MatrixXd B = Eigen::MatrixXd::Random(10, 10); Eigen::MatrixXd C = Eigen::MatrixXd::Random(10, 10); - C += A + B; matrix_cl A_cl = to_matrix_cl(A); matrix_cl B_cl = to_matrix_cl(B); matrix_cl C_cl = to_matrix_cl(C); + C += A + B; results(C_cl) += expressions(A_cl + B_cl); + Eigen::MatrixXd C_cl_host = from_matrix_cl(C_cl); + EXPECT_MATRIX_EQ(C_cl_host, C) +} + +TEST(KernelGenerator, minus_equals) { + using stan::math::matrix_cl; + using stan::math::var_value; + using stan::math::var; + using stan::math::to_matrix_cl; + using stan::math::from_matrix_cl; + Eigen::MatrixXd A = Eigen::MatrixXd::Random(10, 10); + Eigen::MatrixXd B = Eigen::MatrixXd::Random(10, 10); + Eigen::MatrixXd C = Eigen::MatrixXd::Random(10, 10); + matrix_cl A_cl = to_matrix_cl(A); + matrix_cl B_cl = to_matrix_cl(B); + matrix_cl C_cl = to_matrix_cl(C); + C -= A + B; + results(C_cl) -= expressions(A_cl + B_cl); + Eigen::MatrixXd C_cl_host = from_matrix_cl(C_cl); + EXPECT_MATRIX_EQ(C_cl_host, C) +} + +TEST(KernelGenerator, divide_equals) { + using stan::math::matrix_cl; + using stan::math::var_value; + using stan::math::var; + using stan::math::to_matrix_cl; + using stan::math::from_matrix_cl; + Eigen::MatrixXd A = Eigen::MatrixXd::Random(10, 10); + Eigen::MatrixXd B = Eigen::MatrixXd::Random(10, 10); + Eigen::MatrixXd C = Eigen::MatrixXd::Random(10, 10); + matrix_cl A_cl = to_matrix_cl(A); + matrix_cl B_cl = to_matrix_cl(B); + matrix_cl C_cl = to_matrix_cl(C); + C.array() /= A.array() + B.array(); + results(C_cl) /= expressions(A_cl + B_cl); + Eigen::MatrixXd C_cl_host = from_matrix_cl(C_cl); + EXPECT_MATRIX_EQ(C_cl_host, C) +} + +TEST(KernelGenerator, times_equals) { + using stan::math::matrix_cl; + using stan::math::var_value; + using stan::math::var; + using stan::math::to_matrix_cl; + using stan::math::from_matrix_cl; + + Eigen::MatrixXd A = Eigen::MatrixXd::Random(10, 10); + Eigen::MatrixXd B = Eigen::MatrixXd::Random(10, 10); + Eigen::MatrixXd C = Eigen::MatrixXd::Random(10, 10); + matrix_cl A_cl = to_matrix_cl(A); + matrix_cl B_cl = to_matrix_cl(B); + matrix_cl C_cl = to_matrix_cl(C); + C.array() *= A.array() + B.array(); + results(C_cl) *= expressions(A_cl + B_cl); + Eigen::MatrixXd C_cl_host = from_matrix_cl(C_cl); + EXPECT_MATRIX_EQ(C_cl_host, C) } + #endif From 02554505b0e6e5fa665a67ce564e7b99baac7db2 Mon Sep 17 00:00:00 2001 From: Steve Bronder Date: Wed, 13 Sep 2023 12:50:03 -0400 Subject: [PATCH 3/9] update docs --- .../kernel_generator/as_operation_cl.hpp | 8 ++++++++ .../kernel_generator/multi_result_kernel.hpp | 20 ++++++++++++++++--- stan/math/opencl/rev/adjoint_results.hpp | 4 ++++ 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/stan/math/opencl/kernel_generator/as_operation_cl.hpp b/stan/math/opencl/kernel_generator/as_operation_cl.hpp index 8ea743ccdd7..c6e0d6732a9 100644 --- a/stan/math/opencl/kernel_generator/as_operation_cl.hpp +++ b/stan/math/opencl/kernel_generator/as_operation_cl.hpp @@ -20,6 +20,7 @@ namespace math { /** * Converts any valid kernel generator expression into an operation. This is an * overload for operations - a no-op + * @tparam AssignOp ignored * @tparam T_operation type of the input operation * @param a an operation * @return operation @@ -34,6 +35,7 @@ inline T_operation&& as_operation_cl(T_operation&& a) { /** * Converts any valid kernel generator expression into an operation. This is an * overload for scalars (arithmetic types). It wraps them into \c scalar_. + * @tparam AssignOp ignored * @tparam T_scalar type of the input scalar * @param a scalar * @return \c scalar_ wrapping the input @@ -48,6 +50,7 @@ inline scalar_ as_operation_cl(const T_scalar a) { * Converts any valid kernel generator expression into an operation. This is an * overload for bool scalars. It wraps them into \c scalar_ as \c bool can * not be used as a type of a kernel argument. + * @tparam AssignOp ignored * @param a scalar * @return \c scalar_ wrapping the input */ @@ -57,6 +60,8 @@ inline scalar_ as_operation_cl(const bool a) { return scalar_(a); } /** * Converts any valid kernel generator expression into an operation. This is an * overload for \c matrix_cl. It wraps them into into \c load_. + * @tparam AssignOp an optional `assign_op_cl` that dictates whether the object + * is assigned using standard or compound assign. * @tparam T_matrix_cl \c matrix_cl * @param a \c matrix_cl * @return \c load_ wrapping the input @@ -75,6 +80,9 @@ inline load_ as_operation_cl(T_matrix_cl&& a) { * as_operation_cl_t. If the return value of \c as_operation_cl() would be a * rvalue reference, the reference is removed, so that a variable of this type * actually stores the value. + * @tparam T a `matrix_cl` or `Scalar` type + * @tparam AssignOp an optional `assign_op_cl` that dictates whether the object + * is assigned using standard or compound assign. */ template using as_operation_cl_t = std::conditional_t< diff --git a/stan/math/opencl/kernel_generator/multi_result_kernel.hpp b/stan/math/opencl/kernel_generator/multi_result_kernel.hpp index 23e8771f73a..8358605aa89 100644 --- a/stan/math/opencl/kernel_generator/multi_result_kernel.hpp +++ b/stan/math/opencl/kernel_generator/multi_result_kernel.hpp @@ -327,6 +327,8 @@ class results_cl { * Incrementing \c results_ object by \c expressions_cl object * executes the kernel that evaluates expressions and increments results by * those expressions. + * @tparam AssignOp an optional `assign_op_cl` that dictates whether the object + * is assigned using standard or compound assign. * @tparam T_expressions types of expressions * @param exprs expressions */ @@ -360,7 +362,7 @@ class results_cl { } /** - * Incrementing \c results_ object by \c expressions_cl object + * Decrement \c results_ object by \c expressions_cl object * executes the kernel that evaluates expressions and increments results by * those expressions. * @tparam T_expressions types of expressions @@ -374,7 +376,7 @@ class results_cl { } /** - * Incrementing \c results_ object by \c expressions_cl object + * Elementwise divide \c results_ object by \c expressions_cl object * executes the kernel that evaluates expressions and increments results by * those expressions. * @tparam T_expressions types of expressions @@ -388,7 +390,7 @@ class results_cl { } /** - * Incrementing \c results_ object by \c expressions_cl object + * Elementwise multiply \c results_ object by \c expressions_cl object * executes the kernel that evaluates expressions and increments results by * those expressions. * @tparam T_expressions types of expressions @@ -581,6 +583,10 @@ class results_cl { /** * Makes a std::pair of one result and one expression and wraps it into a * tuple. + * @tparam AssignOp an optional `assign_op_cl` that dictates whether the object + * is assigned using standard or compound assign. + * @tparam T_result An non scalar type that is normally an `result_cl` operation holding a `matrix_cl` + * @tparam T_expression An expression of set of operations on `matrix_cl` and scalar types. * @param result result * @param expression expression * @return a tuple of pair of result and expression @@ -600,6 +606,10 @@ class results_cl { /** * If an expression does not need to be calculated this returns an empty tuple + * @tparam AssignOp an optional `assign_op_cl` that dictates whether the object + * is assigned using standard or compound assign. + * @tparam T_result An non scalar type that is normally an `result_cl` operation holding a `matrix_cl` + * @tparam T_expression An expression of set of operations on `matrix_cl` and scalar types. * @param result result * @param expression expression * @return a tuple of pair of result and expression @@ -614,6 +624,10 @@ class results_cl { /** * Checks on scalars are done separately in this overload instead of in * kernel. + * @tparam AssignOp an optional `assign_op_cl` that dictates whether the object + * is assigned using standard or compound assign. + * @tparam T_check A scalar type + * @tparam T_pass An integral type * @param result result - check * @param pass bool scalar * @return an empty tuple diff --git a/stan/math/opencl/rev/adjoint_results.hpp b/stan/math/opencl/rev/adjoint_results.hpp index b9a9b1a1c9b..4e671fc6837 100644 --- a/stan/math/opencl/rev/adjoint_results.hpp +++ b/stan/math/opencl/rev/adjoint_results.hpp @@ -102,6 +102,8 @@ class adjoint_results_cl : protected results_cl { /** * Selects assignments that have non-scalar var results. + * @tparam AssignOp an optional `assign_op_cl` that dictates whether the object + * is assigned using standard or compound assign. * @tparam T_result type of result. This overload is used for non-scalar vars. * @tparam T_expression type of expression * @param result result @@ -119,6 +121,8 @@ class adjoint_results_cl : protected results_cl { } /** * Selects assignments that have non-scalar var results. + * @tparam AssignOp an optional `assign_op_cl` that dictates whether the object + * is assigned using standard or compound assign. * @tparam T_result type of result. This overload is used for results that are * either scalars or not vars. * @tparam T_expression type of expression From ab11b32212641bdc08250ec80afaa6ce678f5ce1 Mon Sep 17 00:00:00 2001 From: Stan Jenkins Date: Wed, 13 Sep 2023 14:16:51 -0400 Subject: [PATCH 4/9] [Jenkins] auto-formatting by clang-format version 10.0.0-4ubuntu1 --- .../kernel_generator/as_operation_cl.hpp | 26 +++++---- .../kernel_generator/assignment_ops.hpp | 19 ++++-- stan/math/opencl/kernel_generator/load.hpp | 12 ++-- .../kernel_generator/multi_result_kernel.hpp | 58 +++++++++++-------- .../opencl/kernel_generator/operation_cl.hpp | 30 +++++----- stan/math/opencl/rev/adjoint_results.hpp | 30 +++++----- stan/math/opencl/rev/grad.hpp | 3 +- .../kernel_generator/assignment_ops_test.cpp | 26 ++++----- test/unit/math/opencl/rev/add_test.cpp | 4 +- test/unit/math/opencl/rev/grad_test.cpp | 9 ++- 10 files changed, 118 insertions(+), 99 deletions(-) diff --git a/stan/math/opencl/kernel_generator/as_operation_cl.hpp b/stan/math/opencl/kernel_generator/as_operation_cl.hpp index c6e0d6732a9..b87e4fb0a9d 100644 --- a/stan/math/opencl/kernel_generator/as_operation_cl.hpp +++ b/stan/math/opencl/kernel_generator/as_operation_cl.hpp @@ -40,7 +40,8 @@ inline T_operation&& as_operation_cl(T_operation&& a) { * @param a scalar * @return \c scalar_ wrapping the input */ -template , +template , require_not_same_t* = nullptr> inline scalar_ as_operation_cl(const T_scalar a) { return scalar_(a); @@ -55,13 +56,15 @@ inline scalar_ as_operation_cl(const T_scalar a) { * @return \c scalar_ wrapping the input */ template -inline scalar_ as_operation_cl(const bool a) { return scalar_(a); } +inline scalar_ as_operation_cl(const bool a) { + return scalar_(a); +} /** * Converts any valid kernel generator expression into an operation. This is an * overload for \c matrix_cl. It wraps them into into \c load_. - * @tparam AssignOp an optional `assign_op_cl` that dictates whether the object - * is assigned using standard or compound assign. + * @tparam AssignOp an optional `assign_op_cl` that dictates whether the object + * is assigned using standard or compound assign. * @tparam T_matrix_cl \c matrix_cl * @param a \c matrix_cl * @return \c load_ wrapping the input @@ -80,15 +83,16 @@ inline load_ as_operation_cl(T_matrix_cl&& a) { * as_operation_cl_t. If the return value of \c as_operation_cl() would be a * rvalue reference, the reference is removed, so that a variable of this type * actually stores the value. - * @tparam T a `matrix_cl` or `Scalar` type - * @tparam AssignOp an optional `assign_op_cl` that dictates whether the object - * is assigned using standard or compound assign. + * @tparam T a `matrix_cl` or `Scalar` type + * @tparam AssignOp an optional `assign_op_cl` that dictates whether the object + * is assigned using standard or compound assign. */ template -using as_operation_cl_t = std::conditional_t< - std::is_lvalue_reference::value, - decltype(as_operation_cl(std::declval())), - std::remove_reference_t(std::declval()))>>; +using as_operation_cl_t + = std::conditional_t::value, + decltype(as_operation_cl(std::declval())), + std::remove_reference_t(std::declval()))>>; /** @}*/ } // namespace math diff --git a/stan/math/opencl/kernel_generator/assignment_ops.hpp b/stan/math/opencl/kernel_generator/assignment_ops.hpp index cca9b8e7a6f..a365492a8e1 100644 --- a/stan/math/opencl/kernel_generator/assignment_ops.hpp +++ b/stan/math/opencl/kernel_generator/assignment_ops.hpp @@ -9,11 +9,18 @@ namespace math { /** * Ops that decide the type of assignment for LHS operations */ -enum class assign_op_cl {equals, plus_equals, minus_equals, divide_equals, multiply_equals}; +enum class assign_op_cl { + equals, + plus_equals, + minus_equals, + divide_equals, + multiply_equals +}; namespace internal { /** - * @param value A static constexpr const char* member for printing assignment ops + * @param value A static constexpr const char* member for printing assignment + * ops */ template struct assignment_op_str_impl; @@ -47,11 +54,11 @@ template struct assignment_op_str : assignment_op_str_impl {}; template -struct assignment_op_str> : assignment_op_str_impl {}; +struct assignment_op_str> + : assignment_op_str_impl {}; } // namespace internal - /** * @tparam T A type that has an `assignment_op` static constexpr member type * @return The types assignment op as a constexpr const char* @@ -61,7 +68,7 @@ inline constexpr const char* assignment_op() noexcept { return internal::assignment_op_str>::value; } -} -} +} // namespace math +} // namespace stan #endif #endif diff --git a/stan/math/opencl/kernel_generator/load.hpp b/stan/math/opencl/kernel_generator/load.hpp index 018ecce6e50..6f48252d89e 100644 --- a/stan/math/opencl/kernel_generator/load.hpp +++ b/stan/math/opencl/kernel_generator/load.hpp @@ -25,7 +25,8 @@ namespace math { /** * Represents an access to a \c matrix_cl in kernel generator expressions * @tparam T \c matrix_cl - * @tparam AssignOp tells higher level operations whether the final operation should be an assignment or a type of compound assignment. + * @tparam AssignOp tells higher level operations whether the final operation + * should be an assignment or a type of compound assignment. */ template class load_ @@ -35,7 +36,6 @@ class load_ T a_; public: - static constexpr assign_op_cl assignment_op = AssignOp; using Scalar = typename std::remove_reference_t::type; using base = operation_cl, Scalar>; @@ -57,8 +57,12 @@ class load_ * @return copy of \c *this */ inline load_ deep_copy() & { return load_(a_); } - inline load_ deep_copy() const& { return load_(a_); } - inline load_ deep_copy() && { return load_(std::forward(a_)); } + inline load_ deep_copy() const& { + return load_(a_); + } + inline load_ deep_copy() && { + return load_(std::forward(a_)); + } /** * Generates kernel code for this expression. diff --git a/stan/math/opencl/kernel_generator/multi_result_kernel.hpp b/stan/math/opencl/kernel_generator/multi_result_kernel.hpp index 8358605aa89..167cd44ed13 100644 --- a/stan/math/opencl/kernel_generator/multi_result_kernel.hpp +++ b/stan/math/opencl/kernel_generator/multi_result_kernel.hpp @@ -323,16 +323,17 @@ class results_cl { }); } - /** + /** * Incrementing \c results_ object by \c expressions_cl object * executes the kernel that evaluates expressions and increments results by * those expressions. - * @tparam AssignOp an optional `assign_op_cl` that dictates whether the object - * is assigned using standard or compound assign. + * @tparam AssignOp an optional `assign_op_cl` that dictates whether the + * object is assigned using standard or compound assign. * @tparam T_expressions types of expressions * @param exprs expressions */ - template > void compound_assignment_impl(const expressions_cl& exprs) { @@ -361,7 +362,7 @@ class results_cl { compound_assignment_impl(exprs); } - /** + /** * Decrement \c results_ object by \c expressions_cl object * executes the kernel that evaluates expressions and increments results by * those expressions. @@ -375,7 +376,7 @@ class results_cl { compound_assignment_impl(exprs); } - /** + /** * Elementwise divide \c results_ object by \c expressions_cl object * executes the kernel that evaluates expressions and increments results by * those expressions. @@ -389,7 +390,7 @@ class results_cl { compound_assignment_impl(exprs); } - /** + /** * Elementwise multiply \c results_ object by \c expressions_cl object * executes the kernel that evaluates expressions and increments results by * those expressions. @@ -484,7 +485,7 @@ class results_cl { + parts.reduction_2d + "}\n"; } - return src; + return src; } /** @@ -583,38 +584,44 @@ class results_cl { /** * Makes a std::pair of one result and one expression and wraps it into a * tuple. - * @tparam AssignOp an optional `assign_op_cl` that dictates whether the object - * is assigned using standard or compound assign. - * @tparam T_result An non scalar type that is normally an `result_cl` operation holding a `matrix_cl` - * @tparam T_expression An expression of set of operations on `matrix_cl` and scalar types. + * @tparam AssignOp an optional `assign_op_cl` that dictates whether the + * object is assigned using standard or compound assign. + * @tparam T_result An non scalar type that is normally an `result_cl` + * operation holding a `matrix_cl` + * @tparam T_expression An expression of set of operations on `matrix_cl` and + * scalar types. * @param result result * @param expression expression * @return a tuple of pair of result and expression */ - template , conjunction, std::is_arithmetic>>>* = nullptr> static auto make_assignment_pair(T_result&& result, T_expression&& expression) { - return std::make_tuple( - std::pair, as_operation_cl_t>( - as_operation_cl(std::forward(result)), - as_operation_cl(std::forward(expression)))); + return std::make_tuple(std::pair, + as_operation_cl_t>( + as_operation_cl(std::forward(result)), + as_operation_cl(std::forward(expression)))); } /** * If an expression does not need to be calculated this returns an empty tuple - * @tparam AssignOp an optional `assign_op_cl` that dictates whether the object - * is assigned using standard or compound assign. - * @tparam T_result An non scalar type that is normally an `result_cl` operation holding a `matrix_cl` - * @tparam T_expression An expression of set of operations on `matrix_cl` and scalar types. + * @tparam AssignOp an optional `assign_op_cl` that dictates whether the + * object is assigned using standard or compound assign. + * @tparam T_result An non scalar type that is normally an `result_cl` + * operation holding a `matrix_cl` + * @tparam T_expression An expression of set of operations on `matrix_cl` and + * scalar types. * @param result result * @param expression expression * @return a tuple of pair of result and expression */ - template >* = nullptr> static auto make_assignment_pair(T_result&& result, T_expression&& expression) { @@ -624,15 +631,16 @@ class results_cl { /** * Checks on scalars are done separately in this overload instead of in * kernel. - * @tparam AssignOp an optional `assign_op_cl` that dictates whether the object - * is assigned using standard or compound assign. + * @tparam AssignOp an optional `assign_op_cl` that dictates whether the + * object is assigned using standard or compound assign. * @tparam T_check A scalar type * @tparam T_pass An integral type * @param result result - check * @param pass bool scalar * @return an empty tuple */ - template >* = nullptr, require_integral_t* = nullptr> static std::tuple<> make_assignment_pair(T_check&& result, T_pass&& pass) { diff --git a/stan/math/opencl/kernel_generator/operation_cl.hpp b/stan/math/opencl/kernel_generator/operation_cl.hpp index 84f026dbdf6..f5995484da0 100644 --- a/stan/math/opencl/kernel_generator/operation_cl.hpp +++ b/stan/math/opencl/kernel_generator/operation_cl.hpp @@ -76,21 +76,21 @@ struct kernel_parts { }; std::ostream& operator<<(std::ostream& os, kernel_parts& parts) { - os << "args:" << std::endl; - os << parts.args.substr(0, parts.args.size() - 2) << std::endl; - os << "Decl:" << std::endl; - os << parts.declarations << std::endl; - os << "Init:" << std::endl; - os << parts.initialization << std::endl; - os << "body:" << std::endl; - os << parts.body << std::endl; - os << "body_suffix:" << std::endl; - os << parts.body_suffix << std::endl; - os << "reduction_1d:" << std::endl; - os << parts.reduction_1d << std::endl; - os << "reduction_2d:" << std::endl; - os << parts.reduction_2d << std::endl; - return os; + os << "args:" << std::endl; + os << parts.args.substr(0, parts.args.size() - 2) << std::endl; + os << "Decl:" << std::endl; + os << parts.declarations << std::endl; + os << "Init:" << std::endl; + os << parts.initialization << std::endl; + os << "body:" << std::endl; + os << parts.body << std::endl; + os << "body_suffix:" << std::endl; + os << parts.body_suffix << std::endl; + os << "reduction_1d:" << std::endl; + os << parts.reduction_1d << std::endl; + os << "reduction_2d:" << std::endl; + os << parts.reduction_2d << std::endl; + return os; } /** diff --git a/stan/math/opencl/rev/adjoint_results.hpp b/stan/math/opencl/rev/adjoint_results.hpp index 4e671fc6837..4a2dede57d0 100644 --- a/stan/math/opencl/rev/adjoint_results.hpp +++ b/stan/math/opencl/rev/adjoint_results.hpp @@ -41,21 +41,24 @@ class adjoint_results_cl : protected results_cl { index_apply([&](auto... Is) { auto scalars = std::tuple_cat(select_scalar_assignments( std::get(this->results_), std::get(exprs.expressions_))...); - auto nonscalars_tmp = std::tuple_cat(select_nonscalar_assignments( - std::get(this->results_), std::get(exprs.expressions_))...); + auto nonscalars_tmp = std::tuple_cat( + select_nonscalar_assignments( + std::get(this->results_), + std::get(exprs.expressions_))...); index_apply::value>( [&](auto... Is_nonscal) { - auto nonscalars = std::make_tuple(std::make_pair( - std::get(nonscalars_tmp).first, std::get(nonscalars_tmp).second)...); - - + auto nonscalars = std::make_tuple( + std::make_pair(std::get(nonscalars_tmp).first, + std::get(nonscalars_tmp).second)...); + index_apply::value>( [&](auto... Is_scal) { // evaluate all expressions this->assignment_impl(std::tuple_cat( nonscalars, - this->template make_assignment_pair( + this->template make_assignment_pair< + assign_op_cl::plus_equals>( std::get<2>(std::get(scalars)), sum_2d(std::get<1>(std::get(scalars))))...)); @@ -99,11 +102,10 @@ class adjoint_results_cl : protected results_cl { return std::make_tuple(); } - /** * Selects assignments that have non-scalar var results. - * @tparam AssignOp an optional `assign_op_cl` that dictates whether the object - * is assigned using standard or compound assign. + * @tparam AssignOp an optional `assign_op_cl` that dictates whether the + * object is assigned using standard or compound assign. * @tparam T_result type of result. This overload is used for non-scalar vars. * @tparam T_expression type of expression * @param result result @@ -121,8 +123,8 @@ class adjoint_results_cl : protected results_cl { } /** * Selects assignments that have non-scalar var results. - * @tparam AssignOp an optional `assign_op_cl` that dictates whether the object - * is assigned using standard or compound assign. + * @tparam AssignOp an optional `assign_op_cl` that dictates whether the + * object is assigned using standard or compound assign. * @tparam T_result type of result. This overload is used for results that are * either scalars or not vars. * @tparam T_expression type of expression @@ -130,8 +132,8 @@ class adjoint_results_cl : protected results_cl { * @param expression expression * @return empty tuple */ - template ::value || !is_var>::value>* = nullptr> auto select_nonscalar_assignments(T_result&& result, diff --git a/stan/math/opencl/rev/grad.hpp b/stan/math/opencl/rev/grad.hpp index 46b0e7cb0d7..f4a1dde1dc9 100644 --- a/stan/math/opencl/rev/grad.hpp +++ b/stan/math/opencl/rev/grad.hpp @@ -19,8 +19,7 @@ namespace math { * @param[in] x Variables being differentiated with respect to * @param[out] g Gradient, d/dx v, evaluated at x. */ -inline void grad(var& v, var_value>& x, - Eigen::VectorXd& g) { +inline void grad(var& v, var_value>& x, Eigen::VectorXd& g) { grad(v.vi_); g = from_matrix_cl(x.adj()); } diff --git a/test/unit/math/opencl/kernel_generator/assignment_ops_test.cpp b/test/unit/math/opencl/kernel_generator/assignment_ops_test.cpp index 361e72a8630..3675dbd0a0f 100644 --- a/test/unit/math/opencl/kernel_generator/assignment_ops_test.cpp +++ b/test/unit/math/opencl/kernel_generator/assignment_ops_test.cpp @@ -9,11 +9,11 @@ #include TEST(KernelGenerator, plus_equals) { + using stan::math::from_matrix_cl; using stan::math::matrix_cl; - using stan::math::var_value; - using stan::math::var; using stan::math::to_matrix_cl; - using stan::math::from_matrix_cl; + using stan::math::var; + using stan::math::var_value; Eigen::MatrixXd A = Eigen::MatrixXd::Random(10, 10); Eigen::MatrixXd B = Eigen::MatrixXd::Random(10, 10); Eigen::MatrixXd C = Eigen::MatrixXd::Random(10, 10); @@ -27,11 +27,11 @@ TEST(KernelGenerator, plus_equals) { } TEST(KernelGenerator, minus_equals) { + using stan::math::from_matrix_cl; using stan::math::matrix_cl; - using stan::math::var_value; - using stan::math::var; using stan::math::to_matrix_cl; - using stan::math::from_matrix_cl; + using stan::math::var; + using stan::math::var_value; Eigen::MatrixXd A = Eigen::MatrixXd::Random(10, 10); Eigen::MatrixXd B = Eigen::MatrixXd::Random(10, 10); Eigen::MatrixXd C = Eigen::MatrixXd::Random(10, 10); @@ -45,11 +45,11 @@ TEST(KernelGenerator, minus_equals) { } TEST(KernelGenerator, divide_equals) { + using stan::math::from_matrix_cl; using stan::math::matrix_cl; - using stan::math::var_value; - using stan::math::var; using stan::math::to_matrix_cl; - using stan::math::from_matrix_cl; + using stan::math::var; + using stan::math::var_value; Eigen::MatrixXd A = Eigen::MatrixXd::Random(10, 10); Eigen::MatrixXd B = Eigen::MatrixXd::Random(10, 10); Eigen::MatrixXd C = Eigen::MatrixXd::Random(10, 10); @@ -63,11 +63,11 @@ TEST(KernelGenerator, divide_equals) { } TEST(KernelGenerator, times_equals) { + using stan::math::from_matrix_cl; using stan::math::matrix_cl; - using stan::math::var_value; - using stan::math::var; using stan::math::to_matrix_cl; - using stan::math::from_matrix_cl; + using stan::math::var; + using stan::math::var_value; Eigen::MatrixXd A = Eigen::MatrixXd::Random(10, 10); Eigen::MatrixXd B = Eigen::MatrixXd::Random(10, 10); @@ -81,6 +81,4 @@ TEST(KernelGenerator, times_equals) { EXPECT_MATRIX_EQ(C_cl_host, C) } - - #endif diff --git a/test/unit/math/opencl/rev/add_test.cpp b/test/unit/math/opencl/rev/add_test.cpp index e07c438f828..27950a35bf1 100644 --- a/test/unit/math/opencl/rev/add_test.cpp +++ b/test/unit/math/opencl/rev/add_test.cpp @@ -5,14 +5,12 @@ #include #include - - TEST(OpenCLPrim, add_aliasing) { stan::math::matrix_d d1(3, 3); d1 << 1, 2, 3, 4, 5, 6, 7, 8, 9; using stan::math::matrix_cl; - using stan::math::var_value; using stan::math::var; + using stan::math::var_value; using varmat_cl = var_value>; varmat_cl d11 = stan::math::to_matrix_cl(d1); // Add the same matrix as the left and right hand side diff --git a/test/unit/math/opencl/rev/grad_test.cpp b/test/unit/math/opencl/rev/grad_test.cpp index e24a70d26b9..23d9ee330e7 100644 --- a/test/unit/math/opencl/rev/grad_test.cpp +++ b/test/unit/math/opencl/rev/grad_test.cpp @@ -6,13 +6,13 @@ #include TEST(OpenCLGradTest, exceptions) { - using stan::math::var_value; - using stan::math::var; using stan::math::matrix_cl; using stan::math::to_matrix_cl; + using stan::math::var; + using stan::math::var_value; using varmat_cl = var_value>; Eigen::VectorXd a(6); - a << 1, 2, 3 , 4, 5, 6; + a << 1, 2, 3, 4, 5, 6; varmat_cl a_cl(to_matrix_cl(a)); varmat_cl b_cl(a_cl.block(0, 0, 3, 1)); varmat_cl c_cl(a_cl.block(3, 0, 3, 1)); @@ -25,7 +25,7 @@ TEST(OpenCLGradTest, exceptions) { stan::math::recover_memory(); Eigen::Matrix a_host = a; Eigen::Matrix b_host = a_host.segment(0, 3); - Eigen::Matrix c_host = a_host.segment(3, 3); + Eigen::Matrix c_host = a_host.segment(3, 3); var ret_host = stan::math::sum(add(b_host, b_host)); Eigen::VectorXd ret_grads_host(6); stan::math::grad(ret_host, a_host, ret_grads_host); @@ -33,5 +33,4 @@ TEST(OpenCLGradTest, exceptions) { stan::math::recover_memory(); } - #endif From c6bc259c9a4a23e77d7d28a63b25ca0a785f9f9a Mon Sep 17 00:00:00 2001 From: Steve Bronder Date: Wed, 13 Sep 2023 14:18:14 -0400 Subject: [PATCH 5/9] add back rest of add test --- test/unit/math/opencl/rev/add_test.cpp | 172 ++++++++++++++++++++++++- 1 file changed, 171 insertions(+), 1 deletion(-) diff --git a/test/unit/math/opencl/rev/add_test.cpp b/test/unit/math/opencl/rev/add_test.cpp index 27950a35bf1..d64f360030e 100644 --- a/test/unit/math/opencl/rev/add_test.cpp +++ b/test/unit/math/opencl/rev/add_test.cpp @@ -5,12 +5,182 @@ #include #include +TEST(OpenCLPrim, add_exceptions) { + stan::math::vector_d vd1(2), vd2(3); + stan::math::matrix_cl vd11(vd1); + stan::math::matrix_cl vd22(vd2); + EXPECT_THROW(stan::math::add(vd11, vd22), std::invalid_argument); + + stan::math::row_vector_d rvd1(2), rvd2(3); + stan::math::matrix_cl rvd11(rvd1); + stan::math::matrix_cl rvd22(rvd2); + EXPECT_THROW(stan::math::add(rvd11, rvd22), std::invalid_argument); + + stan::math::matrix_d md1(2, 2), md2(3, 3); + stan::math::matrix_cl md11(md1); + stan::math::matrix_cl md22(md2); + EXPECT_THROW(stan::math::add(md11, md22), std::invalid_argument); +} + +TEST(OpenCLPrim, add_tri_value_check) { + Eigen::MatrixXd a(3, 3); + a << 1, 2, 3, 4, 5, 6, 7, 8, 9; + Eigen::MatrixXd b = Eigen::MatrixXd::Ones(3, 3) * -3; + + stan::math::matrix_cl a_cl(a); + stan::math::matrix_cl b_cl(b); + stan::math::matrix_cl c_cl(3, 3); + stan::math::matrix_cl c_cl_fun(3, 3); + Eigen::MatrixXd c(3, 3); + + a_cl.view(stan::math::matrix_cl_view::Lower); + b_cl.view(stan::math::matrix_cl_view::Lower); + c_cl = a_cl + b_cl; + EXPECT_EQ(c_cl.view(), stan::math::matrix_cl_view::Lower); + c = stan::math::from_matrix_cl(c_cl); + EXPECT_MATRIX_NEAR((Eigen::MatrixXd(a.triangularView()) + + Eigen::MatrixXd(b.triangularView())), + c, 1E-8); + + c_cl_fun = add(a_cl, b_cl); + EXPECT_EQ(c_cl_fun.view(), stan::math::matrix_cl_view::Lower); + c = stan::math::from_matrix_cl(c_cl_fun); + EXPECT_MATRIX_NEAR((Eigen::MatrixXd(a.triangularView()) + + Eigen::MatrixXd(b.triangularView())), + c, 1E-8); + + a_cl.view(stan::math::matrix_cl_view::Lower); + b_cl.view(stan::math::matrix_cl_view::Upper); + c_cl = a_cl + b_cl; + EXPECT_EQ(c_cl.view(), stan::math::matrix_cl_view::Entire); + c = stan::math::from_matrix_cl(c_cl); + EXPECT_MATRIX_NEAR((Eigen::MatrixXd(a.triangularView()) + + Eigen::MatrixXd(b.triangularView())), + c, 1E-8); + + c_cl_fun = add(a_cl, b_cl); + EXPECT_EQ(c_cl_fun.view(), stan::math::matrix_cl_view::Entire); + c = stan::math::from_matrix_cl(c_cl_fun); + EXPECT_MATRIX_NEAR((Eigen::MatrixXd(a.triangularView()) + + Eigen::MatrixXd(b.triangularView())), + c, 1E-8); + + a_cl.view(stan::math::matrix_cl_view::Upper); + b_cl.view(stan::math::matrix_cl_view::Lower); + c_cl = a_cl + b_cl; + EXPECT_EQ(c_cl.view(), stan::math::matrix_cl_view::Entire); + c = stan::math::from_matrix_cl(c_cl); + EXPECT_MATRIX_NEAR((Eigen::MatrixXd(a.triangularView()) + + Eigen::MatrixXd(b.triangularView())), + c, 1E-8); + + c_cl_fun = add(a_cl, b_cl); + EXPECT_EQ(c_cl_fun.view(), stan::math::matrix_cl_view::Entire); + c = stan::math::from_matrix_cl(c_cl_fun); + EXPECT_MATRIX_NEAR((Eigen::MatrixXd(a.triangularView()) + + Eigen::MatrixXd(b.triangularView())), + c, 1E-8); + + a_cl.view(stan::math::matrix_cl_view::Entire); + b_cl.view(stan::math::matrix_cl_view::Lower); + c_cl = a_cl + b_cl; + EXPECT_EQ(c_cl.view(), stan::math::matrix_cl_view::Entire); + c = stan::math::from_matrix_cl(c_cl); + EXPECT_MATRIX_NEAR((a + Eigen::MatrixXd(b.triangularView())), c, + 1E-8); + + c_cl_fun = add(a_cl, b_cl); + EXPECT_EQ(c_cl_fun.view(), stan::math::matrix_cl_view::Entire); + c = stan::math::from_matrix_cl(c_cl_fun); + EXPECT_MATRIX_NEAR((a + Eigen::MatrixXd(b.triangularView())), c, + 1E-8); +} + +TEST(OpenCLPrim, add_tri_scalar_value_check) { + Eigen::MatrixXd a(3, 3); + a << 1, 2, 3, 4, 5, 6, 7, 8, 9; + stan::math::matrix_cl a_cl(a); + stan::math::matrix_cl c_cl(3, 3); + Eigen::MatrixXd c(3, 3); + + using stan::math::add; + a_cl.view(stan::math::matrix_cl_view::Lower); + c_cl = add(a_cl, 1.5); + EXPECT_EQ(c_cl.view(), stan::math::matrix_cl_view::Entire); + c = stan::math::from_matrix_cl(c_cl); + EXPECT_MATRIX_NEAR( + add(Eigen::MatrixXd(a.triangularView()), 1.5), c, 1E-8); + + a_cl.view(stan::math::matrix_cl_view::Lower); + c_cl = add(1.5, a_cl); + EXPECT_EQ(c_cl.view(), stan::math::matrix_cl_view::Entire); + c = stan::math::from_matrix_cl(c_cl); + EXPECT_MATRIX_NEAR( + add(1.5, Eigen::MatrixXd(a.triangularView())), c, 1E-8); + + a_cl.view(stan::math::matrix_cl_view::Upper); + c_cl = add(a_cl, 1.5); + EXPECT_EQ(c_cl.view(), stan::math::matrix_cl_view::Entire); + c = stan::math::from_matrix_cl(c_cl); + EXPECT_MATRIX_NEAR( + add(Eigen::MatrixXd(a.triangularView()), 1.5), c, 1E-8); + + a_cl.view(stan::math::matrix_cl_view::Upper); + c_cl = add(1.5, a_cl); + EXPECT_EQ(c_cl.view(), stan::math::matrix_cl_view::Entire); + c = stan::math::from_matrix_cl(c_cl); + EXPECT_MATRIX_NEAR( + add(1.5, Eigen::MatrixXd(a.triangularView())), c, 1E-8); + + a_cl.view(stan::math::matrix_cl_view::Entire); + c_cl = add(a_cl, 1.5); + EXPECT_EQ(c_cl.view(), stan::math::matrix_cl_view::Entire); + c = stan::math::from_matrix_cl(c_cl); + EXPECT_MATRIX_NEAR(add(a, 1.5), c, 1E-8); + + a_cl.view(stan::math::matrix_cl_view::Entire); + c_cl = add(1.5, a_cl); + EXPECT_EQ(c_cl.view(), stan::math::matrix_cl_view::Entire); + c = stan::math::from_matrix_cl(c_cl); + EXPECT_MATRIX_NEAR(add(1.5, a), c, 1E-8); +} + +TEST(OpenCLPrim, add_batch) { + // used to represent 5 matrices of size 10x10 + const int batch_size = 11; + const int size = 13; + stan::math::matrix_d a(size, size * batch_size); + stan::math::matrix_d a_res(size, size); + for (int k = 0; k < batch_size; k++) { + for (int i = 0; i < size; i++) + for (int j = 0; j < size; j++) { + a(i, k * size + j) = k; + } + } + stan::math::matrix_cl a_cl(a); + stan::math::matrix_cl a_cl_res(size, size); + stan::math::opencl_kernels::add_batch(cl::NDRange(size, size), a_cl_res, a_cl, + size, size, batch_size); + a_res = stan::math::from_matrix_cl(a_cl_res); + for (int k = 0; k < batch_size; k++) { + for (int i = 0; i < size; i++) + for (int j = 0; j < size; j++) { + a(i, j) += a(i, k * size + j); + } + } + for (int i = 0; i < size; i++) { + for (int j = 0; j < size; j++) { + EXPECT_EQ(a(i, j), a_res(i, j)); + } + } +} + TEST(OpenCLPrim, add_aliasing) { stan::math::matrix_d d1(3, 3); d1 << 1, 2, 3, 4, 5, 6, 7, 8, 9; using stan::math::matrix_cl; - using stan::math::var; using stan::math::var_value; + using stan::math::var; using varmat_cl = var_value>; varmat_cl d11 = stan::math::to_matrix_cl(d1); // Add the same matrix as the left and right hand side From 1dff379133a706f496e514b67b77fdcd605c6810 Mon Sep 17 00:00:00 2001 From: Stan Jenkins Date: Wed, 13 Sep 2023 14:19:26 -0400 Subject: [PATCH 6/9] [Jenkins] auto-formatting by clang-format version 10.0.0-4ubuntu1 --- test/unit/math/opencl/rev/add_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/math/opencl/rev/add_test.cpp b/test/unit/math/opencl/rev/add_test.cpp index d64f360030e..aaf591986c4 100644 --- a/test/unit/math/opencl/rev/add_test.cpp +++ b/test/unit/math/opencl/rev/add_test.cpp @@ -179,8 +179,8 @@ TEST(OpenCLPrim, add_aliasing) { stan::math::matrix_d d1(3, 3); d1 << 1, 2, 3, 4, 5, 6, 7, 8, 9; using stan::math::matrix_cl; - using stan::math::var_value; using stan::math::var; + using stan::math::var_value; using varmat_cl = var_value>; varmat_cl d11 = stan::math::to_matrix_cl(d1); // Add the same matrix as the left and right hand side From 95f40da8adeea16dcb4c25cfe5677fe710af5b60 Mon Sep 17 00:00:00 2001 From: Steve Bronder Date: Wed, 13 Sep 2023 15:19:43 -0400 Subject: [PATCH 7/9] revert add test --- test/unit/math/opencl/rev/add_test.cpp | 220 +++++++------------------ 1 file changed, 61 insertions(+), 159 deletions(-) diff --git a/test/unit/math/opencl/rev/add_test.cpp b/test/unit/math/opencl/rev/add_test.cpp index aaf591986c4..874074968ce 100644 --- a/test/unit/math/opencl/rev/add_test.cpp +++ b/test/unit/math/opencl/rev/add_test.cpp @@ -5,175 +5,77 @@ #include #include -TEST(OpenCLPrim, add_exceptions) { +auto add_functor + = [](const auto& a, const auto& b) { return stan::math::add(a, b).eval(); }; + +TEST(OpenCLPrim, add_v_small_zero) { + stan::math::vector_d d1(3), d2(3); + d1 << 1, 2, 3; + d2 << 3, 2, 1; + stan::math::test::compare_cpu_opencl_prim_rev(add_functor, d1, d2); + + stan::math::vector_d d0(0); + stan::math::test::compare_cpu_opencl_prim_rev(add_functor, d0, d0); + + double d3 = 3.0; + stan::math::test::compare_cpu_opencl_prim_rev(add_functor, d1, d3); + stan::math::test::compare_cpu_opencl_prim_rev(add_functor, d3, d1); + stan::math::test::compare_cpu_opencl_prim_rev(add_functor, d3, d0); + stan::math::test::compare_cpu_opencl_prim_rev(add_functor, d0, d3); +} + +TEST(OpenCLPrim, add_rv_small_zero) { + stan::math::row_vector_d d1(3), d2(3); + d1 << 1, 2, 3; + d2 << 3, 2, 1; + stan::math::test::compare_cpu_opencl_prim_rev(add_functor, d1, d2); + + stan::math::vector_d d0(0); + stan::math::test::compare_cpu_opencl_prim_rev(add_functor, d0, d0); + + double d3 = 3.0; + stan::math::test::compare_cpu_opencl_prim_rev(add_functor, d1, d3); + stan::math::test::compare_cpu_opencl_prim_rev(add_functor, d3, d1); + stan::math::test::compare_cpu_opencl_prim_rev(add_functor, d3, d0); + stan::math::test::compare_cpu_opencl_prim_rev(add_functor, d0, d3); +} + +TEST(OpenCLPrim, add_m_small_zero) { + stan::math::matrix_d d1(3, 3), d2(3, 3); + d1 << 1, 2, 3, 4, 5, 6, 7, 8, 9; + d2 << 10, 100, 1000, 0, -10, -12, 2, 4, 8; + stan::math::test::compare_cpu_opencl_prim_rev(add_functor, d1, d2); + + stan::math::matrix_d d0(0, 0); + stan::math::test::compare_cpu_opencl_prim_rev(add_functor, d0, d0); + + double d3 = 3.0; + stan::math::test::compare_cpu_opencl_prim_rev(add_functor, d1, d3); + stan::math::test::compare_cpu_opencl_prim_rev(add_functor, d3, d1); + stan::math::test::compare_cpu_opencl_prim_rev(add_functor, d3, d0); + stan::math::test::compare_cpu_opencl_prim_rev(add_functor, d0, d3); +} + +TEST(OpenCLPrim, add_rev_exceptions) { + using stan::math::matrix_cl; stan::math::vector_d vd1(2), vd2(3); - stan::math::matrix_cl vd11(vd1); - stan::math::matrix_cl vd22(vd2); + stan::math::var_value> vd11 = stan::math::to_matrix_cl(vd1); + stan::math::var_value> vd22 = stan::math::to_matrix_cl(vd2); EXPECT_THROW(stan::math::add(vd11, vd22), std::invalid_argument); stan::math::row_vector_d rvd1(2), rvd2(3); - stan::math::matrix_cl rvd11(rvd1); - stan::math::matrix_cl rvd22(rvd2); + stan::math::var_value> rvd11 + = stan::math::to_matrix_cl(rvd1); + stan::math::var_value> rvd22 + = stan::math::to_matrix_cl(rvd2); EXPECT_THROW(stan::math::add(rvd11, rvd22), std::invalid_argument); stan::math::matrix_d md1(2, 2), md2(3, 3); - stan::math::matrix_cl md11(md1); - stan::math::matrix_cl md22(md2); + stan::math::var_value> md11 = stan::math::to_matrix_cl(md1); + stan::math::var_value> md22 = stan::math::to_matrix_cl(md2); EXPECT_THROW(stan::math::add(md11, md22), std::invalid_argument); } -TEST(OpenCLPrim, add_tri_value_check) { - Eigen::MatrixXd a(3, 3); - a << 1, 2, 3, 4, 5, 6, 7, 8, 9; - Eigen::MatrixXd b = Eigen::MatrixXd::Ones(3, 3) * -3; - - stan::math::matrix_cl a_cl(a); - stan::math::matrix_cl b_cl(b); - stan::math::matrix_cl c_cl(3, 3); - stan::math::matrix_cl c_cl_fun(3, 3); - Eigen::MatrixXd c(3, 3); - - a_cl.view(stan::math::matrix_cl_view::Lower); - b_cl.view(stan::math::matrix_cl_view::Lower); - c_cl = a_cl + b_cl; - EXPECT_EQ(c_cl.view(), stan::math::matrix_cl_view::Lower); - c = stan::math::from_matrix_cl(c_cl); - EXPECT_MATRIX_NEAR((Eigen::MatrixXd(a.triangularView()) - + Eigen::MatrixXd(b.triangularView())), - c, 1E-8); - - c_cl_fun = add(a_cl, b_cl); - EXPECT_EQ(c_cl_fun.view(), stan::math::matrix_cl_view::Lower); - c = stan::math::from_matrix_cl(c_cl_fun); - EXPECT_MATRIX_NEAR((Eigen::MatrixXd(a.triangularView()) - + Eigen::MatrixXd(b.triangularView())), - c, 1E-8); - - a_cl.view(stan::math::matrix_cl_view::Lower); - b_cl.view(stan::math::matrix_cl_view::Upper); - c_cl = a_cl + b_cl; - EXPECT_EQ(c_cl.view(), stan::math::matrix_cl_view::Entire); - c = stan::math::from_matrix_cl(c_cl); - EXPECT_MATRIX_NEAR((Eigen::MatrixXd(a.triangularView()) - + Eigen::MatrixXd(b.triangularView())), - c, 1E-8); - - c_cl_fun = add(a_cl, b_cl); - EXPECT_EQ(c_cl_fun.view(), stan::math::matrix_cl_view::Entire); - c = stan::math::from_matrix_cl(c_cl_fun); - EXPECT_MATRIX_NEAR((Eigen::MatrixXd(a.triangularView()) - + Eigen::MatrixXd(b.triangularView())), - c, 1E-8); - - a_cl.view(stan::math::matrix_cl_view::Upper); - b_cl.view(stan::math::matrix_cl_view::Lower); - c_cl = a_cl + b_cl; - EXPECT_EQ(c_cl.view(), stan::math::matrix_cl_view::Entire); - c = stan::math::from_matrix_cl(c_cl); - EXPECT_MATRIX_NEAR((Eigen::MatrixXd(a.triangularView()) - + Eigen::MatrixXd(b.triangularView())), - c, 1E-8); - - c_cl_fun = add(a_cl, b_cl); - EXPECT_EQ(c_cl_fun.view(), stan::math::matrix_cl_view::Entire); - c = stan::math::from_matrix_cl(c_cl_fun); - EXPECT_MATRIX_NEAR((Eigen::MatrixXd(a.triangularView()) - + Eigen::MatrixXd(b.triangularView())), - c, 1E-8); - - a_cl.view(stan::math::matrix_cl_view::Entire); - b_cl.view(stan::math::matrix_cl_view::Lower); - c_cl = a_cl + b_cl; - EXPECT_EQ(c_cl.view(), stan::math::matrix_cl_view::Entire); - c = stan::math::from_matrix_cl(c_cl); - EXPECT_MATRIX_NEAR((a + Eigen::MatrixXd(b.triangularView())), c, - 1E-8); - - c_cl_fun = add(a_cl, b_cl); - EXPECT_EQ(c_cl_fun.view(), stan::math::matrix_cl_view::Entire); - c = stan::math::from_matrix_cl(c_cl_fun); - EXPECT_MATRIX_NEAR((a + Eigen::MatrixXd(b.triangularView())), c, - 1E-8); -} - -TEST(OpenCLPrim, add_tri_scalar_value_check) { - Eigen::MatrixXd a(3, 3); - a << 1, 2, 3, 4, 5, 6, 7, 8, 9; - stan::math::matrix_cl a_cl(a); - stan::math::matrix_cl c_cl(3, 3); - Eigen::MatrixXd c(3, 3); - - using stan::math::add; - a_cl.view(stan::math::matrix_cl_view::Lower); - c_cl = add(a_cl, 1.5); - EXPECT_EQ(c_cl.view(), stan::math::matrix_cl_view::Entire); - c = stan::math::from_matrix_cl(c_cl); - EXPECT_MATRIX_NEAR( - add(Eigen::MatrixXd(a.triangularView()), 1.5), c, 1E-8); - - a_cl.view(stan::math::matrix_cl_view::Lower); - c_cl = add(1.5, a_cl); - EXPECT_EQ(c_cl.view(), stan::math::matrix_cl_view::Entire); - c = stan::math::from_matrix_cl(c_cl); - EXPECT_MATRIX_NEAR( - add(1.5, Eigen::MatrixXd(a.triangularView())), c, 1E-8); - - a_cl.view(stan::math::matrix_cl_view::Upper); - c_cl = add(a_cl, 1.5); - EXPECT_EQ(c_cl.view(), stan::math::matrix_cl_view::Entire); - c = stan::math::from_matrix_cl(c_cl); - EXPECT_MATRIX_NEAR( - add(Eigen::MatrixXd(a.triangularView()), 1.5), c, 1E-8); - - a_cl.view(stan::math::matrix_cl_view::Upper); - c_cl = add(1.5, a_cl); - EXPECT_EQ(c_cl.view(), stan::math::matrix_cl_view::Entire); - c = stan::math::from_matrix_cl(c_cl); - EXPECT_MATRIX_NEAR( - add(1.5, Eigen::MatrixXd(a.triangularView())), c, 1E-8); - - a_cl.view(stan::math::matrix_cl_view::Entire); - c_cl = add(a_cl, 1.5); - EXPECT_EQ(c_cl.view(), stan::math::matrix_cl_view::Entire); - c = stan::math::from_matrix_cl(c_cl); - EXPECT_MATRIX_NEAR(add(a, 1.5), c, 1E-8); - - a_cl.view(stan::math::matrix_cl_view::Entire); - c_cl = add(1.5, a_cl); - EXPECT_EQ(c_cl.view(), stan::math::matrix_cl_view::Entire); - c = stan::math::from_matrix_cl(c_cl); - EXPECT_MATRIX_NEAR(add(1.5, a), c, 1E-8); -} - -TEST(OpenCLPrim, add_batch) { - // used to represent 5 matrices of size 10x10 - const int batch_size = 11; - const int size = 13; - stan::math::matrix_d a(size, size * batch_size); - stan::math::matrix_d a_res(size, size); - for (int k = 0; k < batch_size; k++) { - for (int i = 0; i < size; i++) - for (int j = 0; j < size; j++) { - a(i, k * size + j) = k; - } - } - stan::math::matrix_cl a_cl(a); - stan::math::matrix_cl a_cl_res(size, size); - stan::math::opencl_kernels::add_batch(cl::NDRange(size, size), a_cl_res, a_cl, - size, size, batch_size); - a_res = stan::math::from_matrix_cl(a_cl_res); - for (int k = 0; k < batch_size; k++) { - for (int i = 0; i < size; i++) - for (int j = 0; j < size; j++) { - a(i, j) += a(i, k * size + j); - } - } - for (int i = 0; i < size; i++) { - for (int j = 0; j < size; j++) { - EXPECT_EQ(a(i, j), a_res(i, j)); - } - } -} TEST(OpenCLPrim, add_aliasing) { stan::math::matrix_d d1(3, 3); From 03eb22be1368e262c8407606ac8d75bd25108f21 Mon Sep 17 00:00:00 2001 From: Stan Jenkins Date: Wed, 13 Sep 2023 15:21:08 -0400 Subject: [PATCH 8/9] [Jenkins] auto-formatting by clang-format version 10.0.0-4ubuntu1 --- test/unit/math/opencl/rev/add_test.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/test/unit/math/opencl/rev/add_test.cpp b/test/unit/math/opencl/rev/add_test.cpp index 874074968ce..f678fbc255c 100644 --- a/test/unit/math/opencl/rev/add_test.cpp +++ b/test/unit/math/opencl/rev/add_test.cpp @@ -76,7 +76,6 @@ TEST(OpenCLPrim, add_rev_exceptions) { EXPECT_THROW(stan::math::add(md11, md22), std::invalid_argument); } - TEST(OpenCLPrim, add_aliasing) { stan::math::matrix_d d1(3, 3); d1 << 1, 2, 3, 4, 5, 6, 7, 8, 9; From 29644f65785a44596f1e059954a6c76f9f518772 Mon Sep 17 00:00:00 2001 From: Steve Bronder Date: Thu, 14 Sep 2023 17:12:38 -0400 Subject: [PATCH 9/9] add inline to operator<< for kernel_parts --- stan/math/opencl/kernel_generator/operation_cl.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stan/math/opencl/kernel_generator/operation_cl.hpp b/stan/math/opencl/kernel_generator/operation_cl.hpp index f5995484da0..43a50b59654 100644 --- a/stan/math/opencl/kernel_generator/operation_cl.hpp +++ b/stan/math/opencl/kernel_generator/operation_cl.hpp @@ -61,7 +61,7 @@ struct kernel_parts { args + other.args}; } - kernel_parts operator+=(const kernel_parts& other) { + kernel_parts& operator+=(const kernel_parts& other) { includes += other.includes; declarations += other.declarations; initialization += other.initialization; @@ -75,7 +75,7 @@ struct kernel_parts { } }; -std::ostream& operator<<(std::ostream& os, kernel_parts& parts) { +inline std::ostream& operator<<(std::ostream& os, kernel_parts& parts) { os << "args:" << std::endl; os << parts.args.substr(0, parts.args.size() - 2) << std::endl; os << "Decl:" << std::endl;