stan-dev · t4c1 · Jul 27, 2021 · Jul 27, 2021 · SteveBronder · Jul 27, 2021
diff --git a/stan/math/opencl/kernel_generator/multi_result_kernel.hpp b/stan/math/opencl/kernel_generator/multi_result_kernel.hpp
@@ -321,6 +321,16 @@ class results_cl {
           std::get<Is>(results_), std::get<Is>(exprs.expressions_))...));
     });
   }
+  template <typename... T_expressions,
+            typename = std::enable_if_t<sizeof...(T_results)
+                                        == sizeof...(T_expressions)>>
+  void operator=(expressions_cl<T_expressions...>&& exprs) {
+    index_apply<sizeof...(T_expressions)>([this, &exprs](auto... Is) mutable {
+      assignment_impl(std::tuple_cat(make_assignment_pair(
+          std::get<Is>(results_),
+          std::forward<T_expressions>(std::get<Is>(exprs.expressions_)))...));
+    });
+  }
 
   /**
    * Incrementing \c results_ object by \c expressions_cl object
@@ -531,11 +541,16 @@ class results_cl {
    * @param expression expression
    * @return a tuple of pair of result and expression
    */
-  template <typename T_result, typename T_expression,
-            require_all_not_t<is_without_output<T_expression>,
-                              conjunction<internal::is_scalar_check<T_result>,
-                                          std::is_arithmetic<std::decay_t<
-                                              T_expression>>>>* = nullptr>
+  template <
+      typename T_result, typename T_expression,
+      require_all_not_t<
+          is_without_output<T_expression>,
+          conjunction<internal::is_scalar_check<T_result>,
+                      std::is_arithmetic<std::decay_t<T_expression>>>,
+          conjunction<
+              is_matrix_cl<T_result>, is_matrix_cl<T_expression>,
+              std::is_same<value_type_t<T_expression>, value_type_t<T_result>>,
+              std::is_rvalue_reference<T_expression&&>>>* = nullptr>
   static auto make_assignment_pair(T_result&& result,
                                    T_expression&& expression) {
     return std::make_tuple(
@@ -576,6 +591,22 @@ class results_cl {
     }
     return std::make_tuple();
   }
+
+  /**
+   * Optimized move assignment of a `matrix_cl` into another `matrix_cl`.
+   * @param result `matrix_cl` that is assigned to
+   * @param matrix rvalue `matrix_cl` that is move-assigned into `result`
+   * @return an empty tuple
+   */
+  template <typename T_result, typename T_matrix,
+            require_all_matrix_cl_t<T_result, T_matrix>* = nullptr,
+            require_all_st_same<T_result, T_matrix>* = nullptr,
+            require_t<std::is_rvalue_reference<T_matrix&&>>* = nullptr>
+  static std::tuple<> make_assignment_pair(T_result&& result,
+                                           T_matrix&& matrix) {
+    result = std::move(matrix);
+    return std::make_tuple();
+  }
 };
 
 /**

diff --git a/test/unit/math/opencl/kernel_generator/multi_result_kernel_test.cpp b/test/unit/math/opencl/kernel_generator/multi_result_kernel_test.cpp
@@ -131,4 +131,21 @@ TEST(KernelGenerator, multi_result_kernel_reuse_kernel) {
   EXPECT_MATRIX_NEAR(res_diff, correct_diff, 1e-9);
 }
 
+TEST(KernelGenerator, multi_result_kernel_matrix_cl_move) {
+  MatrixXd m1(3, 3);
+  m1 << 1, 2, 3, 4, 5, 6, 7, 8, 9;
+  MatrixXd m2(3, 3);
+  m2 << 10, 100, 1000, 0, -10, -12, 2, 4, 8;
+  // m1_cl is the assignment target; m2_cl is the source in both assignments.
+  matrix_cl<double> m1_cl(m1);
+  matrix_cl<double> m2_cl(m2);
+  // Record the raw handle behind m2_cl before anything is assigned.
+  cl_mem m2_buf = m2_cl.buffer()();
+  // An lvalue source must not hand over m2_buf; an rvalue source must.
+  stan::math::results(m1_cl) = stan::math::expressions(m2_cl);
+  EXPECT_NE(m1_cl.buffer()(), m2_buf);
+  stan::math::results(m1_cl) = stan::math::expressions(std::move(m2_cl));
+  EXPECT_EQ(m1_cl.buffer()(), m2_buf);
+}
+
 #endif