Changes all the kernel files so that they are placed into const char*s within each kernel file.

Doxygen works, but the object holding the kernel code is undocumented. All of the kernel structs are moved into their respective kernel files.
stan-dev · rok-cesnovar · Aug 23, 2018 · Jul 30, 2018 · Aug 1, 2018 · Aug 1, 2018
commit aadb3ce1a1b69313e7ce0840c806b48ae95369e3
diff --git a/doxygen/doxygen.cfg b/doxygen/doxygen.cfg
@@ -281,7 +281,7 @@ OPTIMIZE_OUTPUT_VHDL   = NO
 # Note that for custom extensions you also need to set FILE_PATTERNS otherwise
 # the files are not read by doxygen.
 
-EXTENSION_MAPPING      =
+EXTENSION_MAPPING      = cl=C
 
 # If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments
 # according to the Markdown format, which allows for more readable
@@ -782,7 +782,8 @@ INPUT_ENCODING         = UTF-8
 
 FILE_PATTERNS          = *.hpp \
                          *.cpp \
-                         *.dox
+                         *.dox \
+                         *.cl
 
 # The RECURSIVE tag can be used to specify whether or not subdirectories should
 # be searched for input files as well.
@@ -824,7 +825,7 @@ EXCLUDE_PATTERNS       =
 # Note that the wildcards are matched against the file with absolute path, so to
 # exclude all test directories use the pattern */test/*
 
-EXCLUDE_SYMBOLS        =
+EXCLUDE_SYMBOLS        = STRINGIFY
 
 # The EXAMPLE_PATH tag can be used to specify one or more files or directories
 # that contain example code fragments that are included (see the \include

diff --git a/stan/math/gpu/add.hpp b/stan/math/gpu/add.hpp
@@ -2,6 +2,7 @@
 #define STAN_MATH_GPU_ADD_HPP
 #ifdef STAN_OPENCL
 #include <stan/math/gpu/matrix_gpu.hpp>
+#include <stan/math/gpu/kernels/add.hpp>
 #include <stan/math/gpu/err/check_matching_dims.hpp>
 #include <CL/cl.hpp>
 

diff --git a/stan/math/gpu/copy.hpp b/stan/math/gpu/copy.hpp
@@ -5,6 +5,7 @@
 #include <stan/math/gpu/opencl_context.hpp>
 #include <stan/math/gpu/kernel_cl.hpp>
 #include <stan/math/gpu/matrix_gpu.hpp>
+#include <stan/math/gpu/kernels/copy.hpp>
 #include <stan/math/prim/mat/fun/Eigen.hpp>
 #include <stan/math/prim/scal/err/check_size_match.hpp>
 #include <CL/cl.hpp>
@@ -111,9 +112,8 @@ inline void copy(matrix_gpu& dst, const matrix_gpu& src) {
        * see the matrix_gpu(matrix_gpu&) constructor
        *  for explanation
        */
-      opencl_kernels::copy(cl::NDRange(dst.rows(), dst.cols()),
-                           src.buffer(), dst.buffer(),
-                           dst.rows(), dst.cols());
+      opencl_kernels::copy(cl::NDRange(dst.rows(), dst.cols()), src.buffer(),
+                           dst.buffer(), dst.rows(), dst.cols());
     } catch (const cl::Error& e) {
       std::cout << e.err() << std::endl;
       check_opencl_error("copy GPU->GPU", e);

diff --git a/stan/math/gpu/copy_triangular.hpp b/stan/math/gpu/copy_triangular.hpp
@@ -4,6 +4,7 @@
 #include <stan/math/gpu/constants.hpp>
 #include <stan/math/gpu/matrix_gpu.hpp>
 #include <stan/math/gpu/copy.hpp>
+#include <stan/math/gpu/kernels/copy_triangular.hpp>
 #include <CL/cl.hpp>
 
 namespace stan {

diff --git a/stan/math/gpu/err/check_diagonal_zeros.hpp b/stan/math/gpu/err/check_diagonal_zeros.hpp
@@ -2,6 +2,7 @@
 #define STAN_MATH_GPU_ERR_CHECK_DIAGONAL_ZEROS_HPP
 #ifdef STAN_OPENCL
 #include <stan/math/gpu/matrix_gpu.hpp>
+#include <stan/math/gpu/kernels/check_diagonal_zeros.hpp>
 #include <stan/math/prim/scal/err/domain_error.hpp>
 
 namespace stan {

diff --git a/stan/math/gpu/err/check_nan.hpp b/stan/math/gpu/err/check_nan.hpp
@@ -2,6 +2,7 @@
 #define STAN_MATH_GPU_ERR_CHECK_NAN_HPP
 #ifdef STAN_OPENCL
 #include <stan/math/gpu/matrix_gpu.hpp>
+#include <stan/math/gpu/kernels/check_nan.hpp>
 #include <stan/math/prim/scal/err/domain_error.hpp>
 
 namespace stan {

diff --git a/stan/math/gpu/err/check_symmetric.hpp b/stan/math/gpu/err/check_symmetric.hpp
@@ -3,6 +3,8 @@
 #ifdef STAN_OPENCL
 #include <stan/math/gpu/matrix_gpu.hpp>
 #include <stan/math/prim/scal/err/domain_error.hpp>
+#include <stan/math/gpu/kernels/check_symmetric.hpp>
+
 
 namespace stan {
 namespace math {

diff --git a/stan/math/gpu/identity.hpp b/stan/math/gpu/identity.hpp
@@ -2,6 +2,7 @@
 #define STAN_MATH_GPU_IDENTITY_HPP
 #ifdef STAN_OPENCL
 #include <stan/math/gpu/matrix_gpu.hpp>
+#include <stan/math/gpu/kernels/identity.hpp>
 #include <CL/cl.hpp>
 
 namespace stan {
@@ -23,8 +24,8 @@ inline matrix_gpu identity(int rows_cols) {
   cl::CommandQueue cmdQueue = opencl_context.queue();
 
   try {
-    opencl_kernels::identity(cl::NDRange(A.rows(), A.cols()),
-                             A.buffer(), A.rows(), A.cols());
+    opencl_kernels::identity(cl::NDRange(A.rows(), A.cols()), A.buffer(),
+                             A.rows(), A.cols());
   } catch (const cl::Error& e) {
     check_opencl_error("identity", e);
   }

diff --git a/stan/math/gpu/kernel_cl.hpp b/stan/math/gpu/kernel_cl.hpp
@@ -3,6 +3,7 @@
 #ifdef STAN_OPENCL
 #include <stan/math/gpu/opencl_context.hpp>
 #include <stan/math/gpu/constants.hpp>
+#include <stan/math/gpu/kernels/helpers.hpp>
 #include <CL/cl.hpp>
 #include <string>
 #include <algorithm>
@@ -13,9 +14,6 @@ namespace stan {
 namespace math {
 namespace {
 
-std::string helpers =                        // Helper macros for the kernels.
-#include <stan/math/gpu/kernels/helpers.cl>  // NOLINT
-    ;                                        // NOLINT
 // Holds Default parameter values for each Kernel.
 typedef std::map<const char*, int> map_base_opts;
 map_base_opts base_opts
@@ -31,7 +29,7 @@ auto compile_kernel(const char* name, const char* source) {
     kernel_opts += std::string(" -D") + comp_opts.first + "="
                    + std::to_string(comp_opts.second);
   }
-  std::string kernel_source(helpers);
+  std::string kernel_source(opencl_kernels::helpers);
   kernel_source.append(source);
   try {
     cl::Program::Sources src(1, std::make_pair(kernel_source.c_str(),
@@ -58,9 +56,7 @@ class kernel_functor {
   kernel_functor(const char* name, const char* source)
       : kernel_(compile_kernel(name, source)) {}
 
-  auto operator()() const {
-    return cl::make_kernel<Args...>(kernel_);
-  }
+  auto operator()() const { return cl::make_kernel<Args...>(kernel_); }
 };
 
 template <typename... Args>
@@ -75,53 +71,6 @@ struct global_range_kernel {
   }
 };
 
-const global_range_kernel<cl::Buffer, int, int> identity("identity",
-#include <stan/math/gpu/kernels/identity_matrix.cl>  // NOLINT
-);                                                   // NOLINT
-const global_range_kernel<cl::Buffer, cl::Buffer, int, int> copy("copy",
-#include <stan/math/gpu/kernels/copy_matrix.cl>  // NOLINT
-);                                               // NOLINT
-const global_range_kernel<cl::Buffer, cl::Buffer, int, int> transpose(
-    "transpose",
-#include <stan/math/gpu/kernels/transpose_matrix.cl>  // NOLINT
-);                                                    // NOLINT
-const global_range_kernel<cl::Buffer, cl::Buffer, cl::Buffer, int, int> add(
-    "add",
-#include <stan/math/gpu/kernels/add_matrix.cl>  // NOLINT
-);                                              // NOLINT
-const global_range_kernel<cl::Buffer, cl::Buffer, cl::Buffer, int, int>
-    subtract("subtract",
-#include <stan/math/gpu/kernels/subtract_matrix.cl>  // NOLINT
-    );                                               // NOLINT
-const global_range_kernel<cl::Buffer, cl::Buffer, int, int, int, int, int, int,
-                          int, int, int, int>
-    sub_block("sub_block",
-#include <stan/math/gpu/kernels/sub_block.cl>  // NOLINT
-    );                                         // NOLINT
-const global_range_kernel<cl::Buffer, cl::Buffer, int, int>
-    check_diagonal_zeros("is_zero_on_diagonal",
-#include <stan/math/gpu/kernels/check_diagonal_zeros.cl>  // NOLINT
-    );                                                    // NOLINT
-const global_range_kernel<cl::Buffer, cl::Buffer, int, int> check_nan("is_nan",
-#include <stan/math/gpu/kernels/check_nan.cl>  // NOLINT
-);                                             // NOLINT
-const global_range_kernel<cl::Buffer, cl::Buffer, int, int, const double>
-    check_symmetric("is_symmetric",
-#include <stan/math/gpu/kernels/check_symmetric.cl>  // NOLINT
-    );                                               // NOLINT
-const global_range_kernel<cl::Buffer, cl::Buffer, int, int, TriangularViewGPU>
-    copy_triangular("copy_triangular",
-#include <stan/math/gpu/kernels/copy_triangular_matrix.cl>  // NOLINT
-    );                                                      // NOLINT
-const global_range_kernel<cl::Buffer, int, int, TriangularViewGPU> zeros(
-    "zeros",
-#include <stan/math/gpu/kernels/zeros_matrix.cl>  // NOLINT
-);                                                // NOLINT
-const global_range_kernel<cl::Buffer, int, int, TriangularMapGPU>
-    triangular_transpose("triangular_transpose",
-#include <stan/math/gpu/kernels/triangular_transpose.cl>  // NOLINT
-    );                                                    // NOLINT
-
 }  // namespace opencl_kernels
 }  // namespace math
 }  // namespace stan

diff --git a/stan/math/gpu/kernels/add.hpp b/stan/math/gpu/kernels/add.hpp
@@ -0,0 +1,51 @@
+#ifndef STAN_MATH_GPU_KERNELS_ADD_HPP
+#define STAN_MATH_GPU_KERNELS_ADD_HPP
+
+#ifndef STRINGIFY
+#define STRINGIFY(src) #src
+#endif
+
+#include <stan/math/gpu/kernel_cl.hpp>
+
+namespace stan {
+namespace math {
+namespace opencl_kernels {
+// \cond
+static const char *add_kernel_code = STRINGIFY(
+    // \endcond
+    /**
+     * Matrix addition on the GPU: C = A + B, element by element.
+     *
+     * @param[out] C Output matrix.
+     * @param[in] A LHS of matrix addition.
+     * @param[in] B RHS of matrix addition.
+     * @param rows Number of rows for matrix A.
+     * @param cols Number of cols for matrix A.
+     * @note Code is a <code>static const char*</code> held in
+     * <code>add_kernel_code</code> (one private copy per translation unit).
+     * This kernel uses the helper macros available in helpers.hpp.
+     */
+    __kernel void add(__global write_only double *C,
+                      __global read_only double *A,
+                      __global read_only double *B, read_only unsigned int rows,
+                      read_only unsigned int cols) {
+      int i = get_global_id(0);
+      int j = get_global_id(1);
+      if (i < rows && j < cols) {
+        C(i, j) = A(i, j) + B(i, j);
+      }
+    }
+    // \cond
+);
+// \endcond
+
+/** Kernel object for "add", constructed from add_kernel_code.
+ * See the docs for \link kernels/add.hpp add() \endlink
+ */
+const global_range_kernel<cl::Buffer, cl::Buffer, cl::Buffer, int, int> add(
+    "add", add_kernel_code);
+
+}  // namespace opencl_kernels
+}  // namespace math
+}  // namespace stan
+#endif
diff --git a/stan/math/gpu/kernels/add_matrix.cl b/stan/math/gpu/kernels/add_matrix.cl
diff --git a/stan/math/gpu/kernels/check_diagonal_zeros.cl b/stan/math/gpu/kernels/check_diagonal_zeros.cl
diff --git a/stan/math/gpu/kernels/check_diagonal_zeros.hpp b/stan/math/gpu/kernels/check_diagonal_zeros.hpp
@@ -0,0 +1,53 @@
+#ifndef STAN_MATH_GPU_KERNELS_CHECK_DIAGONAL_ZEROS_HPP
+#define STAN_MATH_GPU_KERNELS_CHECK_DIAGONAL_ZEROS_HPP
+
+#ifndef STRINGIFY
+#define STRINGIFY(src) #src
+#endif
+
+#include <stan/math/gpu/kernel_cl.hpp>
+
+namespace stan {
+namespace math {
+namespace opencl_kernels {
+// \cond
+static const char *is_zero_on_diagonal_kernel_code = STRINGIFY(
+    // \endcond
+    /**
+     * Check if the <code>matrix_gpu</code> has zeros on the diagonal
+     *
+     * @param[in] A Matrix to check.
+     * @param[out] flag Set to 1 if any diagonal entry is zero, else untouched.
+     * @param rows The number of rows for A.
+     * @param cols The number of cols of A.
+     * @note Code is a <code>static const char*</code> held in
+     * <code>is_zero_on_diagonal_kernel_code</code> (one copy per TU).
+     * Kernel for stan/math/gpu/err/check_diagonal_zeros.hpp.
+     * This kernel uses the helper macros available in helpers.hpp.
+     */
+    __kernel void is_zero_on_diagonal(
+        __global read_only double *A, __global int *flag,
+        read_only unsigned int rows, read_only unsigned int cols) {
+      const int i = get_global_id(0);
+      // Indices past either dimension have no diagonal entry to inspect.
+      // The flag is only ever raised here, never cleared.
+      if (i < rows && i < cols && A(i, i) == 0) {
+        flag[0] = 1;
+      }
+    }
+    // \cond
+);
+// \endcond
+
+/** Kernel object built from is_zero_on_diagonal_kernel_code. See the docs
+ * for \link kernels/check_diagonal_zeros.hpp
+ * check_diagonal_zeros() \endlink
+ */
+const global_range_kernel<cl::Buffer, cl::Buffer, int, int>
+    check_diagonal_zeros("is_zero_on_diagonal",
+                         is_zero_on_diagonal_kernel_code);
+
+}  // namespace opencl_kernels
+}  // namespace math
+}  // namespace stan
+#endif
diff --git a/stan/math/gpu/kernels/check_nan.cl b/stan/math/gpu/kernels/check_nan.cl