-
-
Notifications
You must be signed in to change notification settings - Fork 183
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Change how users access OpenCL kernels #966
Changes from 1 commit
79c365a
2ab6ce5
621d8a2
1f2aad1
de7b5b0
35d75c6
adfbcd1
b324f01
2927e93
9a614c0
15d590d
8564d93
d00e347
adb1eff
1e5f13e
e5de811
931e62b
9f6a9f9
229bb19
f680bef
f809a5b
f634ed1
9050d5c
5fb4289
e8eec3b
7471093
3bacc32
3424025
d20ac48
62d339b
d1c4ec3
5de2818
f8db5fd
0b4f512
a68f51e
afc73c5
5508137
6f8bd4a
fba61eb
aadb3ce
4189530
6a2014c
3625c8f
58943c7
1aa47d8
e738921
93ed1a6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
…rnels. Going to find better way to bring those in
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,9 +3,9 @@ | |
#ifdef STAN_OPENCL | ||
|
||
#include <stan/math/gpu/opencl_context.hpp> | ||
#include <stan/math/gpu/constants.hpp> | ||
#include <map> | ||
#include <CL/cl.hpp> | ||
#include <string> | ||
#include <map> | ||
#include <vector> | ||
|
||
namespace stan { | ||
|
@@ -14,52 +14,51 @@ namespace math { | |
class kernel_cl_base { | ||
friend class kernel_cl; | ||
|
||
private: | ||
private: | ||
const char* copy_matrix_kernel = | ||
#include <stan/math/gpu/kernels/copy_matrix_kernel.cl> | ||
; // NOLINT | ||
#include <stan/math/gpu/kernels/copy_matrix_kernel.cl> // NOLINT | ||
; // NOLINT | ||
const char* transpose_matrix_kernel = | ||
#include <stan/math/gpu/kernels/transpose_matrix_kernel.cl> | ||
; // NOLINT | ||
#include <stan/math/gpu/kernels/transpose_matrix_kernel.cl> // NOLINT | ||
; // NOLINT | ||
const char* zeros_matrix_kernel = | ||
#include <stan/math/gpu/kernels/zeros_matrix_kernel.cl> | ||
; // NOLINT | ||
#include <stan/math/gpu/kernels/zeros_matrix_kernel.cl> // NOLINT | ||
; // NOLINT | ||
const char* identity_matrix_kernel = | ||
#include <stan/math/gpu/kernels/identity_matrix_kernel.cl> | ||
; // NOLINT | ||
#include <stan/math/gpu/kernels/identity_matrix_kernel.cl> // NOLINT | ||
; // NOLINT | ||
const char* copy_triangular_matrix_kernel = | ||
#include <stan/math/gpu/kernels/copy_triangular_matrix_kernel.cl> | ||
; // NOLINT | ||
#include <stan/math/gpu/kernels/copy_triangular_matrix_kernel.cl> // NOLINT | ||
; // NOLINT | ||
const char* copy_triangular_transposed_matrix_kernel = | ||
#include <stan/math/gpu/kernels/triangular_transpose_kernel.cl> | ||
; // NOLINT | ||
#include <stan/math/gpu/kernels/triangular_transpose_kernel.cl> // NOLINT | ||
; // NOLINT | ||
const char* copy_submatrix_kernel = | ||
#include <stan/math/gpu/kernels/sub_block_kernel.cl> | ||
; // NOLINT | ||
#include <stan/math/gpu/kernels/sub_block_kernel.cl> // NOLINT | ||
; // NOLINT | ||
const char* check_nan_kernel = | ||
#include <stan/math/gpu/kernels/check_nan_kernel.cl> | ||
; // NOLINT | ||
#include <stan/math/gpu/kernels/check_nan_kernel.cl> // NOLINT | ||
; // NOLINT | ||
const char* check_diagonal_zeros_kernel = | ||
#include <stan/math/gpu/kernels/check_diagonal_zeros_kernel.cl> | ||
; // NOLINT | ||
#include <stan/math/gpu/kernels/check_diagonal_zeros_kernel.cl> // NOLINT | ||
; // NOLINT | ||
const char* check_symmetric_kernel = | ||
#include <stan/math/gpu/kernels/check_symmetric_kernel.cl> | ||
; // NOLINT | ||
#include <stan/math/gpu/kernels/check_symmetric_kernel.cl> // NOLINT | ||
; // NOLINT | ||
const char* subtract_symmetric_kernel = | ||
#include <stan/math/gpu/kernels/subtract_matrix_kernel.cl> | ||
; // NOLINT | ||
#include <stan/math/gpu/kernels/subtract_matrix_kernel.cl> // NOLINT | ||
; // NOLINT | ||
const char* add_symmetric_kernel = | ||
#include <stan/math/gpu/kernels/add_matrix_kernel.cl> | ||
; // NOLINT | ||
#include <stan/math/gpu/kernels/add_matrix_kernel.cl> // NOLINT | ||
; // NOLINT | ||
|
||
protected: | ||
protected: | ||
typedef std::map<const char*, int> map_base_opts; | ||
const map_base_opts base_opts = { | ||
{"LOWER", gpu::Lower}, | ||
{"UPPER", gpu::Upper}, | ||
{"ENTIRE", gpu::Entire}, | ||
{"UPPER_TO_LOWER", gpu::UpperToLower}, | ||
{"LOWER_TO_UPPER", gpu::LowerToUpper}}; | ||
const map_base_opts base_opts = {{"LOWER", gpu::Lower}, | ||
{"UPPER", gpu::Upper}, | ||
{"ENTIRE", gpu::Entire}, | ||
{"UPPER_TO_LOWER", gpu::UpperToLower}, | ||
{"LOWER_TO_UPPER", gpu::LowerToUpper}}; | ||
|
||
/** Holds meta information about a kernel. | ||
* @param exists a bool to identify whether a kernel has been compiled. | ||
|
@@ -77,29 +76,41 @@ class kernel_cl_base { | |
* Map of a kernel name (first) and its meta information (second). | |
*/ | ||
typedef std::map<const char*, kernel_meta_info> map_kernel_info; | ||
const map_kernel_info kernel_info = { | ||
{"dummy", { false, "timing", {}, | ||
"__kernel void dummy(__global const int* foo) { };"}}, | ||
{"dummy2", {false, "timing", {}, | ||
"__kernel void dummy2(__global const int* foo) { };"}}, | ||
{"copy", {false, "basic_matrix", {}, copy_matrix_kernel}}, | ||
{"transpose", {false, "basic_matrix", {}, transpose_matrix_kernel}}, | ||
{"zeros", {false, "basic_matrix", | ||
{"LOWER", "UPPER", "ENTIRE"}, zeros_matrix_kernel}}, | ||
{"identity", {false, "basic_matrix", {}, identity_matrix_kernel}}, | ||
{"copy_triangular", {false, "basic_matrix", {}, | ||
copy_triangular_matrix_kernel}}, | ||
{"copy_triangular_transposed", {false, "basic_matrix", | ||
{"LOWER_TO_UPPER", "UPPER_TO_LOWER"}, | ||
copy_triangular_transposed_matrix_kernel}}, | ||
{"copy_submatrix", {false, "basic_matrix", {}, copy_submatrix_kernel}}, | ||
{"add", {false, "basic_matrix", {}, add_symmetric_kernel}}, | ||
{"subtract", {false, "basic_matrix", {}, subtract_symmetric_kernel}}, | ||
{"is_nan", {false, "check", {""}, check_nan_kernel}}, | ||
{"is_zero_on_diagonal", {false, "check", {}, check_diagonal_zeros_kernel}}, | ||
{"is_symmetric", {false, "check", {}, check_symmetric_kernel}}}; | ||
const map_kernel_info kernel_info | ||
= {{"dummy", | ||
{false, | ||
"timing", | ||
{}, | ||
"__kernel void dummy(__global const int* foo) { };"}}, | ||
{"dummy2", | ||
{false, | ||
"timing", | ||
{}, | ||
"__kernel void dummy2(__global const int* foo) { };"}}, | ||
{"copy", {false, "basic_matrix", {}, copy_matrix_kernel}}, | ||
{"transpose", {false, "basic_matrix", {}, transpose_matrix_kernel}}, | ||
{"zeros", | ||
{false, | ||
"basic_matrix", | ||
{"LOWER", "UPPER", "ENTIRE"}, | ||
zeros_matrix_kernel}}, | ||
{"identity", {false, "basic_matrix", {}, identity_matrix_kernel}}, | ||
{"copy_triangular", | ||
{false, "basic_matrix", {}, copy_triangular_matrix_kernel}}, | ||
{"copy_triangular_transposed", | ||
{false, | ||
"basic_matrix", | ||
{"LOWER_TO_UPPER", "UPPER_TO_LOWER"}, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is there a chance those gpu options can turn into enums of some kind? If those options (e.g. `gpu::UpperToLower`) become scoped enums, these can become the type of that enum instead of strings, e.g.
enum class GpuOpt {upper, lower, ... };
...
struct kernel_meta_info {
bool exists;
std::vector<GpuOpt> opts;
const char* raw_code;
};
If you need to be able to set which integer each constant refers to, you can do that, though I don't think you can have two that point to the same int. But I think you'd actually rather use the scoped enum type in the template parameters later on rather than ints, |
||
copy_triangular_transposed_matrix_kernel}}, | ||
{"copy_submatrix", {false, "basic_matrix", {}, copy_submatrix_kernel}}, | ||
{"add", {false, "basic_matrix", {}, add_symmetric_kernel}}, | ||
{"subtract", {false, "basic_matrix", {}, subtract_symmetric_kernel}}, | ||
{"is_nan", {false, "check", {""}, check_nan_kernel}}, | ||
{"is_zero_on_diagonal", | ||
{false, "check", {}, check_diagonal_zeros_kernel}}, | ||
{"is_symmetric", {false, "check", {}, check_symmetric_kernel}}}; | ||
typedef std::map<const char*, cl::Kernel> map_kernel; | ||
map_kernel kernels; // The compiled kernels | ||
map_kernel kernels; // The compiled kernels | ||
|
||
static kernel_cl_base& getInstance() { | ||
static kernel_cl_base instance_; | ||
|
@@ -111,7 +122,7 @@ class kernel_cl_base { | |
}; | ||
|
||
class kernel_cl { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This class could use a docstring too, I think. |
||
public: | ||
public: | ||
cl::Kernel compiled_; | ||
/** | ||
* Compiles all the kernels in the specified group. The side effect of this | ||
|
@@ -130,17 +141,17 @@ class kernel_cl { | |
std::string kernel_source = ""; | ||
if (this->kernel_info().count(kernel_name) == 0) { | ||
// throws if the kernel does not exist | ||
domain_error("compiling kernels", kernel_name, | ||
" kernel does not exist", ""); | ||
domain_error("compiling kernels", kernel_name, " kernel does not exist", | ||
""); | ||
} | ||
const char* kernel_group = this->kernel_info()[kernel_name].group; | ||
for (auto kern : this->kernel_info()) { | ||
if (strcmp(kern.second.group, kernel_group) == 0) { | ||
kernel_source += kern.second.raw_code; | ||
for (auto comp_opts : kern.second.opts) { | ||
if (strcmp(comp_opts, "") != 0) { | ||
kernel_opts += std::string(" -D") + comp_opts + "=" + | ||
std::to_string(this->base_options()[comp_opts]); | ||
kernel_opts += std::string(" -D") + comp_opts + "=" | ||
+ std::to_string(this->base_options()[comp_opts]); | ||
} | ||
} | ||
} | ||
|
@@ -186,7 +197,7 @@ class kernel_cl { | |
* | ||
* @param[in] kernel_name The kernel name. | ||
*/ | ||
explicit kernel_cl(const char* kernel_name) { | ||
explicit kernel_cl(const char* kernel_name) { | ||
// Compile the kernel group and return the kernel | ||
if (!this->kernel_info()[kernel_name].exists) { | ||
this->compile_kernel_group(kernel_name); | ||
|
@@ -217,9 +228,8 @@ class kernel_cl { | |
* simpleopencl.blogspot.com/2013/04/calling-kernels-with-large-number-of.html | ||
*/ | ||
template <typename T, typename... Args> | ||
inline void recursive_args(cl::Kernel& kernel, int i, | ||
const T& first_arg, | ||
const Args&... extra_args) { | ||
inline void recursive_args(cl::Kernel& kernel, int i, const T& first_arg, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Think these might be better off private or in the |
||
const Args&... extra_args) { | ||
kernel.setArg(i, first_arg); | ||
this->recursive_args(kernel, i + 1, extra_args...); | ||
} | ||
|
@@ -259,8 +269,8 @@ class kernel_cl { | |
return kernel_cl_base::getInstance().base_opts; | ||
} | ||
}; | ||
} | ||
} | ||
} // namespace math | ||
} // namespace stan | ||
|
||
#endif | ||
#endif | ||
#endif | ||
#endif |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Seems like you might want to hide this `kernel_cl_base` class as an implementation detail behind `kernel_cl`, which it seems like other Math library developers should want to use instead, right? You can do that by nesting this class one level further, inside the `stan::math::internal` namespace. I'm not 100% sure this suggestion is correct, but it might make sense if you want to limit the developer API to the exposed `kernel_cl` functionality (which I think you might, but I'm not sure) and it doesn't depend on knowing how `kernel_cl_base` is implemented (ideally it doesn't, or that is explained at the `kernel_cl` API layer).