diff --git a/examples/matmul/matmul.cc b/examples/matmul/matmul.cc index 7969e6fc6..724742b2d 100644 --- a/examples/matmul/matmul.cc +++ b/examples/matmul/matmul.cc @@ -7,7 +7,7 @@ constexpr size_t MAT_SIZE = 1024; template void set_identity(celerity::distr_queue queue, celerity::buffer mat) { queue.submit([=](celerity::handler& cgh) { - auto dw = mat.template get_access(cgh, celerity::access::one_to_one<2>()); + celerity::accessor dw{mat, cgh, celerity::access::one_to_one<2>(), cl::sycl::write_only, cl::sycl::no_init}; cgh.parallel_for(mat.get_range(), [=](cl::sycl::item<2> item) { dw[item] = item[0] == item[1]; }); }); } @@ -15,9 +15,9 @@ void set_identity(celerity::distr_queue queue, celerity::buffer mat) { template void multiply(celerity::distr_queue queue, celerity::buffer mat_a, celerity::buffer mat_b, celerity::buffer mat_c) { queue.submit([=](celerity::handler& cgh) { - auto a = mat_a.template get_access(cgh, celerity::access::slice<2>(1)); - auto b = mat_b.template get_access(cgh, celerity::access::slice<2>(0)); - auto c = mat_c.template get_access(cgh, celerity::access::one_to_one<2>()); + celerity::accessor a{mat_a, cgh, celerity::access::slice<2>(1), cl::sycl::read_only}; + celerity::accessor b{mat_b, cgh, celerity::access::slice<2>(0), cl::sycl::read_only}; + celerity::accessor c{mat_c, cgh, celerity::access::one_to_one<2>(), cl::sycl::write_only, cl::sycl::no_init}; cgh.parallel_for(cl::sycl::range<2>(MAT_SIZE, MAT_SIZE), [=](cl::sycl::item<2> item) { auto sum = 0.f; @@ -58,7 +58,7 @@ int main(int argc, char* argv[]) { multiply(queue, mat_b_buf, mat_c_buf, mat_a_buf); queue.submit(celerity::allow_by_ref, [&](celerity::handler& cgh) { - auto result = mat_a_buf.get_access(cgh, celerity::access::one_to_one<2>()); + celerity::accessor result{mat_a_buf, cgh, celerity::access::one_to_one<2>(), cl::sycl::read_only_host_task}; cgh.host_task(range, [=, &verification_passed](celerity::partition<2> part) { celerity::experimental::bench::end("main program"); diff --git a/examples/syncing/syncing.cc b/examples/syncing/syncing.cc index 9678157c7..d055875ee 100644 --- a/examples/syncing/syncing.cc +++ b/examples/syncing/syncing.cc @@ -13,12 +13,12 @@ int main(int argc, char* argv[]) { std::vector host_buff(N); q.submit([=](handler& cgh) { - auto b = buff.get_access(cgh, access::one_to_one<1>()); + celerity::accessor b{buff, cgh, access::one_to_one<1>(), cl::sycl::write_only, cl::sycl::no_init}; cgh.parallel_for(cl::sycl::range<1>(N), [=](cl::sycl::item<1> item) { b[item] = item.get_linear_id(); }); }); q.submit(celerity::allow_by_ref, [=, &host_buff](handler& cgh) { - auto b = buff.get_access(cgh, access::fixed<1>({0, N})); + celerity::accessor b{buff, cgh, access::fixed<1>({0, N}), cl::sycl::read_only_host_task}; cgh.host_task(on_master_node, [=, &host_buff] { std::this_thread::sleep_for(std::chrono::milliseconds(10)); // give the synchronization more time to fail for(int i = 0; i < N; i++) { diff --git a/examples/wave_sim/wave_sim.cc b/examples/wave_sim/wave_sim.cc index 3bdc047fa..78008f6b0 100644 --- a/examples/wave_sim/wave_sim.cc +++ b/examples/wave_sim/wave_sim.cc @@ -9,7 +9,7 @@ void setup_wave(celerity::distr_queue& queue, celerity::buffer u, cl::sycl::float2 center, float amplitude, cl::sycl::float2 sigma) { queue.submit([=](celerity::handler& cgh) { - auto dw_u = u.get_access(cgh, celerity::access::one_to_one<2>()); + celerity::accessor dw_u{u, cgh, celerity::access::one_to_one<2>(), cl::sycl::write_only, cl::sycl::no_init}; cgh.parallel_for(u.get_range(), [=, c = center, a = amplitude, s = sigma](cl::sycl::item<2> item) { const float dx = item[1] - c.x(); const float dy = item[0] - c.y(); @@ -20,7 +20,7 @@ void setup_wave(celerity::distr_queue& queue, celerity::buffer u, cl:: void zero(celerity::distr_queue& queue, celerity::buffer buf) { queue.submit([=](celerity::handler& cgh) { - auto dw_buf = buf.get_access(cgh, celerity::access::one_to_one<2>()); + celerity::accessor dw_buf{buf, cgh, celerity::access::one_to_one<2>(), cl::sycl::write_only, cl::sycl::no_init}; cgh.parallel_for(buf.get_range(), [=](cl::sycl::item<2> item) { dw_buf[item] = 0.f; }); }); } @@ -40,8 +40,8 @@ struct update_config { template void step(celerity::distr_queue& queue, celerity::buffer up, celerity::buffer u, float dt, cl::sycl::float2 delta) { queue.submit([=](celerity::handler& cgh) { - auto rw_up = up.template get_access(cgh, celerity::access::one_to_one<2>()); - auto r_u = u.template get_access(cgh, celerity::access::neighborhood<2>(1, 1)); + celerity::accessor rw_up{up, cgh, celerity::access::one_to_one<2>(), cl::sycl::read_write}; + celerity::accessor r_u{u, cgh, celerity::access::neighborhood<2>(1, 1), cl::sycl::read_only}; const auto size = up.get_range(); cgh.parallel_for(size, [=](cl::sycl::item<2> item) { @@ -69,7 +69,7 @@ template void store(celerity::distr_queue& queue, celerity::buffer up, std::vector>& result_frames) { const auto range = up.get_range(); queue.submit(celerity::allow_by_ref, [=, &result_frames](celerity::handler& cgh) { - auto up_r = up.template get_access(cgh, celerity::access::fixed<2>{{{}, range}}); + celerity::accessor up_r{up, cgh, celerity::access::fixed<2>{{{}, range}}, cl::sycl::read_only_host_task}; cgh.host_task(celerity::on_master_node, [=, &result_frames] { result_frames.emplace_back(); auto& frame = *result_frames.rbegin(); diff --git a/include/accessor.h b/include/accessor.h index 21ec27d72..ca5359ab1 100644 --- a/include/accessor.h +++ b/include/accessor.h @@ -3,22 +3,25 @@ #include #include +#include #include "access_modes.h" +#include "buffer.h" #include "buffer_storage.h" - +#include "ccpp_2020_compatibility_layer.h" +#include "handler.h" namespace celerity { template class partition; -template +template class accessor; namespace detail { - template + template class accessor_base { public: static_assert(Dims > 0, "0-dimensional accessors NYI"); @@ -28,11 +31,20 @@ namespace detail { using const_reference = const DataT&; }; - template - accessor make_device_accessor(Args&&...); + template + accessor make_device_accessor(Args&&...); + + template + accessor make_host_accessor(Args&&...); + + template + constexpr cl::sycl::access_mode deduce_access_mode(); - template - accessor make_host_accessor(Args&&...); + template + constexpr cl::sycl::access_mode deduce_access_mode_discard(); + + template + constexpr cl::sycl::target deduce_access_target(); } // namespace detail @@ -114,12 +126,55 @@ class host_memory_layout { * @note The Celerity accessor currently does not support get_size, get_count, get_range, get_offset and get_pointer, * as their semantics in a distributed context are unclear. */ -template -class accessor - : public detail::accessor_base { +template +class accessor : public detail::accessor_base { public: accessor(const accessor& other) : sycl_accessor(other.sycl_accessor) { init_from(other); } + template + accessor(const buffer& buff, handler& cgh, Functor rmfn) { + using rmfn_traits = allscale::utils::lambda_traits; + static_assert(rmfn_traits::result_type::dims == Dims, "The returned subrange doesn't match buffer dimensions."); + + if(detail::is_prepass_handler(cgh)) { + auto& prepass_cgh = dynamic_cast(cgh); + prepass_cgh.add_requirement( + detail::get_buffer_id(buff), std::make_unique>(rmfn, Mode, buff.get_range())); + sycl_accessor = sycl_accessor_t(); + + } else { + if(detail::get_handler_execution_target(cgh) != detail::execution_target::DEVICE) { + throw std::runtime_error( + "Calling accessor constructor with device target is only allowed in parallel_for tasks." + "If you want to access this buffer from within a host task, please specialize the call using one of the *_host_task tags"); + } + auto& live_cgh = dynamic_cast(cgh); + // It's difficult to figure out which stored range mapper corresponds to this constructor call, which is why we just call the raw mapper + // manually. This also means that we have to clamp the subrange ourselves here, which is not ideal from an encapsulation standpoint. + const auto sr = detail::clamp_subrange_to_buffer_size(live_cgh.apply_range_mapper(rmfn, buff.get_range()), buff.get_range()); + auto access_info = detail::runtime::get_instance().get_buffer_manager().get_device_buffer( + detail::get_buffer_id(buff), Mode, detail::range_cast<3>(sr.range), detail::id_cast<3>(sr.offset)); + eventual_sycl_cgh = live_cgh.get_eventual_sycl_cgh(); + sycl_accessor = sycl_accessor_t(access_info.buffer, buff.get_range(), access_info.offset); + backing_buffer_offset = access_info.offset; + } + } + + template + accessor(const buffer& buff, handler& cgh, Functor rmfn, TagT tag) : accessor(buff, cgh, rmfn) {} + + template + accessor(const buffer& buff, handler& cgh, Functor rmfn, TagT tag, cl::sycl::property::no_init no_init) : accessor(buff, cgh, rmfn) {} + + template + accessor(const buffer& buff, handler& cgh, Functor rmfn, TagT tag, cl::sycl::property_list prop_list) { + // in this static assert it would be more relevant to use property_list type, but if a defined type is used then it is always false and + // always fails to compile. Hence we use a templated type so that it only produces a compile error when the ctr is called. + static_assert(!std::is_same_v, + "Currently it is not accepted to pass a property list to an accessor constructor. Please use the property cl::sycl::no_init " + "as a last argument in the constructor"); + } + accessor& operator=(const accessor& other) { if(this != &other) { sycl_accessor = other.sycl_accessor; @@ -128,19 +183,19 @@ class accessor return *this; } - template - std::enable_if_t 0), DataT&> operator[]( + template + std::enable_if_t 0), DataT&> operator[]( cl::sycl::id index) const { return sycl_accessor[index - backing_buffer_offset]; } - template + template std::enable_if_t 0), DataT> operator[](cl::sycl::id index) const { return sycl_accessor[index - backing_buffer_offset]; } - template - std::enable_if_t 0), cl::sycl::atomic> operator[](cl::sycl::id index) const { + template + std::enable_if_t 0), cl::sycl::atomic> operator[](cl::sycl::id index) const { return sycl_accessor[index - backing_buffer_offset]; } @@ -151,26 +206,26 @@ class accessor friend bool operator!=(const accessor& lhs, const accessor& rhs) { return !(lhs == rhs); } private: +#if WORKAROUND_COMPUTECPP using sycl_accessor_t = cl::sycl::accessor; +#else + using sycl_accessor_t = cl::sycl::accessor; +#endif - template - friend accessor detail::make_device_accessor(Args&&...); + template + friend accessor detail::make_device_accessor(Args&&...); // see init_from cl::sycl::handler* const* eventual_sycl_cgh = nullptr; - cl::sycl::accessor sycl_accessor; - cl::sycl::id backing_buffer_offset; + sycl_accessor_t sycl_accessor; - // TODO remove this once we have SYCL 2020 default-constructible accessors - accessor(cl::sycl::buffer& faux_buffer) - : sycl_accessor(cl::sycl::accessor(faux_buffer)) {} + cl::sycl::id backing_buffer_offset; accessor(cl::sycl::handler* const* eventual_sycl_cgh, const subrange& mapped_subrange, cl::sycl::buffer& buffer, cl::sycl::id backing_buffer_offset) : eventual_sycl_cgh(eventual_sycl_cgh), // We pass a range and offset here to avoid interference from SYCL, but the offset must be relative to the *backing buffer*. - sycl_accessor(cl::sycl::accessor( - buffer, mapped_subrange.range, mapped_subrange.offset - backing_buffer_offset)), + sycl_accessor(sycl_accessor_t(buffer, mapped_subrange.range, mapped_subrange.offset - backing_buffer_offset)), backing_buffer_offset(backing_buffer_offset) { // SYCL 1.2.1 dictates that all kernel parameters must have standard layout. // However, since we are wrapping a SYCL accessor, this assertion fails for some implementations, @@ -196,16 +251,84 @@ class accessor } }; -template -class accessor - : public detail::accessor_base { +// Celerity Accessor Deduction Guides +template +accessor(const buffer& buff, handler& cgh, Functor rmfn, TagT tag) + -> accessor(), detail::deduce_access_target()>; + +template +accessor(const buffer& buff, handler& cgh, Functor rmfn, TagT tag, cl::sycl::property::no_init no_init) + -> accessor(), detail::deduce_access_target()>; + +template +accessor(const buffer& buff, handler& cgh, Functor rmfn, TagT tag, cl::sycl::property_list prop_list) + -> accessor(), detail::deduce_access_target()>; + +// + +template +class accessor : public detail::accessor_base { public: - template + template + accessor(const buffer& buff, handler& cgh, Functor rmfn) { + static_assert(!std::is_same_v>, + "The accessor constructor overload for master-access tasks (now called 'host tasks') has " + "been removed with Celerity 0.2.0. Please provide a range mapper instead."); + + using rmfn_traits = allscale::utils::lambda_traits; + static_assert(rmfn_traits::result_type::dims == Dims, "The returned subrange doesn't match buffer dimensions."); + + if(detail::is_prepass_handler(cgh)) { + auto& prepass_cgh = dynamic_cast(cgh); + prepass_cgh.add_requirement( + detail::get_buffer_id(buff), std::make_unique>(rmfn, Mode, buff.get_range())); + } else { + if constexpr(Target == cl::sycl::target::host_buffer) { + if(detail::get_handler_execution_target(cgh) != detail::execution_target::HOST) { + throw std::runtime_error( + "Calling accessor constructor with host_buffer target is only allowed in host tasks." + "If you want to access this buffer from within a parallel_for task, please specialize the call using one of the non host tags"); + } + auto& live_cgh = dynamic_cast(cgh); + // It's difficult to figure out which stored range mapper corresponds to this constructor call, which is why we just call the raw mapper + // manually. This also means that we have to clamp the subrange ourselves here, which is not ideal from an encapsulation standpoint. + const auto sr = detail::clamp_subrange_to_buffer_size(live_cgh.apply_range_mapper(rmfn, buff.get_range()), buff.get_range()); + auto access_info = detail::runtime::get_instance().get_buffer_manager().get_host_buffer( + detail::get_buffer_id(buff), Mode, detail::range_cast<3>(sr.range), detail::id_cast<3>(sr.offset)); + + mapped_subrange = sr; + optional_buffer = &access_info.buffer; + backing_buffer_offset = access_info.offset; + virtual_buffer_range = buff.get_range(); + } + } + } + + template + accessor(const buffer& buff, handler& cgh, Functor rmfn, TagT tag) : accessor(buff, cgh, rmfn) {} + + /** + * TODO: As of ComputeCpp 2.5.0 they do not support no_init prop, hence this constructor is needed along with discard deduction guide. + * but once they do this should be replace for a constructor that takes a prop list as an argument. + */ + template + accessor(const buffer& buff, handler& cgh, Functor rmfn, TagT tag, cl::sycl::property::no_init no_init) : accessor(buff, cgh, rmfn) {} + + template + accessor(const buffer& buff, handler& cgh, Functor rmfn, TagT tag, cl::sycl::property_list prop_list) { + // in this static assert it would be more relevant to use property_list type, but if a defined type is used then it is always false and + // always fails to compile. Hence we use a templated type so that it only produces a compile error when the ctr is loaded. + static_assert(!std::is_same_v, + "Currently it is not accepted to pass a property list to an accessor constructor. Please use the property cl::sycl::no_init " + "as a last argument in the constructor"); + } + + template std::enable_if_t 0), DataT&> operator[](cl::sycl::id index) const { return *(get_buffer().get_pointer() + get_linear_offset(index)); } - template + template std::enable_if_t 0), DataT> operator[](cl::sycl::id index) const { return *(get_buffer().get_pointer() + get_linear_offset(index)); } @@ -247,8 +370,8 @@ class accessor */ template std::pair get_host_memory(const partition& part) const { - // We already know the range mapper output for "chunk" from the constructor. The parameter is a purely semantic dependency which ensures that this - // function is not called outside a host task. + // We already know the range mapper output for "chunk" from the constructor. The parameter is a purely semantic dependency which ensures that + // this function is not called outside a host task. (void)part; host_memory_layout::dimension_vector dimensions(Dims); @@ -264,8 +387,8 @@ class accessor } private: - template - friend accessor detail::make_host_accessor(Args&&...); + template + friend accessor detail::make_host_accessor(Args&&...); // Subange of the accessor, as set by the range mapper or requested by the user (master node host tasks only). // This does not necessarily correspond to the backing buffer's range. @@ -302,17 +425,53 @@ class accessor }; namespace detail { - - template - accessor make_device_accessor(Args&&... args) { + template + accessor make_device_accessor(Args&&... args) { return {std::forward(args)...}; } - template - accessor make_host_accessor(Args&&... args) { + template + accessor make_host_accessor(Args&&... args) { return {std::forward(args)...}; } + template + constexpr cl::sycl::access_mode deduce_access_mode() { + if(std::is_same_v || // + std::is_same_v) { + return cl::sycl::access_mode::read; + } else if(std::is_same_v || // + std::is_same_v) { + return cl::sycl::access_mode::read_write; + } else { + return cl::sycl::access_mode::write; + } + } + + template + constexpr cl::sycl::access_mode deduce_access_mode_discard() { + if constexpr(std::is_same_v || // + std::is_same_v) { + static_assert(!std::is_same_v, "Invalid access mode + no_init"); + } else if(std::is_same_v || // + std::is_same_v) { + return cl::sycl::access_mode::discard_read_write; + } else { + return cl::sycl::access_mode::discard_write; + } + } + + template + constexpr cl::sycl::target deduce_access_target() { + if(std::is_same_v || // + std::is_same_v || // + std::is_same_v) { + return cl::sycl::target::device; + } else { + return cl::sycl::target::host_buffer; + } + } + } // namespace detail } // namespace celerity diff --git a/include/buffer.h b/include/buffer.h index eafd764fc..00af02b9f 100644 --- a/include/buffer.h +++ b/include/buffer.h @@ -5,14 +5,17 @@ #include #include -#include "accessor.h" #include "buffer_manager.h" -#include "handler.h" +#include "ccpp_2020_compatibility_layer.h" #include "range_mapper.h" #include "ranges.h" #include "runtime.h" namespace celerity { + +template +class buffer; + namespace detail { struct buffer_lifetime_tracker { @@ -28,15 +31,20 @@ namespace detail { buffer_id id; }; + template + buffer_id get_buffer_id(const buffer& buff); + } // namespace detail +template +class accessor; + template class buffer { public: static_assert(Dims > 0, "0-dimensional buffers NYI"); - buffer(const DataT* host_ptr, cl::sycl::range range) - : range(range), faux_buf(new cl::sycl::buffer(detail::range_cast(cl::sycl::range<3>{1, 1, 1}))) { + buffer(const DataT* host_ptr, cl::sycl::range range) : range(range) { if(!detail::runtime::is_initialized()) { detail::runtime::init(nullptr, nullptr); } lifetime_tracker = std::make_shared(); @@ -53,53 +61,24 @@ class buffer { ~buffer() {} - template - accessor get_access(handler& cgh, Functor rmfn) const { - return get_access(cgh, rmfn); + template + accessor get_access(handler& cgh, Functor rmfn) const { + return get_access(cgh, rmfn); } - template + + template accessor get_access(handler& cgh, Functor rmfn) const { - static_assert(!std::is_same_v>, "The buffer::get_access overload for master-access tasks (now called 'host tasks') has " - "been removed with Celerity 0.2.0. Please provide a range mapper instead."); - - using rmfn_traits = allscale::utils::lambda_traits; - static_assert(rmfn_traits::result_type::dims == Dims, "The returned subrange doesn't match buffer dimensions."); - - if(detail::is_prepass_handler(cgh)) { - auto& prepass_cgh = dynamic_cast(cgh); - prepass_cgh.add_requirement(id, std::make_unique>(rmfn, Mode, get_range())); - if constexpr(Target == cl::sycl::access::target::host_buffer) { - return detail::make_host_accessor(); - } else { - return detail::make_device_accessor(*faux_buf); - } - } + return accessor(*this, cgh, rmfn); + } - // It's difficult to figure out which stored range mapper corresponds to this get_access call, which is why we just call the raw mapper manually. - // This also means that we have to clamp the subrange ourselves here, which is not ideal from an encapsulation standpoint. - if constexpr(Target == cl::sycl::access::target::host_buffer) { - if(detail::get_handler_execution_target(cgh) != detail::execution_target::HOST) { - throw std::runtime_error("Calling buffer::get_access with sycl::access::target::host_buffer is only allowed in host tasks."); - } - auto& live_cgh = dynamic_cast(cgh); - const auto sr = detail::clamp_subrange_to_buffer_size(live_cgh.apply_range_mapper(rmfn, get_range()), get_range()); - auto access_info = detail::runtime::get_instance().get_buffer_manager().get_host_buffer( - id, Mode, detail::range_cast<3>(sr.range), detail::id_cast<3>(sr.offset)); - return detail::make_host_accessor(sr, access_info.buffer, access_info.offset, range); - } else { - if(detail::get_handler_execution_target(cgh) != detail::execution_target::DEVICE) { - throw std::runtime_error( - "Calling buffer::get_access on device buffers is only allowed in compute tasks. " - "If you want to access this buffer from within a host task, please specialize the call using sycl::access::target::host_buffer."); - } - auto& live_cgh = dynamic_cast(cgh); - const auto sr = detail::clamp_subrange_to_buffer_size(live_cgh.apply_range_mapper(rmfn, get_range()), get_range()); - auto access_info = detail::runtime::get_instance().get_buffer_manager().get_device_buffer( - id, Mode, detail::range_cast<3>(sr.range), detail::id_cast<3>(sr.offset)); - return detail::make_device_accessor(live_cgh.get_eventual_sycl_cgh(), sr, access_info.buffer, access_info.offset); - } +#if WORKAROUND_COMPUTECPP + template + auto get_access(handler& cgh, Functor rmfn) const { + return accessor(Trgt)>(*this, cgh, rmfn); } +#endif + cl::sycl::range get_range() const { return range; } @@ -108,21 +87,17 @@ class buffer { cl::sycl::range range; detail::buffer_id id; - // Unfortunately, as of SYCL 1.2.1 Rev 6, there is now way of creating a - // SYCL accessor without at least a buffer reference (i.e., there is no - // default ctor, even for placeholder accessors). During the pre-pass, we - // not only don't have access to a SYCL command group handler, but also - // don't know the backing buffer yet (it might not even exist at that - // point). For calls to get_access() we however still have to construct a - // SYCL accessor to return inside the Celerity accessor. For this, we use - // this faux buffer. It has size 1 in all dimensions, so the allocation - // overhead should be minimal. Hopefully the runtime overhead is also - // negligible. - // - // (The reason why we make this a shared_ptr is so that Celerity buffers - // still satisfy StandardLayoutType, which we use as a crude safety check; - // see distr_queue::submit). - std::shared_ptr> faux_buf; + template + friend detail::buffer_id detail::get_buffer_id(const buffer& buff); }; +namespace detail { + + template + buffer_id get_buffer_id(const buffer& buff) { + return buff.id; + } + +} // namespace detail + } // namespace celerity diff --git a/include/ccpp_2020_compatibility_layer.h b/include/ccpp_2020_compatibility_layer.h new file mode 100644 index 000000000..dee10862c --- /dev/null +++ b/include/ccpp_2020_compatibility_layer.h @@ -0,0 +1,63 @@ +/** + * @file + * This whole file is just a 'compatibility' layer for ComputeCPP 2.5.0 until they provide support for the SYCL 2020 features specified below. + */ +#pragma once + +#include "workaround.h" + +#if WORKAROUND_COMPUTECPP + +#include + + +namespace cl::sycl { +using access_mode = cl::sycl::access::mode; +enum class target { + device = static_cast>(cl::sycl::access::target::global_buffer), + host_task = static_cast>(cl::sycl::access::target::host_buffer), + global_buffer = device, + constant_buffer = static_cast>(cl::sycl::access::target::constant_buffer), + local = static_cast>(cl::sycl::access::target::local), + host_buffer = static_cast>(cl::sycl::access::target::host_buffer) +}; + +namespace detail { + + template + constexpr cl::sycl::access::target ccpp_target_2_acc() { + return static_cast(Target); + } + + + template + constexpr cl::sycl::target ccpp_acc_2_target() { + return static_cast(Target); + } + + struct read_only_tag_t {}; + struct read_write_tag_t {}; + struct write_only_tag_t {}; + struct read_only_host_task_tag_t {}; + struct read_write_host_task_tag_t {}; + struct write_only_host_task_tag_t {}; + +} // namespace detail + +inline constexpr detail::read_only_tag_t read_only; +inline constexpr detail::read_write_tag_t read_write; +inline constexpr detail::write_only_tag_t write_only; +inline constexpr detail::read_only_host_task_tag_t read_only_host_task; +inline constexpr detail::read_write_host_task_tag_t read_write_host_task; +inline constexpr detail::write_only_host_task_tag_t write_only_host_task; + +namespace property { + struct no_init : detail::property_base { + no_init() : detail::property_base(static_cast(0)) {} + }; +} // namespace property +inline property::no_init no_init; + +}; // namespace cl::sycl + +#endif \ No newline at end of file diff --git a/include/celerity.h b/include/celerity.h index 44a1c1b87..13f04fd59 100644 --- a/include/celerity.h +++ b/include/celerity.h @@ -3,6 +3,7 @@ #include "runtime.h" +#include "accessor.h" #include "buffer.h" #include "distr_queue.h" #include "user_bench.h" diff --git a/test/runtime_tests.cc b/test/runtime_tests.cc index d37288d39..93ddb7425 100644 --- a/test/runtime_tests.cc +++ b/test/runtime_tests.cc @@ -665,16 +665,16 @@ namespace detail { return result; } - template - accessor get_device_accessor( + template + accessor get_device_accessor( live_pass_device_handler& cgh, buffer_id bid, const cl::sycl::range& range, const cl::sycl::id& offset) { auto buf_info = bm->get_device_buffer(bid, Mode, range_cast<3>(range), id_cast<3>(offset)); return detail::make_device_accessor( cgh.get_eventual_sycl_cgh(), subrange(offset, range), buf_info.buffer, buf_info.offset); } - template - accessor get_host_accessor( + template + accessor get_host_accessor( buffer_id bid, const cl::sycl::range& range, const cl::sycl::id& offset) { auto buf_info = bm->get_host_buffer(bid, Mode, range_cast<3>(range), id_cast<3>(offset)); return detail::make_host_accessor( @@ -1898,5 +1898,67 @@ namespace detail { CHECK(out == 43); } + TEST_CASE("accessors mode and target deduced correctly from SYCL 2020 tag types and no_init property", "[accessor]") { + buffer buf_a(cl::sycl::range<1>(32)); + auto& tm = runtime::get_instance().get_task_manager(); + detail::task_id tid; + + SECTION("Device Accessors") { + tid = test_utils::add_compute_task( + tm, + [&](handler& cgh) { + accessor acc1{buf_a, cgh, one_to_one<1>(), cl::sycl::write_only}; + static_assert(std::is_same_v, decltype(acc1)>); + + accessor acc2{buf_a, cgh, one_to_one<1>(), cl::sycl::read_only}; + static_assert(std::is_same_v, decltype(acc2)>); + + accessor acc3{buf_a, cgh, one_to_one<1>(), cl::sycl::read_write}; + static_assert(std::is_same_v, decltype(acc3)>); + + accessor acc4{buf_a, cgh, one_to_one<1>(), cl::sycl::write_only, cl::sycl::no_init}; + static_assert(std::is_same_v, decltype(acc4)>); + + accessor acc5{buf_a, cgh, one_to_one<1>(), cl::sycl::read_write, cl::sycl::no_init}; + static_assert(std::is_same_v, decltype(acc5)>); + }, + buf_a.get_range()); + } + + + SECTION("Host Accessors") { + tid = test_utils::add_host_task(tm, on_master_node, [&](handler& cgh) { + // The following line is commented because it produces a compile error but it is still a case we wanted to test. + // Since we can not check the content of a property list at compile time, for now it is only accepted to pass either the property + // cl::sycl::no_init or nothing. + // accessor acc0{buf_a, cgh, one_to_one<1>(), cl::sycl::write_only_host_task, cl::sycl::property_list{cl::sycl::no_init}}; + + accessor acc1{buf_a, cgh, one_to_one<1>(), cl::sycl::write_only_host_task}; + static_assert(std::is_same_v, decltype(acc1)>); + + accessor acc2{buf_a, cgh, one_to_one<1>(), cl::sycl::read_only_host_task}; + static_assert(std::is_same_v, decltype(acc2)>); + + accessor acc3{buf_a, cgh, fixed<1>({0, 1}), cl::sycl::read_write_host_task}; + static_assert(std::is_same_v, decltype(acc3)>); + + accessor acc4{buf_a, cgh, one_to_one<1>(), cl::sycl::write_only_host_task, cl::sycl::no_init}; + static_assert(std::is_same_v, decltype(acc4)>); + + accessor acc5{buf_a, cgh, one_to_one<1>(), cl::sycl::read_write_host_task, cl::sycl::no_init}; + static_assert(std::is_same_v, decltype(acc5)>); + }); + } + + const auto tsk = tm.get_task(tid); + const auto buff_id = detail::get_buffer_id(buf_a); + + REQUIRE(tsk->get_buffer_access_map().get_access_modes(buff_id).count(cl::sycl::access_mode::write) == 1); + REQUIRE(tsk->get_buffer_access_map().get_access_modes(buff_id).count(cl::sycl::access_mode::read) == 1); + REQUIRE(tsk->get_buffer_access_map().get_access_modes(buff_id).count(cl::sycl::access_mode::read_write) == 1); + REQUIRE(tsk->get_buffer_access_map().get_access_modes(buff_id).count(cl::sycl::access_mode::discard_write) == 1); + REQUIRE(tsk->get_buffer_access_map().get_access_modes(buff_id).count(cl::sycl::access_mode::discard_read_write) == 1); + } + } // namespace detail -} // namespace celerity +} // namespace celerity \ No newline at end of file