Mark all cuco kernels as static so they have hidden visibility #422

Merged · 4 commits · Jan 19, 2024
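This PR replaces the explicit `__global__` qualifier on every cuco kernel with a `CUCO_KERNEL` macro provided by `cuco/detail/utility/cuda.cuh`, and adds a `CUCO_SUPPRESS_KERNEL_WARNINGS` line to each kernel header. The macro definitions themselves are not part of this diff; going by the PR title, a minimal sketch of what `CUCO_KERNEL` could expand to is shown below (the header contents here are an assumption for illustration, not the actual cuco code):

```cuda
// Hypothetical sketch of the macro this PR introduces -- not the real
// cuco/detail/utility/cuda.cuh. Internal linkage means each translation unit
// that includes a kernel header owns a private, hidden copy of every kernel it
// instantiates, so no kernel symbol is exported from shared libraries that
// embed cuco.
#define CUCO_KERNEL static __global__
```

With the default external linkage, two shared libraries that each embed cuco and are loaded into the same process can have their header-instantiated kernels resolve to a single weak symbol, even if the libraries were built with different compiler options; internal linkage keeps each library's kernels private and avoids that collision.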
123 changes: 63 additions & 60 deletions include/cuco/detail/dynamic_map_kernels.cuh
@@ -14,6 +14,7 @@
* limitations under the License.
*/
#pragma once
#include <cuco/detail/utility/cuda.cuh>

#include <cub/block/block_reduce.cuh>

@@ -25,6 +26,8 @@ namespace cuco {
namespace detail {
namespace cg = cooperative_groups;

CUCO_SUPPRESS_KERNEL_WARNINGS

/**
* @brief Inserts all key/value pairs in the range `[first, last)`.
*
@@ -62,15 +65,15 @@ template <uint32_t block_size,
typename atomicT,
typename Hash,
typename KeyEqual>
__global__ void insert(InputIt first,
InputIt last,
viewT* submap_views,
mutableViewT* submap_mutable_views,
atomicT* num_successes,
uint32_t insert_idx,
uint32_t num_submaps,
Hash hash,
KeyEqual key_equal)
CUCO_KERNEL void insert(InputIt first,
InputIt last,
viewT* submap_views,
mutableViewT* submap_mutable_views,
atomicT* num_successes,
uint32_t insert_idx,
uint32_t num_submaps,
Hash hash,
KeyEqual key_equal)
{
using BlockReduce = cub::BlockReduce<std::size_t, block_size>;
__shared__ typename BlockReduce::TempStorage temp_storage;
@@ -147,15 +150,15 @@ template <uint32_t block_size,
typename atomicT,
typename Hash,
typename KeyEqual>
__global__ void insert(InputIt first,
InputIt last,
viewT* submap_views,
mutableViewT* submap_mutable_views,
atomicT** submap_num_successes,
uint32_t insert_idx,
uint32_t num_submaps,
Hash hash,
KeyEqual key_equal)
CUCO_KERNEL void insert(InputIt first,
InputIt last,
viewT* submap_views,
mutableViewT* submap_mutable_views,
atomicT** submap_num_successes,
uint32_t insert_idx,
uint32_t num_submaps,
Hash hash,
KeyEqual key_equal)
{
using BlockReduce = cub::BlockReduce<std::size_t, block_size>;
__shared__ typename BlockReduce::TempStorage temp_storage;
@@ -225,13 +228,13 @@ template <uint32_t block_size,
typename atomicT,
typename Hash,
typename KeyEqual>
__global__ void erase(InputIt first,
InputIt last,
mutableViewT* submap_mutable_views,
atomicT** submap_num_successes,
uint32_t num_submaps,
Hash hash,
KeyEqual key_equal)
CUCO_KERNEL void erase(InputIt first,
InputIt last,
mutableViewT* submap_mutable_views,
atomicT** submap_num_successes,
uint32_t num_submaps,
Hash hash,
KeyEqual key_equal)
{
extern __shared__ unsigned long long submap_block_num_successes[];

@@ -296,13 +299,13 @@ template <uint32_t block_size,
typename atomicT,
typename Hash,
typename KeyEqual>
__global__ void erase(InputIt first,
InputIt last,
mutableViewT* submap_mutable_views,
atomicT** submap_num_successes,
uint32_t num_submaps,
Hash hash,
KeyEqual key_equal)
CUCO_KERNEL void erase(InputIt first,
InputIt last,
mutableViewT* submap_mutable_views,
atomicT** submap_num_successes,
uint32_t num_submaps,
Hash hash,
KeyEqual key_equal)
{
extern __shared__ unsigned long long submap_block_num_successes[];

@@ -368,13 +371,13 @@ template <uint32_t block_size,
typename viewT,
typename Hash,
typename KeyEqual>
__global__ void find(InputIt first,
InputIt last,
OutputIt output_begin,
viewT* submap_views,
uint32_t num_submaps,
Hash hash,
KeyEqual key_equal)
CUCO_KERNEL void find(InputIt first,
InputIt last,
OutputIt output_begin,
viewT* submap_views,
uint32_t num_submaps,
Hash hash,
KeyEqual key_equal)
{
auto tid = blockDim.x * blockIdx.x + threadIdx.x;
auto empty_value_sentinel = submap_views[0].get_empty_value_sentinel();
@@ -443,13 +446,13 @@ template <uint32_t block_size,
typename viewT,
typename Hash,
typename KeyEqual>
__global__ void find(InputIt first,
InputIt last,
OutputIt output_begin,
viewT* submap_views,
uint32_t num_submaps,
Hash hash,
KeyEqual key_equal)
CUCO_KERNEL void find(InputIt first,
InputIt last,
OutputIt output_begin,
viewT* submap_views,
uint32_t num_submaps,
Hash hash,
KeyEqual key_equal)
{
auto tile = cg::tiled_partition<tile_size>(cg::this_thread_block());
auto tid = blockDim.x * blockIdx.x + threadIdx.x;
@@ -514,13 +517,13 @@ template <uint32_t block_size,
typename viewT,
typename Hash,
typename KeyEqual>
__global__ void contains(InputIt first,
InputIt last,
OutputIt output_begin,
viewT* submap_views,
uint32_t num_submaps,
Hash hash,
KeyEqual key_equal)
CUCO_KERNEL void contains(InputIt first,
InputIt last,
OutputIt output_begin,
viewT* submap_views,
uint32_t num_submaps,
Hash hash,
KeyEqual key_equal)
{
auto tid = blockDim.x * blockIdx.x + threadIdx.x;
__shared__ bool writeBuffer[block_size];
@@ -582,13 +585,13 @@ template <uint32_t block_size,
typename viewT,
typename Hash,
typename KeyEqual>
__global__ void contains(InputIt first,
InputIt last,
OutputIt output_begin,
viewT* submap_views,
uint32_t num_submaps,
Hash hash,
KeyEqual key_equal)
CUCO_KERNEL void contains(InputIt first,
InputIt last,
OutputIt output_begin,
viewT* submap_views,
uint32_t num_submaps,
Hash hash,
KeyEqual key_equal)
{
auto tile = cg::tiled_partition<tile_size>(cg::this_thread_block());
auto tid = blockDim.x * blockIdx.x + threadIdx.x;
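Each modified header also gains a `CUCO_SUPPRESS_KERNEL_WARNINGS` statement at namespace scope, presumably to silence the diagnostic that NVCC emits when a `__global__` function is declared `static`. Its definition is likewise outside this diff; one possible sketch, with the pragma spelling and diagnostic number as placeholder assumptions, is:

```cuda
// Hypothetical sketch -- not the actual cuco definition. The diagnostic number
// below is a placeholder; the real header would name whatever warning the
// static __global__ declarations trigger on the supported compilers.
#if defined(__CUDACC__)
#define CUCO_SUPPRESS_KERNEL_WARNINGS _Pragma("nv_diag_suppress 1407")
#else
#define CUCO_SUPPRESS_KERNEL_WARNINGS
#endif
```

Placing the macro once near the top of each header keeps the suppression next to the kernels it covers instead of annotating every declaration.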
37 changes: 19 additions & 18 deletions include/cuco/detail/open_addressing/kernels.cuh
@@ -27,6 +27,7 @@

namespace cuco {
namespace detail {
CUCO_SUPPRESS_KERNEL_WARNINGS

/**
* @brief Inserts all elements in the range `[first, first + n)` and returns the number of
@@ -61,12 +62,12 @@ template <int32_t CGSize,
typename Predicate,
typename AtomicT,
typename Ref>
__global__ void insert_if_n(InputIt first,
cuco::detail::index_type n,
StencilIt stencil,
Predicate pred,
AtomicT* num_successes,
Ref ref)
CUCO_KERNEL void insert_if_n(InputIt first,
cuco::detail::index_type n,
StencilIt stencil,
Predicate pred,
AtomicT* num_successes,
Ref ref)
{
using BlockReduce = cub::BlockReduce<typename Ref::size_type, BlockSize>;
__shared__ typename BlockReduce::TempStorage temp_storage;
@@ -127,7 +128,7 @@ template <int32_t CGSize,
typename StencilIt,
typename Predicate,
typename Ref>
__global__ void insert_if_n(
CUCO_KERNEL void insert_if_n(
InputIt first, cuco::detail::index_type n, StencilIt stencil, Predicate pred, Ref ref)
{
auto const loop_stride = cuco::detail::grid_stride() / CGSize;
@@ -162,7 +163,7 @@ __global__ void insert_if_n(
* @param ref Non-owning container device ref used to access the slot storage
*/
template <int32_t CGSize, int32_t BlockSize, typename InputIt, typename Ref>
__global__ void erase(InputIt first, cuco::detail::index_type n, Ref ref)
CUCO_KERNEL void erase(InputIt first, cuco::detail::index_type n, Ref ref)
{
auto const loop_stride = cuco::detail::grid_stride() / CGSize;
auto idx = cuco::detail::global_thread_id() / CGSize;
@@ -212,12 +213,12 @@ template <int32_t CGSize,
typename Predicate,
typename OutputIt,
typename Ref>
__global__ void contains_if_n(InputIt first,
cuco::detail::index_type n,
StencilIt stencil,
Predicate pred,
OutputIt output_begin,
Ref ref)
CUCO_KERNEL void contains_if_n(InputIt first,
cuco::detail::index_type n,
StencilIt stencil,
Predicate pred,
OutputIt output_begin,
Ref ref)
{
namespace cg = cooperative_groups;

@@ -267,7 +268,7 @@ __global__ void contains_if_n(InputIt first,
* @param count Number of filled slots
*/
template <int32_t BlockSize, typename StorageRef, typename Predicate, typename AtomicT>
__global__ void size(StorageRef storage, Predicate is_filled, AtomicT* count)
CUCO_KERNEL void size(StorageRef storage, Predicate is_filled, AtomicT* count)
{
using size_type = typename StorageRef::size_type;

@@ -293,9 +294,9 @@ __global__ void size(StorageRef storage, Predicate is_filled, AtomicT* count)
}

template <int32_t BlockSize, typename ContainerRef, typename Predicate>
__global__ void rehash(typename ContainerRef::storage_ref_type storage_ref,
ContainerRef container_ref,
Predicate is_filled)
CUCO_KERNEL void rehash(typename ContainerRef::storage_ref_type storage_ref,
ContainerRef container_ref,
Predicate is_filled)
{
namespace cg = cooperative_groups;

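The change in `open_addressing/kernels.cuh` follows the same pattern as the dynamic-map kernels: function templates defined in a header, now with internal linkage. As a standalone illustration of what that buys (this toy header is not part of cuco), a header-defined kernel written in the same style would look like:

```cuda
// toy_kernels.cuh -- illustrative only, not a cuco header.
// Without `static`, each instantiation of a header-defined kernel template is
// a weak symbol with default visibility and can be merged across shared
// library boundaries at load time. With `static` (the CUCO_KERNEL pattern in
// this PR), every including translation unit keeps its own hidden copy and no
// kernel symbol is exported.
#pragma once
#include <cstddef>

template <typename T>
static __global__ void fill(T* data, T value, std::size_t n)
{
  std::size_t const idx = std::size_t{blockIdx.x} * blockDim.x + threadIdx.x;
  if (idx < n) { data[idx] = value; }
}
```

Since cuco is header-only, call sites are unaffected: each translation unit that includes these headers launches its own copy of the kernels it instantiates.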
5 changes: 3 additions & 2 deletions include/cuco/detail/static_map/kernels.cuh
@@ -28,6 +28,7 @@
namespace cuco {
namespace static_map_ns {
namespace detail {
CUCO_SUPPRESS_KERNEL_WARNINGS

/**
* @brief For any key-value pair `{k, v}` in the range `[first, first + n)`, if a key equivalent to
@@ -48,7 +49,7 @@ namespace detail {
* @param ref Non-owning container device ref used to access the slot storage
*/
template <int32_t CGSize, int32_t BlockSize, typename InputIt, typename Ref>
__global__ void insert_or_assign(InputIt first, cuco::detail::index_type n, Ref ref)
CUCO_KERNEL void insert_or_assign(InputIt first, cuco::detail::index_type n, Ref ref)
{
auto const loop_stride = cuco::detail::grid_stride() / CGSize;
auto idx = cuco::detail::global_thread_id() / CGSize;
@@ -87,7 +88,7 @@ __global__ void insert_or_assign(InputIt first, cuco::detail::index_type n, Ref
* @param ref Non-owning map device ref used to access the slot storage
*/
template <int32_t CGSize, int32_t BlockSize, typename InputIt, typename OutputIt, typename Ref>
__global__ void find(InputIt first, cuco::detail::index_type n, OutputIt output_begin, Ref ref)
CUCO_KERNEL void find(InputIt first, cuco::detail::index_type n, OutputIt output_begin, Ref ref)
{
namespace cg = cooperative_groups;
