Skip to content

Commit

Permalink
Use thread_index_type to avoid index overflow in grid-stride loops (#13895)
Browse files Browse the repository at this point in the history

This PR checks all related files under `src/hash`, `src/bitmask` and `src/transform` folders and fixes potential index overflow issues by using `thread_index_type`.

Authors:
  - Yunsong Wang (https://github.com/PointKernel)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - MithunR (https://github.com/mythrocks)
  - Karthikeyan (https://github.com/karthikeyann)

URL: https://github.com/rapidsai/cudf/pull/13895
  • Loading branch information
PointKernel authored Aug 24, 2023
1 parent d2efb1f commit ff99f98
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 19 deletions.
24 changes: 14 additions & 10 deletions cpp/src/bitmask/null_mask.cu
Original file line number Diff line number Diff line change
Expand Up @@ -104,13 +104,15 @@ __global__ void set_null_mask_kernel(bitmask_type* __restrict__ destination,
bool valid,
size_type number_of_mask_words)
{
auto x = destination + word_index(begin_bit);
auto const last_word = word_index(end_bit) - word_index(begin_bit);
bitmask_type fill_value = valid ? 0xffff'ffff : 0;
auto x = destination + word_index(begin_bit);
thread_index_type const last_word = word_index(end_bit) - word_index(begin_bit);
bitmask_type fill_value = valid ? 0xffff'ffff : 0;

for (size_type destination_word_index = threadIdx.x + blockIdx.x * blockDim.x;
thread_index_type const stride = blockDim.x * gridDim.x;

for (thread_index_type destination_word_index = grid_1d::global_thread_id();
destination_word_index < number_of_mask_words;
destination_word_index += blockDim.x * gridDim.x) {
destination_word_index += stride) {
if (destination_word_index == 0 || destination_word_index == last_word) {
bitmask_type mask = ~bitmask_type{0};
if (destination_word_index == 0) {
Expand Down Expand Up @@ -189,9 +191,10 @@ __global__ void copy_offset_bitmask(bitmask_type* __restrict__ destination,
size_type source_end_bit,
size_type number_of_mask_words)
{
for (size_type destination_word_index = threadIdx.x + blockIdx.x * blockDim.x;
thread_index_type const stride = blockDim.x * gridDim.x;
for (thread_index_type destination_word_index = grid_1d::global_thread_id();
destination_word_index < number_of_mask_words;
destination_word_index += blockDim.x * gridDim.x) {
destination_word_index += stride) {
destination[destination_word_index] = detail::get_mask_offset_word(
source, destination_word_index, source_begin_bit, source_end_bit);
}
Expand Down Expand Up @@ -261,14 +264,15 @@ __global__ void count_set_bits_kernel(bitmask_type const* bitmask,

auto const first_word_index{word_index(first_bit_index)};
auto const last_word_index{word_index(last_bit_index)};
auto const tid = threadIdx.x + blockIdx.x * blockDim.x;
auto thread_word_index = tid + first_word_index;
thread_index_type const tid = grid_1d::global_thread_id();
thread_index_type const stride = blockDim.x * gridDim.x;
thread_index_type thread_word_index = tid + first_word_index;
size_type thread_count{0};

// First, just count the bits in all words
while (thread_word_index <= last_word_index) {
thread_count += __popc(bitmask[thread_word_index]);
thread_word_index += blockDim.x * gridDim.x;
thread_word_index += stride;
}

// Subtract any slack bits counted from the first and last word
Expand Down
15 changes: 6 additions & 9 deletions cpp/src/transform/jit/kernel.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -37,15 +37,12 @@ namespace jit {
// Elementwise unary transform: out_data[i] = GENERIC_UNARY_OP(in_data[i]) for
// i in [0, size). Launched as a 1-D grid; any grid/block configuration is
// valid because the kernel uses a grid-stride loop, so `size` may exceed the
// total number of launched threads (including a single-thread debug launch).
template <typename TypeOut, typename TypeIn>
__global__ void kernel(cudf::size_type size, TypeOut* out_data, TypeIn* in_data)
{
  // cannot use global_thread_id utility due to a JIT build issue by including
  // the `cudf/detail/utilities/cuda.cuh` header
  //
  // Widen to thread_index_type BEFORE multiplying: blockIdx.x, blockDim.x and
  // gridDim.x are 32-bit unsigned, so `blockIdx.x * blockDim.x` (and the
  // stride product) could otherwise wrap in 32-bit arithmetic on very large
  // grids before being stored into the 64-bit index.
  thread_index_type const start =
    threadIdx.x + static_cast<thread_index_type>(blockIdx.x) * blockDim.x;
  thread_index_type const stride =
    static_cast<thread_index_type>(blockDim.x) * gridDim.x;

  // Grid-stride loop; compare against `size` in the wide index type so the
  // loop bound itself cannot overflow.
  for (auto i = start; i < static_cast<thread_index_type>(size); i += stride) {
    GENERIC_UNARY_OP(&out_data[i], in_data[i]);
  }
}
Expand Down

0 comments on commit ff99f98

Please sign in to comment.