adding proclaim_return_type to device lambdas
Signed-off-by: Mike Wilson <knobby@burntsheep.com>
hyperbolic2346 committed Dec 19, 2023
1 parent 48d2736 commit 9076ad5
Showing 9 changed files with 94 additions and 76 deletions.
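
Every __device__ lambda handed to a Thrust algorithm or iterator in these files is wrapped in cuda::proclaim_return_type<T>(...), and each file gains a matching #include <cuda/functional>. The wrapper states the lambda's return type explicitly, since host-side code cannot deduce the return type of an extended device lambda; the lambda bodies themselves are unchanged. A minimal standalone sketch of the pattern (not code from this repository; assumes nvcc with extended lambdas enabled):

// Minimal sketch of the wrapping pattern applied throughout this commit.
// cuda::proclaim_return_type<T> wraps an extended __device__ lambda so that
// host-side code such as Thrust can name its return type without inspecting
// the device-only body.
#include <thrust/device_vector.h>
#include <thrust/transform.h>
#include <cuda/functional>

int main()
{
  thrust::device_vector<int> input(8, 1);
  thrust::device_vector<int> output(8);

  thrust::transform(input.begin(),
                    input.end(),
                    output.begin(),
                    cuda::proclaim_return_type<int>([] __device__(int x) { return x * 2; }));
  return 0;
}
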
6 changes: 4 additions & 2 deletions src/main/cpp/src/bloom_filter.cu
@@ -34,6 +34,8 @@

#include <thrust/logical.h>

#include <cuda/functional>

#include <byteswap.h>

namespace spark_rapids_jni {
@@ -316,14 +318,14 @@ std::unique_ptr<cudf::list_scalar> bloom_filter_merge(cudf::column_view const& b
thrust::make_counting_iterator(0),
thrust::make_counting_iterator(0) + num_words,
dst,
[src, num_buffers = bloom_filters.size(), stride = buf_size] __device__(
cuda::proclaim_return_type<cudf::bitmask_type>([src, num_buffers = bloom_filters.size(), stride = buf_size] __device__(
cudf::size_type word_index) {
cudf::bitmask_type out = (reinterpret_cast<cudf::bitmask_type const*>(src))[word_index];
for (auto idx = 1; idx < num_buffers; idx++) {
out |= (reinterpret_cast<cudf::bitmask_type const*>(src + idx * stride))[word_index];
}
return out;
});
}));

// create the 1-row list column and move it into a scalar.
return std::make_unique<cudf::list_scalar>(
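
For reference, a self-contained sketch of the word-wise OR merge above, with the cudf/rmm types and the src/stride captures replaced by plain illustrative buffers so it compiles on its own; the proclaimed type simply has to match what the lambda returns (here std::uint32_t standing in for cudf::bitmask_type):

// Hedged sketch, not repository code: OR together word i of several equally
// sized bitmask buffers laid out back to back, via a proclaimed device lambda.
#include <thrust/device_vector.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/transform.h>
#include <cuda/functional>
#include <cstdint>

int main()
{
  int const num_words   = 4;
  int const num_buffers = 3;
  thrust::device_vector<std::uint32_t> buffers(num_words * num_buffers, 0x1u);
  thrust::device_vector<std::uint32_t> merged(num_words);

  std::uint32_t const* src = thrust::raw_pointer_cast(buffers.data());
  thrust::transform(
    thrust::counting_iterator<int>(0),
    thrust::counting_iterator<int>(num_words),
    merged.begin(),
    cuda::proclaim_return_type<std::uint32_t>(
      [src, num_words, num_buffers] __device__(int word_index) {
        std::uint32_t out = src[word_index];
        for (int idx = 1; idx < num_buffers; ++idx) {
          out |= src[idx * num_words + word_index];
        }
        return out;
      }));
  return 0;
}
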
18 changes: 10 additions & 8 deletions src/main/cpp/src/datetime_rebase.cu
@@ -30,6 +30,8 @@
#include <thrust/iterator/counting_iterator.h>
#include <thrust/transform.h>

#include <cuda/functional>

namespace {

// Convert a date in Julian calendar to the number of days since epoch.
@@ -73,7 +75,7 @@ std::unique_ptr<cudf::column> gregorian_to_julian_days(cudf::column_view const&
thrust::make_counting_iterator(0),
thrust::make_counting_iterator(input.size()),
output->mutable_view().begin<cudf::timestamp_D>(),
[d_input = input.begin<cudf::timestamp_D>()] __device__(auto const idx) {
cuda::proclaim_return_type<cudf::timestamp_D>([d_input = input.begin<cudf::timestamp_D>()] __device__(auto const idx) {
auto constexpr julian_end = cuda::std::chrono::year_month_day{
cuda::std::chrono::year{1582}, cuda::std::chrono::month{10}, cuda::std::chrono::day{4}};
auto constexpr gregorian_start = cuda::std::chrono::year_month_day{
@@ -94,7 +96,7 @@ std::unique_ptr<cudf::column> gregorian_to_julian_days(cudf::column_view const&

// Reinterpret year/month/day as in Julian calendar then compute the days since epoch.
return cudf::timestamp_D{cudf::duration_D{days_from_julian(ymd)}};
});
}));

return output;
}
@@ -142,7 +144,7 @@ std::unique_ptr<cudf::column> julian_to_gregorian_days(cudf::column_view const&
thrust::make_counting_iterator(0),
thrust::make_counting_iterator(input.size()),
output->mutable_view().begin<cudf::timestamp_D>(),
[d_input = input.begin<cudf::timestamp_D>()] __device__(auto const idx) {
cuda::proclaim_return_type<cudf::timestamp_D>([d_input = input.begin<cudf::timestamp_D>()] __device__(auto const idx) {
auto const days_ts = d_input[idx].time_since_epoch().count();
if (days_ts >= -141427) { // Gregorian start day
return d_input[idx];
@@ -154,7 +156,7 @@ std::unique_ptr<cudf::column> julian_to_gregorian_days(cudf::column_view const&
auto const result =
cuda::std::chrono::local_days{ymd}.time_since_epoch().count();
return cudf::timestamp_D{cudf::duration_D{result}};
});
}));

return output;
}
@@ -242,7 +244,7 @@ std::unique_ptr<cudf::column> gregorian_to_julian_micros(cudf::column_view const
thrust::make_counting_iterator(0),
thrust::make_counting_iterator(input.size()),
output->mutable_view().begin<cudf::timestamp_us>(),
[d_input = input.begin<cudf::timestamp_us>()] __device__(auto const idx) {
cuda::proclaim_return_type<cudf::timestamp_us>([d_input = input.begin<cudf::timestamp_us>()] __device__(auto const idx) {
// This timestamp corresponds to October 15th, 1582 UTC.
// After this day, there is no difference in microsecond values between Gregorian
// and Julian calendars.
@@ -274,7 +276,7 @@ std::unique_ptr<cudf::column> gregorian_to_julian_micros(cudf::column_view const
result += timeparts.subsecond;

return cudf::timestamp_us{cudf::duration_us{result}};
});
}));

return output;
}
@@ -304,7 +306,7 @@ std::unique_ptr<cudf::column> julian_to_gregorian_micros(cudf::column_view const
thrust::make_counting_iterator(0),
thrust::make_counting_iterator(input.size()),
output->mutable_view().begin<cudf::timestamp_us>(),
[d_input = input.begin<cudf::timestamp_us>()] __device__(auto const idx) {
cuda::proclaim_return_type<cudf::timestamp_us>([d_input = input.begin<cudf::timestamp_us>()] __device__(auto const idx) {
// This timestamp corresponds to October 15th, 1582 UTC.
// After this day, there is no difference in microsecond values between Gregorian
// and Julian calendars.
@@ -328,7 +330,7 @@ std::unique_ptr<cudf::column> julian_to_gregorian_micros(cudf::column_view const
result += timeparts.subsecond;

return cudf::timestamp_us{cudf::duration_us{result}};
});
}));

return output;
}
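
The rebase lambdas above take an auto parameter and return strong timestamp types (cudf::timestamp_D, cudf::timestamp_us), so the proclaimed type is what lets thrust::transform name its output element type. A standalone sketch of the same shape using a libcu++ chrono duration instead of the cudf timestamp types (illustrative only, not repository code):

// Hedged sketch: a generic (auto-parameter) device lambda returning a
// cuda::std::chrono duration, with the result type proclaimed up front.
#include <thrust/device_vector.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/transform.h>
#include <cuda/functional>
#include <cuda/std/chrono>

int main()
{
  using days = cuda::std::chrono::days;

  thrust::device_vector<days> out(7);
  thrust::transform(
    thrust::counting_iterator<int>(0),
    thrust::counting_iterator<int>(7),
    out.begin(),
    // Without the proclamation, host-side code cannot see that this generic
    // device lambda yields a days value.
    cuda::proclaim_return_type<days>([] __device__(auto const idx) { return days{idx}; }));
  return 0;
}
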
60 changes: 31 additions & 29 deletions src/main/cpp/src/map_utils.cu
@@ -54,6 +54,8 @@
//
#include <cub/device/device_radix_sort.cuh>

#include <cuda/functional>

namespace spark_rapids_jni {

using namespace cudf::io::json;
@@ -179,29 +181,29 @@ rmm::device_uvector<TreeDepthT> compute_node_levels(int64_t num_nodes,
auto token_levels = rmm::device_uvector<TreeDepthT>(tokens.size(), stream);

// Whether the token pops from the parent node stack.
auto const does_pop = [] __device__(PdaTokenT const token) -> bool {
auto const does_pop = cuda::proclaim_return_type<bool>([] __device__(PdaTokenT const token) -> bool {
switch (token) {
case token_t::StructMemberEnd:
case token_t::StructEnd:
case token_t::ListEnd: return true;
default: return false;
};
};
});

// Whether the token pushes onto the parent node stack.
auto const does_push = [] __device__(PdaTokenT const token) -> bool {
auto const does_push = cuda::proclaim_return_type<bool>([] __device__(PdaTokenT const token) -> bool {
switch (token) {
case token_t::FieldNameBegin:
case token_t::StructBegin:
case token_t::ListBegin: return true;
default: return false;
};
};
});

auto const push_pop_it = thrust::make_transform_iterator(
tokens.begin(), [does_push, does_pop] __device__(PdaTokenT const token) -> cudf::size_type {
tokens.begin(), cuda::proclaim_return_type<cudf::size_type>([does_push, does_pop] __device__(PdaTokenT const token) -> cudf::size_type {
return does_push(token) - does_pop(token);
});
}));
thrust::exclusive_scan(
rmm::exec_policy(stream), push_pop_it, push_pop_it + tokens.size(), token_levels.begin());
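
The transform-iterator case just above is where the proclamation is most load-bearing: thrust::make_transform_iterator must know the functor's result type on the host to define the iterator's value_type. A standalone sketch of that shape (the token arithmetic here is invented, not the JSON token logic from this file):

// Hedged sketch: a transform iterator over a proclaimed device lambda; the
// proclaimed type becomes the iterator's value_type, so the scan can run.
#include <thrust/device_vector.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/scan.h>
#include <cuda/functional>

int main()
{
  // +1 for even "tokens", -1 for odd ones -- purely illustrative.
  auto const push_pop = cuda::proclaim_return_type<int>(
    [] __device__(int token) -> int { return token % 2 == 0 ? 1 : -1; });

  auto const push_pop_it =
    thrust::make_transform_iterator(thrust::counting_iterator<int>(0), push_pop);

  thrust::device_vector<int> levels(16);
  thrust::exclusive_scan(push_pop_it, push_pop_it + 16, levels.begin());
  return 0;
}
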

@@ -302,7 +304,7 @@ rmm::device_uvector<NodeIndexT> compute_parent_node_ids(
rmm::device_uvector<NodeIndexT> const& node_token_ids,
rmm::cuda_stream_view stream)
{
auto const first_childs_parent_token_id = [tokens =
auto const first_childs_parent_token_id = cuda::proclaim_return_type<NodeIndexT>([tokens =
tokens.begin()] __device__(auto i) -> NodeIndexT {
if (i <= 0) { return -1; }
if (tokens[i - 1] == token_t::StructBegin || tokens[i - 1] == token_t::ListBegin) {
@@ -315,22 +317,22 @@
} else {
return -1;
}
};
});

auto parent_node_ids = rmm::device_uvector<NodeIndexT>(num_nodes, stream);
thrust::transform(
rmm::exec_policy(stream),
node_token_ids.begin(),
node_token_ids.end(),
parent_node_ids.begin(),
[node_ids_gpu = node_token_ids.begin(), num_nodes, first_childs_parent_token_id] __device__(
cuda::proclaim_return_type<NodeIndexT>([node_ids_gpu = node_token_ids.begin(), num_nodes, first_childs_parent_token_id] __device__(
NodeIndexT const tid) -> NodeIndexT {
auto const pid = first_childs_parent_token_id(tid);
return pid < 0
? cudf::io::json::parent_node_sentinel
: thrust::lower_bound(thrust::seq, node_ids_gpu, node_ids_gpu + num_nodes, pid) -
node_ids_gpu;
});
}));

// Propagate parent node to siblings from first sibling - inplace.
auto const node_levels = compute_node_levels(num_nodes, tokens, stream);
@@ -356,7 +358,7 @@ rmm::device_uvector<int8_t> check_key_or_value_nodes(
transform_it,
transform_it + parent_node_ids.size(),
key_or_value.begin(),
[key_sentinel = key_sentinel,
cuda::proclaim_return_type<int8_t>([key_sentinel = key_sentinel,
value_sentinel = value_sentinel,
parent_ids = parent_node_ids.begin()] __device__(auto const node_id) -> int8_t {
if (parent_ids[node_id] > 0) {
@@ -369,7 +371,7 @@
}

return 0;
});
}));

#ifdef DEBUG_FROM_JSON
print_debug(key_or_value, "Nodes are key/value (1==key, 2==value)", ", ", stream);
@@ -390,7 +392,7 @@ struct node_ranges_fn {

__device__ thrust::pair<SymbolOffsetT, SymbolOffsetT> operator()(cudf::size_type node_id) const
{
[[maybe_unused]] auto const is_begin_of_section = [] __device__(PdaTokenT const token) {
[[maybe_unused]] auto const is_begin_of_section = cuda::proclaim_return_type<bool>([] __device__(PdaTokenT const token) {
switch (token) {
case token_t::StructBegin:
case token_t::ListBegin:
@@ -399,10 +401,10 @@ struct node_ranges_fn {
case token_t::FieldNameBegin: return true;
default: return false;
};
};
});

// The end-of-* partner token for a given beginning-of-* token
auto const end_of_partner = [] __device__(PdaTokenT const token) {
auto const end_of_partner = cuda::proclaim_return_type<token_t>([] __device__(PdaTokenT const token) {
switch (token) {
case token_t::StructBegin: return token_t::StructEnd;
case token_t::ListBegin: return token_t::ListEnd;
@@ -411,20 +413,20 @@ struct node_ranges_fn {
case token_t::FieldNameBegin: return token_t::FieldNameEnd;
default: return token_t::ErrorBegin;
};
};
});

// Encode a fixed value for nested node types (list+struct).
auto const nested_node_to_value = [] __device__(PdaTokenT const token) -> int32_t {
auto const nested_node_to_value = cuda::proclaim_return_type<int32_t>([] __device__(PdaTokenT const token) -> int32_t {
switch (token) {
case token_t::StructBegin: return 1;
case token_t::StructEnd: return -1;
case token_t::ListBegin: return 1 << 8;
case token_t::ListEnd: return -(1 << 8);
default: return 0;
};
};
});

auto const get_token_index = [include_quote_char = include_quote_char] __device__(
auto const get_token_index = cuda::proclaim_return_type<SymbolOffsetT>([include_quote_char = include_quote_char] __device__(
PdaTokenT const token, SymbolOffsetT const token_index) {
constexpr SymbolOffsetT quote_char_size = 1;
switch (token) {
@@ -436,7 +438,7 @@ struct node_ranges_fn {
case token_t::FieldNameBegin: return token_index + quote_char_size;
default: return token_index;
};
};
});

if (key_or_value[node_id] != key_sentinel && key_or_value[node_id] != value_sentinel) {
return thrust::make_pair(0, 0);
@@ -529,13 +531,13 @@ std::unique_ptr<cudf::column> extract_keys_or_values(
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
auto const is_key = [key_or_value = key_or_value.begin()] __device__(auto const node_id) {
auto const is_key = cuda::proclaim_return_type<bool>([key_or_value = key_or_value.begin()] __device__(auto const node_id) {
return key_or_value[node_id] == key_sentinel;
};
});

auto const is_value = [key_or_value = key_or_value.begin()] __device__(auto const node_id) {
auto const is_value = cuda::proclaim_return_type<bool>([key_or_value = key_or_value.begin()] __device__(auto const node_id) {
return key_or_value[node_id] == value_sentinel;
};
});

auto extract_ranges =
rmm::device_uvector<thrust::pair<SymbolOffsetT, SymbolOffsetT>>(num_nodes, stream, mr);
@@ -578,13 +580,13 @@ rmm::device_uvector<cudf::size_type> compute_list_offsets(
parent_node_ids.begin(),
parent_node_ids.end(),
node_child_counts.begin(),
[] __device__(auto const parent_id) -> NodeIndexT {
cuda::proclaim_return_type<NodeIndexT>([] __device__(auto const parent_id) -> NodeIndexT {
return parent_id == 0 ? 0 : std::numeric_limits<NodeIndexT>::lowest();
});
}));

auto const is_key = [key_or_value = key_or_value.begin()] __device__(auto const node_id) {
auto const is_key = cuda::proclaim_return_type<bool>([key_or_value = key_or_value.begin()] __device__(auto const node_id) {
return key_or_value[node_id] == key_sentinel;
};
});

// Count the number of keys for each json object using `atomicAdd`.
auto const transform_it = thrust::counting_iterator<int>(0);
@@ -608,7 +610,7 @@ rmm::device_uvector<cudf::size_type> compute_list_offsets(
node_child_counts.begin(),
node_child_counts.end(),
list_offsets.begin(),
[] __device__(auto const count) { return count >= 0; },
cuda::proclaim_return_type<bool>([] __device__(auto const count) { return count >= 0; }),
stream);
CUDF_EXPECTS(thrust::distance(list_offsets.begin(), copy_end) == static_cast<int64_t>(n_lists),
"Invalid list size computation.");
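
Several of the wrapped lambdas in this file (does_push, does_pop, is_key, first_childs_parent_token_id) are captured and called by other wrapped lambdas. The object returned by cuda::proclaim_return_type is an ordinary device-callable value, so that composition carries over unchanged. A standalone sketch of the idea with invented predicates (not repository code):

// Hedged sketch: a proclaimed lambda capturing and calling two other
// proclaimed lambdas, mirroring the does_push/does_pop composition above.
#include <thrust/device_vector.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/transform.h>
#include <cuda/functional>

int main()
{
  auto const is_even =
    cuda::proclaim_return_type<bool>([] __device__(int x) -> bool { return x % 2 == 0; });
  auto const is_odd =
    cuda::proclaim_return_type<bool>([] __device__(int x) -> bool { return x % 2 != 0; });

  thrust::device_vector<int> balance(10);
  thrust::transform(
    thrust::counting_iterator<int>(0),
    thrust::counting_iterator<int>(10),
    balance.begin(),
    cuda::proclaim_return_type<int>([is_even, is_odd] __device__(int x) {
      return static_cast<int>(is_even(x)) - static_cast<int>(is_odd(x));
    }));
  return 0;
}
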
6 changes: 4 additions & 2 deletions src/main/cpp/src/murmur_hash.cu
@@ -27,6 +27,8 @@
#include <thrust/iterator/counting_iterator.h>
#include <thrust/tabulate.h>

#include <cuda/functional>

namespace spark_rapids_jni {

namespace {
@@ -77,10 +79,10 @@ class murmur_device_row_hasher {
_table.begin(),
_table.end(),
_seed,
[row_index, nulls = this->_check_nulls] __device__(auto hash, auto column) {
cuda::proclaim_return_type<murmur_hash_value_type>([row_index, nulls = this->_check_nulls] __device__(auto hash, auto column) {
return cudf::type_dispatcher(
column.type(), element_hasher_adapter<hash_function>{nulls, hash}, column, row_index);
});
}));
}

private:
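
In murmur_hash.cu the proclaimed lambda is the per-column combining step of a seeded row hash. A standalone sketch of a seeded fold with a separately proclaimed combiner (the hash function, seed, and layout below are invented and are not the Spark murmur3 logic):

// Hedged sketch: a per-row device lambda folds a seed across the columns of a
// row-major table using a separately proclaimed binary combiner.
#include <thrust/device_vector.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/transform.h>
#include <cuda/functional>
#include <cstdint>

int main()
{
  int const num_rows = 4;
  int const num_cols = 3;
  thrust::device_vector<std::uint32_t> table(num_rows * num_cols, 7u);
  thrust::device_vector<std::uint32_t> row_hashes(num_rows);

  std::uint32_t const* data = thrust::raw_pointer_cast(table.data());

  auto const combine = cuda::proclaim_return_type<std::uint32_t>(
    [] __device__(std::uint32_t hash, std::uint32_t value) { return hash * 31u + value; });

  thrust::transform(
    thrust::counting_iterator<int>(0),
    thrust::counting_iterator<int>(num_rows),
    row_hashes.begin(),
    cuda::proclaim_return_type<std::uint32_t>(
      [data, combine, num_cols] __device__(int row) {
        std::uint32_t hash = 42u;  // illustrative seed only
        for (int col = 0; col < num_cols; ++col) {
          hash = combine(hash, data[row * num_cols + col]);
        }
        return hash;
      }));
  return 0;
}
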