diff --git a/src/main/cpp/src/bloom_filter.cu b/src/main/cpp/src/bloom_filter.cu
index 7637c85f10..8e828e3a4d 100644
--- a/src/main/cpp/src/bloom_filter.cu
+++ b/src/main/cpp/src/bloom_filter.cu
@@ -34,6 +34,8 @@
 
 #include <thrust/logical.h>
 
+#include <cuda/functional>
+
 #include <byteswap.h>
 
 namespace spark_rapids_jni {
@@ -316,14 +318,14 @@ std::unique_ptr<cudf::list_scalar> bloom_filter_merge(cudf::column_view const& b
     thrust::make_counting_iterator(0),
     thrust::make_counting_iterator(0) + num_words,
     dst,
-    [src, num_buffers = bloom_filters.size(), stride = buf_size] __device__(
+    cuda::proclaim_return_type<cudf::bitmask_type>([src, num_buffers = bloom_filters.size(), stride = buf_size] __device__(
       cudf::size_type word_index) {
       cudf::bitmask_type out = (reinterpret_cast<cudf::bitmask_type const*>(src))[word_index];
       for (auto idx = 1; idx < num_buffers; idx++) {
         out |= (reinterpret_cast<cudf::bitmask_type const*>(src + idx * stride))[word_index];
       }
       return out;
-    });
+    }));
 
   // create the 1-row list column and move it into a scalar.
   return std::make_unique<cudf::list_scalar>(
diff --git a/src/main/cpp/src/datetime_rebase.cu b/src/main/cpp/src/datetime_rebase.cu
index 9548d09dad..a95df64613 100644
--- a/src/main/cpp/src/datetime_rebase.cu
+++ b/src/main/cpp/src/datetime_rebase.cu
@@ -30,6 +30,8 @@
 #include <thrust/iterator/counting_iterator.h>
 #include <thrust/transform.h>
 
+#include <cuda/functional>
+
 namespace {
 
 // Convert a date in Julian calendar to the number of days since epoch.
@@ -73,7 +75,7 @@ std::unique_ptr<cudf::column> gregorian_to_julian_days(cudf::column_view const&
     thrust::make_counting_iterator(0),
     thrust::make_counting_iterator(input.size()),
     output->mutable_view().begin<cudf::timestamp_D>(),
-    [d_input = input.begin<cudf::timestamp_D>()] __device__(auto const idx) {
+    cuda::proclaim_return_type<cudf::timestamp_D>([d_input = input.begin<cudf::timestamp_D>()] __device__(auto const idx) {
       auto constexpr julian_end = cuda::std::chrono::year_month_day{
         cuda::std::chrono::year{1582}, cuda::std::chrono::month{10}, cuda::std::chrono::day{4}};
       auto constexpr gregorian_start = cuda::std::chrono::year_month_day{
@@ -94,7 +96,7 @@ std::unique_ptr<cudf::column> gregorian_to_julian_days(cudf::column_view const&
 
       // Reinterpret year/month/day as in Julian calendar then compute the days since epoch.
       return cudf::timestamp_D{cudf::duration_D{days_from_julian(ymd)}};
-    });
+    }));
 
   return output;
 }
@@ -142,7 +144,7 @@ std::unique_ptr<cudf::column> julian_to_gregorian_days(cudf::column_view const&
                     thrust::make_counting_iterator(0),
                     thrust::make_counting_iterator(input.size()),
                     output->mutable_view().begin<cudf::timestamp_D>(),
-                    [d_input = input.begin<cudf::timestamp_D>()] __device__(auto const idx) {
+                    cuda::proclaim_return_type<cudf::timestamp_D>([d_input = input.begin<cudf::timestamp_D>()] __device__(auto const idx) {
                       auto const days_ts = d_input[idx].time_since_epoch().count();
                       if (days_ts >= -141427) {  // Gregorian start day
                         return d_input[idx];
@@ -154,7 +156,7 @@ std::unique_ptr<cudf::column> julian_to_gregorian_days(cudf::column_view const&
                       auto const result =
                         cuda::std::chrono::local_days{ymd}.time_since_epoch().count();
                       return cudf::timestamp_D{cudf::duration_D{result}};
-                    });
+                    }));
 
   return output;
 }
@@ -242,7 +244,7 @@ std::unique_ptr<cudf::column> gregorian_to_julian_micros(cudf::column_view const
     thrust::make_counting_iterator(0),
     thrust::make_counting_iterator(input.size()),
     output->mutable_view().begin<cudf::timestamp_us>(),
-    [d_input = input.begin<cudf::timestamp_us>()] __device__(auto const idx) {
+    cuda::proclaim_return_type<cudf::timestamp_us>([d_input = input.begin<cudf::timestamp_us>()] __device__(auto const idx) {
       // This timestamp corresponds to October 15th, 1582 UTC.
       // After this day, there is no difference in microsecond values between Gregorian
       // and Julian calendars.
@@ -274,7 +276,7 @@ std::unique_ptr<cudf::column> gregorian_to_julian_micros(cudf::column_view const
       result += timeparts.subsecond;
 
       return cudf::timestamp_us{cudf::duration_us{result}};
-    });
+    }));
 
   return output;
 }
@@ -304,7 +306,7 @@ std::unique_ptr<cudf::column> julian_to_gregorian_micros(cudf::column_view const
     thrust::make_counting_iterator(0),
     thrust::make_counting_iterator(input.size()),
     output->mutable_view().begin<cudf::timestamp_us>(),
-    [d_input = input.begin<cudf::timestamp_us>()] __device__(auto const idx) {
+    cuda::proclaim_return_type<cudf::timestamp_us>([d_input = input.begin<cudf::timestamp_us>()] __device__(auto const idx) {
       // This timestamp corresponds to October 15th, 1582 UTC.
       // After this day, there is no difference in microsecond values between Gregorian
       // and Julian calendars.
@@ -328,7 +330,7 @@ std::unique_ptr<cudf::column> julian_to_gregorian_micros(cudf::column_view const
       result += timeparts.subsecond;
 
       return cudf::timestamp_us{cudf::duration_us{result}};
-    });
+    }));
 
   return output;
 }
diff --git a/src/main/cpp/src/map_utils.cu b/src/main/cpp/src/map_utils.cu
index f8ac369973..a90e077135 100644
--- a/src/main/cpp/src/map_utils.cu
+++ b/src/main/cpp/src/map_utils.cu
@@ -54,6 +54,8 @@
 //
 #include <cub/device/device_radix_sort.cuh>
 
+#include <cuda/functional>
+
 namespace spark_rapids_jni {
 
 using namespace cudf::io::json;
@@ -179,29 +181,29 @@ rmm::device_uvector<TreeDepthT> compute_node_levels(int64_t num_nodes,
   auto token_levels = rmm::device_uvector<TreeDepthT>(tokens.size(), stream);
 
   // Whether the token pops from the parent node stack.
-  auto const does_pop = [] __device__(PdaTokenT const token) -> bool {
+  auto const does_pop = cuda::proclaim_return_type<bool>([] __device__(PdaTokenT const token) -> bool {
     switch (token) {
       case token_t::StructMemberEnd:
       case token_t::StructEnd:
       case token_t::ListEnd: return true;
       default: return false;
     };
-  };
+  });
 
   // Whether the token pushes onto the parent node stack.
-  auto const does_push = [] __device__(PdaTokenT const token) -> bool {
+  auto const does_push = cuda::proclaim_return_type<bool>([] __device__(PdaTokenT const token) -> bool {
     switch (token) {
       case token_t::FieldNameBegin:
       case token_t::StructBegin:
       case token_t::ListBegin: return true;
       default: return false;
     };
-  };
+  });
 
   auto const push_pop_it = thrust::make_transform_iterator(
-    tokens.begin(), [does_push, does_pop] __device__(PdaTokenT const token) -> cudf::size_type {
+    tokens.begin(), cuda::proclaim_return_type<cudf::size_type>([does_push, does_pop] __device__(PdaTokenT const token) -> cudf::size_type {
       return does_push(token) - does_pop(token);
-    });
+    }));
   thrust::exclusive_scan(
     rmm::exec_policy(stream), push_pop_it, push_pop_it + tokens.size(), token_levels.begin());
 
@@ -302,7 +304,7 @@ rmm::device_uvector<NodeIndexT> compute_parent_node_ids(
   rmm::device_uvector<NodeIndexT> const& node_token_ids,
   rmm::cuda_stream_view stream)
 {
-  auto const first_childs_parent_token_id = [tokens =
+  auto const first_childs_parent_token_id = cuda::proclaim_return_type<NodeIndexT>([tokens =
                                                tokens.begin()] __device__(auto i) -> NodeIndexT {
     if (i <= 0) { return -1; }
     if (tokens[i - 1] == token_t::StructBegin || tokens[i - 1] == token_t::ListBegin) {
@@ -315,7 +317,7 @@ rmm::device_uvector<NodeIndexT> compute_parent_node_ids(
     } else {
       return -1;
     }
-  };
+  });
 
   auto parent_node_ids = rmm::device_uvector<NodeIndexT>(num_nodes, stream);
   thrust::transform(
@@ -323,14 +325,14 @@ rmm::device_uvector<NodeIndexT> compute_parent_node_ids(
     node_token_ids.begin(),
     node_token_ids.end(),
     parent_node_ids.begin(),
-    [node_ids_gpu = node_token_ids.begin(), num_nodes, first_childs_parent_token_id] __device__(
+    cuda::proclaim_return_type<NodeIndexT>([node_ids_gpu = node_token_ids.begin(), num_nodes, first_childs_parent_token_id] __device__(
       NodeIndexT const tid) -> NodeIndexT {
       auto const pid = first_childs_parent_token_id(tid);
       return pid < 0
                ? cudf::io::json::parent_node_sentinel
                : thrust::lower_bound(thrust::seq, node_ids_gpu, node_ids_gpu + num_nodes, pid) -
                    node_ids_gpu;
-    });
+    }));
 
   // Propagate parent node to siblings from first sibling - inplace.
   auto const node_levels = compute_node_levels(num_nodes, tokens, stream);
@@ -356,7 +358,7 @@ rmm::device_uvector<int8_t> check_key_or_value_nodes(
     transform_it,
     transform_it + parent_node_ids.size(),
     key_or_value.begin(),
-    [key_sentinel   = key_sentinel,
+    cuda::proclaim_return_type<int8_t>([key_sentinel   = key_sentinel,
      value_sentinel = value_sentinel,
      parent_ids     = parent_node_ids.begin()] __device__(auto const node_id) -> int8_t {
       if (parent_ids[node_id] > 0) {
@@ -369,7 +371,7 @@ rmm::device_uvector<int8_t> check_key_or_value_nodes(
       }
 
       return 0;
-    });
+    }));
 
 #ifdef DEBUG_FROM_JSON
   print_debug(key_or_value, "Nodes are key/value (1==key, 2==value)", ", ", stream);
@@ -390,7 +392,7 @@ struct node_ranges_fn {
 
   __device__ thrust::pair<SymbolOffsetT, SymbolOffsetT> operator()(cudf::size_type node_id) const
   {
-    [[maybe_unused]] auto const is_begin_of_section = [] __device__(PdaTokenT const token) {
+    [[maybe_unused]] auto const is_begin_of_section = cuda::proclaim_return_type<bool>([] __device__(PdaTokenT const token) {
       switch (token) {
         case token_t::StructBegin:
         case token_t::ListBegin:
@@ -399,10 +401,10 @@ struct node_ranges_fn {
         case token_t::FieldNameBegin: return true;
         default: return false;
       };
-    };
+    });
 
     // The end-of-* partner token for a given beginning-of-* token
-    auto const end_of_partner = [] __device__(PdaTokenT const token) {
+    auto const end_of_partner = cuda::proclaim_return_type<token_t>([] __device__(PdaTokenT const token) {
       switch (token) {
         case token_t::StructBegin: return token_t::StructEnd;
         case token_t::ListBegin: return token_t::ListEnd;
@@ -411,10 +413,10 @@ struct node_ranges_fn {
         case token_t::FieldNameBegin: return token_t::FieldNameEnd;
         default: return token_t::ErrorBegin;
       };
-    };
+    });
 
     // Encode a fixed value for nested node types (list+struct).
-    auto const nested_node_to_value = [] __device__(PdaTokenT const token) -> int32_t {
+    auto const nested_node_to_value = cuda::proclaim_return_type<int32_t>([] __device__(PdaTokenT const token) -> int32_t {
       switch (token) {
         case token_t::StructBegin: return 1;
         case token_t::StructEnd: return -1;
@@ -422,9 +424,9 @@ struct node_ranges_fn {
         case token_t::ListEnd: return -(1 << 8);
         default: return 0;
       };
-    };
+    });
 
-    auto const get_token_index = [include_quote_char = include_quote_char] __device__(
+    auto const get_token_index = cuda::proclaim_return_type<SymbolOffsetT>([include_quote_char = include_quote_char] __device__(
                                    PdaTokenT const token, SymbolOffsetT const token_index) {
       constexpr SymbolOffsetT quote_char_size = 1;
       switch (token) {
@@ -436,7 +438,7 @@ struct node_ranges_fn {
         case token_t::FieldNameBegin: return token_index + quote_char_size;
         default: return token_index;
       };
-    };
+    });
 
     if (key_or_value[node_id] != key_sentinel && key_or_value[node_id] != value_sentinel) {
       return thrust::make_pair(0, 0);
@@ -529,13 +531,13 @@ std::unique_ptr<cudf::column> extract_keys_or_values(
   rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr)
 {
-  auto const is_key = [key_or_value = key_or_value.begin()] __device__(auto const node_id) {
+  auto const is_key = cuda::proclaim_return_type<bool>([key_or_value = key_or_value.begin()] __device__(auto const node_id) {
     return key_or_value[node_id] == key_sentinel;
-  };
+  });
 
-  auto const is_value = [key_or_value = key_or_value.begin()] __device__(auto const node_id) {
+  auto const is_value = cuda::proclaim_return_type<bool>([key_or_value = key_or_value.begin()] __device__(auto const node_id) {
     return key_or_value[node_id] == value_sentinel;
-  };
+  });
 
   auto extract_ranges =
     rmm::device_uvector<thrust::pair<SymbolOffsetT, SymbolOffsetT>>(num_nodes, stream, mr);
@@ -578,13 +580,13 @@ rmm::device_uvector<cudf::size_type> compute_list_offsets(
                     parent_node_ids.begin(),
                     parent_node_ids.end(),
                     node_child_counts.begin(),
-                    [] __device__(auto const parent_id) -> NodeIndexT {
+                    cuda::proclaim_return_type<NodeIndexT>([] __device__(auto const parent_id) -> NodeIndexT {
                       return parent_id == 0 ? 0 : std::numeric_limits<NodeIndexT>::lowest();
-                    });
+                    }));
 
-  auto const is_key = [key_or_value = key_or_value.begin()] __device__(auto const node_id) {
+  auto const is_key = cuda::proclaim_return_type<bool>([key_or_value = key_or_value.begin()] __device__(auto const node_id) {
     return key_or_value[node_id] == key_sentinel;
-  };
+  });
 
   // Count the number of keys for each json object using `atomicAdd`.
   auto const transform_it = thrust::counting_iterator<int>(0);
@@ -608,7 +610,7 @@ rmm::device_uvector<cudf::size_type> compute_list_offsets(
     node_child_counts.begin(),
     node_child_counts.end(),
     list_offsets.begin(),
-    [] __device__(auto const count) { return count >= 0; },
+    cuda::proclaim_return_type<bool>([] __device__(auto const count) { return count >= 0; }),
     stream);
   CUDF_EXPECTS(thrust::distance(list_offsets.begin(), copy_end) == static_cast<int64_t>(n_lists),
                "Invalid list size computation.");
diff --git a/src/main/cpp/src/murmur_hash.cu b/src/main/cpp/src/murmur_hash.cu
index 679f521e77..7059e38932 100644
--- a/src/main/cpp/src/murmur_hash.cu
+++ b/src/main/cpp/src/murmur_hash.cu
@@ -27,6 +27,8 @@
 #include <thrust/iterator/counting_iterator.h>
 #include <thrust/tabulate.h>
 
+#include <cuda/functional>
+
 namespace spark_rapids_jni {
 
 namespace {
@@ -77,10 +79,10 @@ class murmur_device_row_hasher {
       _table.begin(),
       _table.end(),
       _seed,
-      [row_index, nulls = this->_check_nulls] __device__(auto hash, auto column) {
+      cuda::proclaim_return_type<murmur_hash_value_type>([row_index, nulls = this->_check_nulls] __device__(auto hash, auto column) {
         return cudf::type_dispatcher(
           column.type(), element_hasher_adapter<hash_function>{nulls, hash}, column, row_index);
-      });
+      }));
   }
 
  private:
diff --git a/src/main/cpp/src/parse_uri.cu b/src/main/cpp/src/parse_uri.cu
index 13a8effb37..6251f647bd 100644
--- a/src/main/cpp/src/parse_uri.cu
+++ b/src/main/cpp/src/parse_uri.cu
@@ -29,6 +29,8 @@
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/exec_policy.hpp>
 
+#include <cuda/functional>
+
 #include <memory>
 
 namespace spark_rapids_jni {
@@ -164,11 +166,11 @@ bool __device__ validate_ipv6(string_view s)
   int address_char_count{0};
   bool address_has_hex{false};
 
-  auto const leading_double_colon = [&]() {
+  auto const leading_double_colon = cuda::proclaim_return_type<bool>([&]() {
     auto iter = s.begin();
     if (*iter == '[') iter++;
     return *iter++ == ':' && *iter == ':';
-  }();
+  })();
 
   for (auto iter = s.begin(); iter < s.end(); ++iter) {
     auto const c = *iter;
@@ -384,14 +386,14 @@ chunk_validity __device__ validate_host(string_view host)
 bool __device__ validate_query(string_view query)
 {
   // query can be alphanum and _-!.~'()*,;:$&+=?/[]@"
-  return validate_chunk(query, [] __device__(string_view::const_iterator iter) {
+  return validate_chunk(query, cuda::proclaim_return_type<bool>([] __device__(string_view::const_iterator iter) {
     auto const c = *iter;
     if (c != '!' && c != '"' && c != '$' && !(c >= '&' && c <= ';') && c != '=' &&
         !(c >= '?' && c <= ']' && c != '\\') && !(c >= 'a' && c <= 'z') && c != '_' && c != '~') {
       return false;
     }
     return true;
-  });
+  }));
 }
 
 bool __device__ validate_authority(string_view authority, bool allow_invalid_escapes)
@@ -399,7 +401,7 @@ bool __device__ validate_authority(string_view authority, bool allow_invalid_esc
   // authority needs to be alphanum and @[]_-!.'()*,;:$&+=
   return validate_chunk(
     authority,
-    [allow_invalid_escapes] __device__(string_view::const_iterator iter) {
+    cuda::proclaim_return_type<bool>([allow_invalid_escapes] __device__(string_view::const_iterator iter) {
       auto const c = *iter;
       if (c != '!' && c != '$' && !(c >= '&' && c <= ';' && c != '/') && c != '=' &&
           !(c >= '@' && c <= '_' && c != '^' && c != '\\') && !(c >= 'a' && c <= 'z') && c != '~' &&
@@ -407,67 +409,67 @@ bool __device__ validate_authority(string_view authority, bool allow_invalid_esc
         return false;
       }
       return true;
-    },
+    }),
     allow_invalid_escapes);
 }
 
 bool __device__ validate_userinfo(string_view userinfo)
 {
   // can't be ] or [ in here
-  return validate_chunk(userinfo, [] __device__(string_view::const_iterator iter) {
+  return validate_chunk(userinfo, cuda::proclaim_return_type<bool>([] __device__(string_view::const_iterator iter) {
     auto const c = *iter;
     if (c == '[' || c == ']') { return false; }
     return true;
-  });
+  }));
 }
 
 bool __device__ validate_port(string_view port)
 {
   // port is positive numeric >=0 according to spark...shrug
-  return validate_chunk(port, [] __device__(string_view::const_iterator iter) {
+  return validate_chunk(port, cuda::proclaim_return_type<bool>([] __device__(string_view::const_iterator iter) {
     auto const c = *iter;
     if (c < '0' && c > '9') { return false; }
     return true;
-  });
+  }));
 }
 
 bool __device__ validate_path(string_view path)
 {
   // path can be alphanum and @[]_-!.~'()*?/&,;:$+=
-  return validate_chunk(path, [] __device__(string_view::const_iterator iter) {
+  return validate_chunk(path, cuda::proclaim_return_type<bool>([] __device__(string_view::const_iterator iter) {
     auto const c = *iter;
     if (c != '!' && c != '$' && !(c >= '&' && c <= ';') && c != '=' && !(c >= '@' && c <= 'Z') &&
         c != '_' && !(c >= 'a' && c <= 'z') && c != '~') {
       return false;
     }
     return true;
-  });
+  }));
 }
 
 bool __device__ validate_opaque(string_view opaque)
 {
   // opaque can be alphanum and @[]_-!.~'()*?/,;:$@+=
-  return validate_chunk(opaque, [] __device__(string_view::const_iterator iter) {
+  return validate_chunk(opaque, cuda::proclaim_return_type<bool>([] __device__(string_view::const_iterator iter) {
     auto const c = *iter;
     if (c != '!' && c != '$' && !(c >= '&' && c <= ';') && c != '=' &&
         !(c >= '?' && c <= ']' && c != '\\') && c != '_' && c != '~' && !(c >= 'a' && c <= 'z')) {
       return false;
     }
     return true;
-  });
+  }));
 }
 
 bool __device__ validate_fragment(string_view fragment)
 {
   // fragment can be alphanum and @[]_-!.~'()*?/,;:$&+=
-  return validate_chunk(fragment, [] __device__(string_view::const_iterator iter) {
+  return validate_chunk(fragment, cuda::proclaim_return_type<bool>([] __device__(string_view::const_iterator iter) {
     auto const c = *iter;
     if (c != '!' && c != '$' && !(c >= '&' && c <= ';') && c != '=' &&
         !(c >= '?' && c <= ']' && c != '\\') && c != '_' && c != '~' && !(c >= 'a' && c <= 'z')) {
       return false;
     }
     return true;
-  });
+  }));
 }
 
 uri_parts __device__ validate_uri(const char* str, int len)
diff --git a/src/main/cpp/src/row_conversion.cu b/src/main/cpp/src/row_conversion.cu
index f2416fb3ab..58782b215c 100644
--- a/src/main/cpp/src/row_conversion.cu
+++ b/src/main/cpp/src/row_conversion.cu
@@ -52,6 +52,8 @@
 #include <cuda/barrier>
 #endif  // #if !defined(__CUDA_ARCH__) || defined(ASYNC_MEMCPY_SUPPORTED)
 
+#include <cuda/functional>
+
 #include <algorithm>
 #include <cstdarg>
 #include <cstdint>
@@ -257,10 +259,10 @@ build_string_row_offsets(table_view const& tbl,
                     d_row_sizes.begin(),
                     d_row_sizes.end(),
                     d_row_sizes.begin(),
-                    [fixed_width_and_validity_size] __device__(auto row_size) {
+                    cuda::proclaim_return_type<size_type>([fixed_width_and_validity_size] __device__(auto row_size) {
                       return util::round_up_unsafe(fixed_width_and_validity_size + row_size,
                                                    JCUDF_ROW_ALIGNMENT);
-                    });
+                    }));
 
   return {std::move(d_row_sizes), std::move(d_offsets_iterators)};
 }
@@ -1655,12 +1657,12 @@ int compute_tile_counts(device_span<size_type const> const& batch_row_boundaries
     iter,
     iter + num_batches,
     num_tiles.begin(),
-    [desired_tile_height,
+    cuda::proclaim_return_type<size_type>([desired_tile_height,
      batch_row_boundaries = batch_row_boundaries.data()] __device__(auto batch_index) -> size_type {
       return util::div_rounding_up_unsafe(
         batch_row_boundaries[batch_index + 1] - batch_row_boundaries[batch_index],
         desired_tile_height);
-    });
+    }));
   return thrust::reduce(rmm::exec_policy(stream), num_tiles.begin(), num_tiles.end());
 }
 
@@ -1693,21 +1695,21 @@ size_type build_tiles(
     iter,
     iter + num_batches,
     num_tiles.begin(),
-    [desired_tile_height,
+    cuda::proclaim_return_type<size_type>([desired_tile_height,
      batch_row_boundaries = batch_row_boundaries.data()] __device__(auto batch_index) -> size_type {
       return util::div_rounding_up_unsafe(
         batch_row_boundaries[batch_index + 1] - batch_row_boundaries[batch_index],
         desired_tile_height);
-    });
+    }));
 
   size_type const total_tiles =
     thrust::reduce(rmm::exec_policy(stream), num_tiles.begin(), num_tiles.end());
 
   device_uvector<size_type> tile_starts(num_batches + 1, stream);
   auto tile_iter = cudf::detail::make_counting_transform_iterator(
-    0, [num_tiles = num_tiles.data(), num_batches] __device__(auto i) {
+    0, cuda::proclaim_return_type<size_type>([num_tiles = num_tiles.data(), num_batches] __device__(auto i) {
       return (i < num_batches) ? num_tiles[i] : 0;
-    });
+    }));
   thrust::exclusive_scan(rmm::exec_policy(stream),
                          tile_iter,
                          tile_iter + num_batches + 1,
@@ -1718,7 +1720,7 @@ size_type build_tiles(
     iter,
     iter + total_tiles,
     tiles.begin(),
-    [                     =,
+    cuda::proclaim_return_type<tile_info>([                     =,
      tile_starts          = tile_starts.data(),
      batch_row_boundaries = batch_row_boundaries.data()] __device__(size_type tile_index) {
       // what batch this tile falls in
@@ -1742,7 +1744,7 @@ size_type build_tiles(
       // stuff the tile
       return tile_info{
         column_start, tile_row_start, column_end, tile_row_end, static_cast<int>(batch_index)};
-    });
+    }));
 
   return total_tiles;
 }
@@ -2344,7 +2346,7 @@ std::unique_ptr<table> convert_from_rows(lists_column_view const& input,
                     thrust::make_counting_iterator(0),
                     thrust::make_counting_iterator(num_batches),
                     gpu_batch_row_boundaries.begin(),
-                    [num_rows] __device__(auto i) { return i == 0 ? 0 : num_rows; });
+                    cuda::proclaim_return_type<size_type>([num_rows] __device__(auto i) { return i == 0 ? 0 : num_rows; }));
 
   int info_count = 0;
   detail::determine_tiles(column_info.column_sizes,
@@ -2456,9 +2458,9 @@ std::unique_ptr<table> convert_from_rows(lists_column_view const& input,
     std::vector<char*> string_data_col_ptrs;
     for (auto& col_string_lengths : string_lengths) {
       device_uvector<size_type> output_string_offsets(num_rows + 1, stream, mr);
-      auto tmp = [num_rows, col_string_lengths] __device__(auto const& i) {
+      auto tmp = cuda::proclaim_return_type<int32_t>([num_rows, col_string_lengths] __device__(auto const& i) {
         return i < num_rows ? col_string_lengths[i] : 0;
-      };
+      });
       auto bounded_iter = cudf::detail::make_counting_transform_iterator(0, tmp);
       thrust::exclusive_scan(rmm::exec_policy(stream),
                              bounded_iter,
diff --git a/src/main/cpp/src/utilities.cu b/src/main/cpp/src/utilities.cu
index c66ee5cbcb..e5c9c0eba0 100644
--- a/src/main/cpp/src/utilities.cu
+++ b/src/main/cpp/src/utilities.cu
@@ -25,6 +25,8 @@
 #include <rmm/exec_policy.hpp>
 #include <rmm/mr/device/device_memory_resource.hpp>
 
+#include <cuda/functional>
+
 namespace spark_rapids_jni {
 
 std::unique_ptr<rmm::device_buffer> bitmask_bitwise_or(
@@ -56,13 +58,13 @@ std::unique_ptr<rmm::device_buffer> bitmask_bitwise_or(
     thrust::make_counting_iterator(0),
     thrust::make_counting_iterator(0) + mask_size,
     static_cast<cudf::bitmask_type*>(out->data()),
-    [buffers = d_input.data(), num_buffers = input.size()] __device__(cudf::size_type word_index) {
+    cuda::proclaim_return_type<cudf::bitmask_type>([buffers = d_input.data(), num_buffers = input.size()] __device__(cudf::size_type word_index) {
       cudf::bitmask_type out = buffers[0][word_index];
       for (auto idx = 1; idx < num_buffers; idx++) {
         out |= buffers[idx][word_index];
       }
       return out;
-    });
+    }));
 
   return out;
 }
diff --git a/src/main/cpp/src/xxhash64.cu b/src/main/cpp/src/xxhash64.cu
index 561aa49862..46fe02690e 100644
--- a/src/main/cpp/src/xxhash64.cu
+++ b/src/main/cpp/src/xxhash64.cu
@@ -25,6 +25,8 @@
 
 #include <thrust/tabulate.h>
 
+#include <cuda/functional>
+
 namespace spark_rapids_jni {
 
 namespace {
@@ -286,10 +288,10 @@ class device_row_hasher {
       _table.begin(),
       _table.end(),
       _seed,
-      [row_index, nulls = _check_nulls] __device__(auto hash, auto column) {
+      cuda::proclaim_return_type<hash_value_type>([row_index, nulls = _check_nulls] __device__(auto hash, auto column) {
         return cudf::type_dispatcher(
           column.type(), element_hasher_adapter{}, column, row_index, nulls, hash);
-      });
+      }));
   }
 
   /**
diff --git a/src/main/cpp/src/zorder.cu b/src/main/cpp/src/zorder.cu
index c0f21b9b3a..5c655bb06f 100644
--- a/src/main/cpp/src/zorder.cu
+++ b/src/main/cpp/src/zorder.cu
@@ -28,6 +28,8 @@
 #include <thrust/iterator/constant_iterator.h>
 #include <thrust/iterator/counting_iterator.h>
 
+#include <cuda/functional>
+
 namespace {
 
 // pretends to be an array of uint32_t, but really only stores
@@ -253,7 +255,7 @@ std::unique_ptr<cudf::column> hilbert_index(int32_t const num_bits_per_entry,
     thrust::make_counting_iterator<cudf::size_type>(0),
     thrust::make_counting_iterator<cudf::size_type>(0) + num_rows,
     output_dv_ptr->begin<int64_t>(),
-    [num_bits_per_entry, num_columns, input = *input_dv] __device__(cudf::size_type row_index) {
+    cuda::proclaim_return_type<int64_t>([num_bits_per_entry, num_columns, input = *input_dv] __device__(cudf::size_type row_index) {
       uint_backed_array<uint64_t> row(num_bits_per_entry);
       for (cudf::size_type column_index = 0; column_index < num_columns; column_index++) {
         auto const column   = input.column(column_index);
@@ -264,7 +266,7 @@ std::unique_ptr<cudf::column> hilbert_index(int32_t const num_bits_per_entry,
       auto const transposed_index = hilbert_transposed_index(row, num_bits_per_entry, num_columns);
       return static_cast<int64_t>(
         to_hilbert_index(transposed_index, num_bits_per_entry, num_columns));
-    });
+    }));
 
   return output_data_col;
 }