adding proclaim_return_type to device lambdas
Signed-off-by: Mike Wilson <knobby@burntsheep.com>
hyperbolic2346 committed Dec 19, 2023
1 parent 48d2736 commit 9076ad5
Showing 9 changed files with 94 additions and 76 deletions.
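
Every __device__ lambda handed to a Thrust algorithm or iterator in these files is wrapped in cuda::proclaim_return_type<T>(...), and each file gains a matching #include <cuda/functional>. The wrapper states the lambda's return type explicitly, since host-side code cannot deduce the return type of an extended device lambda; the lambda bodies themselves are unchanged. A minimal standalone sketch of the pattern (not code from this repository; assumes nvcc with extended lambdas enabled):

// Minimal sketch of the wrapping pattern applied throughout this commit.
// cuda::proclaim_return_type<T> wraps an extended __device__ lambda so that
// host-side code such as Thrust can name its return type without inspecting
// the device-only body.
#include <thrust/device_vector.h>
#include <thrust/transform.h>
#include <cuda/functional>

int main()
{
  thrust::device_vector<int> input(8, 1);
  thrust::device_vector<int> output(8);

  thrust::transform(input.begin(),
                    input.end(),
                    output.begin(),
                    cuda::proclaim_return_type<int>([] __device__(int x) { return x * 2; }));
  return 0;
}
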
6 changes: 4 additions & 2 deletions src/main/cpp/src/bloom_filter.cu
@@ -34,6 +34,8 @@

#include <thrust/logical.h>

#include <cuda/functional>

#include <byteswap.h>

namespace spark_rapids_jni {
@@ -316,14 +318,14 @@ std::unique_ptr<cudf::list_scalar> bloom_filter_merge(cudf::column_view const& b
thrust::make_counting_iterator(0),
thrust::make_counting_iterator(0) + num_words,
dst,
[src, num_buffers = bloom_filters.size(), stride = buf_size] __device__(
cuda::proclaim_return_type<cudf::bitmask_type>([src, num_buffers = bloom_filters.size(), stride = buf_size] __device__(
cudf::size_type word_index) {
cudf::bitmask_type out = (reinterpret_cast<cudf::bitmask_type const*>(src))[word_index];
for (auto idx = 1; idx < num_buffers; idx++) {
out |= (reinterpret_cast<cudf::bitmask_type const*>(src + idx * stride))[word_index];
}
return out;
});
}));

// create the 1-row list column and move it into a scalar.
return std::make_unique<cudf::list_scalar>(
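
For reference, a self-contained sketch of the word-wise OR merge above, with the cudf/rmm types and the src/stride captures replaced by plain illustrative buffers so it compiles on its own; the proclaimed type simply has to match what the lambda returns (here std::uint32_t standing in for cudf::bitmask_type):

// Hedged sketch, not repository code: OR together word i of several equally
// sized bitmask buffers laid out back to back, via a proclaimed device lambda.
#include <thrust/device_vector.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/transform.h>
#include <cuda/functional>
#include <cstdint>

int main()
{
  int const num_words   = 4;
  int const num_buffers = 3;
  thrust::device_vector<std::uint32_t> buffers(num_words * num_buffers, 0x1u);
  thrust::device_vector<std::uint32_t> merged(num_words);

  std::uint32_t const* src = thrust::raw_pointer_cast(buffers.data());
  thrust::transform(
    thrust::counting_iterator<int>(0),
    thrust::counting_iterator<int>(num_words),
    merged.begin(),
    cuda::proclaim_return_type<std::uint32_t>(
      [src, num_words, num_buffers] __device__(int word_index) {
        std::uint32_t out = src[word_index];
        for (int idx = 1; idx < num_buffers; ++idx) {
          out |= src[idx * num_words + word_index];
        }
        return out;
      }));
  return 0;
}
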
18 changes: 10 additions & 8 deletions src/main/cpp/src/datetime_rebase.cu
@@ -30,6 +30,8 @@
#include <thrust/iterator/counting_iterator.h>
#include <thrust/transform.h>

#include <cuda/functional>

namespace {

// Convert a date in Julian calendar to the number of days since epoch.
@@ -73,7 +75,7 @@ std::unique_ptr<cudf::column> gregorian_to_julian_days(cudf::column_view const&
thrust::make_counting_iterator(0),
thrust::make_counting_iterator(input.size()),
output->mutable_view().begin<cudf::timestamp_D>(),
[d_input = input.begin<cudf::timestamp_D>()] __device__(auto const idx) {
cuda::proclaim_return_type<cudf::timestamp_D>([d_input = input.begin<cudf::timestamp_D>()] __device__(auto const idx) {
auto constexpr julian_end = cuda::std::chrono::year_month_day{
cuda::std::chrono::year{1582}, cuda::std::chrono::month{10}, cuda::std::chrono::day{4}};
auto constexpr gregorian_start = cuda::std::chrono::year_month_day{
@@ -94,7 +96,7 @@ std::unique_ptr<cudf::column> gregorian_to_julian_days(cudf::column_view const&

// Reinterpret year/month/day as in Julian calendar then compute the days since epoch.
return cudf::timestamp_D{cudf::duration_D{days_from_julian(ymd)}};
});
}));

return output;
}
@@ -142,7 +144,7 @@ std::unique_ptr<cudf::column> julian_to_gregorian_days(cudf::column_view const&
thrust::make_counting_iterator(0),
thrust::make_counting_iterator(input.size()),
output->mutable_view().begin<cudf::timestamp_D>(),
[d_input = input.begin<cudf::timestamp_D>()] __device__(auto const idx) {
cuda::proclaim_return_type<cudf::timestamp_D>([d_input = input.begin<cudf::timestamp_D>()] __device__(auto const idx) {
auto const days_ts = d_input[idx].time_since_epoch().count();
if (days_ts >= -141427) { // Gregorian start day
return d_input[idx];
@@ -154,7 +156,7 @@ std::unique_ptr<cudf::column> julian_to_gregorian_days(cudf::column_view const&
auto const result =
cuda::std::chrono::local_days{ymd}.time_since_epoch().count();
return cudf::timestamp_D{cudf::duration_D{result}};
});
}));

return output;
}
@@ -242,7 +244,7 @@ std::unique_ptr<cudf::column> gregorian_to_julian_micros(cudf::column_view const
thrust::make_counting_iterator(0),
thrust::make_counting_iterator(input.size()),
output->mutable_view().begin<cudf::timestamp_us>(),
[d_input = input.begin<cudf::timestamp_us>()] __device__(auto const idx) {
cuda::proclaim_return_type<cudf::timestamp_us>([d_input = input.begin<cudf::timestamp_us>()] __device__(auto const idx) {
// This timestamp corresponds to October 15th, 1582 UTC.
// After this day, there is no difference in microsecond values between Gregorian
// and Julian calendars.
@@ -274,7 +276,7 @@ std::unique_ptr<cudf::column> gregorian_to_julian_micros(cudf::column_view const
result += timeparts.subsecond;

return cudf::timestamp_us{cudf::duration_us{result}};
});
}));

return output;
}
@@ -304,7 +306,7 @@ std::unique_ptr<cudf::column> julian_to_gregorian_micros(cudf::column_view const
thrust::make_counting_iterator(0),
thrust::make_counting_iterator(input.size()),
output->mutable_view().begin<cudf::timestamp_us>(),
[d_input = input.begin<cudf::timestamp_us>()] __device__(auto const idx) {
cuda::proclaim_return_type<cudf::timestamp_us>([d_input = input.begin<cudf::timestamp_us>()] __device__(auto const idx) {
// This timestamp corresponds to October 15th, 1582 UTC.
// After this day, there is no difference in microsecond values between Gregorian
// and Julian calendars.
@@ -328,7 +330,7 @@ std::unique_ptr<cudf::column> julian_to_gregorian_micros(cudf::column_view const
result += timeparts.subsecond;

return cudf::timestamp_us{cudf::duration_us{result}};
});
}));

return output;
}
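
The rebase lambdas above take an auto parameter and return strong timestamp types (cudf::timestamp_D, cudf::timestamp_us), so the proclaimed type is what lets thrust::transform name its output element type. A standalone sketch of the same shape using a libcu++ chrono duration instead of the cudf timestamp types (illustrative only, not repository code):

// Hedged sketch: a generic (auto-parameter) device lambda returning a
// cuda::std::chrono duration, with the result type proclaimed up front.
#include <thrust/device_vector.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/transform.h>
#include <cuda/functional>
#include <cuda/std/chrono>

int main()
{
  using days = cuda::std::chrono::days;

  thrust::device_vector<days> out(7);
  thrust::transform(
    thrust::counting_iterator<int>(0),
    thrust::counting_iterator<int>(7),
    out.begin(),
    // Without the proclamation, host-side code cannot see that this generic
    // device lambda yields a days value.
    cuda::proclaim_return_type<days>([] __device__(auto const idx) { return days{idx}; }));
  return 0;
}
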
60 changes: 31 additions & 29 deletions src/main/cpp/src/map_utils.cu
@@ -54,6 +54,8 @@
//
#include <cub/device/device_radix_sort.cuh>

#include <cuda/functional>

namespace spark_rapids_jni {

using namespace cudf::io::json;
@@ -179,29 +181,29 @@ rmm::device_uvector<TreeDepthT> compute_node_levels(int64_t num_nodes,
auto token_levels = rmm::device_uvector<TreeDepthT>(tokens.size(), stream);

// Whether the token pops from the parent node stack.
auto const does_pop = [] __device__(PdaTokenT const token) -> bool {
auto const does_pop = cuda::proclaim_return_type<bool>([] __device__(PdaTokenT const token) -> bool {
switch (token) {
case token_t::StructMemberEnd:
case token_t::StructEnd:
case token_t::ListEnd: return true;
default: return false;
};
};
});

// Whether the token pushes onto the parent node stack.
auto const does_push = [] __device__(PdaTokenT const token) -> bool {
auto const does_push = cuda::proclaim_return_type<bool>([] __device__(PdaTokenT const token) -> bool {
switch (token) {
case token_t::FieldNameBegin:
case token_t::StructBegin:
case token_t::ListBegin: return true;
default: return false;
};
};
});

auto const push_pop_it = thrust::make_transform_iterator(
tokens.begin(), [does_push, does_pop] __device__(PdaTokenT const token) -> cudf::size_type {
tokens.begin(), cuda::proclaim_return_type<cudf::size_type>([does_push, does_pop] __device__(PdaTokenT const token) -> cudf::size_type {
return does_push(token) - does_pop(token);
});
}));
thrust::exclusive_scan(
rmm::exec_policy(stream), push_pop_it, push_pop_it + tokens.size(), token_levels.begin());
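
The transform-iterator case just above is where the proclamation is most load-bearing: thrust::make_transform_iterator must know the functor's result type on the host to define the iterator's value_type. A standalone sketch of that shape (the token arithmetic here is invented, not the JSON token logic from this file):

// Hedged sketch: a transform iterator over a proclaimed device lambda; the
// proclaimed type becomes the iterator's value_type, so the scan can run.
#include <thrust/device_vector.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/scan.h>
#include <cuda/functional>

int main()
{
  // +1 for even "tokens", -1 for odd ones -- purely illustrative.
  auto const push_pop = cuda::proclaim_return_type<int>(
    [] __device__(int token) -> int { return token % 2 == 0 ? 1 : -1; });

  auto const push_pop_it =
    thrust::make_transform_iterator(thrust::counting_iterator<int>(0), push_pop);

  thrust::device_vector<int> levels(16);
  thrust::exclusive_scan(push_pop_it, push_pop_it + 16, levels.begin());
  return 0;
}
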

@@ -302,7 +304,7 @@ rmm::device_uvector<NodeIndexT> compute_parent_node_ids(
rmm::device_uvector<NodeIndexT> const& node_token_ids,
rmm::cuda_stream_view stream)
{
auto const first_childs_parent_token_id = [tokens =
auto const first_childs_parent_token_id = cuda::proclaim_return_type<NodeIndexT>([tokens =
tokens.begin()] __device__(auto i) -> NodeIndexT {
if (i <= 0) { return -1; }
if (tokens[i - 1] == token_t::StructBegin || tokens[i - 1] == token_t::ListBegin) {
@@ -315,22 +317,22 @@
} else {
return -1;
}
};
});

auto parent_node_ids = rmm::device_uvector<NodeIndexT>(num_nodes, stream);
thrust::transform(
rmm::exec_policy(stream),
node_token_ids.begin(),
node_token_ids.end(),
parent_node_ids.begin(),
[node_ids_gpu = node_token_ids.begin(), num_nodes, first_childs_parent_token_id] __device__(
cuda::proclaim_return_type<NodeIndexT>([node_ids_gpu = node_token_ids.begin(), num_nodes, first_childs_parent_token_id] __device__(
NodeIndexT const tid) -> NodeIndexT {
auto const pid = first_childs_parent_token_id(tid);
return pid < 0
? cudf::io::json::parent_node_sentinel
: thrust::lower_bound(thrust::seq, node_ids_gpu, node_ids_gpu + num_nodes, pid) -
node_ids_gpu;
});
}));

// Propagate parent node to siblings from first sibling - inplace.
auto const node_levels = compute_node_levels(num_nodes, tokens, stream);
@@ -356,7 +358,7 @@ rmm::device_uvector<int8_t> check_key_or_value_nodes(
transform_it,
transform_it + parent_node_ids.size(),
key_or_value.begin(),
[key_sentinel = key_sentinel,
cuda::proclaim_return_type<int8_t>([key_sentinel = key_sentinel,
value_sentinel = value_sentinel,
parent_ids = parent_node_ids.begin()] __device__(auto const node_id) -> int8_t {
if (parent_ids[node_id] > 0) {
@@ -369,7 +371,7 @@
}

return 0;
});
}));

#ifdef DEBUG_FROM_JSON
print_debug(key_or_value, "Nodes are key/value (1==key, 2==value)", ", ", stream);
@@ -390,7 +392,7 @@ struct node_ranges_fn {

__device__ thrust::pair<SymbolOffsetT, SymbolOffsetT> operator()(cudf::size_type node_id) const
{
[[maybe_unused]] auto const is_begin_of_section = [] __device__(PdaTokenT const token) {
[[maybe_unused]] auto const is_begin_of_section = cuda::proclaim_return_type<bool>([] __device__(PdaTokenT const token) {
switch (token) {
case token_t::StructBegin:
case token_t::ListBegin:
@@ -399,10 +401,10 @@ struct node_ranges_fn {
case token_t::FieldNameBegin: return true;
default: return false;
};
};
});

// The end-of-* partner token for a given beginning-of-* token
auto const end_of_partner = [] __device__(PdaTokenT const token) {
auto const end_of_partner = cuda::proclaim_return_type<token_t>([] __device__(PdaTokenT const token) {
switch (token) {
case token_t::StructBegin: return token_t::StructEnd;
case token_t::ListBegin: return token_t::ListEnd;
@@ -411,20 +413,20 @@ struct node_ranges_fn {
case token_t::FieldNameBegin: return token_t::FieldNameEnd;
default: return token_t::ErrorBegin;
};
};
});

// Encode a fixed value for nested node types (list+struct).
auto const nested_node_to_value = [] __device__(PdaTokenT const token) -> int32_t {
auto const nested_node_to_value = cuda::proclaim_return_type<int32_t>([] __device__(PdaTokenT const token) -> int32_t {
switch (token) {
case token_t::StructBegin: return 1;
case token_t::StructEnd: return -1;
case token_t::ListBegin: return 1 << 8;
case token_t::ListEnd: return -(1 << 8);
default: return 0;
};
};
});

auto const get_token_index = [include_quote_char = include_quote_char] __device__(
auto const get_token_index = cuda::proclaim_return_type<SymbolOffsetT>([include_quote_char = include_quote_char] __device__(
PdaTokenT const token, SymbolOffsetT const token_index) {
constexpr SymbolOffsetT quote_char_size = 1;
switch (token) {
@@ -436,7 +438,7 @@ struct node_ranges_fn {
case token_t::FieldNameBegin: return token_index + quote_char_size;
default: return token_index;
};
};
});

if (key_or_value[node_id] != key_sentinel && key_or_value[node_id] != value_sentinel) {
return thrust::make_pair(0, 0);
@@ -529,13 +531,13 @@ std::unique_ptr<cudf::column> extract_keys_or_values(
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
auto const is_key = [key_or_value = key_or_value.begin()] __device__(auto const node_id) {
auto const is_key = cuda::proclaim_return_type<bool>([key_or_value = key_or_value.begin()] __device__(auto const node_id) {
return key_or_value[node_id] == key_sentinel;
};
});

auto const is_value = [key_or_value = key_or_value.begin()] __device__(auto const node_id) {
auto const is_value = cuda::proclaim_return_type<bool>([key_or_value = key_or_value.begin()] __device__(auto const node_id) {
return key_or_value[node_id] == value_sentinel;
};
});

auto extract_ranges =
rmm::device_uvector<thrust::pair<SymbolOffsetT, SymbolOffsetT>>(num_nodes, stream, mr);
@@ -578,13 +580,13 @@ rmm::device_uvector<cudf::size_type> compute_list_offsets(
parent_node_ids.begin(),
parent_node_ids.end(),
node_child_counts.begin(),
[] __device__(auto const parent_id) -> NodeIndexT {
cuda::proclaim_return_type<NodeIndexT>([] __device__(auto const parent_id) -> NodeIndexT {
return parent_id == 0 ? 0 : std::numeric_limits<NodeIndexT>::lowest();
});
}));

auto const is_key = [key_or_value = key_or_value.begin()] __device__(auto const node_id) {
auto const is_key = cuda::proclaim_return_type<bool>([key_or_value = key_or_value.begin()] __device__(auto const node_id) {
return key_or_value[node_id] == key_sentinel;
};
});

// Count the number of keys for each json object using `atomicAdd`.
auto const transform_it = thrust::counting_iterator<int>(0);
@@ -608,7 +610,7 @@ rmm::device_uvector<cudf::size_type> compute_list_offsets(
node_child_counts.begin(),
node_child_counts.end(),
list_offsets.begin(),
[] __device__(auto const count) { return count >= 0; },
cuda::proclaim_return_type<bool>([] __device__(auto const count) { return count >= 0; }),
stream);
CUDF_EXPECTS(thrust::distance(list_offsets.begin(), copy_end) == static_cast<int64_t>(n_lists),
"Invalid list size computation.");
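
Several of the wrapped lambdas in this file (does_push, does_pop, is_key, first_childs_parent_token_id) are captured and called by other wrapped lambdas. The object returned by cuda::proclaim_return_type is an ordinary device-callable value, so that composition carries over unchanged. A standalone sketch of the idea with invented predicates (not repository code):

// Hedged sketch: a proclaimed lambda capturing and calling two other
// proclaimed lambdas, mirroring the does_push/does_pop composition above.
#include <thrust/device_vector.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/transform.h>
#include <cuda/functional>

int main()
{
  auto const is_even =
    cuda::proclaim_return_type<bool>([] __device__(int x) -> bool { return x % 2 == 0; });
  auto const is_odd =
    cuda::proclaim_return_type<bool>([] __device__(int x) -> bool { return x % 2 != 0; });

  thrust::device_vector<int> balance(10);
  thrust::transform(
    thrust::counting_iterator<int>(0),
    thrust::counting_iterator<int>(10),
    balance.begin(),
    cuda::proclaim_return_type<int>([is_even, is_odd] __device__(int x) {
      return static_cast<int>(is_even(x)) - static_cast<int>(is_odd(x));
    }));
  return 0;
}
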
6 changes: 4 additions & 2 deletions src/main/cpp/src/murmur_hash.cu
@@ -27,6 +27,8 @@
#include <thrust/iterator/counting_iterator.h>
#include <thrust/tabulate.h>

#include <cuda/functional>

namespace spark_rapids_jni {

namespace {
@@ -77,10 +79,10 @@ class murmur_device_row_hasher {
_table.begin(),
_table.end(),
_seed,
[row_index, nulls = this->_check_nulls] __device__(auto hash, auto column) {
cuda::proclaim_return_type<murmur_hash_value_type>([row_index, nulls = this->_check_nulls] __device__(auto hash, auto column) {
return cudf::type_dispatcher(
column.type(), element_hasher_adapter<hash_function>{nulls, hash}, column, row_index);
});
}));
}

private:
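
In murmur_hash.cu the proclaimed lambda is the per-column combining step of a seeded row hash. A standalone sketch of a seeded fold with a separately proclaimed combiner (the hash function, seed, and layout below are invented and are not the Spark murmur3 logic):

// Hedged sketch: a per-row device lambda folds a seed across the columns of a
// row-major table using a separately proclaimed binary combiner.
#include <thrust/device_vector.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/transform.h>
#include <cuda/functional>
#include <cstdint>

int main()
{
  int const num_rows = 4;
  int const num_cols = 3;
  thrust::device_vector<std::uint32_t> table(num_rows * num_cols, 7u);
  thrust::device_vector<std::uint32_t> row_hashes(num_rows);

  std::uint32_t const* data = thrust::raw_pointer_cast(table.data());

  auto const combine = cuda::proclaim_return_type<std::uint32_t>(
    [] __device__(std::uint32_t hash, std::uint32_t value) { return hash * 31u + value; });

  thrust::transform(
    thrust::counting_iterator<int>(0),
    thrust::counting_iterator<int>(num_rows),
    row_hashes.begin(),
    cuda::proclaim_return_type<std::uint32_t>(
      [data, combine, num_cols] __device__(int row) {
        std::uint32_t hash = 42u;  // illustrative seed only
        for (int col = 0; col < num_cols; ++col) {
          hash = combine(hash, data[row * num_cols + col]);
        }
        return hash;
      }));
  return 0;
}
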