diff --git a/src/main/cpp/src/bloom_filter.cu b/src/main/cpp/src/bloom_filter.cu index 8e828e3a4d..6270705178 100644 --- a/src/main/cpp/src/bloom_filter.cu +++ b/src/main/cpp/src/bloom_filter.cu @@ -318,14 +318,15 @@ std::unique_ptr bloom_filter_merge(cudf::column_view const& b thrust::make_counting_iterator(0), thrust::make_counting_iterator(0) + num_words, dst, - cuda::proclaim_return_type([src, num_buffers = bloom_filters.size(), stride = buf_size] __device__( - cudf::size_type word_index) { - cudf::bitmask_type out = (reinterpret_cast(src))[word_index]; - for (auto idx = 1; idx < num_buffers; idx++) { - out |= (reinterpret_cast(src + idx * stride))[word_index]; - } - return out; - })); + cuda::proclaim_return_type( + [src, num_buffers = bloom_filters.size(), stride = buf_size] __device__( + cudf::size_type word_index) { + cudf::bitmask_type out = (reinterpret_cast(src))[word_index]; + for (auto idx = 1; idx < num_buffers; idx++) { + out |= (reinterpret_cast(src + idx * stride))[word_index]; + } + return out; + })); // create the 1-row list column and move it into a scalar. return std::make_unique( diff --git a/src/main/cpp/src/datetime_rebase.cu b/src/main/cpp/src/datetime_rebase.cu index a95df64613..8963acf491 100644 --- a/src/main/cpp/src/datetime_rebase.cu +++ b/src/main/cpp/src/datetime_rebase.cu @@ -75,28 +75,29 @@ std::unique_ptr gregorian_to_julian_days(cudf::column_view const& thrust::make_counting_iterator(0), thrust::make_counting_iterator(input.size()), output->mutable_view().begin(), - cuda::proclaim_return_type([d_input = input.begin()] __device__(auto const idx) { - auto constexpr julian_end = cuda::std::chrono::year_month_day{ - cuda::std::chrono::year{1582}, cuda::std::chrono::month{10}, cuda::std::chrono::day{4}}; - auto constexpr gregorian_start = cuda::std::chrono::year_month_day{ - cuda::std::chrono::year{1582}, cuda::std::chrono::month{10}, cuda::std::chrono::day{15}}; - - auto const days_ts = d_input[idx].time_since_epoch().count(); - auto const days_since_epoch = cuda::std::chrono::sys_days(cudf::duration_D{days_ts}); - - // Convert the input into local date in Proleptic Gregorian calendar. - auto const ymd = cuda::std::chrono::year_month_day(days_since_epoch); - if (ymd > julian_end && ymd < gregorian_start) { - // This is the same as rebasing from the local date given at `gregorian_start`. - return cudf::timestamp_D{cudf::duration_D{-141427}}; - } - - // No change since this time. - if (ymd >= gregorian_start) { return d_input[idx]; } - - // Reinterpret year/month/day as in Julian calendar then compute the days since epoch. - return cudf::timestamp_D{cudf::duration_D{days_from_julian(ymd)}}; - })); + cuda::proclaim_return_type( + [d_input = input.begin()] __device__(auto const idx) { + auto constexpr julian_end = cuda::std::chrono::year_month_day{ + cuda::std::chrono::year{1582}, cuda::std::chrono::month{10}, cuda::std::chrono::day{4}}; + auto constexpr gregorian_start = cuda::std::chrono::year_month_day{ + cuda::std::chrono::year{1582}, cuda::std::chrono::month{10}, cuda::std::chrono::day{15}}; + + auto const days_ts = d_input[idx].time_since_epoch().count(); + auto const days_since_epoch = cuda::std::chrono::sys_days(cudf::duration_D{days_ts}); + + // Convert the input into local date in Proleptic Gregorian calendar. + auto const ymd = cuda::std::chrono::year_month_day(days_since_epoch); + if (ymd > julian_end && ymd < gregorian_start) { + // This is the same as rebasing from the local date given at `gregorian_start`. + return cudf::timestamp_D{cudf::duration_D{-141427}}; + } + + // No change since this time. + if (ymd >= gregorian_start) { return d_input[idx]; } + + // Reinterpret year/month/day as in Julian calendar then compute the days since epoch. + return cudf::timestamp_D{cudf::duration_D{days_from_julian(ymd)}}; + })); return output; } @@ -144,19 +145,20 @@ std::unique_ptr julian_to_gregorian_days(cudf::column_view const& thrust::make_counting_iterator(0), thrust::make_counting_iterator(input.size()), output->mutable_view().begin(), - cuda::proclaim_return_type([d_input = input.begin()] __device__(auto const idx) { - auto const days_ts = d_input[idx].time_since_epoch().count(); - if (days_ts >= -141427) { // Gregorian start day - return d_input[idx]; - } - - // Reinterpret year/month/day as in Gregorian calendar then compute the days - // since epoch. - auto const ymd = julian_from_days(days_ts); - auto const result = - cuda::std::chrono::local_days{ymd}.time_since_epoch().count(); - return cudf::timestamp_D{cudf::duration_D{result}}; - })); + cuda::proclaim_return_type( + [d_input = input.begin()] __device__(auto const idx) { + auto const days_ts = d_input[idx].time_since_epoch().count(); + if (days_ts >= -141427) { // Gregorian start day + return d_input[idx]; + } + + // Reinterpret year/month/day as in Gregorian calendar then compute the days + // since epoch. + auto const ymd = julian_from_days(days_ts); + auto const result = + cuda::std::chrono::local_days{ymd}.time_since_epoch().count(); + return cudf::timestamp_D{cudf::duration_D{result}}; + })); return output; } @@ -244,39 +246,40 @@ std::unique_ptr gregorian_to_julian_micros(cudf::column_view const thrust::make_counting_iterator(0), thrust::make_counting_iterator(input.size()), output->mutable_view().begin(), - cuda::proclaim_return_type([d_input = input.begin()] __device__(auto const idx) { - // This timestamp corresponds to October 15th, 1582 UTC. - // After this day, there is no difference in microsecond values between Gregorian - // and Julian calendars. - int64_t constexpr last_switch_gregorian_ts = -12219292800000000L; - - auto const micros_ts = d_input[idx].time_since_epoch().count(); - if (micros_ts >= last_switch_gregorian_ts) { return d_input[idx]; } - - // Convert the input into local date-time in Proleptic Gregorian calendar. - auto const days_since_epoch = cuda::std::chrono::sys_days(static_cast( - cuda::std::chrono::floor(cudf::duration_us(micros_ts)))); - auto const ymd = cuda::std::chrono::year_month_day(days_since_epoch); - auto const timeparts = get_time_components(micros_ts); - - auto constexpr julian_end = cuda::std::chrono::year_month_day{ - cuda::std::chrono::year{1582}, cuda::std::chrono::month{10}, cuda::std::chrono::day{4}}; - auto constexpr gregorian_start = cuda::std::chrono::year_month_day{ - cuda::std::chrono::year{1582}, cuda::std::chrono::month{10}, cuda::std::chrono::day{15}}; - - // Reinterpret the local date-time as in Julian calendar and compute microseconds since - // the epoch from that Julian local date-time. - // If the input date is outside of both calendars, consider it as it is a local date - // given at `gregorian_start` (-141427 Julian days since epoch). - auto const julian_days = - (ymd > julian_end && ymd < gregorian_start) ? -141427 : days_from_julian(ymd); - int64_t result = (julian_days * 24L * 3600L) + (timeparts.hour * 3600L) + - (timeparts.minute * 60L) + timeparts.second; - result *= MICROS_PER_SECOND; // to microseconds - result += timeparts.subsecond; - - return cudf::timestamp_us{cudf::duration_us{result}}; - })); + cuda::proclaim_return_type( + [d_input = input.begin()] __device__(auto const idx) { + // This timestamp corresponds to October 15th, 1582 UTC. + // After this day, there is no difference in microsecond values between Gregorian + // and Julian calendars. + int64_t constexpr last_switch_gregorian_ts = -12219292800000000L; + + auto const micros_ts = d_input[idx].time_since_epoch().count(); + if (micros_ts >= last_switch_gregorian_ts) { return d_input[idx]; } + + // Convert the input into local date-time in Proleptic Gregorian calendar. + auto const days_since_epoch = cuda::std::chrono::sys_days(static_cast( + cuda::std::chrono::floor(cudf::duration_us(micros_ts)))); + auto const ymd = cuda::std::chrono::year_month_day(days_since_epoch); + auto const timeparts = get_time_components(micros_ts); + + auto constexpr julian_end = cuda::std::chrono::year_month_day{ + cuda::std::chrono::year{1582}, cuda::std::chrono::month{10}, cuda::std::chrono::day{4}}; + auto constexpr gregorian_start = cuda::std::chrono::year_month_day{ + cuda::std::chrono::year{1582}, cuda::std::chrono::month{10}, cuda::std::chrono::day{15}}; + + // Reinterpret the local date-time as in Julian calendar and compute microseconds since + // the epoch from that Julian local date-time. + // If the input date is outside of both calendars, consider it as it is a local date + // given at `gregorian_start` (-141427 Julian days since epoch). + auto const julian_days = + (ymd > julian_end && ymd < gregorian_start) ? -141427 : days_from_julian(ymd); + int64_t result = (julian_days * 24L * 3600L) + (timeparts.hour * 3600L) + + (timeparts.minute * 60L) + timeparts.second; + result *= MICROS_PER_SECOND; // to microseconds + result += timeparts.subsecond; + + return cudf::timestamp_us{cudf::duration_us{result}}; + })); return output; } @@ -306,31 +309,32 @@ std::unique_ptr julian_to_gregorian_micros(cudf::column_view const thrust::make_counting_iterator(0), thrust::make_counting_iterator(input.size()), output->mutable_view().begin(), - cuda::proclaim_return_type([d_input = input.begin()] __device__(auto const idx) { - // This timestamp corresponds to October 15th, 1582 UTC. - // After this day, there is no difference in microsecond values between Gregorian - // and Julian calendars. - int64_t constexpr last_switch_gregorian_ts = -12219292800000000L; - - auto const micros_ts = d_input[idx].time_since_epoch().count(); - if (micros_ts >= last_switch_gregorian_ts) { return d_input[idx]; } - - // Convert the input into local date-time in Julian calendar. - auto const days_since_epoch = cuda::std::chrono::sys_days(static_cast( - cuda::std::chrono::floor(cudf::duration_us(micros_ts)))); - auto const ymd = julian_from_days(days_since_epoch.time_since_epoch().count()); - auto const timeparts = get_time_components(micros_ts); - - // Reinterpret the local date-time as in Gregorian calendar and compute microseconds since - // the epoch from that Gregorian local date-time. - auto const gregorian_days = cuda::std::chrono::local_days(ymd).time_since_epoch().count(); - int64_t result = (gregorian_days * 24L * 3600L) + (timeparts.hour * 3600L) + - (timeparts.minute * 60L) + timeparts.second; - result *= MICROS_PER_SECOND; // to microseconds - result += timeparts.subsecond; - - return cudf::timestamp_us{cudf::duration_us{result}}; - })); + cuda::proclaim_return_type( + [d_input = input.begin()] __device__(auto const idx) { + // This timestamp corresponds to October 15th, 1582 UTC. + // After this day, there is no difference in microsecond values between Gregorian + // and Julian calendars. + int64_t constexpr last_switch_gregorian_ts = -12219292800000000L; + + auto const micros_ts = d_input[idx].time_since_epoch().count(); + if (micros_ts >= last_switch_gregorian_ts) { return d_input[idx]; } + + // Convert the input into local date-time in Julian calendar. + auto const days_since_epoch = cuda::std::chrono::sys_days(static_cast( + cuda::std::chrono::floor(cudf::duration_us(micros_ts)))); + auto const ymd = julian_from_days(days_since_epoch.time_since_epoch().count()); + auto const timeparts = get_time_components(micros_ts); + + // Reinterpret the local date-time as in Gregorian calendar and compute microseconds since + // the epoch from that Gregorian local date-time. + auto const gregorian_days = cuda::std::chrono::local_days(ymd).time_since_epoch().count(); + int64_t result = (gregorian_days * 24L * 3600L) + (timeparts.hour * 3600L) + + (timeparts.minute * 60L) + timeparts.second; + result *= MICROS_PER_SECOND; // to microseconds + result += timeparts.subsecond; + + return cudf::timestamp_us{cudf::duration_us{result}}; + })); return output; } diff --git a/src/main/cpp/src/map_utils.cu b/src/main/cpp/src/map_utils.cu index a90e077135..a51a7de57b 100644 --- a/src/main/cpp/src/map_utils.cu +++ b/src/main/cpp/src/map_utils.cu @@ -181,29 +181,33 @@ rmm::device_uvector compute_node_levels(int64_t num_nodes, auto token_levels = rmm::device_uvector(tokens.size(), stream); // Whether the token pops from the parent node stack. - auto const does_pop = cuda::proclaim_return_type([] __device__(PdaTokenT const token) -> bool { - switch (token) { - case token_t::StructMemberEnd: - case token_t::StructEnd: - case token_t::ListEnd: return true; - default: return false; - }; - }); + auto const does_pop = + cuda::proclaim_return_type([] __device__(PdaTokenT const token) -> bool { + switch (token) { + case token_t::StructMemberEnd: + case token_t::StructEnd: + case token_t::ListEnd: return true; + default: return false; + }; + }); // Whether the token pushes onto the parent node stack. - auto const does_push = cuda::proclaim_return_type([] __device__(PdaTokenT const token) -> bool { - switch (token) { - case token_t::FieldNameBegin: - case token_t::StructBegin: - case token_t::ListBegin: return true; - default: return false; - }; - }); + auto const does_push = + cuda::proclaim_return_type([] __device__(PdaTokenT const token) -> bool { + switch (token) { + case token_t::FieldNameBegin: + case token_t::StructBegin: + case token_t::ListBegin: return true; + default: return false; + }; + }); auto const push_pop_it = thrust::make_transform_iterator( - tokens.begin(), cuda::proclaim_return_type([does_push, does_pop] __device__(PdaTokenT const token) -> cudf::size_type { - return does_push(token) - does_pop(token); - })); + tokens.begin(), + cuda::proclaim_return_type( + [does_push, does_pop] __device__(PdaTokenT const token) -> cudf::size_type { + return does_push(token) - does_pop(token); + })); thrust::exclusive_scan( rmm::exec_policy(stream), push_pop_it, push_pop_it + tokens.size(), token_levels.begin()); @@ -304,20 +308,20 @@ rmm::device_uvector compute_parent_node_ids( rmm::device_uvector const& node_token_ids, rmm::cuda_stream_view stream) { - auto const first_childs_parent_token_id = cuda::proclaim_return_type([tokens = - tokens.begin()] __device__(auto i) -> NodeIndexT { - if (i <= 0) { return -1; } - if (tokens[i - 1] == token_t::StructBegin || tokens[i - 1] == token_t::ListBegin) { - return i - 1; - } else if (tokens[i - 1] == token_t::FieldNameEnd) { - return i - 2; - } else if (tokens[i - 1] == token_t::StructMemberBegin && - (tokens[i - 2] == token_t::StructBegin || tokens[i - 2] == token_t::ListBegin)) { - return i - 2; - } else { - return -1; - } - }); + auto const first_childs_parent_token_id = cuda::proclaim_return_type( + [tokens = tokens.begin()] __device__(auto i) -> NodeIndexT { + if (i <= 0) { return -1; } + if (tokens[i - 1] == token_t::StructBegin || tokens[i - 1] == token_t::ListBegin) { + return i - 1; + } else if (tokens[i - 1] == token_t::FieldNameEnd) { + return i - 2; + } else if (tokens[i - 1] == token_t::StructMemberBegin && + (tokens[i - 2] == token_t::StructBegin || tokens[i - 2] == token_t::ListBegin)) { + return i - 2; + } else { + return -1; + } + }); auto parent_node_ids = rmm::device_uvector(num_nodes, stream); thrust::transform( @@ -325,14 +329,15 @@ rmm::device_uvector compute_parent_node_ids( node_token_ids.begin(), node_token_ids.end(), parent_node_ids.begin(), - cuda::proclaim_return_type([node_ids_gpu = node_token_ids.begin(), num_nodes, first_childs_parent_token_id] __device__( - NodeIndexT const tid) -> NodeIndexT { - auto const pid = first_childs_parent_token_id(tid); - return pid < 0 - ? cudf::io::json::parent_node_sentinel - : thrust::lower_bound(thrust::seq, node_ids_gpu, node_ids_gpu + num_nodes, pid) - - node_ids_gpu; - })); + cuda::proclaim_return_type( + [node_ids_gpu = node_token_ids.begin(), num_nodes, first_childs_parent_token_id] __device__( + NodeIndexT const tid) -> NodeIndexT { + auto const pid = first_childs_parent_token_id(tid); + return pid < 0 + ? cudf::io::json::parent_node_sentinel + : thrust::lower_bound(thrust::seq, node_ids_gpu, node_ids_gpu + num_nodes, pid) - + node_ids_gpu; + })); // Propagate parent node to siblings from first sibling - inplace. auto const node_levels = compute_node_levels(num_nodes, tokens, stream); @@ -358,20 +363,21 @@ rmm::device_uvector check_key_or_value_nodes( transform_it, transform_it + parent_node_ids.size(), key_or_value.begin(), - cuda::proclaim_return_type([key_sentinel = key_sentinel, - value_sentinel = value_sentinel, - parent_ids = parent_node_ids.begin()] __device__(auto const node_id) -> int8_t { - if (parent_ids[node_id] > 0) { - auto const grand_parent = parent_ids[parent_ids[node_id]]; - if (grand_parent == 0) { - return key_sentinel; - } else if (parent_ids[grand_parent] == 0) { - return value_sentinel; + cuda::proclaim_return_type( + [key_sentinel = key_sentinel, + value_sentinel = value_sentinel, + parent_ids = parent_node_ids.begin()] __device__(auto const node_id) -> int8_t { + if (parent_ids[node_id] > 0) { + auto const grand_parent = parent_ids[parent_ids[node_id]]; + if (grand_parent == 0) { + return key_sentinel; + } else if (parent_ids[grand_parent] == 0) { + return value_sentinel; + } } - } - return 0; - })); + return 0; + })); #ifdef DEBUG_FROM_JSON print_debug(key_or_value, "Nodes are key/value (1==key, 2==value)", ", ", stream); @@ -392,53 +398,58 @@ struct node_ranges_fn { __device__ thrust::pair operator()(cudf::size_type node_id) const { - [[maybe_unused]] auto const is_begin_of_section = cuda::proclaim_return_type([] __device__(PdaTokenT const token) { - switch (token) { - case token_t::StructBegin: - case token_t::ListBegin: - case token_t::StringBegin: - case token_t::ValueBegin: - case token_t::FieldNameBegin: return true; - default: return false; - }; - }); + [[maybe_unused]] auto const is_begin_of_section = + cuda::proclaim_return_type([] __device__(PdaTokenT const token) { + switch (token) { + case token_t::StructBegin: + case token_t::ListBegin: + case token_t::StringBegin: + case token_t::ValueBegin: + case token_t::FieldNameBegin: return true; + default: return false; + }; + }); // The end-of-* partner token for a given beginning-of-* token - auto const end_of_partner = cuda::proclaim_return_type([] __device__(PdaTokenT const token) { - switch (token) { - case token_t::StructBegin: return token_t::StructEnd; - case token_t::ListBegin: return token_t::ListEnd; - case token_t::StringBegin: return token_t::StringEnd; - case token_t::ValueBegin: return token_t::ValueEnd; - case token_t::FieldNameBegin: return token_t::FieldNameEnd; - default: return token_t::ErrorBegin; - }; - }); + auto const end_of_partner = + cuda::proclaim_return_type([] __device__(PdaTokenT const token) { + switch (token) { + case token_t::StructBegin: return token_t::StructEnd; + case token_t::ListBegin: return token_t::ListEnd; + case token_t::StringBegin: return token_t::StringEnd; + case token_t::ValueBegin: return token_t::ValueEnd; + case token_t::FieldNameBegin: return token_t::FieldNameEnd; + default: return token_t::ErrorBegin; + }; + }); // Encode a fixed value for nested node types (list+struct). - auto const nested_node_to_value = cuda::proclaim_return_type([] __device__(PdaTokenT const token) -> int32_t { - switch (token) { - case token_t::StructBegin: return 1; - case token_t::StructEnd: return -1; - case token_t::ListBegin: return 1 << 8; - case token_t::ListEnd: return -(1 << 8); - default: return 0; - }; - }); - - auto const get_token_index = cuda::proclaim_return_type([include_quote_char = include_quote_char] __device__( - PdaTokenT const token, SymbolOffsetT const token_index) { - constexpr SymbolOffsetT quote_char_size = 1; - switch (token) { - // Strip off quote char included for StringBegin - case token_t::StringBegin: return token_index + (include_quote_char ? 0 : quote_char_size); - // Strip off or Include trailing quote char for string values for StringEnd - case token_t::StringEnd: return token_index + (include_quote_char ? quote_char_size : 0); - // Strip off quote char included for FieldNameBegin - case token_t::FieldNameBegin: return token_index + quote_char_size; - default: return token_index; - }; - }); + auto const nested_node_to_value = + cuda::proclaim_return_type([] __device__(PdaTokenT const token) -> int32_t { + switch (token) { + case token_t::StructBegin: return 1; + case token_t::StructEnd: return -1; + case token_t::ListBegin: return 1 << 8; + case token_t::ListEnd: return -(1 << 8); + default: return 0; + }; + }); + + auto const get_token_index = cuda::proclaim_return_type( + [include_quote_char = include_quote_char] __device__(PdaTokenT const token, + SymbolOffsetT const token_index) { + constexpr SymbolOffsetT quote_char_size = 1; + switch (token) { + // Strip off quote char included for StringBegin + case token_t::StringBegin: + return token_index + (include_quote_char ? 0 : quote_char_size); + // Strip off or Include trailing quote char for string values for StringEnd + case token_t::StringEnd: return token_index + (include_quote_char ? quote_char_size : 0); + // Strip off quote char included for FieldNameBegin + case token_t::FieldNameBegin: return token_index + quote_char_size; + default: return token_index; + }; + }); if (key_or_value[node_id] != key_sentinel && key_or_value[node_id] != value_sentinel) { return thrust::make_pair(0, 0); @@ -531,13 +542,15 @@ std::unique_ptr extract_keys_or_values( rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto const is_key = cuda::proclaim_return_type([key_or_value = key_or_value.begin()] __device__(auto const node_id) { - return key_or_value[node_id] == key_sentinel; - }); + auto const is_key = cuda::proclaim_return_type( + [key_or_value = key_or_value.begin()] __device__(auto const node_id) { + return key_or_value[node_id] == key_sentinel; + }); - auto const is_value = cuda::proclaim_return_type([key_or_value = key_or_value.begin()] __device__(auto const node_id) { - return key_or_value[node_id] == value_sentinel; - }); + auto const is_value = cuda::proclaim_return_type( + [key_or_value = key_or_value.begin()] __device__(auto const node_id) { + return key_or_value[node_id] == value_sentinel; + }); auto extract_ranges = rmm::device_uvector>(num_nodes, stream, mr); @@ -576,17 +589,19 @@ rmm::device_uvector compute_list_offsets( // For the nodes having parent_id == 0 (they are json object given by one input row), set their // child counts to zero. Otherwise, set child counts to `-1` (a sentinel number). - thrust::transform(rmm::exec_policy(stream), - parent_node_ids.begin(), - parent_node_ids.end(), - node_child_counts.begin(), - cuda::proclaim_return_type([] __device__(auto const parent_id) -> NodeIndexT { - return parent_id == 0 ? 0 : std::numeric_limits::lowest(); - })); - - auto const is_key = cuda::proclaim_return_type([key_or_value = key_or_value.begin()] __device__(auto const node_id) { - return key_or_value[node_id] == key_sentinel; - }); + thrust::transform( + rmm::exec_policy(stream), + parent_node_ids.begin(), + parent_node_ids.end(), + node_child_counts.begin(), + cuda::proclaim_return_type([] __device__(auto const parent_id) -> NodeIndexT { + return parent_id == 0 ? 0 : std::numeric_limits::lowest(); + })); + + auto const is_key = cuda::proclaim_return_type( + [key_or_value = key_or_value.begin()] __device__(auto const node_id) { + return key_or_value[node_id] == key_sentinel; + }); // Count the number of keys for each json object using `atomicAdd`. auto const transform_it = thrust::counting_iterator(0); diff --git a/src/main/cpp/src/murmur_hash.cu b/src/main/cpp/src/murmur_hash.cu index 7059e38932..17ec120b5a 100644 --- a/src/main/cpp/src/murmur_hash.cu +++ b/src/main/cpp/src/murmur_hash.cu @@ -79,10 +79,11 @@ class murmur_device_row_hasher { _table.begin(), _table.end(), _seed, - cuda::proclaim_return_type([row_index, nulls = this->_check_nulls] __device__(auto hash, auto column) { - return cudf::type_dispatcher( - column.type(), element_hasher_adapter{nulls, hash}, column, row_index); - })); + cuda::proclaim_return_type( + [row_index, nulls = this->_check_nulls] __device__(auto hash, auto column) { + return cudf::type_dispatcher( + column.type(), element_hasher_adapter{nulls, hash}, column, row_index); + })); } private: diff --git a/src/main/cpp/src/parse_uri.cu b/src/main/cpp/src/parse_uri.cu index 6251f647bd..a1094959e4 100644 --- a/src/main/cpp/src/parse_uri.cu +++ b/src/main/cpp/src/parse_uri.cu @@ -386,90 +386,97 @@ chunk_validity __device__ validate_host(string_view host) bool __device__ validate_query(string_view query) { // query can be alphanum and _-!.~'()*,;:$&+=?/[]@" - return validate_chunk(query, cuda::proclaim_return_type([] __device__(string_view::const_iterator iter) { - auto const c = *iter; - if (c != '!' && c != '"' && c != '$' && !(c >= '&' && c <= ';') && c != '=' && - !(c >= '?' && c <= ']' && c != '\\') && !(c >= 'a' && c <= 'z') && c != '_' && c != '~') { - return false; - } - return true; - })); -} - -bool __device__ validate_authority(string_view authority, bool allow_invalid_escapes) -{ - // authority needs to be alphanum and @[]_-!.'()*,;:$&+= return validate_chunk( - authority, - cuda::proclaim_return_type([allow_invalid_escapes] __device__(string_view::const_iterator iter) { + query, cuda::proclaim_return_type([] __device__(string_view::const_iterator iter) { auto const c = *iter; - if (c != '!' && c != '$' && !(c >= '&' && c <= ';' && c != '/') && c != '=' && - !(c >= '@' && c <= '_' && c != '^' && c != '\\') && !(c >= 'a' && c <= 'z') && c != '~' && - (!allow_invalid_escapes || c != '%')) { + if (c != '!' && c != '"' && c != '$' && !(c >= '&' && c <= ';') && c != '=' && + !(c >= '?' && c <= ']' && c != '\\') && !(c >= 'a' && c <= 'z') && c != '_' && c != '~') { return false; } return true; - }), - allow_invalid_escapes); + })); +} + +bool __device__ validate_authority(string_view authority, bool allow_invalid_escapes) +{ + // authority needs to be alphanum and @[]_-!.'()*,;:$&+= + return validate_chunk(authority, + cuda::proclaim_return_type( + [allow_invalid_escapes] __device__(string_view::const_iterator iter) { + auto const c = *iter; + if (c != '!' && c != '$' && !(c >= '&' && c <= ';' && c != '/') && + c != '=' && !(c >= '@' && c <= '_' && c != '^' && c != '\\') && + !(c >= 'a' && c <= 'z') && c != '~' && + (!allow_invalid_escapes || c != '%')) { + return false; + } + return true; + }), + allow_invalid_escapes); } bool __device__ validate_userinfo(string_view userinfo) { // can't be ] or [ in here - return validate_chunk(userinfo, cuda::proclaim_return_type([] __device__(string_view::const_iterator iter) { - auto const c = *iter; - if (c == '[' || c == ']') { return false; } - return true; - })); + return validate_chunk( + userinfo, cuda::proclaim_return_type([] __device__(string_view::const_iterator iter) { + auto const c = *iter; + if (c == '[' || c == ']') { return false; } + return true; + })); } bool __device__ validate_port(string_view port) { // port is positive numeric >=0 according to spark...shrug - return validate_chunk(port, cuda::proclaim_return_type([] __device__(string_view::const_iterator iter) { - auto const c = *iter; - if (c < '0' && c > '9') { return false; } - return true; - })); + return validate_chunk( + port, cuda::proclaim_return_type([] __device__(string_view::const_iterator iter) { + auto const c = *iter; + if (c < '0' && c > '9') { return false; } + return true; + })); } bool __device__ validate_path(string_view path) { // path can be alphanum and @[]_-!.~'()*?/&,;:$+= - return validate_chunk(path, cuda::proclaim_return_type([] __device__(string_view::const_iterator iter) { - auto const c = *iter; - if (c != '!' && c != '$' && !(c >= '&' && c <= ';') && c != '=' && !(c >= '@' && c <= 'Z') && - c != '_' && !(c >= 'a' && c <= 'z') && c != '~') { - return false; - } - return true; - })); + return validate_chunk( + path, cuda::proclaim_return_type([] __device__(string_view::const_iterator iter) { + auto const c = *iter; + if (c != '!' && c != '$' && !(c >= '&' && c <= ';') && c != '=' && !(c >= '@' && c <= 'Z') && + c != '_' && !(c >= 'a' && c <= 'z') && c != '~') { + return false; + } + return true; + })); } bool __device__ validate_opaque(string_view opaque) { // opaque can be alphanum and @[]_-!.~'()*?/,;:$@+= - return validate_chunk(opaque, cuda::proclaim_return_type([] __device__(string_view::const_iterator iter) { - auto const c = *iter; - if (c != '!' && c != '$' && !(c >= '&' && c <= ';') && c != '=' && - !(c >= '?' && c <= ']' && c != '\\') && c != '_' && c != '~' && !(c >= 'a' && c <= 'z')) { - return false; - } - return true; - })); + return validate_chunk( + opaque, cuda::proclaim_return_type([] __device__(string_view::const_iterator iter) { + auto const c = *iter; + if (c != '!' && c != '$' && !(c >= '&' && c <= ';') && c != '=' && + !(c >= '?' && c <= ']' && c != '\\') && c != '_' && c != '~' && !(c >= 'a' && c <= 'z')) { + return false; + } + return true; + })); } bool __device__ validate_fragment(string_view fragment) { // fragment can be alphanum and @[]_-!.~'()*?/,;:$&+= - return validate_chunk(fragment, cuda::proclaim_return_type([] __device__(string_view::const_iterator iter) { - auto const c = *iter; - if (c != '!' && c != '$' && !(c >= '&' && c <= ';') && c != '=' && - !(c >= '?' && c <= ']' && c != '\\') && c != '_' && c != '~' && !(c >= 'a' && c <= 'z')) { - return false; - } - return true; - })); + return validate_chunk( + fragment, cuda::proclaim_return_type([] __device__(string_view::const_iterator iter) { + auto const c = *iter; + if (c != '!' && c != '$' && !(c >= '&' && c <= ';') && c != '=' && + !(c >= '?' && c <= ']' && c != '\\') && c != '_' && c != '~' && !(c >= 'a' && c <= 'z')) { + return false; + } + return true; + })); } uri_parts __device__ validate_uri(const char* str, int len) diff --git a/src/main/cpp/src/row_conversion.cu b/src/main/cpp/src/row_conversion.cu index 58782b215c..d5c00f91d3 100644 --- a/src/main/cpp/src/row_conversion.cu +++ b/src/main/cpp/src/row_conversion.cu @@ -259,10 +259,11 @@ build_string_row_offsets(table_view const& tbl, d_row_sizes.begin(), d_row_sizes.end(), d_row_sizes.begin(), - cuda::proclaim_return_type([fixed_width_and_validity_size] __device__(auto row_size) { - return util::round_up_unsafe(fixed_width_and_validity_size + row_size, - JCUDF_ROW_ALIGNMENT); - })); + cuda::proclaim_return_type( + [fixed_width_and_validity_size] __device__(auto row_size) { + return util::round_up_unsafe(fixed_width_and_validity_size + row_size, + JCUDF_ROW_ALIGNMENT); + })); return {std::move(d_row_sizes), std::move(d_offsets_iterators)}; } @@ -1657,12 +1658,13 @@ int compute_tile_counts(device_span const& batch_row_boundaries iter, iter + num_batches, num_tiles.begin(), - cuda::proclaim_return_type([desired_tile_height, - batch_row_boundaries = batch_row_boundaries.data()] __device__(auto batch_index) -> size_type { - return util::div_rounding_up_unsafe( - batch_row_boundaries[batch_index + 1] - batch_row_boundaries[batch_index], - desired_tile_height); - })); + cuda::proclaim_return_type( + [desired_tile_height, batch_row_boundaries = batch_row_boundaries.data()] __device__( + auto batch_index) -> size_type { + return util::div_rounding_up_unsafe( + batch_row_boundaries[batch_index + 1] - batch_row_boundaries[batch_index], + desired_tile_height); + })); return thrust::reduce(rmm::exec_policy(stream), num_tiles.begin(), num_tiles.end()); } @@ -1695,21 +1697,24 @@ size_type build_tiles( iter, iter + num_batches, num_tiles.begin(), - cuda::proclaim_return_type([desired_tile_height, - batch_row_boundaries = batch_row_boundaries.data()] __device__(auto batch_index) -> size_type { - return util::div_rounding_up_unsafe( - batch_row_boundaries[batch_index + 1] - batch_row_boundaries[batch_index], - desired_tile_height); - })); + cuda::proclaim_return_type( + [desired_tile_height, batch_row_boundaries = batch_row_boundaries.data()] __device__( + auto batch_index) -> size_type { + return util::div_rounding_up_unsafe( + batch_row_boundaries[batch_index + 1] - batch_row_boundaries[batch_index], + desired_tile_height); + })); size_type const total_tiles = thrust::reduce(rmm::exec_policy(stream), num_tiles.begin(), num_tiles.end()); device_uvector tile_starts(num_batches + 1, stream); auto tile_iter = cudf::detail::make_counting_transform_iterator( - 0, cuda::proclaim_return_type([num_tiles = num_tiles.data(), num_batches] __device__(auto i) { - return (i < num_batches) ? num_tiles[i] : 0; - })); + 0, + cuda::proclaim_return_type( + [num_tiles = num_tiles.data(), num_batches] __device__(auto i) { + return (i < num_batches) ? num_tiles[i] : 0; + })); thrust::exclusive_scan(rmm::exec_policy(stream), tile_iter, tile_iter + num_batches + 1, @@ -1720,31 +1725,33 @@ size_type build_tiles( iter, iter + total_tiles, tiles.begin(), - cuda::proclaim_return_type([ =, - tile_starts = tile_starts.data(), - batch_row_boundaries = batch_row_boundaries.data()] __device__(size_type tile_index) { - // what batch this tile falls in - auto const batch_index_iter = - thrust::upper_bound(thrust::seq, tile_starts, tile_starts + num_batches, tile_index); - auto const batch_index = std::distance(tile_starts, batch_index_iter) - 1; - // local index within the tile - int const local_tile_index = tile_index - tile_starts[batch_index]; - // the start row for this batch. - int const batch_row_start = batch_row_boundaries[batch_index]; - // the start row for this tile - int const tile_row_start = batch_row_start + (local_tile_index * desired_tile_height); - // the end row for this tile - int const max_row = std::min(total_number_of_rows - 1, - batch_index + 1 > num_batches - ? std::numeric_limits::max() - : static_cast(batch_row_boundaries[batch_index + 1]) - 1); - int const tile_row_end = - std::min(batch_row_start + ((local_tile_index + 1) * desired_tile_height) - 1, max_row); - - // stuff the tile - return tile_info{ - column_start, tile_row_start, column_end, tile_row_end, static_cast(batch_index)}; - })); + cuda::proclaim_return_type( + [ =, + tile_starts = tile_starts.data(), + batch_row_boundaries = batch_row_boundaries.data()] __device__(size_type tile_index) { + // what batch this tile falls in + auto const batch_index_iter = + thrust::upper_bound(thrust::seq, tile_starts, tile_starts + num_batches, tile_index); + auto const batch_index = std::distance(tile_starts, batch_index_iter) - 1; + // local index within the tile + int const local_tile_index = tile_index - tile_starts[batch_index]; + // the start row for this batch. + int const batch_row_start = batch_row_boundaries[batch_index]; + // the start row for this tile + int const tile_row_start = batch_row_start + (local_tile_index * desired_tile_height); + // the end row for this tile + int const max_row = + std::min(total_number_of_rows - 1, + batch_index + 1 > num_batches + ? std::numeric_limits::max() + : static_cast(batch_row_boundaries[batch_index + 1]) - 1); + int const tile_row_end = + std::min(batch_row_start + ((local_tile_index + 1) * desired_tile_height) - 1, max_row); + + // stuff the tile + return tile_info{ + column_start, tile_row_start, column_end, tile_row_end, static_cast(batch_index)}; + })); return total_tiles; } @@ -2346,7 +2353,8 @@ std::unique_ptr convert_from_rows(lists_column_view const& input, thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_batches), gpu_batch_row_boundaries.begin(), - cuda::proclaim_return_type([num_rows] __device__(auto i) { return i == 0 ? 0 : num_rows; })); + cuda::proclaim_return_type( + [num_rows] __device__(auto i) { return i == 0 ? 0 : num_rows; })); int info_count = 0; detail::determine_tiles(column_info.column_sizes, @@ -2458,9 +2466,10 @@ std::unique_ptr
convert_from_rows(lists_column_view const& input, std::vector string_data_col_ptrs; for (auto& col_string_lengths : string_lengths) { device_uvector output_string_offsets(num_rows + 1, stream, mr); - auto tmp = cuda::proclaim_return_type([num_rows, col_string_lengths] __device__(auto const& i) { - return i < num_rows ? col_string_lengths[i] : 0; - }); + auto tmp = cuda::proclaim_return_type( + [num_rows, col_string_lengths] __device__(auto const& i) { + return i < num_rows ? col_string_lengths[i] : 0; + }); auto bounded_iter = cudf::detail::make_counting_transform_iterator(0, tmp); thrust::exclusive_scan(rmm::exec_policy(stream), bounded_iter, diff --git a/src/main/cpp/src/utilities.cu b/src/main/cpp/src/utilities.cu index e5c9c0eba0..7c202a1bec 100644 --- a/src/main/cpp/src/utilities.cu +++ b/src/main/cpp/src/utilities.cu @@ -53,18 +53,19 @@ std::unique_ptr bitmask_bitwise_or( std::unique_ptr out = std::make_unique(mask_size * sizeof(cudf::bitmask_type), stream, mr); - thrust::transform( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(0) + mask_size, - static_cast(out->data()), - cuda::proclaim_return_type([buffers = d_input.data(), num_buffers = input.size()] __device__(cudf::size_type word_index) { - cudf::bitmask_type out = buffers[0][word_index]; - for (auto idx = 1; idx < num_buffers; idx++) { - out |= buffers[idx][word_index]; - } - return out; - })); + thrust::transform(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(0) + mask_size, + static_cast(out->data()), + cuda::proclaim_return_type( + [buffers = d_input.data(), + num_buffers = input.size()] __device__(cudf::size_type word_index) { + cudf::bitmask_type out = buffers[0][word_index]; + for (auto idx = 1; idx < num_buffers; idx++) { + out |= buffers[idx][word_index]; + } + return out; + })); return out; } diff --git a/src/main/cpp/src/xxhash64.cu b/src/main/cpp/src/xxhash64.cu index 46fe02690e..8c0b1b8766 100644 --- a/src/main/cpp/src/xxhash64.cu +++ b/src/main/cpp/src/xxhash64.cu @@ -288,10 +288,11 @@ class device_row_hasher { _table.begin(), _table.end(), _seed, - cuda::proclaim_return_type([row_index, nulls = _check_nulls] __device__(auto hash, auto column) { - return cudf::type_dispatcher( - column.type(), element_hasher_adapter{}, column, row_index, nulls, hash); - })); + cuda::proclaim_return_type( + [row_index, nulls = _check_nulls] __device__(auto hash, auto column) { + return cudf::type_dispatcher( + column.type(), element_hasher_adapter{}, column, row_index, nulls, hash); + })); } /** diff --git a/src/main/cpp/src/zorder.cu b/src/main/cpp/src/zorder.cu index 5c655bb06f..f9c2d4da07 100644 --- a/src/main/cpp/src/zorder.cu +++ b/src/main/cpp/src/zorder.cu @@ -255,18 +255,20 @@ std::unique_ptr hilbert_index(int32_t const num_bits_per_entry, thrust::make_counting_iterator(0), thrust::make_counting_iterator(0) + num_rows, output_dv_ptr->begin(), - cuda::proclaim_return_type([num_bits_per_entry, num_columns, input = *input_dv] __device__(cudf::size_type row_index) { - uint_backed_array row(num_bits_per_entry); - for (cudf::size_type column_index = 0; column_index < num_columns; column_index++) { - auto const column = input.column(column_index); - uint32_t const data = column.is_valid(row_index) ? column.data()[row_index] : 0; - row.set(column_index, data); - } - - auto const transposed_index = hilbert_transposed_index(row, num_bits_per_entry, num_columns); - return static_cast( - to_hilbert_index(transposed_index, num_bits_per_entry, num_columns)); - })); + cuda::proclaim_return_type( + [num_bits_per_entry, num_columns, input = *input_dv] __device__(cudf::size_type row_index) { + uint_backed_array row(num_bits_per_entry); + for (cudf::size_type column_index = 0; column_index < num_columns; column_index++) { + auto const column = input.column(column_index); + uint32_t const data = column.is_valid(row_index) ? column.data()[row_index] : 0; + row.set(column_index, data); + } + + auto const transposed_index = + hilbert_transposed_index(row, num_bits_per_entry, num_columns); + return static_cast( + to_hilbert_index(transposed_index, num_bits_per_entry, num_columns)); + })); return output_data_col; }