Skip to content

Commit

Permalink
Use cuda::proclaim_return_type on device lambdas (NVIDIA#1662)
Browse files Browse the repository at this point in the history
* adding proclaim_return_type to device lambdas

Signed-off-by: Mike Wilson <knobby@burntsheep.com>

* clang-format

Signed-off-by: Mike Wilson <knobby@burntsheep.com>

* No cuda::proclaim_return_type on non-device lambda

Signed-off-by: Mike Wilson <knobby@burntsheep.com>

* Adding Mithun's changes for CCCL 2

Signed-off-by: Mike Wilson <knobby@burntsheep.com>

* linting

Signed-off-by: Mike Wilson <knobby@burntsheep.com>

* updating return type

Signed-off-by: Mike Wilson <knobby@burntsheep.com>

* Update src/main/cpp/CMakeLists.txt

Co-authored-by: Bradley Dice <bdice@bradleydice.com>

* Update jni

Signed-off-by: Nghia Truong <nghiat@nvidia.com>

* Apply suggestions from code review

* Fix styles

Signed-off-by: Nghia Truong <nghiat@nvidia.com>

* linting

Signed-off-by: Mike Wilson <knobby@burntsheep.com>

* Update submodule manually

Signed-off-by: Nghia Truong <nghiat@nvidia.com>

* Fix header

Signed-off-by: Nghia Truong <nghiat@nvidia.com>

---------

Signed-off-by: Mike Wilson <knobby@burntsheep.com>
Signed-off-by: Nghia Truong <nghiat@nvidia.com>
Co-authored-by: Bradley Dice <bdice@bradleydice.com>
Co-authored-by: Nghia Truong <nghiat@nvidia.com>
  • Loading branch information
3 people authored Dec 21, 2023
1 parent 98dc423 commit 763406c
Show file tree
Hide file tree
Showing 16 changed files with 367 additions and 4,018 deletions.
8 changes: 2 additions & 6 deletions src/main/cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -94,11 +94,8 @@ include(cmake/Modules/ConfigureCUDA.cmake) # set other CUDA compilation flags
# ##################################################################################################
# * dependencies ----------------------------------------------------------------------------------

# find libcu++
include(${rapids-cmake-dir}/cpm/libcudacxx.cmake)

# find thrust/cub
include(${CUDF_DIR}/cpp/cmake/thirdparty/get_thrust.cmake)
# find CCCL
include(${CUDF_DIR}/cpp/cmake/thirdparty/get_cccl.cmake)

# JNI
find_package(JNI REQUIRED)
Expand Down Expand Up @@ -174,7 +171,6 @@ add_library(
src/map_utils.cu
src/murmur_hash.cu
src/parse_uri.cu
src/row_conversion.cu
src/timezones.cu
src/utilities.cu
src/xxhash64.cu
Expand Down
16 changes: 8 additions & 8 deletions src/main/cpp/benchmarks/row_conversion.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -48,15 +48,15 @@ void fixed_width(nvbench::state& state)
bytes_per_row += cudf::size_of(t);
}

auto rows = spark_rapids_jni::convert_to_rows_fixed_width_optimized(table->view());
auto rows = cudf::convert_to_rows_fixed_width_optimized(table->view());

state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
if (direction == "to row") {
auto _rows = spark_rapids_jni::convert_to_rows_fixed_width_optimized(table->view());
auto _rows = cudf::convert_to_rows_fixed_width_optimized(table->view());
} else {
for (auto const& r : rows) {
cudf::lists_column_view const l(r->view());
auto out = spark_rapids_jni::convert_from_rows_fixed_width_optimized(l, schema);
auto out = cudf::convert_from_rows_fixed_width_optimized(l, schema);
}
}
});
Expand Down Expand Up @@ -117,16 +117,16 @@ static void variable_or_fixed_width(nvbench::state& state)
}
}

auto rows = spark_rapids_jni::convert_to_rows(table->view());
auto rows = cudf::convert_to_rows(table->view());

state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
auto new_rows = spark_rapids_jni::convert_to_rows(table->view());
auto new_rows = cudf::convert_to_rows(table->view());
if (direction == "to row") {
auto _rows = spark_rapids_jni::convert_to_rows(table->view());
auto _rows = cudf::convert_to_rows(table->view());
} else {
for (auto const& r : rows) {
cudf::lists_column_view const l(r->view());
auto out = spark_rapids_jni::convert_from_rows(l, schema);
auto out = cudf::convert_from_rows(l, schema);
}
}
});
Expand Down
15 changes: 7 additions & 8 deletions src/main/cpp/src/RowConversionJni.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@

#include "cudf_jni_apis.hpp"
#include "dtype_utils.hpp"
#include "row_conversion.hpp"

#include <cudf/row_conversion.hpp>

extern "C" {

Expand All @@ -31,7 +32,7 @@ Java_com_nvidia_spark_rapids_jni_RowConversion_convertToRowsFixedWidthOptimized(
cudf::jni::auto_set_device(env);
cudf::table_view const* n_input_table = reinterpret_cast<cudf::table_view const*>(input_table);
std::vector<std::unique_ptr<cudf::column>> cols =
spark_rapids_jni::convert_to_rows_fixed_width_optimized(*n_input_table);
cudf::convert_to_rows_fixed_width_optimized(*n_input_table);
int const num_columns = cols.size();
cudf::jni::native_jlongArray outcol_handles(env, num_columns);
std::transform(cols.begin(), cols.end(), outcol_handles.begin(), [](auto& col) {
Expand All @@ -50,9 +51,8 @@ Java_com_nvidia_spark_rapids_jni_RowConversion_convertToRows(JNIEnv* env, jclass
try {
cudf::jni::auto_set_device(env);
cudf::table_view const* n_input_table = reinterpret_cast<cudf::table_view const*>(input_table);
std::vector<std::unique_ptr<cudf::column>> cols =
spark_rapids_jni::convert_to_rows(*n_input_table);
int const num_columns = cols.size();
std::vector<std::unique_ptr<cudf::column>> cols = cudf::convert_to_rows(*n_input_table);
int const num_columns = cols.size();
cudf::jni::native_jlongArray outcol_handles(env, num_columns);
std::transform(cols.begin(), cols.end(), outcol_handles.begin(), [](auto& col) {
return cudf::jni::release_as_jlong(col);
Expand Down Expand Up @@ -84,7 +84,7 @@ Java_com_nvidia_spark_rapids_jni_RowConversion_convertFromRowsFixedWidthOptimize
std::back_inserter(types_vec),
[](jint type, jint scale) { return cudf::jni::make_data_type(type, scale); });
std::unique_ptr<cudf::table> result =
spark_rapids_jni::convert_from_rows_fixed_width_optimized(list_input, types_vec);
cudf::convert_from_rows_fixed_width_optimized(list_input, types_vec);
return cudf::jni::convert_table_for_return(env, result);
}
CATCH_STD(env, 0);
Expand All @@ -110,8 +110,7 @@ JNIEXPORT jlongArray JNICALL Java_com_nvidia_spark_rapids_jni_RowConversion_conv
n_scale.begin(),
std::back_inserter(types_vec),
[](jint type, jint scale) { return cudf::jni::make_data_type(type, scale); });
std::unique_ptr<cudf::table> result =
spark_rapids_jni::convert_from_rows(list_input, types_vec);
std::unique_ptr<cudf::table> result = cudf::convert_from_rows(list_input, types_vec);
return cudf::jni::convert_table_for_return(env, result);
}
CATCH_STD(env, 0);
Expand Down
19 changes: 11 additions & 8 deletions src/main/cpp/src/bloom_filter.cu
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@

#include <thrust/logical.h>

#include <cuda/functional>

#include <byteswap.h>

namespace spark_rapids_jni {
Expand Down Expand Up @@ -316,14 +318,15 @@ std::unique_ptr<cudf::list_scalar> bloom_filter_merge(cudf::column_view const& b
thrust::make_counting_iterator(0),
thrust::make_counting_iterator(0) + num_words,
dst,
[src, num_buffers = bloom_filters.size(), stride = buf_size] __device__(
cudf::size_type word_index) {
cudf::bitmask_type out = (reinterpret_cast<cudf::bitmask_type const*>(src))[word_index];
for (auto idx = 1; idx < num_buffers; idx++) {
out |= (reinterpret_cast<cudf::bitmask_type const*>(src + idx * stride))[word_index];
}
return out;
});
cuda::proclaim_return_type<cudf::bitmask_type>(
[src, num_buffers = bloom_filters.size(), stride = buf_size] __device__(
cudf::size_type word_index) {
cudf::bitmask_type out = (reinterpret_cast<cudf::bitmask_type const*>(src))[word_index];
for (auto idx = 1; idx < num_buffers; idx++) {
out |= (reinterpret_cast<cudf::bitmask_type const*>(src + idx * stride))[word_index];
}
return out;
}));

// create the 1-row list column and move it into a scalar.
return std::make_unique<cudf::list_scalar>(
Expand Down
192 changes: 99 additions & 93 deletions src/main/cpp/src/datetime_rebase.cu
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
#include <thrust/iterator/counting_iterator.h>
#include <thrust/transform.h>

#include <cuda/functional>

namespace {

// Convert a date in Julian calendar to the number of days since epoch.
Expand Down Expand Up @@ -73,28 +75,29 @@ std::unique_ptr<cudf::column> gregorian_to_julian_days(cudf::column_view const&
thrust::make_counting_iterator(0),
thrust::make_counting_iterator(input.size()),
output->mutable_view().begin<cudf::timestamp_D>(),
[d_input = input.begin<cudf::timestamp_D>()] __device__(auto const idx) {
auto constexpr julian_end = cuda::std::chrono::year_month_day{
cuda::std::chrono::year{1582}, cuda::std::chrono::month{10}, cuda::std::chrono::day{4}};
auto constexpr gregorian_start = cuda::std::chrono::year_month_day{
cuda::std::chrono::year{1582}, cuda::std::chrono::month{10}, cuda::std::chrono::day{15}};

auto const days_ts = d_input[idx].time_since_epoch().count();
auto const days_since_epoch = cuda::std::chrono::sys_days(cudf::duration_D{days_ts});

// Convert the input into local date in Proleptic Gregorian calendar.
auto const ymd = cuda::std::chrono::year_month_day(days_since_epoch);
if (ymd > julian_end && ymd < gregorian_start) {
// This is the same as rebasing from the local date given at `gregorian_start`.
return cudf::timestamp_D{cudf::duration_D{-141427}};
}

// No change since this time.
if (ymd >= gregorian_start) { return d_input[idx]; }

// Reinterpret year/month/day as in Julian calendar then compute the days since epoch.
return cudf::timestamp_D{cudf::duration_D{days_from_julian(ymd)}};
});
cuda::proclaim_return_type<cudf::timestamp_D>(
[d_input = input.begin<cudf::timestamp_D>()] __device__(auto const idx) {
auto constexpr julian_end = cuda::std::chrono::year_month_day{
cuda::std::chrono::year{1582}, cuda::std::chrono::month{10}, cuda::std::chrono::day{4}};
auto constexpr gregorian_start = cuda::std::chrono::year_month_day{
cuda::std::chrono::year{1582}, cuda::std::chrono::month{10}, cuda::std::chrono::day{15}};

auto const days_ts = d_input[idx].time_since_epoch().count();
auto const days_since_epoch = cuda::std::chrono::sys_days(cudf::duration_D{days_ts});

// Convert the input into local date in Proleptic Gregorian calendar.
auto const ymd = cuda::std::chrono::year_month_day(days_since_epoch);
if (ymd > julian_end && ymd < gregorian_start) {
// This is the same as rebasing from the local date given at `gregorian_start`.
return cudf::timestamp_D{cudf::duration_D{-141427}};
}

// No change since this time.
if (ymd >= gregorian_start) { return d_input[idx]; }

// Reinterpret year/month/day as in Julian calendar then compute the days since epoch.
return cudf::timestamp_D{cudf::duration_D{days_from_julian(ymd)}};
}));

return output;
}
Expand Down Expand Up @@ -142,19 +145,20 @@ std::unique_ptr<cudf::column> julian_to_gregorian_days(cudf::column_view const&
thrust::make_counting_iterator(0),
thrust::make_counting_iterator(input.size()),
output->mutable_view().begin<cudf::timestamp_D>(),
[d_input = input.begin<cudf::timestamp_D>()] __device__(auto const idx) {
auto const days_ts = d_input[idx].time_since_epoch().count();
if (days_ts >= -141427) { // Gregorian start day
return d_input[idx];
}

// Reinterpret year/month/day as in Gregorian calendar then compute the days
// since epoch.
auto const ymd = julian_from_days(days_ts);
auto const result =
cuda::std::chrono::local_days{ymd}.time_since_epoch().count();
return cudf::timestamp_D{cudf::duration_D{result}};
});
cuda::proclaim_return_type<cudf::timestamp_D>(
[d_input = input.begin<cudf::timestamp_D>()] __device__(auto const idx) {
auto const days_ts = d_input[idx].time_since_epoch().count();
if (days_ts >= -141427) { // Gregorian start day
return d_input[idx];
}

// Reinterpret year/month/day as in Gregorian calendar then compute the days
// since epoch.
auto const ymd = julian_from_days(days_ts);
auto const result =
cuda::std::chrono::local_days{ymd}.time_since_epoch().count();
return cudf::timestamp_D{cudf::duration_D{result}};
}));

return output;
}
Expand Down Expand Up @@ -242,39 +246,40 @@ std::unique_ptr<cudf::column> gregorian_to_julian_micros(cudf::column_view const
thrust::make_counting_iterator(0),
thrust::make_counting_iterator(input.size()),
output->mutable_view().begin<cudf::timestamp_us>(),
[d_input = input.begin<cudf::timestamp_us>()] __device__(auto const idx) {
// This timestamp corresponds to October 15th, 1582 UTC.
// After this day, there is no difference in microsecond values between Gregorian
// and Julian calendars.
int64_t constexpr last_switch_gregorian_ts = -12219292800000000L;

auto const micros_ts = d_input[idx].time_since_epoch().count();
if (micros_ts >= last_switch_gregorian_ts) { return d_input[idx]; }

// Convert the input into local date-time in Proleptic Gregorian calendar.
auto const days_since_epoch = cuda::std::chrono::sys_days(static_cast<cudf::duration_D>(
cuda::std::chrono::floor<cuda::std::chrono::days>(cudf::duration_us(micros_ts))));
auto const ymd = cuda::std::chrono::year_month_day(days_since_epoch);
auto const timeparts = get_time_components(micros_ts);

auto constexpr julian_end = cuda::std::chrono::year_month_day{
cuda::std::chrono::year{1582}, cuda::std::chrono::month{10}, cuda::std::chrono::day{4}};
auto constexpr gregorian_start = cuda::std::chrono::year_month_day{
cuda::std::chrono::year{1582}, cuda::std::chrono::month{10}, cuda::std::chrono::day{15}};

// Reinterpret the local date-time as in Julian calendar and compute microseconds since
// the epoch from that Julian local date-time.
// If the input date is outside of both calendars, consider it as it is a local date
// given at `gregorian_start` (-141427 Julian days since epoch).
auto const julian_days =
(ymd > julian_end && ymd < gregorian_start) ? -141427 : days_from_julian(ymd);
int64_t result = (julian_days * 24L * 3600L) + (timeparts.hour * 3600L) +
(timeparts.minute * 60L) + timeparts.second;
result *= MICROS_PER_SECOND; // to microseconds
result += timeparts.subsecond;

return cudf::timestamp_us{cudf::duration_us{result}};
});
cuda::proclaim_return_type<cudf::timestamp_us>(
[d_input = input.begin<cudf::timestamp_us>()] __device__(auto const idx) {
// This timestamp corresponds to October 15th, 1582 UTC.
// After this day, there is no difference in microsecond values between Gregorian
// and Julian calendars.
int64_t constexpr last_switch_gregorian_ts = -12219292800000000L;

auto const micros_ts = d_input[idx].time_since_epoch().count();
if (micros_ts >= last_switch_gregorian_ts) { return d_input[idx]; }

// Convert the input into local date-time in Proleptic Gregorian calendar.
auto const days_since_epoch = cuda::std::chrono::sys_days(static_cast<cudf::duration_D>(
cuda::std::chrono::floor<cuda::std::chrono::days>(cudf::duration_us(micros_ts))));
auto const ymd = cuda::std::chrono::year_month_day(days_since_epoch);
auto const timeparts = get_time_components(micros_ts);

auto constexpr julian_end = cuda::std::chrono::year_month_day{
cuda::std::chrono::year{1582}, cuda::std::chrono::month{10}, cuda::std::chrono::day{4}};
auto constexpr gregorian_start = cuda::std::chrono::year_month_day{
cuda::std::chrono::year{1582}, cuda::std::chrono::month{10}, cuda::std::chrono::day{15}};

// Reinterpret the local date-time as in Julian calendar and compute microseconds since
// the epoch from that Julian local date-time.
// If the input date is outside of both calendars, consider it as it is a local date
// given at `gregorian_start` (-141427 Julian days since epoch).
auto const julian_days =
(ymd > julian_end && ymd < gregorian_start) ? -141427 : days_from_julian(ymd);
int64_t result = (julian_days * 24L * 3600L) + (timeparts.hour * 3600L) +
(timeparts.minute * 60L) + timeparts.second;
result *= MICROS_PER_SECOND; // to microseconds
result += timeparts.subsecond;

return cudf::timestamp_us{cudf::duration_us{result}};
}));

return output;
}
Expand Down Expand Up @@ -304,31 +309,32 @@ std::unique_ptr<cudf::column> julian_to_gregorian_micros(cudf::column_view const
thrust::make_counting_iterator(0),
thrust::make_counting_iterator(input.size()),
output->mutable_view().begin<cudf::timestamp_us>(),
[d_input = input.begin<cudf::timestamp_us>()] __device__(auto const idx) {
// This timestamp corresponds to October 15th, 1582 UTC.
// After this day, there is no difference in microsecond values between Gregorian
// and Julian calendars.
int64_t constexpr last_switch_gregorian_ts = -12219292800000000L;

auto const micros_ts = d_input[idx].time_since_epoch().count();
if (micros_ts >= last_switch_gregorian_ts) { return d_input[idx]; }

// Convert the input into local date-time in Julian calendar.
auto const days_since_epoch = cuda::std::chrono::sys_days(static_cast<cudf::duration_D>(
cuda::std::chrono::floor<cuda::std::chrono::days>(cudf::duration_us(micros_ts))));
auto const ymd = julian_from_days(days_since_epoch.time_since_epoch().count());
auto const timeparts = get_time_components(micros_ts);

// Reinterpret the local date-time as in Gregorian calendar and compute microseconds since
// the epoch from that Gregorian local date-time.
auto const gregorian_days = cuda::std::chrono::local_days(ymd).time_since_epoch().count();
int64_t result = (gregorian_days * 24L * 3600L) + (timeparts.hour * 3600L) +
(timeparts.minute * 60L) + timeparts.second;
result *= MICROS_PER_SECOND; // to microseconds
result += timeparts.subsecond;

return cudf::timestamp_us{cudf::duration_us{result}};
});
cuda::proclaim_return_type<cudf::timestamp_us>(
[d_input = input.begin<cudf::timestamp_us>()] __device__(auto const idx) {
// This timestamp corresponds to October 15th, 1582 UTC.
// After this day, there is no difference in microsecond values between Gregorian
// and Julian calendars.
int64_t constexpr last_switch_gregorian_ts = -12219292800000000L;

auto const micros_ts = d_input[idx].time_since_epoch().count();
if (micros_ts >= last_switch_gregorian_ts) { return d_input[idx]; }

// Convert the input into local date-time in Julian calendar.
auto const days_since_epoch = cuda::std::chrono::sys_days(static_cast<cudf::duration_D>(
cuda::std::chrono::floor<cuda::std::chrono::days>(cudf::duration_us(micros_ts))));
auto const ymd = julian_from_days(days_since_epoch.time_since_epoch().count());
auto const timeparts = get_time_components(micros_ts);

// Reinterpret the local date-time as in Gregorian calendar and compute microseconds since
// the epoch from that Gregorian local date-time.
auto const gregorian_days = cuda::std::chrono::local_days(ymd).time_since_epoch().count();
int64_t result = (gregorian_days * 24L * 3600L) + (timeparts.hour * 3600L) +
(timeparts.minute * 60L) + timeparts.second;
result *= MICROS_PER_SECOND; // to microseconds
result += timeparts.subsecond;

return cudf::timestamp_us{cudf::duration_us{result}};
}));

return output;
}
Expand Down
Loading

0 comments on commit 763406c

Please sign in to comment.