Merge branch 'branch-24.08' into bug/astype/empty
galipremsagar authored Jul 1, 2024
2 parents becbe4f + 599ce95 commit a2d9bb0
Showing 127 changed files with 3,921 additions and 1,977 deletions.
12 changes: 12 additions & 0 deletions .github/workflows/pr.yaml
@@ -25,6 +25,7 @@ jobs:
      - docs-build
      - wheel-build-cudf
      - wheel-tests-cudf
      - test-cudf-polars
      - wheel-build-dask-cudf
      - wheel-tests-dask-cudf
      - devcontainer
@@ -132,6 +133,17 @@ jobs:
    with:
      build_type: pull-request
      script: ci/test_wheel_cudf.sh
  test-cudf-polars:
    needs: wheel-build-cudf
    secrets: inherit
    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.08
    with:
      # This selects "ARCH=amd64 + the latest supported Python + CUDA".
      matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
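      # For example, given amd64 matrix entries with CUDA versions {11.8, 12.2, 12.5}
      # and Python versions {3.9, 3.10, 3.11}, this keeps one entry per CUDA major
      # version: (CUDA 11.8, Python 3.11) and (CUDA 12.5, Python 3.11).
      # (Illustrative values, not the actual matrix.)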
      build_type: pull-request
      # This always runs, but only fails if this PR touches code in
      # pylibcudf or cudf_polars
      script: "ci/test_cudf_polars.sh"
  wheel-build-dask-cudf:
    needs: wheel-build-cudf
    secrets: inherit
13 changes: 6 additions & 7 deletions CONTRIBUTING.md
@@ -71,15 +71,14 @@ for a minimal build of libcudf without using conda are also listed below.

Compilers:

-* `gcc` version 9.3+
-* `nvcc` version 11.5+
-* `cmake` version 3.26.4+
+* `gcc` version 11.4+
+* `nvcc` version 11.8+
+* `cmake` version 3.29.6+

-CUDA/GPU:
+CUDA/GPU Runtime:

-* CUDA 11.5+
-* NVIDIA driver 450.80.02+
-* Volta architecture or better (Compute Capability >=7.0)
+* CUDA 11.4+
+* Volta architecture or better ([Compute Capability](https://docs.nvidia.com/deploy/cuda-compatibility/) >=7.0)

You can obtain CUDA from
[https://developer.nvidia.com/cuda-downloads](https://developer.nvidia.com/cuda-downloads).
68 changes: 68 additions & 0 deletions ci/test_cudf_polars.sh
@@ -0,0 +1,68 @@
#!/bin/bash
# Copyright (c) 2024, NVIDIA CORPORATION.

set -eou pipefail

# We will only fail these tests if the PR touches code in pylibcudf
# or cudf_polars itself.
# Note, the three dots mean we are doing diff between the merge-base
# of upstream and HEAD. So this is asking, "does _this branch_ touch
# files in cudf_polars/pylibcudf", rather than "are there changes
# between upstream and this branch which touch cudf_polars/pylibcudf"
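# Equivalently: git diff $(git merge-base origin/branch-24.08 HEAD) HEAD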
# TODO: is the target branch exposed anywhere in an environment variable?
if [ -n "$(git diff --name-only origin/branch-24.08...HEAD -- python/cudf_polars/ python/cudf/cudf/_lib/pylibcudf/)" ];
then
    HAS_CHANGES=1
else
    HAS_CHANGES=0
fi

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist

RESULTS_DIR=${RAPIDS_TESTS_DIR:-"$(mktemp -d)"}
RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${RESULTS_DIR}/test-results"}/
mkdir -p "${RAPIDS_TESTS_DIR}"

rapids-logger "Install cudf wheel"
# echo to expand wildcard before adding `[extra]` requires for pip
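# (a literal `./dist/cudf*.whl[test]` would not match any file, so the wheel
# path must be expanded first and the `[test]` extra appended afterwards)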
python -m pip install $(echo ./dist/cudf*.whl)[test]

rapids-logger "Install polars (allow pre-release versions)"
python -m pip install 'polars>=1.0.0a0'

rapids-logger "Install cudf_polars"
python -m pip install --no-deps python/cudf_polars

rapids-logger "Run cudf_polars tests"

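# Run pytest without aborting the script on failure: the ERR trap below
# records the failing exit status while `set +e` disables errexit, so the
# decision to fail the job is deferred until the HAS_CHANGES check at the end.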
function set_exitcode()
{
    EXITCODE=$?
}
EXITCODE=0
trap set_exitcode ERR
set +e

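# Coverage is enforced at 100% (--cov-fail-under=100): pytest exits non-zero
# if any line of cudf_polars is uncovered, and the ERR trap records that just
# as it would a failing test.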
python -m pytest \
       --cache-clear \
       --cov cudf_polars \
       --cov-fail-under=100 \
       --cov-config=python/cudf_polars/pyproject.toml \
       --junitxml="${RAPIDS_TESTS_DIR}/junit-cudf_polars.xml" \
       python/cudf_polars/tests

trap ERR
set -e

if [ ${EXITCODE} != 0 ]; then
    rapids-logger "Testing FAILED: exitcode ${EXITCODE}"
else
    rapids-logger "Testing PASSED"
fi

if [ ${HAS_CHANGES} == 1 ]; then
    exit ${EXITCODE}
else
    exit 0
fi
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
@@ -662,6 +662,7 @@ add_library(
  src/unary/math_ops.cu
  src/unary/nan_ops.cu
  src/unary/null_ops.cu
  src/utilities/cuda_memcpy.cu
  src/utilities/default_stream.cpp
  src/utilities/linked_column.cpp
  src/utilities/logger.cpp
50 changes: 42 additions & 8 deletions cpp/benchmarks/io/parquet/parquet_reader_input.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2024, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -59,20 +59,18 @@ void parquet_read_common(cudf::size_type num_rows_to_read,
}

template <data_type DataType>
-void BM_parquet_read_data(nvbench::state& state, nvbench::type_list<nvbench::enum_type<DataType>>)
+void BM_parquet_read_data_common(nvbench::state& state,
+                                 data_profile const& profile,
+                                 nvbench::type_list<nvbench::enum_type<DataType>>)
{
  auto const d_type = get_type_or_group(static_cast<int32_t>(DataType));
-  auto const cardinality = static_cast<cudf::size_type>(state.get_int64("cardinality"));
-  auto const run_length = static_cast<cudf::size_type>(state.get_int64("run_length"));
  auto const source_type = retrieve_io_type_enum(state.get_string("io_type"));
  auto const compression = cudf::io::compression_type::SNAPPY;
  cuio_source_sink_pair source_sink(source_type);

  auto const num_rows_written = [&]() {
-    auto const tbl = create_random_table(
-      cycle_dtypes(d_type, num_cols),
-      table_size_bytes{data_size},
-      data_profile_builder().cardinality(cardinality).avg_run_length(run_length));
+    auto const tbl =
+      create_random_table(cycle_dtypes(d_type, num_cols), table_size_bytes{data_size}, profile);
    auto const view = tbl->view();

cudf::io::parquet_writer_options write_opts =
@@ -85,6 +83,32 @@ void BM_parquet_read_data(nvbench::state& state, nvbench::type_list<nvbench::enu
parquet_read_common(num_rows_written, num_cols, source_sink, state);
}

template <data_type DataType>
void BM_parquet_read_data(nvbench::state& state,
                          nvbench::type_list<nvbench::enum_type<DataType>> type_list)
{
  auto const cardinality = static_cast<cudf::size_type>(state.get_int64("cardinality"));
  auto const run_length = static_cast<cudf::size_type>(state.get_int64("run_length"));
  BM_parquet_read_data_common<DataType>(
    state, data_profile_builder().cardinality(cardinality).avg_run_length(run_length), type_list);
}

template <data_type DataType>
void BM_parquet_read_fixed_width_struct(nvbench::state& state,
                                        nvbench::type_list<nvbench::enum_type<DataType>> type_list)
{
  auto const cardinality = static_cast<cudf::size_type>(state.get_int64("cardinality"));
  auto const run_length = static_cast<cudf::size_type>(state.get_int64("run_length"));
  std::vector<cudf::type_id> s_types{
    cudf::type_id::INT32, cudf::type_id::FLOAT32, cudf::type_id::INT64};
  BM_parquet_read_data_common<DataType>(state,
                                        data_profile_builder()
                                          .cardinality(cardinality)
                                          .avg_run_length(run_length)
                                          .struct_types(s_types),
                                        type_list);
}

void BM_parquet_read_io_compression(nvbench::state& state)
{
auto const d_type = get_type_or_group({static_cast<int32_t>(data_type::INTEGRAL),
@@ -247,3 +271,13 @@ NVBENCH_BENCH(BM_parquet_read_io_small_mixed)
  .add_int64_axis("cardinality", {0, 1000})
  .add_int64_axis("run_length", {1, 32})
  .add_int64_axis("num_string_cols", {1, 2, 3});

// a benchmark for structs that only contain fixed-width types
using d_type_list_struct_only = nvbench::enum_type_list<data_type::STRUCT>;
NVBENCH_BENCH_TYPES(BM_parquet_read_fixed_width_struct, NVBENCH_TYPE_AXES(d_type_list_struct_only))
  .set_name("parquet_read_fixed_width_struct")
  .set_type_axes_names({"data_type"})
  .add_string_axis("io_type", {"DEVICE_BUFFER"})
  .set_min_samples(4)
  .add_int64_axis("cardinality", {0, 1000})
  .add_int64_axis("run_length", {1, 32});
60 changes: 34 additions & 26 deletions cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md
@@ -1,4 +1,4 @@
-# libcudf C++ Developer Guide
+# libcudf C++ Developer Guide {#DEVELOPER_GUIDE}

This document serves as a guide for contributors to libcudf C++ code. Developers should also refer
to these additional files for further documentation of libcudf best practices.
@@ -469,7 +469,7 @@ libcudf throws under different circumstances, see the [section on error handling]

# libcudf API and Implementation

-## Streams
+## Streams {#streams}

libcudf is in the process of adding support for asynchronous execution using
CUDA streams. In order to facilitate the usage of streams, all new libcudf APIs
@@ -486,33 +486,37 @@ use only asynchronous versions of CUDA APIs with the stream parameter.

In order to make the `detail` API callable from other libcudf functions, it should be exposed in a
header placed in the `cudf/cpp/include/detail/` directory.
+The declaration is not necessary if no other libcudf functions call the `detail` function.

For example:

```c++
// cpp/include/cudf/header.hpp
-void external_function(...);
+void external_function(...,
+                       rmm::cuda_stream_view stream = cudf::get_default_stream(),
+                       rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());

// cpp/include/cudf/detail/header.hpp
namespace detail{
-void external_function(..., rmm::cuda_stream_view stream)
+void external_function(..., rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr)
} // namespace detail

// cudf/src/implementation.cpp
namespace detail{
-// Use the stream parameter in the detail implementation.
-void external_function(..., rmm::cuda_stream_view stream){
-  // Implementation uses the stream with async APIs.
-  rmm::device_buffer buff(..., stream);
-  CUDF_CUDA_TRY(cudaMemcpyAsync(..., stream.value()));
-  kernel<<<..., stream>>>(...);
-  thrust::algorithm(rmm::exec_policy(stream), ...);
-}
+// Use the stream parameter in the detail implementation.
+void external_function(..., rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr){
+  // Implementation uses the stream with async APIs.
+  rmm::device_buffer buff(..., stream, mr);
+  CUDF_CUDA_TRY(cudaMemcpyAsync(..., stream.value()));
+  kernel<<<..., stream>>>(...);
+  thrust::algorithm(rmm::exec_policy(stream), ...);
+}
} // namespace detail

-void external_function(...){
-  CUDF_FUNC_RANGE(); // Generates an NVTX range for the lifetime of this function.
-  detail::external_function(..., cudf::get_default_stream());
+void external_function(..., rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr)
+{
+  CUDF_FUNC_RANGE(); // Generates an NVTX range for the lifetime of this function.
+  detail::external_function(..., stream, mr);
}
```
@@ -703,28 +707,28 @@ The preferred style for how inputs are passed in and outputs are returned is the following:
    - `column_view const&`
  - Tables:
    - `table_view const&`
  - Scalar:
    - `scalar const&`
  - Everything else:
    - Trivial or inexpensively copied types
      - Pass by value
    - Non-trivial or expensive to copy types
      - Pass by `const&`
- In/Outs
  - Columns:
    - `mutable_column_view&`
  - Tables:
    - `mutable_table_view&`
  - Everything else:
    - Pass via raw pointer
- Outputs
  - Outputs should be *returned*, i.e., no output parameters
  - Columns:
    - `std::unique_ptr<column>`
  - Tables:
    - `std::unique_ptr<table>`
  - Scalars:
    - `std::unique_ptr<scalar>`

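As an illustration of these conventions, a hypothetical declaration might look like the
following (the function name and parameters are invented for this sketch; it is not an
actual libcudf API):

```c++
// Column inputs are taken by `column_view const&`, scalar inputs by
// `scalar const&`, a trivially copyable flag by value, and the output is
// *returned* as a `std::unique_ptr<column>` rather than written through an
// out-parameter.
std::unique_ptr<column> replace_values(column_view const& input,
                                       scalar const& old_value,
                                       scalar const& new_value,
                                       bool check_nulls,
                                       rmm::cuda_stream_view stream,
                                       rmm::device_async_resource_ref mr);
```
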
### Multiple Return Values
@@ -908,6 +912,10 @@ functions that are specific to columns of Strings. These functions reside in the
namespace. Similarly, functionality used exclusively for unit testing is in the `cudf::test::`
namespace.

The public function is expected to contain a call to `CUDF_FUNC_RANGE()` followed by a call to
a `detail` function with the same name and parameters as the public function.
See the [Streams](#streams) section for an example of this pattern.

### Internal

Many functions are not meant for public use, so place them in either the `detail` or an *anonymous*
50 changes: 50 additions & 0 deletions cpp/include/cudf/binaryop.hpp
@@ -91,6 +91,56 @@ enum class binary_operator : int32_t {
///< (null, false) is null, and (valid, valid) == LOGICAL_OR(valid, valid)
INVALID_BINARY ///< invalid operation
};

/// Binary operation common type default
template <typename L, typename R, typename = void>
struct binary_op_common_type {};

/// Binary operation common type specialization
template <typename L, typename R>
struct binary_op_common_type<L, R, std::enable_if_t<has_common_type_v<L, R>>> {
  /// The common type of the template parameters
  using type = std::common_type_t<L, R>;
};

/// Binary operation common type specialization
template <typename L, typename R>
struct binary_op_common_type<
  L,
  R,
  std::enable_if_t<is_fixed_point<L>() && cuda::std::is_floating_point_v<R>>> {
  /// The common type of the template parameters
  using type = L;
};

/// Binary operation common type specialization
template <typename L, typename R>
struct binary_op_common_type<
  L,
  R,
  std::enable_if_t<is_fixed_point<R>() && cuda::std::is_floating_point_v<L>>> {
  /// The common type of the template parameters
  using type = R;
};

/// Binary operation common type helper
template <typename L, typename R>
using binary_op_common_type_t = typename binary_op_common_type<L, R>::type;

namespace detail {
template <typename AlwaysVoid, typename L, typename R>
struct binary_op_has_common_type_impl : std::false_type {};

template <typename L, typename R>
struct binary_op_has_common_type_impl<std::void_t<binary_op_common_type_t<L, R>>, L, R>
  : std::true_type {};
} // namespace detail

/// Checks if binary operation types have a common type
template <typename L, typename R>
constexpr inline bool binary_op_has_common_type_v =
  detail::binary_op_has_common_type_impl<void, L, R>::value;

/**
 * @brief Performs a binary operation between a scalar and a column.
 *
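In short, the traits above fall back to `std::common_type` when one exists and otherwise
resolve a mixed fixed-point/floating-point pair to the fixed-point side. Below is a
self-contained sketch of the same SFINAE pattern using a stand-in fixed-point type rather
than the cudf definitions (the mirrored specialization for a fixed-point right-hand side
is omitted for brevity):

```c++
#include <type_traits>

struct fixed_point_stub {};  // stand-in for cudf's fixed_point types

template <typename T>
constexpr bool is_fixed_point_v = std::is_same_v<T, fixed_point_stub>;

// Primary template: no nested `type`, so invalid combinations SFINAE away.
template <typename L, typename R, typename = void>
struct common_type {};

// Default case: defer to std::common_type when it exists.
template <typename L, typename R>
struct common_type<L, R, std::void_t<std::common_type_t<L, R>>> {
  using type = std::common_type_t<L, R>;
};

// A fixed-point type combined with a floating-point type resolves to the
// fixed-point side, mirroring the specializations shown above.
template <typename L, typename R>
struct common_type<L, R, std::enable_if_t<is_fixed_point_v<L> && std::is_floating_point_v<R>>> {
  using type = L;
};

static_assert(std::is_same_v<common_type<int, long>::type, long>);
static_assert(std::is_same_v<common_type<fixed_point_stub, double>::type, fixed_point_stub>);
```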
6 changes: 3 additions & 3 deletions cpp/include/cudf/detail/copy_if.cuh
@@ -18,7 +18,7 @@

#include <cudf/column/column_device_view.cuh>
#include <cudf/column/column_factories.hpp>
-#include <cudf/detail/copy.hpp>
+#include <cudf/copying.hpp>
#include <cudf/detail/gather.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/utilities/cuda.cuh>
@@ -242,8 +242,8 @@ struct scatter_gather_functor {
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
-    auto output_column = cudf::detail::allocate_like(
-      input, output_size, cudf::mask_allocation_policy::RETAIN, stream, mr);
+    auto output_column =
+      cudf::allocate_like(input, output_size, cudf::mask_allocation_policy::RETAIN, stream, mr);
auto output = output_column->mutable_view();

bool has_valid = input.nullable();
(Diffs for the remaining changed files are not shown.)
