Skip to content

Commit

Permalink
Merge branch 'branch-24.08' into patch-7
Browse files Browse the repository at this point in the history
  • Loading branch information
vyasr authored May 29, 2024
2 parents 025bfcf + eafa570 commit a62580d
Show file tree
Hide file tree
Showing 83 changed files with 2,664 additions and 494 deletions.
2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@ repos:
- id: trailing-whitespace
exclude: |
(?x)^(
^cpp/cmake/thirdparty/patches/.*|
^python/cudf/cudf/tests/data/subword_tokenizer_data/.*
)
- id: end-of-file-fixer
exclude: |
(?x)^(
^cpp/cmake/thirdparty/patches/.*|
^python/cudf/cudf/tests/data/subword_tokenizer_data/.*
)
- repo: https://github.com/PyCQA/isort
Expand Down
16 changes: 6 additions & 10 deletions ci/build_docs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,23 +46,19 @@ pushd docs/cudf
make dirhtml
mkdir -p "${RAPIDS_DOCS_DIR}/cudf/html"
mv build/dirhtml/* "${RAPIDS_DOCS_DIR}/cudf/html"
if [[ "${RAPIDS_BUILD_TYPE}" != "pull-request" ]]; then
make text
mkdir -p "${RAPIDS_DOCS_DIR}/cudf/txt"
mv build/text/* "${RAPIDS_DOCS_DIR}/cudf/txt"
fi
make text
mkdir -p "${RAPIDS_DOCS_DIR}/cudf/txt"
mv build/text/* "${RAPIDS_DOCS_DIR}/cudf/txt"
popd

rapids-logger "Build dask-cuDF Sphinx docs"
pushd docs/dask_cudf
make dirhtml
mkdir -p "${RAPIDS_DOCS_DIR}/dask-cudf/html"
mv build/dirhtml/* "${RAPIDS_DOCS_DIR}/dask-cudf/html"
if [[ "${RAPIDS_BUILD_TYPE}" != "pull-request" ]]; then
make text
mkdir -p "${RAPIDS_DOCS_DIR}/dask-cudf/txt"
mv build/text/* "${RAPIDS_DOCS_DIR}/dask-cudf/txt"
fi
make text
mkdir -p "${RAPIDS_DOCS_DIR}/dask-cudf/txt"
mv build/text/* "${RAPIDS_DOCS_DIR}/dask-cudf/txt"
popd

rapids-upload-docs
9 changes: 9 additions & 0 deletions ci/run_cudf_kafka_pytests.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/bash
# Copyright (c) 2024, NVIDIA CORPORATION.

set -euo pipefail

# Support invoking run_cudf_kafka_pytests.sh outside the script directory:
# resolve this script's real location, then hop to the cudf_kafka package dir.
script_dir="$(dirname "$(realpath "${BASH_SOURCE[0]}")")"
cd "${script_dir}/../python/cudf_kafka/cudf_kafka"

pytest --cache-clear "$@" tests
2 changes: 1 addition & 1 deletion ci/run_custreamz_pytests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

set -euo pipefail

# It is essential to cd into python/cudf/cudf as `pytest-xdist` + `coverage` seem to work only at this directory level.
# It is essential to cd into python/custreamz/custreamz/ as `pytest-xdist` + `coverage` seem to work only at this directory level.

# Support invoking run_custreamz_pytests.sh outside the script directory
cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/custreamz/custreamz/
Expand Down
2 changes: 1 addition & 1 deletion ci/run_dask_cudf_pytests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

set -euo pipefail

# It is essential to cd into python/cudf/cudf as `pytest-xdist` + `coverage` seem to work only at this directory level.
# It is essential to cd into python/dask_cudf/dask_cudf/ as `pytest-xdist` + `coverage` seem to work only at this directory level.

# Support invoking run_dask_cudf_pytests.sh outside the script directory
cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/dask_cudf/dask_cudf/
Expand Down
4 changes: 4 additions & 0 deletions ci/test_python_other.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ DASK_DATAFRAME__QUERY_PLANNING=False ./ci/run_dask_cudf_pytests.sh \
--dist=loadscope \
.

rapids-logger "pytest cudf_kafka"
./ci/run_cudf_kafka_pytests.sh \
--junitxml="${RAPIDS_TESTS_DIR}/junit-cudf-kafka.xml"

rapids-logger "pytest custreamz"
./ci/run_custreamz_pytests.sh \
--junitxml="${RAPIDS_TESTS_DIR}/junit-custreamz.xml" \
Expand Down
3 changes: 2 additions & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -360,11 +360,12 @@ add_library(
src/hash/xxhash_64.cu
src/interop/dlpack.cpp
src/interop/from_arrow.cu
src/interop/arrow_utilities.cpp
src/interop/to_arrow.cu
src/interop/to_arrow_device.cu
src/interop/from_arrow_device.cu
src/interop/from_arrow_host.cu
src/interop/to_arrow_schema.cpp
src/interop/to_arrow_utilities.cpp
src/interop/detail/arrow_allocator.cpp
src/io/avro/avro.cpp
src/io/avro/avro_gpu.cu
Expand Down
17 changes: 0 additions & 17 deletions cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,25 +25,12 @@
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/thread_pool.hpp>

#include <rmm/mr/device/pool_memory_resource.hpp>
#include <rmm/mr/pinned_host_memory_resource.hpp>
#include <rmm/resource_ref.hpp>

#include <nvtx3/nvtx3.hpp>

#include <nvbench/nvbench.cuh>

#include <vector>

// TODO: remove this once pinned/pooled is enabled by default in cuIO
void set_cuio_host_pinned_pool()
{
using host_pooled_mr = rmm::mr::pool_memory_resource<rmm::mr::pinned_host_memory_resource>;
static std::shared_ptr<host_pooled_mr> mr = std::make_shared<host_pooled_mr>(
std::make_shared<rmm::mr::pinned_host_memory_resource>().get(), 256ul * 1024 * 1024);
cudf::io::set_host_memory_resource(*mr);
}

size_t get_num_reads(nvbench::state const& state) { return state.get_int64("num_threads"); }

size_t get_read_size(nvbench::state const& state)
Expand Down Expand Up @@ -105,8 +92,6 @@ void BM_parquet_multithreaded_read_common(nvbench::state& state,
size_t const data_size = state.get_int64("total_data_size");
auto const num_threads = state.get_int64("num_threads");

set_cuio_host_pinned_pool();

auto streams = cudf::detail::fork_streams(cudf::get_default_stream(), num_threads);
cudf::detail::thread_pool threads(num_threads);

Expand Down Expand Up @@ -186,8 +171,6 @@ void BM_parquet_multithreaded_read_chunked_common(nvbench::state& state,
size_t const input_limit = state.get_int64("input_limit");
size_t const output_limit = state.get_int64("output_limit");

set_cuio_host_pinned_pool();

auto streams = cudf::detail::fork_streams(cudf::get_default_stream(), num_threads);
cudf::detail::thread_pool threads(num_threads);
auto [source_sink_vector, total_file_size, num_files] = write_file_data(state, d_types);
Expand Down
22 changes: 10 additions & 12 deletions cpp/benchmarks/join/distinct_join.cu
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,16 @@ template <typename Key, bool Nullable>
void distinct_inner_join(nvbench::state& state,
nvbench::type_list<Key, nvbench::enum_type<Nullable>>)
{
auto join = [](cudf::table_view const& build_input,
cudf::table_view const& probe_input,
cudf::null_equality compare_nulls,
rmm::cuda_stream_view stream) {
auto join = [](cudf::table_view const& probe_input,
cudf::table_view const& build_input,
cudf::null_equality compare_nulls) {
auto const has_nulls =
cudf::has_nested_nulls(build_input) || cudf::has_nested_nulls(probe_input)
? cudf::nullable_join::YES
: cudf::nullable_join::NO;
auto hj_obj = cudf::distinct_hash_join<cudf::has_nested::NO>{
build_input, probe_input, has_nulls, compare_nulls, stream};
return hj_obj.inner_join(stream);
build_input, probe_input, has_nulls, compare_nulls};
return hj_obj.inner_join();
};

BM_join<Key, Nullable>(state, join);
Expand All @@ -40,17 +39,16 @@ template <typename Key, bool Nullable>
void distinct_left_join(nvbench::state& state,
nvbench::type_list<Key, nvbench::enum_type<Nullable>>)
{
auto join = [](cudf::table_view const& build_input,
cudf::table_view const& probe_input,
cudf::null_equality compare_nulls,
rmm::cuda_stream_view stream) {
auto join = [](cudf::table_view const& probe_input,
cudf::table_view const& build_input,
cudf::null_equality compare_nulls) {
auto const has_nulls =
cudf::has_nested_nulls(build_input) || cudf::has_nested_nulls(probe_input)
? cudf::nullable_join::YES
: cudf::nullable_join::NO;
auto hj_obj = cudf::distinct_hash_join<cudf::has_nested::NO>{
build_input, probe_input, has_nulls, compare_nulls, stream};
return hj_obj.left_join(stream);
build_input, probe_input, has_nulls, compare_nulls};
return hj_obj.left_join();
};

BM_join<Key, Nullable>(state, join);
Expand Down
30 changes: 6 additions & 24 deletions cpp/benchmarks/join/join.cu
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,9 @@ void nvbench_inner_join(nvbench::state& state,
{
auto join = [](cudf::table_view const& left_input,
cudf::table_view const& right_input,
cudf::null_equality compare_nulls,
rmm::cuda_stream_view stream) {
auto const has_nulls = cudf::has_nested_nulls(left_input) || cudf::has_nested_nulls(right_input)
? cudf::nullable_join::YES
: cudf::nullable_join::NO;
cudf::hash_join hj_obj(left_input, has_nulls, compare_nulls, stream);
return hj_obj.inner_join(right_input, std::nullopt, stream);
cudf::null_equality compare_nulls) {
return cudf::inner_join(left_input, right_input, compare_nulls);
};

BM_join<Key, Nullable>(state, join);
}

Expand All @@ -39,15 +33,9 @@ void nvbench_left_join(nvbench::state& state, nvbench::type_list<Key, nvbench::e
{
auto join = [](cudf::table_view const& left_input,
cudf::table_view const& right_input,
cudf::null_equality compare_nulls,
rmm::cuda_stream_view stream) {
auto const has_nulls = cudf::has_nested_nulls(left_input) || cudf::has_nested_nulls(right_input)
? cudf::nullable_join::YES
: cudf::nullable_join::NO;
cudf::hash_join hj_obj(left_input, has_nulls, compare_nulls, stream);
return hj_obj.left_join(right_input, std::nullopt, stream);
cudf::null_equality compare_nulls) {
return cudf::left_join(left_input, right_input, compare_nulls);
};

BM_join<Key, Nullable>(state, join);
}

Expand All @@ -56,15 +44,9 @@ void nvbench_full_join(nvbench::state& state, nvbench::type_list<Key, nvbench::e
{
auto join = [](cudf::table_view const& left_input,
cudf::table_view const& right_input,
cudf::null_equality compare_nulls,
rmm::cuda_stream_view stream) {
auto const has_nulls = cudf::has_nested_nulls(left_input) || cudf::has_nested_nulls(right_input)
? cudf::nullable_join::YES
: cudf::nullable_join::NO;
cudf::hash_join hj_obj(left_input, has_nulls, compare_nulls, stream);
return hj_obj.full_join(right_input, std::nullopt, stream);
cudf::null_equality compare_nulls) {
return cudf::full_join(left_input, right_input, compare_nulls);
};

BM_join<Key, Nullable>(state, join);
}

Expand Down
9 changes: 3 additions & 6 deletions cpp/benchmarks/join/join_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -178,30 +178,27 @@ void BM_join(state_type& state, Join JoinFunc)
}
}
if constexpr (std::is_same_v<state_type, nvbench::state> and (join_type != join_t::CONDITIONAL)) {
state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
if constexpr (join_type == join_t::MIXED) {
auto const col_ref_left_0 = cudf::ast::column_reference(0);
auto const col_ref_right_0 =
cudf::ast::column_reference(0, cudf::ast::table_reference::RIGHT);
auto left_zero_eq_right_zero =
cudf::ast::operation(cudf::ast::ast_operator::EQUAL, col_ref_left_0, col_ref_right_0);
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
rmm::cuda_stream_view stream_view{launch.get_stream()};
auto result = JoinFunc(left_table.select(columns_to_join),
right_table.select(columns_to_join),
left_table.select({1}),
right_table.select({1}),
left_zero_eq_right_zero,
cudf::null_equality::UNEQUAL,
stream_view);
cudf::null_equality::UNEQUAL);
});
}
if constexpr (join_type == join_t::HASH) {
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
rmm::cuda_stream_view stream_view{launch.get_stream()};
auto result = JoinFunc(left_table.select(columns_to_join),
right_table.select(columns_to_join),
cudf::null_equality::UNEQUAL,
stream_view);
cudf::null_equality::UNEQUAL);
});
}
}
Expand Down
15 changes: 5 additions & 10 deletions cpp/benchmarks/join/mixed_join.cu
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@ void nvbench_mixed_inner_join(nvbench::state& state,
cudf::table_view const& left_conditional_input,
cudf::table_view const& right_conditional_input,
cudf::ast::operation binary_pred,
cudf::null_equality compare_nulls,
rmm::cuda_stream_view stream) {
cudf::null_equality compare_nulls) {
return cudf::mixed_inner_join(left_equality_input,
right_equality_input,
left_conditional_input,
Expand All @@ -47,8 +46,7 @@ void nvbench_mixed_left_join(nvbench::state& state,
cudf::table_view const& left_conditional_input,
cudf::table_view const& right_conditional_input,
cudf::ast::operation binary_pred,
cudf::null_equality compare_nulls,
rmm::cuda_stream_view stream) {
cudf::null_equality compare_nulls) {
return cudf::mixed_left_join(left_equality_input,
right_equality_input,
left_conditional_input,
Expand All @@ -69,8 +67,7 @@ void nvbench_mixed_full_join(nvbench::state& state,
cudf::table_view const& left_conditional_input,
cudf::table_view const& right_conditional_input,
cudf::ast::operation binary_pred,
cudf::null_equality compare_nulls,
rmm::cuda_stream_view stream) {
cudf::null_equality compare_nulls) {
return cudf::mixed_full_join(left_equality_input,
right_equality_input,
left_conditional_input,
Expand All @@ -91,8 +88,7 @@ void nvbench_mixed_left_semi_join(nvbench::state& state,
cudf::table_view const& left_conditional_input,
cudf::table_view const& right_conditional_input,
cudf::ast::operation binary_pred,
cudf::null_equality compare_nulls,
rmm::cuda_stream_view stream) {
cudf::null_equality compare_nulls) {
return cudf::mixed_left_semi_join(left_equality_input,
right_equality_input,
left_conditional_input,
Expand All @@ -113,8 +109,7 @@ void nvbench_mixed_left_anti_join(nvbench::state& state,
cudf::table_view const& left_conditional_input,
cudf::table_view const& right_conditional_input,
cudf::ast::operation binary_pred,
cudf::null_equality compare_nulls,
rmm::cuda_stream_view stream) {
cudf::null_equality compare_nulls) {
return cudf::mixed_left_anti_join(left_equality_input,
right_equality_input,
left_conditional_input,
Expand Down
20 changes: 20 additions & 0 deletions cpp/cmake/thirdparty/patches/cccl_override.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@
"issue" : "thrust::copy introduced a change in behavior that causes failures with cudaErrorInvalidValue.",
"fixed_in" : ""
},
{
"file" : "${current_json_dir}/revert_pr_211_cccl_2.5.0.diff",
"issue" : "thrust::copy introduced a change in behavior that causes failures with cudaErrorInvalidValue.",
"fixed_in" : ""
},
{
"file": "cccl/kernel_pointer_hiding.diff",
"issue": "Hide APIs that accept kernel pointers [https://github.com/NVIDIA/cccl/pull/1395]",
Expand All @@ -28,15 +33,30 @@
"issue" : "Remove 64bit dispatching as not needed by libcudf and results in compiling twice as many kernels [https://github.com/rapidsai/cudf/pull/11437]",
"fixed_in" : ""
},
{
"file" : "${current_json_dir}/thrust_disable_64bit_dispatching_cccl_2.5.0.diff",
"issue" : "Remove 64bit dispatching as not needed by libcudf and results in compiling twice as many kernels [https://github.com/rapidsai/cudf/pull/11437]",
"fixed_in" : ""
},
{
"file" : "${current_json_dir}/thrust_faster_sort_compile_times.diff",
"issue" : "Improve Thrust sort compile times by not unrolling loops for inlined comparators [https://github.com/rapidsai/cudf/pull/10577]",
"fixed_in" : ""
},
{
"file" : "${current_json_dir}/thrust_faster_sort_compile_times_cccl_2.5.0.diff",
"issue" : "Improve Thrust sort compile times by not unrolling loops for inlined comparators [https://github.com/rapidsai/cudf/pull/10577]",
"fixed_in" : ""
},
{
"file" : "${current_json_dir}/thrust_faster_scan_compile_times.diff",
"issue" : "Improve Thrust scan compile times by reducing the number of kernels generated [https://github.com/rapidsai/cudf/pull/8183]",
"fixed_in" : ""
},
{
"file" : "${current_json_dir}/thrust_faster_scan_compile_times_cccl_2.5.0.diff",
"issue" : "Improve Thrust scan compile times by reducing the number of kernels generated [https://github.com/rapidsai/cudf/pull/8183]",
"fixed_in" : ""
}
]
}
Expand Down
Loading

0 comments on commit a62580d

Please sign in to comment.