Skip to content

Commit

Permalink
Merge branch 'branch-24.08' of github.com:rapidsai/cudf into pylibcud…
Browse files Browse the repository at this point in the history
…f-io-writers
  • Loading branch information
lithomas1 committed Jun 12, 2024
2 parents c54316e + 0891c5d commit dc93356
Show file tree
Hide file tree
Showing 319 changed files with 3,837 additions and 2,931 deletions.
25 changes: 15 additions & 10 deletions .github/workflows/external_issue_labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,36 +20,41 @@ on:
types:
- opened

pull_request:
pull_request_target:
types:
- opened

env:
GITHUB_TOKEN: ${{ github.token }}

permissions:
issues: write
pull-requests: write

jobs:
Label-Issue:
runs-on: ubuntu-latest
# Only run if the issue author is not part of RAPIDS
if: ${{ ! contains(fromJSON('["OWNER", "MEMBER", "COLLABORATOR"]'), github.event.issue.author_association)}}
permissions:
issues: write
if: github.event_name == 'issues'
steps:
- name: add-external-labels
# Only run if the issue author is not part of RAPIDS
if: ${{ ! contains(fromJSON('["OWNER", "MEMBER", "COLLABORATOR"]'), github.event.issue.author_association)}}
run: |
echo ${{ github.event.issue.author_association }}
issue_url=${{ github.event.issue.html_url }}
gh issue edit ${issue_url} --add-label "External"
continue-on-error: true

Label-PR:
runs-on: ubuntu-latest
# Only run if the pull request author is not part of RAPIDS
if: ${{ ! contains(fromJSON('["OWNER", "MEMBER", "COLLABORATOR"]'), github.event.pull_request.author_association)}}
permissions:
pull-requests: write
issues: write
if: github.event_name == 'pull_request_target'
steps:
- name: add-external-labels
# Only run if the pull request author is not part of RAPIDS
if: ${{ ! contains(fromJSON('["OWNER", "MEMBER", "COLLABORATOR"]'), github.event.pull_request.author_association)}}
run: |
echo ${{ github.event.pull_request.author_association }}
pr_url=${{ github.event.pull_request.html_url }}
gh issue edit ${pr_url} --add-label "External"
continue-on-error: true
continue-on-error: true
2 changes: 1 addition & 1 deletion .github/workflows/pr_issue_status_automation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ jobs:

update-sprint:
# This job sets the PR and its linked issues to the current "Weekly Sprint"
uses: jarmak-nv/shared-workflows/.github/workflows/project-get-set-iteration-field.yaml@branch-24.08
uses: rapidsai/shared-workflows/.github/workflows/project-get-set-iteration-field.yaml@branch-24.08
if: github.event.pull_request.state == 'open'
needs: get-project-id
with:
Expand Down
8 changes: 8 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,20 @@ repos:
- id: clang-format
types_or: [c, c++, cuda]
args: ["-fallback-style=none", "-style=file", "-i"]
exclude: |
(?x)^(
^cpp/src/io/parquet/ipc/Schema_generated.h|
^cpp/src/io/parquet/ipc/Message_generated.h|
^cpp/include/cudf_test/cxxopts.hpp|
)
- repo: https://github.com/sirosen/texthooks
rev: 0.6.6
hooks:
- id: fix-smartquotes
exclude: |
(?x)^(
^cpp/src/io/parquet/ipc/Schema_generated.h|
^cpp/src/io/parquet/ipc/Message_generated.h|
^cpp/include/cudf_test/cxxopts.hpp|
^python/cudf/cudf/tests/data/subword_tokenizer_data/.*|
^python/cudf/cudf/tests/text/test_text_methods.py
Expand Down
4 changes: 1 addition & 3 deletions ci/build_cpp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,10 @@ export CMAKE_GENERATOR=Ninja

rapids-print-env

version=$(rapids-generate-version)

rapids-logger "Begin cpp build"

# With boa installed conda build forward to boa
RAPIDS_PACKAGE_VERSION=${version} rapids-conda-retry mambabuild \
RAPIDS_PACKAGE_VERSION=$(rapids-generate-version) rapids-conda-retry mambabuild \
conda/recipes/libcudf

rapids-upload-conda-to-s3 cpp
2 changes: 1 addition & 1 deletion ci/build_docs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ ENV_YAML_DIR="$(mktemp -d)"

rapids-dependency-file-generator \
--output conda \
--file_key docs \
--file-key docs \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee "${ENV_YAML_DIR}/env.yaml"

rapids-mamba-retry env create --yes -f "${ENV_YAML_DIR}/env.yaml" -n docs
Expand Down
2 changes: 1 addition & 1 deletion ci/check_style.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ ENV_YAML_DIR="$(mktemp -d)"

rapids-dependency-file-generator \
--output conda \
--file_key checks \
--file-key checks \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee "${ENV_YAML_DIR}/env.yaml"

rapids-mamba-retry env create --yes -f "${ENV_YAML_DIR}/env.yaml" -n checks
Expand Down
2 changes: 1 addition & 1 deletion ci/configure_cpp_static.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ REQUIREMENTS_FILE="${ENV_YAML_DIR}/requirements.txt"

rapids-dependency-file-generator \
--output requirements \
--file_key test_static_build \
--file-key test_static_build \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch)" | tee "${REQUIREMENTS_FILE}"

python -m pip install -r "${REQUIREMENTS_FILE}"
Expand Down
2 changes: 1 addition & 1 deletion ci/test_cpp_common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ ENV_YAML_DIR="$(mktemp -d)"

rapids-dependency-file-generator \
--output conda \
--file_key test_cpp \
--file-key test_cpp \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch)" | tee "${ENV_YAML_DIR}/env.yaml"

rapids-mamba-retry env create --yes -f "${ENV_YAML_DIR}/env.yaml" -n test
Expand Down
2 changes: 1 addition & 1 deletion ci/test_java.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ ENV_YAML_DIR="$(mktemp -d)"

rapids-dependency-file-generator \
--output conda \
--file_key test_java \
--file-key test_java \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch)" | tee "${ENV_YAML_DIR}/env.yaml"

rapids-mamba-retry env create --yes -f "${ENV_YAML_DIR}/env.yaml" -n test
Expand Down
2 changes: 1 addition & 1 deletion ci/test_notebooks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ ENV_YAML_DIR="$(mktemp -d)"

rapids-dependency-file-generator \
--output conda \
--file_key test_notebooks \
--file-key test_notebooks \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee "${ENV_YAML_DIR}/env.yaml"

rapids-mamba-retry env create --yes -f "${ENV_YAML_DIR}/env.yaml" -n test
Expand Down
2 changes: 1 addition & 1 deletion ci/test_python_common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ ENV_YAML_DIR="$(mktemp -d)"

rapids-dependency-file-generator \
--output conda \
--file_key test_python \
--file-key test_python \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee "${ENV_YAML_DIR}/env.yaml"

rapids-mamba-retry env create --yes -f "${ENV_YAML_DIR}/env.yaml" -n test
Expand Down
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -664,6 +664,7 @@ add_library(
src/utilities/default_stream.cpp
src/utilities/linked_column.cpp
src/utilities/logger.cpp
src/utilities/pinned_memory.cpp
src/utilities/stacktrace.cpp
src/utilities/stream_pool.cpp
src/utilities/traits.cpp
Expand Down
13 changes: 7 additions & 6 deletions cpp/benchmarks/fixture/nvbench_fixture.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
*/
#pragma once

#include <cudf/io/memory_resource.hpp>
#include <cudf/utilities/error.hpp>
#include <cudf/utilities/pinned_memory.hpp>

#include <rmm/cuda_device.hpp>
#include <rmm/mr/device/arena_memory_resource.hpp>
Expand Down Expand Up @@ -81,17 +81,18 @@ struct nvbench_base_fixture {
"\nExpecting: cuda, pool, async, arena, managed, or managed_pool");
}

inline rmm::host_async_resource_ref make_cuio_host_pinned()
inline rmm::host_device_async_resource_ref make_cuio_host_pinned()
{
static std::shared_ptr<rmm::mr::pinned_host_memory_resource> mr =
std::make_shared<rmm::mr::pinned_host_memory_resource>();
return *mr;
}

inline rmm::host_async_resource_ref create_cuio_host_memory_resource(std::string const& mode)
inline rmm::host_device_async_resource_ref create_cuio_host_memory_resource(
std::string const& mode)
{
if (mode == "pinned") return make_cuio_host_pinned();
if (mode == "pinned_pool") return cudf::io::get_host_memory_resource();
if (mode == "pinned_pool") return cudf::get_pinned_memory_resource();
CUDF_FAIL("Unknown cuio_host_mem parameter: " + mode + "\nExpecting: pinned or pinned_pool");
}

Expand All @@ -112,14 +113,14 @@ struct nvbench_base_fixture {
rmm::mr::set_current_device_resource(mr.get());
std::cout << "RMM memory resource = " << rmm_mode << "\n";

cudf::io::set_host_memory_resource(create_cuio_host_memory_resource(cuio_host_mode));
cudf::set_pinned_memory_resource(create_cuio_host_memory_resource(cuio_host_mode));
std::cout << "CUIO host memory resource = " << cuio_host_mode << "\n";
}

~nvbench_base_fixture()
{
// Ensure the pool is freed before the CUDA context is destroyed:
cudf::io::set_host_memory_resource(this->make_cuio_host_pinned());
cudf::set_pinned_memory_resource(this->make_cuio_host_pinned());
}

std::shared_ptr<rmm::mr::device_memory_resource> mr;
Expand Down
12 changes: 12 additions & 0 deletions cpp/benchmarks/io/cuio_common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
#include <cudf/detail/utilities/integer_utils.hpp>
#include <cudf/detail/utilities/logger.hpp>

#include <rmm/mr/pinned_host_memory_resource.hpp>
#include <rmm/resource_ref.hpp>

#include <unistd.h>

#include <cstdio>
Expand All @@ -28,6 +31,14 @@

temp_directory const cuio_source_sink_pair::tmpdir{"cudf_gbench"};

// Don't use cudf's pinned pool for the source data
rmm::host_async_resource_ref pinned_memory_resource()
{
static rmm::mr::pinned_host_memory_resource mr = rmm::mr::pinned_host_memory_resource{};

return mr;
}

std::string random_file_in_dir(std::string const& dir_path)
{
// `mkstemp` modifies the template in place
Expand All @@ -41,6 +52,7 @@ std::string random_file_in_dir(std::string const& dir_path)

cuio_source_sink_pair::cuio_source_sink_pair(io_type type)
: type{type},
pinned_buffer({pinned_memory_resource(), cudf::get_default_stream()}),
d_buffer{0, cudf::get_default_stream()},
file_name{random_file_in_dir(tmpdir.path())},
void_sink{cudf::io::data_sink::create()}
Expand Down
4 changes: 2 additions & 2 deletions cpp/benchmarks/io/cuio_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

#include <cudf_test/file_utilities.hpp>

#include <cudf/detail/utilities/pinned_host_vector.hpp>
#include <cudf/detail/utilities/host_vector.hpp>
#include <cudf/io/data_sink.hpp>
#include <cudf/io/datasource.hpp>

Expand Down Expand Up @@ -79,7 +79,7 @@ class cuio_source_sink_pair {

io_type const type;
std::vector<char> h_buffer;
cudf::detail::pinned_host_vector<char> pinned_buffer;
cudf::detail::host_vector<char> pinned_buffer;
rmm::device_uvector<std::byte> d_buffer;
std::string const file_name;
std::unique_ptr<cudf::io::data_sink> void_sink;
Expand Down
2 changes: 1 addition & 1 deletion cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@
#include <benchmarks/io/nvbench_helpers.hpp>

#include <cudf/detail/utilities/stream_pool.hpp>
#include <cudf/io/memory_resource.hpp>
#include <cudf/io/parquet.hpp>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/pinned_memory.hpp>
#include <cudf/utilities/thread_pool.hpp>

#include <nvtx3/nvtx3.hpp>
Expand Down
10 changes: 5 additions & 5 deletions cpp/benchmarks/io/text/multibyte_split.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -22,7 +22,6 @@
#include <cudf_test/file_utilities.hpp>

#include <cudf/column/column_factories.hpp>
#include <cudf/detail/utilities/pinned_host_vector.hpp>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/io/text/data_chunk_source_factories.hpp>
#include <cudf/io/text/detail/bgzip_utils.hpp>
Expand Down Expand Up @@ -132,9 +131,10 @@ static void bench_multibyte_split(nvbench::state& state,

auto const delim_factor = static_cast<double>(delim_percent) / 100;
std::unique_ptr<cudf::io::datasource> datasource;
auto device_input = create_random_input(file_size_approx, delim_factor, 0.05, delim);
auto host_input = std::vector<char>{};
auto host_pinned_input = cudf::detail::pinned_host_vector<char>{};
auto device_input = create_random_input(file_size_approx, delim_factor, 0.05, delim);
auto host_input = std::vector<char>{};
auto host_pinned_input =
cudf::detail::make_pinned_vector_async<char>(0, cudf::get_default_stream());

if (source_type != data_chunk_source_type::device &&
source_type != data_chunk_source_type::host_pinned) {
Expand Down
7 changes: 5 additions & 2 deletions cpp/include/cudf/ast/expressions.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -478,7 +478,10 @@ class operation : public expression {
*
* @return Vector of operands
*/
std::vector<std::reference_wrapper<expression const>> get_operands() const { return operands; }
[[nodiscard]] std::vector<std::reference_wrapper<expression const>> get_operands() const
{
return operands;
}

/**
* @copydoc expression::accept
Expand Down
10 changes: 5 additions & 5 deletions cpp/include/cudf/column/column_device_view.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -442,7 +442,7 @@ class alignas(16) column_device_view : public detail::column_device_view_base {
* @return string_view instance representing this element at this index
*/
template <typename T, CUDF_ENABLE_IF(std::is_same_v<T, string_view>)>
__device__ T element(size_type element_index) const noexcept
__device__ [[nodiscard]] T element(size_type element_index) const noexcept
{
size_type index = element_index + offset(); // account for this view's _offset
char const* d_strings = static_cast<char const*>(_data);
Expand Down Expand Up @@ -501,7 +501,7 @@ class alignas(16) column_device_view : public detail::column_device_view_base {
* @return dictionary32 instance representing this element at this index
*/
template <typename T, CUDF_ENABLE_IF(std::is_same_v<T, dictionary32>)>
__device__ T element(size_type element_index) const noexcept
__device__ [[nodiscard]] T element(size_type element_index) const noexcept
{
size_type index = element_index + offset(); // account for this view's _offset
auto const indices = d_children[0];
Expand All @@ -519,7 +519,7 @@ class alignas(16) column_device_view : public detail::column_device_view_base {
* @return numeric::fixed_point representing the element at this index
*/
template <typename T, CUDF_ENABLE_IF(cudf::is_fixed_point<T>())>
__device__ T element(size_type element_index) const noexcept
__device__ [[nodiscard]] T element(size_type element_index) const noexcept
{
using namespace numeric;
using rep = typename T::rep;
Expand Down Expand Up @@ -858,7 +858,7 @@ class alignas(16) column_device_view : public detail::column_device_view_base {
*/
[[nodiscard]] __device__ device_span<column_device_view const> children() const noexcept
{
return device_span<column_device_view const>(d_children, _num_children);
return {d_children, static_cast<std::size_t>(_num_children)};
}

/**
Expand Down Expand Up @@ -1032,7 +1032,7 @@ class alignas(16) mutable_column_device_view : public detail::column_device_view
* @return Reference to the element at the specified index
*/
template <typename T, CUDF_ENABLE_IF(is_rep_layout_compatible<T>())>
__device__ T& element(size_type element_index) const noexcept
__device__ [[nodiscard]] T& element(size_type element_index) const noexcept
{
return data<T>()[element_index];
}
Expand Down
Loading

0 comments on commit dc93356

Please sign in to comment.