Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Forward-merge branch-23.12 to branch-24.02 #14422

Merged
merged 18 commits into from
Nov 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
9be4de5
Upgrade to nvCOMP 3.0.4 (#13815)
vuule Nov 10, 2023
87d2a36
Remove Cython libcpp wrappers (#14382)
vyasr Nov 10, 2023
04d13d8
Normalizing offsets iterator (#14234)
davidwendt Nov 13, 2023
4313cfa
Use new rapids-dask-dependency metapackage for managing dask versions…
vyasr Nov 13, 2023
5d09d38
Always build nvbench statically so we don't need to package it (#14399)
robertmaynard Nov 14, 2023
e982d37
cudf.pandas: cuDF subpath checking in module `__getattr__` (#14388)
shwina Nov 14, 2023
7f3fba1
Refactor cudf_kafka to use skbuild (#14292)
jdye64 Nov 14, 2023
b0c1b7b
Add BytePairEncoder class to cuDF (#13891)
davidwendt Nov 14, 2023
b446a6f
Fix token-count logic in nvtext::tokenize_with_vocabulary (#14393)
davidwendt Nov 14, 2023
8106a0c
Cleanup remaining usages of dask dependencies (#14407)
galipremsagar Nov 14, 2023
27b052d
Added streams to CSV reader and writer api (#14340)
shrshi Nov 14, 2023
330d389
Ensure nvbench initializes nvml context when built statically (#14411)
robertmaynard Nov 14, 2023
8a0a08f
Fix as_column(pd.Timestamp/Timedelta, length=) not respecting length …
mroeschke Nov 15, 2023
ab2248e
Fix and disable encoding for nanosecond statistics in ORC writer (#14…
vuule Nov 15, 2023
8deb3dd
Raise error in `reindex` when `index` is not unique (#14400)
galipremsagar Nov 15, 2023
9e7f8a5
Fix dask dependency in custreamz (#14420)
vyasr Nov 15, 2023
d56a70f
Merge branch-23.12 into branch-24.02
bdice Nov 16, 2023
e4e6975
Update cudf_kafka_version.
bdice Nov 16, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,7 @@ fi
# build cudf_kafka Python package
if hasArg cudf_kafka; then
cd ${REPODIR}/python/cudf_kafka
SKBUILD_CONFIGURE_OPTIONS="-DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR}" \
SKBUILD_CONFIGURE_OPTIONS="-DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} ${EXTRA_CMAKE_ARGS}" \
SKBUILD_BUILD_OPTIONS="-j${PARALLEL_LEVEL:-1}" \
python -m pip install --no-build-isolation --no-deps .
fi
Expand Down
2 changes: 2 additions & 0 deletions ci/build_wheel.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ fi

if [[ ${package_name} == "dask_cudf" ]]; then
sed -r -i "s/cudf==(.*)\"/cudf${PACKAGE_CUDA_SUFFIX}==\1${alpha_spec}\"/g" ${pyproject_file}
sed -r -i "s/dask-cuda==(.*)\"/dask-cuda==\1${alpha_spec}\"/g" ${pyproject_file}
sed -r -i "s/rapids-dask-dependency==(.*)\"/rapids-dask-dependency==\1${alpha_spec}\"/g" ${pyproject_file}
else
sed -r -i "s/rmm(.*)\"/rmm${PACKAGE_CUDA_SUFFIX}\1${alpha_spec}\"/g" ${pyproject_file}
# ptxcompiler and cubinlinker aren't version constrained
Expand Down
2 changes: 2 additions & 0 deletions ci/release/update-version.sh
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g'

# Python CMakeLists updates
sed_runner 's/'"cudf_version .*)"'/'"cudf_version ${NEXT_FULL_TAG})"'/g' python/cudf/CMakeLists.txt
sed_runner 's/'"cudf_kafka_version .*)"'/'"cudf_kafka_version ${NEXT_FULL_TAG})"'/g' python/cudf_kafka/CMakeLists.txt

# cpp libcudf_kafka update
sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' cpp/libcudf_kafka/CMakeLists.txt
Expand Down Expand Up @@ -80,6 +81,7 @@ DEPENDENCIES=(
kvikio
libkvikio
librmm
rapids-dask-dependency
rmm
)
for DEP in "${DEPENDENCIES[@]}"; do
Expand Down
3 changes: 0 additions & 3 deletions ci/test_wheel_dask_cudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,6 @@ manylinux="manylinux_${manylinux_version}"
RAPIDS_PY_WHEEL_NAME="cudf_${manylinux}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep
python -m pip install --no-deps ./local-cudf-dep/cudf*.whl

# Always install latest dask for testing
python -m pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-24.02

# echo to expand wildcard before adding `[extra]` requires for pip
python -m pip install $(echo ./dist/dask_cudf*.whl)[test]

Expand Down
8 changes: 3 additions & 5 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,8 @@ dependencies:
- cudatoolkit
- cupy>=12.0.0
- cxx-compiler
- cython>=3.0.0
- dask-core>=2023.9.2
- cython>=3.0.3
- dask-cuda==24.2.*
- dask>=2023.9.2
- distributed>=2023.9.2
- dlpack>=0.5,<0.6.0a0
- doxygen=1.9.1
- fastavro>=0.22.9
Expand Down Expand Up @@ -60,7 +57,7 @@ dependencies:
- numpy>=1.21,<1.25
- numpydoc
- nvcc_linux-64=11.8
- nvcomp==2.6.1
- nvcomp==3.0.4
- nvtx>=0.2.1
- packaging
- pandas>=1.3,<1.6.0dev0
Expand All @@ -80,6 +77,7 @@ dependencies:
- python-snappy>=0.6.0
- python>=3.9,<3.11
- pytorch<1.12.0
- rapids-dask-dependency==24.2.*
- rich
- rmm==24.2.*
- s3fs>=2022.3.0
Expand Down
9 changes: 3 additions & 6 deletions conda/environments/all_cuda-120_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ dependencies:
- cmake>=3.26.4
- cramjam
- cuda-cudart-dev
- cuda-gdb
- cuda-nvcc
- cuda-nvrtc-dev
- cuda-nvtx-dev
Expand All @@ -26,11 +25,8 @@ dependencies:
- cuda-version=12.0
- cupy>=12.0.0
- cxx-compiler
- cython>=3.0.0
- dask-core>=2023.9.2
- cython>=3.0.3
- dask-cuda==24.2.*
- dask>=2023.9.2
- distributed>=2023.9.2
- dlpack>=0.5,<0.6.0a0
- doxygen=1.9.1
- fastavro>=0.22.9
Expand Down Expand Up @@ -59,7 +55,7 @@ dependencies:
- numba>=0.57,<0.58
- numpy>=1.21,<1.25
- numpydoc
- nvcomp==2.6.1
- nvcomp==3.0.4
- nvtx>=0.2.1
- packaging
- pandas>=1.3,<1.6.0dev0
Expand All @@ -78,6 +74,7 @@ dependencies:
- python-snappy>=0.6.0
- python>=3.9,<3.11
- pytorch<1.12.0
- rapids-dask-dependency==24.2.*
- rich
- rmm==24.2.*
- s3fs>=2022.3.0
Expand Down
2 changes: 1 addition & 1 deletion conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ requirements:
host:
- protobuf ==4.24.*
- python
- cython >=3.0.0
- cython >=3.0.3
- scikit-build >=0.13.1
- setuptools
- dlpack >=0.5,<0.6.0a0
Expand Down
13 changes: 0 additions & 13 deletions conda/recipes/cudf_kafka/build.sh
Original file line number Diff line number Diff line change
@@ -1,16 +1,3 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.

# This assumes the script is executed from the root of the repo directory
# Need to set CUDA_HOME inside conda environments because the hacked together
# setup.py for cudf-kafka searches that way.
# TODO: Remove after https://github.com/rapidsai/cudf/pull/14292 updates
# cudf_kafka to use scikit-build
CUDA_MAJOR=${RAPIDS_CUDA_VERSION%%.*}
if [[ ${CUDA_MAJOR} == "12" ]]; then
target_name="x86_64-linux"
if [[ ! $(arch) == "x86_64" ]]; then
target_name="sbsa-linux"
fi
export CUDA_HOME="${PREFIX}/targets/${target_name}/"
fi
./build.sh -v cudf_kafka
6 changes: 6 additions & 0 deletions conda/recipes/cudf_kafka/conda_build_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,9 @@ sysroot_version:

cmake_version:
- ">=3.26.4"

cuda_compiler:
- cuda-nvcc

cuda11_compiler:
- nvcc
23 changes: 13 additions & 10 deletions conda/recipes/cudf_kafka/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,28 +33,31 @@ build:
- SCCACHE_S3_KEY_PREFIX=cudf-kafka-linux64 # [linux64]
- SCCACHE_S3_USE_SSL
- SCCACHE_S3_NO_CREDENTIALS
# TODO: Remove after https://github.com/rapidsai/cudf/pull/14292 updates
# cudf_kafka to use scikit-build
- RAPIDS_CUDA_VERSION
ignore_run_exports_from:
{% if cuda_major == "11" %}
- {{ compiler('cuda11') }}
{% endif %}

requirements:
build:
- cmake {{ cmake_version }}
- ninja
- {{ compiler('c') }}
- {{ compiler('cxx') }}
- ninja
- sysroot_{{ target_platform }} {{ sysroot_version }}
# TODO: Remove after https://github.com/rapidsai/cudf/pull/14292 updates
# cudf_kafka to use scikit-build
{% if cuda_major == "12" %}
- cuda-gdb
{% if cuda_major == "11" %}
- {{ compiler('cuda11') }} ={{ cuda_version }}
{% else %}
- {{ compiler('cuda') }}
{% endif %}
- cuda-version ={{ cuda_version }}
- sysroot_{{ target_platform }} {{ sysroot_version }}
host:
- python
- cython >=3.0.0
- cython >=3.0.3
- cuda-version ={{ cuda_version }}
- cudf ={{ version }}
- libcudf_kafka ={{ version }}
- scikit-build >=0.13.1
- setuptools
{% if cuda_major == "12" %}
- cuda-cudart-dev
Expand Down
4 changes: 1 addition & 3 deletions conda/recipes/custreamz/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,7 @@ requirements:
- streamz
- cudf ={{ version }}
- cudf_kafka ={{ version }}
- dask >=2023.9.2
- dask-core >=2023.9.2
- distributed >=2023.9.2
- rapids-dask-dependency ={{ minor_version }}
- python-confluent-kafka >=1.9.0,<1.10.0a0
- {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}

Expand Down
8 changes: 1 addition & 7 deletions conda/recipes/dask-cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,17 +37,11 @@ build:
requirements:
host:
- python
- cudf ={{ version }}
- dask >=2023.9.2
- dask-core >=2023.9.2
- distributed >=2023.9.2
- cuda-version ={{ cuda_version }}
run:
- python
- cudf ={{ version }}
- dask >=2023.9.2
- dask-core >=2023.9.2
- distributed >=2023.9.2
- rapids-dask-dependency ={{ minor_version }}
- {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}

test:
Expand Down
36 changes: 0 additions & 36 deletions conda/recipes/dask-cudf/run_test.sh

This file was deleted.

2 changes: 1 addition & 1 deletion conda/recipes/libcudf/conda_build_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ spdlog_version:
- ">=1.11.0,<1.12"

nvcomp_version:
- "=2.6.1"
- "=3.0.4"

zlib_version:
- ">=1.2.13"
Expand Down
2 changes: 1 addition & 1 deletion cpp/benchmarks/text/vocab.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ static void bench_vocab_tokenize(nvbench::state& state)

auto const vocab_col = [] {
data_profile const profile = data_profile_builder().no_validity().distribution(
cudf::type_id::STRING, distribution_id::NORMAL, 0, 5);
cudf::type_id::STRING, distribution_id::NORMAL, 0, 15);
auto const col = create_random_column(cudf::type_id::STRING, row_count{100}, profile);
return cudf::strings::filter_characters_of_type(
cudf::strings_column_view(col->view()),
Expand Down
2 changes: 1 addition & 1 deletion cpp/cmake/thirdparty/get_nvbench.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ function(find_and_configure_nvbench)
set(cudf_patch_dir "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/patches")
rapids_cpm_package_override("${cudf_patch_dir}/nvbench_override.json")

rapids_cpm_nvbench()
rapids_cpm_nvbench(BUILD_STATIC)

endfunction()

Expand Down
4 changes: 2 additions & 2 deletions cpp/cmake/thirdparty/patches/nvbench_override.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
"fixed_in" : ""
},
{
"file" : "nvbench/use_existing_fmt.diff",
"issue" : "Fix add support for using an existing fmt [https://github.com/NVIDIA/nvbench/pull/125]",
"file" : "nvbench/nvml_with_static_builds.diff",
"issue" : "Add support for nvml with static nvbench [https://github.com/NVIDIA/nvbench/pull/148]",
"fixed_in" : ""
}
]
Expand Down
8 changes: 5 additions & 3 deletions cpp/include/cudf/column/column_device_view.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#pragma once

#include <cudf/column/column_view.hpp>
#include <cudf/detail/offsets_iterator.cuh>
#include <cudf/detail/utilities/alignment.hpp>
#include <cudf/fixed_point/fixed_point.hpp>
#include <cudf/lists/list_view.hpp>
Expand Down Expand Up @@ -442,10 +443,11 @@ class alignas(16) column_device_view : public detail::column_device_view_base {
__device__ T element(size_type element_index) const noexcept
{
size_type index = element_index + offset(); // account for this view's _offset
auto const* d_offsets = d_children[strings_column_view::offsets_column_index].data<int32_t>();
char const* d_strings = d_children[strings_column_view::chars_column_index].data<char>();
size_type offset = d_offsets[index];
return string_view{d_strings + offset, d_offsets[index + 1] - offset};
auto const offsets = d_children[strings_column_view::offsets_column_index];
auto const itr = cudf::detail::input_offsetalator(offsets.head(), offsets.type());
auto const offset = itr[index];
return string_view{d_strings + offset, static_cast<cudf::size_type>(itr[index + 1] - offset)};
}

private:
Expand Down
Loading
Loading