Skip to content

Commit

Permalink
Merge branch 'branch-21.08' into bug/correct_unused_parameters_in_rolling
Browse files Browse the repository at this point in the history
  • Loading branch information
robertmaynard committed Jun 3, 2021
2 parents 6139dce + f24c6b4 commit 06e7787
Show file tree
Hide file tree
Showing 176 changed files with 7,272 additions and 1,886 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# cuDF 21.08.00 (Date TBD)

Please see https://github.com/rapidsai/cudf/releases/tag/v21.08.00a for the latest changes to this development branch.

# cuDF 21.06.00 (Date TBD)

Please see https://github.com/rapidsai/cudf/releases/tag/v21.06.00a for the latest changes to this development branch.
Expand Down
11 changes: 6 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,15 +65,16 @@ Please see the [Demo Docker Repository](https://hub.docker.com/r/rapidsai/rapids

cuDF can be installed with conda ([miniconda](https://conda.io/miniconda.html), or the full [Anaconda distribution](https://www.anaconda.com/download)) from the `rapidsai` channel:

For `cudf version == 0.19.2` :
For `cudf version == 21.06` :
```bash
# for CUDA 10.1
# for CUDA 11.0
conda install -c rapidsai -c nvidia -c numba -c conda-forge \
cudf=0.19 python=3.7 cudatoolkit=10.1
cudf=21.06 python=3.7 cudatoolkit=11.0

# or, for CUDA 10.2
# or, for CUDA 11.2
conda install -c rapidsai -c nvidia -c numba -c conda-forge \
cudf=0.19 python=3.7 cudatoolkit=10.2
cudf=21.06 python=3.7 cudatoolkit=11.2

```

Expand Down
26 changes: 13 additions & 13 deletions ci/benchmark/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,15 @@ function hasArg {
export PATH=/conda/bin:/usr/local/cuda/bin:$PATH
export PARALLEL_LEVEL=4
export CUDA_REL=${CUDA_VERSION%.*}
export HOME=$WORKSPACE
export HOME="$WORKSPACE"

# Parse git describe
cd $WORKSPACE
cd "$WORKSPACE"
export GIT_DESCRIBE_TAG=`git describe --tags`
export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'`

# Set Benchmark Vars
export GBENCH_BENCHMARKS_DIR=${WORKSPACE}/cpp/build/gbenchmarks/
export GBENCH_BENCHMARKS_DIR="$WORKSPACE/cpp/build/gbenchmarks/"

# Set `LIBCUDF_KERNEL_CACHE_PATH` environment variable to $HOME/.jitify-cache because
# it's local to the container's virtual file system, and not shared with other CI jobs
Expand Down Expand Up @@ -96,9 +96,9 @@ conda list --show-channel-urls

logger "Build libcudf..."
if [[ ${BUILD_MODE} == "pull-request" ]]; then
$WORKSPACE/build.sh clean libcudf cudf dask_cudf benchmarks tests --ptds
"$WORKSPACE/build.sh" clean libcudf cudf dask_cudf benchmarks tests --ptds
else
$WORKSPACE/build.sh clean libcudf cudf dask_cudf benchmarks tests -l --ptds
"$WORKSPACE/build.sh" clean libcudf cudf dask_cudf benchmarks tests -l --ptds
fi

################################################################################
Expand Down Expand Up @@ -144,9 +144,9 @@ function getReqs() {

REQS=$(getReqs "${LIBCUDF_DEPS[@]}")

mkdir -p ${WORKSPACE}/tmp/benchmark
touch ${WORKSPACE}/tmp/benchmark/benchmarks.txt
ls ${GBENCH_BENCHMARKS_DIR} > ${WORKSPACE}/tmp/benchmark/benchmarks.txt
mkdir -p "$WORKSPACE/tmp/benchmark"
touch "$WORKSPACE/tmp/benchmark/benchmarks.txt"
ls ${GBENCH_BENCHMARKS_DIR} > "$WORKSPACE/tmp/benchmark/benchmarks.txt"

#Disable error aborting while tests run, failed tests will not generate data
logger "Running libcudf GBenchmarks..."
Expand All @@ -161,13 +161,13 @@ do
rm ./${BENCH}.json
JOBEXITCODE=1
fi
done < ${WORKSPACE}/tmp/benchmark/benchmarks.txt
done < "$WORKSPACE/tmp/benchmark/benchmarks.txt"
set -e

rm ${WORKSPACE}/tmp/benchmark/benchmarks.txt
cd ${WORKSPACE}
mv ${GBENCH_BENCHMARKS_DIR}/*.json ${WORKSPACE}/tmp/benchmark/
python GBenchToASV.py -d ${WORKSPACE}/tmp/benchmark/ -t ${S3_ASV_DIR} -n libcudf -b branch-${MINOR_VERSION} -r "${REQS}"
rm "$WORKSPACE/tmp/benchmark/benchmarks.txt"
cd "$WORKSPACE"
mv ${GBENCH_BENCHMARKS_DIR}/*.json "$WORKSPACE/tmp/benchmark/"
python GBenchToASV.py -d "$WORKSPACE/tmp/benchmark/" -t ${S3_ASV_DIR} -n libcudf -b branch-${MINOR_VERSION} -r "${REQS}"

###
# Run Python Benchmarks
Expand Down
6 changes: 3 additions & 3 deletions ci/cpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ export PATH=/opt/conda/bin:/usr/local/cuda/bin:$PATH
export PARALLEL_LEVEL=${PARALLEL_LEVEL:-4}

# Set home to the job's workspace
export HOME=$WORKSPACE
export HOME="$WORKSPACE"

# Determine CUDA release version
export CUDA_REL=${CUDA_VERSION%.*}
Expand All @@ -21,10 +21,10 @@ export GPUCI_CONDA_RETRY_SLEEP=30

# Use Ninja to build, setup Conda Build Dir
export CMAKE_GENERATOR="Ninja"
export CONDA_BLD_DIR="${WORKSPACE}/.conda-bld"
export CONDA_BLD_DIR="$WORKSPACE/.conda-bld"

# Switch to project root; also root of repo checkout
cd $WORKSPACE
cd "$WORKSPACE"

# If nightly build, append current YYMMDD to version
if [[ "$BUILD_MODE" = "branch" && "$SOURCE_BRANCH" = branch-* ]] ; then
Expand Down
4 changes: 2 additions & 2 deletions ci/cpu/upload.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ fi

gpuci_logger "Get conda file output locations"

export LIBCUDF_FILE=`conda build --no-build-id --croot ${WORKSPACE}/.conda-bld conda/recipes/libcudf --output`
export LIBCUDF_KAFKA_FILE=`conda build --no-build-id --croot ${WORKSPACE}/.conda-bld conda/recipes/libcudf_kafka --output`
export LIBCUDF_FILE=`conda build --no-build-id --croot "$WORKSPACE/.conda-bld" conda/recipes/libcudf --output`
export LIBCUDF_KAFKA_FILE=`conda build --no-build-id --croot "$WORKSPACE/.conda-bld" conda/recipes/libcudf_kafka --output`
export CUDF_FILE=`conda build --croot ${CONDA_BLD_DIR} conda/recipes/cudf --python=$PYTHON --output`
export DASK_CUDF_FILE=`conda build --croot ${CONDA_BLD_DIR} conda/recipes/dask-cudf --python=$PYTHON --output`
export CUDF_KAFKA_FILE=`conda build --croot ${CONDA_BLD_DIR} conda/recipes/cudf_kafka --python=$PYTHON --output`
Expand Down
4 changes: 2 additions & 2 deletions ci/docs/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ if [ -z "$PROJECT_WORKSPACE" ]; then
exit 1
fi

export DOCS_WORKSPACE=$WORKSPACE/docs
export DOCS_WORKSPACE="$WORKSPACE/docs"
export PATH=/conda/bin:/usr/local/cuda/bin:$PATH
export HOME=$WORKSPACE
export HOME="$WORKSPACE"
export PROJECT_WORKSPACE=/rapids/cudf
export LIBCUDF_KERNEL_CACHE_PATH="$HOME/.jitify-cache"
export PROJECTS=(cudf libcudf)
Expand Down
40 changes: 20 additions & 20 deletions ci/gpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@ export PATH=/opt/conda/bin:/usr/local/cuda/bin:$PATH
export PARALLEL_LEVEL=${PARALLEL_LEVEL:-4}

# Set home to the job's workspace
export HOME=$WORKSPACE
export HOME="$WORKSPACE"

# Switch to project root; also root of repo checkout
cd $WORKSPACE
cd "$WORKSPACE"

# Determine CUDA release version
export CUDA_REL=${CUDA_VERSION%.*}
export CONDA_ARTIFACT_PATH=${WORKSPACE}/ci/artifacts/cudf/cpu/.conda-bld/
export CONDA_ARTIFACT_PATH="$WORKSPACE/ci/artifacts/cudf/cpu/.conda-bld/"

# Parse git describe
export GIT_DESCRIBE_TAG=`git describe --tags`
Expand Down Expand Up @@ -80,7 +80,7 @@ gpuci_conda_retry install -y \
"rapids-notebook-env=$MINOR_VERSION.*" \
"dask-cuda=${MINOR_VERSION}" \
"rmm=$MINOR_VERSION.*" \
"ucx-py=${MINOR_VERSION}"
"ucx-py=0.21.*"

# https://docs.rapids.ai/maintainers/depmgmt/
# gpuci_conda_retry remove --force rapids-build-env rapids-notebook-env
Expand Down Expand Up @@ -117,9 +117,9 @@ if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then

gpuci_logger "Build from source"
if [[ ${BUILD_MODE} == "pull-request" ]]; then
$WORKSPACE/build.sh clean libcudf cudf dask_cudf libcudf_kafka cudf_kafka benchmarks tests --ptds
"$WORKSPACE/build.sh" clean libcudf cudf dask_cudf libcudf_kafka cudf_kafka benchmarks tests --ptds
else
$WORKSPACE/build.sh clean libcudf cudf dask_cudf libcudf_kafka cudf_kafka benchmarks tests -l --ptds
"$WORKSPACE/build.sh" clean libcudf cudf dask_cudf libcudf_kafka cudf_kafka benchmarks tests -l --ptds
fi

################################################################################
Expand All @@ -140,12 +140,12 @@ if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then

gpuci_logger "GoogleTests"
set -x
cd $WORKSPACE/cpp/build
cd "$WORKSPACE/cpp/build"

for gt in ${WORKSPACE}/cpp/build/gtests/* ; do
for gt in "$WORKSPACE/cpp/build/gtests/"* ; do
test_name=$(basename ${gt})
echo "Running GoogleTest $test_name"
${gt} --gtest_output=xml:${WORKSPACE}/test-results/
${gt} --gtest_output=xml:"$WORKSPACE/test-results/"
done
fi
else
Expand All @@ -168,7 +168,7 @@ else
for gt in gtests/* ; do
test_name=$(basename ${gt})
echo "Running GoogleTest $test_name"
${gt} --gtest_output=xml:${WORKSPACE}/test-results/
${gt} --gtest_output=xml:"$WORKSPACE/test-results/"
done

CUDF_CONDA_FILE=`find ${CONDA_ARTIFACT_PATH} -name "libcudf-*.tar.bz2"`
Expand All @@ -185,9 +185,9 @@ else

gpuci_logger "Build python libs from source"
if [[ ${BUILD_MODE} == "pull-request" ]]; then
$WORKSPACE/build.sh cudf dask_cudf cudf_kafka --ptds
"$WORKSPACE/build.sh" cudf dask_cudf cudf_kafka --ptds
else
$WORKSPACE/build.sh cudf dask_cudf cudf_kafka -l --ptds
"$WORKSPACE/build.sh" cudf dask_cudf cudf_kafka -l --ptds
fi
fi

Expand All @@ -205,21 +205,21 @@ fi
# TEST - Run py.test, notebooks
################################################################################

cd $WORKSPACE/python/cudf
cd "$WORKSPACE/python/cudf"
gpuci_logger "Python py.test for cuDF"
py.test -n 6 --cache-clear --basetemp=${WORKSPACE}/cudf-cuda-tmp --junitxml=${WORKSPACE}/junit-cudf.xml -v --cov-config=.coveragerc --cov=cudf --cov-report=xml:${WORKSPACE}/python/cudf/cudf-coverage.xml --cov-report term
py.test -n 6 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-tmp" --junitxml="$WORKSPACE/junit-cudf.xml" -v --cov-config=.coveragerc --cov=cudf --cov-report=xml:"$WORKSPACE/python/cudf/cudf-coverage.xml" --cov-report term

cd $WORKSPACE/python/dask_cudf
cd "$WORKSPACE/python/dask_cudf"
gpuci_logger "Python py.test for dask-cudf"
py.test -n 6 --cache-clear --basetemp=${WORKSPACE}/dask-cudf-cuda-tmp --junitxml=${WORKSPACE}/junit-dask-cudf.xml -v --cov-config=.coveragerc --cov=dask_cudf --cov-report=xml:${WORKSPACE}/python/dask_cudf/dask-cudf-coverage.xml --cov-report term
py.test -n 6 --cache-clear --basetemp="$WORKSPACE/dask-cudf-cuda-tmp" --junitxml="$WORKSPACE/junit-dask-cudf.xml" -v --cov-config=.coveragerc --cov=dask_cudf --cov-report=xml:"$WORKSPACE/python/dask_cudf/dask-cudf-coverage.xml" --cov-report term

cd $WORKSPACE/python/custreamz
cd "$WORKSPACE/python/custreamz"
gpuci_logger "Python py.test for cuStreamz"
py.test -n 6 --cache-clear --basetemp=${WORKSPACE}/custreamz-cuda-tmp --junitxml=${WORKSPACE}/junit-custreamz.xml -v --cov-config=.coveragerc --cov=custreamz --cov-report=xml:${WORKSPACE}/python/custreamz/custreamz-coverage.xml --cov-report term
py.test -n 6 --cache-clear --basetemp="$WORKSPACE/custreamz-cuda-tmp" --junitxml="$WORKSPACE/junit-custreamz.xml" -v --cov-config=.coveragerc --cov=custreamz --cov-report=xml:"$WORKSPACE/python/custreamz/custreamz-coverage.xml" --cov-report term

gpuci_logger "Test notebooks"
${WORKSPACE}/ci/gpu/test-notebooks.sh 2>&1 | tee nbtest.log
python ${WORKSPACE}/ci/utils/nbtestlog2junitxml.py nbtest.log
"$WORKSPACE/ci/gpu/test-notebooks.sh" 2>&1 | tee nbtest.log
python "$WORKSPACE/ci/utils/nbtestlog2junitxml.py" nbtest.log

if [ -n "${CODECOV_TOKEN}" ]; then
codecov -t $CODECOV_TOKEN
Expand Down
6 changes: 3 additions & 3 deletions ci/gpu/test-notebooks.sh
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#!/bin/bash

NOTEBOOKS_DIR=${WORKSPACE}/notebooks
NBTEST=${WORKSPACE}/ci/utils/nbtest.sh
LIBCUDF_KERNEL_CACHE_PATH=${WORKSPACE}/.jitcache
NOTEBOOKS_DIR="$WORKSPACE/notebooks"
NBTEST="$WORKSPACE/ci/utils/nbtest.sh"
LIBCUDF_KERNEL_CACHE_PATH="$WORKSPACE/.jitcache"

cd ${NOTEBOOKS_DIR}
TOPLEVEL_NB_FOLDERS=$(find . -name *.ipynb |cut -d'/' -f2|sort -u)
Expand Down
2 changes: 1 addition & 1 deletion ci/utils/nbtest.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ get_ipython().run_cell_magic=my_run_cell_magic

NO_COLORS=--colors=NoColor
EXITCODE=0
NBTMPDIR=${WORKSPACE}/tmp
NBTMPDIR="$WORKSPACE/tmp"
mkdir -p ${NBTMPDIR}

for nb in $*; do
Expand Down
8 changes: 4 additions & 4 deletions conda/environments/cudf_dev_cuda11.0.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ dependencies:
- clang=8.0.1
- clang-tools=8.0.1
- cupy>7.1.0,<10.0.0a0
- rmm=21.06.*
- rmm=21.08.*
- cmake>=3.18
- cmake_setuptools>=0.1.3
- python>=3.7,<3.9
Expand Down Expand Up @@ -41,10 +41,10 @@ dependencies:
- mypy=0.782
- typing_extensions
- pre_commit
- dask==2021.4.0
- distributed>=2.22.0,<=2021.4.0
- dask>=2021.4.0,<=2021.5.1
- distributed>=2.22.0,<=2021.5.1
- streamz
- dlpack==0.3
- dlpack>=0.5,<0.6.0a0
- arrow-cpp=1.0.1
- arrow-cpp-proc * cuda
- double-conversion
Expand Down
8 changes: 4 additions & 4 deletions conda/environments/cudf_dev_cuda11.2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ dependencies:
- clang=8.0.1
- clang-tools=8.0.1
- cupy>7.1.0,<10.0.0a0
- rmm=21.06.*
- rmm=21.08.*
- cmake>=3.18
- cmake_setuptools>=0.1.3
- python>=3.7,<3.9
Expand Down Expand Up @@ -41,10 +41,10 @@ dependencies:
- mypy=0.782
- typing_extensions
- pre_commit
- dask==2021.4.0
- distributed>=2.22.0,<=2021.4.0
- dask>=2021.4.0,<=2021.5.1
- distributed>=2.22.0,<=2021.5.1
- streamz
- dlpack==0.3
- dlpack>=0.5,<0.6.0a0
- arrow-cpp=1.0.1
- arrow-cpp-proc * cuda
- double-conversion
Expand Down
2 changes: 1 addition & 1 deletion conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ requirements:
- cython >=0.29,<0.30
- setuptools
- numba >=0.53.1
- dlpack 0.3
- dlpack>=0.5,<0.6.0a0
- pyarrow 1.0.1
- libcudf {{ version }}
- rmm {{ minor_version }}
Expand Down
4 changes: 2 additions & 2 deletions conda/recipes/custreamz/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ requirements:
- python
- streamz
- cudf {{ version }}
- dask >=2.22.0,<=2021.4.0
- distributed >=2.22.0,<=2021.4.0
- dask>=2021.4.0,<=2021.5.1
- distributed>=2.22.0,<=2021.5.1
- python-confluent-kafka
- cudf_kafka {{ version }}

Expand Down
8 changes: 4 additions & 4 deletions conda/recipes/dask-cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,13 @@ requirements:
host:
- python
- cudf {{ version }}
- dask==2021.4.0
- distributed >=2.22.0,<=2021.4.0
- dask>=2021.4.0,<=2021.5.1
- distributed>=2.22.0,<=2021.5.1
run:
- python
- cudf {{ version }}
- dask==2021.4.0
- distributed >=2.22.0,<=2021.4.0
- dask>=2021.4.0,<=2021.5.1
- distributed>=2.22.0,<=2021.5.1

test:
requires:
Expand Down
3 changes: 2 additions & 1 deletion conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ requirements:
- cudatoolkit {{ cuda_version }}.*
- arrow-cpp 1.0.1
- arrow-cpp-proc * cuda
- dlpack 0.3
- dlpack>=0.5,<0.6.0a0
run:
- {{ pin_compatible('cudatoolkit', max_pin='x.x') }}
- arrow-cpp-proc * cuda
Expand Down Expand Up @@ -74,6 +74,7 @@ test:
- test -f $PREFIX/include/cudf/detail/gather.hpp
- test -f $PREFIX/include/cudf/detail/groupby.hpp
- test -f $PREFIX/include/cudf/detail/groupby/sort_helper.hpp
- test -f $PREFIX/include/cudf/detail/groupby/group_replace_nulls.hpp
- test -f $PREFIX/include/cudf/detail/hashing.hpp
- test -f $PREFIX/include/cudf/detail/interop.hpp
- test -f $PREFIX/include/cudf/detail/is_element_valid.hpp
Expand Down
6 changes: 4 additions & 2 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ elseif(CMAKE_CUDA_ARCHITECTURES STREQUAL "")
set(CUDF_BUILD_FOR_DETECTED_ARCHS TRUE)
endif()

project(CUDF VERSION 21.06.00 LANGUAGES C CXX)
project(CUDF VERSION 21.08.00 LANGUAGES C CXX)

# Needed because GoogleBenchmark changes the state of FindThreads.cmake,
# causing subsequent runs to have different values for the `Threads::Threads` target.
Expand Down Expand Up @@ -209,6 +209,7 @@ add_library(cudf
src/groupby/sort/group_max_scan.cu
src/groupby/sort/group_min_scan.cu
src/groupby/sort/group_sum_scan.cu
src/groupby/sort/group_replace_nulls.cu
src/groupby/sort/sort_helper.cu
src/hash/hashing.cu
src/hash/md5_hash.cu
Expand Down Expand Up @@ -250,7 +251,8 @@ add_library(cudf
src/io/parquet/parquet.cpp
src/io/parquet/reader_impl.cu
src/io/parquet/writer_impl.cu
src/io/statistics/column_stats.cu
src/io/statistics/orc_column_statistics.cu
src/io/statistics/parquet_column_statistics.cu
src/io/utilities/column_buffer.cpp
src/io/utilities/data_sink.cpp
src/io/utilities/datasource.cpp
Expand Down
Loading

0 comments on commit 06e7787

Please sign in to comment.