Skip to content

Commit

Permalink
Merge branch 'branch-22.02' into jni-mixed-join
Browse files Browse the repository at this point in the history
  • Loading branch information
jlowe committed Jan 18, 2022
2 parents 61b0e7d + e4a16ae commit 0652f8f
Show file tree
Hide file tree
Showing 77 changed files with 3,620 additions and 566 deletions.
4 changes: 2 additions & 2 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ if buildAll || hasArg libcudf; then

# get the current count before the compile starts
FILES_IN_CCACHE=""
if [[ "$BUILD_REPORT_INCL_CACHE_STATS"=="ON" && -x "$(command -v ccache)" ]]; then
if [[ "$BUILD_REPORT_INCL_CACHE_STATS" == "ON" && -x "$(command -v ccache)" ]]; then
FILES_IN_CCACHE=$(ccache -s | grep "files in cache")
echo "$FILES_IN_CCACHE"
# zero the ccache statistics
Expand All @@ -212,7 +212,7 @@ if buildAll || hasArg libcudf; then
compile_total=$(( compile_end - compile_start ))

# Record build times
if [[ "$BUILD_REPORT_METRICS"=="ON" && -f "${LIB_BUILD_DIR}/.ninja_log" ]]; then
if [[ "$BUILD_REPORT_METRICS" == "ON" && -f "${LIB_BUILD_DIR}/.ninja_log" ]]; then
echo "Formatting build metrics"
python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt xml > ${LIB_BUILD_DIR}/ninja_log.xml
MSG="<p>"
Expand Down
4 changes: 2 additions & 2 deletions ci/benchmark/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ export GBENCH_BENCHMARKS_DIR="$WORKSPACE/cpp/build/gbenchmarks/"
export LIBCUDF_KERNEL_CACHE_PATH="$HOME/.jitify-cache"

# Dask & Distributed git tag
export DASK_DISTRIBUTED_GIT_TAG='2021.11.2'
export DASK_DISTRIBUTED_GIT_TAG='main'

function remove_libcudf_kernel_cache_dir {
EXITCODE=$?
Expand Down Expand Up @@ -98,7 +98,7 @@ conda list --show-channel-urls
################################################################################

logger "Build libcudf..."
if [[ ${BUILD_MODE} == "pull-request" ]]; then
if [[ "${BUILD_MODE}" == "pull-request" ]]; then
"$WORKSPACE/build.sh" clean libcudf cudf dask_cudf benchmarks tests --ptds
else
"$WORKSPACE/build.sh" clean libcudf cudf dask_cudf benchmarks tests -l --ptds
Expand Down
2 changes: 1 addition & 1 deletion ci/cpu/upload.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ export GPUCI_RETRY_SLEEP=30
export LABEL_OPTION=${LABEL_OPTION:-"--label main"}

# Skip uploads unless BUILD_MODE == "branch"
if [ ${BUILD_MODE} != "branch" ]; then
if [ "${BUILD_MODE}" != "branch" ]; then
echo "Skipping upload"
return 0
fi
Expand Down
6 changes: 3 additions & 3 deletions ci/gpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ export GIT_DESCRIBE_TAG=`git describe --tags`
export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'`

# Dask & Distributed git tag
export DASK_DISTRIBUTED_GIT_TAG='2021.11.2'
export DASK_DISTRIBUTED_GIT_TAG='main'

# ucx-py version
export UCX_PY_VERSION='0.24.*'
Expand Down Expand Up @@ -124,7 +124,7 @@ if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then
################################################################################

gpuci_logger "Build from source"
if [[ ${BUILD_MODE} == "pull-request" ]]; then
if [[ "${BUILD_MODE}" == "pull-request" ]]; then
"$WORKSPACE/build.sh" clean libcudf cudf dask_cudf libcudf_kafka cudf_kafka benchmarks tests --ptds
else
"$WORKSPACE/build.sh" clean libcudf cudf dask_cudf libcudf_kafka cudf_kafka benchmarks tests -l --ptds
Expand Down Expand Up @@ -222,7 +222,7 @@ else
install_dask

gpuci_logger "Build python libs from source"
if [[ ${BUILD_MODE} == "pull-request" ]]; then
if [[ "${BUILD_MODE}" == "pull-request" ]]; then
"$WORKSPACE/build.sh" cudf dask_cudf cudf_kafka --ptds
else
"$WORKSPACE/build.sh" cudf dask_cudf cudf_kafka -l --ptds
Expand Down
6 changes: 3 additions & 3 deletions conda/environments/cudf_dev_cuda11.5.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ channels:
dependencies:
- clang=11.1.0
- clang-tools=11.1.0
- cupy>=9.5.0,<10.0.0a0
- cupy>=9.5.0,<11.0.0a0
- rmm=22.02.*
- cmake>=3.20.1
- cmake_setuptools>=0.1.3
Expand Down Expand Up @@ -41,8 +41,8 @@ dependencies:
- pydocstyle=6.1.1
- typing_extensions
- pre-commit
- dask>=2021.11.1,<=2021.11.2
- distributed>=2021.11.1,<=2021.11.2
- dask>=2021.11.1
- distributed>=2021.11.1
- streamz
- arrow-cpp=5.0.0
- dlpack>=0.5,<0.6.0a0
Expand Down
2 changes: 1 addition & 1 deletion conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ requirements:
- python
- typing_extensions
- pandas >=1.0,<1.4.0dev0
- cupy >=9.5.0,<10.0.0a0
- cupy >=9.5.0,<11.0.0a0
- numba >=0.54
- numpy
- {{ pin_compatible('pyarrow', max_pin='x.x.x') }} *cuda
Expand Down
8 changes: 4 additions & 4 deletions conda/recipes/dask-cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,14 @@ requirements:
host:
- python
- cudf {{ version }}
- dask>=2021.11.1,<=2021.11.2
- distributed>=2021.11.1,<=2021.11.2
- dask>=2021.11.1
- distributed>=2021.11.1
- cudatoolkit {{ cuda_version }}
run:
- python
- cudf {{ version }}
- dask>=2021.11.1,<=2021.11.2
- distributed>=2021.11.1,<=2021.11.2
- dask>=2021.11.1
- distributed>=2021.11.1
- {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }}

test: # [linux64]
Expand Down
3 changes: 2 additions & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,7 @@ add_library(
src/jit/parser.cpp
src/jit/type.cpp
src/join/conditional_join.cu
src/join/mixed_join.cu
src/join/cross_join.cu
src/join/hash_join.cu
src/join/join.cu
Expand Down Expand Up @@ -618,7 +619,7 @@ set_target_properties(
)

target_compile_options(
cudftestutil PUBLIC "$<$<COMPILE_LANGUAGE:CXX>:${CUDF_CXX_FLAGS}>"
cudftestutil PUBLIC "$<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:CXX>:${CUDF_CXX_FLAGS}>>"
"$<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:CUDA>:${CUDF_CUDA_FLAGS}>>"
)

Expand Down
65 changes: 43 additions & 22 deletions cpp/benchmarks/copying/contiguous_split_benchmark.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION.
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -34,10 +34,18 @@ void BM_contiguous_split_common(benchmark::State& state,
int64_t bytes_total)
{
// generate splits
cudf::size_type split_stride = num_rows / num_splits;
std::vector<cudf::size_type> splits;
for (int idx = 0; idx < num_rows; idx += split_stride) {
splits.push_back(std::min(idx + split_stride, static_cast<cudf::size_type>(num_rows)));
if (num_splits > 0) {
cudf::size_type const split_stride = num_rows / num_splits;
// start after the first element.
auto iter = thrust::make_counting_iterator(1);
splits.reserve(num_splits);
std::transform(iter,
iter + num_splits,
std::back_inserter(splits),
[split_stride, num_rows](cudf::size_type i) {
return std::min(i * split_stride, static_cast<cudf::size_type>(num_rows));
});
}

std::vector<std::unique_ptr<cudf::column>> columns(src_cols.size());
Expand All @@ -53,21 +61,22 @@ void BM_contiguous_split_common(benchmark::State& state,
auto result = cudf::contiguous_split(src_table, splits);
}

state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * bytes_total);
// it's 2x bytes_total because we're both reading and writing.
state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * bytes_total * 2);
}

class ContiguousSplit : public cudf::benchmark {
};

void BM_contiguous_split(benchmark::State& state)
{
int64_t total_desired_bytes = state.range(0);
cudf::size_type num_cols = state.range(1);
cudf::size_type num_splits = state.range(2);
bool include_validity = state.range(3) == 0 ? false : true;
int64_t const total_desired_bytes = state.range(0);
cudf::size_type const num_cols = state.range(1);
cudf::size_type const num_splits = state.range(2);
bool const include_validity = state.range(3) == 0 ? false : true;

cudf::size_type el_size = 4; // ints and floats
int64_t num_rows = total_desired_bytes / (num_cols * el_size);
int64_t const num_rows = total_desired_bytes / (num_cols * el_size);

// generate input table
srand(31337);
Expand All @@ -85,8 +94,10 @@ void BM_contiguous_split(benchmark::State& state)
}
}

size_t total_bytes = total_desired_bytes;
if (include_validity) { total_bytes += num_rows / (sizeof(cudf::bitmask_type) * 8); }
int64_t const total_bytes =
total_desired_bytes +
(include_validity ? (max(int64_t{1}, (num_rows / 32)) * sizeof(cudf::bitmask_type) * num_cols)
: 0);

BM_contiguous_split_common(state, src_cols, num_rows, num_splits, total_bytes);
}
Expand All @@ -102,17 +113,17 @@ int rand_range(int r)

void BM_contiguous_split_strings(benchmark::State& state)
{
int64_t total_desired_bytes = state.range(0);
cudf::size_type num_cols = state.range(1);
cudf::size_type num_splits = state.range(2);
bool include_validity = state.range(3) == 0 ? false : true;
int64_t const total_desired_bytes = state.range(0);
cudf::size_type const num_cols = state.range(1);
cudf::size_type const num_splits = state.range(2);
bool const include_validity = state.range(3) == 0 ? false : true;

const int64_t string_len = 8;
constexpr int64_t string_len = 8;
std::vector<const char*> h_strings{
"aaaaaaaa", "bbbbbbbb", "cccccccc", "dddddddd", "eeeeeeee", "ffffffff", "gggggggg", "hhhhhhhh"};

int64_t col_len_bytes = total_desired_bytes / num_cols;
int64_t num_rows = col_len_bytes / string_len;
int64_t const col_len_bytes = total_desired_bytes / num_cols;
int64_t const num_rows = col_len_bytes / string_len;

// generate input table
srand(31337);
Expand All @@ -133,8 +144,10 @@ void BM_contiguous_split_strings(benchmark::State& state)
}
}

size_t total_bytes = total_desired_bytes + (num_rows * sizeof(cudf::size_type));
if (include_validity) { total_bytes += num_rows / (sizeof(cudf::bitmask_type) * 8); }
int64_t const total_bytes =
total_desired_bytes + ((num_rows + 1) * sizeof(cudf::offset_type)) +
(include_validity ? (max(int64_t{1}, (num_rows / 32)) * sizeof(cudf::bitmask_type) * num_cols)
: 0);

BM_contiguous_split_common(state, src_cols, num_rows, num_splits, total_bytes);
}
Expand All @@ -157,12 +170,16 @@ CSBM_BENCHMARK_DEFINE(6Gb10ColsValidity, (int64_t)6 * 1024 * 1024 * 1024, 10, 25
CSBM_BENCHMARK_DEFINE(4Gb512ColsNoValidity, (int64_t)4 * 1024 * 1024 * 1024, 512, 256, 0);
CSBM_BENCHMARK_DEFINE(4Gb512ColsValidity, (int64_t)4 * 1024 * 1024 * 1024, 512, 256, 1);
CSBM_BENCHMARK_DEFINE(4Gb10ColsNoValidity, (int64_t)4 * 1024 * 1024 * 1024, 10, 256, 0);
CSBM_BENCHMARK_DEFINE(46b10ColsValidity, (int64_t)4 * 1024 * 1024 * 1024, 10, 256, 1);
CSBM_BENCHMARK_DEFINE(4Gb10ColsValidity, (int64_t)4 * 1024 * 1024 * 1024, 10, 256, 1);
CSBM_BENCHMARK_DEFINE(4Gb4ColsNoSplits, (int64_t)1 * 1024 * 1024 * 1024, 4, 0, 1);
CSBM_BENCHMARK_DEFINE(4Gb4ColsValidityNoSplits, (int64_t)1 * 1024 * 1024 * 1024, 4, 0, 1);

CSBM_BENCHMARK_DEFINE(1Gb512ColsNoValidity, (int64_t)1 * 1024 * 1024 * 1024, 512, 256, 0);
CSBM_BENCHMARK_DEFINE(1Gb512ColsValidity, (int64_t)1 * 1024 * 1024 * 1024, 512, 256, 1);
CSBM_BENCHMARK_DEFINE(1Gb10ColsNoValidity, (int64_t)1 * 1024 * 1024 * 1024, 10, 256, 0);
CSBM_BENCHMARK_DEFINE(1Gb10ColsValidity, (int64_t)1 * 1024 * 1024 * 1024, 10, 256, 1);
CSBM_BENCHMARK_DEFINE(1Gb1ColNoSplits, (int64_t)1 * 1024 * 1024 * 1024, 1, 0, 1);
CSBM_BENCHMARK_DEFINE(1Gb1ColValidityNoSplits, (int64_t)1 * 1024 * 1024 * 1024, 1, 0, 1);

#define CSBM_STRINGS_BENCHMARK_DEFINE(name, size, num_columns, num_splits, validity) \
BENCHMARK_DEFINE_F(ContiguousSplitStrings, name)(::benchmark::State & state) \
Expand All @@ -179,8 +196,12 @@ CSBM_STRINGS_BENCHMARK_DEFINE(4Gb512ColsNoValidity, (int64_t)4 * 1024 * 1024 * 1
CSBM_STRINGS_BENCHMARK_DEFINE(4Gb512ColsValidity, (int64_t)4 * 1024 * 1024 * 1024, 512, 256, 1);
CSBM_STRINGS_BENCHMARK_DEFINE(4Gb10ColsNoValidity, (int64_t)4 * 1024 * 1024 * 1024, 10, 256, 0);
CSBM_STRINGS_BENCHMARK_DEFINE(4Gb10ColsValidity, (int64_t)4 * 1024 * 1024 * 1024, 10, 256, 1);
CSBM_STRINGS_BENCHMARK_DEFINE(4Gb4ColsNoSplits, (int64_t)1 * 1024 * 1024 * 1024, 4, 0, 0);
CSBM_STRINGS_BENCHMARK_DEFINE(4Gb4ColsValidityNoSplits, (int64_t)1 * 1024 * 1024 * 1024, 4, 0, 1);

CSBM_STRINGS_BENCHMARK_DEFINE(1Gb512ColsNoValidity, (int64_t)1 * 1024 * 1024 * 1024, 512, 256, 0);
CSBM_STRINGS_BENCHMARK_DEFINE(1Gb512ColsValidity, (int64_t)1 * 1024 * 1024 * 1024, 512, 256, 1);
CSBM_STRINGS_BENCHMARK_DEFINE(1Gb10ColsNoValidity, (int64_t)1 * 1024 * 1024 * 1024, 10, 256, 0);
CSBM_STRINGS_BENCHMARK_DEFINE(1Gb10ColsValidity, (int64_t)1 * 1024 * 1024 * 1024, 10, 256, 1);
CSBM_STRINGS_BENCHMARK_DEFINE(1Gb1ColNoSplits, (int64_t)1 * 1024 * 1024 * 1024, 1, 0, 0);
CSBM_STRINGS_BENCHMARK_DEFINE(1Gb1ColValidityNoSplits, (int64_t)1 * 1024 * 1024 * 1024, 1, 0, 1);
2 changes: 1 addition & 1 deletion cpp/doxygen/regex.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ The details are based on features documented at https://www.regular-expressions.
| Literal character | Any character except `[\^$.⎮?*+()` | All characters except the listed special characters match a single instance of themselves | `a` matches `a` |
| Literal curly braces | `{` and `}` | `{` and `}` are literal characters, unless they are part of a valid regular expression token such as a quantifier `{3}` | `{` matches `{` |
| Backslash escapes a metacharacter | `\` followed by any of `[\^$.⎮?*+(){}` | A backslash escapes special characters to suppress their special meaning | `\*` matches `*` |
| Hexadecimal escape | `\xFF` where `FF` are 2 hexadecimal digits | Matches the character at the specified position in the code page | `\xA9` matches `©` |
| Hexadecimal escape | `\xFF` where `FF` are 2 hexadecimal digits | Matches the character at the specified position in the ASCII table | `\x40` matches `@` |
| Character escape | `\n`, `\r` and `\t` | Match a line-feed (LF) character, carriage return (CR) character and a tab character respectively | `\r\n` matches a Windows CRLF line break |
| Character escape | `\a` | Match the "alert" or "bell" control character (ASCII 0x07) | |
| Character escape | `\f` | Match the form-feed control character (ASCII 0x0C) | |
Expand Down
4 changes: 2 additions & 2 deletions cpp/include/cudf/ast/detail/expression_evaluator.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -429,7 +429,7 @@ struct expression_evaluator {
__device__ __forceinline__ void evaluate(
expression_result<ResultSubclass, T, result_has_nulls>& output_object,
cudf::size_type const row_index,
IntermediateDataType<has_nulls>* thread_intermediate_storage)
IntermediateDataType<has_nulls>* thread_intermediate_storage) const
{
evaluate(output_object, row_index, row_index, row_index, thread_intermediate_storage);
}
Expand All @@ -452,7 +452,7 @@ struct expression_evaluator {
cudf::size_type const left_row_index,
cudf::size_type const right_row_index,
cudf::size_type const output_row_index,
IntermediateDataType<has_nulls>* thread_intermediate_storage)
IntermediateDataType<has_nulls>* thread_intermediate_storage) const
{
cudf::size_type operator_source_index{0};
for (cudf::size_type operator_index = 0; operator_index < plan.operators.size();
Expand Down
4 changes: 2 additions & 2 deletions cpp/include/cudf/detail/null_mask.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -268,9 +268,9 @@ std::pair<rmm::device_buffer, size_type> bitmask_or(
* @param mask_size_bits The number of bits to be ANDed in each mask
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned device_buffer
* @return rmm::device_buffer Output bitmask
* @return Count of set bits
*/
void inplace_bitmask_and(
cudf::size_type inplace_bitmask_and(
device_span<bitmask_type> dest_mask,
host_span<bitmask_type const*> masks,
host_span<size_type const> masks_begin_bits,
Expand Down
Loading

0 comments on commit 0652f8f

Please sign in to comment.