diff --git a/CHANGELOG.md b/CHANGELOG.md index 9722e1f0f80..ff12b5cd30a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -56,6 +56,7 @@ - PR #3193 Add cuPy as a formal dependency - PR #3195 Support for zero columned `table_view` - PR #3165 Java device memory size for string category +- PR #3205 Move transform files to legacy - PR #3202 Rename and move error.hpp to public headers ## Bug Fixes diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index 11bc8f714b7..5505e22c534 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -77,7 +77,7 @@ test: - test -f $PREFIX/include/cudf/rolling.hpp - test -f $PREFIX/include/cudf/legacy/search.hpp - test -f $PREFIX/include/cudf/stream_compaction.hpp - - test -f $PREFIX/include/cudf/transform.hpp + - test -f $PREFIX/include/cudf/legacy/transform.hpp - test -f $PREFIX/include/cudf/types.h - test -f $PREFIX/include/cudf/types.hpp - test -f $PREFIX/include/cudf/legacy/unary.hpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 26bc3c5f1b2..aed93db7a4c 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -388,9 +388,9 @@ add_library(cudf src/jit/parser.cpp src/jit/cache.cpp src/jit/launcher.cpp - src/transform/transform.cpp + src/transform/legacy/transform.cpp src/transform/jit/code/kernel.cpp - src/transform/nans_to_nulls.cu + src/transform/legacy/nans_to_nulls.cu src/bitmask/legacy/bitmask_ops.cu src/stream_compaction/apply_boolean_mask.cu src/stream_compaction/drop_nulls.cu diff --git a/cpp/include/cudf/transform.hpp b/cpp/include/cudf/legacy/transform.hpp similarity index 97% rename from cpp/include/cudf/transform.hpp rename to cpp/include/cudf/legacy/transform.hpp index 7baa53bce8a..5e42e1f803d 100644 --- a/cpp/include/cudf/transform.hpp +++ b/cpp/include/cudf/legacy/transform.hpp @@ -17,8 +17,8 @@ #ifndef CUDF_TRANSFORM_HPP #define CUDF_TRANSFORM_HPP -#include "cudf.h" -#include "types.h" +#include "cudf/cudf.h" +#include "cudf/types.h" namespace cudf { diff --git a/cpp/src/bitmask/legacy/valid_if.cuh b/cpp/src/bitmask/legacy/valid_if.cuh index 515dc8a7088..adaad97fd2a 100644 --- a/cpp/src/bitmask/legacy/valid_if.cuh +++ b/cpp/src/bitmask/legacy/valid_if.cuh @@ -18,7 +18,7 @@ #define __BITMASK_VALID_IF_CUH__ #include -#include +#include #include #include #include @@ -42,9 +42,9 @@ constexpr int block_size = 256; * @return[out] result of each block is returned in thread 0. */ template -__device__ __inline__ gdf_size_type single_lane_popc_block_reduce(bit_container bit_mask) { +__device__ __inline__ cudf::size_type single_lane_popc_block_reduce(bit_container bit_mask) { - static __shared__ gdf_size_type warp_count[warp_size]; + static __shared__ cudf::size_type warp_count[warp_size]; int lane_id = (threadIdx.x % warp_size); int warp_id = (threadIdx.x / warp_size); @@ -56,7 +56,7 @@ __device__ __inline__ gdf_size_type single_lane_popc_block_reduce(bit_container } __syncthreads(); - gdf_size_type block_count = 0; + cudf::size_type block_count = 0; if (warp_id == 0) { @@ -65,12 +65,12 @@ __device__ __inline__ gdf_size_type single_lane_popc_block_reduce(bit_container // Maximum block size is 1024 and 1024 / 32 = 32 // so one single warp is enough to do the reduction over different warps - gdf_size_type count = + cudf::size_type count = (lane_id < (blockDim.x / warp_size)) ? warp_count[lane_id] : 0; __shared__ - typename cub::WarpReduce::TempStorage temp_storage; - block_count = cub::WarpReduce(temp_storage).Sum(count); + typename cub::WarpReduce::TempStorage temp_storage; + block_count = cub::WarpReduce(temp_storage).Sum(count); } diff --git a/cpp/src/bitmask/valid_if.cuh b/cpp/src/bitmask/valid_if.cuh deleted file mode 100644 index adaad97fd2a..00000000000 --- a/cpp/src/bitmask/valid_if.cuh +++ /dev/null @@ -1,184 +0,0 @@ -/* - * Copyright (c) 2019, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __BITMASK_VALID_IF_CUH__ -#define __BITMASK_VALID_IF_CUH__ - -#include -#include -#include -#include -#include - -#include - -using bit_mask::bit_mask_t; - -namespace cudf { - -namespace detail { - -constexpr int warp_size = 32; - -constexpr int block_size = 256; - -/** - * @brief for each warp in the block do a reduction (summation) of the - * `__popc(bit_mask)` on a certain lane (default is lane 0). - * @param[in] bit_mask The bit_mask to be reduced. - * @return[out] result of each block is returned in thread 0. - */ -template -__device__ __inline__ cudf::size_type single_lane_popc_block_reduce(bit_container bit_mask) { - - static __shared__ cudf::size_type warp_count[warp_size]; - - int lane_id = (threadIdx.x % warp_size); - int warp_id = (threadIdx.x / warp_size); - - // Assuming one lane of each warp holds the value that we want to perform - // reduction - if (lane_id == lane) { - warp_count[warp_id] = __popc(bit_mask); - } - __syncthreads(); - - cudf::size_type block_count = 0; - - if (warp_id == 0) { - - static_assert(block_size <= 1024, - "Reduction code only works with a block size less or equal to 1024."); - - // Maximum block size is 1024 and 1024 / 32 = 32 - // so one single warp is enough to do the reduction over different warps - cudf::size_type count = - (lane_id < (blockDim.x / warp_size)) ? warp_count[lane_id] : 0; - - __shared__ - typename cub::WarpReduce::TempStorage temp_storage; - block_count = cub::WarpReduce(temp_storage).Sum(count); - - } - - return block_count; - -} - -template -__global__ void valid_if_kernel( - const bit_container* source_mask, - bit_container* destination_mask, - predicate p, - size_type num_bits, - size_type* p_valid_count - ){ - - static_assert(warp_size == util::size_in_bits(), - "warp size is different from bit_container size."); - - size_type bit_index_base = blockIdx.x * blockDim.x; - - while (bit_index_base < num_bits) { - - size_type bit_index = bit_index_base + threadIdx.x; - - bool thread_active = bit_index < num_bits; - bit_container active_threads = - __ballot_sync(0xffffffff, thread_active); - - bit_container result_mask = 0; - - if(thread_active){ - - bool const predicate_is_true = p(bit_index); - const bit_container ballot_result = - __ballot_sync(active_threads, predicate_is_true); - - // Only one thread writes output - if (0 == threadIdx.x % warp_size) { - const size_type container_index = - util::detail::bit_container_index(bit_index); - - result_mask = source_mask_valid ? - source_mask[container_index] & ballot_result : ballot_result; - destination_mask[container_index] = result_mask; - } - - } - - result_mask = single_lane_popc_block_reduce(result_mask); - if(0 == threadIdx.x){ - atomicAdd(p_valid_count, result_mask); - } - - bit_index_base += blockDim.x * gridDim.x; - - } - -} - -} // namespace detail - - /** - * @brief Generate a bitmask where every bit is marked with valid - * if and only if predicate(bit) and source_mask(bit) are both true. - * - * @param source_mask The source mask - * @param p The predicate that has an operator() member function - * @param num_bits Number of bits - * @param stream An optional cudaStream_t object - * @return The generated bitmask as well as its null_count - */ -template -std::pair valid_if( - const bit_container* source_mask, - const predicate& p, - size_type num_bits, - cudaStream_t stream = 0 - ){ - - bit_container* destination_mask = nullptr; - CUDF_EXPECTS(GDF_SUCCESS == bit_mask::create_bit_mask(&destination_mask, num_bits), - "Failed to allocate bit_mask buffer."); - - auto kernel = source_mask ? - detail::valid_if_kernel : - detail::valid_if_kernel ; - - rmm::device_vector valid_count(1); - - const int grid_size = util::cuda::grid_config_1d(num_bits, detail::block_size).num_blocks; - - // launch the kernel - kernel<<>>( - source_mask, destination_mask, p, num_bits, valid_count.data().get()); - - size_type valid_count_host; - CUDA_TRY(cudaMemcpyAsync(&valid_count_host, valid_count.data().get(), - sizeof(size_type), cudaMemcpyDeviceToHost, stream)); - - // Synchronize the stream before null_count is updated on the host. - cudaStreamSynchronize(stream); - size_type null_count = num_bits - valid_count_host; - - CHECK_STREAM(stream); - return std::pair(destination_mask, null_count); - -} - -} // namespace cudf -#endif diff --git a/cpp/src/stream_compaction/drop_duplicates.cu b/cpp/src/stream_compaction/drop_duplicates.cu index a4985a551fe..cf03e41fc56 100644 --- a/cpp/src/stream_compaction/drop_duplicates.cu +++ b/cpp/src/stream_compaction/drop_duplicates.cu @@ -27,7 +27,7 @@ #include #include #include
-#include +#include #include #include diff --git a/cpp/src/strings/copying/copying.cu b/cpp/src/strings/copying/copying.cu index 28e9f041e1a..33add200624 100644 --- a/cpp/src/strings/copying/copying.cu +++ b/cpp/src/strings/copying/copying.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include +#include #include #include #include diff --git a/cpp/src/strings/utilities.cuh b/cpp/src/strings/utilities.cuh index cb6b5b4fa9e..e7961a4fbf4 100644 --- a/cpp/src/strings/utilities.cuh +++ b/cpp/src/strings/utilities.cuh @@ -16,7 +16,7 @@ #pragma once #include -#include +#include #include #include diff --git a/cpp/src/transform/nans_to_nulls.cu b/cpp/src/transform/legacy/nans_to_nulls.cu similarity index 100% rename from cpp/src/transform/nans_to_nulls.cu rename to cpp/src/transform/legacy/nans_to_nulls.cu diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/legacy/transform.cpp similarity index 99% rename from cpp/src/transform/transform.cpp rename to cpp/src/transform/legacy/transform.cpp index 9bd69b750ea..38e38af792e 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/legacy/transform.cpp @@ -30,7 +30,7 @@ #include #include #include -#include "jit/code/code.h" +#include "../jit/code/code.h" #include #include diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 4f4d854f8c4..bbcb710558b 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -189,12 +189,12 @@ set(LEGACY_BITMASK_TEST_SRC ConfigureTest(LEGACY_BITMASK_TEST "${LEGACY_BITMASK_TEST_SRC}") ################################################################################################### -# - nans_to_nulls tests --------------------------------------------------------------------------- +# - legacy nans_to_nulls tests --------------------------------------------------------------------------- -set(NANS_TO_NULLS_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/bitmask/nans_to_nulls_tests.cu") +set(LEGACY_NANS_TO_NULLS_TEST_SRC + "${CMAKE_CURRENT_SOURCE_DIR}/bitmask/legacy/nans_to_nulls_tests.cu") -ConfigureTest(NANS_TO_NULLS_TEST "${NANS_TO_NULLS_TEST_SRC}") +ConfigureTest(LEGACY_NANS_TO_NULLS_TEST "${LEGACY_NANS_TO_NULLS_TEST_SRC}") ################################################################################################### @@ -282,12 +282,12 @@ set(BINARY_TEST_SRC ConfigureTest(BINARY_TEST "${BINARY_TEST_SRC}") ################################################################################################### -# - unary transform tests ------------------------------------------------------------------------- +# - legacy unary transform tests ------------------------------------------------------------------------- -set(TRANSFORM_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/transform/integration/unary-operation-integration-test.cu") +set(LEGACY_TRANSFORM_TEST_SRC + "${CMAKE_CURRENT_SOURCE_DIR}/transform/integration/legacy/unary-operation-integration-test.cu") -ConfigureTest(TRANSFORM_TEST "${TRANSFORM_TEST_SRC}") +ConfigureTest(LEGACY_TRANSFORM_TEST "${LEGACY_TRANSFORM_TEST_SRC}") ################################################################################################### # - jit cache tests ------------------------------------------------------------------------------- diff --git a/cpp/tests/bitmask/nans_to_nulls_tests.cu b/cpp/tests/bitmask/legacy/nans_to_nulls_tests.cu similarity index 99% rename from cpp/tests/bitmask/nans_to_nulls_tests.cu rename to cpp/tests/bitmask/legacy/nans_to_nulls_tests.cu index 71f1edcc155..cadfb614812 100644 --- a/cpp/tests/bitmask/nans_to_nulls_tests.cu +++ b/cpp/tests/bitmask/legacy/nans_to_nulls_tests.cu @@ -28,7 +28,7 @@ #include #include -#include +#include using bit_mask::bit_mask_t; diff --git a/cpp/tests/transform/integration/unary-operation-integration-test.cu b/cpp/tests/transform/integration/legacy/unary-operation-integration-test.cu similarity index 98% rename from cpp/tests/transform/integration/unary-operation-integration-test.cu rename to cpp/tests/transform/integration/legacy/unary-operation-integration-test.cu index 4fac697d34c..030aecbb521 100644 --- a/cpp/tests/transform/integration/unary-operation-integration-test.cu +++ b/cpp/tests/transform/integration/legacy/unary-operation-integration-test.cu @@ -18,8 +18,8 @@ */ #include -#include -#include "assert-unary.h" +#include +#include "../assert-unary.h" #include #include diff --git a/python/cudf/cudf/_lib/includes/unaryops.pxd b/python/cudf/cudf/_lib/includes/unaryops.pxd index 4eba682f746..aeec3125d72 100644 --- a/python/cudf/cudf/_lib/includes/unaryops.pxd +++ b/python/cudf/cudf/_lib/includes/unaryops.pxd @@ -49,7 +49,7 @@ cdef extern from "cudf/cudf.h" nogil: gdf_column *output ) except + -cdef extern from "cudf/transform.hpp" namespace "cudf" nogil: +cdef extern from "cudf/legacy/transform.hpp" namespace "cudf" nogil: cdef gdf_column transform( const gdf_column& input, const string& ptx,