From 07845438855c3787dc5daec0195bd75a399cd598 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Sun, 16 Jul 2023 17:00:21 -0400 Subject: [PATCH] Separate MurmurHash32 from hash_functions.cuh (#13681) Moves the `MurmurHash32` class definition from `hash_functions.cuh` to a new `murmur32.cuh` file. Also moves the new file and the `hash_functions.cuh` from `cpp/include/cudf/detail/utilities/` to `cpp/include/cudf/hashing/detail` The hash functions were redeclared from the `cudf::detail` namespace to the `cudf::hashing::detail` namespace. The remaining changes are side-effects of making the above changes. This PR is a follow on to PR #13626 No new logic or functions have changed. Only internal detail headers have been refactored. Authors: - David Wendt (https://github.com/davidwendt) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) - Bradley Dice (https://github.com/bdice) - Karthikeyan (https://github.com/karthikeyann) URL: https://github.com/rapidsai/cudf/pull/13681 --- conda/recipes/libcudf/meta.yaml | 2 +- cpp/CMakeLists.txt | 4 +- .../cudf/detail/aggregation/result_cache.hpp | 2 +- cpp/include/cudf/detail/join.hpp | 2 +- .../cudf/detail/utilities/hash_functions.cuh | 381 ------------------ cpp/include/cudf/hashing.hpp | 6 +- .../cudf/hashing/detail/default_hash.cuh | 35 ++ .../cudf/hashing/detail/hash_functions.cuh | 66 +++ .../cudf/{ => hashing}/detail/hashing.hpp | 27 +- .../hashing/detail/murmurhash3_x86_32.cuh | 194 +++++++++ cpp/include/cudf/join.hpp | 9 +- .../cudf/table/experimental/row_operators.cuh | 6 +- cpp/include/cudf/table/row_operators.cuh | 4 +- cpp/include/nvtext/minhash.hpp | 4 +- cpp/src/column/column_view.cpp | 2 +- cpp/src/groupby/hash/groupby.cu | 14 +- cpp/src/hash/concurrent_unordered_map.cuh | 4 +- cpp/src/hash/hashing.cu | 44 +- cpp/src/hash/md5_hash.cu | 113 +++++- .../{murmur_hash.cu => murmurhash3_x86_32.cu} | 25 +- ...ur_hash.cu => spark_murmurhash3_x86_32.cu} | 75 ++-- cpp/src/hash/unordered_multiset.cuh | 7 +- cpp/src/io/json/json_gpu.cu | 6 +- cpp/src/io/json/json_tree.cu | 14 +- cpp/src/io/orc/dict_enc.cu | 2 +- cpp/src/io/parquet/chunk_dict.cu | 2 +- cpp/src/io/parquet/page_data.cu | 5 +- cpp/src/join/join_common_utils.hpp | 5 +- cpp/src/join/mixed_join_common_utils.cuh | 3 +- cpp/src/partitioning/partitioning.cu | 32 +- cpp/src/search/contains_table.cu | 1 - .../stream_compaction_common.hpp | 1 - cpp/src/text/generate_ngrams.cu | 5 +- cpp/src/text/minhash.cu | 7 +- cpp/src/text/subword/bpe_tokenizer.cu | 1 - cpp/src/text/subword/bpe_tokenizer.cuh | 4 +- cpp/src/text/subword/load_merges_file.cu | 1 - cpp/tests/CMakeLists.txt | 3 +- ...3_test.cpp => murmurhash3_x86_32_test.cpp} | 39 +- ....cpp => spark_murmurhash3_x86_32_test.cpp} | 65 +-- cpp/tests/io/json_tree.cpp | 2 +- 41 files changed, 624 insertions(+), 600 deletions(-) delete mode 100644 cpp/include/cudf/detail/utilities/hash_functions.cuh create mode 100644 cpp/include/cudf/hashing/detail/default_hash.cuh create mode 100644 cpp/include/cudf/hashing/detail/hash_functions.cuh rename cpp/include/cudf/{ => hashing}/detail/hashing.hpp (74%) create mode 100644 cpp/include/cudf/hashing/detail/murmurhash3_x86_32.cuh rename cpp/src/hash/{murmur_hash.cu => murmurhash3_x86_32.cu} (67%) rename cpp/src/hash/{spark_murmur_hash.cu => spark_murmurhash3_x86_32.cu} (83%) rename cpp/tests/hashing/{murmur3_test.cpp => murmurhash3_x86_32_test.cpp} (91%) rename cpp/tests/hashing/{spark_murmur3_test.cpp => spark_murmurhash3_x86_32_test.cpp} (90%) diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index 4e9b5e2fdc1..f4b9945de0f 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -139,7 +139,6 @@ outputs: - test -f $PREFIX/include/cudf/detail/groupby.hpp - test -f $PREFIX/include/cudf/detail/groupby/group_replace_nulls.hpp - test -f $PREFIX/include/cudf/detail/groupby/sort_helper.hpp - - test -f $PREFIX/include/cudf/detail/hashing.hpp - test -f $PREFIX/include/cudf/detail/interop.hpp - test -f $PREFIX/include/cudf/detail/is_element_valid.hpp - test -f $PREFIX/include/cudf/detail/join.hpp @@ -192,6 +191,7 @@ outputs: - test -f $PREFIX/include/cudf/fixed_point/temporary.hpp - test -f $PREFIX/include/cudf/groupby.hpp - test -f $PREFIX/include/cudf/hashing.hpp + - test -f $PREFIX/include/cudf/hashing/detail/hashing.hpp - test -f $PREFIX/include/cudf/interop.hpp - test -f $PREFIX/include/cudf/io/avro.hpp - test -f $PREFIX/include/cudf/io/csv.hpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 0742d039092..27bde5dda73 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -344,8 +344,8 @@ add_library( src/groupby/sort/sort_helper.cu src/hash/hashing.cu src/hash/md5_hash.cu - src/hash/murmur_hash.cu - src/hash/spark_murmur_hash.cu + src/hash/murmurhash3_x86_32.cu + src/hash/spark_murmurhash3_x86_32.cu src/interop/dlpack.cpp src/interop/from_arrow.cu src/interop/to_arrow.cu diff --git a/cpp/include/cudf/detail/aggregation/result_cache.hpp b/cpp/include/cudf/detail/aggregation/result_cache.hpp index b1a2a369d22..41eec156c47 100644 --- a/cpp/include/cudf/detail/aggregation/result_cache.hpp +++ b/cpp/include/cudf/detail/aggregation/result_cache.hpp @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/include/cudf/detail/join.hpp b/cpp/include/cudf/detail/join.hpp index 4a34eb6b328..6fcf10aef57 100644 --- a/cpp/include/cudf/detail/join.hpp +++ b/cpp/include/cudf/detail/join.hpp @@ -16,7 +16,7 @@ #pragma once #include -#include +#include #include #include #include diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh deleted file mode 100644 index e57822f3fdb..00000000000 --- a/cpp/include/cudf/detail/utilities/hash_functions.cuh +++ /dev/null @@ -1,381 +0,0 @@ -/* - * Copyright (c) 2017-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -namespace cudf { -namespace detail { - -/** - * Normalization of floating point NaNs, passthrough for all other values. - */ -template -T __device__ inline normalize_nans(T const& key) -{ - if constexpr (cudf::is_floating_point()) { - if (std::isnan(key)) { return std::numeric_limits::quiet_NaN(); } - } - return key; -} - -/** - * Normalization of floating point NaNs and zeros, passthrough for all other values. - */ -template -T __device__ inline normalize_nans_and_zeros(T const& key) -{ - if constexpr (cudf::is_floating_point()) { - if (key == T{0.0}) { return T{0.0}; } - } - return normalize_nans(key); -} - -__device__ inline uint32_t rotate_bits_left(uint32_t x, uint32_t r) -{ - // This function is equivalent to (x << r) | (x >> (32 - r)) - return __funnelshift_l(x, x, r); -} - -__device__ inline uint32_t rotate_bits_right(uint32_t x, uint32_t r) -{ - // This function is equivalent to (x >> r) | (x << (32 - r)) - return __funnelshift_r(x, x, r); -} - -__device__ inline uint64_t rotate_bits_right(uint64_t x, uint32_t r) -{ - return (x >> r) | (x << (64 - r)); -} - -// Swap the endianness of a 32 bit value -__device__ inline uint32_t swap_endian(uint32_t x) -{ - // The selector 0x0123 reverses the byte order - return __byte_perm(x, 0, 0x0123); -} - -// Swap the endianness of a 64 bit value -// There is no CUDA intrinsic for permuting bytes in 64 bit integers -__device__ inline uint64_t swap_endian(uint64_t x) -{ - // Reverse the endianness of each 32 bit section - uint32_t low_bits = swap_endian(static_cast(x)); - uint32_t high_bits = swap_endian(static_cast(x >> 32)); - // Reassemble a 64 bit result, swapping the low bits and high bits - return (static_cast(low_bits) << 32) | (static_cast(high_bits)); -}; - -template -struct hash_circular_buffer { - uint8_t storage[capacity]; - uint8_t* cur; - int available_space{capacity}; - hash_step_callable hash_step; - - __device__ inline hash_circular_buffer(hash_step_callable hash_step) - : cur{storage}, hash_step{hash_step} - { - } - - __device__ inline void put(uint8_t const* in, int size) - { - int copy_start = 0; - while (size >= available_space) { - // The buffer will be filled by this chunk of data. Copy a chunk of the - // data to fill the buffer and trigger a hash step. - memcpy(cur, in + copy_start, available_space); - hash_step(storage); - size -= available_space; - copy_start += available_space; - cur = storage; - available_space = capacity; - } - // The buffer will not be filled by the remaining data. That is, `size >= 0 - // && size < capacity`. We copy the remaining data into the buffer but do - // not trigger a hash step. - memcpy(cur, in + copy_start, size); - cur += size; - available_space -= size; - } - - __device__ inline void pad(int const space_to_leave) - { - if (space_to_leave > available_space) { - memset(cur, 0x00, available_space); - hash_step(storage); - cur = storage; - available_space = capacity; - } - memset(cur, 0x00, available_space - space_to_leave); - cur += available_space - space_to_leave; - available_space = space_to_leave; - } - - __device__ inline uint8_t const& operator[](int idx) const { return storage[idx]; } -}; - -// Get a uint8_t pointer to a column element and its size as a pair. -template -auto __device__ inline get_element_pointer_and_size(Element const& element) -{ - if constexpr (is_fixed_width() && !is_chrono()) { - return thrust::make_pair(reinterpret_cast(&element), sizeof(Element)); - } else { - CUDF_UNREACHABLE("Unsupported type."); - } -} - -template <> -auto __device__ inline get_element_pointer_and_size(string_view const& element) -{ - return thrust::make_pair(reinterpret_cast(element.data()), element.size_bytes()); -} - -/** - * Modified GPU implementation of - * https://johnnylee-sde.github.io/Fast-unsigned-integer-to-hex-string/ - * Copyright (c) 2015 Barry Clark - * Licensed under the MIT license. - * See file LICENSE for detail or copy at https://opensource.org/licenses/MIT - */ -void __device__ inline uint32ToLowercaseHexString(uint32_t num, char* destination) -{ - // Transform 0xABCD'1234 => 0x0000'ABCD'0000'1234 => 0x0B0A'0D0C'0201'0403 - uint64_t x = num; - x = ((x & 0xFFFF'0000u) << 16) | ((x & 0xFFFF)); - x = ((x & 0x000F'0000'000Fu) << 8) | ((x & 0x00F0'0000'00F0u) >> 4) | - ((x & 0x0F00'0000'0F00u) << 16) | ((x & 0xF000'0000'F000) << 4); - - // Calculate a mask of ascii value offsets for bytes that contain alphabetical hex digits - uint64_t offsets = (((x + 0x0606'0606'0606'0606) >> 4) & 0x0101'0101'0101'0101) * 0x27; - - x |= 0x3030'3030'3030'3030; - x += offsets; - std::memcpy(destination, reinterpret_cast(&x), 8); -} - -// MurmurHash3_32 implementation from -// https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp -//----------------------------------------------------------------------------- -// MurmurHash3 was written by Austin Appleby, and is placed in the public -// domain. The author hereby disclaims copyright to this source code. -// Note - The x86 and x64 versions do _not_ produce the same results, as the -// algorithms are optimized for their respective platforms. You can still -// compile and run any of them on any platform, but your performance with the -// non-native version will be less than optimal. -template -struct MurmurHash3_32 { - using result_type = hash_value_type; - - constexpr MurmurHash3_32() = default; - constexpr MurmurHash3_32(uint32_t seed) : m_seed(seed) {} - - [[nodiscard]] __device__ inline uint32_t fmix32(uint32_t h) const - { - h ^= h >> 16; - h *= 0x85ebca6b; - h ^= h >> 13; - h *= 0xc2b2ae35; - h ^= h >> 16; - return h; - } - - [[nodiscard]] __device__ inline uint32_t getblock32(std::byte const* data, - cudf::size_type offset) const - { - // Read a 4-byte value from the data pointer as individual bytes for safe - // unaligned access (very likely for string types). - auto const block = reinterpret_cast(data + offset); - return block[0] | (block[1] << 8) | (block[2] << 16) | (block[3] << 24); - } - - [[nodiscard]] result_type __device__ inline operator()(Key const& key) const - { - return compute(detail::normalize_nans_and_zeros(key)); - } - - template - result_type __device__ inline compute(T const& key) const - { - return compute_bytes(reinterpret_cast(&key), sizeof(T)); - } - - result_type __device__ inline compute_remaining_bytes(std::byte const* data, - cudf::size_type len, - cudf::size_type tail_offset, - result_type h) const - { - // Process remaining bytes that do not fill a four-byte chunk. - uint32_t k1 = 0; - switch (len % 4) { - case 3: k1 ^= std::to_integer(data[tail_offset + 2]) << 16; [[fallthrough]]; - case 2: k1 ^= std::to_integer(data[tail_offset + 1]) << 8; [[fallthrough]]; - case 1: - k1 ^= std::to_integer(data[tail_offset]); - k1 *= c1; - k1 = cudf::detail::rotate_bits_left(k1, rot_c1); - k1 *= c2; - h ^= k1; - }; - return h; - } - - result_type __device__ compute_bytes(std::byte const* data, cudf::size_type const len) const - { - constexpr cudf::size_type BLOCK_SIZE = 4; - cudf::size_type const nblocks = len / BLOCK_SIZE; - cudf::size_type const tail_offset = nblocks * BLOCK_SIZE; - result_type h = m_seed; - - // Process all four-byte chunks. - for (cudf::size_type i = 0; i < nblocks; i++) { - uint32_t k1 = getblock32(data, i * BLOCK_SIZE); - k1 *= c1; - k1 = cudf::detail::rotate_bits_left(k1, rot_c1); - k1 *= c2; - h ^= k1; - h = cudf::detail::rotate_bits_left(h, rot_c2); - h = h * 5 + c3; - } - - h = compute_remaining_bytes(data, len, tail_offset, h); - - // Finalize hash. - h ^= len; - h = fmix32(h); - return h; - } - - private: - uint32_t m_seed{cudf::DEFAULT_HASH_SEED}; - static constexpr uint32_t c1 = 0xcc9e2d51; - static constexpr uint32_t c2 = 0x1b873593; - static constexpr uint32_t c3 = 0xe6546b64; - static constexpr uint32_t rot_c1 = 15; - static constexpr uint32_t rot_c2 = 13; -}; - -template <> -hash_value_type __device__ inline MurmurHash3_32::operator()(bool const& key) const -{ - return compute(static_cast(key)); -} - -template <> -hash_value_type __device__ inline MurmurHash3_32::operator()(float const& key) const -{ - return compute(detail::normalize_nans_and_zeros(key)); -} - -template <> -hash_value_type __device__ inline MurmurHash3_32::operator()(double const& key) const -{ - return compute(detail::normalize_nans_and_zeros(key)); -} - -template <> -hash_value_type __device__ inline MurmurHash3_32::operator()( - cudf::string_view const& key) const -{ - auto const data = reinterpret_cast(key.data()); - auto const len = key.size_bytes(); - return compute_bytes(data, len); -} - -template <> -hash_value_type __device__ inline MurmurHash3_32::operator()( - numeric::decimal32 const& key) const -{ - return compute(key.value()); -} - -template <> -hash_value_type __device__ inline MurmurHash3_32::operator()( - numeric::decimal64 const& key) const -{ - return compute(key.value()); -} - -template <> -hash_value_type __device__ inline MurmurHash3_32::operator()( - numeric::decimal128 const& key) const -{ - return compute(key.value()); -} - -template <> -hash_value_type __device__ inline MurmurHash3_32::operator()( - cudf::list_view const& key) const -{ - CUDF_UNREACHABLE("List column hashing is not supported"); -} - -template <> -hash_value_type __device__ inline MurmurHash3_32::operator()( - cudf::struct_view const& key) const -{ - CUDF_UNREACHABLE("Direct hashing of struct_view is not supported"); -} - -/** - * @brief This hash function simply returns the value that is asked to be hash - * reinterpreted as the result_type of the functor. - */ -template -struct IdentityHash { - using result_type = hash_value_type; - IdentityHash() = default; - constexpr IdentityHash(uint32_t seed) : m_seed(seed) {} - - template - constexpr std::enable_if_t, return_type> operator()( - Key const& key) const - { - CUDF_UNREACHABLE("IdentityHash does not support this data type"); - } - - template - constexpr std::enable_if_t, return_type> operator()( - Key const& key) const - { - return static_cast(key); - } - - private: - uint32_t m_seed{cudf::DEFAULT_HASH_SEED}; -}; - -template -using default_hash = MurmurHash3_32; - -} // namespace detail -} // namespace cudf diff --git a/cpp/include/cudf/hashing.hpp b/cpp/include/cudf/hashing.hpp index a8f5652c384..67f52d517cc 100644 --- a/cpp/include/cudf/hashing.hpp +++ b/cpp/include/cudf/hashing.hpp @@ -86,7 +86,7 @@ namespace hashing { * * @returns A column where each row is the hash of a row from the input */ -std::unique_ptr murmur_hash3_32( +std::unique_ptr murmurhash3_x86_32( table_view const& input, uint32_t seed = DEFAULT_HASH_SEED, rmm::cuda_stream_view stream = cudf::get_default_stream(), @@ -95,7 +95,7 @@ std::unique_ptr murmur_hash3_32( /** * @brief Computes the MurmurHash3 32-bit of each row in the given table * - * This function computes the hash similar to MurmurHash3_32 with special processing + * This function computes the hash similar to MurmurHash3_x86_32 with special processing * to match Spark's implementation results. * * @param input The table of columns to hash @@ -105,7 +105,7 @@ std::unique_ptr murmur_hash3_32( * * @returns A column where each row is the hash of a row from the input */ -std::unique_ptr spark_murmur_hash3_32( +std::unique_ptr spark_murmurhash3_x86_32( table_view const& input, uint32_t seed = DEFAULT_HASH_SEED, rmm::cuda_stream_view stream = cudf::get_default_stream(), diff --git a/cpp/include/cudf/hashing/detail/default_hash.cuh b/cpp/include/cudf/hashing/detail/default_hash.cuh new file mode 100644 index 00000000000..37e13d8842f --- /dev/null +++ b/cpp/include/cudf/hashing/detail/default_hash.cuh @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace cudf::hashing::detail { + +/** + * @brief The default hash algorithm for use within libcudf internal functions + * + * This is declared here so it may be changed to another algorithm without modifying + * all those places that use it. Internal function implementations are encourage to + * use the `cudf::hashing::detail::default_hash` where possible. + * + * @tparam Key The key type for use by the hash class + */ +template +using default_hash = MurmurHash3_x86_32; + +} // namespace cudf::hashing::detail diff --git a/cpp/include/cudf/hashing/detail/hash_functions.cuh b/cpp/include/cudf/hashing/detail/hash_functions.cuh new file mode 100644 index 00000000000..f681bef6648 --- /dev/null +++ b/cpp/include/cudf/hashing/detail/hash_functions.cuh @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2017-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include + +namespace cudf::hashing::detail { + +/** + * Normalization of floating point NaNs, passthrough for all other values. + */ +template +T __device__ inline normalize_nans(T const& key) +{ + if constexpr (cudf::is_floating_point()) { + if (std::isnan(key)) { return std::numeric_limits::quiet_NaN(); } + } + return key; +} + +/** + * Normalization of floating point NaNs and zeros, passthrough for all other values. + */ +template +T __device__ inline normalize_nans_and_zeros(T const& key) +{ + if constexpr (cudf::is_floating_point()) { + if (key == T{0.0}) { return T{0.0}; } + } + return normalize_nans(key); +} + +__device__ inline uint32_t rotate_bits_left(uint32_t x, uint32_t r) +{ + // This function is equivalent to (x << r) | (x >> (32 - r)) + return __funnelshift_l(x, x, r); +} + +__device__ inline uint32_t rotate_bits_right(uint32_t x, uint32_t r) +{ + // This function is equivalent to (x >> r) | (x << (32 - r)) + return __funnelshift_r(x, x, r); +} + +__device__ inline uint64_t rotate_bits_right(uint64_t x, uint32_t r) +{ + return (x >> r) | (x << (64 - r)); +} + +} // namespace cudf::hashing::detail diff --git a/cpp/include/cudf/detail/hashing.hpp b/cpp/include/cudf/hashing/detail/hashing.hpp similarity index 74% rename from cpp/include/cudf/detail/hashing.hpp rename to cpp/include/cudf/hashing/detail/hashing.hpp index 0447384ffdc..94d6dfe2c39 100644 --- a/cpp/include/cudf/detail/hashing.hpp +++ b/cpp/include/cudf/hashing/detail/hashing.hpp @@ -27,15 +27,15 @@ namespace cudf { namespace hashing { namespace detail { -std::unique_ptr murmur_hash3_32(table_view const& input, - uint32_t seed, - rmm::cuda_stream_view, - rmm::mr::device_memory_resource* mr); +std::unique_ptr murmurhash3_x86_32(table_view const& input, + uint32_t seed, + rmm::cuda_stream_view, + rmm::mr::device_memory_resource* mr); -std::unique_ptr spark_murmur_hash3_32(table_view const& input, - uint32_t seed, - rmm::cuda_stream_view, - rmm::mr::device_memory_resource* mr); +std::unique_ptr spark_murmurhash3_x86_32(table_view const& input, + uint32_t seed, + rmm::cuda_stream_view, + rmm::mr::device_memory_resource* mr); std::unique_ptr md5(table_view const& input, rmm::cuda_stream_view stream, @@ -84,17 +84,6 @@ constexpr std::size_t hash_combine(std::size_t lhs, std::size_t rhs) } } // namespace detail - -/** - * @copydoc cudf::hash - * - * @param stream CUDA stream used for device memory operations and kernel launches. - */ -std::unique_ptr hash(table_view const& input, - hash_id hash_function, - uint32_t seed, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); } // namespace hashing } // namespace cudf diff --git a/cpp/include/cudf/hashing/detail/murmurhash3_x86_32.cuh b/cpp/include/cudf/hashing/detail/murmurhash3_x86_32.cuh new file mode 100644 index 00000000000..6cf0b0fe817 --- /dev/null +++ b/cpp/include/cudf/hashing/detail/murmurhash3_x86_32.cuh @@ -0,0 +1,194 @@ +/* + * Copyright (c) 2017-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace cudf::hashing::detail { + +// MurmurHash3_x86_32 implementation from +// https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp +//----------------------------------------------------------------------------- +// MurmurHash3 was written by Austin Appleby, and is placed in the public +// domain. The author hereby disclaims copyright to this source code. +// Note - The x86 and x64 versions do _not_ produce the same results, as the +// algorithms are optimized for their respective platforms. You can still +// compile and run any of them on any platform, but your performance with the +// non-native version will be less than optimal. +template +struct MurmurHash3_x86_32 { + using result_type = hash_value_type; + + constexpr MurmurHash3_x86_32() = default; + constexpr MurmurHash3_x86_32(uint32_t seed) : m_seed(seed) {} + + [[nodiscard]] __device__ inline uint32_t fmix32(uint32_t h) const + { + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + return h; + } + + [[nodiscard]] __device__ inline uint32_t getblock32(std::byte const* data, + cudf::size_type offset) const + { + // Read a 4-byte value from the data pointer as individual bytes for safe + // unaligned access (very likely for string types). + auto const block = reinterpret_cast(data + offset); + return block[0] | (block[1] << 8) | (block[2] << 16) | (block[3] << 24); + } + + [[nodiscard]] result_type __device__ inline operator()(Key const& key) const + { + return compute(normalize_nans_and_zeros(key)); + } + + template + result_type __device__ inline compute(T const& key) const + { + return compute_bytes(reinterpret_cast(&key), sizeof(T)); + } + + result_type __device__ inline compute_remaining_bytes(std::byte const* data, + cudf::size_type len, + cudf::size_type tail_offset, + result_type h) const + { + // Process remaining bytes that do not fill a four-byte chunk. + uint32_t k1 = 0; + switch (len % 4) { + case 3: k1 ^= std::to_integer(data[tail_offset + 2]) << 16; [[fallthrough]]; + case 2: k1 ^= std::to_integer(data[tail_offset + 1]) << 8; [[fallthrough]]; + case 1: + k1 ^= std::to_integer(data[tail_offset]); + k1 *= c1; + k1 = rotate_bits_left(k1, rot_c1); + k1 *= c2; + h ^= k1; + }; + return h; + } + + result_type __device__ compute_bytes(std::byte const* data, cudf::size_type const len) const + { + constexpr cudf::size_type BLOCK_SIZE = 4; + cudf::size_type const nblocks = len / BLOCK_SIZE; + cudf::size_type const tail_offset = nblocks * BLOCK_SIZE; + result_type h = m_seed; + + // Process all four-byte chunks. + for (cudf::size_type i = 0; i < nblocks; i++) { + uint32_t k1 = getblock32(data, i * BLOCK_SIZE); + k1 *= c1; + k1 = rotate_bits_left(k1, rot_c1); + k1 *= c2; + h ^= k1; + h = rotate_bits_left(h, rot_c2); + h = h * 5 + c3; + } + + h = compute_remaining_bytes(data, len, tail_offset, h); + + // Finalize hash. + h ^= len; + h = fmix32(h); + return h; + } + + private: + uint32_t m_seed{cudf::DEFAULT_HASH_SEED}; + static constexpr uint32_t c1 = 0xcc9e2d51; + static constexpr uint32_t c2 = 0x1b873593; + static constexpr uint32_t c3 = 0xe6546b64; + static constexpr uint32_t rot_c1 = 15; + static constexpr uint32_t rot_c2 = 13; +}; + +template <> +hash_value_type __device__ inline MurmurHash3_x86_32::operator()(bool const& key) const +{ + return compute(static_cast(key)); +} + +template <> +hash_value_type __device__ inline MurmurHash3_x86_32::operator()(float const& key) const +{ + return compute(normalize_nans_and_zeros(key)); +} + +template <> +hash_value_type __device__ inline MurmurHash3_x86_32::operator()(double const& key) const +{ + return compute(normalize_nans_and_zeros(key)); +} + +template <> +hash_value_type __device__ inline MurmurHash3_x86_32::operator()( + cudf::string_view const& key) const +{ + auto const data = reinterpret_cast(key.data()); + auto const len = key.size_bytes(); + return compute_bytes(data, len); +} + +template <> +hash_value_type __device__ inline MurmurHash3_x86_32::operator()( + numeric::decimal32 const& key) const +{ + return compute(key.value()); +} + +template <> +hash_value_type __device__ inline MurmurHash3_x86_32::operator()( + numeric::decimal64 const& key) const +{ + return compute(key.value()); +} + +template <> +hash_value_type __device__ inline MurmurHash3_x86_32::operator()( + numeric::decimal128 const& key) const +{ + return compute(key.value()); +} + +template <> +hash_value_type __device__ inline MurmurHash3_x86_32::operator()( + cudf::list_view const& key) const +{ + CUDF_UNREACHABLE("List column hashing is not supported"); +} + +template <> +hash_value_type __device__ inline MurmurHash3_x86_32::operator()( + cudf::struct_view const& key) const +{ + CUDF_UNREACHABLE("Direct hashing of struct_view is not supported"); +} + +} // namespace cudf::hashing::detail diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp index 61f8c13bb77..6c50e1d5998 100644 --- a/cpp/include/cudf/join.hpp +++ b/cpp/include/cudf/join.hpp @@ -34,10 +34,11 @@ namespace cudf { // forward declaration -namespace detail { +namespace hashing::detail { template -class MurmurHash3_32; - +class MurmurHash3_x86_32; +} // namespace hashing::detail +namespace detail { template class hash_join; } // namespace detail @@ -272,7 +273,7 @@ enum class nullable_join : bool { YES, NO }; class hash_join { public: using impl_type = typename cudf::detail::hash_join< - cudf::detail::MurmurHash3_32>; ///< Implementation type + cudf::hashing::detail::MurmurHash3_x86_32>; ///< Implementation type hash_join() = delete; ~hash_join(); diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index ce6dd024622..5fe9dcbdf1b 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -17,11 +17,11 @@ #pragma once #include -#include #include #include #include -#include +#include +#include #include #include #include @@ -1942,7 +1942,7 @@ class row_hasher { * @param seed The seed to use for the hash function * @return A hash operator to use on the device */ - template