From 3ba63c3c3cb72950adc4c9699fcfa1a72796a041 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Thu, 15 Feb 2024 10:50:54 -0800 Subject: [PATCH] Update cudf for compatibility with the latest cuco (#14849) Depends on https://github.com/rapidsai/rapids-cmake/pull/526 CMakes changes will be reverted once https://github.com/rapidsai/rapids-cmake/pull/526 is merged. This PR updates libcudf to make it compatible with the latest cuco. Authors: - Yunsong Wang (https://github.com/PointKernel) Approvers: - David Wendt (https://github.com/davidwendt) - Vyas Ramasubramani (https://github.com/vyasr) - Karthikeyan (https://github.com/karthikeyann) URL: https://github.com/rapidsai/cudf/pull/14849 --- .../cudf/detail/hash_reduce_by_row.cuh | 4 +- cpp/include/cudf/detail/join.hpp | 2 +- cpp/src/io/json/json_tree.cu | 32 ++++++++------- cpp/src/io/orc/orc_gpu.hpp | 4 +- cpp/src/io/parquet/parquet_gpu.cuh | 4 +- cpp/src/join/join_common_utils.hpp | 13 ++++--- cpp/src/search/contains_table.cu | 19 ++++----- cpp/src/stream_compaction/distinct_count.cu | 15 +++---- .../stream_compaction_common.hpp | 4 +- cpp/src/text/bpe/byte_pair_encoding.cuh | 39 ++++++++++--------- cpp/src/text/bpe/load_merge_pairs.cu | 4 ++ cpp/src/text/vocabulary_tokenize.cu | 22 ++++++----- 12 files changed, 89 insertions(+), 73 deletions(-) diff --git a/cpp/include/cudf/detail/hash_reduce_by_row.cuh b/cpp/include/cudf/detail/hash_reduce_by_row.cuh index 006cb5142c9..a740b5c4e93 100644 --- a/cpp/include/cudf/detail/hash_reduce_by_row.cuh +++ b/cpp/include/cudf/detail/hash_reduce_by_row.cuh @@ -31,8 +31,8 @@ namespace cudf::detail { -using hash_map_type = - cuco::static_map; +using hash_map_type = cuco::legacy:: + static_map; /** * @brief The base struct for customized reduction functor to perform reduce-by-key with keys are diff --git a/cpp/include/cudf/detail/join.hpp b/cpp/include/cudf/detail/join.hpp index ad6269dae30..27d14874bce 100644 --- a/cpp/include/cudf/detail/join.hpp +++ b/cpp/include/cudf/detail/join.hpp @@ -62,7 +62,7 @@ struct hash_join { cudf::size_type, cuda::thread_scope_device, cudf::detail::cuco_allocator, - cuco::double_hashing>; + cuco::legacy::double_hashing>; hash_join() = delete; ~hash_join() = default; diff --git a/cpp/src/io/json/json_tree.cu b/cpp/src/io/json/json_tree.cu index db9daf28c06..148aeb5ec7a 100644 --- a/cpp/src/io/json/json_tree.cu +++ b/cpp/src/io/json/json_tree.cu @@ -548,13 +548,14 @@ rmm::device_uvector hash_node_type_with_field_name(device_span{d_hasher}, - cudf::detail::cuco_allocator{stream}, - stream.value()}; + cuco::static_set{cuco::extent{compute_hash_table_size(num_fields, 40)}, // 40% occupancy + cuco::empty_key{empty_node_index_sentinel}, + d_equal, + cuco::linear_probing<1, hasher_type>{d_hasher}, + {}, + {}, + cudf::detail::cuco_allocator{stream}, + stream.value()}; key_set.insert_if_async(iter, iter + num_nodes, thrust::counting_iterator(0), // stencil @@ -562,7 +563,7 @@ rmm::device_uvector hash_node_type_with_field_name(device_span size_type { + [key_set = key_set.ref(cuco::op::find)] __device__(auto node_id) -> size_type { auto const it = key_set.find(node_id); return (it == key_set.end()) ? size_type{0} : *it; }; @@ -735,13 +736,14 @@ std::pair, rmm::device_uvector> hash_n constexpr size_type empty_node_index_sentinel = -1; using hasher_type = decltype(d_hashed_cache); - auto key_set = cuco::experimental::static_set{ - cuco::experimental::extent{compute_hash_table_size(num_nodes)}, - cuco::empty_key{empty_node_index_sentinel}, - d_equal, - cuco::experimental::linear_probing<1, hasher_type>{d_hashed_cache}, - cudf::detail::cuco_allocator{stream}, - stream.value()}; + auto key_set = cuco::static_set{cuco::extent{compute_hash_table_size(num_nodes)}, + cuco::empty_key{empty_node_index_sentinel}, + d_equal, + cuco::linear_probing<1, hasher_type>{d_hashed_cache}, + {}, + {}, + cudf::detail::cuco_allocator{stream}, + stream.value()}; // insert and convert node ids to unique set ids auto nodes_itr = thrust::make_counting_iterator(0); diff --git a/cpp/src/io/orc/orc_gpu.hpp b/cpp/src/io/orc/orc_gpu.hpp index 243704b65d4..c2570d71c24 100644 --- a/cpp/src/io/orc/orc_gpu.hpp +++ b/cpp/src/io/orc/orc_gpu.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -44,7 +44,7 @@ using cudf::detail::host_2dspan; auto constexpr KEY_SENTINEL = size_type{-1}; auto constexpr VALUE_SENTINEL = size_type{-1}; -using map_type = cuco::static_map; +using map_type = cuco::legacy::static_map; /** * @brief The alias of `map_type::pair_atomic_type` class. diff --git a/cpp/src/io/parquet/parquet_gpu.cuh b/cpp/src/io/parquet/parquet_gpu.cuh index 10e12ebb782..e3c44c78898 100644 --- a/cpp/src/io/parquet/parquet_gpu.cuh +++ b/cpp/src/io/parquet/parquet_gpu.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,7 +28,7 @@ namespace cudf::io::parquet::detail { auto constexpr KEY_SENTINEL = size_type{-1}; auto constexpr VALUE_SENTINEL = size_type{-1}; -using map_type = cuco::static_map; +using map_type = cuco::legacy::static_map; /** * @brief The alias of `map_type::pair_atomic_type` class. diff --git a/cpp/src/join/join_common_utils.hpp b/cpp/src/join/join_common_utils.hpp index b88a4fdef58..4d361b23502 100644 --- a/cpp/src/join/join_common_utils.hpp +++ b/cpp/src/join/join_common_utils.hpp @@ -45,13 +45,14 @@ using multimap_type = cudf::hash_join::impl_type::map_type; // Multimap type used for mixed joins. TODO: This is a temporary alias used // until the mixed joins are converted to using CGs properly. Right now it's // using a cooperative group of size 1. -using mixed_multimap_type = cuco::static_multimap>; +using mixed_multimap_type = + cuco::static_multimap>; -using semi_map_type = cuco:: +using semi_map_type = cuco::legacy:: static_map; using row_hash_legacy = diff --git a/cpp/src/search/contains_table.cu b/cpp/src/search/contains_table.cu index ce069abcb78..e1d0fab6025 100644 --- a/cpp/src/search/contains_table.cu +++ b/cpp/src/search/contains_table.cu @@ -158,9 +158,9 @@ void dispatch_nan_comparator( // Distinguish probing scheme CG sizes between nested and flat types for better performance auto const probing_scheme = [&]() { if constexpr (HasNested) { - return cuco::experimental::linear_probing<4, Hasher>{d_hasher}; + return cuco::linear_probing<4, Hasher>{d_hasher}; } else { - return cuco::experimental::linear_probing<1, Hasher>{d_hasher}; + return cuco::linear_probing<1, Hasher>{d_hasher}; } }(); @@ -228,13 +228,14 @@ rmm::device_uvector contains(table_view const& haystack, [&](auto const& d_self_equal, auto const& d_two_table_equal, auto const& probing_scheme) { auto const d_equal = comparator_adapter{d_self_equal, d_two_table_equal}; - auto set = cuco::experimental::static_set{ - cuco::experimental::extent{compute_hash_table_size(haystack.num_rows())}, - cuco::empty_key{lhs_index_type{-1}}, - d_equal, - probing_scheme, - cudf::detail::cuco_allocator{stream}, - stream.value()}; + auto set = cuco::static_set{cuco::extent{compute_hash_table_size(haystack.num_rows())}, + cuco::empty_key{lhs_index_type{-1}}, + d_equal, + probing_scheme, + {}, + {}, + cudf::detail::cuco_allocator{stream}, + stream.value()}; if (haystack_has_nulls && compare_nulls == null_equality::UNEQUAL) { auto const bitmask_buffer_and_ptr = build_row_bitmask(haystack, stream); diff --git a/cpp/src/stream_compaction/distinct_count.cu b/cpp/src/stream_compaction/distinct_count.cu index 507bad777eb..3ec1be42bfe 100644 --- a/cpp/src/stream_compaction/distinct_count.cu +++ b/cpp/src/stream_compaction/distinct_count.cu @@ -141,13 +141,14 @@ cudf::size_type distinct_count(table_view const& keys, auto const comparator_helper = [&](auto const row_equal) { using hasher_type = decltype(hash_key); - auto key_set = - cuco::experimental::static_set{cuco::experimental::extent{compute_hash_table_size(num_rows)}, - cuco::empty_key{-1}, - row_equal, - cuco::experimental::linear_probing<1, hasher_type>{hash_key}, - cudf::detail::cuco_allocator{stream}, - stream.value()}; + auto key_set = cuco::static_set{cuco::extent{compute_hash_table_size(num_rows)}, + cuco::empty_key{-1}, + row_equal, + cuco::linear_probing<1, hasher_type>{hash_key}, + {}, + {}, + cudf::detail::cuco_allocator{stream}, + stream.value()}; auto const iter = thrust::counting_iterator(0); // when nulls are equal, we skip hashing any row that has a null diff --git a/cpp/src/stream_compaction/stream_compaction_common.hpp b/cpp/src/stream_compaction/stream_compaction_common.hpp index ceb62d1d059..dd7d76168d9 100644 --- a/cpp/src/stream_compaction/stream_compaction_common.hpp +++ b/cpp/src/stream_compaction/stream_compaction_common.hpp @@ -27,8 +27,8 @@ namespace cudf { namespace detail { -using hash_map_type = - cuco::static_map; +using hash_map_type = cuco::legacy:: + static_map; } // namespace detail } // namespace cudf diff --git a/cpp/src/text/bpe/byte_pair_encoding.cuh b/cpp/src/text/bpe/byte_pair_encoding.cuh index 1a3f8eadea0..02a8a6c4d0a 100644 --- a/cpp/src/text/bpe/byte_pair_encoding.cuh +++ b/cpp/src/text/bpe/byte_pair_encoding.cuh @@ -44,6 +44,7 @@ namespace detail { using string_hasher_type = cudf::hashing::detail::MurmurHash3_x86_32; using hash_value_type = string_hasher_type::result_type; using merge_pair_type = thrust::pair; +using cuco_storage = cuco::storage<1>; /** * @brief Hasher function used for building and using the cuco static-map @@ -98,15 +99,16 @@ struct bpe_equal { } }; -using bpe_probe_scheme = cuco::experimental::linear_probing<1, bpe_hasher>; +using bpe_probe_scheme = cuco::linear_probing<1, bpe_hasher>; -using merge_pairs_map_type = cuco::experimental::static_map, - cuda::thread_scope_device, - bpe_equal, - bpe_probe_scheme, - cudf::detail::cuco_allocator>; +using merge_pairs_map_type = cuco::static_map, + cuda::thread_scope_device, + bpe_equal, + bpe_probe_scheme, + cudf::detail::cuco_allocator, + cuco_storage>; /** * @brief Hasher function used for building and using the cuco static-map @@ -155,15 +157,16 @@ struct mp_equal { } }; -using mp_probe_scheme = cuco::experimental::linear_probing<1, mp_hasher>; +using mp_probe_scheme = cuco::linear_probing<1, mp_hasher>; -using mp_table_map_type = cuco::experimental::static_map, - cuda::thread_scope_device, - mp_equal, - mp_probe_scheme, - cudf::detail::cuco_allocator>; +using mp_table_map_type = cuco::static_map, + cuda::thread_scope_device, + mp_equal, + mp_probe_scheme, + cudf::detail::cuco_allocator, + cuco_storage>; } // namespace detail @@ -185,8 +188,8 @@ struct bpe_merge_pairs::bpe_merge_pairs_impl { std::unique_ptr&& mp_table_map); auto const get_merge_pairs() const { return *d_merge_pairs; } - auto get_merge_pairs_ref() const { return merge_pairs_map->ref(cuco::experimental::op::find); } - auto get_mp_table_ref() const { return mp_table_map->ref(cuco::experimental::op::find); } + auto get_merge_pairs_ref() const { return merge_pairs_map->ref(cuco::op::find); } + auto get_mp_table_ref() const { return mp_table_map->ref(cuco::op::find); } }; } // namespace nvtext diff --git a/cpp/src/text/bpe/load_merge_pairs.cu b/cpp/src/text/bpe/load_merge_pairs.cu index 3b630886b3e..8da2d745966 100644 --- a/cpp/src/text/bpe/load_merge_pairs.cu +++ b/cpp/src/text/bpe/load_merge_pairs.cu @@ -48,6 +48,8 @@ std::unique_ptr initialize_merge_pairs_map( cuco::empty_value{-1}, bpe_equal{input}, bpe_probe_scheme{bpe_hasher{input}}, + cuco::thread_scope_device, + cuco_storage{}, cudf::detail::cuco_allocator{stream}, stream.value()); @@ -69,6 +71,8 @@ std::unique_ptr initialize_mp_table_map( cuco::empty_value{-1}, mp_equal{input}, mp_probe_scheme{mp_hasher{input}}, + cuco::thread_scope_device, + cuco_storage{}, cudf::detail::cuco_allocator{stream}, stream.value()); diff --git a/cpp/src/text/vocabulary_tokenize.cu b/cpp/src/text/vocabulary_tokenize.cu index c6e90c6fcaa..b6991e534bf 100644 --- a/cpp/src/text/vocabulary_tokenize.cu +++ b/cpp/src/text/vocabulary_tokenize.cu @@ -93,14 +93,16 @@ struct vocab_equal { } }; -using probe_scheme = cuco::experimental::linear_probing<1, vocab_hasher>; -using vocabulary_map_type = cuco::experimental::static_map, - cuda::thread_scope_device, - vocab_equal, - probe_scheme, - cudf::detail::cuco_allocator>; +using probe_scheme = cuco::linear_probing<1, vocab_hasher>; +using cuco_storage = cuco::storage<1>; +using vocabulary_map_type = cuco::static_map, + cuda::thread_scope_device, + vocab_equal, + probe_scheme, + cudf::detail::cuco_allocator, + cuco_storage>; } // namespace } // namespace detail @@ -115,7 +117,7 @@ struct tokenize_vocabulary::tokenize_vocabulary_impl { col_device_view const d_vocabulary; std::unique_ptr vocabulary_map; - auto get_map_ref() const { return vocabulary_map->ref(cuco::experimental::op::find); } + auto get_map_ref() const { return vocabulary_map->ref(cuco::op::find); } tokenize_vocabulary_impl(std::unique_ptr&& vocab, col_device_view&& d_vocab, @@ -149,6 +151,8 @@ tokenize_vocabulary::tokenize_vocabulary(cudf::strings_column_view const& input, cuco::empty_value{-1}, detail::vocab_equal{*d_vocabulary}, detail::probe_scheme{detail::vocab_hasher{*d_vocabulary}}, + cuco::thread_scope_device, + detail::cuco_storage{}, cudf::detail::cuco_allocator{stream}, stream.value());