Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Test updates of CCCL (thrust, cub, libcudacxx) to 2.1.0. #3516

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ rapids_cpm_init()
###

# Putting this before raft to override RAFT from pulling them in.
include(cmake/thirdparty/get_libcudacxx.cmake)
include(cmake/thirdparty/get_thrust.cmake)
include(${rapids-cmake-dir}/cpm/cuco.cmake)
rapids_cpm_cuco(BUILD_EXPORT_SET cugraph-exports INSTALL_EXPORT_SET cugraph-exports)

Expand Down
24 changes: 24 additions & 0 deletions cpp/cmake/thirdparty/get_thrust.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# =============================================================================
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under
# the License.
# =============================================================================

# Use CPM to find or clone thrust
function(find_and_configure_thrust)
include(${rapids-cmake-dir}/cpm/thrust.cmake)

rapids_cpm_thrust( NAMESPACE cugraph
BUILD_EXPORT_SET cugraph-exports
INSTALL_EXPORT_SET cugraph-exports)
endfunction()

find_and_configure_thrust()
5 changes: 4 additions & 1 deletion cpp/include/cugraph/utilities/misc_utils.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
#include <thrust/iterator/transform_iterator.h>
#include <thrust/optional.h>

#include <cuda/functional>

#include <optional>
#include <tuple>
#include <vector>
Expand All @@ -43,7 +45,8 @@ std::tuple<std::vector<vertex_t>, std::vector<edge_t>> compute_offset_aligned_ed
{
auto search_offset_first = thrust::make_transform_iterator(
thrust::make_counting_iterator(size_t{1}),
[approx_edge_chunk_size] __device__(auto i) { return i * approx_edge_chunk_size; });
cuda::proclaim_return_type<size_t>(
[approx_edge_chunk_size] __device__(auto i) { return i * approx_edge_chunk_size; }));
auto num_chunks = (num_edges + approx_edge_chunk_size - 1) / approx_edge_chunk_size;

if (num_chunks > 1) {
Expand Down
48 changes: 28 additions & 20 deletions cpp/include/cugraph/utilities/shuffle_comm.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@
#include <thrust/tabulate.h>
#include <thrust/tuple.h>

#include <cuda/functional>

#include <algorithm>
#include <numeric>
#include <vector>
Expand Down Expand Up @@ -198,12 +200,13 @@ void multi_partition(ValueIterator value_first,
value_last,
thrust::make_zip_iterator(
thrust::make_tuple(group_ids.begin(), intra_partition_offsets.begin())),
[value_to_group_id_op, group_first, counts = counts.data()] __device__(auto value) {
auto group_id = value_to_group_id_op(value);
cuda::std::atomic_ref<size_t> counter(counts[group_id - group_first]);
return thrust::make_tuple(group_id,
counter.fetch_add(size_t{1}, cuda::std::memory_order_relaxed));
});
cuda::proclaim_return_type<thrust::tuple<int, size_t>>(
[value_to_group_id_op, group_first, counts = counts.data()] __device__(auto value) {
auto group_id = value_to_group_id_op(value);
cuda::std::atomic_ref<size_t> counter(counts[group_id - group_first]);
return thrust::make_tuple(group_id,
counter.fetch_add(size_t{1}, cuda::std::memory_order_relaxed));
}));

rmm::device_uvector<size_t> displacements(num_groups, stream_view);
thrust::exclusive_scan(
Expand Down Expand Up @@ -246,17 +249,19 @@ void multi_partition(KeyIterator key_first,
rmm::device_uvector<int> group_ids(num_keys, stream_view);
rmm::device_uvector<size_t> intra_partition_offsets(num_keys, stream_view);
thrust::fill(rmm::exec_policy(stream_view), counts.begin(), counts.end(), size_t{0});
thrust::transform(rmm::exec_policy(stream_view),
key_first,
key_last,
thrust::make_zip_iterator(
thrust::make_tuple(group_ids.begin(), intra_partition_offsets.begin())),
[key_to_group_id_op, group_first, counts = counts.data()] __device__(auto key) {
auto group_id = key_to_group_id_op(key);
cuda::std::atomic_ref<size_t> counter(counts[group_id - group_first]);
return thrust::make_tuple(
group_id, counter.fetch_add(size_t{1}, cuda::std::memory_order_relaxed));
});
thrust::transform(
rmm::exec_policy(stream_view),
key_first,
key_last,
thrust::make_zip_iterator(
thrust::make_tuple(group_ids.begin(), intra_partition_offsets.begin())),
cuda::proclaim_return_type<thrust::tuple<int, size_t>>(
[key_to_group_id_op, group_first, counts = counts.data()] __device__(auto key) {
auto group_id = key_to_group_id_op(key);
cuda::std::atomic_ref<size_t> counter(counts[group_id - group_first]);
return thrust::make_tuple(group_id,
counter.fetch_add(size_t{1}, cuda::std::memory_order_relaxed));
}));

rmm::device_uvector<size_t> displacements(num_groups, stream_view);
thrust::exclusive_scan(
Expand Down Expand Up @@ -762,8 +767,9 @@ rmm::device_uvector<size_t> groupby_and_count(ValueIterator tx_value_first /* [I
stream_view);

auto group_id_first = thrust::make_transform_iterator(
tx_value_first,
[value_to_group_id_op] __device__(auto value) { return value_to_group_id_op(value); });
tx_value_first, cuda::proclaim_return_type<int>([value_to_group_id_op] __device__(auto value) {
return value_to_group_id_op(value);
}));
rmm::device_uvector<int> d_tx_dst_ranks(num_groups, stream_view);
rmm::device_uvector<size_t> d_tx_value_counts(d_tx_dst_ranks.size(), stream_view);
auto rank_count_pair_first = thrust::make_zip_iterator(
Expand Down Expand Up @@ -796,7 +802,9 @@ rmm::device_uvector<size_t> groupby_and_count(VertexIterator tx_key_first /* [IN
stream_view);

auto group_id_first = thrust::make_transform_iterator(
tx_key_first, [key_to_group_id_op] __device__(auto key) { return key_to_group_id_op(key); });
tx_key_first, cuda::proclaim_return_type<int>([key_to_group_id_op] __device__(auto key) {
return key_to_group_id_op(key);
}));
rmm::device_uvector<int> d_tx_dst_ranks(num_groups, stream_view);
rmm::device_uvector<size_t> d_tx_value_counts(d_tx_dst_ranks.size(), stream_view);
auto rank_count_pair_first = thrust::make_zip_iterator(
Expand Down
16 changes: 9 additions & 7 deletions cpp/src/community/detail/mis_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@
#include <thrust/transform.h>
#include <thrust/transform_reduce.h>

#include <cuda/functional>

#include <cmath>

namespace cugraph {
Expand Down Expand Up @@ -78,13 +80,13 @@ rmm::device_uvector<vertex_t> maximal_independent_set(
thrust::copy(handle.get_thrust_policy(), vertex_begin, vertex_end, ranks.begin());

// Set ranks of zero out-degree vetices to std::numeric_limits<vertex_t>::lowest()
thrust::transform_if(
handle.get_thrust_policy(),
out_degrees.begin(),
out_degrees.end(),
ranks.begin(),
[] __device__(auto) { return std::numeric_limits<vertex_t>::lowest(); },
[] __device__(auto deg) { return deg == 0; });
thrust::transform_if(handle.get_thrust_policy(),
out_degrees.begin(),
out_degrees.end(),
ranks.begin(),
cuda::proclaim_return_type<vertex_t>(
[] __device__(auto) { return std::numeric_limits<vertex_t>::lowest(); }),
[] __device__(auto deg) { return deg == 0; });

out_degrees.resize(0, handle.get_stream());
out_degrees.shrink_to_fit(handle.get_stream());
Expand Down
34 changes: 19 additions & 15 deletions cpp/src/community/detail/refine_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@
#include <thrust/transform_reduce.h>
#include <thrust/tuple.h>

#include <cuda/functional>

CUCO_DECLARE_BITWISE_COMPARABLE(float)
CUCO_DECLARE_BITWISE_COMPARABLE(double)
// FIXME: a temporary workaround for a compiler error, should be deleted once cuco gets patched.
Expand Down Expand Up @@ -236,16 +238,17 @@ refine_clustering(
weighted_degree_of_vertices.end(),
vertex_louvain_cluster_weights.end()));

thrust::transform(handle.get_thrust_policy(),
wcut_deg_and_cluster_vol_triple_begin,
wcut_deg_and_cluster_vol_triple_end,
singleton_and_connected_flags.begin(),
[resolution] __device__(auto wcut_wdeg_and_louvain_volume) {
auto wcut = thrust::get<0>(wcut_wdeg_and_louvain_volume);
auto wdeg = thrust::get<1>(wcut_wdeg_and_louvain_volume);
auto louvain_volume = thrust::get<2>(wcut_wdeg_and_louvain_volume);
return wcut > (resolution * wdeg * (louvain_volume - wdeg));
});
thrust::transform(
handle.get_thrust_policy(),
wcut_deg_and_cluster_vol_triple_begin,
wcut_deg_and_cluster_vol_triple_end,
singleton_and_connected_flags.begin(),
cuda::proclaim_return_type<uint8_t>([resolution] __device__(auto wcut_wdeg_and_louvain_volume) {
auto wcut = thrust::get<0>(wcut_wdeg_and_louvain_volume);
auto wdeg = thrust::get<1>(wcut_wdeg_and_louvain_volume);
auto louvain_volume = thrust::get<2>(wcut_wdeg_and_louvain_volume);
return (wcut > (resolution * wdeg * (louvain_volume - wdeg))) ? uint8_t{1} : uint8_t{0};
}));

edge_src_property_t<GraphViewType, weight_t> src_louvain_cluster_weight_cache(handle);
edge_src_property_t<GraphViewType, weight_t> src_cut_to_louvain_cache(handle);
Expand Down Expand Up @@ -714,11 +717,12 @@ refine_clustering(
vertices_in_mis.begin(),
vertices_in_mis.end(),
dst_vertices.begin(),
[dst_first = thrust::get<1>(gain_and_dst_first.get_iterator_tuple()),
v_first = graph_view.local_vertex_partition_range_first()] __device__(vertex_t v) {
auto dst = *(dst_first + v - v_first);
return dst;
});
cuda::proclaim_return_type<vertex_t>(
[dst_first = thrust::get<1>(gain_and_dst_first.get_iterator_tuple()),
v_first = graph_view.local_vertex_partition_range_first()] __device__(vertex_t v) {
auto dst = *(dst_first + v - v_first);
return dst;
}));

cugraph::resize_dataframe_buffer(gain_and_dst_output_pairs, 0, handle.get_stream());
cugraph::shrink_to_fit_dataframe_buffer(gain_and_dst_output_pairs, handle.get_stream());
Expand Down
5 changes: 4 additions & 1 deletion cpp/src/detail/collect_local_vertex_values.cu
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
#include <cugraph/graph_functions.hpp>
#include <cugraph/utilities/shuffle_comm.cuh>

#include <cuda/functional>

namespace cugraph {
namespace detail {

Expand Down Expand Up @@ -64,7 +66,8 @@ rmm::device_uvector<value_t> collect_local_vertex_values_from_ext_vertex_value_p

auto vertex_iterator = thrust::make_transform_iterator(
d_vertices.begin(),
[local_vertex_first] __device__(vertex_t v) { return v - local_vertex_first; });
cuda::proclaim_return_type<vertex_t>(
[local_vertex_first] __device__(vertex_t v) { return v - local_vertex_first; }));

d_local_values.resize(local_vertex_last - local_vertex_first, handle.get_stream());
thrust::fill(
Expand Down
24 changes: 14 additions & 10 deletions cpp/src/generators/erdos_renyi_generator.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -28,6 +28,8 @@
#include <thrust/transform.h>
#include <thrust/tuple.h>

#include <cuda/functional>

namespace cugraph {

template <typename vertex_t>
Expand All @@ -42,12 +44,13 @@ generate_erdos_renyi_graph_edgelist_gnp(raft::handle_t const& handle,
"Implementation cannot support specified value");

auto random_iterator = thrust::make_transform_iterator(
thrust::make_counting_iterator<size_t>(0), [seed] __device__(size_t index) {
thrust::make_counting_iterator<size_t>(0),
cuda::proclaim_return_type<float>([seed] __device__(size_t index) {
thrust::default_random_engine rng(seed);
thrust::uniform_real_distribution<float> dist(0.0, 1.0);
rng.discard(index);
return dist(rng);
});
}));

size_t count = thrust::count_if(handle.get_thrust_policy(),
random_iterator,
Expand All @@ -69,13 +72,14 @@ generate_erdos_renyi_graph_edgelist_gnp(raft::handle_t const& handle,
indices_v.begin(),
indices_v.end(),
thrust::make_zip_iterator(thrust::make_tuple(src_v.begin(), src_v.end())),
[num_vertices] __device__(size_t index) {
size_t src = index / num_vertices;
size_t dst = index % num_vertices;

return thrust::make_tuple(static_cast<vertex_t>(src),
static_cast<vertex_t>(dst));
});
cuda::proclaim_return_type<thrust::tuple<vertex_t, vertex_t>>(
[num_vertices] __device__(size_t index) {
size_t src = index / num_vertices;
size_t dst = index % num_vertices;

return thrust::make_tuple(static_cast<vertex_t>(src),
static_cast<vertex_t>(dst));
}));

handle.sync_stream();

Expand Down
39 changes: 21 additions & 18 deletions cpp/src/generators/simple_generators.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -27,6 +27,8 @@
#include <thrust/sequence.h>
#include <thrust/tuple.h>

#include <cuda/functional>

#include <numeric>

namespace cugraph {
Expand Down Expand Up @@ -264,23 +266,24 @@ generate_complete_graph_edgelist(

auto transform_iter = thrust::make_transform_iterator(
thrust::make_counting_iterator<size_t>(0),
[base_vertex_id, num_vertices, invalid_vertex] __device__(size_t index) {
size_t graph_index = index / (num_vertices * num_vertices);
size_t local_index = index % (num_vertices * num_vertices);

vertex_t src = base_vertex_id + static_cast<vertex_t>(local_index / num_vertices);
vertex_t dst = base_vertex_id + static_cast<vertex_t>(local_index % num_vertices);

if (src == dst) {
src = invalid_vertex;
dst = invalid_vertex;
} else {
src += (graph_index * num_vertices);
dst += (graph_index * num_vertices);
}

return thrust::make_tuple(src, dst);
});
cuda::proclaim_return_type<thrust::tuple<vertex_t, vertex_t>>(
[base_vertex_id, num_vertices, invalid_vertex] __device__(size_t index) {
size_t graph_index = index / (num_vertices * num_vertices);
size_t local_index = index % (num_vertices * num_vertices);

vertex_t src = base_vertex_id + static_cast<vertex_t>(local_index / num_vertices);
vertex_t dst = base_vertex_id + static_cast<vertex_t>(local_index % num_vertices);

if (src == dst) {
src = invalid_vertex;
dst = invalid_vertex;
} else {
src += (graph_index * num_vertices);
dst += (graph_index * num_vertices);
}

return thrust::make_tuple(src, dst);
}));

output_iterator = thrust::copy_if(handle.get_thrust_policy(),
transform_iter,
Expand Down
12 changes: 8 additions & 4 deletions cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@
#include <thrust/tuple.h>
#include <thrust/unique.h>

#include <cuda/functional>

#include <optional>
#include <tuple>

Expand Down Expand Up @@ -596,8 +598,9 @@ rmm::device_uvector<edge_t> get_sampling_index_without_replacement(
multiplier_t<size_t>{high_partition_over_sampling_K}),
thrust::make_transform_iterator(
thrust::make_counting_iterator(size_t{0}),
[high_partition_over_sampling_K, unique_counts = unique_counts.data()] __device__(
size_t i) { return i * high_partition_over_sampling_K + unique_counts[i]; }),
cuda::proclaim_return_type<size_t>(
[high_partition_over_sampling_K, unique_counts = unique_counts.data()] __device__(
size_t i) { return i * high_partition_over_sampling_K + unique_counts[i]; })),
handle.get_stream());
if (tmp_storage_bytes > d_tmp_storage.size()) {
d_tmp_storage = rmm::device_uvector<std::byte>(tmp_storage_bytes, handle.get_stream());
Expand All @@ -615,8 +618,9 @@ rmm::device_uvector<edge_t> get_sampling_index_without_replacement(
multiplier_t<size_t>{high_partition_over_sampling_K}),
thrust::make_transform_iterator(
thrust::make_counting_iterator(size_t{0}),
[high_partition_over_sampling_K, unique_counts = unique_counts.data()] __device__(
size_t i) { return i * high_partition_over_sampling_K + unique_counts[i]; }),
cuda::proclaim_return_type<size_t>(
[high_partition_over_sampling_K, unique_counts = unique_counts.data()] __device__(
size_t i) { return i * high_partition_over_sampling_K + unique_counts[i]; })),
handle.get_stream());

// copy the neighbor indices back to sample_nbr_indices
Expand Down
Loading
Loading