Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update for CCCL 2.x #3862

Merged
merged 10 commits into from
Dec 8, 2023
5 changes: 4 additions & 1 deletion cpp/include/cugraph/utilities/misc_utils.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
#include <thrust/iterator/transform_iterator.h>
#include <thrust/optional.h>

#include <cuda/functional>

#include <optional>
#include <tuple>
#include <vector>
Expand All @@ -44,7 +46,8 @@ std::tuple<std::vector<vertex_t>, std::vector<edge_t>> compute_offset_aligned_ed
{
auto search_offset_first = thrust::make_transform_iterator(
thrust::make_counting_iterator(size_t{1}),
[approx_edge_chunk_size] __device__(auto i) { return i * approx_edge_chunk_size; });
cuda::proclaim_return_type<size_t>(
[approx_edge_chunk_size] __device__(auto i) { return i * approx_edge_chunk_size; }));
auto num_chunks = (num_edges + approx_edge_chunk_size - 1) / approx_edge_chunk_size;

if (num_chunks > 1) {
Expand Down
48 changes: 28 additions & 20 deletions cpp/include/cugraph/utilities/shuffle_comm.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@
#include <thrust/tabulate.h>
#include <thrust/tuple.h>

#include <cuda/functional>

#include <algorithm>
#include <numeric>
#include <vector>
Expand Down Expand Up @@ -197,12 +199,13 @@ void multi_partition(ValueIterator value_first,
value_last,
thrust::make_zip_iterator(
thrust::make_tuple(group_ids.begin(), intra_partition_offsets.begin())),
[value_to_group_id_op, group_first, counts = counts.data()] __device__(auto value) {
auto group_id = value_to_group_id_op(value);
cuda::std::atomic_ref<size_t> counter(counts[group_id - group_first]);
return thrust::make_tuple(group_id,
counter.fetch_add(size_t{1}, cuda::std::memory_order_relaxed));
});
cuda::proclaim_return_type<thrust::tuple<int, size_t>>(
[value_to_group_id_op, group_first, counts = counts.data()] __device__(auto value) {
auto group_id = value_to_group_id_op(value);
cuda::std::atomic_ref<size_t> counter(counts[group_id - group_first]);
return thrust::make_tuple(group_id,
counter.fetch_add(size_t{1}, cuda::std::memory_order_relaxed));
}));

rmm::device_uvector<size_t> displacements(num_groups, stream_view);
thrust::exclusive_scan(
Expand Down Expand Up @@ -245,17 +248,19 @@ void multi_partition(KeyIterator key_first,
rmm::device_uvector<int> group_ids(num_keys, stream_view);
rmm::device_uvector<size_t> intra_partition_offsets(num_keys, stream_view);
thrust::fill(rmm::exec_policy(stream_view), counts.begin(), counts.end(), size_t{0});
thrust::transform(rmm::exec_policy(stream_view),
key_first,
key_last,
thrust::make_zip_iterator(
thrust::make_tuple(group_ids.begin(), intra_partition_offsets.begin())),
[key_to_group_id_op, group_first, counts = counts.data()] __device__(auto key) {
auto group_id = key_to_group_id_op(key);
cuda::std::atomic_ref<size_t> counter(counts[group_id - group_first]);
return thrust::make_tuple(
group_id, counter.fetch_add(size_t{1}, cuda::std::memory_order_relaxed));
});
thrust::transform(
rmm::exec_policy(stream_view),
key_first,
key_last,
thrust::make_zip_iterator(
thrust::make_tuple(group_ids.begin(), intra_partition_offsets.begin())),
cuda::proclaim_return_type<thrust::tuple<int, size_t>>(
[key_to_group_id_op, group_first, counts = counts.data()] __device__(auto key) {
auto group_id = key_to_group_id_op(key);
cuda::std::atomic_ref<size_t> counter(counts[group_id - group_first]);
return thrust::make_tuple(group_id,
counter.fetch_add(size_t{1}, cuda::std::memory_order_relaxed));
}));

rmm::device_uvector<size_t> displacements(num_groups, stream_view);
thrust::exclusive_scan(
Expand Down Expand Up @@ -761,8 +766,9 @@ rmm::device_uvector<size_t> groupby_and_count(ValueIterator tx_value_first /* [I
stream_view);

auto group_id_first = thrust::make_transform_iterator(
tx_value_first,
[value_to_group_id_op] __device__(auto value) { return value_to_group_id_op(value); });
tx_value_first, cuda::proclaim_return_type<int>([value_to_group_id_op] __device__(auto value) {
return value_to_group_id_op(value);
}));
rmm::device_uvector<int> d_tx_dst_ranks(num_groups, stream_view);
rmm::device_uvector<size_t> d_tx_value_counts(d_tx_dst_ranks.size(), stream_view);
auto rank_count_pair_first = thrust::make_zip_iterator(
Expand Down Expand Up @@ -795,7 +801,9 @@ rmm::device_uvector<size_t> groupby_and_count(VertexIterator tx_key_first /* [IN
stream_view);

auto group_id_first = thrust::make_transform_iterator(
tx_key_first, [key_to_group_id_op] __device__(auto key) { return key_to_group_id_op(key); });
tx_key_first, cuda::proclaim_return_type<int>([key_to_group_id_op] __device__(auto key) {
return key_to_group_id_op(key);
}));
rmm::device_uvector<int> d_tx_dst_ranks(num_groups, stream_view);
rmm::device_uvector<size_t> d_tx_value_counts(d_tx_dst_ranks.size(), stream_view);
auto rank_count_pair_first = thrust::make_zip_iterator(
Expand Down
16 changes: 9 additions & 7 deletions cpp/src/community/detail/mis_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@
#include <thrust/transform.h>
#include <thrust/transform_reduce.h>

#include <cuda/functional>

#include <cmath>

namespace cugraph {
Expand Down Expand Up @@ -78,13 +80,13 @@ rmm::device_uvector<vertex_t> maximal_independent_set(
thrust::copy(handle.get_thrust_policy(), vertex_begin, vertex_end, ranks.begin());

// Set ranks of zero out-degree vertices to std::numeric_limits<vertex_t>::lowest()
thrust::transform_if(
handle.get_thrust_policy(),
out_degrees.begin(),
out_degrees.end(),
ranks.begin(),
[] __device__(auto) { return std::numeric_limits<vertex_t>::lowest(); },
[] __device__(auto deg) { return deg == 0; });
thrust::transform_if(handle.get_thrust_policy(),
out_degrees.begin(),
out_degrees.end(),
ranks.begin(),
cuda::proclaim_return_type<vertex_t>(
[] __device__(auto) { return std::numeric_limits<vertex_t>::lowest(); }),
[] __device__(auto deg) { return deg == 0; });

out_degrees.resize(0, handle.get_stream());
out_degrees.shrink_to_fit(handle.get_stream());
Expand Down
39 changes: 22 additions & 17 deletions cpp/src/community/detail/refine_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@
#include <thrust/transform_reduce.h>
#include <thrust/tuple.h>

#include <cuda/functional>

CUCO_DECLARE_BITWISE_COMPARABLE(float)
CUCO_DECLARE_BITWISE_COMPARABLE(double)
// FIXME: a temporary workaround for a compiler error, should be deleted once cuco gets patched.
Expand Down Expand Up @@ -213,16 +215,17 @@ refine_clustering(
: detail::edge_minor_property_view_t<vertex_t, vertex_t const*>(
louvain_assignment_of_vertices.data(), vertex_t{0}),
*edge_weight_view,
[] __device__(auto src, auto dst, auto src_cluster, auto dst_cluster, auto wt) {
weight_t weighted_cut_contribution{0};
cuda::proclaim_return_type<weight_t>(
[] __device__(auto src, auto dst, auto src_cluster, auto dst_cluster, auto wt) {
weight_t weighted_cut_contribution{0};

if (src == dst) // self loop
weighted_cut_contribution = 0;
else if (src_cluster == dst_cluster)
weighted_cut_contribution = wt;
if (src == dst) // self loop
weighted_cut_contribution = 0;
else if (src_cluster == dst_cluster)
weighted_cut_contribution = wt;

return weighted_cut_contribution;
},
return weighted_cut_contribution;
}),
weight_t{0},
cugraph::reduce_op::plus<weight_t>{},
weighted_cut_of_vertices_to_louvain.begin());
Expand All @@ -243,13 +246,14 @@ refine_clustering(
wcut_deg_and_cluster_vol_triple_begin,
wcut_deg_and_cluster_vol_triple_end,
singleton_and_connected_flags.begin(),
[resolution, total_edge_weight] __device__(auto wcut_wdeg_and_louvain_volume) {
cuda::proclaim_return_type<uint8_t>([resolution, total_edge_weight] __device__(
auto wcut_wdeg_and_louvain_volume) {
auto wcut = thrust::get<0>(wcut_wdeg_and_louvain_volume);
auto wdeg = thrust::get<1>(wcut_wdeg_and_louvain_volume);
auto louvain_volume = thrust::get<2>(wcut_wdeg_and_louvain_volume);
return wcut >
(resolution * wdeg * (louvain_volume - wdeg) / total_edge_weight);
});
return static_cast<uint8_t>(
wcut > (resolution * wdeg * (louvain_volume - wdeg) / total_edge_weight));
}));

edge_src_property_t<GraphViewType, weight_t> src_louvain_cluster_weight_cache(handle);
edge_src_property_t<GraphViewType, weight_t> src_cut_to_louvain_cache(handle);
Expand Down Expand Up @@ -718,11 +722,12 @@ refine_clustering(
vertices_in_mis.begin(),
vertices_in_mis.end(),
dst_vertices.begin(),
[dst_first = thrust::get<1>(gain_and_dst_first.get_iterator_tuple()),
v_first = graph_view.local_vertex_partition_range_first()] __device__(vertex_t v) {
auto dst = *(dst_first + v - v_first);
return dst;
});
cuda::proclaim_return_type<vertex_t>(
[dst_first = thrust::get<1>(gain_and_dst_first.get_iterator_tuple()),
v_first = graph_view.local_vertex_partition_range_first()] __device__(vertex_t v) {
auto dst = *(dst_first + v - v_first);
return dst;
}));

cugraph::resize_dataframe_buffer(gain_and_dst_output_pairs, 0, handle.get_stream());
cugraph::shrink_to_fit_dataframe_buffer(gain_and_dst_output_pairs, handle.get_stream());
Expand Down
5 changes: 4 additions & 1 deletion cpp/src/detail/collect_local_vertex_values.cu
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
#include <cugraph/graph_functions.hpp>
#include <cugraph/utilities/shuffle_comm.cuh>

#include <cuda/functional>

namespace cugraph {
namespace detail {

Expand Down Expand Up @@ -64,7 +66,8 @@ rmm::device_uvector<value_t> collect_local_vertex_values_from_ext_vertex_value_p

auto vertex_iterator = thrust::make_transform_iterator(
d_vertices.begin(),
[local_vertex_first] __device__(vertex_t v) { return v - local_vertex_first; });
cuda::proclaim_return_type<vertex_t>(
[local_vertex_first] __device__(vertex_t v) { return v - local_vertex_first; }));

d_local_values.resize(local_vertex_last - local_vertex_first, handle.get_stream());
thrust::fill(
Expand Down
24 changes: 14 additions & 10 deletions cpp/src/generators/erdos_renyi_generator.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -28,6 +28,8 @@
#include <thrust/transform.h>
#include <thrust/tuple.h>

#include <cuda/functional>

namespace cugraph {

template <typename vertex_t>
Expand All @@ -42,12 +44,13 @@ generate_erdos_renyi_graph_edgelist_gnp(raft::handle_t const& handle,
"Implementation cannot support specified value");

auto random_iterator = thrust::make_transform_iterator(
thrust::make_counting_iterator<size_t>(0), [seed] __device__(size_t index) {
thrust::make_counting_iterator<size_t>(0),
cuda::proclaim_return_type<float>([seed] __device__(size_t index) {
thrust::default_random_engine rng(seed);
thrust::uniform_real_distribution<float> dist(0.0, 1.0);
rng.discard(index);
return dist(rng);
});
}));

size_t count = thrust::count_if(handle.get_thrust_policy(),
random_iterator,
Expand All @@ -69,13 +72,14 @@ generate_erdos_renyi_graph_edgelist_gnp(raft::handle_t const& handle,
indices_v.begin(),
indices_v.end(),
thrust::make_zip_iterator(thrust::make_tuple(src_v.begin(), src_v.end())),
[num_vertices] __device__(size_t index) {
size_t src = index / num_vertices;
size_t dst = index % num_vertices;

return thrust::make_tuple(static_cast<vertex_t>(src),
static_cast<vertex_t>(dst));
});
cuda::proclaim_return_type<thrust::tuple<vertex_t, vertex_t>>(
[num_vertices] __device__(size_t index) {
size_t src = index / num_vertices;
size_t dst = index % num_vertices;

return thrust::make_tuple(static_cast<vertex_t>(src),
static_cast<vertex_t>(dst));
}));

handle.sync_stream();

Expand Down
39 changes: 21 additions & 18 deletions cpp/src/generators/simple_generators.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -27,6 +27,8 @@
#include <thrust/sequence.h>
#include <thrust/tuple.h>

#include <cuda/functional>

#include <numeric>

namespace cugraph {
Expand Down Expand Up @@ -264,23 +266,24 @@ generate_complete_graph_edgelist(

auto transform_iter = thrust::make_transform_iterator(
thrust::make_counting_iterator<size_t>(0),
[base_vertex_id, num_vertices, invalid_vertex] __device__(size_t index) {
size_t graph_index = index / (num_vertices * num_vertices);
size_t local_index = index % (num_vertices * num_vertices);

vertex_t src = base_vertex_id + static_cast<vertex_t>(local_index / num_vertices);
vertex_t dst = base_vertex_id + static_cast<vertex_t>(local_index % num_vertices);

if (src == dst) {
src = invalid_vertex;
dst = invalid_vertex;
} else {
src += (graph_index * num_vertices);
dst += (graph_index * num_vertices);
}

return thrust::make_tuple(src, dst);
});
cuda::proclaim_return_type<thrust::tuple<vertex_t, vertex_t>>(
[base_vertex_id, num_vertices, invalid_vertex] __device__(size_t index) {
size_t graph_index = index / (num_vertices * num_vertices);
size_t local_index = index % (num_vertices * num_vertices);

vertex_t src = base_vertex_id + static_cast<vertex_t>(local_index / num_vertices);
vertex_t dst = base_vertex_id + static_cast<vertex_t>(local_index % num_vertices);

if (src == dst) {
src = invalid_vertex;
dst = invalid_vertex;
} else {
src += (graph_index * num_vertices);
dst += (graph_index * num_vertices);
}

return thrust::make_tuple(src, dst);
}));

output_iterator = thrust::copy_if(handle.get_thrust_policy(),
transform_iter,
Expand Down
12 changes: 8 additions & 4 deletions cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@
#include <thrust/tuple.h>
#include <thrust/unique.h>

#include <cuda/functional>

#include <optional>
#include <tuple>

Expand Down Expand Up @@ -596,8 +598,9 @@ rmm::device_uvector<edge_t> get_sampling_index_without_replacement(
multiplier_t<size_t>{high_partition_over_sampling_K}),
thrust::make_transform_iterator(
thrust::make_counting_iterator(size_t{0}),
[high_partition_over_sampling_K, unique_counts = unique_counts.data()] __device__(
size_t i) { return i * high_partition_over_sampling_K + unique_counts[i]; }),
cuda::proclaim_return_type<size_t>(
[high_partition_over_sampling_K, unique_counts = unique_counts.data()] __device__(
size_t i) { return i * high_partition_over_sampling_K + unique_counts[i]; })),
handle.get_stream());
if (tmp_storage_bytes > d_tmp_storage.size()) {
d_tmp_storage = rmm::device_uvector<std::byte>(tmp_storage_bytes, handle.get_stream());
Expand All @@ -615,8 +618,9 @@ rmm::device_uvector<edge_t> get_sampling_index_without_replacement(
multiplier_t<size_t>{high_partition_over_sampling_K}),
thrust::make_transform_iterator(
thrust::make_counting_iterator(size_t{0}),
[high_partition_over_sampling_K, unique_counts = unique_counts.data()] __device__(
size_t i) { return i * high_partition_over_sampling_K + unique_counts[i]; }),
cuda::proclaim_return_type<size_t>(
[high_partition_over_sampling_K, unique_counts = unique_counts.data()] __device__(
size_t i) { return i * high_partition_over_sampling_K + unique_counts[i]; })),
handle.get_stream());

// copy the neighbor indices back to sample_nbr_indices
Expand Down
Loading
Loading