builds but not tested (needs PR rapidsai#944)
afender committed Jun 12, 2020
1 parent 8629727 · commit 42d6bc6
Showing 5 changed files with 60 additions and 81 deletions.
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
@@ -308,6 +308,7 @@ add_library(cugraph SHARED
src/utilities/spmv_1D.cu
src/structure/graph.cu
src/link_analysis/pagerank.cu
src/link_analysis/pagerank_1D.cu
src/link_analysis/gunrock_hits.cpp
src/traversal/bfs.cu
src/traversal/sssp.cu
48 changes: 28 additions & 20 deletions cpp/src/link_analysis/pagerank_1D.cu
@@ -19,10 +19,14 @@

#include <graph.hpp>
#include "pagerank_1D.cuh"
#include "utilities/graph_utils.cuh"

namespace cugraph {
namespace opg {

#define CUDA_MAX_KERNEL_THREADS 256
#define CUDA_MAX_BLOCKS 65535

template <typename VT, typename ET, typename WT>
__global__ void __launch_bounds__(CUDA_MAX_KERNEL_THREADS)
transition_kernel(const size_t e, const VT *ind, const VT *degree, WT *val)
@@ -32,18 +36,21 @@ __global__ void __launch_bounds__(CUDA_MAX_KERNEL_THREADS)
}

template <typename VT, typename ET, typename WT>
Pagerank<VT, WT>::Pagerank(
const comms::comms_t &comm_, size_t *part_off_, ET *off_, VT *ind_, cudaStream_t stream_)
: comm(comm_), part_off(part_off_), off(off_), ind(ind_), stream(stream_)
Pagerank<VT, ET, WT>::Pagerank(const raft::handle_t &handle_,
experimental::GraphCSCView<VT, ET, WT> const &G)
: comm(handle_.get_comms())
{
id = comm->get_rank();
nt = comm->get_size();
v_glob = part_off[nt];
v_loc = part_off[id + 1] - part_off[id];
VT tmp_e;
cudaMemcpy(&tmp_e, &off[v_loc], sizeof(VT), cudaMemcpyDeviceToHost);
CUDA_CHECK_LAST();
e_loc = tmp_e;
v_glob = G.number_of_vertices;

// FIXME needs PR #944
// v_loc = G.number_of_local_vertices;
// e_loc = G.number_of_local_edges;
// part_off = G.local_offset;

off = G.offsets;
ind = G.indices;
sm_count = handle_.get_device_properties().multiProcessorCount;

is_setup = false;
bookmark.resize(v_glob);
val.resize(e_loc);
@@ -62,17 +69,18 @@ template <typename VT, typename ET, typename WT>
void Pagerank<VT, ET, WT>::transition_vals(const VT *degree)
{
int threads = min(static_cast<VT>(e_loc), 256);
int blocks = min(static_cast<VT>(32 * comm->get_sm_count()), CUDA_MAX_BLOCKS);
transition_kernel<VT, WT><<<blocks, threads>>>(e_loc, ind, degree, val);
int blocks = min(static_cast<VT>(32 * sm_count), CUDA_MAX_BLOCKS);
transition_kernel<VT, ET, WT><<<blocks, threads>>>(e_loc, ind, degree, val.data().get());
CUDA_CHECK_LAST();
}

template <typename VT, typename ET, typename WT>
void Pagerank<VT, ET, WT>::flag_leafs(const VT *degree)
{
int threads = min(static_cast<VT>(v_glob), 256);
int blocks = min(static_cast<VT>(32 * comm->get_sm_count()), CUDA_MAX_BLOCKS);
cugraph::detail::flag_leafs_kernel<VT, WT><<<blocks, threads>>>(v_glob, degree, bookmark);
int blocks = min(static_cast<VT>(32 * sm_count), CUDA_MAX_BLOCKS);
cugraph::detail::flag_leafs_kernel<VT, WT>
<<<blocks, threads>>>(v_glob, degree, bookmark.data().get());
CUDA_CHECK_LAST();
}

@@ -85,9 +93,9 @@ void Pagerank<VT, ET, WT>::setup(WT _alpha, VT *degree)
WT zero = 0.0;

// Update dangling node vector
cugraph::detail::fill(v_glob, bookmark, zero);
cugraph::detail::fill(v_glob, bookmark.data().get(), zero);
flag_leafs(degree);
cugraph::detail::update_dangling_nodes(v_glob, bookmark, alpha);
cugraph::detail::update_dangling_nodes(v_glob, bookmark.data().get(), alpha);

// Transition matrix
transition_vals(degree);
@@ -110,13 +118,13 @@ void Pagerank<VT, ET, WT>::solve(int max_iter, WT *pagerank)
// This should not be required in theory
// This is not needed on one GPU at this time
cudaDeviceSynchronize();
dot_res = cugraph::detail::dot(v_glob, bookmark, pr);
OPGcsrmv<VT, ET, WT> spmv_solver(comm, part_off, off, ind, val, pagerank);
dot_res = cugraph::detail::dot(v_glob, bookmark.data().get(), pr);
OPGcsrmv<VT, ET, WT> spmv_solver(comm, part_off, off, ind, val.data().get(), pagerank);
for (auto i = 0; i < max_iter; ++i) {
spmv_solver.run(pagerank);
cugraph::detail::scal(v_glob, alpha, pr);
cugraph::detail::addv(v_glob, dot_res * (one / v_glob), pr);
dot_res = cugraph::detail::dot(v_glob, bookmark, pr);
dot_res = cugraph::detail::dot(v_glob, bookmark.data().get(), pr);
cugraph::detail::scal(v_glob, one / cugraph::detail::nrm2(v_glob, pr), pr);
}
cugraph::detail::scal(v_glob, one / cugraph::detail::nrm1(v_glob, pr), pr);
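[Editor's note] For context on the loop in solve() above: each iteration applies the distributed transition-matrix SpMV, damps the result by alpha, redistributes the dangling-vertex mass captured in bookmark, and rescales to keep the iterate stable. A minimal host-side sketch of that update (plain C++ on std::vector, names hypothetical; the real code runs these steps through cugraph::detail primitives on device memory):

#include <cmath>
#include <functional>
#include <numeric>
#include <vector>

// Host-side sketch (illustration only, not the library API) of one power
// iteration as performed in Pagerank::solve() above. `spmv` stands in for the
// distributed transition-matrix product (pr <- A * pr); `bookmark` holds the
// alpha-scaled dangling-vertex flags prepared in setup().
void pagerank_step(std::vector<double> &pr,
                   std::vector<double> const &bookmark,
                   double alpha,
                   double &dot_res,
                   std::function<void(std::vector<double> &)> const &spmv)
{
  spmv(pr);                                      // OPGcsrmv::run in the real code
  for (auto &v : pr) v *= alpha;                 // scal(v_glob, alpha, pr)
  double shift = dot_res / static_cast<double>(pr.size());
  for (auto &v : pr) v += shift;                 // addv: spread dangling mass evenly
  dot_res = std::inner_product(bookmark.begin(), bookmark.end(), pr.begin(), 0.0);
  double l2 = std::sqrt(std::inner_product(pr.begin(), pr.end(), pr.begin(), 0.0));
  for (auto &v : pr) v /= l2;                    // scal by 1/nrm2 keeps iterates bounded
}
// solve() runs this max_iter times, then rescales once by 1/nrm1 so the
// final scores sum to one.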
46 changes: 17 additions & 29 deletions cpp/src/link_analysis/pagerank_1D.cuh
@@ -24,21 +24,21 @@
#include <raft/handle.hpp>

#include "utilities/error_utils.h"
#include "utilities/spmv_1D.cuh"

namespace cugraph {
namespace opg {

template <typename VT, typename ET, typename WT>
class Pagerank {
private:
size_t v_glob; // global number of vertices
size_t v_loc; // local number of vertices
size_t e_loc; // local number of edges
int id; // thread id
int nt; // number of threads
WT alpha; // damping factor
const comms::comms_t &comm; // info about the opg comm setup
size_t v_glob; // global number of vertices
size_t v_loc; // local number of vertices
size_t e_loc; // local number of edges
WT alpha; // damping factor
const raft::comms::comms_t &comm; // info about the opg comm setup
cudaStream_t stream;
int sm_count;

// Vertex offsets for each partition.
// This information should be available on all threads/devices
@@ -58,8 +58,7 @@ class Pagerank {
bool is_setup;

public:
Pagerank(
const raft::handle_t &handle, size_t *part_off_, ET *off_, VT *ind_, cudaStream_t stream = 0);
Pagerank(const raft::handle_t &handle, const experimental::GraphCSCView<VT, ET, WT> &G);
~Pagerank();

void transition_vals(const VT *degree);
@@ -74,19 +73,15 @@ class Pagerank {
};

template <typename VT, typename ET, typename WT>
void pagerank(const raft::handle_t &handle,
size_t v_loc,
ET *csr_off,
VT *csr_ind,
VT *degree,
void pagerank(raft::handle_t const &handle,
const experimental::GraphCSCView<VT, ET, WT> &G,
WT *pagerank_result,
const float damping_factor = 0.85,
const int n_iter = 40,
cudaStream_t stream = 0)
const int n_iter = 40)
{
// null pointer checks
CUGRAPH_EXPECTS(csr_off != nullptr, "Invalid API parameter - csr_off is null");
CUGRAPH_EXPECTS(csr_ind != nullptr, "Invalid API parameter - csr_ind is null");
CUGRAPH_EXPECTS(G.offsets != nullptr, "Invalid API parameter - G.offsets is null");
CUGRAPH_EXPECTS(G.indices != nullptr, "Invalid API parameter - G.indices is null");
CUGRAPH_EXPECTS(pagerank_result != nullptr,
"Invalid API parameter - pagerank output memory must be allocated");

@@ -97,20 +92,13 @@ void pagerank(const raft::handle_t &handle,
"Invalid API parameter - invalid damping factor value (alpha>1)");
CUGRAPH_EXPECTS(n_iter > 0, "Invalid API parameter - n_iter must be > 0");

CUGRAPH_EXPECTS(v_loc > 0, "Invalid API parameter - v_loc must be > 0");
rmm::device_vector<VT> degree(G.number_of_vertices);

// Must be shared
std::vector<size_t> part_offset(comm.get_size() + 1);

// MPICHECK(MPI_Allgather(&v_loc, 1, MPI_SIZE_T, &part_offset[1], 1, MPI_SIZE_T, MPI_COMM_WORLD));
std::partial_sum(part_offset.begin(), part_offset.end(), part_offset.begin());
if (comm.is_master())
for (auto i = part_offset.begin(); i != part_offset.end(); ++i) std::cout << *i << ' ';
std::cout << std::endl;
sync_all();
// in-degree of CSC (equivalent to out-degree of original edge list)
G.degree(degree.data().get(), experimental::DegreeDirection::IN);

// Allocate and initialize Pagerank class
Pagerank<VT, ET, WT> pr_solver(&comm, &part_offset[0], csr_off, csr_ind, stream);
Pagerank<VT, ET, WT> pr_solver(handle, G);

// Set all constants info
pr_solver.setup(damping_factor, degree.data().get());
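[Editor's note] For orientation, a rough sketch of how a caller might drive this reworked entry point (the GraphCSCView constructor arguments and the surrounding setup are assumptions for illustration; the multi-GPU partition fields still depend on PR #944):

#include <raft/handle.hpp>

#include <graph.hpp>
#include "link_analysis/pagerank_1D.cuh"

// Hypothetical caller: offsets/indices/weights point to the local CSC
// partition in device memory; nv and ne are the vertex and edge counts.
void run_opg_pagerank(raft::handle_t const &handle,
                      int *offsets, int *indices, float *weights,
                      int nv, int ne, float *d_pagerank)
{
  cugraph::experimental::GraphCSCView<int, int, float> G(
    offsets, indices, weights, nv, ne);
  // Defaults from the header: damping_factor = 0.85, n_iter = 40.
  cugraph::opg::pagerank<int, int, float>(handle, G, d_pagerank);
}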
32 changes: 7 additions & 25 deletions cpp/src/utilities/spmv_1D.cu
@@ -23,10 +23,9 @@ namespace cugraph {
namespace opg {
template <typename VT, typename ET, typename WT>
OPGcsrmv<VT, ET, WT>::OPGcsrmv(
const raft::handle_t &handle, size_t *part_off_, ET *off_, VT *ind_, WT *val_, WT *x)
: comm(handle.get_comms()), part_off(part_off_), off(off_), ind(ind_), val(val_)
const raft::comms::comms_t &comm_, size_t *part_off_, ET *off_, VT *ind_, WT *val_, WT *x)
: comm(comm_), part_off(part_off_), off(off_), ind(ind_), val(val_)
{
sync_all();
stream = nullptr;
i = comm.get_rank();
p = comm.get_size();
@@ -45,7 +44,7 @@ OPGcsrmv<VT, ET, WT>::OPGcsrmv(
// comm.allgather(v_loc, displs_d, 1, stream);
// memcpy displs_h displs_d

spmv.setup(v_loc, v_glob, e_loc, &h_one, val, off, ind, x, &h_zero, y_loc);
spmv.setup(v_loc, v_glob, e_loc, &h_one, val, off, ind, x, &h_zero, y_loc.data().get());
}

template <typename VT, typename ET, typename WT>
@@ -56,31 +55,14 @@ OPGcsrmv<VT, ET, WT>::~OPGcsrmv()
template <typename VT, typename ET, typename WT>
void OPGcsrmv<VT, ET, WT>::run(WT *x)
{
sync_all();
WT h_one = 1.0;
WT h_zero = 0.0;
spmv.run(v_loc, v_glob, e_loc, &h_one, val, off, ind, x, &h_zero, y_loc);
comm.allgatherv(y_loc, x, v_loc, displs_h, stream);
spmv.run(v_loc, v_glob, e_loc, &h_one, val, off, ind, x, &h_zero, y_loc.data().get());
comm.allgatherv(y_loc.data().get(), x, &v_locs_h[0], &displs_h[0], stream);
}

template class OPGcsrmv<int, double>;
template class OPGcsrmv<int, float>;

template <typename VT, typename ET, typename WT>
void snmg_csrmv_impl(size_t *part_offsets, ET *off, VT *ind, WT *val, WT *x)
{
CUGRAPH_EXPECTS(part_offsets != nullptr, "Invalid API parameter");
CUGRAPH_EXPECTS(off != nullptr, "Invalid API parameter");
CUGRAPH_EXPECTS(ind != nullptr, "Invalid API parameter");
CUGRAPH_EXPECTS(val != nullptr, "Invalid API parameter");
CUGRAPH_EXPECTS(x != nullptr, "Invalid API parameter");

cugraph::detail::Cusparse::get_handle();

OPGcsrmv<VT, ET, WT> spmv_solver(snmg_env, part_offsets, off, ind, val, x);
spmv_solver.run(x);
cugraph::detail::Cusparse::destroy_handle();
}
template class OPGcsrmv<int, int, double>;
template class OPGcsrmv<int, int, float>;

} // namespace opg
} // namespace cugraph
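[Editor's note] One detail worth spelling out: the allgatherv in run() needs per-rank counts (v_locs_h) and displacements (displs_h) describing how the global vector x is partitioned. A sketch of deriving them from the vertex partition offsets, consistent with the commented-out allgather in the constructor above (an assumption; this commit leaves the actual wiring to PR #944):

// part_off has p + 1 entries; rank r owns vertices [part_off[r], part_off[r + 1]).
std::vector<size_t> v_locs_h(p), displs_h(p);
for (int r = 0; r < p; ++r) {
  v_locs_h[r] = part_off[r + 1] - part_off[r];  // local vertex count of rank r
  displs_h[r] = part_off[r];                    // start of rank r's slice in x
}
// Each rank then contributes its v_loc local results and receives the full vector:
// comm.allgatherv(y_loc.data().get(), x, v_locs_h.data(), displs_h.data(), stream);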
14 changes: 7 additions & 7 deletions cpp/src/utilities/spmv_1D.cuh
@@ -20,9 +20,9 @@
#pragma once
#include <rmm/thrust_rmm_allocator.h>
#include <raft/handle.hpp>
#include "cusparse_helper.h"
#include "utilities/cusparse_helper.h"
// FIX ME #include <raft/sparse/cusparse_wrappers.h>
#include "error_utils.cuh"
#include "utilities/error_utils.h"

namespace cugraph {
namespace opg {
@@ -33,24 +33,24 @@ class OPGcsrmv {
size_t v_glob;
size_t v_loc;
size_t e_loc;
const comms::comms_t& comm;
const raft::comms::comms_t& comm;
size_t* part_off;
int i;
int p;
ET* off;
VT* ind;
WT* val;
rmm::device_vector<WT> y_loc;
rmm::device_vector<size_t> displs_d;
std::vector<size_t> displs_h;
std::vector<size_t> v_locs_h;
std::vector<VT> displs_h;

WT* y_loc;
cudaStream_t stream;
// FIX ME - access csrmv through RAFT
cugraph::detail::CusparseCsrMV<WT> spmv;

public:
OPGcsrmv(const raft::handle_t& handle, size_t* part_off_, ET* off_, VT* ind_, WT* val_, WT* x);
OPGcsrmv(
const raft::comms::comms_t& comm, size_t* part_off_, ET* off_, VT* ind_, WT* val_, WT* x);

~OPGcsrmv();

