From 42d6bc6aee68b55e6e9ada64676c57e77b8c8711 Mon Sep 17 00:00:00 2001
From: afender
Date: Fri, 12 Jun 2020 15:48:29 -0500
Subject: [PATCH] builds but not tested (needs PR #944)

---
 cpp/CMakeLists.txt                    |  1 +
 cpp/src/link_analysis/pagerank_1D.cu  | 48 ++++++++++++++++-----------
 cpp/src/link_analysis/pagerank_1D.cuh | 46 ++++++++++---------
 cpp/src/utilities/spmv_1D.cu          | 32 ++++--------
 cpp/src/utilities/spmv_1D.cuh         | 14 ++++----
 5 files changed, 60 insertions(+), 81 deletions(-)

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 90c2e0c4ef..ded263bfae 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -308,6 +308,7 @@ add_library(cugraph SHARED
     src/utilities/spmv_1D.cu
     src/structure/graph.cu
     src/link_analysis/pagerank.cu
+    src/link_analysis/pagerank_1D.cu
     src/link_analysis/gunrock_hits.cpp
     src/traversal/bfs.cu
     src/traversal/sssp.cu
diff --git a/cpp/src/link_analysis/pagerank_1D.cu b/cpp/src/link_analysis/pagerank_1D.cu
index 68446daf42..f7fd8e567f 100644
--- a/cpp/src/link_analysis/pagerank_1D.cu
+++ b/cpp/src/link_analysis/pagerank_1D.cu
@@ -19,10 +19,14 @@
 #include
 #include "pagerank_1D.cuh"
+#include "utilities/graph_utils.cuh"
 
 namespace cugraph {
 namespace opg {
 
+#define CUDA_MAX_KERNEL_THREADS 256
+#define CUDA_MAX_BLOCKS 65535
+
 template <typename VT, typename WT>
 __global__ void __launch_bounds__(CUDA_MAX_KERNEL_THREADS)
   transition_kernel(const size_t e, const VT *ind, const VT *degree, WT *val)
@@ -32,18 +36,21 @@
 }
 
 template <typename VT, typename ET, typename WT>
-Pagerank<VT, ET, WT>::Pagerank(
-  const comms::comms_t &comm_, size_t *part_off_, ET *off_, VT *ind_, cudaStream_t stream_)
-  : comm(comm_), part_off(part_off_), off(off_), ind(ind_), stream(stream_)
+Pagerank<VT, ET, WT>::Pagerank(const raft::handle_t &handle_,
+                               experimental::GraphCSCView<VT, ET, WT> const &G)
+  : comm(handle_.get_comms())
 {
-  id     = comm->get_rank();
-  nt     = comm->get_size();
-  v_glob = part_off[nt];
-  v_loc  = part_off[id + 1] - part_off[id];
-  VT tmp_e;
-  cudaMemcpy(&tmp_e, &off[v_loc], sizeof(VT), cudaMemcpyDeviceToHost);
-  CUDA_CHECK_LAST();
-  e_loc = tmp_e;
+  v_glob = G.number_of_vertices;
+
+  // FIXME needs PR #944
+  // v_loc = G.number_of_local_vertices;
+  // e_loc = G.number_of_local_edges;
+  // part_off = G.local_offset;
+
+  off      = G.offsets;
+  ind      = G.indices;
+  sm_count = handle_.get_device_properties().multiProcessorCount;
+
   is_setup = false;
   bookmark.resize(v_glob);
   val.resize(e_loc);
@@ -62,8 +69,8 @@
 template <typename VT, typename ET, typename WT>
 void Pagerank<VT, ET, WT>::transition_vals(const VT *degree)
 {
   int threads = min(static_cast<int>(e_loc), 256);
-  int blocks  = min(static_cast<int>(32 * comm->get_sm_count()), CUDA_MAX_BLOCKS);
-  transition_kernel<<<blocks, threads>>>(e_loc, ind, degree, val);
+  int blocks  = min(static_cast<int>(32 * sm_count), CUDA_MAX_BLOCKS);
+  transition_kernel<<<blocks, threads>>>(e_loc, ind, degree, val.data().get());
   CUDA_CHECK_LAST();
 }
@@ -71,8 +78,9 @@
 template <typename VT, typename ET, typename WT>
 void Pagerank<VT, ET, WT>::flag_leafs(const VT *degree)
 {
   int threads = min(static_cast<int>(v_glob), 256);
-  int blocks  = min(static_cast<int>(32 * comm->get_sm_count()), CUDA_MAX_BLOCKS);
-  cugraph::detail::flag_leafs_kernel<VT, WT><<<blocks, threads>>>(v_glob, degree, bookmark);
+  int blocks  = min(static_cast<int>(32 * sm_count), CUDA_MAX_BLOCKS);
+  cugraph::detail::flag_leafs_kernel<VT, WT>
+    <<<blocks, threads>>>(v_glob, degree, bookmark.data().get());
   CUDA_CHECK_LAST();
 }
@@ -85,9 +93,9 @@ void Pagerank<VT, ET, WT>::setup(WT _alpha, VT *degree)
   WT zero = 0.0;
 
   // Update dangling node vector
-  cugraph::detail::fill(v_glob, bookmark, zero);
+  cugraph::detail::fill(v_glob, bookmark.data().get(), zero);
   flag_leafs(degree);
-  cugraph::detail::update_dangling_nodes(v_glob, bookmark, alpha);
+  cugraph::detail::update_dangling_nodes(v_glob, bookmark.data().get(), alpha);
 
   // Transition matrix
   transition_vals(degree);
@@ -110,13 +118,13 @@ void Pagerank<VT, ET, WT>::solve(int max_iter, WT *pagerank)
   // This should not be requiered in theory
   // This is not needed on one GPU at this time
   cudaDeviceSynchronize();
-  dot_res = cugraph::detail::dot(v_glob, bookmark, pr);
-  OPGcsrmv<VT, ET, WT> spmv_solver(comm, part_off, off, ind, val, pagerank);
+  dot_res = cugraph::detail::dot(v_glob, bookmark.data().get(), pr);
+  OPGcsrmv<VT, ET, WT> spmv_solver(comm, part_off, off, ind, val.data().get(), pagerank);
   for (auto i = 0; i < max_iter; ++i) {
     spmv_solver.run(pagerank);
     cugraph::detail::scal(v_glob, alpha, pr);
     cugraph::detail::addv(v_glob, dot_res * (one / v_glob), pr);
-    dot_res = cugraph::detail::dot(v_glob, bookmark, pr);
+    dot_res = cugraph::detail::dot(v_glob, bookmark.data().get(), pr);
     cugraph::detail::scal(v_glob, one / cugraph::detail::nrm2(v_glob, pr), pr);
   }
   cugraph::detail::scal(v_glob, one / cugraph::detail::nrm1(v_glob, pr), pr);
diff --git a/cpp/src/link_analysis/pagerank_1D.cuh b/cpp/src/link_analysis/pagerank_1D.cuh
index 226057e2a4..c2547a1cc8 100644
--- a/cpp/src/link_analysis/pagerank_1D.cuh
+++ b/cpp/src/link_analysis/pagerank_1D.cuh
@@ -24,6 +24,7 @@
 #include
 #include "utilities/error_utils.h"
+#include "utilities/spmv_1D.cuh"
 
 namespace cugraph {
 namespace opg {
@@ -31,14 +32,13 @@ namespace opg {
 template <typename VT, typename ET, typename WT>
 class Pagerank {
  private:
-  size_t v_glob;               // global number of vertices
-  size_t v_loc;                // local number of vertices
-  size_t e_loc;                // local number of edges
-  int id;                      // thread id
-  int nt;                      // number of threads
-  WT alpha;                    // damping factor
-  const comms::comms_t &comm;  // info about the opg comm setup
+  size_t v_glob;                     // global number of vertices
+  size_t v_loc;                      // local number of vertices
+  size_t e_loc;                      // local number of edges
+  WT alpha;                          // damping factor
+  const raft::comms::comms_t &comm;  // info about the opg comm setup
   cudaStream_t stream;
+  int sm_count;
 
   // Vertex offsets for each partition.
   // This information should be available on all threads/devices
@@ -58,8 +58,7 @@ class Pagerank {
   bool is_setup;
 
  public:
-  Pagerank(
-    const raft::handle_t &handle, size_t *part_off_, ET *off_, VT *ind_, cudaStream_t stream = 0);
+  Pagerank(const raft::handle_t &handle, const experimental::GraphCSCView<VT, ET, WT> &G);
   ~Pagerank();
 
   void transition_vals(const VT *degree);
@@ -74,19 +73,15 @@ class Pagerank {
 };
 
 template <typename VT, typename ET, typename WT>
-void pagerank(const raft::handle_t &handle,
-              size_t v_loc,
-              ET *csr_off,
-              VT *csr_ind,
-              VT *degree,
+void pagerank(raft::handle_t const &handle,
+              const experimental::GraphCSCView<VT, ET, WT> &G,
               WT *pagerank_result,
              const float damping_factor = 0.85,
-              const int n_iter           = 40,
-              cudaStream_t stream        = 0)
+              const int n_iter           = 40)
 {
   // null pointers check
-  CUGRAPH_EXPECTS(csr_off != nullptr, "Invalid API parameter - csr_off is null");
-  CUGRAPH_EXPECTS(csr_ind != nullptr, "Invalid API parameter - csr_ind is null");
+  CUGRAPH_EXPECTS(G.offsets != nullptr, "Invalid API parameter - csr_off is null");
+  CUGRAPH_EXPECTS(G.indices != nullptr, "Invalid API parameter - csr_ind is null");
   CUGRAPH_EXPECTS(pagerank_result != nullptr,
                   "Invalid API parameter - pagerank output memory must be allocated");
 
@@ -97,20 +92,13 @@ void pagerank(const raft::handle_t &handle,
                   "Invalid API parameter - invalid damping factor value (alpha>1)");
   CUGRAPH_EXPECTS(n_iter > 0, "Invalid API parameter - n_iter must be > 0");
-  CUGRAPH_EXPECTS(v_loc > 0, "Invalid API parameter - v_loc must be > 0");
+  rmm::device_vector<VT> degree(G.number_of_vertices);
 
-  // Must be shared
-  std::vector<size_t> part_offset(comm.get_size() + 1);
-
-  // MPICHECK(MPI_Allgather(&v_loc, 1, MPI_SIZE_T, &part_offset[1], 1, MPI_SIZE_T, MPI_COMM_WORLD));
-  std::partial_sum(part_offset.begin(), part_offset.end(), part_offset.begin());
-  if (comm.is_master())
-    for (auto i = part_offset.begin(); i != part_offset.end(); ++i) std::cout << *i << ' ';
-  std::cout << std::endl;
-  sync_all();
+  // in-degree of CSC (equivalent to out-degree of original edge list)
+  G.degree(degree, experimental::DegreeDirection::IN);
 
   // Allocate and intialize Pagerank class
-  Pagerank<VT, ET, WT> pr_solver(&comm, &part_offset[0], csr_off, csr_ind, stream);
+  Pagerank<VT, ET, WT> pr_solver(handle, G);
 
   // Set all constants info
   pr_solver.setup(damping_factor, degree);
diff --git a/cpp/src/utilities/spmv_1D.cu b/cpp/src/utilities/spmv_1D.cu
index 50ac4016d3..494e9f9135 100644
--- a/cpp/src/utilities/spmv_1D.cu
+++ b/cpp/src/utilities/spmv_1D.cu
@@ -23,10 +23,9 @@ namespace cugraph {
 namespace opg {
 template <typename VT, typename ET, typename WT>
 OPGcsrmv<VT, ET, WT>::OPGcsrmv(
-  const raft::handle_t &handle, size_t *part_off_, ET *off_, VT *ind_, WT *val_, WT *x)
-  : comm(handle.get_comms()), part_off(part_off_), off(off_), ind(ind_), val(val_)
+  const raft::comms::comms_t &comm_, size_t *part_off_, ET *off_, VT *ind_, WT *val_, WT *x)
+  : comm(comm_), part_off(part_off_), off(off_), ind(ind_), val(val_)
 {
-  sync_all();
   stream = nullptr;
   i      = comm.get_rank();
   p      = comm.get_size();
@@ -45,7 +44,7 @@ OPGcsrmv<VT, ET, WT>::OPGcsrmv(
   // comm.allgather(v_loc, displs_d, 1, stream);
   // memcpy displs_h displs_d
 
-  spmv.setup(v_loc, v_glob, e_loc, &h_one, val, off, ind, x, &h_zero, y_loc);
+  spmv.setup(v_loc, v_glob, e_loc, &h_one, val, off, ind, x, &h_zero, y_loc.data().get());
 }
 
 template <typename VT, typename ET, typename WT>
@@ -56,31 +55,14 @@ OPGcsrmv<VT, ET, WT>::~OPGcsrmv()
 {
 }
 
 template <typename VT, typename ET, typename WT>
 void OPGcsrmv<VT, ET, WT>::run(WT *x)
 {
-  sync_all();
   WT h_one  = 1.0;
   WT h_zero = 0.0;
-  spmv.run(v_loc, v_glob, e_loc, &h_one, val, off, ind, x, &h_zero, y_loc);
-  comm.allgatherv(y_loc, x, v_loc, displs_h, stream);
+  spmv.run(v_loc, v_glob, e_loc, &h_one, val, off, ind, x, &h_zero, y_loc.data().get());
+  comm.allgatherv(y_loc.data().get(), x, &v_locs_h[0], &displs_h[0], stream);
 }
 
-template class OPGcsrmv<int, int, double>;
-template class OPGcsrmv<int, int, float>;
-
-template <typename VT, typename ET, typename WT>
-void snmg_csrmv_impl(size_t *part_offsets, ET *off, VT *ind, WT *val, WT *x)
-{
-  CUGRAPH_EXPECTS(part_offsets != nullptr, "Invalid API parameter");
-  CUGRAPH_EXPECTS(off != nullptr, "Invalid API parameter");
-  CUGRAPH_EXPECTS(ind != nullptr, "Invalid API parameter");
-  CUGRAPH_EXPECTS(val != nullptr, "Invalid API parameter");
-  CUGRAPH_EXPECTS(x != nullptr, "Invalid API parameter");
-
-  cugraph::detail::Cusparse::get_handle();
-
-  OPGcsrmv<VT, ET, WT> spmv_solver(snmg_env, part_offsets, off, ind, val, x);
-  spmv_solver.run(x);
-  cugraph::detail::Cusparse::destroy_handle();
-}
+template class OPGcsrmv<int, int, double>;
+template class OPGcsrmv<int, int, float>;
 
 }  // namespace opg
 }  // namespace cugraph
\ No newline at end of file
diff --git a/cpp/src/utilities/spmv_1D.cuh b/cpp/src/utilities/spmv_1D.cuh
index b38eef6a78..c582f3f71f 100644
--- a/cpp/src/utilities/spmv_1D.cuh
+++ b/cpp/src/utilities/spmv_1D.cuh
@@ -20,9 +20,9 @@
 #pragma once
 #include
 #include
-#include "cusparse_helper.h"
+#include "utilities/cusparse_helper.h"  // FIX ME
 #include
-#include "error_utils.cuh"
+#include "utilities/error_utils.h"
 
 namespace cugraph {
 namespace opg {
@@ -33,7 +33,7 @@ class OPGcsrmv {
   size_t v_glob;
   size_t v_loc;
   size_t e_loc;
-  const comms::comms_t& comm;
+  const raft::comms::comms_t& comm;
   size_t* part_off;
   int i;
   int p;
   ET* off;
   VT* ind;
   WT* val;
   rmm::device_vector<WT> y_loc;
-  rmm::device_vector<size_t> displs_d;
-  std::vector<size_t> displs_h;
+  std::vector<size_t> v_locs_h;
+  std::vector<size_t> displs_h;
-  WT* y_loc;
   cudaStream_t stream;
 
   // FIX ME - access csrmv through RAFT
   cugraph::detail::CusparseCsrMV<WT> spmv;
 
  public:
-  OPGcsrmv(const raft::handle_t& handle, size_t* part_off_, ET* off_, VT* ind_, WT* val_, WT* x);
+  OPGcsrmv(
+    const raft::comms::comms_t& comm, size_t* part_off_, ET* off_, VT* ind_, WT* val_, WT* x);
   ~OPGcsrmv();
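
Note for reviewers (not part of the patch): below is a minimal calling sketch of the new cugraph::opg::pagerank entry point declared in pagerank_1D.cuh, to show the intended API shape. The include paths, the GraphCSCView constructor arguments, the function name run_opg_pagerank, and its parameters are assumptions for illustration only; per the FIXME and the subject line, the per-GPU partition fields (v_loc, e_loc, part_off) still depend on PR #944, so this is a sketch, not a tested usage.

// Illustrative sketch only -- assumes a raft::handle_t whose communicator has
// already been initialized for the OPG run, and a CSC graph already resident
// on this GPU. Include paths follow the 0.14-era source layout (assumption).
#include <raft/handle.hpp>

#include "link_analysis/pagerank_1D.cuh"

void run_opg_pagerank(raft::handle_t const &handle,
                      int *csc_offsets,        // device array, size n_vertices + 1
                      int *csc_indices,        // device array, size n_edges
                      int n_vertices,
                      int n_edges,
                      float *pagerank_result)  // device array, size n_vertices
{
  // Edge weights are not used by the 1D PageRank path, so edge_data is null.
  cugraph::experimental::GraphCSCView<int, int, float> G(
    csc_offsets, csc_indices, nullptr, n_vertices, n_edges);

  // Defaults declared in pagerank_1D.cuh: damping_factor = 0.85, n_iter = 40.
  cugraph::opg::pagerank<int, int, float>(handle, G, pagerank_result, 0.85, 40);
}

As the new pagerank() shows, the in-degree vector and the transition values are computed inside the solver, so a caller only supplies the graph view and an output buffer; the SpMV is replicated across ranks via comm.allgatherv after each local spmv.run.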