From 42d6bc6aee68b55e6e9ada64676c57e77b8c8711 Mon Sep 17 00:00:00 2001
From: afender
Date: Fri, 12 Jun 2020 15:48:29 -0500
Subject: [PATCH] builds but not tested (needs PR #944)

---
 cpp/CMakeLists.txt                    |  1 +
 cpp/src/link_analysis/pagerank_1D.cu  | 48 ++++++++++++++++-----------
 cpp/src/link_analysis/pagerank_1D.cuh | 46 ++++++++++---------
 cpp/src/utilities/spmv_1D.cu          | 32 ++++--------
 cpp/src/utilities/spmv_1D.cuh         | 14 ++++----
 5 files changed, 60 insertions(+), 81 deletions(-)

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 90c2e0c4ef..ded263bfae 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -308,6 +308,7 @@ add_library(cugraph SHARED
     src/utilities/spmv_1D.cu
     src/structure/graph.cu
     src/link_analysis/pagerank.cu
+    src/link_analysis/pagerank_1D.cu
     src/link_analysis/gunrock_hits.cpp
     src/traversal/bfs.cu
     src/traversal/sssp.cu
diff --git a/cpp/src/link_analysis/pagerank_1D.cu b/cpp/src/link_analysis/pagerank_1D.cu
index 68446daf42..f7fd8e567f 100644
--- a/cpp/src/link_analysis/pagerank_1D.cu
+++ b/cpp/src/link_analysis/pagerank_1D.cu
@@ -19,10 +19,14 @@
 #include
 #include "pagerank_1D.cuh"
+#include "utilities/graph_utils.cuh"
 
 namespace cugraph {
 namespace opg {
 
+#define CUDA_MAX_KERNEL_THREADS 256
+#define CUDA_MAX_BLOCKS 65535
+
 template <typename VT, typename WT>
 __global__ void __launch_bounds__(CUDA_MAX_KERNEL_THREADS)
   transition_kernel(const size_t e, const VT *ind, const VT *degree, WT *val)
@@ -32,18 +36,21 @@
 }
 
 template <typename VT, typename ET, typename WT>
-Pagerank<VT, ET, WT>::Pagerank(
-  const comms::comms_t &comm_, size_t *part_off_, ET *off_, VT *ind_, cudaStream_t stream_)
-  : comm(comm_), part_off(part_off_), off(off_), ind(ind_), stream(stream_)
+Pagerank<VT, ET, WT>::Pagerank(const raft::handle_t &handle_,
+                               experimental::GraphCSCView<VT, ET, WT> const &G)
+  : comm(handle_.get_comms())
 {
-  id     = comm->get_rank();
-  nt     = comm->get_size();
-  v_glob = part_off[nt];
-  v_loc  = part_off[id + 1] - part_off[id];
-  VT tmp_e;
-  cudaMemcpy(&tmp_e, &off[v_loc], sizeof(VT), cudaMemcpyDeviceToHost);
-  CUDA_CHECK_LAST();
-  e_loc = tmp_e;
+  v_glob = G.number_of_vertices;
+
+  // FIXME needs PR #944
+  // v_loc = G.number_of_local_vertices;
+  // e_loc = G.number_of_local_edges;
+  // part_off = G.local_offset;
+
+  off      = G.offsets;
+  ind      = G.indices;
+  sm_count = handle_.get_device_properties().multiProcessorCount;
+
   is_setup = false;
   bookmark.resize(v_glob);
   val.resize(e_loc);
@@ -62,8 +69,8 @@
 template <typename VT, typename ET, typename WT>
 void Pagerank<VT, ET, WT>::transition_vals(const VT *degree)
 {
   int threads = min(static_cast<int>(e_loc), 256);
-  int blocks  = min(static_cast<int>(32 * comm->get_sm_count()), CUDA_MAX_BLOCKS);
-  transition_kernel<<<blocks, threads>>>(e_loc, ind, degree, val);
+  int blocks  = min(static_cast<int>(32 * sm_count), CUDA_MAX_BLOCKS);
+  transition_kernel<<<blocks, threads>>>(e_loc, ind, degree, val.data().get());
   CUDA_CHECK_LAST();
 }
@@ -71,8 +78,9 @@
 template <typename VT, typename ET, typename WT>
 void Pagerank<VT, ET, WT>::flag_leafs(const VT *degree)
 {
   int threads = min(static_cast<int>(v_glob), 256);
-  int blocks  = min(static_cast<int>(32 * comm->get_sm_count()), CUDA_MAX_BLOCKS);
-  cugraph::detail::flag_leafs_kernel<VT, WT><<<blocks, threads>>>(v_glob, degree, bookmark);
+  int blocks  = min(static_cast<int>(32 * sm_count), CUDA_MAX_BLOCKS);
+  cugraph::detail::flag_leafs_kernel<VT, WT>
+    <<<blocks, threads>>>(v_glob, degree, bookmark.data().get());
   CUDA_CHECK_LAST();
 }
@@ -85,9 +93,9 @@ void Pagerank<VT, ET, WT>::setup(WT _alpha, VT *degree)
   WT zero = 0.0;
 
   // Update dangling node vector
-  cugraph::detail::fill(v_glob, bookmark, zero);
+  cugraph::detail::fill(v_glob, bookmark.data().get(), zero);
   flag_leafs(degree);
-  cugraph::detail::update_dangling_nodes(v_glob, bookmark, alpha);
+  cugraph::detail::update_dangling_nodes(v_glob, bookmark.data().get(), alpha);
 
   // Transition matrix
   transition_vals(degree);
@@ -110,13 +118,13 @@ void Pagerank<VT, ET, WT>::solve(int max_iter, WT *pagerank)
   // This should not be requiered in theory
   // This is not needed on one GPU at this time
   cudaDeviceSynchronize();
-  dot_res = cugraph::detail::dot(v_glob, bookmark, pr);
-  OPGcsrmv<VT, ET, WT> spmv_solver(comm, part_off, off, ind, val, pagerank);
+  dot_res = cugraph::detail::dot(v_glob, bookmark.data().get(), pr);
+  OPGcsrmv<VT, ET, WT> spmv_solver(comm, part_off, off, ind, val.data().get(), pagerank);
   for (auto i = 0; i < max_iter; ++i) {
     spmv_solver.run(pagerank);
     cugraph::detail::scal(v_glob, alpha, pr);
     cugraph::detail::addv(v_glob, dot_res * (one / v_glob), pr);
-    dot_res = cugraph::detail::dot(v_glob, bookmark, pr);
+    dot_res = cugraph::detail::dot(v_glob, bookmark.data().get(), pr);
     cugraph::detail::scal(v_glob, one / cugraph::detail::nrm2(v_glob, pr), pr);
   }
   cugraph::detail::scal(v_glob, one / cugraph::detail::nrm1(v_glob, pr), pr);
diff --git a/cpp/src/link_analysis/pagerank_1D.cuh b/cpp/src/link_analysis/pagerank_1D.cuh
index 226057e2a4..c2547a1cc8 100644
--- a/cpp/src/link_analysis/pagerank_1D.cuh
+++ b/cpp/src/link_analysis/pagerank_1D.cuh
@@ -24,6 +24,7 @@
 #include
 #include "utilities/error_utils.h"
+#include "utilities/spmv_1D.cuh"
 
 namespace cugraph {
 namespace opg {
@@ -31,14 +32,13 @@ namespace opg {
 template <typename VT, typename ET, typename WT>
 class Pagerank {
  private:
-  size_t v_glob;               // global number of vertices
-  size_t v_loc;                // local number of vertices
-  size_t e_loc;                // local number of edges
-  int id;                      // thread id
-  int nt;                      // number of threads
-  WT alpha;                    // damping factor
-  const comms::comms_t &comm;  // info about the opg comm setup
+  size_t v_glob;                     // global number of vertices
+  size_t v_loc;                      // local number of vertices
+  size_t e_loc;                      // local number of edges
+  WT alpha;                          // damping factor
+  const raft::comms::comms_t &comm;  // info about the opg comm setup
   cudaStream_t stream;
+  int sm_count;
 
   // Vertex offsets for each partition.
   // This information should be available on all threads/devices
@@ -58,8 +58,7 @@ class Pagerank {
   bool is_setup;
 
  public:
-  Pagerank(
-    const raft::handle_t &handle, size_t *part_off_, ET *off_, VT *ind_, cudaStream_t stream = 0);
+  Pagerank(const raft::handle_t &handle, const experimental::GraphCSCView<VT, ET, WT> &G);
   ~Pagerank();
 
   void transition_vals(const VT *degree);
@@ -74,19 +73,15 @@ class Pagerank {
 };
 
 template <typename VT, typename ET, typename WT>
-void pagerank(const raft::handle_t &handle,
-              size_t v_loc,
-              ET *csr_off,
-              VT *csr_ind,
-              VT *degree,
+void pagerank(raft::handle_t const &handle,
+              const experimental::GraphCSCView<VT, ET, WT> &G,
               WT *pagerank_result,
              const float damping_factor = 0.85,
-              const int n_iter           = 40,
-              cudaStream_t stream        = 0)
+              const int n_iter           = 40)
 {
   // null pointers check
-  CUGRAPH_EXPECTS(csr_off != nullptr, "Invalid API parameter - csr_off is null");
-  CUGRAPH_EXPECTS(csr_ind != nullptr, "Invalid API parameter - csr_ind is null");
+  CUGRAPH_EXPECTS(G.offsets != nullptr, "Invalid API parameter - csr_off is null");
+  CUGRAPH_EXPECTS(G.indices != nullptr, "Invalid API parameter - csr_ind is null");
   CUGRAPH_EXPECTS(pagerank_result != nullptr,
                   "Invalid API parameter - pagerank output memory must be allocated");
 
@@ -97,20 +92,13 @@ void pagerank(const raft::handle_t &handle,
                   "Invalid API parameter - invalid damping factor value (alpha>1)");
   CUGRAPH_EXPECTS(n_iter > 0, "Invalid API parameter - n_iter must be > 0");
-  CUGRAPH_EXPECTS(v_loc > 0, "Invalid API parameter - v_loc must be > 0");
+  rmm::device_vector<VT> degree(G.number_of_vertices);
 
-  // Must be shared
-  std::vector<size_t> part_offset(comm.get_size() + 1);
-
-  // MPICHECK(MPI_Allgather(&v_loc, 1, MPI_SIZE_T, &part_offset[1], 1, MPI_SIZE_T, MPI_COMM_WORLD));
-  std::partial_sum(part_offset.begin(), part_offset.end(), part_offset.begin());
-  if (comm.is_master())
-    for (auto i = part_offset.begin(); i != part_offset.end(); ++i) std::cout << *i << ' ';
-  std::cout << std::endl;
-  sync_all();
+  // in-degree of CSC (equivalent to out-degree of original edge list)
+  G.degree(degree, experimental::DegreeDirection::IN);
 
   // Allocate and intialize Pagerank class
-  Pagerank<VT, ET, WT> pr_solver(&comm, &part_offset[0], csr_off, csr_ind, stream);
+  Pagerank<VT, ET, WT> pr_solver(handle, G);
 
   // Set all constants info
   pr_solver.setup(damping_factor, degree);
diff --git a/cpp/src/utilities/spmv_1D.cu b/cpp/src/utilities/spmv_1D.cu
index 50ac4016d3..494e9f9135 100644
--- a/cpp/src/utilities/spmv_1D.cu
+++ b/cpp/src/utilities/spmv_1D.cu
@@ -23,10 +23,9 @@ namespace cugraph {
 namespace opg {
 template <typename VT, typename ET, typename WT>
 OPGcsrmv<VT, ET, WT>::OPGcsrmv(
-  const raft::handle_t &handle, size_t *part_off_, ET *off_, VT *ind_, WT *val_, WT *x)
-  : comm(handle.get_comms()), part_off(part_off_), off(off_), ind(ind_), val(val_)
+  const raft::comms::comms_t &comm_, size_t *part_off_, ET *off_, VT *ind_, WT *val_, WT *x)
+  : comm(comm_), part_off(part_off_), off(off_), ind(ind_), val(val_)
 {
-  sync_all();
   stream = nullptr;
   i      = comm.get_rank();
   p      = comm.get_size();
@@ -45,7 +44,7 @@ OPGcsrmv<VT, ET, WT>::OPGcsrmv(
   // comm.allgather(v_loc, displs_d, 1, stream);
   // memcpy displs_h displs_d
 
-  spmv.setup(v_loc, v_glob, e_loc, &h_one, val, off, ind, x, &h_zero, y_loc);
+  spmv.setup(v_loc, v_glob, e_loc, &h_one, val, off, ind, x, &h_zero, y_loc.data().get());
 }
 
 template <typename VT, typename ET, typename WT>
@@ -56,31 +55,14 @@ OPGcsrmv<VT, ET, WT>::~OPGcsrmv()
 {
 }
 
 template <typename VT, typename ET, typename WT>
 void OPGcsrmv<VT, ET, WT>::run(WT *x)
 {
-  sync_all();
   WT h_one  = 1.0;
   WT h_zero = 0.0;
-  spmv.run(v_loc, v_glob, e_loc, &h_one, val, off, ind, x, &h_zero, y_loc);
-  comm.allgatherv(y_loc, x, v_loc, displs_h, stream);
+  spmv.run(v_loc, v_glob, e_loc, &h_one, val, off, ind, x, &h_zero, y_loc.data().get());
+  comm.allgatherv(y_loc.data().get(), x, &v_locs_h[0], &displs_h[0], stream);
 }
 
-template class OPGcsrmv<int, int, double>;
-template class OPGcsrmv<int, int, float>;
-
-template <typename VT, typename ET, typename WT>
-void snmg_csrmv_impl(size_t *part_offsets, ET *off, VT *ind, WT *val, WT *x)
-{
-  CUGRAPH_EXPECTS(part_offsets != nullptr, "Invalid API parameter");
-  CUGRAPH_EXPECTS(off != nullptr, "Invalid API parameter");
-  CUGRAPH_EXPECTS(ind != nullptr, "Invalid API parameter");
-  CUGRAPH_EXPECTS(val != nullptr, "Invalid API parameter");
-  CUGRAPH_EXPECTS(x != nullptr, "Invalid API parameter");
-
-  cugraph::detail::Cusparse::get_handle();
-
-  OPGcsrmv<VT, ET, WT> spmv_solver(snmg_env, part_offsets, off, ind, val, x);
-  spmv_solver.run(x);
-  cugraph::detail::Cusparse::destroy_handle();
-}
+template class OPGcsrmv<int, int, double>;
+template class OPGcsrmv<int, int, float>;
 
 }  // namespace opg
 }  // namespace cugraph
\ No newline at end of file
diff --git a/cpp/src/utilities/spmv_1D.cuh b/cpp/src/utilities/spmv_1D.cuh
index b38eef6a78..c582f3f71f 100644
--- a/cpp/src/utilities/spmv_1D.cuh
+++ b/cpp/src/utilities/spmv_1D.cuh
@@ -20,9 +20,9 @@
 #pragma once
 #include
 #include
-#include "cusparse_helper.h"
+#include "utilities/cusparse_helper.h"  // FIX ME
 #include
-#include "error_utils.cuh"
+#include "utilities/error_utils.h"
 
 namespace cugraph {
 namespace opg {
@@ -33,7 +33,7 @@ class OPGcsrmv {
   size_t v_glob;
   size_t v_loc;
   size_t e_loc;
-  const comms::comms_t& comm;
+  const raft::comms::comms_t& comm;
   size_t* part_off;
   int i;
   int p;
   ET* off;
   VT* ind;
   WT* val;
   rmm::device_vector<WT> y_loc;
-  rmm::device_vector<size_t> displs_d;
-  std::vector<size_t> displs_h;
+  std::vector<size_t> v_locs_h;
+  std::vector<size_t> displs_h;
-  WT* y_loc;
   cudaStream_t stream;
 
   // FIX ME - access csrmv through RAFT
   cugraph::detail::CusparseCsrMV<WT> spmv;
 
  public:
-  OPGcsrmv(const raft::handle_t& handle, size_t* part_off_, ET* off_, VT* ind_, WT* val_, WT* x);
+  OPGcsrmv(
+    const raft::comms::comms_t& comm, size_t* part_off_, ET* off_, VT* ind_, WT* val_, WT* x);
   ~OPGcsrmv();
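
Note for reviewers (not part of the patch): below is a minimal calling sketch of the new cugraph::opg::pagerank entry point declared in pagerank_1D.cuh, to show the intended API shape. The include paths, the GraphCSCView constructor arguments, the function name run_opg_pagerank, and its parameters are assumptions for illustration only; per the FIXME and the subject line, the per-GPU partition fields (v_loc, e_loc, part_off) still depend on PR #944, so this is a sketch, not a tested usage.

// Illustrative sketch only -- assumes a raft::handle_t whose communicator has
// already been initialized for the OPG run, and a CSC graph already resident
// on this GPU. Include paths follow the 0.14-era source layout (assumption).
#include <raft/handle.hpp>

#include "link_analysis/pagerank_1D.cuh"

void run_opg_pagerank(raft::handle_t const &handle,
                      int *csc_offsets,        // device array, size n_vertices + 1
                      int *csc_indices,        // device array, size n_edges
                      int n_vertices,
                      int n_edges,
                      float *pagerank_result)  // device array, size n_vertices
{
  // Edge weights are not used by the 1D PageRank path, so edge_data is null.
  cugraph::experimental::GraphCSCView<int, int, float> G(
    csc_offsets, csc_indices, nullptr, n_vertices, n_edges);

  // Defaults declared in pagerank_1D.cuh: damping_factor = 0.85, n_iter = 40.
  cugraph::opg::pagerank<int, int, float>(handle, G, pagerank_result, 0.85, 40);
}

As the new pagerank() shows, the in-degree vector and the transition values are computed inside the solver, so a caller only supplies the graph view and an output buffer; the SpMV is replicated across ranks via comm.allgatherv after each local spmv.run.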