Skip to content

Commit

Permalink
[src] Speed up VectorBase<Real>::Sum() by using BLAS (#2394)
Browse files Browse the repository at this point in the history
  • Loading branch information
galv authored and danpovey committed May 8, 2018
1 parent 954e69a commit b03b641
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 4 deletions.
11 changes: 10 additions & 1 deletion src/cudamatrix/cu-vector-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,16 @@ template<typename Real> void CuVectorUnitTestSum() {
A.SetRandn();
ones.Set(1.0);

AssertEqual(VecVec(A, ones), A.Sum());
Real x = VecVec(A, ones);
Real y = A.Sum();
Real diff = std::abs(x - y);
// Note: CuVectorBase<> does not have an ApplyAbs() member
// function, so we copy back to a host vector for simplicity in
// this test case.
Vector<Real> A_host(A);
A_host.ApplyAbs();
Real s = A_host.Sum();
KALDI_ASSERT ( diff <= 1.0e-04 * s);
}
}

Expand Down
8 changes: 5 additions & 3 deletions src/matrix/kaldi-vector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -691,9 +691,11 @@ void VectorBase<Real>::CopyDiagFromPacked(const PackedMatrix<Real> &M) {

/// Returns the sum of all elements of this vector.
///
/// The sum is computed as a dot product between this vector's data
/// (unit stride) and a single scalar 1 read with a stride of 0, i.e. the
/// same "1" is paired with every element.  Routing the reduction through
/// cblas_Xdot lets the BLAS library supply its SIMD-optimized kernel in a
/// cross-platform way instead of relying on a hand-written scalar loop.
///
/// NOTE(review): accumulation precision is now whatever the BLAS dot
/// routine uses for Real, not an explicit double accumulator as in a plain
/// loop -- confirm the resulting tolerance is acceptable to callers.
/// NOTE(review): presumably the BLAS in use accepts a zero increment for
/// the second operand -- verify against the BLAS implementations you ship.
template<typename Real>
Real VectorBase<Real>::Sum() const {
  // A one-element "vector" holding 1; with increment 0 it is re-read for
  // every element of data_, so the dot product equals the plain sum.
  Real one(1);
  return cblas_Xdot(dim_, data_, 1, &one, 0);
}

template<typename Real>
Expand Down

0 comments on commit b03b641

Please sign in to comment.