Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] initial implementation of sqrt_spd and inv_sqrt_spd #1145

Closed
wants to merge 11 commits into from
2 changes: 2 additions & 0 deletions stan/math/prim/mat.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@
#include <stan/math/prim/mat/fun/inv_cloglog.hpp>
#include <stan/math/prim/mat/fun/inv_logit.hpp>
#include <stan/math/prim/mat/fun/inv_sqrt.hpp>
#include <stan/math/prim/mat/fun/inv_sqrt_spd.hpp>
#include <stan/math/prim/mat/fun/inv_square.hpp>
#include <stan/math/prim/mat/fun/inverse.hpp>
#include <stan/math/prim/mat/fun/inverse_spd.hpp>
Expand Down Expand Up @@ -221,6 +222,7 @@
#include <stan/math/prim/mat/fun/sort_indices_asc.hpp>
#include <stan/math/prim/mat/fun/sort_indices_desc.hpp>
#include <stan/math/prim/mat/fun/sqrt.hpp>
#include <stan/math/prim/mat/fun/sqrt_spd.hpp>
#include <stan/math/prim/mat/fun/square.hpp>
#include <stan/math/prim/mat/fun/squared_distance.hpp>
#include <stan/math/prim/mat/fun/stan_print.hpp>
Expand Down
39 changes: 39 additions & 0 deletions stan/math/prim/mat/fun/inv_sqrt_spd.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#ifndef STAN_MATH_PRIM_MAT_FUN_INV_SQRT_SPD_HPP
#define STAN_MATH_PRIM_MAT_FUN_INV_SQRT_SPD_HPP

#include <stan/math/prim/arr/err/check_nonzero_size.hpp>
#include <stan/math/prim/mat/err/check_symmetric.hpp>
#include <stan/math/prim/mat/fun/Eigen.hpp>

namespace stan {
namespace math {

/**
* Returns the inverse symmetric square root of the specified
* symmetric and positive definite matrix, which is defined as
* V * diag_matrix(inv_sqrt(d)) * V'
* where V is a matrix of eigenvectors of the input matrix and
* d is a vector of eigenvalue of the input matrix. If the
* input matrix is not actually positive definite, then the
* output matrix will have NaNs in it.
*
* @param[in] m Specified symmetric positive definite matrix.
* @return Inverse symmetric square root of the matrix.
* @throws If matrix is of size zero or asymmetric
*/

template <typename T>
Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>
inv_sqrt_spd(const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic> &m) {
check_nonzero_size("inv_sqrt_spd", "m", m);
check_symmetric("inv_sqrt_spd", "m", m);

Eigen::SelfAdjointEigenSolver<
Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>
solver(m);
return solver.operatorInverseSqrt();
}

} // namespace math
} // namespace stan
#endif
39 changes: 39 additions & 0 deletions stan/math/prim/mat/fun/sqrt_spd.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#ifndef STAN_MATH_PRIM_MAT_FUN_SQRT_SPD_HPP
#define STAN_MATH_PRIM_MAT_FUN_SQRT_SPD_HPP

#include <stan/math/prim/arr/err/check_nonzero_size.hpp>
#include <stan/math/prim/mat/err/check_symmetric.hpp>
#include <stan/math/prim/mat/fun/Eigen.hpp>

namespace stan {
namespace math {

/**
* Returns the symmetric square root of the specified symmetric
* and positive definite matrix, which is defined as
* V * diag_matrix(sqrt(d)) * V'
* where V is a matrix of eigenvectors of the input matrix and
* d is a vector of eigenvalue of the input matrix. If the
* input matrix is not actually positive definite, then the
* output matrix will have NaNs in it.
*
* @param[in] m Specified symmetric positive definite matrix.
* @return Symmetric square root of the matrix.
* @throws If matrix is of size zero or asymmetric
*/

template <typename T>
Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>
sqrt_spd(const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic> &m) {
check_nonzero_size("sqrt_spd", "m", m);
check_symmetric("sqrt_spd", "m", m);

Eigen::SelfAdjointEigenSolver<
Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>
solver(m);
return solver.operatorSqrt();
}

} // namespace math
} // namespace stan
#endif
11 changes: 6 additions & 5 deletions stan/math/rev/mat.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
#define STAN_MATH_REV_MAT_HPP

#include <stan/math/rev/core.hpp>
#include <stan/math/rev/mat/meta/operands_and_partials.hpp>
#include <stan/math/rev/scal/meta/is_var.hpp>
#include <stan/math/rev/scal/meta/partials_type.hpp>
#include <stan/math/rev/mat/meta/operands_and_partials.hpp>

#include <stan/math/rev/mat/fun/Eigen_NumTraits.hpp>

Expand Down Expand Up @@ -47,6 +47,7 @@
#include <stan/math/rev/mat/fun/sd.hpp>
#include <stan/math/rev/mat/fun/simplex_constrain.hpp>
#include <stan/math/rev/mat/fun/softmax.hpp>
#include <stan/math/rev/mat/fun/sqrt_spd.hpp>
#include <stan/math/rev/mat/fun/squared_distance.hpp>
#include <stan/math/rev/mat/fun/stan_print.hpp>
#include <stan/math/rev/mat/fun/sum.hpp>
Expand All @@ -61,13 +62,13 @@

#include <stan/math/rev/mat/functor/adj_jac_apply.hpp>
#include <stan/math/rev/mat/functor/algebra_solver.hpp>
#include <stan/math/rev/mat/functor/gradient.hpp>
#include <stan/math/rev/mat/functor/jacobian.hpp>
#include <stan/math/rev/mat/functor/cvodes_utils.hpp>
#include <stan/math/rev/mat/functor/cvodes_ode_data.hpp>
#include <stan/math/rev/mat/functor/cvodes_utils.hpp>
#include <stan/math/rev/mat/functor/gradient.hpp>
#include <stan/math/rev/mat/functor/integrate_dae.hpp>
#include <stan/math/rev/mat/functor/integrate_ode_adams.hpp>
#include <stan/math/rev/mat/functor/integrate_ode_bdf.hpp>
#include <stan/math/rev/mat/functor/integrate_dae.hpp>
#include <stan/math/rev/mat/functor/jacobian.hpp>
#include <stan/math/rev/mat/functor/map_rect_reduce.hpp>

#endif
66 changes: 66 additions & 0 deletions stan/math/rev/mat/fun/sqrt_spd.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#ifndef STAN_MATH_REV_MAT_FUN_SQRT_SPD_HPP
#define STAN_MATH_REV_MAT_FUN_SQRT_SPD_HPP

#include <cstddef>
#include <stan/math/prim/mat/fun/sqrt_spd.hpp>
#include <stan/math/rev/mat/functor/adj_jac_apply.hpp>
#include <tuple>
#include <unsupported/Eigen/KroneckerProduct>
#include <vector>

namespace stan {
namespace math {

namespace {
class sqrt_spd_op {
int K_;
double *y_; // Holds the output, which has K_^2 elements

public:
sqrt_spd_op() : K_(0), y_(nullptr) {}

template <std::size_t size>
Eigen::MatrixXd operator()(const std::array<bool, size> &needs_adj,
const Eigen::MatrixXd &m) {
K_ = m.rows();
Eigen::MatrixXd sqrt_m = sqrt_spd(m);
int KK = K_ * K_;
y_ = ChainableStack::instance().memalloc_.alloc_array<double>(KK);
for (int k = 0; k < KK; ++k)
y_[k] = sqrt_m.coeff(k);
return sqrt_m;
}

// https://math.stackexchange.com/questions/540361/
// derivative-or-differential-of-symmetric-square-root-of-a-matrix

template <std::size_t size>
std::tuple<Eigen::MatrixXd>
multiply_adjoint_jacobian(const std::array<bool, size> &needs_adj,
const Eigen::MatrixXd &adj) const {
bgoodri marked this conversation as resolved.
Show resolved Hide resolved
using Eigen::kroneckerProduct;
using Eigen::MatrixXd;
Eigen::MatrixXd output(K_, K_);
Eigen::Map<const Eigen::VectorXd> map_input(adj.data(), adj.size());
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And if adj is a MatrixXd, how can it be mapped to a VectorXd?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That mapping is doing this (https://en.wikipedia.org/wiki/Vectorization_(mathematics)). I was just pattern-matching the eqs in the Stackoverflow link. I'm not really familiar with that. I was just double checking myself and Abhishek makes a note at the end of his post that the Jacobian he wrote is in terms of the vectorization, which I assume is the Wikipedia definition.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh yeah fair warning, I was just kinda guessing on the math stuff to make things compile. Maybe add in a diagonal matrix test (with things that aren't 1 on the diagonal) so we know the answer and gradients and all just to make sure things are in the right direction.

Eigen::Map<Eigen::VectorXd> map_output(output.data(), output.size());
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar, how can map_output be a Map<VectorXd> but output be a square MatrixXd?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And why is map_output necessary? Can we just do the linear algebra to form output and do std::make_tuple(output)?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The maps are just an eigen thing for mapping one data structure onto another. So the math happens in terms of the vectors like Abhishek had it but it just ends up in the right place in the matrices. I think it should be okay in terms of efficiency?

Eigen::Map<MatrixXd> sqrt_m(y_, K_, K_);

map_output = (kroneckerProduct(sqrt_m, MatrixXd::Identity(K_, K_)) +
kroneckerProduct(MatrixXd::Identity(K_, K_), sqrt_m))
// the above is symmetric so can skip the transpose
.ldlt()
.solve(map_input);

return std::make_tuple(output);
}
};
} // namespace

inline Eigen::Matrix<var, Eigen::Dynamic, Eigen::Dynamic>
sqrt_spd(const Eigen::Matrix<var, Eigen::Dynamic, Eigen::Dynamic> &m) {
return adj_jac_apply<sqrt_spd_op>(m);
bgoodri marked this conversation as resolved.
Show resolved Hide resolved
}

} // namespace math
} // namespace stan
#endif
15 changes: 15 additions & 0 deletions test/unit/math/prim/mat/fun/inv_sqrt_spd_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#include <gtest/gtest.h>
#include <stan/math/prim/mat.hpp>

TEST(MathMatrix, inv_sqrt_spd) {
stan::math::matrix_d m0;
stan::math::matrix_d m1(2, 3);
m1 << 1, 2, 3, 4, 5, 6;
stan::math::matrix_d ev_m1(1, 1);
ev_m1 << 4.0;

using stan::math::inv_sqrt_spd;
EXPECT_THROW(inv_sqrt_spd(m0), std::invalid_argument);
EXPECT_NEAR(0.5, inv_sqrt_spd(ev_m1)(0, 0), 1e-16);
EXPECT_THROW(inv_sqrt_spd(m1), std::invalid_argument);
}
15 changes: 15 additions & 0 deletions test/unit/math/prim/mat/fun/sqrt_spd_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#include <gtest/gtest.h>
#include <stan/math/prim/mat.hpp>

TEST(MathMatrix, sqrt_spd) {
stan::math::matrix_d m0;
stan::math::matrix_d m1(2, 3);
m1 << 1, 2, 3, 4, 5, 6;
stan::math::matrix_d ev_m1(1, 1);
ev_m1 << 4.0;

using stan::math::sqrt_spd;
EXPECT_THROW(sqrt_spd(m0), std::invalid_argument);
EXPECT_NEAR(2.0, sqrt_spd(ev_m1)(0, 0), 1e-16);
EXPECT_THROW(sqrt_spd(m1), std::invalid_argument);
}
60 changes: 60 additions & 0 deletions test/unit/math/rev/mat/fun/inv_sqrt_spd_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#include <gtest/gtest.h>
#include <stan/math/rev/mat.hpp>
#include <test/unit/math/rev/mat/fun/util.hpp>
#include <test/unit/math/rev/mat/util.hpp>

TEST(AgradRevMatrix, check_varis_on_stack) {
using stan::math::inv_sqrt_spd;
using stan::math::matrix_v;

matrix_v a(3, 3);
a << 1.0, 2.0, 3.0, 2.0, 5.0, 7.9, 3.0, 7.9, 1.08;
test::check_varis_on_stack(inv_sqrt_spd(a));
}

struct make_zero {
template <typename T>
Eigen::Matrix<T, Eigen::Dynamic, 1>
operator()(const Eigen::Matrix<T, Eigen::Dynamic, 1> &a) const {
typedef Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic> matrix_t;
const int K = static_cast<int>(sqrt(a.rows()));
matrix_t A(K, K);
int pos = 0;
for (int i = 0; i < K; i++)
for (int j = 0; j < K; j++)
A(i, j) = a[pos++];

matrix_t inv_sqrt_A = stan::math::inv_sqrt_spd(A);
matrix_t zero = A.inverse() - inv_sqrt_A * inv_sqrt_A;
zero.resize(K * K, 1);
return zero;
}
};

TEST(AgradRevMatrix, sqrt_spd) {
using stan::math::sqrt_spd;
using stan::math::matrix_v;
using stan::math::vector_v;
using stan::math::matrix_d;

int K = 2;
matrix_d A(K, K);
A.setRandom();
A = A.transpose() * A;
matrix_d J;
Eigen::VectorXd f_x;
A.resize(K * K, 1);
make_zero f;
stan::math::jacobian(f, A, f_x, J);
const double TOL = 1e-14;
int pos = 0;
for (int i = 0; i < K; i++)
for (int j = 0; j < K; j++)
EXPECT_NEAR(f_x(pos++), 0, TOL);
/*
for (int i = 0; i < J.rows(); i++)
for (int j = 0; j < J.cols(); j++)
if (i != j) EXPECT_NEAR(J(i, j), 0, TOL);
*/
EXPECT_TRUE(J.array().any());
}
60 changes: 60 additions & 0 deletions test/unit/math/rev/mat/fun/sqrt_spd_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#include <gtest/gtest.h>
#include <stan/math/rev/mat.hpp>
#include <test/unit/math/rev/mat/fun/util.hpp>
#include <test/unit/math/rev/mat/util.hpp>

TEST(AgradRevMatrix, check_varis_on_stack) {
using stan::math::sqrt_spd;
using stan::math::matrix_v;

matrix_v a(3, 3);
a << 1.0, 2.0, 3.0, 2.0, 5.0, 7.9, 3.0, 7.9, 1.08;
test::check_varis_on_stack(sqrt_spd(a));
}


struct make_zero {
template <typename T>
Eigen::Matrix<T, Eigen::Dynamic, 1> operator()(
const Eigen::Matrix<T, Eigen::Dynamic, 1>& a) const {
typedef Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic> matrix_t;
const int K = static_cast<int>(sqrt(a.rows()));
matrix_t A(K, K);
int pos = 0;
for (int i = 0; i < K; i++)
for (int j = 0; j < K; j++)
A(i, j) = a[pos++];

matrix_t sqrt_A = stan::math::sqrt_spd(A);
matrix_t zero = A - sqrt_A * sqrt_A;
zero.resize(K * K, 1);
return zero;
}
};

TEST(AgradRevMatrix, sqrt_spd) {
using stan::math::sqrt_spd;
using stan::math::matrix_v;
using stan::math::vector_v;
using stan::math::matrix_d;

int K = 2;
matrix_d A(K, K);
A.setRandom();
A = A.transpose() * A;
matrix_d J;
Eigen::VectorXd f_x;
A.resize(K * K, 1);
make_zero f;
stan::math::jacobian(f, A, f_x, J);
const double TOL = 1e-14;
int pos = 0;
for (int i = 0; i < K; i++)
for (int j = 0; j < K; j++)
EXPECT_NEAR(f_x(pos++), 0, TOL);
for (int i = 0; i < J.rows(); i++)
for (int j = 0; j < J.cols(); j++)
if (i != j) EXPECT_NEAR(J(i, j), 0, TOL);
EXPECT_TRUE(J.array().any());
}