Skip to content

Commit

Permalink
Gaussian curve fit
Browse files Browse the repository at this point in the history
added function to curve fit data to a gaussian curve and added missing #include in basicfxn
  • Loading branch information
asthachand committed Mar 31, 2024
1 parent 5b4239e commit bd4915f
Show file tree
Hide file tree
Showing 5 changed files with 184 additions and 18 deletions.
10 changes: 10 additions & 0 deletions include/diceforge_distributions.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,16 @@ namespace DiceForge {
real_t get_mu() const;
/// @brief Returns standard deviation of the distribution
real_t get_sigma() const;

/// @brief Fits the given sample points (x, y=pdf(x)) to a Gaussian distribution using non-linear least squares regression
/// following Gauss-Newton methods
/// @param x list of x coordinates
/// @param y list of corresponding y coordinates where y = pdf(x)
/// @param max_iter maximum iterations to attempt to fit the data (higher to try for better fits)
/// @param epsilon minimum acceptable error tolerance while attempting to fit the data (smaller to try for better fits)
/// @return A Gaussian distribution fit to the given sample points
Gaussian fitToGaussian(const std::vector<real_t>& x, const std::vector<real_t>& y, int max_iter = 10000, real_t epsilon = 1e-6);

};

/// @brief DiceForge::Maxwell - A Continuous Probability Distribution (Maxwell)
Expand Down
1 change: 1 addition & 0 deletions src/Core/basicfxn.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "basicfxn.h"
#include <limits>

namespace DiceForge
{
Expand Down
141 changes: 123 additions & 18 deletions src/Distributions/Continuous/Gaussian/Gaussian.cpp
Original file line number Diff line number Diff line change
@@ -1,60 +1,61 @@
#include "Gaussian.h"
#include <vector>
#include "basicfxn.h"

namespace DiceForge
{
{
Gaussian::Gaussian(real_t mu, real_t sigma)
: mu(mu), sigma(sigma)
: mu(mu), sigma(sigma)
{
if (sigma < std::numeric_limits<real_t>().epsilon())
{
std::cerr << "Error :"
"\n\tDiceForge::Gaussian::Gaussian(real_t mu, real_t sigma) : "
"\n\t\tValue of sigma must be positive\n" << std::endl;
"\n\tDiceForge::Gaussian::Gaussian(real_t mu, real_t sigma) : "
"\n\t\tValue of sigma must be positive\n"
<< std::endl;
exit(EXIT_FAILURE);
}
}

real_t Gaussian::next(real_t r1, real_t r2)
real_t Gaussian::next(real_t r1, real_t r2)
{
return (sqrt(-2.0 * log(r1)) * cos(2 * M_PI * r2)) * sigma + mu;
}

real_t Gaussian::variance() const
real_t Gaussian::variance() const
{
return sigma * sigma;
}

real_t Gaussian::expectation() const
real_t Gaussian::expectation() const
{
return mu;
}

real_t Gaussian::minValue() const
real_t Gaussian::minValue() const
{
return std::numeric_limits<real_t>().min();
}

real_t Gaussian::maxValue() const
real_t Gaussian::maxValue() const
{
return std::numeric_limits<real_t>().max();
}

real_t Gaussian::pdf(real_t x) const
real_t Gaussian::pdf(real_t x) const
{
return exp(-0.5 * pow((x - mu) / sigma, 2) )/ (sqrt(2.0 * M_PI) * sigma);
return exp(-0.5 * pow((x - mu) / sigma, 2)) / (sqrt(2.0 * M_PI) * sigma);
}

real_t Gaussian::cdf(real_t x) const
{
real_t Gaussian::cdf(real_t x) const
{
real_t erf = myerf((x - mu) / (sigma * sqrt(2)));
return 0.5 * (1 + erf );
return 0.5 * (1 + erf);
}

real_t Gaussian::myerf(real_t x) const
{
real_t erf;
erf = tanh((2/sqrt(M_PI)) *(x + (11 / 123) * pow(x, 3)));
real_t erf;
erf = tanh((2 / sqrt(M_PI)) * (x + (11 / 123) * pow(x, 3)));
return erf;
}

Expand All @@ -67,4 +68,108 @@ namespace DiceForge
{
return sigma;
}
}

Gaussian fitToGaussian(const std::vector<real_t> &x, const std::vector<real_t> &y, int max_iter, real_t epsilon)
{
if (x.size() != y.size())
{
throw "Number of x-coordinates and y-coordinates provided in the data do not match!";
}

const int N = x.size();

// Initial guessing of mu, sigma
real_t mu = 1, sigma = 1;

real_t ysum = 0;
real_t y2sum = 0;
for (size_t i = 0; i < N; i++)
{
ysum += y[i];
y2sum += y[i] * y[i];
}
real_t mu_guess = ysum / N;
real_t sigma_guess = sqrt((y2sum / N) - mu_guess * mu_guess);

real_t ymax = -INFINITY;
for (size_t i = 0; i < N; i++)
{
// Check for outliers
if (y[i] > ymax && y[i] - mu_guess < 3 * sigma_guess)
{
ymax = y[i];

// Guess mu as the x value corresponding to maximum y
mu = x[i];
}
}
//computing max and min x values from given data
real_t xmax = x[0], xmin = x[0];
for (size_t i = 0; i < N; i++)
{
if (x[i] > xmax)
xmax = x[i];
if (x[i] < xmin)
xmin = x[i];
}
//setting initial guess of sigma
sigma = (xmax - xmin) / 6;

// Start iterative updation
for (size_t iter = 0; iter < max_iter; iter++)
{
// Compute Jacobian matrix and error vector
matrix_t J(N, 2); // Jacobian matrix
matrix_t R(N, 1); // Error vector

for (size_t i = 0; i < N; i++)
{
real_t pdf = exp(-(x[i] - mu) * (x[i] - mu) / (2 * sigma * sigma)) / (sqrt(2 * M_PI) * sigma);
// std::cout << sigma << std::endl;
// Partial derivatives of the Gaussian function with respect to mu and sigma
real_t dpdf_dmu = (x[i] - mu) / (sigma * sigma) * pdf;
real_t dpdf_dsigma = ((x[i] - mu) * (x[i] - mu) / (sigma * sigma * sigma) - 1 / sigma) * pdf;

J[i][0] = -dpdf_dmu;
J[i][1] = -dpdf_dsigma;

R[i][0] = y[i] - pdf;
}

// Compute the transpose of the Jacobian matrix
matrix_t JT = J.transpose();

// Compute the move direction using the Gauss-Newton method
matrix_t d = inverse2x2(JT * J) * JT * R;

// Stop when error minimization is too little
if (fabs(d[0][0]) < epsilon && fabs(d[1][0]) < epsilon)
{
break;
}

real_t alpha = 0.001;

real_t pred_mu, pred_sigma;
pred_mu = mu - alpha * d[0][0];
pred_sigma = sigma - alpha * d[1][0];

// prevent possible incorrect jumping
if ((pred_mu / mu > 10 || pred_sigma / sigma > 10 || pred_mu / mu < 0.1 || pred_sigma / sigma < 0.1))
{
alpha = alpha * 0.0001;
}

// Gauss-Newton method
mu = mu - alpha * d[0][0];
sigma = sigma - alpha * d[1][0];
}

if (sigma < 0)
{
throw "Could not fit data to Gaussian!";
}

return Gaussian(mu, sigma);
}
}
8 changes: 8 additions & 0 deletions src/Distributions/Continuous/Gaussian/Gaussian.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,14 @@ namespace DiceForge {
/// @brief Returns standard deviation of the distribution
real_t get_sigma() const;
};
/// @brief Fits the given sample points (x, y=pdf(x)) to a Gaussian distribution using non-linear least squares regression
/// following Gauss-Newton methods
/// @param x list of x coordinates
/// @param y list of corresponding y coordinates where y = pdf(x)
/// @param max_iter maximum iterations to attempt to fit the data (higher to try for better fits)
/// @param epsilon minimum acceptable error tolerance while attempting to fit the data (smaller to try for better fits)
/// @return A Gaussian distribution fit to the given sample points
Gaussian fitToGaussian(const std::vector<real_t>& x, const std::vector<real_t>& y, int max_iter = 10000, real_t epsilon = 1e-6);
}


Expand Down
42 changes: 42 additions & 0 deletions testing/test_fitting_gaussian.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#include "diceforge.h"
#include <iostream>
#include <fstream>

#define NUM_POINTS 50
#define NOISE_AMP 0.01

int main(int argc, char const *argv[])
{
DiceForge::XORShift32 rng = DiceForge::XORShift32(time(NULL));

DiceForge::Gaussian noise_gen = DiceForge::Gaussian(1);
DiceForge::Gaussian gaussian = DiceForge::Gaussian(23, 1);

std::cout << "original: mu = " << gaussian.get_mu() << ", sigma = " << gaussian.get_sigma() << std::endl;

std::vector<double> x;
std::vector<double> y;

std::ofstream out = std::ofstream("noisy_data_gaussian.dat");
for (int i = 0; i < NUM_POINTS; i++)
{
double x1 = rng.next_in_crange(10, 50);
double y1 = gaussian.pdf(x1) + NOISE_AMP * M_1_PI * noise_gen.next(rng.next_unit(), rng.next_unit()) / gaussian.get_sigma();
x.push_back(x1);
y.push_back(y1);
out << x1 << " " << y1 << std::endl;
}
out.close();

DiceForge::Gaussian fit = DiceForge::fitToGaussian(x, y, 10000, 0.0000001);

std::cout << "fit: mu = " << fit.get_mu() << ", sigma = " << fit.get_sigma() << std::endl;

FILE* gnuplot = popen("gnuplot -persist", "w");
fprintf(gnuplot, "plot 'noisy_data_gaussian.dat' title 'samples', exp(-(x - %f) * (x - %f) / (2 * %f * %f)) / (%f) title 'Gaussian'\n",
fit.get_mu(), fit.get_mu(), fit.get_sigma(), fit.get_sigma(), sqrt(2 * M_PI) * fit.get_sigma());
//fprintf(gnuplot, "plot 'noisy_data_gaussian.dat' title 'samples'\n");
fclose(gnuplot);

return 0;
}

0 comments on commit bd4915f

Please sign in to comment.