calibration: fix goodness of fit

lumicks · Mar 26, 2024 · 33846f8 · 33846f8
1 parent a466fad
commit 33846f8
Show file tree

Hide file tree

Showing 5 changed files with 43 additions and 6 deletions.
diff --git a/changelog.md b/changelog.md
@@ -1,5 +1,11 @@
 # Changelog
 
+## v1.4.1 | t.b.d.
+
+#### Bug fixes
+
+* Fixed statistical backing returning an incorrect value.
+
 ## v1.4.0 | 2024-02-28
 
 #### New features

diff --git a/docs/theory/force_calibration/fitting.rst b/docs/theory/force_calibration/fitting.rst
@@ -37,3 +37,20 @@ Setting the number of points per block too low would result in a bias from insuf
 (:math:`R_f`) and an underestimation of the distance response (:math:`R_d`). In practice, one should
 use a high number of points per block (:math:`n_b \gg 100`), unless a very low corner frequency precludes this.
 In such cases, it is preferable to increase the measurement time.
+
+Goodness of fit
+---------------
+
+Based on the model and noise assumptions, we can calculate a goodness of fit criterion.
+When sufficient blocking has taken place, the sum of squared residuals that is being minimized during the fitting procedure is distributed according to a chi-squared distribution characterized by :math:`N_{\mathit{dof}} = N_{\mathit{data}} - N_{\mathit{free}}` degrees of freedom.
+Here :math:`N_{\mathit{data}}` corresponds to the number of data points we fitted (after blocking) and :math:`N_{\mathit{free}}` corresponds to the number of parameters we fitted.
+We can use the value we obtain to determine how unusual the fit error we obtained is.
+
+.. math::
+
+    \mathrm{support} = 100 P(x > \chi_{\mathit{obtained}}^2) = 100 \int_{\chi_{\mathit{obtained}}^2}^{\infty} \chi^2_{N_{\mathit{dof}}}(x) dx
+
+The support or backing is the probability that a repetition of the measurement that produced the data we fitted to will, after fitting, produce residuals whose squared sum is greater than the one we initially obtained.
+More informally, it represents the probability that a fit error at least this large should occur by chance.
+
+Support less than 1% warrants investigating the residuals for any trend in the residuals.
diff --git a/lumicks/pylake/force_calibration/power_spectrum_calibration.py b/lumicks/pylake/force_calibration/power_spectrum_calibration.py
@@ -403,7 +403,7 @@ def fit_power_spectrum(
     # Calculate goodness-of-fit, in terms of the statistical backing (see ref. 1).
     n_degrees_of_freedom = power_spectrum.power.size - len(solution_params)
     chi_squared_per_deg = chi_squared / n_degrees_of_freedom
-    backing = (1 - scipy.special.gammainc(chi_squared / 2, n_degrees_of_freedom / 2)) * 100
+    backing = scipy.stats.chi2.sf(chi_squared, n_degrees_of_freedom) * 100
 
     # Fitted power spectrum values.
     ps_model = power_spectrum.with_spectrum(

diff --git a/lumicks/pylake/force_calibration/tests/test_hydro.py b/lumicks/pylake/force_calibration/tests/test_hydro.py
@@ -303,7 +303,7 @@ def test_integration_active_calibration_hydrodynamics(integration_test_parameter
         "err_f_diode": 352.2917702189488,
         "err_alpha": 0.014231238753589254,
         "chi_squared_per_deg": 0.8659867914094764,
-        "backing": 14.340689726784328,
+        "backing": 84.05224555962526,
     }
 
     for key, value in expected_params.items():
@@ -348,7 +348,7 @@ def test_integration_passive_calibration_hydrodynamics(integration_test_paramete
         "err_f_diode": 352.2917702189488,
         "err_alpha": 0.014231238753589254,
         "chi_squared_per_deg": 0.8659867914094764,
-        "backing": 14.340689726784328,
+        "backing": 84.05224555962526,
     }
 
     for key, value in expected_params.items():
@@ -402,7 +402,7 @@ def test_integration_active_calibration_hydrodynamics_bulk(integration_test_para
         "err_f_diode": 376.8360414675165,
         "err_alpha": 0.014653541838852356,
         "chi_squared_per_deg": 0.8692145118092963,
-        "backing": 14.917612794899505,
+        "backing": 83.40493086075664,
     }
 
     assert fit.params["Distance to surface"].value is None

diff --git a/lumicks/pylake/force_calibration/tests/test_power_spectrum_calibration.py b/lumicks/pylake/force_calibration/tests/test_power_spectrum_calibration.py
@@ -204,6 +204,20 @@ def reference_calibration_result():
     return ps_calibration, model, reference_spectrum
 
 
+def test_bad_fit(reference_calibration_result):
+    ps_calibration, model, reference_spectrum = reference_calibration_result
+    bad_spectrum = reference_spectrum.power.copy()
+    bad_spectrum[30:31] = reference_spectrum.power[10]  # Chop!
+    bad_spectrum = reference_spectrum.with_spectrum(
+        bad_spectrum, num_points_per_block=reference_spectrum.num_points_per_block
+    )
+    bad_calibration = psc.fit_power_spectrum(
+        power_spectrum=bad_spectrum, model=model, loss_function="gaussian"
+    )
+
+    assert ps_calibration["backing"].value > bad_calibration["backing"].value
+
+
 def test_actual_spectrum(reference_calibration_result):
     ps_calibration, model, reference_spectrum = reference_calibration_result
 
@@ -213,7 +227,7 @@ def test_actual_spectrum(reference_calibration_result):
         "Rf": {"desired": 1243.966729922322, "rtol": 1e-4},
         "kappa": {"desired": 0.17149463585651784, "rtol": 1e-4},
         "alpha": {"desired": 0.5006070381347969, "rtol": 1e-4},
-        "backing": {"desired": 66.43310564863437, "rtol": 1e-4},
+        "backing": {"desired": 30.570451, "rtol": 1e-4},
         "chi_squared_per_deg": {"desired": 1.0637833024139873, "rtol": 1e-4},
         "err_fc": {"desired": 32.22822335114943, "rtol": 1e-4},
         "err_D": {"desired": 6.429704886151389e-05, "rtol": 1e-4, "atol": 0},
@@ -309,7 +323,7 @@ def test_repr(reference_calibration_result):
         err_f_diode          Diode low-pass filtering roll-off frequency Std Err (Hz)  561.715
         err_alpha            Diode 'relaxation factor' Std Err                         0.0131406
         chi_squared_per_deg  Chi squared per degree of freedom                         1.06378
-        backing              Statistical backing (%)                                   66.4331"""
+        backing              Statistical backing (%)                                   30.5705"""
     )