Skip to content

Commit ae06c34

Browse files
committed
Minor bug fixes in hypothesis tests, MGBT, binomial. Improving convergence in mixture and GMM expectation-maximization method. Made degrees of freedom in student-t and noncentral-t doubles.
1 parent afe7a00 commit ae06c34

9 files changed

Lines changed: 96 additions & 68 deletions

File tree

Numerics/Data/Statistics/HypothesisTests.cs

Lines changed: 29 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,14 @@
2828
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2929
*/
3030

31-
using System;
32-
using System.Collections.Generic;
33-
using System.Linq;
3431
using Numerics.Distributions;
3532
using Numerics.MachineLearning;
3633
using Numerics.Mathematics.LinearAlgebra;
3734
using Numerics.Mathematics.SpecialFunctions;
35+
using System;
36+
using System.Collections.Generic;
37+
using System.Diagnostics;
38+
using System.Linq;
3839

3940
namespace Numerics.Data.Statistics
4041
{
@@ -111,7 +112,7 @@ public static double UnequalVarianceTtest(IList<double> sample1, IList<double> s
111112
int n2 = sample2.Count;
112113
double t = Math.Abs((ave1 - ave2) / Math.Sqrt(var1 / n1 + var2 / n2));
113114
double df = Tools.Sqr(var1 / n1 + var2 / n2) / (Tools.Sqr(var1 / n1) / (n1 - 1) + Tools.Sqr(var2 / n2) / (n2 - 1));
114-
var tdist = new StudentT((int)df);
115+
var tdist = new StudentT(df);
115116
return (1d - tdist.CDF(t)) * 2d;
116117
}
117118

@@ -134,7 +135,7 @@ public static double PairedTtest(IList<double> sample1, IList<double> sample2)
134135
cov /= (df = n - 1);
135136
sd = Math.Sqrt((var1 + var2 - 2.0 * cov) / n);
136137
double t = Math.Abs((ave1 - ave2) / sd);
137-
var tdist = new StudentT((int)df);
138+
var tdist = new StudentT(df);
138139
return (1d - tdist.CDF(t)) * 2d;
139140
}
140141

@@ -348,29 +349,36 @@ public static double UnimodalityTest(IList<double> sample)
348349
int n = sample.Count;
349350
if (n < 10) throw new ArgumentException("The sample size must be greater than or equal to 10.");
350351

351-
// Fit 1-component GMM (unimodal)
352-
var gmm1 = new GaussianMixtureModel(sample.ToArray(), 1);
353-
gmm1.Train(12345, true);
354-
var logLH1 = gmm1.LogLikelihood;
352+
try
353+
{
354+
// Fit 1-component GMM (unimodal)
355+
var gmm1 = new GaussianMixtureModel(sample.ToArray(), 1);
356+
gmm1.Train(12345, true);
357+
var logLH1 = gmm1.LogLikelihood;
355358

356-
// Fit 2-component GMM (potentially bimodal)
357-
var gmm2 = new GaussianMixtureModel(sample.ToArray(), 2);
358-
gmm2.Train(12345, true);
359-
var logLH2 = gmm2.LogLikelihood;
359+
// Fit 2-component GMM (potentially bimodal)
360+
var gmm2 = new GaussianMixtureModel(sample.ToArray(), 2);
361+
gmm2.Train(12345, true);
362+
var logLH2 = gmm2.LogLikelihood;
360363

361-
// Compute test statistic: Likelihood Ratio Test
362-
double testStat = 2 * (logLH2 - logLH1);
363-
int df = 3;
364+
// Compute test statistic: Likelihood Ratio Test
365+
double testStat = 2 * (logLH2 - logLH1);
366+
int df = 3;
364367

365-
// Compute p-value from chi-square distribution
366-
var chiSquared = new ChiSquared(df);
367-
double pval = 1.0 - chiSquared.CDF(testStat);
368+
// Compute p-value from chi-square distribution
369+
var chiSquared = new ChiSquared(df);
370+
double pval = 1.0 - chiSquared.CDF(testStat);
368371

369-
return pval;
372+
return pval;
373+
}
374+
catch (Exception ex)
375+
{
376+
Debug.WriteLine($"Error in hypothesis testing: {ex.Message}");
377+
return double.NaN;
378+
}
370379

371380
}
372381

373-
374382
}
375383

376384

Numerics/Data/Statistics/MultipleGrubbsBeckTest.cs

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -233,8 +233,19 @@ private static double FGGB(double PZR, int N, int R, double ETA)
233233
df = 2.0d * alpha;
234234
ncp = (MuMP - ZR) / Math.Sqrt(VarMP);
235235
q = -Math.Sqrt(MuS2 / VarMP) * EtaP;
236-
var NCTDist = new NoncentralT(df, ncp);
237-
ANS = 1.0d - NCTDist.CDF(q);
236+
// Match Fortran FP_TNC_CDF: use normal approximation for df > 20
237+
double cdfResult;
238+
if (df > 20.0d)
239+
{
240+
double Z = (q * (1.0d - 1.0d / (4.0d * df)) - ncp) / Math.Sqrt(1.0d + q * q / (2.0d * df));
241+
cdfResult = Normal.StandardCDF(Z);
242+
}
243+
else
244+
{
245+
var NCTDist = new NoncentralT(df, ncp);
246+
cdfResult = NCTDist.CDF(q);
247+
}
248+
ANS = 1.0d - cdfResult;
238249
return ANS;
239250
}
240251

Numerics/Distributions/Univariate/Binomial.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -282,7 +282,7 @@ public override double CDF(double k)
282282
k = Math.Floor(k);
283283
if (k < Minimum)
284284
return 0.0d;
285-
if (k > Maximum)
285+
if (k >= Maximum)
286286
return 1.0d;
287287
return Beta.Incomplete(NumberOfTrials - k, k + 1d, Complement);
288288
}
@@ -299,7 +299,7 @@ public override double InverseCDF(double probability)
299299
return Maximum;
300300
// Validate parameters
301301
if (_parametersValid == false)
302-
ValidateParameters([probability, NumberOfTrials], true);
302+
ValidateParameters([ProbabilityOfSuccess, NumberOfTrials], true);
303303
double k = 0d;
304304
for (int i = 0; i <= NumberOfTrials; i++)
305305
{

Numerics/Distributions/Univariate/Mixture.cs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -761,13 +761,14 @@ double logLH(double[] x)
761761
// Perform the expectation step
762762
newLogLH = EStep(mleParameters);
763763

764-
// Perform the maximization step
765-
mleParameters = MStep(mleParameters);
766-
767-
// Check convergence after M-step
764+
// Check convergence before M-step to avoid pushing parameters
765+
// into a degenerate state after the log-likelihood has already converged.
768766
if (Math.Abs((oldLogLH - newLogLH) / oldLogLH) < Tolerance)
769767
break;
770768

769+
// Perform the maximization step
770+
mleParameters = MStep(mleParameters);
771+
771772
// Update log-likelihood state
772773
oldLogLH = newLogLH;
773774

Numerics/Distributions/Univariate/NoncentralT.cs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -74,13 +74,13 @@ public NoncentralT(double degreesOfFreedom, double noncentrality)
7474
SetParameters(degreesOfFreedom, noncentrality);
7575
}
7676

77-
private int _degreesOfFreedom;
77+
private double _degreesOfFreedom;
7878
private double _noncentrality;
7979

8080
/// <summary>
8181
/// Gets and sets the degrees of freedom ν (nu) of the distribution.
8282
/// </summary>
83-
public int DegreesOfFreedom
83+
public double DegreesOfFreedom
8484
{
8585
get { return _degreesOfFreedom; }
8686
set
@@ -260,7 +260,7 @@ public override double[] MaximumOfParameters
260260
/// <param name="mu">The noncentrality parameter μ (mu).</param>
261261
public void SetParameters(double v, double mu)
262262
{
263-
DegreesOfFreedom = (int)v;
263+
DegreesOfFreedom = v;
264264
Noncentrality = mu;
265265
}
266266

@@ -429,8 +429,8 @@ private double NCTDist(double t, double df, double delta)
429429
//
430430
// Note - ITRMAX and ERRMAX may be changed to suit one's needs.
431431
//
432-
const int ITRMAX = 1000;
433-
const double Errmax = 0.0000001d;
432+
const int ITRMAX = 10000;
433+
const double Errmax = 0.000000001d;
434434

435435
// DATA ITRMAX/100.1/, ERRMAX/1.E-06/
436436
//

Numerics/Distributions/Univariate/StudentT.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ public StudentT(double location, double scale, double degreesOfFreedom)
108108

109109
private double _mu;
110110
private double _sigma;
111-
private int _degreesOfFreedom;
111+
private double _degreesOfFreedom;
112112

113113
/// <summary>
114114
/// Gets and sets the location parameter µ (Mu).
@@ -139,7 +139,7 @@ public double Sigma
139139
/// <summary>
140140
/// Gets and sets the degrees of freedom ν (nu) of the distribution.
141141
/// </summary>
142-
public int DegreesOfFreedom
142+
public double DegreesOfFreedom
143143
{
144144
get { return _degreesOfFreedom; }
145145
set
@@ -325,7 +325,7 @@ public void SetParameters(double mu, double sigma, double v)
325325
{
326326
Mu = mu;
327327
Sigma = sigma;
328-
DegreesOfFreedom = (int)v;
328+
DegreesOfFreedom = v;
329329
}
330330

331331
/// <inheritdoc/>

Numerics/Machine Learning/Unsupervised/GaussianMixtureModel.cs

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -296,14 +296,14 @@ private void MStep()
296296
for (int k = 0; k < K; k++)
297297
{
298298
double wgt = 0d;
299-
for (int i = 0; i < X.NumberOfRows; i++)
299+
for (int i = 0; i < X.NumberOfRows; i++)
300300
wgt += LikelihoodMatrix[i, k];
301301
Weights[k] = wgt / X.NumberOfRows;
302302
for (int d = 0; d < Dimension; d++)
303303
{
304304
// Compute centroids
305305
double sum = 0;
306-
for (int i = 0; i < X.NumberOfRows; i++)
306+
for (int i = 0; i < X.NumberOfRows; i++)
307307
sum += LikelihoodMatrix[i, k] * X[i, d];
308308
Means[k, d] = sum / wgt;
309309
// Compute covariance
@@ -317,6 +317,14 @@ private void MStep()
317317
Sigmas[k][d, j] = sum / wgt;
318318
}
319319
}
320+
321+
// Add small regularization to the diagonal to ensure the covariance
322+
// matrix remains positive-definite. When a component collapses to very
323+
// few points, the covariance can become singular, causing Cholesky
324+
// decomposition in the E-step to fail.
325+
for (int d = 0; d < Dimension; d++)
326+
MatrixRegularization.MakeSymmetricPositiveDefinite(Sigmas[k]);
327+
320328
}
321329
}
322330

Test_Numerics/Distributions/Univariate/Test_NoncentralT.cs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -114,12 +114,12 @@ public void Test_NoncentralT_InverseCDF()
114114
public void Test_Construction()
115115
{
116116
var t = new NoncentralT();
117-
Assert.AreEqual(10,t.DegreesOfFreedom);
118-
Assert.AreEqual(0, t.Noncentrality);
117+
Assert.AreEqual(10d, t.DegreesOfFreedom);
118+
Assert.AreEqual(0d, t.Noncentrality);
119119

120120
var t2 = new NoncentralT(1, 1);
121-
Assert.AreEqual(1, t2.DegreesOfFreedom);
122-
Assert.AreEqual(1, t2.Noncentrality);
121+
Assert.AreEqual(1d, t2.DegreesOfFreedom);
122+
Assert.AreEqual(1d, t2.Noncentrality);
123123
}
124124

125125
/// <summary>

Test_Numerics/Distributions/Univariate/Test_StudentT.cs

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -60,14 +60,14 @@ public class Test_StudentT
6060
[TestMethod()]
6161
public void Test_StudentT_PDF()
6262
{
63-
var t = new StudentT(4.2d);
63+
var t = new StudentT(4d);
6464
double pdf = t.PDF(1.4d);
6565
double result = 0.138377537135553d;
66-
Assert.AreEqual(pdf, result, 1E-10);
67-
t = new StudentT(2.5d, 0.5d, 4.2d);
66+
Assert.AreEqual(result, pdf, 1E-10);
67+
t = new StudentT(2.5d, 0.5d, 4d);
6868
pdf = t.PDF(1.4d);
6969
result = 0.0516476521260042d;
70-
Assert.AreEqual(pdf, result, 1E-10);
70+
Assert.AreEqual(result, pdf, 1E-10);
7171
}
7272

7373
/// <summary>
@@ -76,14 +76,14 @@ public void Test_StudentT_PDF()
7676
[TestMethod()]
7777
public void Test_StudentT_CDF()
7878
{
79-
var t = new StudentT(4.2d);
79+
var t = new StudentT(4d);
8080
double cdf = t.CDF(1.4d);
8181
double result = 0.882949686336585d;
82-
Assert.AreEqual(cdf, result, 1E-10);
83-
t = new StudentT(2.5d, 0.5d, 4.2d);
82+
Assert.AreEqual(result, cdf, 1E-10);
83+
t = new StudentT(2.5d, 0.5d, 4d);
8484
cdf = t.CDF(1.4d);
8585
result = 0.0463263350898173d;
86-
Assert.AreEqual(cdf, result, 1E-10);
86+
Assert.AreEqual(result, cdf, 1E-10);
8787
}
8888

8989
/// <summary>
@@ -92,16 +92,16 @@ public void Test_StudentT_CDF()
9292
[TestMethod()]
9393
public void Test_StudentT_InverseCDF()
9494
{
95-
var t = new StudentT(4.2d);
95+
var t = new StudentT(4d);
9696
double cdf = t.CDF(1.4d);
9797
double invcdf = t.InverseCDF(cdf);
9898
double result = 1.4d;
99-
Assert.AreEqual(invcdf, result, 1E-2);
100-
t = new StudentT(2.5d, 0.5d, 4.2d);
99+
Assert.AreEqual(result, invcdf, 1E-2);
100+
t = new StudentT(2.5d, 0.5d, 4d);
101101
cdf = t.CDF(1.4d);
102102
invcdf = t.InverseCDF(cdf);
103103
result = 1.4d;
104-
Assert.AreEqual(invcdf, result, 1E-2);
104+
Assert.AreEqual(result, invcdf, 1E-2);
105105
}
106106

107107
/// <summary>
@@ -111,14 +111,14 @@ public void Test_StudentT_InverseCDF()
111111
public void Test_Construction()
112112
{
113113
var t = new StudentT();
114-
Assert.AreEqual(0, t.Mu);
115-
Assert.AreEqual(1, t.Sigma);
116-
Assert.AreEqual(10, t.DegreesOfFreedom);
114+
Assert.AreEqual(0d, t.Mu);
115+
Assert.AreEqual(1d, t.Sigma);
116+
Assert.AreEqual(10d, t.DegreesOfFreedom);
117117

118118
var t2 = new StudentT(10, 10, 10);
119-
Assert.AreEqual(10, t2.Mu);
120-
Assert.AreEqual(10, t2.Sigma);
121-
Assert.AreEqual(10, t2.DegreesOfFreedom);
119+
Assert.AreEqual(10d, t2.Mu);
120+
Assert.AreEqual(10d, t2.Sigma);
121+
Assert.AreEqual(10d, t2.DegreesOfFreedom);
122122
}
123123

124124
/// <summary>
@@ -148,7 +148,7 @@ public void Test_ParametersToString()
148148
Assert.AreEqual("Scale (σ)", t.ParametersToString[1, 0]);
149149
Assert.AreEqual("Degrees of Freedom (ν)", t.ParametersToString[2, 0]);
150150
Assert.AreEqual("0", t.ParametersToString[0, 1]);
151-
Assert.AreEqual("1", t.ParametersToString[1, 1]);
151+
Assert.AreEqual("1", t.ParametersToString[1,1]);
152152
Assert.AreEqual("10", t.ParametersToString[2, 1]);
153153
}
154154

@@ -160,10 +160,10 @@ public void Test_Moments()
160160
{
161161
var dist = new StudentT(10, 1, 100);
162162
var mom = dist.CentralMoments(1E-8);
163-
Assert.AreEqual(mom[0], dist.Mean, 1E-2);
164-
Assert.AreEqual(mom[1], dist.StandardDeviation, 1E-2);
165-
Assert.AreEqual(mom[2], dist.Skewness, 1E-2);
166-
Assert.AreEqual(mom[3], dist.Kurtosis, 1E-2);
163+
Assert.AreEqual(dist.Mean, mom[0], 1E-2);
164+
Assert.AreEqual(dist.StandardDeviation, mom[1], 1E-2);
165+
Assert.AreEqual(dist.Skewness, mom[2], 1E-2);
166+
Assert.AreEqual(dist.Kurtosis, mom[3], 1E-2);
167167
}
168168

169169
/// <summary>
@@ -231,7 +231,7 @@ public void Test_Skewness()
231231
Assert.AreEqual(0, t.Skewness);
232232

233233
var t2 = new StudentT(1, 1, 1);
234-
Assert.AreEqual(double.NaN,t2.Skewness );
234+
Assert.AreEqual(double.NaN, t2.Skewness);
235235
}
236236

237237
/// <summary>
@@ -241,7 +241,7 @@ public void Test_Skewness()
241241
public void Test_Kurtosis()
242242
{
243243
var t = new StudentT();
244-
Assert.AreEqual(4, t.Kurtosis );
244+
Assert.AreEqual(4, t.Kurtosis);
245245

246246
var t2 = new StudentT(1, 1, 4);
247247
Assert.AreEqual(double.PositiveInfinity, t2.Kurtosis);

0 commit comments

Comments
 (0)