// // Math.NET Numerics, part of the Math.NET Project // http://numerics.mathdotnet.com // http://github.com/mathnet/mathnet-numerics // // Copyright (c) 2009-2018 Math.NET // // Permission is hereby granted, free of charge, to any person // obtaining a copy of this software and associated documentation // files (the "Software"), to deal in the Software without // restriction, including without limitation the rights to use, // copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the // Software is furnished to do so, subject to the following // conditions: // // The above copyright notice and this permission notice shall be // included in all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR // OTHER DEALINGS IN THE SOFTWARE. // using System; using System.Collections.Generic; using IStation.Numerics.Statistics; namespace IStation.Numerics { public static class GoodnessOfFit { /// /// Calculates r^2, the square of the sample correlation coefficient between /// the observed outcomes and the observed predictor values. /// Not to be confused with R^2, the coefficient of determination, see . /// /// The modelled/predicted values /// The observed/actual values /// Squared Person product-momentum correlation coefficient. public static double RSquared(IEnumerable modelledValues, IEnumerable observedValues) { var corr = Correlation.Pearson(modelledValues, observedValues); return corr * corr; } /// /// Calculates r, the sample correlation coefficient between the observed outcomes /// and the observed predictor values. /// /// The modelled/predicted values /// The observed/actual values /// Person product-momentum correlation coefficient. public static double R(IEnumerable modelledValues, IEnumerable observedValues) { return Correlation.Pearson(modelledValues, observedValues); } /// /// Calculates the Standard Error of the regression, given a sequence of /// modeled/predicted values, and a sequence of actual/observed values /// /// The modelled/predicted values /// The observed/actual values /// The Standard Error of the regression public static double PopulationStandardError(IEnumerable modelledValues, IEnumerable observedValues) { return StandardError(modelledValues, observedValues, 0); } /// /// Calculates the Standard Error of the regression, given a sequence of /// modeled/predicted values, and a sequence of actual/observed values /// /// The modelled/predicted values /// The observed/actual values /// The degrees of freedom by which the /// number of samples is reduced for performing the Standard Error calculation /// The Standard Error of the regression public static double StandardError(IEnumerable modelledValues, IEnumerable observedValues, int degreesOfFreedom) { using (IEnumerator ieM = modelledValues.GetEnumerator()) using (IEnumerator ieO = observedValues.GetEnumerator()) { double n = 0; double accumulator = 0; while (ieM.MoveNext()) { if (!ieO.MoveNext()) { throw new ArgumentOutOfRangeException(nameof(modelledValues), "The array arguments must have the same length."); } double currentM = ieM.Current; double currentO = ieO.Current; var diff = currentM - currentO; accumulator += diff * diff; n++; } if (degreesOfFreedom >= n) { throw new ArgumentOutOfRangeException(nameof(degreesOfFreedom), "The sample size must be larger than the given degrees of freedom."); } return Math.Sqrt(accumulator / (n - degreesOfFreedom)); } } /// /// Calculates the R-Squared value, also known as coefficient of determination, /// given some modelled and observed values. /// /// The values expected from the model. /// The actual values obtained. /// Coefficient of determination. public static double CoefficientOfDetermination(IEnumerable modelledValues, IEnumerable observedValues) { var y = observedValues; var f = modelledValues; int n = 0; double meanY = 0; double ssTot = 0; double ssRes = 0; using (IEnumerator ieY = y.GetEnumerator()) using (IEnumerator ieF = f.GetEnumerator()) { while (ieY.MoveNext()) { if (!ieF.MoveNext()) { throw new ArgumentOutOfRangeException(nameof(modelledValues), "The array arguments must have the same length."); } double currentY = ieY.Current; double currentF = ieF.Current; // If a large constant C is added to every y value, // then each new y have an error of about C*eps, // thus each new deltaY will change by about C*eps (compared to the old deltaY), // and thus ssTot will change by only C*eps*deltaY on each step // thus C*eps*deltaY*n in total. // (This error cannot be eliminated by a Kahan algorithm, // because it is introduced when C is added to the old Y value). // // This is better than summing the square of y values // and then substracting the correct multiple of the square of the sum of y values, // in this latter case ssTot will change by eps*n*(C^2+2*C*meanY) in total. double deltaY = currentY - meanY; double scaleDeltaY = deltaY / ++n; meanY += scaleDeltaY; ssTot += scaleDeltaY* deltaY* (n - 1); // This calculation is as safe as ssTot // in the case when a constant is added to both y and f. ssRes += (currentY - currentF)* (currentY-currentF); } if (ieF.MoveNext()) { throw new ArgumentOutOfRangeException(nameof(observedValues), "The array arguments must have the same length."); } } return 1 - ssRes/ssTot; } } }