//
// Math.NET Numerics, part of the Math.NET Project
// http://numerics.mathdotnet.com
// http://github.com/mathnet/mathnet-numerics
//
// Copyright (c) 2009-2018 Math.NET
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//
using System;
using System.Collections.Generic;
using IStation.Numerics.Statistics;
namespace IStation.Numerics
{
public static class GoodnessOfFit
{
///
/// Calculates r^2, the square of the sample correlation coefficient between
/// the observed outcomes and the observed predictor values.
/// Not to be confused with R^2, the coefficient of determination, see .
///
/// The modelled/predicted values
/// The observed/actual values
/// Squared Person product-momentum correlation coefficient.
public static double RSquared(IEnumerable modelledValues, IEnumerable observedValues)
{
var corr = Correlation.Pearson(modelledValues, observedValues);
return corr * corr;
}
///
/// Calculates r, the sample correlation coefficient between the observed outcomes
/// and the observed predictor values.
///
/// The modelled/predicted values
/// The observed/actual values
/// Person product-momentum correlation coefficient.
public static double R(IEnumerable modelledValues, IEnumerable observedValues)
{
return Correlation.Pearson(modelledValues, observedValues);
}
///
/// Calculates the Standard Error of the regression, given a sequence of
/// modeled/predicted values, and a sequence of actual/observed values
///
/// The modelled/predicted values
/// The observed/actual values
/// The Standard Error of the regression
public static double PopulationStandardError(IEnumerable modelledValues, IEnumerable observedValues)
{
return StandardError(modelledValues, observedValues, 0);
}
///
/// Calculates the Standard Error of the regression, given a sequence of
/// modeled/predicted values, and a sequence of actual/observed values
///
/// The modelled/predicted values
/// The observed/actual values
/// The degrees of freedom by which the
/// number of samples is reduced for performing the Standard Error calculation
/// The Standard Error of the regression
public static double StandardError(IEnumerable modelledValues, IEnumerable observedValues, int degreesOfFreedom)
{
using (IEnumerator ieM = modelledValues.GetEnumerator())
using (IEnumerator ieO = observedValues.GetEnumerator())
{
double n = 0;
double accumulator = 0;
while (ieM.MoveNext())
{
if (!ieO.MoveNext())
{
throw new ArgumentOutOfRangeException(nameof(modelledValues), "The array arguments must have the same length.");
}
double currentM = ieM.Current;
double currentO = ieO.Current;
var diff = currentM - currentO;
accumulator += diff * diff;
n++;
}
if (degreesOfFreedom >= n)
{
throw new ArgumentOutOfRangeException(nameof(degreesOfFreedom), "The sample size must be larger than the given degrees of freedom.");
}
return Math.Sqrt(accumulator / (n - degreesOfFreedom));
}
}
///
/// Calculates the R-Squared value, also known as coefficient of determination,
/// given some modelled and observed values.
///
/// The values expected from the model.
/// The actual values obtained.
/// Coefficient of determination.
public static double CoefficientOfDetermination(IEnumerable modelledValues, IEnumerable observedValues)
{
var y = observedValues;
var f = modelledValues;
int n = 0;
double meanY = 0;
double ssTot = 0;
double ssRes = 0;
using (IEnumerator ieY = y.GetEnumerator())
using (IEnumerator ieF = f.GetEnumerator())
{
while (ieY.MoveNext())
{
if (!ieF.MoveNext())
{
throw new ArgumentOutOfRangeException(nameof(modelledValues), "The array arguments must have the same length.");
}
double currentY = ieY.Current;
double currentF = ieF.Current;
// If a large constant C is added to every y value,
// then each new y have an error of about C*eps,
// thus each new deltaY will change by about C*eps (compared to the old deltaY),
// and thus ssTot will change by only C*eps*deltaY on each step
// thus C*eps*deltaY*n in total.
// (This error cannot be eliminated by a Kahan algorithm,
// because it is introduced when C is added to the old Y value).
//
// This is better than summing the square of y values
// and then substracting the correct multiple of the square of the sum of y values,
// in this latter case ssTot will change by eps*n*(C^2+2*C*meanY) in total.
double deltaY = currentY - meanY;
double scaleDeltaY = deltaY / ++n;
meanY += scaleDeltaY;
ssTot += scaleDeltaY* deltaY* (n - 1);
// This calculation is as safe as ssTot
// in the case when a constant is added to both y and f.
ssRes += (currentY - currentF)* (currentY-currentF);
}
if (ieF.MoveNext())
{
throw new ArgumentOutOfRangeException(nameof(observedValues), "The array arguments must have the same length.");
}
}
return 1 - ssRes/ssTot;
}
}
}