IStation/Service.V4.1.git

// <copyright file="GoodnessOfFit.cs">
// Math.NET Numerics, part of the Math.NET Project
// http://numerics.mathdotnet.com
// http://github.com/mathnet/mathnet-numerics
//
// Copyright (c) 2009-2018 Math.NET
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
// </copyright>
 
using System;
using System.Collections.Generic;
using IStation.Numerics.Statistics;
 
namespace IStation.Numerics
{
    public static class GoodnessOfFit
    {
        /// <summary>
        /// Calculates r^2, the square of the sample correlation coefficient between
        /// the observed outcomes and the observed predictor values.
        /// Not to be confused with R^2, the coefficient of determination, see <see cref="CoefficientOfDetermination"/>.
        /// </summary>
        /// <param name="modelledValues">The modelled/predicted values</param>
        /// <param name="observedValues">The observed/actual values</param>
        /// <returns>Squared Person product-momentum correlation coefficient.</returns>
        public static double RSquared(IEnumerable<double> modelledValues, IEnumerable<double> observedValues)
        {
            var corr = Correlation.Pearson(modelledValues, observedValues);
            return corr * corr;
        }
 
        /// <summary>
        /// Calculates r, the sample correlation coefficient between the observed outcomes
        /// and the observed predictor values.
        /// </summary>
        /// <param name="modelledValues">The modelled/predicted values</param>
        /// <param name="observedValues">The observed/actual values</param>
        /// <returns>Person product-momentum correlation coefficient.</returns>
        public static double R(IEnumerable<double> modelledValues, IEnumerable<double> observedValues)
        {
            return Correlation.Pearson(modelledValues, observedValues);
        }
 
        /// <summary>
        /// Calculates the Standard Error of the regression, given a sequence of
        /// modeled/predicted values, and a sequence of actual/observed values
        /// </summary>
        /// <param name="modelledValues">The modelled/predicted values</param>
        /// <param name="observedValues">The observed/actual values</param>
        /// <returns>The Standard Error of the regression</returns>
        public static double PopulationStandardError(IEnumerable<double> modelledValues, IEnumerable<double> observedValues)
        {
            return StandardError(modelledValues, observedValues, 0);
        }
 
        /// <summary>
        /// Calculates the Standard Error of the regression, given a sequence of
        /// modeled/predicted values, and a sequence of actual/observed values
        /// </summary>
        /// <param name="modelledValues">The modelled/predicted values</param>
        /// <param name="observedValues">The observed/actual values</param>
        /// <param name="degreesOfFreedom">The degrees of freedom by which the
        /// number of samples is reduced for performing the Standard Error calculation</param>
        /// <returns>The Standard Error of the regression</returns>
        public static double StandardError(IEnumerable<double> modelledValues, IEnumerable<double> observedValues, int degreesOfFreedom)
        {
            using (IEnumerator<double> ieM = modelledValues.GetEnumerator())
            using (IEnumerator<double> ieO = observedValues.GetEnumerator())
            {
                double n = 0;
                double accumulator = 0;
                while (ieM.MoveNext())
                {
                    if (!ieO.MoveNext())
                    {
                        throw new ArgumentOutOfRangeException(nameof(modelledValues), "The array arguments must have the same length.");
                    }
                    double currentM = ieM.Current;
                    double currentO = ieO.Current;
                    var diff = currentM - currentO;
                    accumulator += diff * diff;
                    n++;
                }
 
                if (degreesOfFreedom >= n)
                {
                    throw new ArgumentOutOfRangeException(nameof(degreesOfFreedom), "The sample size must be larger than the given degrees of freedom.");
                }
                return Math.Sqrt(accumulator / (n - degreesOfFreedom));
            }
        }
 
        /// <summary>
        /// Calculates the R-Squared value, also known as coefficient of determination,
        /// given some modelled and observed values.
        /// </summary>
        /// <param name="modelledValues">The values expected from the model.</param>
        /// <param name="observedValues">The actual values obtained.</param>
        /// <returns>Coefficient of determination.</returns>
        public static double CoefficientOfDetermination(IEnumerable<double> modelledValues, IEnumerable<double> observedValues)
        {
            var y = observedValues;
            var f = modelledValues;
            int n = 0;
 
            double meanY = 0;
            double ssTot = 0;
            double ssRes = 0;
 
            using (IEnumerator<double> ieY = y.GetEnumerator())
            using (IEnumerator<double> ieF = f.GetEnumerator())
            {
                while (ieY.MoveNext())
                {
                    if (!ieF.MoveNext())
                    {
                        throw new ArgumentOutOfRangeException(nameof(modelledValues), "The array arguments must have the same length.");
                    }
 
                    double currentY = ieY.Current;
                    double currentF = ieF.Current;
 
                    // If a large constant C is added to every y value,
                    // then each new y have an error of about C*eps,
                    // thus each new deltaY will change by about C*eps (compared to the old deltaY),
                    // and thus ssTot will change by only C*eps*deltaY on each step
                    // thus C*eps*deltaY*n in total.
                    // (This error cannot be eliminated by a Kahan algorithm,
                    // because it is introduced when C is added to the old Y value).
                    //
                    // This is better than summing the square of y values
                    // and then substracting the correct multiple of the square of the sum of y values,
                    // in this latter case ssTot will change by eps*n*(C^2+2*C*meanY) in total.
                    double deltaY = currentY - meanY;
                    double scaleDeltaY = deltaY / ++n;
 
                    meanY += scaleDeltaY;
                    ssTot += scaleDeltaY* deltaY* (n - 1);
 
                    // This calculation is as safe as ssTot
                    // in the case when a constant is added to both y and f.
                    ssRes += (currentY - currentF)* (currentY-currentF);
                }
 
                if (ieF.MoveNext())
                {
                    throw new ArgumentOutOfRangeException(nameof(observedValues), "The array arguments must have the same length.");
                }
            }
            return 1 - ssRes/ssTot;
        }
    }
}