//
// Math.NET Numerics, part of the Math.NET Project
// http://numerics.mathdotnet.com
// http://github.com/mathnet/mathnet-numerics
//
// Copyright (c) 2009-2015 Math.NET
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//
//
// Adapted from the old DescriptiveStatistics and inspired in design
// among others by http://www.johndcook.com/skewness_kurtosis.html
using System;
using System.Collections.Generic;
using System.Runtime.Serialization;
namespace IStation.Numerics.Statistics
{
///
/// Running statistics accumulator, allows updating by adding values
/// or by combining two accumulators.
///
///
/// This type declares a DataContract for out of the box ephemeral serialization
/// with engines like DataContractSerializer, Protocol Buffers and FsPickler,
/// but does not guarantee any compatibility between versions.
/// It is not recommended to rely on this mechanism for durable persistence.
///
[DataContract(Namespace = "urn:IStation/Numerics")]
public class RunningStatistics
{
[DataMember(Order = 1)]
long _n;
[DataMember(Order = 2)]
double _min = double.PositiveInfinity;
[DataMember(Order = 3)]
double _max = double.NegativeInfinity;
[DataMember(Order = 4)]
double _m1;
[DataMember(Order = 5)]
double _m2;
[DataMember(Order = 6)]
double _m3;
[DataMember(Order = 7)]
double _m4;
public RunningStatistics()
{
}
public RunningStatistics(IEnumerable values)
{
PushRange(values);
}
///
/// Gets the total number of samples.
///
public long Count => _n;
///
/// Returns the minimum value in the sample data.
/// Returns NaN if data is empty or if any entry is NaN.
///
public double Minimum => _n > 0 ? _min : double.NaN;
///
/// Returns the maximum value in the sample data.
/// Returns NaN if data is empty or if any entry is NaN.
///
public double Maximum => _n > 0 ? _max : double.NaN;
///
/// Evaluates the sample mean, an estimate of the population mean.
/// Returns NaN if data is empty or if any entry is NaN.
///
public double Mean => _n > 0 ? _m1 : double.NaN;
///
/// Estimates the unbiased population variance from the provided samples.
/// On a dataset of size N will use an N-1 normalizer (Bessel's correction).
/// Returns NaN if data has less than two entries or if any entry is NaN.
///
public double Variance => _n < 2 ? double.NaN : _m2/(_n - 1);
///
/// Evaluates the variance from the provided full population.
/// On a dataset of size N will use an N normalizer and would thus be biased if applied to a subset.
/// Returns NaN if data is empty or if any entry is NaN.
///
public double PopulationVariance => _n < 2 ? double.NaN : _m2/_n;
///
/// Estimates the unbiased population standard deviation from the provided samples.
/// On a dataset of size N will use an N-1 normalizer (Bessel's correction).
/// Returns NaN if data has less than two entries or if any entry is NaN.
///
public double StandardDeviation => _n < 2 ? double.NaN : Math.Sqrt(_m2/(_n - 1));
///
/// Evaluates the standard deviation from the provided full population.
/// On a dataset of size N will use an N normalizer and would thus be biased if applied to a subset.
/// Returns NaN if data is empty or if any entry is NaN.
///
public double PopulationStandardDeviation => _n < 2 ? double.NaN : Math.Sqrt(_m2/_n);
///
/// Estimates the unbiased population skewness from the provided samples.
/// Uses a normalizer (Bessel's correction; type 2).
/// Returns NaN if data has less than three entries or if any entry is NaN.
///
public double Skewness => _n < 3 ? double.NaN : (_n*_m3*Math.Sqrt(_m2/(_n - 1))/(_m2*_m2*(_n - 2)))*(_n - 1);
///
/// Evaluates the population skewness from the full population.
/// Does not use a normalizer and would thus be biased if applied to a subset (type 1).
/// Returns NaN if data has less than two entries or if any entry is NaN.
///
public double PopulationSkewness => _n < 2 ? double.NaN : Math.Sqrt(_n)*_m3/Math.Pow(_m2, 1.5);
///
/// Estimates the unbiased population kurtosis from the provided samples.
/// Uses a normalizer (Bessel's correction; type 2).
/// Returns NaN if data has less than four entries or if any entry is NaN.
///
public double Kurtosis => _n < 4 ? double.NaN : ((double)_n*_n - 1)/((_n - 2)*(_n - 3))*(_n*_m4/(_m2*_m2) - 3 + 6.0/(_n + 1));
///
/// Evaluates the population kurtosis from the full population.
/// Does not use a normalizer and would thus be biased if applied to a subset (type 1).
/// Returns NaN if data has less than three entries or if any entry is NaN.
///
public double PopulationKurtosis => _n < 3 ? double.NaN : _n*_m4/(_m2*_m2) - 3.0;
///
/// Update the running statistics by adding another observed sample (in-place).
///
public void Push(double value)
{
_n++;
double d = value - _m1;
double s = d/_n;
double s2 = s*s;
double t = d*s*(_n - 1);
_m1 += s;
_m4 += t*s2*(_n*_n - 3*_n + 3) + 6*s2*_m2 - 4*s*_m3;
_m3 += t*s*(_n - 2) - 3*s*_m2;
_m2 += t;
if (value < _min || double.IsNaN(value))
{
_min = value;
}
if (value > _max || double.IsNaN(value))
{
_max = value;
}
}
///
/// Update the running statistics by adding a sequence of observed sample (in-place).
///
public void PushRange(IEnumerable values)
{
foreach (double value in values)
{
Push(value);
}
}
///
/// Create a new running statistics over the combined samples of two existing running statistics.
///
public static RunningStatistics Combine(RunningStatistics a, RunningStatistics b)
{
if (a._n == 0)
{
return b;
}
else if (b._n == 0)
{
return a;
}
long n = a._n + b._n;
double d = b._m1 - a._m1;
double d2 = d*d;
double d3 = d2*d;
double d4 = d2*d2;
double m1 = (a._n*a._m1 + b._n*b._m1)/n;
double m2 = a._m2 + b._m2 + d2*a._n*b._n/n;
double m3 = a._m3 + b._m3 + d3*a._n*b._n*(a._n - b._n)/(n*n)
+ 3*d*(a._n*b._m2 - b._n*a._m2)/n;
double m4 = a._m4 + b._m4 + d4*a._n*b._n*(a._n*a._n - a._n*b._n + b._n*b._n)/(n*n*n)
+ 6*d2*(a._n*a._n*b._m2 + b._n*b._n*a._m2)/(n*n) + 4*d*(a._n*b._m3 - b._n*a._m3)/n;
double min = Math.Min(a._min, b._min);
double max = Math.Max(a._max, b._max);
return new RunningStatistics { _n = n, _m1 = m1, _m2 = m2, _m3 = m3, _m4 = m4, _min = min, _max = max };
}
public static RunningStatistics operator +(RunningStatistics a, RunningStatistics b)
{
return Combine(a, b);
}
}
}