//
// Math.NET Numerics, part of the Math.NET Project
// http://numerics.mathdotnet.com
// http://github.com/mathnet/mathnet-numerics
//
// Copyright (c) 2009-2015 Math.NET
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//
using System;
using System.Collections.Generic;
using System.Runtime.Serialization;
namespace IStation.Numerics.Statistics
{
/// <summary>
/// Computes the basic statistics of a data set. The class meets the
/// NIST standard of accuracy for mean, variance, and standard deviation
/// (the only statistics they provide exact values for) and exceeds them
/// in increased accuracy mode.
/// Recommendation: consider using RunningStatistics instead.
/// </summary>
/// <remarks>
/// This type declares a DataContract for out-of-the-box ephemeral serialization
/// with engines like DataContractSerializer, Protocol Buffers and FsPickler,
/// but does not guarantee any compatibility between versions.
/// It is not recommended to rely on this mechanism for durable persistence.
/// </remarks>
[DataContract(Namespace = "urn:IStation/Numerics")]
public class DescriptiveStatistics
{
    /// <summary>
    /// Initializes a new instance of the <see cref="DescriptiveStatistics"/> class
    /// from a sequence of values.
    /// </summary>
    /// <param name="data">The sample data.</param>
    /// <param name="increasedAccuracy">
    /// If set to <c>true</c>, increased accuracy mode is used.
    /// Increased accuracy mode uses <see cref="decimal"/> types for internal calculations.
    /// </param>
    /// <remarks>
    /// Don't use increased accuracy for data sets containing large values (in absolute value).
    /// This may cause the calculations to overflow.
    /// </remarks>
    /// <exception cref="ArgumentNullException"><paramref name="data"/> is <c>null</c>.</exception>
    public DescriptiveStatistics(IEnumerable<double> data, bool increasedAccuracy = false)
    {
        if (data == null)
        {
            throw new ArgumentNullException(nameof(data));
        }

        if (increasedAccuracy)
        {
            ComputeDecimal(data);
        }
        else
        {
            Compute(data);
        }
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="DescriptiveStatistics"/> class
    /// from a sequence of nullable values. Entries without a value are ignored
    /// and do not contribute to <see cref="Count"/>.
    /// </summary>
    /// <param name="data">The sample data.</param>
    /// <param name="increasedAccuracy">
    /// If set to <c>true</c>, increased accuracy mode is used.
    /// Increased accuracy mode uses <see cref="decimal"/> types for internal calculations.
    /// </param>
    /// <remarks>
    /// Don't use increased accuracy for data sets containing large values (in absolute value).
    /// This may cause the calculations to overflow.
    /// </remarks>
    /// <exception cref="ArgumentNullException"><paramref name="data"/> is <c>null</c>.</exception>
    public DescriptiveStatistics(IEnumerable<double?> data, bool increasedAccuracy = false)
    {
        if (data == null)
        {
            throw new ArgumentNullException(nameof(data));
        }

        if (increasedAccuracy)
        {
            ComputeDecimal(data);
        }
        else
        {
            Compute(data);
        }
    }

    /// <summary>
    /// Gets the size of the sample.
    /// </summary>
    /// <value>The size of the sample.</value>
    [DataMember(Order = 1)]
    public long Count { get; private set; }

    /// <summary>
    /// Gets the sample mean.
    /// </summary>
    /// <value>The sample mean. An empty sample reports <c>0</c>.</value>
    [DataMember(Order = 2)]
    public double Mean { get; private set; }

    /// <summary>
    /// Gets the unbiased population variance estimator (on a dataset of size N will use an N-1 normalizer).
    /// </summary>
    /// <value>The sample variance.</value>
    /// <remarks>Returns <see cref="double.NaN"/> if <see cref="Count"/> is less than two.</remarks>
    [DataMember(Order = 3)]
    public double Variance { get; private set; }

    /// <summary>
    /// Gets the unbiased population standard deviation (on a dataset of size N will use an N-1 normalizer).
    /// </summary>
    /// <value>The sample standard deviation.</value>
    /// <remarks>Returns <see cref="double.NaN"/> if <see cref="Count"/> is less than two.</remarks>
    [DataMember(Order = 4)]
    public double StandardDeviation { get; private set; }

    /// <summary>
    /// Gets the sample skewness.
    /// </summary>
    /// <value>The sample skewness.</value>
    /// <remarks>
    /// Returns <see cref="double.NaN"/> if <see cref="Count"/> is less than three
    /// or if <see cref="Variance"/> is zero.
    /// </remarks>
    [DataMember(Order = 5)]
    public double Skewness { get; private set; }

    /// <summary>
    /// Gets the sample kurtosis.
    /// </summary>
    /// <value>The sample kurtosis.</value>
    /// <remarks>
    /// Returns <see cref="double.NaN"/> if <see cref="Count"/> is less than four
    /// or if <see cref="Variance"/> is zero.
    /// </remarks>
    [DataMember(Order = 6)]
    public double Kurtosis { get; private set; }

    /// <summary>
    /// Gets the maximum sample value.
    /// </summary>
    /// <value>The maximum sample value, or <see cref="double.NaN"/> for an empty sample.</value>
    [DataMember(Order = 7)]
    public double Maximum { get; private set; }

    /// <summary>
    /// Gets the minimum sample value.
    /// </summary>
    /// <value>The minimum sample value, or <see cref="double.NaN"/> for an empty sample.</value>
    [DataMember(Order = 8)]
    public double Minimum { get; private set; }

    /// <summary>
    /// Yields the values of all entries that have one, preserving their order.
    /// </summary>
    /// <param name="data">A sequence of nullable datapoints.</param>
    /// <returns>The non-null values of <paramref name="data"/>.</returns>
    private static IEnumerable<double> SkipMissing(IEnumerable<double?> data)
    {
        foreach (double? item in data)
        {
            if (item.HasValue)
            {
                yield return item.Value;
            }
        }
    }

    /// <summary>
    /// Computes descriptive statistics from a stream of data values.
    /// </summary>
    /// <param name="data">A sequence of datapoints.</param>
    private void Compute(IEnumerable<double> data)
    {
        double mean = 0;
        double variance = 0;
        double skewness = 0;
        double kurtosis = 0;
        double minimum = double.PositiveInfinity;
        double maximum = double.NegativeInfinity;
        long n = 0;

        foreach (double xi in data)
        {
            double delta = xi - mean;
            double scaleDelta = delta/++n;
            double scaleDeltaSqr = scaleDelta*scaleDelta;
            double tmpDelta = delta*(n - 1);

            mean += scaleDelta;

            // Update order matters: each higher-order accumulator must read the
            // lower-order accumulators before they are advanced for this sample.
            kurtosis += tmpDelta*scaleDelta*scaleDeltaSqr*(n*n - 3*n + 3)
                        + 6*scaleDeltaSqr*variance - 4*scaleDelta*skewness;
            skewness += tmpDelta*scaleDeltaSqr*(n - 2) - 3*scaleDelta*variance;
            variance += tmpDelta*scaleDelta;

            if (minimum > xi)
            {
                minimum = xi;
            }

            if (maximum < xi)
            {
                maximum = xi;
            }
        }

        SetStatistics(mean, variance, skewness, kurtosis, minimum, maximum, n);
    }

    /// <summary>
    /// Computes descriptive statistics from a stream of nullable data values.
    /// Entries without a value are skipped.
    /// </summary>
    /// <param name="data">A sequence of datapoints.</param>
    private void Compute(IEnumerable<double?> data)
    {
        Compute(SkipMissing(data));
    }

    /// <summary>
    /// Computes descriptive statistics from a stream of data values,
    /// using <see cref="decimal"/> arithmetic for the internal accumulators.
    /// </summary>
    /// <param name="data">A sequence of datapoints.</param>
    private void ComputeDecimal(IEnumerable<double> data)
    {
        decimal mean = 0;
        decimal variance = 0;
        decimal skewness = 0;
        decimal kurtosis = 0;
        decimal minimum = decimal.MaxValue;
        decimal maximum = decimal.MinValue;
        long n = 0;

        foreach (double x in data)
        {
            decimal xi = (decimal)x;
            decimal delta = xi - mean;
            decimal scaleDelta = delta/++n;
            decimal scaleDeltaSqr = scaleDelta*scaleDelta;
            decimal tmpDelta = delta*(n - 1);

            mean += scaleDelta;

            // Update order matters: each higher-order accumulator must read the
            // lower-order accumulators before they are advanced for this sample.
            kurtosis += tmpDelta*scaleDelta*scaleDeltaSqr*(n*n - 3*n + 3)
                        + 6*scaleDeltaSqr*variance - 4*scaleDelta*skewness;
            skewness += tmpDelta*scaleDeltaSqr*(n - 2) - 3*scaleDelta*variance;
            variance += tmpDelta*scaleDelta;

            if (minimum > xi)
            {
                minimum = xi;
            }

            if (maximum < xi)
            {
                maximum = xi;
            }
        }

        SetStatistics((double)mean, (double)variance, (double)skewness, (double)kurtosis, (double)minimum, (double)maximum, n);
    }

    /// <summary>
    /// Computes descriptive statistics from a stream of nullable data values,
    /// using <see cref="decimal"/> arithmetic for the internal accumulators.
    /// Entries without a value are skipped.
    /// </summary>
    /// <param name="data">A sequence of datapoints.</param>
    private void ComputeDecimal(IEnumerable<double?> data)
    {
        ComputeDecimal(SkipMissing(data));
    }

    /// <summary>
    /// Internal use. Converts the accumulated sums into the published statistics.
    /// </summary>
    /// <param name="mean">For setting Mean.</param>
    /// <param name="variance">For setting Variance.</param>
    /// <param name="skewness">For setting Skewness.</param>
    /// <param name="kurtosis">For setting Kurtosis.</param>
    /// <param name="minimum">For setting Minimum.</param>
    /// <param name="maximum">For setting Maximum.</param>
    /// <param name="n">For setting Count.</param>
    private void SetStatistics(double mean, double variance, double skewness, double kurtosis, double minimum, double maximum, long n)
    {
        Mean = mean;
        Count = n;

        // Everything except Mean and Count starts as NaN and is only filled in
        // once the sample is large enough for the statistic to be defined.
        Minimum = double.NaN;
        Maximum = double.NaN;
        Variance = double.NaN;
        StandardDeviation = double.NaN;
        Skewness = double.NaN;
        Kurtosis = double.NaN;

        if (n > 0)
        {
            Minimum = minimum;
            Maximum = maximum;

            if (n > 1)
            {
                Variance = variance/(n - 1);
                StandardDeviation = Math.Sqrt(Variance);
            }

            // A zero variance would make both ratios below divide by zero,
            // so skewness and kurtosis stay NaN in that case.
            if (Variance != 0)
            {
                if (n > 2)
                {
                    Skewness = (double)n/((n - 1)*(n - 2))*(skewness/(Variance*StandardDeviation));
                }

                if (n > 3)
                {
                    Kurtosis = ((double)n*n - 1)/((n - 2)*(n - 3))
                               *(n*kurtosis/(variance*variance) - 3 + 6.0/(n + 1));
                }
            }
        }
    }
}
}