//
// Math.NET Numerics, part of the Math.NET Project
// http://numerics.mathdotnet.com
// http://github.com/mathnet/mathnet-numerics
//
// Copyright (c) 2009-2015 Math.NET
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//

using System;
using System.Collections.Generic;
using System.Runtime.Serialization;

namespace IStation.Numerics.Statistics
{
    /// <summary>
    /// Computes the basic statistics of data set. The class meets the
    /// NIST standard of accuracy for mean, variance, and standard deviation
    /// (the only statistics they provide exact values for) and exceeds them
    /// in increased accuracy mode.
    /// Recommendation: consider to use RunningStatistics instead.
    /// </summary>
    /// <remarks>
    /// All statistics are computed once, in a single pass over the data, when the
    /// instance is constructed.
    /// This type declares a DataContract for out of the box ephemeral serialization
    /// with engines like DataContractSerializer, Protocol Buffers and FsPickler,
    /// but does not guarantee any compatibility between versions.
    /// It is not recommended to rely on this mechanism for durable persistence.
    /// </remarks>
    [DataContract(Namespace = "urn:IStation/Numerics")]
    public class DescriptiveStatistics
    {
        /// <summary>
        /// Initializes a new instance of the <see cref="DescriptiveStatistics"/> class.
        /// </summary>
        /// <param name="data">The sample data.</param>
        /// <param name="increasedAccuracy">
        /// If set to <c>true</c>, increased accuracy mode used.
        /// Increased accuracy mode uses <see cref="decimal"/> types for internal calculations.
        /// </param>
        /// <remarks>
        /// Don't use increased accuracy for data sets containing large values (in absolute value).
        /// This may cause the calculations to overflow.
        /// </remarks>
        /// <exception cref="ArgumentNullException"><paramref name="data"/> is <c>null</c>.</exception>
        public DescriptiveStatistics(IEnumerable<double> data, bool increasedAccuracy = false)
        {
            if (data == null)
            {
                throw new ArgumentNullException(nameof(data));
            }

            if (increasedAccuracy)
            {
                ComputeDecimal(data);
            }
            else
            {
                Compute(data);
            }
        }

        /// <summary>
        /// Initializes a new instance of the <see cref="DescriptiveStatistics"/> class
        /// from nullable sample data. Entries without a value are skipped and do not
        /// contribute to <see cref="Count"/>.
        /// </summary>
        /// <param name="data">The sample data.</param>
        /// <param name="increasedAccuracy">
        /// If set to <c>true</c>, increased accuracy mode used.
        /// Increased accuracy mode uses <see cref="decimal"/> types for internal calculations.
        /// </param>
        /// <remarks>
        /// Don't use increased accuracy for data sets containing large values (in absolute value).
        /// This may cause the calculations to overflow.
        /// </remarks>
        /// <exception cref="ArgumentNullException"><paramref name="data"/> is <c>null</c>.</exception>
        public DescriptiveStatistics(IEnumerable<double?> data, bool increasedAccuracy = false)
        {
            if (data == null)
            {
                throw new ArgumentNullException(nameof(data));
            }

            if (increasedAccuracy)
            {
                ComputeDecimal(data);
            }
            else
            {
                Compute(data);
            }
        }

        /// <summary>
        /// Gets the size of the sample.
        /// </summary>
        /// <value>The size of the sample.</value>
        [DataMember(Order = 1)]
        public long Count { get; private set; }

        /// <summary>
        /// Gets the sample mean.
        /// </summary>
        /// <value>The sample mean.</value>
        /// <remarks>Is zero when the sample is empty.</remarks>
        [DataMember(Order = 2)]
        public double Mean { get; private set; }

        /// <summary>
        /// Gets the unbiased population variance estimator (on a dataset of size N will use an N-1 normalizer).
        /// </summary>
        /// <value>The sample variance.</value>
        /// <remarks>Is <see cref="double.NaN"/> if <see cref="Count"/> is less than two.</remarks>
        [DataMember(Order = 3)]
        public double Variance { get; private set; }

        /// <summary>
        /// Gets the unbiased population standard deviation (on a dataset of size N will use an N-1 normalizer).
        /// </summary>
        /// <value>The sample standard deviation.</value>
        /// <remarks>Is <see cref="double.NaN"/> if <see cref="Count"/> is less than two.</remarks>
        [DataMember(Order = 4)]
        public double StandardDeviation { get; private set; }

        /// <summary>
        /// Gets the sample skewness.
        /// </summary>
        /// <value>The sample skewness.</value>
        /// <remarks>
        /// Is <see cref="double.NaN"/> if <see cref="Count"/> is less than three
        /// or if <see cref="Variance"/> is zero.
        /// </remarks>
        [DataMember(Order = 5)]
        public double Skewness { get; private set; }

        /// <summary>
        /// Gets the sample kurtosis.
        /// </summary>
        /// <value>The sample kurtosis.</value>
        /// <remarks>
        /// Is <see cref="double.NaN"/> if <see cref="Count"/> is less than four
        /// or if <see cref="Variance"/> is zero.
        /// </remarks>
        [DataMember(Order = 6)]
        public double Kurtosis { get; private set; }

        /// <summary>
        /// Gets the maximum sample value.
        /// </summary>
        /// <value>The maximum sample value.</value>
        /// <remarks>Is <see cref="double.NaN"/> when the sample is empty.</remarks>
        [DataMember(Order = 7)]
        public double Maximum { get; private set; }

        /// <summary>
        /// Gets the minimum sample value.
        /// </summary>
        /// <value>The minimum sample value.</value>
        /// <remarks>Is <see cref="double.NaN"/> when the sample is empty.</remarks>
        [DataMember(Order = 8)]
        public double Minimum { get; private set; }

        /// <summary>
        /// Computes descriptive statistics from a stream of data values.
        /// </summary>
        /// <param name="data">A sequence of datapoints.</param>
        void Compute(IEnumerable<double> data)
        {
            // "variance", "skewness" and "kurtosis" hold the running sums of the 2nd, 3rd and
            // 4th powers of the deviations from the running mean; they are only normalized
            // into the final statistics inside SetStatistics.
            double mean = 0;
            double variance = 0;
            double skewness = 0;
            double kurtosis = 0;
            double minimum = double.PositiveInfinity;
            double maximum = double.NegativeInfinity;
            long n = 0;

            foreach (var xi in data)
            {
                double delta = xi - mean;
                double scaleDelta = delta/++n;
                double scaleDeltaSqr = scaleDelta*scaleDelta;
                double tmpDelta = delta*(n - 1);

                mean += scaleDelta;

                // Order matters: each higher moment update reads the not-yet-updated lower moments.
                kurtosis += tmpDelta*scaleDelta*scaleDeltaSqr*(n*n - 3*n + 3)
                            + 6*scaleDeltaSqr*variance - 4*scaleDelta*skewness;

                skewness += tmpDelta*scaleDeltaSqr*(n - 2) - 3*scaleDelta*variance;
                variance += tmpDelta*scaleDelta;

                if (minimum > xi)
                {
                    minimum = xi;
                }

                if (maximum < xi)
                {
                    maximum = xi;
                }
            }

            SetStatistics(mean, variance, skewness, kurtosis, minimum, maximum, n);
        }

        /// <summary>
        /// Computes descriptive statistics from a stream of nullable data values.
        /// Entries without a value are ignored.
        /// </summary>
        /// <param name="data">A sequence of datapoints.</param>
        void Compute(IEnumerable<double?> data)
        {
            double mean = 0;
            double variance = 0;
            double skewness = 0;
            double kurtosis = 0;
            double minimum = double.PositiveInfinity;
            double maximum = double.NegativeInfinity;
            long n = 0;

            foreach (var item in data)
            {
                if (!item.HasValue)
                {
                    continue;
                }

                double xi = item.Value;
                double delta = xi - mean;
                double scaleDelta = delta/++n;
                double scaleDeltaSqr = scaleDelta*scaleDelta;
                double tmpDelta = delta*(n - 1);

                mean += scaleDelta;

                // Order matters: each higher moment update reads the not-yet-updated lower moments.
                kurtosis += tmpDelta*scaleDelta*scaleDeltaSqr*(n*n - 3*n + 3)
                            + 6*scaleDeltaSqr*variance - 4*scaleDelta*skewness;

                skewness += tmpDelta*scaleDeltaSqr*(n - 2) - 3*scaleDelta*variance;
                variance += tmpDelta*scaleDelta;

                if (minimum > xi)
                {
                    minimum = xi;
                }

                if (maximum < xi)
                {
                    maximum = xi;
                }
            }

            SetStatistics(mean, variance, skewness, kurtosis, minimum, maximum, n);
        }

        /// <summary>
        /// Computes descriptive statistics from a stream of data values,
        /// using <see cref="decimal"/> arithmetic for the intermediate sums.
        /// </summary>
        /// <param name="data">A sequence of datapoints.</param>
        void ComputeDecimal(IEnumerable<double> data)
        {
            decimal mean = 0;
            decimal variance = 0;
            decimal skewness = 0;
            decimal kurtosis = 0;
            decimal minimum = decimal.MaxValue;
            decimal maximum = decimal.MinValue;
            long n = 0;

            foreach (double x in data)
            {
                decimal xi = (decimal)x;
                decimal delta = xi - mean;
                decimal scaleDelta = delta/++n;
                decimal scaleDelta2 = scaleDelta*scaleDelta;
                decimal tmpDelta = delta*(n - 1);

                mean += scaleDelta;

                // Order matters: each higher moment update reads the not-yet-updated lower moments.
                kurtosis += tmpDelta*scaleDelta*scaleDelta2*(n*n - 3*n + 3)
                            + 6*scaleDelta2*variance - 4*scaleDelta*skewness;

                skewness += tmpDelta*scaleDelta2*(n - 2) - 3*scaleDelta*variance;
                variance += tmpDelta*scaleDelta;

                if (minimum > xi)
                {
                    minimum = xi;
                }

                if (maximum < xi)
                {
                    maximum = xi;
                }
            }

            SetStatistics((double)mean, (double)variance, (double)skewness, (double)kurtosis, (double)minimum, (double)maximum, n);
        }

        /// <summary>
        /// Computes descriptive statistics from a stream of nullable data values,
        /// using <see cref="decimal"/> arithmetic for the intermediate sums.
        /// Entries without a value are ignored.
        /// </summary>
        /// <param name="data">A sequence of datapoints.</param>
        void ComputeDecimal(IEnumerable<double?> data)
        {
            decimal mean = 0;
            decimal variance = 0;
            decimal skewness = 0;
            decimal kurtosis = 0;
            decimal minimum = decimal.MaxValue;
            decimal maximum = decimal.MinValue;
            long n = 0;

            foreach (double? x in data)
            {
                if (!x.HasValue)
                {
                    continue;
                }

                decimal xi = (decimal)x.Value;
                decimal delta = xi - mean;
                decimal scaleDelta = delta/++n;
                decimal scaleDelta2 = scaleDelta*scaleDelta;
                decimal tmpDelta = delta*(n - 1);

                mean += scaleDelta;

                // Order matters: each higher moment update reads the not-yet-updated lower moments.
                kurtosis += tmpDelta*scaleDelta*scaleDelta2*(n*n - 3*n + 3)
                            + 6*scaleDelta2*variance - 4*scaleDelta*skewness;

                skewness += tmpDelta*scaleDelta2*(n - 2) - 3*scaleDelta*variance;
                variance += tmpDelta*scaleDelta;

                if (minimum > xi)
                {
                    minimum = xi;
                }

                if (maximum < xi)
                {
                    maximum = xi;
                }
            }

            SetStatistics((double)mean, (double)variance, (double)skewness, (double)kurtosis, (double)minimum, (double)maximum, n);
        }

        /// <summary>
        /// Internal use. Method use for setting the statistics.
        /// </summary>
        /// <param name="mean">For setting Mean.</param>
        /// <param name="variance">For setting Variance; the un-normalized sum of squared deviations.</param>
        /// <param name="skewness">For setting Skewness; the un-normalized sum of cubed deviations.</param>
        /// <param name="kurtosis">For setting Kurtosis; the un-normalized sum of deviations to the fourth power.</param>
        /// <param name="minimum">For setting Minimum.</param>
        /// <param name="maximum">For setting Maximum.</param>
        /// <param name="n">For setting Count.</param>
        void SetStatistics(double mean, double variance, double skewness, double kurtosis, double minimum, double maximum, long n)
        {
            Mean = mean;
            Count = n;

            // Everything that cannot be computed for the given sample size stays NaN.
            Minimum = double.NaN;
            Maximum = double.NaN;
            Variance = double.NaN;
            StandardDeviation = double.NaN;
            Skewness = double.NaN;
            Kurtosis = double.NaN;

            if (n > 0)
            {
                Minimum = minimum;
                Maximum = maximum;

                if (n > 1)
                {
                    Variance = variance/(n - 1);
                    StandardDeviation = Math.Sqrt(Variance);
                }

                // A zero variance would divide by zero below, so skewness and kurtosis stay NaN.
                if (Variance != 0)
                {
                    if (n > 2)
                    {
                        Skewness = (double)n/((n - 1)*(n - 2))*(skewness/(Variance*StandardDeviation));
                    }

                    if (n > 3)
                    {
                        // Uses the raw sum "variance" (parameter), not the normalized Variance property.
                        Kurtosis = ((double)n*n - 1)/((n - 2)*(n - 3))
                                   *(n*kurtosis/(variance*variance) - 3 + 6.0/(n + 1));
                    }
                }
            }
        }
    }
}