// // Math.NET Numerics, part of the Math.NET Project // http://numerics.mathdotnet.com // http://github.com/mathnet/mathnet-numerics // // Copyright (c) 2009-2015 Math.NET // // Permission is hereby granted, free of charge, to any person // obtaining a copy of this software and associated documentation // files (the "Software"), to deal in the Software without // restriction, including without limitation the rights to use, // copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the // Software is furnished to do so, subject to the following // conditions: // // The above copyright notice and this permission notice shall be // included in all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR // OTHER DEALINGS IN THE SOFTWARE. // using System; namespace IStation.Numerics.Statistics { /// /// Statistics operating on an array already sorted ascendingly. /// /// /// /// public static partial class SortedArrayStatistics { /// /// Returns the smallest value from the sorted data array (ascending). /// /// Sample array, must be sorted ascendingly. public static double Minimum(double[] data) { if (data.Length == 0) { return double.NaN; } return data[0]; } /// /// Returns the largest value from the sorted data array (ascending). /// /// Sample array, must be sorted ascendingly. public static double Maximum(double[] data) { if (data.Length == 0) { return double.NaN; } return data[data.Length - 1]; } /// /// Returns the order statistic (order 1..N) from the sorted data array (ascending). /// /// Sample array, must be sorted ascendingly. /// One-based order of the statistic, must be between 1 and N (inclusive). public static double OrderStatistic(double[] data, int order) { if (order < 1 || order > data.Length) { return double.NaN; } return data[order - 1]; } /// /// Estimates the median value from the sorted data array (ascending). /// Approximately median-unbiased regardless of the sample distribution (R8). /// /// Sample array, must be sorted ascendingly. public static double Median(double[] data) { if (data.Length == 0) { return double.NaN; } var k = data.Length/2; return data.Length.IsOdd() ? data[k] : (data[k - 1] + data[k])/2.0; } /// /// Estimates the p-Percentile value from the sorted data array (ascending). /// If a non-integer Percentile is needed, use Quantile instead. /// Approximately median-unbiased regardless of the sample distribution (R8). /// /// Sample array, must be sorted ascendingly. /// Percentile selector, between 0 and 100 (inclusive). public static double Percentile(double[] data, int p) { return Quantile(data, p/100d); } /// /// Estimates the first quartile value from the sorted data array (ascending). /// Approximately median-unbiased regardless of the sample distribution (R8). /// /// Sample array, must be sorted ascendingly. public static double LowerQuartile(double[] data) { return Quantile(data, 0.25d); } /// /// Estimates the third quartile value from the sorted data array (ascending). /// Approximately median-unbiased regardless of the sample distribution (R8). /// /// Sample array, must be sorted ascendingly. public static double UpperQuartile(double[] data) { return Quantile(data, 0.75d); } /// /// Estimates the inter-quartile range from the sorted data array (ascending). /// Approximately median-unbiased regardless of the sample distribution (R8). /// /// Sample array, must be sorted ascendingly. public static double InterquartileRange(double[] data) { return Quantile(data, 0.75d) - Quantile(data, 0.25d); } /// /// Estimates {min, lower-quantile, median, upper-quantile, max} from the sorted data array (ascending). /// Approximately median-unbiased regardless of the sample distribution (R8). /// /// Sample array, must be sorted ascendingly. public static double[] FiveNumberSummary(double[] data) { if (data.Length == 0) { return new[] { double.NaN, double.NaN, double.NaN, double.NaN, double.NaN }; } return new[] { data[0], Quantile(data, 0.25), Quantile(data, 0.50), Quantile(data, 0.75), data[data.Length - 1] }; } /// /// Estimates the tau-th quantile from the sorted data array (ascending). /// The tau-th quantile is the data value where the cumulative distribution /// function crosses tau. /// Approximately median-unbiased regardless of the sample distribution (R8). /// /// Sample array, must be sorted ascendingly. /// Quantile selector, between 0.0 and 1.0 (inclusive). /// /// R-8, SciPy-(1/3,1/3): /// Linear interpolation of the approximate medians for order statistics. /// When tau < (2/3) / (N + 1/3), use x1. When tau >= (N - 1/3) / (N + 1/3), use xN. /// public static double Quantile(double[] data, double tau) { if (tau < 0d || tau > 1d || data.Length == 0) { return double.NaN; } if (tau == 0d || data.Length == 1) { return data[0]; } if (tau == 1d) { return data[data.Length - 1]; } double h = (data.Length + 1/3d)*tau + 1/3d; var hf = (int)h; return hf < 1 ? data[0] : hf >= data.Length ? data[data.Length - 1] : data[hf - 1] + (h - hf)*(data[hf] - data[hf - 1]); } /// /// Estimates the tau-th quantile from the sorted data array (ascending). /// The tau-th quantile is the data value where the cumulative distribution /// function crosses tau. The quantile definition can be specified /// by 4 parameters a, b, c and d, consistent with Mathematica. /// /// Sample array, must be sorted ascendingly. /// Quantile selector, between 0.0 and 1.0 (inclusive). /// a-parameter /// b-parameter /// c-parameter /// d-parameter public static double QuantileCustom(double[] data, double tau, double a, double b, double c, double d) { if (tau < 0d || tau > 1d || data.Length == 0) { return double.NaN; } var x = a + (data.Length + b)*tau - 1; var ip = Math.Truncate(x); var fp = x - ip; if (Math.Abs(fp) < 1e-9) { return data[Math.Min(Math.Max((int)ip, 0), data.Length - 1)]; } var lower = data[Math.Max((int)Math.Floor(x), 0)]; var upper = data[Math.Min((int)Math.Ceiling(x), data.Length - 1)]; return lower + (upper - lower)*(c + d*fp); } /// /// Estimates the tau-th quantile from the sorted data array (ascending). /// The tau-th quantile is the data value where the cumulative distribution /// function crosses tau. The quantile definition can be specified to be compatible /// with an existing system. /// /// Sample array, must be sorted ascendingly. /// Quantile selector, between 0.0 and 1.0 (inclusive). /// Quantile definition, to choose what product/definition it should be consistent with public static double QuantileCustom(double[] data, double tau, QuantileDefinition definition) { if (tau < 0d || tau > 1d || data.Length == 0) { return double.NaN; } if (tau == 0d || data.Length == 1) { return data[0]; } if (tau == 1d) { return data[data.Length - 1]; } switch (definition) { case QuantileDefinition.R1: { double h = data.Length*tau + 0.5d; return data[(int)Math.Ceiling(h - 0.5d) - 1]; } case QuantileDefinition.R2: { double h = data.Length*tau + 0.5d; return (data[(int)Math.Ceiling(h - 0.5d) - 1] + data[(int)(h + 0.5d) - 1])*0.5d; } case QuantileDefinition.R3: { double h = data.Length*tau; return data[Math.Max((int)Math.Round(h) - 1, 0)]; } case QuantileDefinition.R4: { double h = data.Length*tau; var hf = (int)h; var lower = data[Math.Max(hf - 1, 0)]; var upper = data[Math.Min(hf, data.Length - 1)]; return lower + (h - hf)*(upper - lower); } case QuantileDefinition.R5: { double h = data.Length*tau + 0.5d; var hf = (int)h; var lower = data[Math.Max(hf - 1, 0)]; var upper = data[Math.Min(hf, data.Length - 1)]; return lower + (h - hf)*(upper - lower); } case QuantileDefinition.R6: { double h = (data.Length + 1)*tau; var hf = (int)h; var lower = data[Math.Max(hf - 1, 0)]; var upper = data[Math.Min(hf, data.Length - 1)]; return lower + (h - hf)*(upper - lower); } case QuantileDefinition.R7: { double h = (data.Length - 1)*tau + 1d; var hf = (int)h; var lower = data[Math.Max(hf - 1, 0)]; var upper = data[Math.Min(hf, data.Length - 1)]; return lower + (h - hf)*(upper - lower); } case QuantileDefinition.R8: { double h = (data.Length + 1/3d)*tau + 1/3d; var hf = (int)h; var lower = data[Math.Max(hf - 1, 0)]; var upper = data[Math.Min(hf, data.Length - 1)]; return lower + (h - hf)*(upper - lower); } case QuantileDefinition.R9: { double h = (data.Length + 0.25d)*tau + 0.375d; var hf = (int)h; var lower = data[Math.Max(hf - 1, 0)]; var upper = data[Math.Min(hf, data.Length - 1)]; return lower + (h - hf)*(upper - lower); } default: throw new NotSupportedException(); } } /// /// Estimates the empirical cumulative distribution function (CDF) at x from the sorted data array (ascending). /// /// The data sample sequence. /// The value where to estimate the CDF at. public static double EmpiricalCDF(double[] data, double x) { if (x < data[0]) { return 0.0; } if (x >= data[data.Length - 1]) { return 1.0; } int right = Array.BinarySearch(data, x); if (right >= 0) { while (right < data.Length - 1 && data[right + 1] == data[right]) { right++; } return (right + 1)/(double)data.Length; } return (~right)/(double)data.Length; } /// /// Estimates the quantile tau from the sorted data array (ascending). /// The tau-th quantile is the data value where the cumulative distribution /// function crosses tau. The quantile definition can be specified to be compatible /// with an existing system. /// /// The data sample sequence. /// Quantile value. /// Rank definition, to choose how ties should be handled and what product/definition it should be consistent with public static double QuantileRank(double[] data, double x, RankDefinition definition = RankDefinition.Default) { if (x < data[0]) { return 0.0; } if (x >= data[data.Length - 1]) { return 1.0; } int right = Array.BinarySearch(data, x); if (right >= 0) { int left = right; while (left > 0 && data[left - 1] == data[left]) { left--; } while (right < data.Length - 1 && data[right + 1] == data[right]) { right++; } switch (definition) { case RankDefinition.EmpiricalCDF: return (right + 1)/(double)data.Length; case RankDefinition.Max: return right/(double)(data.Length - 1); case RankDefinition.Min: return left/(double)(data.Length - 1); case RankDefinition.Average: return (left/(double)(data.Length - 1) + right/(double)(data.Length - 1))/2; default: throw new NotSupportedException(); } } else { right = ~right; int left = right - 1; switch (definition) { case RankDefinition.EmpiricalCDF: return (left + 1)/(double)data.Length; default: { var a = left/(double)(data.Length - 1); var b = right/(double)(data.Length - 1); return ((data[right] - x)*a + (x - data[left])*b)/(data[right] - data[left]); } } } } /// /// Evaluates the rank of each entry of the sorted data array (ascending). /// The rank definition can be specified to be compatible /// with an existing system. /// public static double[] Ranks(double[] data, RankDefinition definition = RankDefinition.Default) { var ranks = new double[data.Length]; if (definition == RankDefinition.First) { for (int i = 0; i < ranks.Length; i++) { ranks[i] = i + 1; } return ranks; } int previousIndex = 0; for (int i = 1; i < data.Length; i++) { if (Math.Abs(data[i] - data[previousIndex]) <= 0d) { continue; } if (i == previousIndex + 1) { ranks[previousIndex] = i; } else { RanksTies(ranks, previousIndex, i, definition); } previousIndex = i; } RanksTies(ranks, previousIndex, data.Length, definition); return ranks; } static void RanksTies(double[] ranks, int a, int b, RankDefinition definition) { // TODO: potential for PERF optimization double rank; switch (definition) { case RankDefinition.Average: { rank = (b + a - 1)/2d + 1; break; } case RankDefinition.Min: { rank = a + 1; break; } case RankDefinition.Max: { rank = b; break; } default: throw new NotSupportedException(); } for (int k = a; k < b; k++) { ranks[k] = rank; } } } }