// // Math.NET Numerics, part of the Math.NET Project // http://numerics.mathdotnet.com // http://github.com/mathnet/mathnet-numerics // // Copyright (c) 2009-2015 Math.NET // // Permission is hereby granted, free of charge, to any person // obtaining a copy of this software and associated documentation // files (the "Software"), to deal in the Software without // restriction, including without limitation the rights to use, // copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the // Software is furnished to do so, subject to the following // conditions: // // The above copyright notice and this permission notice shall be // included in all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR // OTHER DEALINGS IN THE SOFTWARE. // using System; using System.Collections.Generic; using System.Runtime.Serialization; using System.Text; namespace IStation.Numerics.Statistics { /// /// A consists of a series of s, /// each representing a region limited by a lower bound (exclusive) and an upper bound (inclusive). /// /// /// This type declares a DataContract for out of the box ephemeral serialization /// with engines like DataContractSerializer, Protocol Buffers and FsPickler, /// but does not guarantee any compatibility between versions. /// It is not recommended to rely on this mechanism for durable persistence. /// [Serializable] [DataContract(Namespace = "urn:IStation/Numerics")] public class Bucket : IComparable #if !NETSTANDARD1_3 , ICloneable #endif { /// /// This IComparer performs comparisons between a point and a bucket. /// private sealed class PointComparer : IComparer { /// /// Compares a point and a bucket. The point will be encapsulated in a bucket with width 0. /// /// The first bucket to compare. /// The second bucket to compare. /// -1 when the point is less than this bucket, 0 when it is in this bucket and 1 otherwise. public int Compare(Bucket bkt1, Bucket bkt2) { return bkt2.IsSinglePoint ? -bkt1.Contains(bkt2.UpperBound) : -bkt2.Contains(bkt1.UpperBound); } } private static readonly PointComparer Comparer = new PointComparer(); /// /// Lower Bound of the Bucket. /// [DataMember(Order = 1)] public double LowerBound { get; set; } /// /// Upper Bound of the Bucket. /// [DataMember(Order = 2)] public double UpperBound { get; set; } /// /// The number of datapoints in the bucket. /// /// /// Value may be NaN if this was constructed as a argument. /// [DataMember(Order = 3)] public double Count { get; set; } /// /// Initializes a new instance of the Bucket class. /// public Bucket(double lowerBound, double upperBound, double count = 0.0) { if (lowerBound > upperBound) { throw new ArgumentException("The upper bound must be at least as large as the lower bound."); } if (count < 0.0) { throw new ArgumentOutOfRangeException(nameof(count), "Value must be positive."); } LowerBound = lowerBound; UpperBound = upperBound; Count = count; } /// /// Constructs a Bucket that can be used as an argument for a /// like when performing a Binary search. /// /// Value to look for public Bucket(double targetValue) { LowerBound = targetValue; UpperBound = targetValue; Count = double.NaN; } /// /// Creates a copy of the Bucket with the lowerbound, upperbound and counts exactly equal. /// /// A cloned Bucket object. public object Clone() { return new Bucket(LowerBound, UpperBound, Count); } /// /// Width of the Bucket. /// public double Width => UpperBound - LowerBound; /// /// True if this is a single point argument for /// when performing a Binary search. /// private bool IsSinglePoint => double.IsNaN(Count); /// /// Default comparer. /// public static IComparer DefaultPointComparer => Comparer; /// /// This method check whether a point is contained within this bucket. /// /// The point to check. /// /// 0 if the point falls within the bucket boundaries; /// -1 if the point is smaller than the bucket, /// +1 if the point is larger than the bucket. public int Contains(double x) { if (LowerBound < x) { if (UpperBound >= x) { return 0; } return 1; } return -1; } /// /// Comparison of two disjoint buckets. The buckets cannot be overlapping. /// /// /// 0 if UpperBound and LowerBound are bit-for-bit equal /// 1 if This bucket is lower that the compared bucket /// -1 otherwise /// public int CompareTo(Bucket bucket) { if (UpperBound > bucket.LowerBound && LowerBound < bucket.LowerBound) { throw new ArgumentException("The two arguments can\'t be compared (maybe they are part of a partial ordering?)"); } if (UpperBound.Equals(bucket.UpperBound) && LowerBound.Equals(bucket.LowerBound)) { return 0; } if (bucket.UpperBound <= LowerBound) { return 1; } return -1; } /// /// Checks whether two Buckets are equal. /// /// /// UpperBound and LowerBound are compared bit-for-bit, but This method tolerates a /// difference in Count given by . /// public override bool Equals(object obj) { if (!(obj is Bucket)) { return false; } var bucket = (Bucket) obj; return LowerBound.Equals(bucket.LowerBound) && UpperBound.Equals(bucket.UpperBound) && Count.AlmostEqual(bucket.Count); } /// /// Provides a hash code for this bucket. /// public override int GetHashCode() { return LowerBound.GetHashCode() ^ UpperBound.GetHashCode() ^ Count.GetHashCode(); } /// /// Formats a human-readable string for this bucket. /// public override string ToString() { return "(" + LowerBound + ";" + UpperBound + "] = " + Count; } } /// /// A class which computes histograms of data. /// [Serializable] [DataContract(Namespace = "urn:IStation/Numerics")] public class Histogram { /// /// Contains all the Buckets of the Histogram. /// [DataMember(Order = 1)] private readonly List _buckets; /// /// Indicates whether the elements of buckets are currently sorted. /// [DataMember(Order = 2)] private bool _areBucketsSorted; /// /// Initializes a new instance of the Histogram class. /// public Histogram() { _buckets = new List(); _areBucketsSorted = true; } /// /// Constructs a Histogram with a specific number of equally sized buckets. The upper and lower bound of the histogram /// will be set to the smallest and largest datapoint. /// /// The data sequence to build a histogram on. /// The number of buckets to use. public Histogram(IEnumerable data, int nbuckets) : this() { if (nbuckets < 1) { throw new ArgumentOutOfRangeException(nameof(data), "The number of bins in a histogram should be at least 1."); } double lower = data.Minimum(); double upper = data.Maximum(); double width = (upper - lower)/nbuckets; if (double.IsNaN(width)) { throw new ArgumentException("Data must contain at least one entry.", nameof(data)); } // Add buckets for each bin; the smallest bucket's lowerbound must be slightly smaller // than the minimal element. double fNextLowerBound = lower + width; AddBucket(new Bucket(lower.Decrement(), fNextLowerBound)); for (int n = 1; n < nbuckets; n++) { AddBucket(new Bucket( fNextLowerBound, fNextLowerBound = (lower + (n + 1) * width))); } AddData(data); } /// /// Constructs a Histogram with a specific number of equally sized buckets. /// /// The data sequence to build a histogram on. /// The number of buckets to use. /// The histogram lower bound. /// The histogram upper bound. public Histogram(IEnumerable data, int nbuckets, double lower, double upper) : this() { if (lower > upper) { throw new ArgumentOutOfRangeException(nameof(upper), "The histogram lower bound must be smaller than the upper bound."); } if (nbuckets < 1) { throw new ArgumentOutOfRangeException(nameof(nbuckets), "The number of bins in a histogram should be at least 1."); } double width = (upper - lower) / nbuckets; // Add buckets for each bin. for (int n = 0; n < nbuckets; n++) { AddBucket(new Bucket(lower + n * width, lower + (n + 1) * width)); } AddData(data); } /// /// Add one data point to the histogram. If the datapoint falls outside the range of the histogram, /// the lowerbound or upperbound will automatically adapt. /// /// The datapoint which we want to add. public void AddData(double d) { // Sort if needed. LazySort(); if (d <= LowerBound) { // Make the lower bound just slightly smaller than the datapoint so it is contained in this bucket. _buckets[0].LowerBound = d.Decrement(); _buckets[0].Count++; } else if (d > UpperBound) { _buckets[BucketCount - 1].UpperBound = d; _buckets[BucketCount - 1].Count++; } else { _buckets[GetBucketIndexOf(d)].Count++; } } /// /// Add a sequence of data point to the histogram. If the datapoint falls outside the range of the histogram, /// the lowerbound or upperbound will automatically adapt. /// /// The sequence of datapoints which we want to add. public void AddData(IEnumerable data) { foreach (double d in data) { AddData(d); } } /// /// Adds a Bucket to the Histogram. /// public void AddBucket(Bucket bucket) { _buckets.Add(bucket); _areBucketsSorted = false; } /// /// Sort the buckets if needed. /// private void LazySort() { if (!_areBucketsSorted) { _buckets.Sort(); _areBucketsSorted = true; } } /// /// Returns the Bucket that contains the value v. /// /// The point to search the bucket for. /// A copy of the bucket containing point . public Bucket GetBucketOf(double v) { return (Bucket) _buckets[GetBucketIndexOf(v)].Clone(); } /// /// Returns the index in the Histogram of the Bucket /// that contains the value v. /// /// The point to search the bucket index for. /// The index of the bucket containing the point. public int GetBucketIndexOf(double v) { // Sort if needed. LazySort(); // Binary search for the bucket index. int index = _buckets.BinarySearch(new Bucket(v), Bucket.DefaultPointComparer); if (index < 0) { throw new ArgumentException("The histogram does not contain the value."); } return index; } /// /// Returns the lower bound of the histogram. /// public double LowerBound { get { LazySort(); return _buckets[0].LowerBound; } } /// /// Returns the upper bound of the histogram. /// public double UpperBound { get { LazySort(); return _buckets[_buckets.Count - 1].UpperBound; } } /// /// Gets the n'th bucket. /// /// The index of the bucket to be returned. /// A copy of the n'th bucket. public Bucket this[int n] { get { LazySort(); return (Bucket) _buckets[n].Clone(); } } /// /// Gets the number of buckets. /// public int BucketCount => _buckets.Count; /// /// Gets the total number of datapoints in the histogram. /// public double DataCount { get { double totalCount = 0; for (int i = 0; i < BucketCount; i++) { totalCount += this[i].Count; } return totalCount; } } /// /// Prints the buckets contained in the . /// public override string ToString() { var sb = new StringBuilder(); foreach (Bucket b in _buckets) { sb.Append(b); } return sb.ToString(); } } }