// Math.NET Numerics, part of the Math.NET Project
// http://numerics.mathdotnet.com
// http://github.com/mathnet/mathnet-numerics
//
// Copyright (c) 2009-2014 Math.NET
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.

using System;
using System.Collections.Generic;
using IStation.Numerics.Random;

namespace IStation.Numerics.Distributions
{
    /// <summary>
    /// Discrete Univariate Hypergeometric distribution.
    /// This distribution is a discrete probability distribution that describes the number of successes in a sequence
    /// of n draws from a finite population without replacement, just as the binomial distribution
    /// describes the number of successes for draws with replacement.
    /// <a href="http://en.wikipedia.org/wiki/Hypergeometric_distribution">Wikipedia - Hypergeometric distribution</a>.
    /// </summary>
    public class Hypergeometric : IDiscreteDistribution
    {
        System.Random _random;

        readonly int _population;
        readonly int _success;
        readonly int _draws;

        /// <summary>
        /// Initializes a new instance of the Hypergeometric class that samples from
        /// <c>SystemRandomSource.Default</c>.
        /// </summary>
        /// <param name="population">The size of the population (N).</param>
        /// <param name="success">The number successes within the population (K, M).</param>
        /// <param name="draws">The number of draws without replacement (n).</param>
        /// <exception cref="ArgumentException">The parameters fail <see cref="IsValidParameterSet"/>.</exception>
        public Hypergeometric(int population, int success, int draws)
            : this(population, success, draws, null)
        {
            // A null random source makes the chained constructor fall back to the default
            // source, and only after the parameters have been validated.
        }

        /// <summary>
        /// Initializes a new instance of the Hypergeometric class.
        /// </summary>
        /// <param name="population">The size of the population (N).</param>
        /// <param name="success">The number successes within the population (K, M).</param>
        /// <param name="draws">The number of draws without replacement (n).</param>
        /// <param name="randomSource">The random number generator which is used to draw random samples;
        /// when null, <c>SystemRandomSource.Default</c> is used.</param>
        /// <exception cref="ArgumentException">The parameters fail <see cref="IsValidParameterSet"/>.</exception>
        public Hypergeometric(int population, int success, int draws, System.Random randomSource)
        {
            ThrowIfInvalidParameterSet(population, success, draws);

            _random = randomSource ?? SystemRandomSource.Default;
            _population = population;
            _success = success;
            _draws = draws;
        }

        /// <summary>
        /// Returns a <see cref="System.String"/> that represents this instance.
        /// </summary>
        /// <returns>
        /// A <see cref="System.String"/> that represents this instance.
        /// </returns>
        public override string ToString()
        {
            return $"Hypergeometric(N = {_population}, M = {_success}, n = {_draws})";
        }

        /// <summary>
        /// Tests whether the provided values are valid parameters for this distribution.
        /// </summary>
        /// <param name="population">The size of the population (N).</param>
        /// <param name="success">The number successes within the population (K, M).</param>
        /// <param name="draws">The number of draws without replacement (n).</param>
        /// <returns>True when all three values are non-negative and neither <paramref name="success"/>
        /// nor <paramref name="draws"/> exceeds <paramref name="population"/>.</returns>
        public static bool IsValidParameterSet(int population, int success, int draws)
        {
            return population >= 0 && success >= 0 && draws >= 0 && success <= population && draws <= population;
        }

        /// <summary>
        /// Throws when the parameters do not describe a valid distribution.
        /// Single place for the check shared by the constructors and the static entry points.
        /// </summary>
        static void ThrowIfInvalidParameterSet(int population, int success, int draws)
        {
            if (!IsValidParameterSet(population, success, draws))
            {
                throw new ArgumentException("Invalid parametrization for the distribution.");
            }
        }

        /// <summary>
        /// Gets or sets the random number generator which is used to draw random samples.
        /// Assigning null selects <c>SystemRandomSource.Default</c>.
        /// </summary>
        public System.Random RandomSource
        {
            get => _random;
            set => _random = value ?? SystemRandomSource.Default;
        }

        /// <summary>
        /// Gets the size of the population (N).
        /// </summary>
        public int Population => _population;

        /// <summary>
        /// Gets the number of draws without replacement (n).
        /// </summary>
        public int Draws => _draws;

        /// <summary>
        /// Gets the number successes within the population (K, M).
        /// </summary>
        public int Success => _success;

        /// <summary>
        /// Gets the mean of the distribution.
        /// </summary>
        public double Mean => (double)_success*_draws/_population;

        /// <summary>
        /// Gets the variance of the distribution.
        /// </summary>
        public double Variance
        {
            get
            {
                // Work in double throughout: the four-factor product and N*N overflow
                // 32-bit integer arithmetic for fairly modest parameters.
                double n = _population;
                double k = _success;
                double d = _draws;
                return d*k*(n - d)*(n - k)/(n*n*(n - 1.0));
            }
        }

        /// <summary>
        /// Gets the standard deviation of the distribution.
        /// </summary>
        public double StdDev => Math.Sqrt(Variance);

        /// <summary>
        /// Gets the entropy of the distribution. Not supported; always throws.
        /// </summary>
        /// <exception cref="NotSupportedException">Always.</exception>
        public double Entropy => throw new NotSupportedException();

        /// <summary>
        /// Gets the skewness of the distribution.
        /// </summary>
        public double Skewness
        {
            get
            {
                // Double arithmetic avoids integer overflow in 2*n, 2*K and the product under the root.
                double n = _population;
                double k = _success;
                double d = _draws;
                return Math.Sqrt(n - 1.0)*(n - 2*d)*(n - 2*k)/(Math.Sqrt(d*k*(n - k)*(n - d))*(n - 2.0));
            }
        }

        /// <summary>
        /// Gets the mode of the distribution, computed as floor((n + 1)(K + 1)/(N + 2)).
        /// </summary>
        // Evaluated in 64-bit: the intermediate product can exceed int.MaxValue,
        // while the quotient never exceeds the population size.
        public int Mode => (int)((_draws + 1L)*(_success + 1L)/(_population + 2L));

        /// <summary>
        /// Gets the median of the distribution. Not supported; always throws.
        /// </summary>
        /// <exception cref="NotSupportedException">Always.</exception>
        public double Median => throw new NotSupportedException();

        /// <summary>
        /// Gets the minimum of the distribution.
        /// </summary>
        // Written as n - (N - K) rather than n + K - N so the intermediate cannot overflow.
        public int Minimum => Math.Max(0, _draws - (_population - _success));

        /// <summary>
        /// Gets the maximum of the distribution.
        /// </summary>
        public int Maximum => Math.Min(_success, _draws);

        /// <summary>
        /// Computes the probability mass (PMF) at k, i.e. P(X = k).
        /// </summary>
        /// <param name="k">The location in the domain where we want to evaluate the probability mass function.</param>
        /// <returns>the probability mass at location <paramref name="k"/>.</returns>
        public double Probability(int k)
        {
            return SpecialFunctions.Binomial(_success, k)*SpecialFunctions.Binomial(_population - _success, _draws - k)/SpecialFunctions.Binomial(_population, _draws);
        }

        /// <summary>
        /// Computes the log probability mass (lnPMF) at k, i.e. ln(P(X = k)).
        /// </summary>
        /// <param name="k">The location in the domain where we want to evaluate the log probability mass function.</param>
        /// <returns>the log probability mass at location <paramref name="k"/>.</returns>
        public double ProbabilityLn(int k)
        {
            return SpecialFunctions.BinomialLn(_success, k) + SpecialFunctions.BinomialLn(_population - _success, _draws - k) - SpecialFunctions.BinomialLn(_population, _draws);
        }

        /// <summary>
        /// Computes the cumulative distribution (CDF) of the distribution at x, i.e. P(X ≤ x).
        /// </summary>
        /// <param name="x">The location at which to compute the cumulative distribution function.</param>
        /// <returns>the cumulative distribution at location <paramref name="x"/>.</returns>
        public double CumulativeDistribution(double x)
        {
            return CDF(_population, _success, _draws, x);
        }

        /// <summary>
        /// Computes the probability mass (PMF) at k, i.e. P(X = k).
        /// </summary>
        /// <param name="population">The size of the population (N).</param>
        /// <param name="success">The number successes within the population (K, M).</param>
        /// <param name="draws">The number of draws without replacement (n).</param>
        /// <param name="k">The location in the domain where we want to evaluate the probability mass function.</param>
        /// <returns>the probability mass at location <paramref name="k"/>.</returns>
        /// <exception cref="ArgumentException">The parameters fail <see cref="IsValidParameterSet"/>.</exception>
        public static double PMF(int population, int success, int draws, int k)
        {
            ThrowIfInvalidParameterSet(population, success, draws);

            return SpecialFunctions.Binomial(success, k)*SpecialFunctions.Binomial(population - success, draws - k)/SpecialFunctions.Binomial(population, draws);
        }

        /// <summary>
        /// Computes the log probability mass (lnPMF) at k, i.e. ln(P(X = k)).
        /// </summary>
        /// <param name="population">The size of the population (N).</param>
        /// <param name="success">The number successes within the population (K, M).</param>
        /// <param name="draws">The number of draws without replacement (n).</param>
        /// <param name="k">The location in the domain where we want to evaluate the log probability mass function.</param>
        /// <returns>the log probability mass at location <paramref name="k"/>.</returns>
        /// <exception cref="ArgumentException">The parameters fail <see cref="IsValidParameterSet"/>.</exception>
        public static double PMFLn(int population, int success, int draws, int k)
        {
            ThrowIfInvalidParameterSet(population, success, draws);

            return SpecialFunctions.BinomialLn(success, k) + SpecialFunctions.BinomialLn(population - success, draws - k) - SpecialFunctions.BinomialLn(population, draws);
        }

        /// <summary>
        /// Computes the cumulative distribution (CDF) of the distribution at x, i.e. P(X ≤ x).
        /// </summary>
        /// <param name="population">The size of the population (N).</param>
        /// <param name="success">The number successes within the population (K, M).</param>
        /// <param name="draws">The number of draws without replacement (n).</param>
        /// <param name="x">The location at which to compute the cumulative distribution function.</param>
        /// <returns>the cumulative distribution at location <paramref name="x"/>, never greater than 1.</returns>
        /// <exception cref="ArgumentException">The parameters fail <see cref="IsValidParameterSet"/>.</exception>
        public static double CDF(int population, int success, int draws, double x)
        {
            ThrowIfInvalidParameterSet(population, success, draws);

            // Smallest attainable count; n - (N - K) cannot overflow, unlike n + K - N.
            var lowest = Math.Max(0, draws - (population - success));
            if (x < lowest)
            {
                return 0.0;
            }

            if (x >= Math.Min(success, draws))
            {
                return 1.0;
            }

            var k = (int)Math.Floor(x);
            var denominatorLn = SpecialFunctions.BinomialLn(population, draws);

            // Counts below the support have zero probability mass, so the sum starts at the lower bound.
            var sum = 0.0;
            for (var i = lowest; i <= k; i++)
            {
                sum += Math.Exp(SpecialFunctions.BinomialLn(success, i) + SpecialFunctions.BinomialLn(population - success, draws - i) - denominatorLn);
            }

            // Accumulated rounding may push the sum marginally above one.
            return Math.Min(sum, 1.0);
        }

        /// <summary>
        /// Generates a sample from the Hypergeometric distribution without doing parameter checking.
        /// Simulates the draws one at a time, removing each drawn item from the remaining population.
        /// </summary>
        /// <param name="rnd">The random number generator to use.</param>
        /// <param name="population">The size of the population (N).</param>
        /// <param name="success">The number successes within the population (K, M).</param>
        /// <param name="draws">The n parameter of the distribution.</param>
        /// <returns>a random number from the Hypergeometric distribution.</returns>
        static int SampleUnchecked(System.Random rnd, int population, int success, int draws)
        {
            var x = 0;

            // Pre-tested loop: with zero draws nothing may be drawn and the result must be 0.
            while (draws > 0)
            {
                var p = (double)success/population;
                if (rnd.NextDouble() < p)
                {
                    x++;
                    success--;
                }

                population--;
                draws--;
            }

            return x;
        }

        /// <summary>
        /// Fills <paramref name="values"/> with independent samples, without parameter checking.
        /// </summary>
        static void SamplesUnchecked(System.Random rnd, int[] values, int population, int success, int draws)
        {
            for (var i = 0; i < values.Length; i++)
            {
                values[i] = SampleUnchecked(rnd, population, success, draws);
            }
        }

        /// <summary>
        /// Produces an endless, lazily evaluated sequence of samples, without parameter checking.
        /// </summary>
        static IEnumerable<int> SamplesUnchecked(System.Random rnd, int population, int success, int draws)
        {
            while (true)
            {
                yield return SampleUnchecked(rnd, population, success, draws);
            }
        }

        /// <summary>
        /// Samples a Hypergeometric distributed random variable.
        /// </summary>
        /// <returns>The number of successes in n trials.</returns>
        public int Sample()
        {
            return SampleUnchecked(_random, _population, _success, _draws);
        }

        /// <summary>
        /// Fills an array with samples generated from the distribution.
        /// </summary>
        /// <param name="values">The array to fill with the samples.</param>
        public void Samples(int[] values)
        {
            SamplesUnchecked(_random, values, _population, _success, _draws);
        }

        /// <summary>
        /// Samples an array of Hypergeometric distributed random variables.
        /// </summary>
        /// <returns>a sequence of successes in n trials.</returns>
        public IEnumerable<int> Samples()
        {
            return SamplesUnchecked(_random, _population, _success, _draws);
        }

        /// <summary>
        /// Samples a random variable.
        /// </summary>
        /// <param name="rnd">The random number generator to use.</param>
        /// <param name="population">The size of the population (N).</param>
        /// <param name="success">The number successes within the population (K, M).</param>
        /// <param name="draws">The number of draws without replacement (n).</param>
        /// <exception cref="ArgumentException">The parameters fail <see cref="IsValidParameterSet"/>.</exception>
        public static int Sample(System.Random rnd, int population, int success, int draws)
        {
            ThrowIfInvalidParameterSet(population, success, draws);

            return SampleUnchecked(rnd, population, success, draws);
        }

        /// <summary>
        /// Samples a sequence of this random variable.
        /// The parameters are validated immediately; the samples themselves are generated lazily.
        /// </summary>
        /// <param name="rnd">The random number generator to use.</param>
        /// <param name="population">The size of the population (N).</param>
        /// <param name="success">The number successes within the population (K, M).</param>
        /// <param name="draws">The number of draws without replacement (n).</param>
        /// <exception cref="ArgumentException">The parameters fail <see cref="IsValidParameterSet"/>.</exception>
        public static IEnumerable<int> Samples(System.Random rnd, int population, int success, int draws)
        {
            ThrowIfInvalidParameterSet(population, success, draws);

            return SamplesUnchecked(rnd, population, success, draws);
        }

        /// <summary>
        /// Fills an array with samples generated from the distribution.
        /// </summary>
        /// <param name="rnd">The random number generator to use.</param>
        /// <param name="values">The array to fill with the samples.</param>
        /// <param name="population">The size of the population (N).</param>
        /// <param name="success">The number successes within the population (K, M).</param>
        /// <param name="draws">The number of draws without replacement (n).</param>
        /// <exception cref="ArgumentException">The parameters fail <see cref="IsValidParameterSet"/>.</exception>
        public static void Samples(System.Random rnd, int[] values, int population, int success, int draws)
        {
            ThrowIfInvalidParameterSet(population, success, draws);

            SamplesUnchecked(rnd, values, population, success, draws);
        }

        /// <summary>
        /// Samples a random variable using <c>SystemRandomSource.Default</c>.
        /// </summary>
        /// <param name="population">The size of the population (N).</param>
        /// <param name="success">The number successes within the population (K, M).</param>
        /// <param name="draws">The number of draws without replacement (n).</param>
        /// <exception cref="ArgumentException">The parameters fail <see cref="IsValidParameterSet"/>.</exception>
        public static int Sample(int population, int success, int draws)
        {
            ThrowIfInvalidParameterSet(population, success, draws);

            return SampleUnchecked(SystemRandomSource.Default, population, success, draws);
        }

        /// <summary>
        /// Samples a sequence of this random variable using <c>SystemRandomSource.Default</c>.
        /// The parameters are validated immediately; the samples themselves are generated lazily.
        /// </summary>
        /// <param name="population">The size of the population (N).</param>
        /// <param name="success">The number successes within the population (K, M).</param>
        /// <param name="draws">The number of draws without replacement (n).</param>
        /// <exception cref="ArgumentException">The parameters fail <see cref="IsValidParameterSet"/>.</exception>
        public static IEnumerable<int> Samples(int population, int success, int draws)
        {
            ThrowIfInvalidParameterSet(population, success, draws);

            return SamplesUnchecked(SystemRandomSource.Default, population, success, draws);
        }

        /// <summary>
        /// Fills an array with samples generated from the distribution using <c>SystemRandomSource.Default</c>.
        /// </summary>
        /// <param name="values">The array to fill with the samples.</param>
        /// <param name="population">The size of the population (N).</param>
        /// <param name="success">The number successes within the population (K, M).</param>
        /// <param name="draws">The number of draws without replacement (n).</param>
        /// <exception cref="ArgumentException">The parameters fail <see cref="IsValidParameterSet"/>.</exception>
        public static void Samples(int[] values, int population, int success, int draws)
        {
            ThrowIfInvalidParameterSet(population, success, draws);

            SamplesUnchecked(SystemRandomSource.Default, values, population, success, draws);
        }
    }
}