//
// Math.NET Numerics, part of the Math.NET Project
// http://numerics.mathdotnet.com
// http://github.com/mathnet/mathnet-numerics
//
// Copyright (c) 2009-2014 Math.NET
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//
using System;
using System.Collections.Generic;
using IStation.Numerics.Random;
namespace IStation.Numerics.Distributions
{
/// <summary>
/// Discrete Univariate Hypergeometric distribution.
/// This distribution is a discrete probability distribution that describes the number of successes in a sequence
/// of n draws from a finite population without replacement, just as the binomial distribution
/// describes the number of successes for draws with replacement.
/// <a href="http://en.wikipedia.org/wiki/Hypergeometric_distribution">Wikipedia - Hypergeometric distribution</a>.
/// </summary>
public class Hypergeometric : IDiscreteDistribution
{
    System.Random _random;
    readonly int _population;
    readonly int _success;
    readonly int _draws;

    /// <summary>
    /// Initializes a new instance of the Hypergeometric class using the default random source.
    /// </summary>
    /// <param name="population">The size of the population (N).</param>
    /// <param name="success">The number successes within the population (K, M).</param>
    /// <param name="draws">The number of draws without replacement (n).</param>
    /// <exception cref="ArgumentException">The parameters fail <see cref="IsValidParameterSet"/>.</exception>
    public Hypergeometric(int population, int success, int draws)
    {
        EnsureValidParameterSet(population, success, draws);

        _random = SystemRandomSource.Default;
        _population = population;
        _success = success;
        _draws = draws;
    }

    /// <summary>
    /// Initializes a new instance of the Hypergeometric class.
    /// </summary>
    /// <param name="population">The size of the population (N).</param>
    /// <param name="success">The number successes within the population (K, M).</param>
    /// <param name="draws">The number of draws without replacement (n).</param>
    /// <param name="randomSource">The random number generator which is used to draw random samples.
    /// When <c>null</c>, the default random source is used instead.</param>
    /// <exception cref="ArgumentException">The parameters fail <see cref="IsValidParameterSet"/>.</exception>
    public Hypergeometric(int population, int success, int draws, System.Random randomSource)
    {
        EnsureValidParameterSet(population, success, draws);

        _random = randomSource ?? SystemRandomSource.Default;
        _population = population;
        _success = success;
        _draws = draws;
    }

    /// <summary>
    /// Returns a <see cref="System.String"/> that represents this instance.
    /// </summary>
    /// <returns>
    /// A <see cref="System.String"/> that represents this instance.
    /// </returns>
    public override string ToString()
    {
        return $"Hypergeometric(N = {_population}, M = {_success}, n = {_draws})";
    }

    /// <summary>
    /// Tests whether the provided values are valid parameters for this distribution.
    /// </summary>
    /// <param name="population">The size of the population (N).</param>
    /// <param name="success">The number successes within the population (K, M).</param>
    /// <param name="draws">The number of draws without replacement (n).</param>
    /// <returns><c>true</c> when all three values are non-negative and neither <paramref name="success"/>
    /// nor <paramref name="draws"/> exceeds <paramref name="population"/>; otherwise <c>false</c>.</returns>
    public static bool IsValidParameterSet(int population, int success, int draws)
    {
        return population >= 0 && success >= 0 && draws >= 0 && success <= population && draws <= population;
    }

    /// <summary>
    /// Throws when the parameters do not describe a valid distribution.
    /// Single point of validation shared by the constructors and all static entry points.
    /// </summary>
    /// <exception cref="ArgumentException">The parameters fail <see cref="IsValidParameterSet"/>.</exception>
    static void EnsureValidParameterSet(int population, int success, int draws)
    {
        if (!IsValidParameterSet(population, success, draws))
        {
            throw new ArgumentException("Invalid parametrization for the distribution.");
        }
    }

    /// <summary>
    /// Gets or sets the random number generator which is used to draw random samples.
    /// Assigning <c>null</c> selects the default random source.
    /// </summary>
    public System.Random RandomSource
    {
        get => _random;
        set => _random = value ?? SystemRandomSource.Default;
    }

    /// <summary>
    /// Gets the size of the population (N).
    /// </summary>
    public int Population => _population;

    /// <summary>
    /// Gets the number of draws without replacement (n).
    /// </summary>
    public int Draws => _draws;

    /// <summary>
    /// Gets the number successes within the population (K, M).
    /// </summary>
    public int Success => _success;

    /// <summary>
    /// Gets the mean of the distribution, n*K/N.
    /// </summary>
    public double Mean => (double)_success*_draws/_population;

    /// <summary>
    /// Gets the variance of the distribution, n*K*(N-n)*(N-K) / (N*N*(N-1)).
    /// </summary>
    public double Variance
    {
        get
        {
            // Work in double throughout: the product of four ints (and N*N) silently wraps
            // around in 32-bit arithmetic for even moderately sized populations.
            double population = _population;
            double numerator = (double)_draws*_success*(population - _draws)*(population - _success);
            return numerator/(population*population*(population - 1.0));
        }
    }

    /// <summary>
    /// Gets the standard deviation of the distribution.
    /// </summary>
    public double StdDev => Math.Sqrt(Variance);

    /// <summary>
    /// Gets the entropy of the distribution. Not supported for this distribution.
    /// </summary>
    /// <exception cref="NotSupportedException">Always thrown.</exception>
    public double Entropy => throw new NotSupportedException();

    /// <summary>
    /// Gets the skewness of the distribution.
    /// </summary>
    public double Skewness
    {
        get
        {
            // Same overflow concern as Variance: keep every factor in double.
            double population = _population;
            double spread = (double)_draws*_success*(population - _success)*(population - _draws);
            return (Math.Sqrt(population - 1.0)*(population - 2.0*_draws)*(population - 2.0*_success))
                   /(Math.Sqrt(spread)*(population - 2.0));
        }
    }

    /// <summary>
    /// Gets the mode of the distribution, floor((n+1)*(K+1)/(N+2)).
    /// </summary>
    // The intermediate product can exceed Int32; the quotient never exceeds N, so the cast back is safe.
    public int Mode => (int)((_draws + 1L)*(_success + 1L)/(_population + 2L));

    /// <summary>
    /// Gets the median of the distribution. Not supported for this distribution.
    /// </summary>
    /// <exception cref="NotSupportedException">Always thrown.</exception>
    public double Median => throw new NotSupportedException();

    /// <summary>
    /// Gets the minimum of the distribution, max(0, n + K - N).
    /// </summary>
    // Written as n - (N - K) so that the intermediate value cannot overflow.
    public int Minimum => Math.Max(0, _draws - (_population - _success));

    /// <summary>
    /// Gets the maximum of the distribution, min(K, n).
    /// </summary>
    public int Maximum => Math.Min(_success, _draws);

    /// <summary>
    /// Computes the probability mass (PMF) at k, i.e. P(X = k).
    /// </summary>
    /// <param name="k">The location in the domain where we want to evaluate the probability mass function.</param>
    /// <returns>the probability mass at location <paramref name="k"/>.</returns>
    public double Probability(int k)
    {
        return PMF(_population, _success, _draws, k);
    }

    /// <summary>
    /// Computes the log probability mass (lnPMF) at k, i.e. ln(P(X = k)).
    /// </summary>
    /// <param name="k">The location in the domain where we want to evaluate the log probability mass function.</param>
    /// <returns>the log probability mass at location <paramref name="k"/>.</returns>
    public double ProbabilityLn(int k)
    {
        return PMFLn(_population, _success, _draws, k);
    }

    /// <summary>
    /// Computes the cumulative distribution (CDF) of the distribution at x, i.e. P(X ≤ x).
    /// </summary>
    /// <param name="x">The location at which to compute the cumulative distribution function.</param>
    /// <returns>the cumulative distribution at location <paramref name="x"/>.</returns>
    public double CumulativeDistribution(double x)
    {
        return CDF(_population, _success, _draws, x);
    }

    /// <summary>
    /// Computes the probability mass (PMF) at k, i.e. P(X = k).
    /// </summary>
    /// <param name="population">The size of the population (N).</param>
    /// <param name="success">The number successes within the population (K, M).</param>
    /// <param name="draws">The number of draws without replacement (n).</param>
    /// <param name="k">The location in the domain where we want to evaluate the probability mass function.</param>
    /// <returns>the probability mass at location <paramref name="k"/>.</returns>
    /// <exception cref="ArgumentException">The parameters fail <see cref="IsValidParameterSet"/>.</exception>
    public static double PMF(int population, int success, int draws, int k)
    {
        EnsureValidParameterSet(population, success, draws);

        // C(K, k) * C(N - K, n - k) / C(N, n)
        // NOTE(review): the coefficients are evaluated directly; for large parameters PMFLn is
        // presumably the numerically safer route - confirm against SpecialFunctions.Binomial.
        var favourable = SpecialFunctions.Binomial(success, k)*SpecialFunctions.Binomial(population - success, draws - k);
        return favourable/SpecialFunctions.Binomial(population, draws);
    }

    /// <summary>
    /// Computes the log probability mass (lnPMF) at k, i.e. ln(P(X = k)).
    /// </summary>
    /// <param name="population">The size of the population (N).</param>
    /// <param name="success">The number successes within the population (K, M).</param>
    /// <param name="draws">The number of draws without replacement (n).</param>
    /// <param name="k">The location in the domain where we want to evaluate the log probability mass function.</param>
    /// <returns>the log probability mass at location <paramref name="k"/>.</returns>
    /// <exception cref="ArgumentException">The parameters fail <see cref="IsValidParameterSet"/>.</exception>
    public static double PMFLn(int population, int success, int draws, int k)
    {
        EnsureValidParameterSet(population, success, draws);

        return SpecialFunctions.BinomialLn(success, k)
               + SpecialFunctions.BinomialLn(population - success, draws - k)
               - SpecialFunctions.BinomialLn(population, draws);
    }

    /// <summary>
    /// Computes the cumulative distribution (CDF) of the distribution at x, i.e. P(X ≤ x).
    /// </summary>
    /// <param name="population">The size of the population (N).</param>
    /// <param name="success">The number successes within the population (K, M).</param>
    /// <param name="draws">The number of draws without replacement (n).</param>
    /// <param name="x">The location at which to compute the cumulative distribution function.</param>
    /// <returns>the cumulative distribution at location <paramref name="x"/>.</returns>
    /// <exception cref="ArgumentException">The parameters fail <see cref="IsValidParameterSet"/>.</exception>
    /// <seealso cref="CumulativeDistribution"/>
    public static double CDF(int population, int success, int draws, double x)
    {
        EnsureValidParameterSet(population, success, draws);

        // Lower end of the support; n - (N - K) avoids the overflow that n + K - N can hit.
        var minimum = Math.Max(0, draws - (population - success));
        if (x < minimum)
        {
            return 0.0;
        }

        if (x >= Math.Min(success, draws))
        {
            return 1.0;
        }

        var k = (int)Math.Floor(x);
        var denominatorLn = SpecialFunctions.BinomialLn(population, draws);
        var sum = 0.0;

        // Outcomes below the support minimum carry no probability mass, so start there.
        for (var i = minimum; i <= k; i++)
        {
            sum += Math.Exp(SpecialFunctions.BinomialLn(success, i) + SpecialFunctions.BinomialLn(population - success, draws - i) - denominatorLn);
        }

        // Guard against accumulated rounding pushing the total marginally above one.
        return Math.Min(sum, 1.0);
    }

    /// <summary>
    /// Generates a sample from the Hypergeometric distribution without doing parameter checking.
    /// </summary>
    /// <param name="rnd">The random number generator to use.</param>
    /// <param name="population">The size of the population (N).</param>
    /// <param name="success">The number successes within the population (K, M).</param>
    /// <param name="draws">The n parameter of the distribution.</param>
    /// <returns>a random number from the Hypergeometric distribution.</returns>
    static int SampleUnchecked(System.Random rnd, int population, int success, int draws)
    {
        var x = 0;

        // Simulate the draws one by one. A pre-checked loop is required here: with zero draws
        // no trial may be performed, otherwise a "success" could be reported for an empty sample.
        while (draws > 0)
        {
            // Chance that the next item drawn from what remains is a success.
            var p = (double)success/population;
            if (rnd.NextDouble() < p)
            {
                x++;
                success--;
            }

            population--;
            draws--;
        }

        return x;
    }

    /// <summary>
    /// Fills <paramref name="values"/> with independent samples, without doing parameter checking.
    /// </summary>
    static void SamplesUnchecked(System.Random rnd, int[] values, int population, int success, int draws)
    {
        for (var i = 0; i < values.Length; i++)
        {
            values[i] = SampleUnchecked(rnd, population, success, draws);
        }
    }

    /// <summary>
    /// Produces an endless, lazily evaluated sequence of samples, without doing parameter checking.
    /// </summary>
    static IEnumerable<int> SamplesUnchecked(System.Random rnd, int population, int success, int draws)
    {
        while (true)
        {
            yield return SampleUnchecked(rnd, population, success, draws);
        }
    }

    /// <summary>
    /// Samples a Hypergeometric distributed random variable.
    /// </summary>
    /// <returns>The number of successes in n trials.</returns>
    public int Sample()
    {
        return SampleUnchecked(_random, _population, _success, _draws);
    }

    /// <summary>
    /// Fills an array with samples generated from the distribution.
    /// </summary>
    /// <param name="values">The array to fill with the samples.</param>
    public void Samples(int[] values)
    {
        SamplesUnchecked(_random, values, _population, _success, _draws);
    }

    /// <summary>
    /// Samples an array of Hypergeometric distributed random variables.
    /// </summary>
    /// <returns>a sequence of successes in n trials. The sequence never terminates on its own.</returns>
    public IEnumerable<int> Samples()
    {
        return SamplesUnchecked(_random, _population, _success, _draws);
    }

    /// <summary>
    /// Samples a random variable.
    /// </summary>
    /// <param name="rnd">The random number generator to use.</param>
    /// <param name="population">The size of the population (N).</param>
    /// <param name="success">The number successes within the population (K, M).</param>
    /// <param name="draws">The number of draws without replacement (n).</param>
    /// <returns>a sample from the distribution.</returns>
    /// <exception cref="ArgumentException">The parameters fail <see cref="IsValidParameterSet"/>.</exception>
    public static int Sample(System.Random rnd, int population, int success, int draws)
    {
        EnsureValidParameterSet(population, success, draws);

        return SampleUnchecked(rnd, population, success, draws);
    }

    /// <summary>
    /// Samples a sequence of this random variable.
    /// </summary>
    /// <param name="rnd">The random number generator to use.</param>
    /// <param name="population">The size of the population (N).</param>
    /// <param name="success">The number successes within the population (K, M).</param>
    /// <param name="draws">The number of draws without replacement (n).</param>
    /// <returns>a sequence of samples from the distribution. The sequence never terminates on its own.</returns>
    /// <exception cref="ArgumentException">The parameters fail <see cref="IsValidParameterSet"/>.</exception>
    public static IEnumerable<int> Samples(System.Random rnd, int population, int success, int draws)
    {
        // Validation happens here, eagerly, rather than inside the lazy iterator.
        EnsureValidParameterSet(population, success, draws);

        return SamplesUnchecked(rnd, population, success, draws);
    }

    /// <summary>
    /// Fills an array with samples generated from the distribution.
    /// </summary>
    /// <param name="rnd">The random number generator to use.</param>
    /// <param name="values">The array to fill with the samples.</param>
    /// <param name="population">The size of the population (N).</param>
    /// <param name="success">The number successes within the population (K, M).</param>
    /// <param name="draws">The number of draws without replacement (n).</param>
    /// <exception cref="ArgumentException">The parameters fail <see cref="IsValidParameterSet"/>.</exception>
    public static void Samples(System.Random rnd, int[] values, int population, int success, int draws)
    {
        EnsureValidParameterSet(population, success, draws);

        SamplesUnchecked(rnd, values, population, success, draws);
    }

    /// <summary>
    /// Samples a random variable using the default random source.
    /// </summary>
    /// <param name="population">The size of the population (N).</param>
    /// <param name="success">The number successes within the population (K, M).</param>
    /// <param name="draws">The number of draws without replacement (n).</param>
    /// <returns>a sample from the distribution.</returns>
    /// <exception cref="ArgumentException">The parameters fail <see cref="IsValidParameterSet"/>.</exception>
    public static int Sample(int population, int success, int draws)
    {
        EnsureValidParameterSet(population, success, draws);

        return SampleUnchecked(SystemRandomSource.Default, population, success, draws);
    }

    /// <summary>
    /// Samples a sequence of this random variable using the default random source.
    /// </summary>
    /// <param name="population">The size of the population (N).</param>
    /// <param name="success">The number successes within the population (K, M).</param>
    /// <param name="draws">The number of draws without replacement (n).</param>
    /// <returns>a sequence of samples from the distribution. The sequence never terminates on its own.</returns>
    /// <exception cref="ArgumentException">The parameters fail <see cref="IsValidParameterSet"/>.</exception>
    public static IEnumerable<int> Samples(int population, int success, int draws)
    {
        // Validation happens here, eagerly, rather than inside the lazy iterator.
        EnsureValidParameterSet(population, success, draws);

        return SamplesUnchecked(SystemRandomSource.Default, population, success, draws);
    }

    /// <summary>
    /// Fills an array with samples generated from the distribution using the default random source.
    /// </summary>
    /// <param name="values">The array to fill with the samples.</param>
    /// <param name="population">The size of the population (N).</param>
    /// <param name="success">The number successes within the population (K, M).</param>
    /// <param name="draws">The number of draws without replacement (n).</param>
    /// <exception cref="ArgumentException">The parameters fail <see cref="IsValidParameterSet"/>.</exception>
    public static void Samples(int[] values, int population, int success, int draws)
    {
        EnsureValidParameterSet(population, success, draws);

        SamplesUnchecked(SystemRandomSource.Default, values, population, success, draws);
    }
}
}