// // Math.NET Numerics, part of the Math.NET Project // http://numerics.mathdotnet.com // http://github.com/mathnet/mathnet-numerics // // Copyright (c) 2009-2013 Math.NET // // Permission is hereby granted, free of charge, to any person // obtaining a copy of this software and associated documentation // files (the "Software"), to deal in the Software without // restriction, including without limitation the rights to use, // copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the // Software is furnished to do so, subject to the following // conditions: // // The above copyright notice and this permission notice shall be // included in all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR // OTHER DEALINGS IN THE SOFTWARE. // using System; using System.Collections.Generic; using System.Linq; using IStation.Numerics.LinearAlgebra; using IStation.Numerics.LinearAlgebra.Double; using IStation.Numerics.Random; using IStation.Numerics.Statistics; namespace IStation.Numerics.Distributions { /// /// Multivariate Multinomial distribution. For details about this distribution, see /// Wikipedia - Multinomial distribution. /// /// /// The distribution is parameterized by a vector of ratios: in other words, the parameter /// does not have to be normalized and sum to 1. The reason is that some vectors can't be exactly normalized /// to sum to 1 in floating point representation. /// public class Multinomial : IDistribution { System.Random _random; /// /// Stores the normalized multinomial probabilities. /// readonly double[] _p; /// /// The number of trials. /// readonly int _trials; /// /// Initializes a new instance of the Multinomial class. /// /// An array of nonnegative ratios: this array does not need to be normalized /// as this is often impossible using floating point arithmetic. /// The number of trials. /// If any of the probabilities are negative or do not sum to one. /// If is negative. public Multinomial(double[] p, int n) { if (Control.CheckDistributionParameters && !IsValidParameterSet(p, n)) { throw new ArgumentException("Invalid parametrization for the distribution."); } _random = SystemRandomSource.Default; _p = (double[])p.Clone(); _trials = n; } /// /// Initializes a new instance of the Multinomial class. /// /// An array of nonnegative ratios: this array does not need to be normalized /// as this is often impossible using floating point arithmetic. /// The number of trials. /// The random number generator which is used to draw random samples. /// If any of the probabilities are negative or do not sum to one. /// If is negative. public Multinomial(double[] p, int n, System.Random randomSource) { if (Control.CheckDistributionParameters && !IsValidParameterSet(p, n)) { throw new ArgumentException("Invalid parametrization for the distribution."); } _random = randomSource ?? SystemRandomSource.Default; _p = (double[])p.Clone(); _trials = n; } /// /// Initializes a new instance of the Multinomial class from histogram . The distribution will /// not be automatically updated when the histogram changes. /// /// Histogram instance /// The number of trials. /// If any of the probabilities are negative or do not sum to one. /// If is negative. public Multinomial(Histogram h, int n) { if (h == null) { throw new ArgumentNullException(nameof(h)); } // The probability distribution vector. var p = new double[h.BucketCount]; // Fill in the distribution vector. for (var i = 0; i < h.BucketCount; i++) { p[i] = h[i].Count; } if (Control.CheckDistributionParameters && !IsValidParameterSet(p, n)) { throw new ArgumentException("Invalid parametrization for the distribution."); } _p = (double[])p.Clone(); _trials = n; RandomSource = SystemRandomSource.Default; } /// /// A string representation of the distribution. /// /// a string representation of the distribution. public override string ToString() { return $"Multinomial(Dimension = {_p.Length}, Number of Trails = {_trials})"; } /// /// Tests whether the provided values are valid parameters for this distribution. /// /// An array of nonnegative ratios: this array does not need to be normalized /// as this is often impossible using floating point arithmetic. /// The number of trials. /// If any of the probabilities are negative returns false, /// if the sum of parameters is 0.0, or if the number of trials is negative; otherwise true. public static bool IsValidParameterSet(IEnumerable p, int n) { var sum = 0.0; foreach (var t in p) { if (t < 0.0 || double.IsNaN(t)) { return false; } sum += t; } if (sum == 0.0) { return false; } return n >= 0; } /// /// Gets the proportion of ratios. /// public double[] P => (double[])_p.Clone(); /// /// Gets the number of trials. /// public int N => _trials; /// /// Gets or sets the random number generator which is used to draw random samples. /// public System.Random RandomSource { get => _random; set => _random = value ?? SystemRandomSource.Default; } /// /// Gets the mean of the distribution. /// public Vector Mean => _trials*(DenseVector)P; /// /// Gets the variance of the distribution. /// public Vector Variance { get { // Do not use _p, because operations below will modify _p array. Use P or _p.Clone(). var res = (DenseVector)P; for (var i = 0; i < res.Count; i++) { res[i] *= _trials*(1 - res[i]); } return res; } } /// /// Gets the skewness of the distribution. /// public Vector Skewness { get { // Do not use _p, because operations below will modify _p array. Use P or _p.Clone(). var res = (DenseVector)P; for (var i = 0; i < res.Count; i++) { res[i] = (1.0 - (2.0*res[i]))/Math.Sqrt(_trials*(1.0 - res[i])*res[i]); } return res; } } /// /// Computes values of the probability mass function. /// /// Non-negative integers x1, ..., xk /// The probability mass at location . /// When is null. /// When length of is not equal to event probabilities count. public double Probability(int[] x) { if (null == x) { throw new ArgumentNullException(nameof(x)); } if (x.Length != _p.Length) { throw new ArgumentException("All vectors must have the same dimensionality.", nameof(x)); } if (x.Sum() == _trials) { var coef = SpecialFunctions.Multinomial(_trials, x); var num = 1.0; for (var i = 0; i < x.Length; i++) { num *= Math.Pow(_p[i], x[i]); } return coef*num; } return 0.0; } /// /// Computes values of the log probability mass function. /// /// Non-negative integers x1, ..., xk /// The log probability mass at location . /// When is null. /// When length of is not equal to event probabilities count. public double ProbabilityLn(int[] x) { if (null == x) { throw new ArgumentNullException(nameof(x)); } if (x.Length != _p.Length) { throw new ArgumentException("All vectors must have the same dimensionality.", nameof(x)); } if (x.Sum() == _trials) { var coef = Math.Log(SpecialFunctions.Multinomial(_trials, x)); var num = x.Select((t, i) => t*Math.Log(_p[i])).Sum(); return coef + num; } return 0.0; } /// /// Samples one multinomial distributed random variable. /// /// the counts for each of the different possible values. public int[] Sample() { return Sample(_random, _p, _trials); } /// /// Samples a sequence multinomially distributed random variables. /// /// a sequence of counts for each of the different possible values. public IEnumerable Samples() { while (true) { yield return Sample(_random, _p, _trials); } } /// /// Samples one multinomial distributed random variable. /// /// The random number generator to use. /// An array of nonnegative ratios: this array does not need to be normalized /// as this is often impossible using floating point arithmetic. /// The number of trials. /// the counts for each of the different possible values. public static int[] Sample(System.Random rnd, double[] p, int n) { if (Control.CheckDistributionParameters && !IsValidParameterSet(p, n)) { throw new ArgumentException("Invalid parametrization for the distribution."); } // The cumulative density of p. var cp = Categorical.ProbabilityMassToCumulativeDistribution(p); // The variable that stores the counts. var ret = new int[p.Length]; for (var i = 0; i < n; i++) { ret[Categorical.SampleUnchecked(rnd, cp)]++; } return ret; } /// /// Samples a multinomially distributed random variable. /// /// The random number generator to use. /// An array of nonnegative ratios: this array does not need to be normalized /// as this is often impossible using floating point arithmetic. /// The number of variables needed. /// a sequence of counts for each of the different possible values. public static IEnumerable Samples(System.Random rnd, double[] p, int n) { if (Control.CheckDistributionParameters && !IsValidParameterSet(p, n)) { throw new ArgumentException("Invalid parametrization for the distribution."); } // The cumulative density of p. var cp = Categorical.ProbabilityMassToCumulativeDistribution(p); while (true) { // The variable that stores the counts. var ret = new int[p.Length]; for (var i = 0; i < n; i++) { ret[Categorical.SampleUnchecked(rnd, cp)]++; } yield return ret; } } } }