// // Math.NET Numerics, part of the Math.NET Project // http://numerics.mathdotnet.com // http://github.com/mathnet/mathnet-numerics // // Copyright (c) 2009-2014 Math.NET // // Permission is hereby granted, free of charge, to any person // obtaining a copy of this software and associated documentation // files (the "Software"), to deal in the Software without // restriction, including without limitation the rights to use, // copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the // Software is furnished to do so, subject to the following // conditions: // // The above copyright notice and this permission notice shall be // included in all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR // OTHER DEALINGS IN THE SOFTWARE. // using System; using System.Collections.Generic; using IStation.Numerics.Random; namespace IStation.Numerics.Distributions { /// /// Discrete Univariate Zipf distribution. /// Zipf's law, an empirical law formulated using mathematical statistics, refers to the fact /// that many types of data studied in the physical and social sciences can be approximated with /// a Zipfian distribution, one of a family of related discrete power law probability distributions. /// For details about this distribution, see /// Wikipedia - Zipf distribution. /// public class Zipf : IDiscreteDistribution { System.Random _random; /// /// The s parameter of the distribution. /// readonly double _s; /// /// The n parameter of the distribution. /// readonly int _n; /// /// Initializes a new instance of the class. /// /// The s parameter of the distribution. /// The n parameter of the distribution. public Zipf(double s, int n) { if (!IsValidParameterSet(s, n)) { throw new ArgumentException("Invalid parametrization for the distribution."); } _random = SystemRandomSource.Default; _s = s; _n = n; } /// /// Initializes a new instance of the class. /// /// The s parameter of the distribution. /// The n parameter of the distribution. /// The random number generator which is used to draw random samples. public Zipf(double s, int n, System.Random randomSource) { if (!IsValidParameterSet(s, n)) { throw new ArgumentException("Invalid parametrization for the distribution."); } _random = randomSource ?? SystemRandomSource.Default; _s = s; _n = n; } /// /// A string representation of the distribution. /// /// a string representation of the distribution. public override string ToString() { return $"Zipf(S = {_s}, N = {_n})"; } /// /// Tests whether the provided values are valid parameters for this distribution. /// /// The s parameter of the distribution. /// The n parameter of the distribution. public static bool IsValidParameterSet(double s, int n) { return n > 0 && s > 0.0; } /// /// Gets or sets the s parameter of the distribution. /// public double S => _s; /// /// Gets or sets the n parameter of the distribution. /// public int N => _n; /// /// Gets or sets the random number generator which is used to draw random samples. /// public System.Random RandomSource { get => _random; set => _random = value ?? SystemRandomSource.Default; } /// /// Gets the mean of the distribution. /// public double Mean => SpecialFunctions.GeneralHarmonic(_n, _s - 1.0)/SpecialFunctions.GeneralHarmonic(_n, _s); /// /// Gets the variance of the distribution. /// public double Variance { get { if (_s <= 3) { throw new NotSupportedException(); } var ghns = SpecialFunctions.GeneralHarmonic(_n, _s); return (SpecialFunctions.GeneralHarmonic(_n, _s - 2)*SpecialFunctions.GeneralHarmonic(_n, _s)) - (Math.Pow(SpecialFunctions.GeneralHarmonic(_n, _s - 1), 2)/(ghns*ghns)); } } /// /// Gets the standard deviation of the distribution. /// public double StdDev => Math.Sqrt(Variance); /// /// Gets the entropy of the distribution. /// public double Entropy { get { double sum = 0; for (var i = 0; i < _n; i++) { sum += Math.Log(i + 1)/Math.Pow(i + 1, _s); } return ((_s/SpecialFunctions.GeneralHarmonic(_n, _s))*sum) + Math.Log(SpecialFunctions.GeneralHarmonic(_n, _s)); } } /// /// Gets the skewness of the distribution. /// public double Skewness { get { if (_s <= 4) { throw new NotSupportedException(); } return ((SpecialFunctions.GeneralHarmonic(_n, _s - 3)*Math.Pow(SpecialFunctions.GeneralHarmonic(_n, _s), 2)) - (SpecialFunctions.GeneralHarmonic(_n, _s - 1)*((3*SpecialFunctions.GeneralHarmonic(_n, _s - 2)*SpecialFunctions.GeneralHarmonic(_n, _s)) - Math.Pow(SpecialFunctions.GeneralHarmonic(_n, _s - 1), 2))))/Math.Pow((SpecialFunctions.GeneralHarmonic(_n, _s - 2)*SpecialFunctions.GeneralHarmonic(_n, _s)) - Math.Pow(SpecialFunctions.GeneralHarmonic(_n, _s - 1), 2), 1.5); } } /// /// Gets the mode of the distribution. /// public int Mode => 1; /// /// Gets the median of the distribution. /// public double Median => throw new NotSupportedException(); /// /// Gets the smallest element in the domain of the distributions which can be represented by an integer. /// public int Minimum => 1; /// /// Gets the largest element in the domain of the distributions which can be represented by an integer. /// public int Maximum => _n; /// /// Computes the probability mass (PMF) at k, i.e. P(X = k). /// /// The location in the domain where we want to evaluate the probability mass function. /// the probability mass at location . public double Probability(int k) { return (1.0/Math.Pow(k, _s))/SpecialFunctions.GeneralHarmonic(_n, _s); } /// /// Computes the log probability mass (lnPMF) at k, i.e. ln(P(X = k)). /// /// The location in the domain where we want to evaluate the log probability mass function. /// the log probability mass at location . public double ProbabilityLn(int k) { return Math.Log(Probability(k)); } /// /// Computes the cumulative distribution (CDF) of the distribution at x, i.e. P(X ≤ x). /// /// The location at which to compute the cumulative distribution function. /// the cumulative distribution at location . public double CumulativeDistribution(double x) { if (x < 1) { return 0.0; } return SpecialFunctions.GeneralHarmonic((int)x, _s)/SpecialFunctions.GeneralHarmonic(_n, _s); } /// /// Computes the probability mass (PMF) at k, i.e. P(X = k). /// /// The location in the domain where we want to evaluate the probability mass function. /// The s parameter of the distribution. /// The n parameter of the distribution. /// the probability mass at location . public static double PMF(double s, int n, int k) { if (!(n > 0 && s > 0.0)) { throw new ArgumentException("Invalid parametrization for the distribution."); } return (1.0/Math.Pow(k, s))/SpecialFunctions.GeneralHarmonic(n, s); } /// /// Computes the log probability mass (lnPMF) at k, i.e. ln(P(X = k)). /// /// The location in the domain where we want to evaluate the log probability mass function. /// The s parameter of the distribution. /// The n parameter of the distribution. /// the log probability mass at location . public static double PMFLn(double s, int n, int k) { if (!(n > 0 && s > 0.0)) { throw new ArgumentException("Invalid parametrization for the distribution."); } return Math.Log(PMF(s, n, k)); } /// /// Computes the cumulative distribution (CDF) of the distribution at x, i.e. P(X ≤ x). /// /// The location at which to compute the cumulative distribution function. /// The s parameter of the distribution. /// The n parameter of the distribution. /// the cumulative distribution at location . /// public static double CDF(double s, int n, double x) { if (!(n > 0 && s > 0.0)) { throw new ArgumentException("Invalid parametrization for the distribution."); } if (x < 1) { return 0.0; } return SpecialFunctions.GeneralHarmonic((int)x, s)/SpecialFunctions.GeneralHarmonic(n, s); } /// /// Generates a sample from the Zipf distribution without doing parameter checking. /// /// The random number generator to use. /// The s parameter of the distribution. /// The n parameter of the distribution. /// a random number from the Zipf distribution. static int SampleUnchecked(System.Random rnd, double s, int n) { var r = 0.0; while (r == 0.0) { r = rnd.NextDouble(); } var p = 1.0/SpecialFunctions.GeneralHarmonic(n, s); int i; var sum = 0.0; for (i = 1; i <= n; i++) { sum += p/Math.Pow(i, s); if (sum >= r) { break; } } return i; } static void SamplesUnchecked(System.Random rnd, int[] values, double s, int n) { for (int i = 0; i < values.Length; i++) { values[i] = SampleUnchecked(rnd, s, n); } } static IEnumerable SamplesUnchecked(System.Random rnd, double s, int n) { while (true) { yield return SampleUnchecked(rnd, s, n); } } /// /// Draws a random sample from the distribution. /// /// a sample from the distribution. public int Sample() { return SampleUnchecked(_random, _s, _n); } /// /// Fills an array with samples generated from the distribution. /// public void Samples(int[] values) { SamplesUnchecked(_random, values, _s, _n); } /// /// Samples an array of zipf distributed random variables. /// /// a sequence of samples from the distribution. public IEnumerable Samples() { return SamplesUnchecked(_random, _s, _n); } /// /// Samples a random variable. /// /// The random number generator to use. /// The s parameter of the distribution. /// The n parameter of the distribution. public static int Sample(System.Random rnd, double s, int n) { if (!(n > 0 && s > 0.0)) { throw new ArgumentException("Invalid parametrization for the distribution."); } return SampleUnchecked(rnd, s, n); } /// /// Samples a sequence of this random variable. /// /// The random number generator to use. /// The s parameter of the distribution. /// The n parameter of the distribution. public static IEnumerable Samples(System.Random rnd, double s, int n) { if (!(n > 0 && s > 0.0)) { throw new ArgumentException("Invalid parametrization for the distribution."); } return SamplesUnchecked(rnd, s, n); } /// /// Fills an array with samples generated from the distribution. /// /// The random number generator to use. /// The array to fill with the samples. /// The s parameter of the distribution. /// The n parameter of the distribution. public static void Samples(System.Random rnd, int[] values, double s, int n) { if (!(n > 0 && s > 0.0)) { throw new ArgumentException("Invalid parametrization for the distribution."); } SamplesUnchecked(rnd, values, s, n); } /// /// Samples a random variable. /// /// The s parameter of the distribution. /// The n parameter of the distribution. public static int Sample(double s, int n) { if (!(n > 0 && s > 0.0)) { throw new ArgumentException("Invalid parametrization for the distribution."); } return SampleUnchecked(SystemRandomSource.Default, s, n); } /// /// Samples a sequence of this random variable. /// /// The s parameter of the distribution. /// The n parameter of the distribution. public static IEnumerable Samples(double s, int n) { if (!(n > 0 && s > 0.0)) { throw new ArgumentException("Invalid parametrization for the distribution."); } return SamplesUnchecked(SystemRandomSource.Default, s, n); } /// /// Fills an array with samples generated from the distribution. /// /// The array to fill with the samples. /// The s parameter of the distribution. /// The n parameter of the distribution. public static void Samples(int[] values, double s, int n) { if (!(n > 0 && s > 0.0)) { throw new ArgumentException("Invalid parametrization for the distribution."); } SamplesUnchecked(SystemRandomSource.Default, values, s, n); } } }