// // Math.NET Numerics, part of the Math.NET Project // http://numerics.mathdotnet.com // http://github.com/mathnet/mathnet-numerics // // Copyright (c) 2009-2014 Math.NET // // Permission is hereby granted, free of charge, to any person // obtaining a copy of this software and associated documentation // files (the "Software"), to deal in the Software without // restriction, including without limitation the rights to use, // copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the // Software is furnished to do so, subject to the following // conditions: // // The above copyright notice and this permission notice shall be // included in all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR // OTHER DEALINGS IN THE SOFTWARE. // using System; using System.Collections.Generic; using IStation.Numerics.Random; using IStation.Numerics.RootFinding; namespace IStation.Numerics.Distributions { /// /// Continuous Univariate Student's T-distribution. /// Implements the univariate Student t-distribution. For details about this /// distribution, see /// /// Wikipedia - Student's t-distribution. /// /// We use a slightly generalized version (compared to /// Wikipedia) of the Student t-distribution. Namely, one which also /// parameterizes the location and scale. See the book "Bayesian Data /// Analysis" by Gelman et al. for more details. /// The density of the Student t-distribution p(x|mu,scale,dof) = /// Gamma((dof+1)/2) (1 + (x - mu)^2 / (scale * scale * dof))^(-(dof+1)/2) / /// (Gamma(dof/2)*Sqrt(dof*pi*scale)). /// The distribution will use the by /// default. Users can get/set the random number generator by using the /// property. /// The statistics classes will check all the incoming parameters /// whether they are in the allowed range. This might involve heavy /// computation. Optionally, by setting Control.CheckDistributionParameters /// to false, all parameter checks can be turned off. public class StudentT : IContinuousDistribution { System.Random _random; readonly double _location; readonly double _scale; readonly double _freedom; /// /// Initializes a new instance of the StudentT class. This is a Student t-distribution with location 0.0 /// scale 1.0 and degrees of freedom 1. /// public StudentT() { _random = SystemRandomSource.Default; _location = 0.0; _scale = 1.0; _freedom = 1.0; } /// /// Initializes a new instance of the StudentT class with a particular location, scale and degrees of /// freedom. /// /// The location (μ) of the distribution. /// The scale (σ) of the distribution. Range: σ > 0. /// The degrees of freedom (ν) for the distribution. Range: ν > 0. public StudentT(double location, double scale, double freedom) { if (!IsValidParameterSet(location, scale, freedom)) { throw new ArgumentException("Invalid parametrization for the distribution."); } _random = SystemRandomSource.Default; _location = location; _scale = scale; _freedom = freedom; } /// /// Initializes a new instance of the StudentT class with a particular location, scale and degrees of /// freedom. /// /// The location (μ) of the distribution. /// The scale (σ) of the distribution. Range: σ > 0. /// The degrees of freedom (ν) for the distribution. Range: ν > 0. /// The random number generator which is used to draw random samples. public StudentT(double location, double scale, double freedom, System.Random randomSource) { if (!IsValidParameterSet(location, scale, freedom)) { throw new ArgumentException("Invalid parametrization for the distribution."); } _random = randomSource ?? SystemRandomSource.Default; _location = location; _scale = scale; _freedom = freedom; } /// /// A string representation of the distribution. /// /// a string representation of the distribution. public override string ToString() { return $"StudentT(μ = {_location}, σ = {_scale}, ν = {_freedom})"; } /// /// Tests whether the provided values are valid parameters for this distribution. /// /// The location (μ) of the distribution. /// The scale (σ) of the distribution. Range: σ > 0. /// The degrees of freedom (ν) for the distribution. Range: ν > 0. public static bool IsValidParameterSet(double location, double scale, double freedom) { return scale > 0.0 && freedom > 0.0 && !double.IsNaN(location); } /// /// Gets the location (μ) of the Student t-distribution. /// public double Location => _location; /// /// Gets the scale (σ) of the Student t-distribution. Range: σ > 0. /// public double Scale => _scale; /// /// Gets the degrees of freedom (ν) of the Student t-distribution. Range: ν > 0. /// public double DegreesOfFreedom => _freedom; /// /// Gets or sets the random number generator which is used to draw random samples. /// public System.Random RandomSource { get => _random; set => _random = value ?? SystemRandomSource.Default; } /// /// Gets the mean of the Student t-distribution. /// public double Mean => _freedom > 1.0 ? _location : double.NaN; /// /// Gets the variance of the Student t-distribution. /// public double Variance { get { if (double.IsPositiveInfinity(_freedom)) { return _scale*_scale; } if (_freedom > 2.0) { return _freedom*_scale*_scale/(_freedom - 2.0); } return _freedom > 1.0 ? double.PositiveInfinity : double.NaN; } } /// /// Gets the standard deviation of the Student t-distribution. /// public double StdDev { get { if (double.IsPositiveInfinity(_freedom)) { return Math.Sqrt(_scale*_scale); } if (_freedom > 2.0) { return Math.Sqrt(_freedom*_scale*_scale/(_freedom - 2.0)); } return _freedom > 1.0 ? double.PositiveInfinity : double.NaN; } } /// /// Gets the entropy of the Student t-distribution. /// public double Entropy { get { if (_location != 0 || _scale != 1.0) { throw new NotSupportedException(); } return (((_freedom + 1.0)/2.0)*(SpecialFunctions.DiGamma((1.0 + _freedom)/2.0) - SpecialFunctions.DiGamma(_freedom/2.0))) + Math.Log(Math.Sqrt(_freedom)*SpecialFunctions.Beta(_freedom/2.0, 1.0/2.0)); } } /// /// Gets the skewness of the Student t-distribution. /// public double Skewness { get { if (_freedom <= 3) { throw new NotSupportedException(); } return 0.0; } } /// /// Gets the mode of the Student t-distribution. /// public double Mode => _location; /// /// Gets the median of the Student t-distribution. /// public double Median => _location; /// /// Gets the minimum of the Student t-distribution. /// public double Minimum => double.NegativeInfinity; /// /// Gets the maximum of the Student t-distribution. /// public double Maximum => double.PositiveInfinity; /// /// Computes the probability density of the distribution (PDF) at x, i.e. ∂P(X ≤ x)/∂x. /// /// The location at which to compute the density. /// the density at . public double Density(double x) { return PDF(_location, _scale, _freedom, x); } /// /// Computes the log probability density of the distribution (lnPDF) at x, i.e. ln(∂P(X ≤ x)/∂x). /// /// The location at which to compute the log density. /// the log density at . public double DensityLn(double x) { return PDFLn(_location, _scale, _freedom, x); } /// /// Computes the cumulative distribution (CDF) of the distribution at x, i.e. P(X ≤ x). /// /// The location at which to compute the cumulative distribution function. /// the cumulative distribution at location . public double CumulativeDistribution(double x) { return CDF(_location, _scale, _freedom, x); } /// /// Computes the inverse of the cumulative distribution function (InvCDF) for the distribution /// at the given probability. This is also known as the quantile or percent point function. /// /// The location at which to compute the inverse cumulative density. /// the inverse cumulative density at . /// /// WARNING: currently not an explicit implementation, hence slow and unreliable. public double InverseCumulativeDistribution(double p) { return InvCDF(_location, _scale, _freedom, p); } /// /// Samples student-t distributed random variables. /// /// The algorithm is method 2 in section 5, chapter 9 /// in L. Devroye's "Non-Uniform Random Variate Generation" /// The random number generator to use. /// The location (μ) of the distribution. /// The scale (σ) of the distribution. Range: σ > 0. /// The degrees of freedom (ν) for the distribution. Range: ν > 0. /// a random number from the standard student-t distribution. static double SampleUnchecked(System.Random rnd, double location, double scale, double freedom) { var gamma = Gamma.SampleUnchecked(rnd, 0.5*freedom, 0.5); return Normal.Sample(rnd, location, scale*Math.Sqrt(freedom/gamma)); } static void SamplesUnchecked(System.Random rnd, double[] values, double location, double scale, double freedom) { Gamma.SamplesUnchecked(rnd, values, 0.5*freedom, 0.5); for (int i = 0; i < values.Length; i++) { values[i] = Normal.Sample(rnd, location, scale*Math.Sqrt(freedom/values[i])); } } static IEnumerable SamplesUnchecked(System.Random rnd, double location, double scale, double freedom) { while (true) { yield return SampleUnchecked(rnd, location, scale, freedom); } } /// /// Generates a sample from the Student t-distribution. /// /// a sample from the distribution. public double Sample() { return SampleUnchecked(_random, _location, _scale, _freedom); } /// /// Fills an array with samples generated from the distribution. /// public void Samples(double[] values) { SamplesUnchecked(_random, values, _location, _scale, _freedom); } /// /// Generates a sequence of samples from the Student t-distribution. /// /// a sequence of samples from the distribution. public IEnumerable Samples() { return SamplesUnchecked(_random, _location, _scale, _freedom); } /// /// Computes the probability density of the distribution (PDF) at x, i.e. ∂P(X ≤ x)/∂x. /// /// The location (μ) of the distribution. /// The scale (σ) of the distribution. Range: σ > 0. /// The degrees of freedom (ν) for the distribution. Range: ν > 0. /// The location at which to compute the density. /// the density at . /// public static double PDF(double location, double scale, double freedom, double x) { if (scale <= 0.0 || freedom <= 0.0) { throw new ArgumentException("Invalid parametrization for the distribution."); } // TODO JVG we can probably do a better job for Cauchy special case if (freedom >= 1e+8d) { return Normal.PDF(location, scale, x); } var d = (x - location)/scale; return Math.Exp(SpecialFunctions.GammaLn((freedom + 1.0)/2.0) - SpecialFunctions.GammaLn(freedom/2.0)) *Math.Pow(1.0 + (d*d/freedom), -0.5*(freedom + 1.0)) /Math.Sqrt(freedom*Math.PI) /scale; } /// /// Computes the log probability density of the distribution (lnPDF) at x, i.e. ln(∂P(X ≤ x)/∂x). /// /// The location (μ) of the distribution. /// The scale (σ) of the distribution. Range: σ > 0. /// The degrees of freedom (ν) for the distribution. Range: ν > 0. /// The location at which to compute the density. /// the log density at . /// public static double PDFLn(double location, double scale, double freedom, double x) { if (scale <= 0.0 || freedom <= 0.0) { throw new ArgumentException("Invalid parametrization for the distribution."); } // TODO JVG we can probably do a better job for Cauchy special case if (freedom >= 1e+8d) { return Normal.PDFLn(location, scale, x); } var d = (x - location)/scale; return SpecialFunctions.GammaLn((freedom + 1.0)/2.0) - (0.5*((freedom + 1.0)*Math.Log(1.0 + (d*d/freedom)))) - SpecialFunctions.GammaLn(freedom/2.0) - (0.5*Math.Log(freedom*Math.PI)) - Math.Log(scale); } /// /// Computes the cumulative distribution (CDF) of the distribution at x, i.e. P(X ≤ x). /// /// The location at which to compute the cumulative distribution function. /// The location (μ) of the distribution. /// The scale (σ) of the distribution. Range: σ > 0. /// The degrees of freedom (ν) for the distribution. Range: ν > 0. /// the cumulative distribution at location . /// public static double CDF(double location, double scale, double freedom, double x) { if (scale <= 0.0 || freedom <= 0.0) { throw new ArgumentException("Invalid parametrization for the distribution."); } // TODO JVG we can probably do a better job for Cauchy special case if (double.IsPositiveInfinity(freedom)) { return Normal.CDF(location, scale, x); } var k = (x - location)/scale; var h = freedom/(freedom + (k*k)); var ib = 0.5*SpecialFunctions.BetaRegularized(freedom/2.0, 0.5, h); return x <= location ? ib : 1.0 - ib; } /// /// Computes the inverse of the cumulative distribution function (InvCDF) for the distribution /// at the given probability. This is also known as the quantile or percent point function. /// /// The location at which to compute the inverse cumulative density. /// The location (μ) of the distribution. /// The scale (σ) of the distribution. Range: σ > 0. /// The degrees of freedom (ν) for the distribution. Range: ν > 0. /// the inverse cumulative density at . /// /// WARNING: currently not an explicit implementation, hence slow and unreliable. public static double InvCDF(double location, double scale, double freedom, double p) { if (scale <= 0.0 || freedom <= 0.0) { throw new ArgumentException("Invalid parametrization for the distribution."); } // TODO JVG we can probably do a better job for Cauchy special case if (double.IsPositiveInfinity(freedom)) { return Normal.InvCDF(location, scale, p); } if (p == 0.5d) { return location; } // TODO PERF: We must implement this explicitly instead of solving for CDF^-1 return Brent.FindRoot(x => { var k = (x - location)/scale; var h = freedom/(freedom + (k*k)); var ib = 0.5*SpecialFunctions.BetaRegularized(freedom/2.0, 0.5, h); return x <= location ? ib - p : 1.0 - ib - p; }, -800, 800, accuracy: 1e-12); } /// /// Generates a sample from the Student t-distribution. /// /// The random number generator to use. /// The location (μ) of the distribution. /// The scale (σ) of the distribution. Range: σ > 0. /// The degrees of freedom (ν) for the distribution. Range: ν > 0. /// a sample from the distribution. public static double Sample(System.Random rnd, double location, double scale, double freedom) { if (scale <= 0.0 || freedom <= 0.0) { throw new ArgumentException("Invalid parametrization for the distribution."); } return SampleUnchecked(rnd, location, scale, freedom); } /// /// Generates a sequence of samples from the Student t-distribution using the Box-Muller algorithm. /// /// The random number generator to use. /// The location (μ) of the distribution. /// The scale (σ) of the distribution. Range: σ > 0. /// The degrees of freedom (ν) for the distribution. Range: ν > 0. /// a sequence of samples from the distribution. public static IEnumerable Samples(System.Random rnd, double location, double scale, double freedom) { if (scale <= 0.0 || freedom <= 0.0) { throw new ArgumentException("Invalid parametrization for the distribution."); } return SamplesUnchecked(rnd, location, scale, freedom); } /// /// Fills an array with samples generated from the distribution. /// /// The random number generator to use. /// The array to fill with the samples. /// The location (μ) of the distribution. /// The scale (σ) of the distribution. Range: σ > 0. /// The degrees of freedom (ν) for the distribution. Range: ν > 0. /// a sequence of samples from the distribution. public static void Samples(System.Random rnd, double[] values, double location, double scale, double freedom) { if (scale <= 0.0 || freedom <= 0.0) { throw new ArgumentException("Invalid parametrization for the distribution."); } SamplesUnchecked(rnd, values, location, scale, freedom); } /// /// Generates a sample from the Student t-distribution. /// /// The location (μ) of the distribution. /// The scale (σ) of the distribution. Range: σ > 0. /// The degrees of freedom (ν) for the distribution. Range: ν > 0. /// a sample from the distribution. public static double Sample(double location, double scale, double freedom) { if (scale <= 0.0 || freedom <= 0.0) { throw new ArgumentException("Invalid parametrization for the distribution."); } return SampleUnchecked(SystemRandomSource.Default, location, scale, freedom); } /// /// Generates a sequence of samples from the Student t-distribution using the Box-Muller algorithm. /// /// The location (μ) of the distribution. /// The scale (σ) of the distribution. Range: σ > 0. /// The degrees of freedom (ν) for the distribution. Range: ν > 0. /// a sequence of samples from the distribution. public static IEnumerable Samples(double location, double scale, double freedom) { if (scale <= 0.0 || freedom <= 0.0) { throw new ArgumentException("Invalid parametrization for the distribution."); } return SamplesUnchecked(SystemRandomSource.Default, location, scale, freedom); } /// /// Fills an array with samples generated from the distribution. /// /// The array to fill with the samples. /// The location (μ) of the distribution. /// The scale (σ) of the distribution. Range: σ > 0. /// The degrees of freedom (ν) for the distribution. Range: ν > 0. /// a sequence of samples from the distribution. public static void Samples(double[] values, double location, double scale, double freedom) { if (scale <= 0.0 || freedom <= 0.0) { throw new ArgumentException("Invalid parametrization for the distribution."); } SamplesUnchecked(SystemRandomSource.Default, values, location, scale, freedom); } } }