// // Math.NET Numerics, part of the Math.NET Project // http://numerics.mathdotnet.com // http://github.com/mathnet/mathnet-numerics // // Copyright (c) 2009-2018 Math.NET // // Permission is hereby granted, free of charge, to any person // obtaining a copy of this software and associated documentation // files (the "Software"), to deal in the Software without // restriction, including without limitation the rights to use, // copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the // Software is furnished to do so, subject to the following // conditions: // // The above copyright notice and this permission notice shall be // included in all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR // OTHER DEALINGS IN THE SOFTWARE. // using System; using System.Collections.Generic; using System.Linq; using IStation.Numerics.IntegralTransforms; using IStation.Numerics.LinearAlgebra; using Complex = System.Numerics.Complex; namespace IStation.Numerics.Statistics { /// /// A class with correlation measures between two datasets. /// public static class Correlation { /// /// Auto-correlation function (ACF) based on FFT for all possible lags k. /// /// Data array to calculate auto correlation for. /// An array with the ACF as a function of the lags k. public static double[] Auto(double[] x) { return AutoCorrelationFft(x, 0, x.Length - 1); } /// /// Auto-correlation function (ACF) based on FFT for lags between kMin and kMax. /// /// The data array to calculate auto correlation for. /// Max lag to calculate ACF for must be positive and smaller than x.Length. /// Min lag to calculate ACF for (0 = no shift with acf=1) must be zero or positive and smaller than x.Length. /// An array with the ACF as a function of the lags k. public static double[] Auto(double[] x, int kMax, int kMin = 0) { // assert max and min in proper order var kMax2 = Math.Max(kMax, kMin); var kMin2 = Math.Min(kMax, kMin); return AutoCorrelationFft(x, kMin2, kMax2); } /// /// Auto-correlation function based on FFT for lags k. /// /// The data array to calculate auto correlation for. /// Array with lags to calculate ACF for. /// An array with the ACF as a function of the lags k. public static double[] Auto(double[] x, int[] k) { if (k == null) { throw new ArgumentNullException(nameof(k)); } if (k.Length < 1) { throw new ArgumentException("k"); } var kMin = k.Min(); var kMax = k.Max(); // get acf between full range var acf = AutoCorrelationFft(x, kMin, kMax); // map output by indexing var result = new double[k.Length]; for (int i = 0; i < result.Length; i++) { result[i] = acf[k[i] - kMin]; } return result; } /// /// The internal method for calculating the auto-correlation. /// /// The data array to calculate auto-correlation for /// Min lag to calculate ACF for (0 = no shift with acf=1) must be zero or positive and smaller than x.Length /// Max lag (EXCLUSIVE) to calculate ACF for must be positive and smaller than x.Length /// An array with the ACF as a function of the lags k. private static double[] AutoCorrelationFft(double[] x, int kLow, int kHigh) { if (x == null) throw new ArgumentNullException(nameof(x)); int N = x.Length; // Sample size if (kLow < 0 || kLow >= N) throw new ArgumentOutOfRangeException(nameof(kLow), "kMin must be zero or positive and smaller than x.Length"); if (kHigh < 0 || kHigh >= N) throw new ArgumentOutOfRangeException(nameof(kHigh), "kMax must be positive and smaller than x.Length"); if (N < 1) return new double[0]; int nFFT = Euclid.CeilingToPowerOfTwo(N) * 2; Complex[] xFFT = new Complex[nFFT]; Complex[] xFFT2 = new Complex[nFFT]; double xDash = ArrayStatistics.Mean(x); // copy values in range and substract mean - all the remaining parts are padded with zero. for (int i = 0; i < x.Length; i++) { xFFT[i] = new Complex(x[i] - xDash, 0.0); // copy values in range and substract mean } Fourier.Forward(xFFT, FourierOptions.Matlab); // maybe a Vector implementation here would be faster for (int i = 0; i < xFFT.Length; i++) { xFFT2[i] = Complex.Multiply(xFFT[i], Complex.Conjugate(xFFT[i])); } Fourier.Inverse(xFFT2, FourierOptions.Matlab); double dc = xFFT2[0].Real; double[] result = new double[kHigh - kLow + 1]; // normalize such that acf[0] would be 1.0 for (int i = 0; i < (kHigh - kLow + 1); i++) { result[i] = xFFT2[kLow + i].Real / dc; } return result; } /// /// Computes the Pearson Product-Moment Correlation coefficient. /// /// Sample data A. /// Sample data B. /// The Pearson product-moment correlation coefficient. public static double Pearson(IEnumerable dataA, IEnumerable dataB) { int n = 0; double r = 0.0; double meanA = 0; double meanB = 0; double varA = 0; double varB = 0; // WARNING: do not try to "optimize" by summing up products instead of using differences. // It would indeed be faster, but numerically much less robust if large mean + low variance. using (IEnumerator ieA = dataA.GetEnumerator()) using (IEnumerator ieB = dataB.GetEnumerator()) { while (ieA.MoveNext()) { if (!ieB.MoveNext()) { throw new ArgumentOutOfRangeException(nameof(dataB), "The array arguments must have the same length."); } double currentA = ieA.Current; double currentB = ieB.Current; double deltaA = currentA - meanA; double scaleDeltaA = deltaA/++n; double deltaB = currentB - meanB; double scaleDeltaB = deltaB/n; meanA += scaleDeltaA; meanB += scaleDeltaB; varA += scaleDeltaA*deltaA*(n - 1); varB += scaleDeltaB*deltaB*(n - 1); r += (deltaA*deltaB*(n - 1))/n; } if (ieB.MoveNext()) { throw new ArgumentOutOfRangeException(nameof(dataA), "The array arguments must have the same length."); } } return r/Math.Sqrt(varA*varB); } /// /// Computes the Weighted Pearson Product-Moment Correlation coefficient. /// /// Sample data A. /// Sample data B. /// Corresponding weights of data. /// The Weighted Pearson product-moment correlation coefficient. public static double WeightedPearson(IEnumerable dataA, IEnumerable dataB, IEnumerable weights) { int n = 0; double meanA = 0; double meanB = 0; double varA = 0; double varB = 0; double sumWeight = 0; double covariance = 0; using (IEnumerator ieA = dataA.GetEnumerator()) using (IEnumerator ieB = dataB.GetEnumerator()) using (IEnumerator ieW = weights.GetEnumerator()) { while (ieA.MoveNext()) { if (!ieB.MoveNext()) { throw new ArgumentOutOfRangeException(nameof(dataB), "The array arguments must have the same length."); } if (!ieW.MoveNext()) { throw new ArgumentOutOfRangeException(nameof(weights), "The array arguments must have the same length."); } ++n; double xi = ieA.Current; double yi = ieB.Current; double wi = ieW.Current; double temp = sumWeight + wi; double deltaX = xi - meanA; double rX = deltaX*wi/temp; meanA += rX; varA += sumWeight*deltaX*rX; double deltaY = yi - meanB; double rY = deltaY*wi/temp; meanB += rY; varB += sumWeight*deltaY*rY; covariance += deltaX*deltaY*wi*(sumWeight/temp); sumWeight = temp; } if (ieB.MoveNext()) { throw new ArgumentOutOfRangeException(nameof(dataB), "The array arguments must have the same length."); } if (ieW.MoveNext()) { throw new ArgumentOutOfRangeException(nameof(weights), "The array arguments must have the same length."); } } return covariance/Math.Sqrt(varA*varB); } /// /// Computes the Pearson Product-Moment Correlation matrix. /// /// Array of sample data vectors. /// The Pearson product-moment correlation matrix. public static Matrix PearsonMatrix(params double[][] vectors) { var m = Matrix.Build.DenseIdentity(vectors.Length); for (int i = 0; i < vectors.Length; i++) { for (int j = i + 1; j < vectors.Length; j++) { var c = Pearson(vectors[i], vectors[j]); m.At(i, j, c); m.At(j, i, c); } } return m; } /// /// Computes the Pearson Product-Moment Correlation matrix. /// /// Enumerable of sample data vectors. /// The Pearson product-moment correlation matrix. public static Matrix PearsonMatrix(IEnumerable vectors) { return PearsonMatrix(vectors as double[][] ?? vectors.ToArray()); } /// /// Computes the Spearman Ranked Correlation coefficient. /// /// Sample data series A. /// Sample data series B. /// The Spearman ranked correlation coefficient. public static double Spearman(IEnumerable dataA, IEnumerable dataB) { return Pearson(Rank(dataA), Rank(dataB)); } /// /// Computes the Spearman Ranked Correlation matrix. /// /// Array of sample data vectors. /// The Spearman ranked correlation matrix. public static Matrix SpearmanMatrix(params double[][] vectors) { return PearsonMatrix(vectors.Select(Rank).ToArray()); } /// /// Computes the Spearman Ranked Correlation matrix. /// /// Enumerable of sample data vectors. /// The Spearman ranked correlation matrix. public static Matrix SpearmanMatrix(IEnumerable vectors) { return PearsonMatrix(vectors.Select(Rank).ToArray()); } static double[] Rank(IEnumerable series) { if (series == null) { return new double[0]; } // WARNING: do not try to cast series to an array and use it directly, // as we need to sort it (inplace operation) var data = series.ToArray(); return ArrayStatistics.RanksInplace(data, RankDefinition.Average); } } }