//
// Math.NET Numerics, part of the Math.NET Project
// http://numerics.mathdotnet.com
// http://github.com/mathnet/mathnet-numerics
//
// Copyright (c) 2009-2013 Math.NET
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//

using System;
using System.Collections.Generic;
using IStation.Numerics.LinearAlgebra;
using IStation.Numerics.Providers.LinearAlgebra;
using IStation.Numerics.Statistics;

namespace IStation.Numerics
{
    /// <summary>
    /// Metrics to measure the distance between two structures.
    /// </summary>
    public static class Distance
    {
        /// <summary>
        /// Sum of Absolute Difference (SAD), i.e. the L1-norm (Manhattan) of the difference.
        /// </summary>
        /// <param name="a">First vector.</param>
        /// <param name="b">Second vector.</param>
        /// <returns>The L1-norm of <c>a - b</c>.</returns>
        public static double SAD<T>(Vector<T> a, Vector<T> b) where T : struct, IEquatable<T>, IFormattable
        {
            return (a - b).L1Norm();
        }

        /// <summary>
        /// Sum of Absolute Difference (SAD), i.e. the L1-norm (Manhattan) of the difference.
        /// </summary>
        /// <param name="a">First array.</param>
        /// <param name="b">Second array, same length as <paramref name="a"/>.</param>
        /// <returns>The sum of <c>|a[i] - b[i]|</c> over all positions; 0 for empty arrays.</returns>
        /// <exception cref="ArgumentNullException">Thrown if a or b are null.</exception>
        /// <exception cref="ArgumentException">Thrown if a and b are of different lengths.</exception>
        public static double SAD(double[] a, double[] b)
        {
            EnsureComparable(a, b);

            double sum = 0d;
            for (var i = 0; i < a.Length; i++)
            {
                sum += Math.Abs(a[i] - b[i]);
            }

            return sum;
        }

        /// <summary>
        /// Sum of Absolute Difference (SAD), i.e. the L1-norm (Manhattan) of the difference.
        /// </summary>
        /// <param name="a">First array.</param>
        /// <param name="b">Second array, same length as <paramref name="a"/>.</param>
        /// <returns>The sum of <c>|a[i] - b[i]|</c> over all positions; 0 for empty arrays.</returns>
        /// <exception cref="ArgumentNullException">Thrown if a or b are null.</exception>
        /// <exception cref="ArgumentException">Thrown if a and b are of different lengths.</exception>
        public static float SAD(float[] a, float[] b)
        {
            EnsureComparable(a, b);

            float sum = 0f;
            for (var i = 0; i < a.Length; i++)
            {
                sum += Math.Abs(a[i] - b[i]);
            }

            return sum;
        }

        /// <summary>
        /// Mean-Absolute Error (MAE), i.e. the normalized L1-norm (Manhattan) of the difference.
        /// </summary>
        /// <param name="a">First vector.</param>
        /// <param name="b">Second vector.</param>
        /// <returns>The L1-norm of <c>a - b</c> divided by the element count of <paramref name="a"/>.</returns>
        public static double MAE<T>(Vector<T> a, Vector<T> b) where T : struct, IEquatable<T>, IFormattable
        {
            return (a - b).L1Norm()/a.Count;
        }

        /// <summary>
        /// Mean-Absolute Error (MAE), i.e. the normalized L1-norm (Manhattan) of the difference.
        /// </summary>
        /// <param name="a">First array.</param>
        /// <param name="b">Second array, same length as <paramref name="a"/>.</param>
        /// <returns>The sum of absolute differences divided by the array length.</returns>
        /// <exception cref="ArgumentNullException">Thrown if a or b are null.</exception>
        /// <exception cref="ArgumentException">Thrown if a and b are of different lengths.</exception>
        public static double MAE(double[] a, double[] b)
        {
            return SAD(a, b)/a.Length;
        }

        /// <summary>
        /// Mean-Absolute Error (MAE), i.e. the normalized L1-norm (Manhattan) of the difference.
        /// </summary>
        /// <param name="a">First array.</param>
        /// <param name="b">Second array, same length as <paramref name="a"/>.</param>
        /// <returns>The sum of absolute differences divided by the array length.</returns>
        /// <exception cref="ArgumentNullException">Thrown if a or b are null.</exception>
        /// <exception cref="ArgumentException">Thrown if a and b are of different lengths.</exception>
        public static float MAE(float[] a, float[] b)
        {
            return SAD(a, b)/a.Length;
        }

        /// <summary>
        /// Sum of Squared Difference (SSD), i.e. the squared L2-norm (Euclidean) of the difference.
        /// </summary>
        /// <param name="a">First vector.</param>
        /// <param name="b">Second vector.</param>
        /// <returns>The square of the L2-norm of <c>a - b</c>.</returns>
        public static double SSD<T>(Vector<T> a, Vector<T> b) where T : struct, IEquatable<T>, IFormattable
        {
            var norm = (a - b).L2Norm();
            return norm*norm;
        }

        /// <summary>
        /// Sum of Squared Difference (SSD), i.e. the squared L2-norm (Euclidean) of the difference.
        /// </summary>
        /// <param name="a">First array.</param>
        /// <param name="b">Second array, same length as <paramref name="a"/>.</param>
        /// <returns>The dot product of the difference <c>a - b</c> with itself.</returns>
        /// <exception cref="ArgumentNullException">Thrown if a or b are null.</exception>
        /// <exception cref="ArgumentException">Thrown if a and b are of different lengths.</exception>
        public static double SSD(double[] a, double[] b)
        {
            EnsureComparable(a, b);

            // Both steps are delegated to the configured linear algebra provider.
            var diff = new double[a.Length];
            LinearAlgebraControl.Provider.SubtractArrays(a, b, diff);
            return LinearAlgebraControl.Provider.DotProduct(diff, diff);
        }

        /// <summary>
        /// Sum of Squared Difference (SSD), i.e. the squared L2-norm (Euclidean) of the difference.
        /// </summary>
        /// <param name="a">First array.</param>
        /// <param name="b">Second array, same length as <paramref name="a"/>.</param>
        /// <returns>The dot product of the difference <c>a - b</c> with itself.</returns>
        /// <exception cref="ArgumentNullException">Thrown if a or b are null.</exception>
        /// <exception cref="ArgumentException">Thrown if a and b are of different lengths.</exception>
        public static float SSD(float[] a, float[] b)
        {
            EnsureComparable(a, b);

            // Both steps are delegated to the configured linear algebra provider.
            var diff = new float[a.Length];
            LinearAlgebraControl.Provider.SubtractArrays(a, b, diff);
            return LinearAlgebraControl.Provider.DotProduct(diff, diff);
        }

        /// <summary>
        /// Mean-Squared Error (MSE), i.e. the normalized squared L2-norm (Euclidean) of the difference.
        /// </summary>
        /// <param name="a">First vector.</param>
        /// <param name="b">Second vector.</param>
        /// <returns>The squared L2-norm of <c>a - b</c> divided by the element count of <paramref name="a"/>.</returns>
        public static double MSE<T>(Vector<T> a, Vector<T> b) where T : struct, IEquatable<T>, IFormattable
        {
            var norm = (a - b).L2Norm();
            return norm*norm/a.Count;
        }

        /// <summary>
        /// Mean-Squared Error (MSE), i.e. the normalized squared L2-norm (Euclidean) of the difference.
        /// </summary>
        /// <param name="a">First array.</param>
        /// <param name="b">Second array, same length as <paramref name="a"/>.</param>
        /// <returns>The sum of squared differences divided by the array length.</returns>
        /// <exception cref="ArgumentNullException">Thrown if a or b are null.</exception>
        /// <exception cref="ArgumentException">Thrown if a and b are of different lengths.</exception>
        public static double MSE(double[] a, double[] b)
        {
            return SSD(a, b)/a.Length;
        }

        /// <summary>
        /// Mean-Squared Error (MSE), i.e. the normalized squared L2-norm (Euclidean) of the difference.
        /// </summary>
        /// <param name="a">First array.</param>
        /// <param name="b">Second array, same length as <paramref name="a"/>.</param>
        /// <returns>The sum of squared differences divided by the array length.</returns>
        /// <exception cref="ArgumentNullException">Thrown if a or b are null.</exception>
        /// <exception cref="ArgumentException">Thrown if a and b are of different lengths.</exception>
        public static float MSE(float[] a, float[] b)
        {
            return SSD(a, b)/a.Length;
        }

        /// <summary>
        /// Euclidean Distance, i.e. the L2-norm of the difference.
        /// </summary>
        /// <param name="a">First vector.</param>
        /// <param name="b">Second vector.</param>
        /// <returns>The L2-norm of <c>a - b</c>.</returns>
        public static double Euclidean<T>(Vector<T> a, Vector<T> b) where T : struct, IEquatable<T>, IFormattable
        {
            return (a - b).L2Norm();
        }

        /// <summary>
        /// Euclidean Distance, i.e. the L2-norm of the difference.
        /// </summary>
        /// <param name="a">First array.</param>
        /// <param name="b">Second array, same length as <paramref name="a"/>.</param>
        /// <returns>The square root of the sum of squared differences.</returns>
        /// <exception cref="ArgumentNullException">Thrown if a or b are null.</exception>
        /// <exception cref="ArgumentException">Thrown if a and b are of different lengths.</exception>
        public static double Euclidean(double[] a, double[] b)
        {
            return Math.Sqrt(SSD(a, b));
        }

        /// <summary>
        /// Euclidean Distance, i.e. the L2-norm of the difference.
        /// </summary>
        /// <param name="a">First array.</param>
        /// <param name="b">Second array, same length as <paramref name="a"/>.</param>
        /// <returns>The square root of the sum of squared differences.</returns>
        /// <exception cref="ArgumentNullException">Thrown if a or b are null.</exception>
        /// <exception cref="ArgumentException">Thrown if a and b are of different lengths.</exception>
        public static float Euclidean(float[] a, float[] b)
        {
            return (float) Math.Sqrt(SSD(a, b));
        }

        /// <summary>
        /// Manhattan Distance, i.e. the L1-norm of the difference.
        /// </summary>
        /// <param name="a">First vector.</param>
        /// <param name="b">Second vector.</param>
        /// <returns>The L1-norm of <c>a - b</c>.</returns>
        public static double Manhattan<T>(Vector<T> a, Vector<T> b) where T : struct, IEquatable<T>, IFormattable
        {
            return (a - b).L1Norm();
        }

        /// <summary>
        /// Manhattan Distance, i.e. the L1-norm of the difference.
        /// </summary>
        /// <param name="a">First array.</param>
        /// <param name="b">Second array, same length as <paramref name="a"/>.</param>
        /// <returns>The sum of absolute differences.</returns>
        /// <exception cref="ArgumentNullException">Thrown if a or b are null.</exception>
        /// <exception cref="ArgumentException">Thrown if a and b are of different lengths.</exception>
        public static double Manhattan(double[] a, double[] b)
        {
            return SAD(a, b);
        }

        /// <summary>
        /// Manhattan Distance, i.e. the L1-norm of the difference.
        /// </summary>
        /// <param name="a">First array.</param>
        /// <param name="b">Second array, same length as <paramref name="a"/>.</param>
        /// <returns>The sum of absolute differences.</returns>
        /// <exception cref="ArgumentNullException">Thrown if a or b are null.</exception>
        /// <exception cref="ArgumentException">Thrown if a and b are of different lengths.</exception>
        public static float Manhattan(float[] a, float[] b)
        {
            return SAD(a, b);
        }

        /// <summary>
        /// Chebyshev Distance, i.e. the Infinity-norm of the difference.
        /// </summary>
        /// <param name="a">First vector.</param>
        /// <param name="b">Second vector.</param>
        /// <returns>The infinity-norm of <c>a - b</c>.</returns>
        public static double Chebyshev<T>(Vector<T> a, Vector<T> b) where T : struct, IEquatable<T>, IFormattable
        {
            return (a - b).InfinityNorm();
        }

        /// <summary>
        /// Chebyshev Distance, i.e. the Infinity-norm of the difference.
        /// </summary>
        /// <param name="a">First array.</param>
        /// <param name="b">Second array, same length as <paramref name="a"/>.</param>
        /// <returns>The largest <c>|a[i] - b[i]|</c>; 0 for empty arrays.</returns>
        /// <exception cref="ArgumentNullException">Thrown if a or b are null.</exception>
        /// <exception cref="ArgumentException">Thrown if a and b are of different lengths.</exception>
        public static double Chebyshev(double[] a, double[] b)
        {
            EnsureComparable(a, b);

            // Two empty arrays are at distance zero; without this guard a[0] below would throw.
            if (a.Length == 0)
            {
                return 0d;
            }

            // Seed with the first difference so that only later, strictly larger values replace it.
            double max = Math.Abs(a[0] - b[0]);
            for (int i = 1; i < a.Length; i++)
            {
                var next = Math.Abs(a[i] - b[i]);
                if (next > max)
                {
                    max = next;
                }
            }

            return max;
        }

        /// <summary>
        /// Chebyshev Distance, i.e. the Infinity-norm of the difference.
        /// </summary>
        /// <param name="a">First array.</param>
        /// <param name="b">Second array, same length as <paramref name="a"/>.</param>
        /// <returns>The largest <c>|a[i] - b[i]|</c>; 0 for empty arrays.</returns>
        /// <exception cref="ArgumentNullException">Thrown if a or b are null.</exception>
        /// <exception cref="ArgumentException">Thrown if a and b are of different lengths.</exception>
        public static float Chebyshev(float[] a, float[] b)
        {
            EnsureComparable(a, b);

            // Two empty arrays are at distance zero; without this guard a[0] below would throw.
            if (a.Length == 0)
            {
                return 0f;
            }

            // Seed with the first difference so that only later, strictly larger values replace it.
            float max = Math.Abs(a[0] - b[0]);
            for (int i = 1; i < a.Length; i++)
            {
                var next = Math.Abs(a[i] - b[i]);
                if (next > max)
                {
                    max = next;
                }
            }

            return max;
        }

        /// <summary>
        /// Minkowski Distance, i.e. the generalized p-norm of the difference.
        /// </summary>
        /// <param name="p">Order of the norm.</param>
        /// <param name="a">First vector.</param>
        /// <param name="b">Second vector.</param>
        /// <returns>The p-norm of <c>a - b</c>.</returns>
        public static double Minkowski<T>(double p, Vector<T> a, Vector<T> b) where T : struct, IEquatable<T>, IFormattable
        {
            return (a - b).Norm(p);
        }

        /// <summary>
        /// Minkowski Distance, i.e. the generalized p-norm of the difference.
        /// </summary>
        /// <param name="p">Order of the norm; must not be negative.</param>
        /// <param name="a">First array.</param>
        /// <param name="b">Second array, same length as <paramref name="a"/>.</param>
        /// <returns>
        /// The p-norm of the difference. The orders 1, 2 and positive infinity are forwarded to
        /// <see cref="Manhattan(double[], double[])"/>, <see cref="Euclidean(double[], double[])"/> and
        /// <see cref="Chebyshev(double[], double[])"/> respectively.
        /// </returns>
        /// <exception cref="ArgumentNullException">Thrown if a or b are null.</exception>
        /// <exception cref="ArgumentException">Thrown if a and b are of different lengths.</exception>
        /// <exception cref="ArgumentOutOfRangeException">Thrown if p is negative.</exception>
        public static double Minkowski(double p, double[] a, double[] b)
        {
            EnsureComparable(a, b);

            if (p < 0d)
            {
                throw new ArgumentOutOfRangeException(nameof(p));
            }

            if (p == 1d)
            {
                return Manhattan(a, b);
            }

            if (p == 2d)
            {
                return Euclidean(a, b);
            }

            if (double.IsPositiveInfinity(p))
            {
                return Chebyshev(a, b);
            }

            double sum = 0d;
            for (var i = 0; i < a.Length; i++)
            {
                sum += Math.Pow(Math.Abs(a[i] - b[i]), p);
            }

            return Math.Pow(sum, 1.0/p);
        }

        /// <summary>
        /// Minkowski Distance, i.e. the generalized p-norm of the difference.
        /// </summary>
        /// <param name="p">Order of the norm; must not be negative.</param>
        /// <param name="a">First array.</param>
        /// <param name="b">Second array, same length as <paramref name="a"/>.</param>
        /// <returns>
        /// The p-norm of the difference. The orders 1, 2 and positive infinity are forwarded to
        /// <see cref="Manhattan(float[], float[])"/>, <see cref="Euclidean(float[], float[])"/> and
        /// <see cref="Chebyshev(float[], float[])"/> respectively.
        /// </returns>
        /// <exception cref="ArgumentNullException">Thrown if a or b are null.</exception>
        /// <exception cref="ArgumentException">Thrown if a and b are of different lengths.</exception>
        /// <exception cref="ArgumentOutOfRangeException">Thrown if p is negative.</exception>
        public static float Minkowski(double p, float[] a, float[] b)
        {
            EnsureComparable(a, b);

            if (p < 0d)
            {
                throw new ArgumentOutOfRangeException(nameof(p));
            }

            if (p == 1d)
            {
                return Manhattan(a, b);
            }

            if (p == 2d)
            {
                return Euclidean(a, b);
            }

            if (double.IsPositiveInfinity(p))
            {
                return Chebyshev(a, b);
            }

            // Accumulate in double precision and narrow only the final result.
            double sum = 0d;
            for (var i = 0; i < a.Length; i++)
            {
                sum += Math.Pow(Math.Abs(a[i] - b[i]), p);
            }

            return (float) Math.Pow(sum, 1.0/p);
        }

        /// <summary>
        /// Canberra Distance, a weighted version of the L1-norm of the difference.
        /// </summary>
        /// <param name="a">First array.</param>
        /// <param name="b">Second array, same length as <paramref name="a"/>.</param>
        /// <returns>
        /// The sum of <c>|a[i] - b[i]| / (|a[i]| + |b[i]|)</c>. Positions where both values are zero
        /// contribute 0 instead of the undefined quotient 0/0.
        /// </returns>
        /// <exception cref="ArgumentNullException">Thrown if a or b are null.</exception>
        /// <exception cref="ArgumentException">Thrown if a and b are of different lengths.</exception>
        public static double Canberra(double[] a, double[] b)
        {
            EnsureComparable(a, b);

            double sum = 0d;
            for (var i = 0; i < a.Length; i++)
            {
                var denominator = Math.Abs(a[i]) + Math.Abs(b[i]);

                // A zero denominator means both values are zero: treat 0/0 as 0 rather than NaN.
                // NaN inputs still propagate because NaN != 0 is true.
                if (denominator != 0d)
                {
                    sum += Math.Abs(a[i] - b[i])/denominator;
                }
            }

            return sum;
        }

        /// <summary>
        /// Canberra Distance, a weighted version of the L1-norm of the difference.
        /// </summary>
        /// <param name="a">First array.</param>
        /// <param name="b">Second array, same length as <paramref name="a"/>.</param>
        /// <returns>
        /// The sum of <c>|a[i] - b[i]| / (|a[i]| + |b[i]|)</c>. Positions where both values are zero
        /// contribute 0 instead of the undefined quotient 0/0.
        /// </returns>
        /// <exception cref="ArgumentNullException">Thrown if a or b are null.</exception>
        /// <exception cref="ArgumentException">Thrown if a and b are of different lengths.</exception>
        public static float Canberra(float[] a, float[] b)
        {
            EnsureComparable(a, b);

            float sum = 0f;
            for (var i = 0; i < a.Length; i++)
            {
                var denominator = Math.Abs(a[i]) + Math.Abs(b[i]);

                // A zero denominator means both values are zero: treat 0/0 as 0 rather than NaN.
                // NaN inputs still propagate because NaN != 0 is true.
                if (denominator != 0f)
                {
                    sum += Math.Abs(a[i] - b[i])/denominator;
                }
            }

            return sum;
        }

        /// <summary>
        /// Cosine Distance, representing the angular distance while ignoring the scale.
        /// </summary>
        /// <param name="a">First array.</param>
        /// <param name="b">Second array, same length as <paramref name="a"/>.</param>
        /// <returns>One minus the dot product of a and b divided by the product of their L2-norms.</returns>
        /// <exception cref="ArgumentNullException">Thrown if a or b are null.</exception>
        /// <exception cref="ArgumentException">Thrown if a and b are of different lengths.</exception>
        public static double Cosine(double[] a, double[] b)
        {
            EnsureComparable(a, b);

            var ab = LinearAlgebraControl.Provider.DotProduct(a, b);
            var a2 = LinearAlgebraControl.Provider.DotProduct(a, a);
            var b2 = LinearAlgebraControl.Provider.DotProduct(b, b);
            return 1d - ab/Math.Sqrt(a2*b2);
        }

        /// <summary>
        /// Cosine Distance, representing the angular distance while ignoring the scale.
        /// </summary>
        /// <param name="a">First array.</param>
        /// <param name="b">Second array, same length as <paramref name="a"/>.</param>
        /// <returns>One minus the dot product of a and b divided by the product of their L2-norms.</returns>
        /// <exception cref="ArgumentNullException">Thrown if a or b are null.</exception>
        /// <exception cref="ArgumentException">Thrown if a and b are of different lengths.</exception>
        public static float Cosine(float[] a, float[] b)
        {
            EnsureComparable(a, b);

            var ab = LinearAlgebraControl.Provider.DotProduct(a, b);
            var a2 = LinearAlgebraControl.Provider.DotProduct(a, a);
            var b2 = LinearAlgebraControl.Provider.DotProduct(b, b);

            // Widen before multiplying: the product of two squared norms can exceed the float range.
            return (float)(1d - ab/Math.Sqrt((double)a2*b2));
        }

        /// <summary>
        /// Hamming Distance, i.e. the number of positions that have different values in the vectors.
        /// </summary>
        /// <param name="a">First array.</param>
        /// <param name="b">Second array, same length as <paramref name="a"/>.</param>
        /// <returns>The number of indices at which the two arrays are not exactly equal.</returns>
        /// <exception cref="ArgumentNullException">Thrown if a or b are null.</exception>
        /// <exception cref="ArgumentException">Thrown if a and b are of different lengths.</exception>
        public static double Hamming(double[] a, double[] b)
        {
            EnsureComparable(a, b);

            int count = 0;
            for (int i = 0; i < a.Length; i++)
            {
                if (a[i] != b[i])
                {
                    count++;
                }
            }

            return count;
        }

        /// <summary>
        /// Hamming Distance, i.e. the number of positions that have different values in the vectors.
        /// </summary>
        /// <param name="a">First array.</param>
        /// <param name="b">Second array, same length as <paramref name="a"/>.</param>
        /// <returns>The number of indices at which the two arrays are not exactly equal.</returns>
        /// <exception cref="ArgumentNullException">Thrown if a or b are null.</exception>
        /// <exception cref="ArgumentException">Thrown if a and b are of different lengths.</exception>
        public static float Hamming(float[] a, float[] b)
        {
            EnsureComparable(a, b);

            int count = 0;
            for (int i = 0; i < a.Length; i++)
            {
                if (a[i] != b[i])
                {
                    count++;
                }
            }

            return count;
        }

        /// <summary>
        /// Pearson's distance, i.e. 1 - the Pearson correlation coefficient.
        /// </summary>
        /// <param name="a">First sample sequence.</param>
        /// <param name="b">Second sample sequence.</param>
        /// <returns>One minus the value returned by <c>Correlation.Pearson</c> for the two sequences.</returns>
        public static double Pearson(IEnumerable<double> a, IEnumerable<double> b)
        {
            return 1.0 - Correlation.Pearson(a, b);
        }

        /// <summary>
        /// Jaccard distance, i.e. 1 - the Jaccard index.
        /// </summary>
        /// <param name="a">First array.</param>
        /// <param name="b">Second array, same length as <paramref name="a"/>.</param>
        /// <exception cref="ArgumentNullException">Thrown if a or b are null.</exception>
        /// <exception cref="ArgumentException">Thrown if a and b are of different lengths.</exception>
        /// <returns>
        /// Jaccard distance. A position belongs to the union when at least one of the two values is
        /// non-zero and to the intersection when, in addition, both values are equal. When the union
        /// is empty (empty or all-zero arrays) the distance is 0.
        /// </returns>
        public static double Jaccard(double[] a, double[] b)
        {
            EnsureComparable(a, b);

            int intersection = 0, union = 0;
            for (int x = 0, len = a.Length; x < len; x++)
            {
                // Union: the position is occupied in at least one of the two vectors.
                if (a[x] != 0 || b[x] != 0)
                {
                    if (a[x] == b[x])
                    {
                        intersection++;
                    }

                    union++;
                }
            }

            // Nothing to compare: the vectors are indistinguishable, and 0/0 must not leak out as NaN.
            if (union == 0)
            {
                return 0d;
            }

            return 1.0 - ((double)intersection/union);
        }

        /// <summary>
        /// Jaccard distance, i.e. 1 - the Jaccard index.
        /// </summary>
        /// <param name="a">First array.</param>
        /// <param name="b">Second array, same length as <paramref name="a"/>.</param>
        /// <exception cref="ArgumentNullException">Thrown if a or b are null.</exception>
        /// <exception cref="ArgumentException">Thrown if a and b are of different lengths.</exception>
        /// <returns>
        /// Jaccard distance. A position belongs to the union when at least one of the two values is
        /// non-zero and to the intersection when, in addition, both values are equal. When the union
        /// is empty (empty or all-zero arrays) the distance is 0.
        /// </returns>
        public static double Jaccard(float[] a, float[] b)
        {
            EnsureComparable(a, b);

            int intersection = 0, union = 0;
            for (int x = 0, len = a.Length; x < len; x++)
            {
                // Union: the position is occupied in at least one of the two vectors.
                if (a[x] != 0 || b[x] != 0)
                {
                    if (a[x] == b[x])
                    {
                        intersection++;
                    }

                    union++;
                }
            }

            // Nothing to compare: the vectors are indistinguishable, and 0/0 must not leak out as NaN.
            if (union == 0)
            {
                return 0d;
            }

            // The result is a double, so divide in double precision rather than in float.
            return 1.0 - ((double)intersection/union);
        }

        /// <summary>
        /// Validates that both arrays are present and have the same number of elements.
        /// </summary>
        /// <param name="a">First array.</param>
        /// <param name="b">Second array.</param>
        /// <exception cref="ArgumentNullException">Thrown if a or b are null.</exception>
        /// <exception cref="ArgumentException">Thrown if a and b are of different lengths.</exception>
        private static void EnsureComparable<T>(T[] a, T[] b)
        {
            if (a == null)
            {
                throw new ArgumentNullException(nameof(a));
            }

            if (b == null)
            {
                throw new ArgumentNullException(nameof(b));
            }

            if (a.Length != b.Length)
            {
                throw new ArgumentException("All vectors must have the same dimensionality.");
            }
        }
    }
}