//
// Math.NET Numerics, part of the Math.NET Project
// http://numerics.mathdotnet.com
// http://github.com/mathnet/mathnet-numerics
//
// Copyright (c) 2009-2015 Math.NET
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//
using System;
using System.Collections.Generic;
using IStation.Numerics.LinearAlgebra;
namespace IStation.Numerics.LinearRegression
{
public static class MultipleRegression
{
    /// <summary>
    /// Find the model parameters β such that X*β with predictor X becomes as close to response Y as possible, with least squares residuals.
    /// </summary>
    /// <typeparam name="T">The numeric element type of the matrix and vector.</typeparam>
    /// <param name="x">Predictor matrix X.</param>
    /// <param name="y">Response vector Y.</param>
    /// <param name="method">The direct method to be used to compute the regression.</param>
    /// <returns>Best fitting vector for model parameters β.</returns>
    /// <exception cref="NotSupportedException">Thrown when <paramref name="method"/> is not a known <see cref="DirectRegressionMethod"/>.</exception>
    public static Vector<T> DirectMethod<T>(Matrix<T> x, Vector<T> y, DirectRegressionMethod method = DirectRegressionMethod.NormalEquations) where T : struct, IEquatable<T>, IFormattable
    {
        // Dispatch to the decomposition-specific solver selected by the caller.
        switch (method)
        {
            case DirectRegressionMethod.NormalEquations:
                return NormalEquations(x, y);
            case DirectRegressionMethod.QR:
                return QR(x, y);
            case DirectRegressionMethod.Svd:
                return Svd(x, y);
            default:
                throw new NotSupportedException(method.ToString());
        }
    }

    /// <summary>
    /// Find the model parameters β such that X*β with predictor X becomes as close to response Y as possible, with least squares residuals.
    /// </summary>
    /// <typeparam name="T">The numeric element type of the matrices.</typeparam>
    /// <param name="x">Predictor matrix X.</param>
    /// <param name="y">Response matrix Y.</param>
    /// <param name="method">The direct method to be used to compute the regression.</param>
    /// <returns>Best fitting matrix of model parameters β, one column per response column.</returns>
    /// <exception cref="NotSupportedException">Thrown when <paramref name="method"/> is not a known <see cref="DirectRegressionMethod"/>.</exception>
    public static Matrix<T> DirectMethod<T>(Matrix<T> x, Matrix<T> y, DirectRegressionMethod method = DirectRegressionMethod.NormalEquations) where T : struct, IEquatable<T>, IFormattable
    {
        switch (method)
        {
            case DirectRegressionMethod.NormalEquations:
                return NormalEquations(x, y);
            case DirectRegressionMethod.QR:
                return QR(x, y);
            case DirectRegressionMethod.Svd:
                return Svd(x, y);
            default:
                throw new NotSupportedException(method.ToString());
        }
    }

    /// <summary>
    /// Find the model parameters β such that their linear combination with all predictor-arrays in X become as close to their response in Y as possible, with least squares residuals.
    /// </summary>
    /// <typeparam name="T">The numeric element type of the samples.</typeparam>
    /// <param name="x">List of predictor-arrays.</param>
    /// <param name="y">List of responses.</param>
    /// <param name="intercept">True if an intercept should be added as first artificial predictor value. Default = false.</param>
    /// <param name="method">The direct method to be used to compute the regression.</param>
    /// <returns>Best fitting list of model parameters β for each element in the predictor-arrays.</returns>
    /// <exception cref="NotSupportedException">Thrown when <paramref name="method"/> is not a known <see cref="DirectRegressionMethod"/>.</exception>
    public static T[] DirectMethod<T>(T[][] x, T[] y, bool intercept = false, DirectRegressionMethod method = DirectRegressionMethod.NormalEquations) where T : struct, IEquatable<T>, IFormattable
    {
        switch (method)
        {
            case DirectRegressionMethod.NormalEquations:
                return NormalEquations(x, y, intercept);
            case DirectRegressionMethod.QR:
                return QR(x, y, intercept);
            case DirectRegressionMethod.Svd:
                return Svd(x, y, intercept);
            default:
                throw new NotSupportedException(method.ToString());
        }
    }

    /// <summary>
    /// Find the model parameters β such that their linear combination with all predictor-arrays in X become as close to their response in Y as possible, with least squares residuals.
    /// </summary>
    /// <typeparam name="T">The numeric element type of the samples.</typeparam>
    /// <param name="samples">Sequence of predictor-arrays and their response.</param>
    /// <param name="intercept">True if an intercept should be added as first artificial predictor value. Default = false.</param>
    /// <param name="method">The direct method to be used to compute the regression.</param>
    /// <returns>Best fitting list of model parameters β for each element in the predictor-arrays.</returns>
    /// <exception cref="NotSupportedException">Thrown when <paramref name="method"/> is not a known <see cref="DirectRegressionMethod"/>.</exception>
    public static T[] DirectMethod<T>(IEnumerable<Tuple<T[], T>> samples, bool intercept = false, DirectRegressionMethod method = DirectRegressionMethod.NormalEquations) where T : struct, IEquatable<T>, IFormattable
    {
        switch (method)
        {
            case DirectRegressionMethod.NormalEquations:
                return NormalEquations(samples, intercept);
            case DirectRegressionMethod.QR:
                return QR(samples, intercept);
            case DirectRegressionMethod.Svd:
                return Svd(samples, intercept);
            default:
                throw new NotSupportedException(method.ToString());
        }
    }

    /// <summary>
    /// Find the model parameters β such that X*β with predictor X becomes as close to response Y as possible, with least squares residuals.
    /// Uses the cholesky decomposition of the normal equations.
    /// </summary>
    /// <typeparam name="T">The numeric element type of the matrix and vector.</typeparam>
    /// <param name="x">Predictor matrix X.</param>
    /// <param name="y">Response vector Y.</param>
    /// <returns>Best fitting vector for model parameters β.</returns>
    /// <exception cref="ArgumentException">Thrown when X and Y disagree in sample count, or there are fewer samples than parameters.</exception>
    public static Vector<T> NormalEquations<T>(Matrix<T> x, Vector<T> y) where T : struct, IEquatable<T>, IFormattable
    {
        if (x.RowCount != y.Count)
        {
            throw new ArgumentException($"All sample vectors must have the same length. However, vectors with disagreeing length {x.RowCount} and {y.Count} have been provided. A sample with index i is given by the value at index i of each provided vector.");
        }

        if (x.ColumnCount > y.Count)
        {
            throw new ArgumentException($"A regression of the requested order requires at least {x.ColumnCount} samples. Only {y.Count} samples have been provided.");
        }

        // Solve (XᵀX)β = Xᵀy via Cholesky; fast, but squares the condition number of X.
        return x.TransposeThisAndMultiply(x).Cholesky().Solve(x.TransposeThisAndMultiply(y));
    }

    /// <summary>
    /// Find the model parameters β such that X*β with predictor X becomes as close to response Y as possible, with least squares residuals.
    /// Uses the cholesky decomposition of the normal equations.
    /// </summary>
    /// <typeparam name="T">The numeric element type of the matrices.</typeparam>
    /// <param name="x">Predictor matrix X.</param>
    /// <param name="y">Response matrix Y.</param>
    /// <returns>Best fitting matrix of model parameters β, one column per response column.</returns>
    /// <exception cref="ArgumentException">Thrown when X and Y disagree in sample count, or there are fewer samples than parameters.</exception>
    public static Matrix<T> NormalEquations<T>(Matrix<T> x, Matrix<T> y) where T : struct, IEquatable<T>, IFormattable
    {
        if (x.RowCount != y.RowCount)
        {
            throw new ArgumentException($"All sample vectors must have the same length. However, vectors with disagreeing length {x.RowCount} and {y.RowCount} have been provided. A sample with index i is given by the value at index i of each provided vector.");
        }

        if (x.ColumnCount > y.RowCount)
        {
            throw new ArgumentException($"A regression of the requested order requires at least {x.ColumnCount} samples. Only {y.RowCount} samples have been provided.");
        }

        return x.TransposeThisAndMultiply(x).Cholesky().Solve(x.TransposeThisAndMultiply(y));
    }

    /// <summary>
    /// Find the model parameters β such that their linear combination with all predictor-arrays in X become as close to their response in Y as possible, with least squares residuals.
    /// Uses the cholesky decomposition of the normal equations.
    /// </summary>
    /// <typeparam name="T">The numeric element type of the samples.</typeparam>
    /// <param name="x">List of predictor-arrays.</param>
    /// <param name="y">List of responses.</param>
    /// <param name="intercept">True if an intercept should be added as first artificial predictor value. Default = false.</param>
    /// <returns>Best fitting list of model parameters β for each element in the predictor-arrays.</returns>
    /// <exception cref="ArgumentException">Thrown when X and Y disagree in sample count, or there are fewer samples than parameters.</exception>
    public static T[] NormalEquations<T>(T[][] x, T[] y, bool intercept = false) where T : struct, IEquatable<T>, IFormattable
    {
        var predictor = Matrix<T>.Build.DenseOfRowArrays(x);
        if (intercept)
        {
            // Prepend a constant-one column so the first fitted parameter is the intercept.
            predictor = predictor.InsertColumn(0, Vector<T>.Build.Dense(predictor.RowCount, Vector<T>.One));
        }

        if (predictor.RowCount != y.Length)
        {
            throw new ArgumentException($"All sample vectors must have the same length. However, vectors with disagreeing length {predictor.RowCount} and {y.Length} have been provided. A sample with index i is given by the value at index i of each provided vector.");
        }

        if (predictor.ColumnCount > y.Length)
        {
            throw new ArgumentException($"A regression of the requested order requires at least {predictor.ColumnCount} samples. Only {y.Length} samples have been provided.");
        }

        var response = Vector<T>.Build.Dense(y);
        return predictor.TransposeThisAndMultiply(predictor).Cholesky().Solve(predictor.TransposeThisAndMultiply(response)).ToArray();
    }

    /// <summary>
    /// Find the model parameters β such that their linear combination with all predictor-arrays in X become as close to their response in Y as possible, with least squares residuals.
    /// Uses the cholesky decomposition of the normal equations.
    /// </summary>
    /// <typeparam name="T">The numeric element type of the samples.</typeparam>
    /// <param name="samples">Sequence of predictor-arrays and their response.</param>
    /// <param name="intercept">True if an intercept should be added as first artificial predictor value. Default = false.</param>
    /// <returns>Best fitting list of model parameters β for each element in the predictor-arrays.</returns>
    public static T[] NormalEquations<T>(IEnumerable<Tuple<T[], T>> samples, bool intercept = false) where T : struct, IEquatable<T>, IFormattable
    {
        var xy = samples.UnpackSinglePass();
        return NormalEquations(xy.Item1, xy.Item2, intercept);
    }

    /// <summary>
    /// Find the model parameters β such that X*β with predictor X becomes as close to response Y as possible, with least squares residuals.
    /// Uses an orthogonal decomposition and is therefore more numerically stable than the normal equations but also slower.
    /// </summary>
    /// <typeparam name="T">The numeric element type of the matrix and vector.</typeparam>
    /// <param name="x">Predictor matrix X.</param>
    /// <param name="y">Response vector Y.</param>
    /// <returns>Best fitting vector for model parameters β.</returns>
    /// <exception cref="ArgumentException">Thrown when X and Y disagree in sample count, or there are fewer samples than parameters.</exception>
    public static Vector<T> QR<T>(Matrix<T> x, Vector<T> y) where T : struct, IEquatable<T>, IFormattable
    {
        if (x.RowCount != y.Count)
        {
            throw new ArgumentException($"All sample vectors must have the same length. However, vectors with disagreeing length {x.RowCount} and {y.Count} have been provided. A sample with index i is given by the value at index i of each provided vector.");
        }

        if (x.ColumnCount > y.Count)
        {
            throw new ArgumentException($"A regression of the requested order requires at least {x.ColumnCount} samples. Only {y.Count} samples have been provided.");
        }

        return x.QR().Solve(y);
    }

    /// <summary>
    /// Find the model parameters β such that X*β with predictor X becomes as close to response Y as possible, with least squares residuals.
    /// Uses an orthogonal decomposition and is therefore more numerically stable than the normal equations but also slower.
    /// </summary>
    /// <typeparam name="T">The numeric element type of the matrices.</typeparam>
    /// <param name="x">Predictor matrix X.</param>
    /// <param name="y">Response matrix Y.</param>
    /// <returns>Best fitting matrix of model parameters β, one column per response column.</returns>
    /// <exception cref="ArgumentException">Thrown when X and Y disagree in sample count, or there are fewer samples than parameters.</exception>
    public static Matrix<T> QR<T>(Matrix<T> x, Matrix<T> y) where T : struct, IEquatable<T>, IFormattable
    {
        if (x.RowCount != y.RowCount)
        {
            throw new ArgumentException($"All sample vectors must have the same length. However, vectors with disagreeing length {x.RowCount} and {y.RowCount} have been provided. A sample with index i is given by the value at index i of each provided vector.");
        }

        if (x.ColumnCount > y.RowCount)
        {
            throw new ArgumentException($"A regression of the requested order requires at least {x.ColumnCount} samples. Only {y.RowCount} samples have been provided.");
        }

        return x.QR().Solve(y);
    }

    /// <summary>
    /// Find the model parameters β such that their linear combination with all predictor-arrays in X become as close to their response in Y as possible, with least squares residuals.
    /// Uses an orthogonal decomposition and is therefore more numerically stable than the normal equations but also slower.
    /// </summary>
    /// <typeparam name="T">The numeric element type of the samples.</typeparam>
    /// <param name="x">List of predictor-arrays.</param>
    /// <param name="y">List of responses.</param>
    /// <param name="intercept">True if an intercept should be added as first artificial predictor value. Default = false.</param>
    /// <returns>Best fitting list of model parameters β for each element in the predictor-arrays.</returns>
    /// <exception cref="ArgumentException">Thrown when X and Y disagree in sample count, or there are fewer samples than parameters.</exception>
    public static T[] QR<T>(T[][] x, T[] y, bool intercept = false) where T : struct, IEquatable<T>, IFormattable
    {
        var predictor = Matrix<T>.Build.DenseOfRowArrays(x);
        if (intercept)
        {
            // Prepend a constant-one column so the first fitted parameter is the intercept.
            predictor = predictor.InsertColumn(0, Vector<T>.Build.Dense(predictor.RowCount, Vector<T>.One));
        }

        if (predictor.RowCount != y.Length)
        {
            throw new ArgumentException($"All sample vectors must have the same length. However, vectors with disagreeing length {predictor.RowCount} and {y.Length} have been provided. A sample with index i is given by the value at index i of each provided vector.");
        }

        if (predictor.ColumnCount > y.Length)
        {
            throw new ArgumentException($"A regression of the requested order requires at least {predictor.ColumnCount} samples. Only {y.Length} samples have been provided.");
        }

        return predictor.QR().Solve(Vector<T>.Build.Dense(y)).ToArray();
    }

    /// <summary>
    /// Find the model parameters β such that their linear combination with all predictor-arrays in X become as close to their response in Y as possible, with least squares residuals.
    /// Uses an orthogonal decomposition and is therefore more numerically stable than the normal equations but also slower.
    /// </summary>
    /// <typeparam name="T">The numeric element type of the samples.</typeparam>
    /// <param name="samples">Sequence of predictor-arrays and their response.</param>
    /// <param name="intercept">True if an intercept should be added as first artificial predictor value. Default = false.</param>
    /// <returns>Best fitting list of model parameters β for each element in the predictor-arrays.</returns>
    public static T[] QR<T>(IEnumerable<Tuple<T[], T>> samples, bool intercept = false) where T : struct, IEquatable<T>, IFormattable
    {
        var xy = samples.UnpackSinglePass();
        return QR(xy.Item1, xy.Item2, intercept);
    }

    /// <summary>
    /// Find the model parameters β such that X*β with predictor X becomes as close to response Y as possible, with least squares residuals.
    /// Uses a singular value decomposition and is therefore more numerically stable (especially if ill-conditioned) than the normal equations or QR but also slower.
    /// </summary>
    /// <typeparam name="T">The numeric element type of the matrix and vector.</typeparam>
    /// <param name="x">Predictor matrix X.</param>
    /// <param name="y">Response vector Y.</param>
    /// <returns>Best fitting vector for model parameters β.</returns>
    /// <exception cref="ArgumentException">Thrown when X and Y disagree in sample count, or there are fewer samples than parameters.</exception>
    public static Vector<T> Svd<T>(Matrix<T> x, Vector<T> y) where T : struct, IEquatable<T>, IFormattable
    {
        if (x.RowCount != y.Count)
        {
            throw new ArgumentException($"All sample vectors must have the same length. However, vectors with disagreeing length {x.RowCount} and {y.Count} have been provided. A sample with index i is given by the value at index i of each provided vector.");
        }

        if (x.ColumnCount > y.Count)
        {
            throw new ArgumentException($"A regression of the requested order requires at least {x.ColumnCount} samples. Only {y.Count} samples have been provided.");
        }

        return x.Svd().Solve(y);
    }

    /// <summary>
    /// Find the model parameters β such that X*β with predictor X becomes as close to response Y as possible, with least squares residuals.
    /// Uses a singular value decomposition and is therefore more numerically stable (especially if ill-conditioned) than the normal equations or QR but also slower.
    /// </summary>
    /// <typeparam name="T">The numeric element type of the matrices.</typeparam>
    /// <param name="x">Predictor matrix X.</param>
    /// <param name="y">Response matrix Y.</param>
    /// <returns>Best fitting matrix of model parameters β, one column per response column.</returns>
    /// <exception cref="ArgumentException">Thrown when X and Y disagree in sample count, or there are fewer samples than parameters.</exception>
    public static Matrix<T> Svd<T>(Matrix<T> x, Matrix<T> y) where T : struct, IEquatable<T>, IFormattable
    {
        if (x.RowCount != y.RowCount)
        {
            throw new ArgumentException($"All sample vectors must have the same length. However, vectors with disagreeing length {x.RowCount} and {y.RowCount} have been provided. A sample with index i is given by the value at index i of each provided vector.");
        }

        if (x.ColumnCount > y.RowCount)
        {
            throw new ArgumentException($"A regression of the requested order requires at least {x.ColumnCount} samples. Only {y.RowCount} samples have been provided.");
        }

        return x.Svd().Solve(y);
    }

    /// <summary>
    /// Find the model parameters β such that their linear combination with all predictor-arrays in X become as close to their response in Y as possible, with least squares residuals.
    /// Uses a singular value decomposition and is therefore more numerically stable (especially if ill-conditioned) than the normal equations or QR but also slower.
    /// </summary>
    /// <typeparam name="T">The numeric element type of the samples.</typeparam>
    /// <param name="x">List of predictor-arrays.</param>
    /// <param name="y">List of responses.</param>
    /// <param name="intercept">True if an intercept should be added as first artificial predictor value. Default = false.</param>
    /// <returns>Best fitting list of model parameters β for each element in the predictor-arrays.</returns>
    /// <exception cref="ArgumentException">Thrown when X and Y disagree in sample count, or there are fewer samples than parameters.</exception>
    public static T[] Svd<T>(T[][] x, T[] y, bool intercept = false) where T : struct, IEquatable<T>, IFormattable
    {
        var predictor = Matrix<T>.Build.DenseOfRowArrays(x);
        if (intercept)
        {
            // Prepend a constant-one column so the first fitted parameter is the intercept.
            predictor = predictor.InsertColumn(0, Vector<T>.Build.Dense(predictor.RowCount, Vector<T>.One));
        }

        if (predictor.RowCount != y.Length)
        {
            throw new ArgumentException($"All sample vectors must have the same length. However, vectors with disagreeing length {predictor.RowCount} and {y.Length} have been provided. A sample with index i is given by the value at index i of each provided vector.");
        }

        if (predictor.ColumnCount > y.Length)
        {
            throw new ArgumentException($"A regression of the requested order requires at least {predictor.ColumnCount} samples. Only {y.Length} samples have been provided.");
        }

        return predictor.Svd().Solve(Vector<T>.Build.Dense(y)).ToArray();
    }

    /// <summary>
    /// Find the model parameters β such that their linear combination with all predictor-arrays in X become as close to their response in Y as possible, with least squares residuals.
    /// Uses a singular value decomposition and is therefore more numerically stable (especially if ill-conditioned) than the normal equations or QR but also slower.
    /// </summary>
    /// <typeparam name="T">The numeric element type of the samples.</typeparam>
    /// <param name="samples">Sequence of predictor-arrays and their response.</param>
    /// <param name="intercept">True if an intercept should be added as first artificial predictor value. Default = false.</param>
    /// <returns>Best fitting list of model parameters β for each element in the predictor-arrays.</returns>
    public static T[] Svd<T>(IEnumerable<Tuple<T[], T>> samples, bool intercept = false) where T : struct, IEquatable<T>, IFormattable
    {
        var xy = samples.UnpackSinglePass();
        return Svd(xy.Item1, xy.Item2, intercept);
    }
}
}