// Math.NET Numerics, part of the Math.NET Project
// http://numerics.mathdotnet.com
// http://github.com/mathnet/mathnet-numerics
//
// Copyright (c) 2009-2015 Math.NET
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.

using System;
using System.Collections.Generic;
using IStation.Numerics.LinearAlgebra;

namespace IStation.Numerics.LinearRegression
{
    /// <summary>
    /// Multiple linear regression: finds the model parameters β that minimize the
    /// least-squares residuals of X*β against the response Y, via the normal
    /// equations (Cholesky), QR, or SVD decompositions.
    /// </summary>
    public static class MultipleRegression
    {
        /// <summary>
        /// Find the model parameters β such that X*β with predictor X becomes as close to response Y as possible, with least squares residuals.
        /// </summary>
        /// <param name="x">Predictor matrix X</param>
        /// <param name="y">Response vector Y</param>
        /// <param name="method">The direct method to be used to compute the regression.</param>
        /// <returns>Best fitting vector for model parameters β</returns>
        public static Vector<T> DirectMethod<T>(Matrix<T> x, Vector<T> y, DirectRegressionMethod method = DirectRegressionMethod.NormalEquations)
            where T : struct, IEquatable<T>, IFormattable
        {
            // Dispatch to the concrete solver; each overload validates its own input.
            switch (method)
            {
                case DirectRegressionMethod.NormalEquations:
                    return NormalEquations(x, y);
                case DirectRegressionMethod.QR:
                    return QR(x, y);
                case DirectRegressionMethod.Svd:
                    return Svd(x, y);
                default:
                    throw new NotSupportedException(method.ToString());
            }
        }

        /// <summary>
        /// Find the model parameters β such that X*β with predictor X becomes as close to response Y as possible, with least squares residuals.
        /// </summary>
        /// <param name="x">Predictor matrix X</param>
        /// <param name="y">Response matrix Y</param>
        /// <param name="method">The direct method to be used to compute the regression.</param>
        /// <returns>Best fitting vector for model parameters β</returns>
        public static Matrix<T> DirectMethod<T>(Matrix<T> x, Matrix<T> y, DirectRegressionMethod method = DirectRegressionMethod.NormalEquations)
            where T : struct, IEquatable<T>, IFormattable
        {
            switch (method)
            {
                case DirectRegressionMethod.NormalEquations:
                    return NormalEquations(x, y);
                case DirectRegressionMethod.QR:
                    return QR(x, y);
                case DirectRegressionMethod.Svd:
                    return Svd(x, y);
                default:
                    throw new NotSupportedException(method.ToString());
            }
        }

        /// <summary>
        /// Find the model parameters β such that their linear combination with all predictor-arrays in X become as close to their response in Y as possible, with least squares residuals.
        /// </summary>
        /// <param name="x">List of predictor-arrays.</param>
        /// <param name="y">List of responses</param>
        /// <param name="intercept">True if an intercept should be added as first artificial predictor value. Default = false.</param>
        /// <param name="method">The direct method to be used to compute the regression.</param>
        /// <returns>Best fitting list of model parameters β for each element in the predictor-arrays.</returns>
        public static T[] DirectMethod<T>(T[][] x, T[] y, bool intercept = false, DirectRegressionMethod method = DirectRegressionMethod.NormalEquations)
            where T : struct, IEquatable<T>, IFormattable
        {
            switch (method)
            {
                case DirectRegressionMethod.NormalEquations:
                    return NormalEquations(x, y, intercept);
                case DirectRegressionMethod.QR:
                    return QR(x, y, intercept);
                case DirectRegressionMethod.Svd:
                    return Svd(x, y, intercept);
                default:
                    throw new NotSupportedException(method.ToString());
            }
        }

        /// <summary>
        /// Find the model parameters β such that their linear combination with all predictor-arrays in X become as close to their response in Y as possible, with least squares residuals.
        /// Uses the cholesky decomposition of the normal equations.
        /// </summary>
        /// <param name="samples">Sequence of predictor-arrays and their response.</param>
        /// <param name="intercept">True if an intercept should be added as first artificial predictor value. Default = false.</param>
        /// <param name="method">The direct method to be used to compute the regression.</param>
        /// <returns>Best fitting list of model parameters β for each element in the predictor-arrays.</returns>
        public static T[] DirectMethod<T>(IEnumerable<Tuple<T[], T>> samples, bool intercept = false, DirectRegressionMethod method = DirectRegressionMethod.NormalEquations)
            where T : struct, IEquatable<T>, IFormattable
        {
            switch (method)
            {
                case DirectRegressionMethod.NormalEquations:
                    return NormalEquations(samples, intercept);
                case DirectRegressionMethod.QR:
                    return QR(samples, intercept);
                case DirectRegressionMethod.Svd:
                    return Svd(samples, intercept);
                default:
                    throw new NotSupportedException(method.ToString());
            }
        }

        /// <summary>
        /// Find the model parameters β such that X*β with predictor X becomes as close to response Y as possible, with least squares residuals.
        /// Uses the cholesky decomposition of the normal equations.
        /// </summary>
        /// <param name="x">Predictor matrix X</param>
        /// <param name="y">Response vector Y</param>
        /// <returns>Best fitting vector for model parameters β</returns>
        public static Vector<T> NormalEquations<T>(Matrix<T> x, Vector<T> y)
            where T : struct, IEquatable<T>, IFormattable
        {
            if (x.RowCount != y.Count)
            {
                throw new ArgumentException($"All sample vectors must have the same length. However, vectors with disagreeing length {x.RowCount} and {y.Count} have been provided. A sample with index i is given by the value at index i of each provided vector.");
            }

            if (x.ColumnCount > y.Count)
            {
                throw new ArgumentException($"A regression of the requested order requires at least {x.ColumnCount} samples. Only {y.Count} samples have been provided.");
            }

            // Solve (XᵀX)β = Xᵀy; fast, but squares the condition number of X.
            return x.TransposeThisAndMultiply(x).Cholesky().Solve(x.TransposeThisAndMultiply(y));
        }

        /// <summary>
        /// Find the model parameters β such that X*β with predictor X becomes as close to response Y as possible, with least squares residuals.
        /// Uses the cholesky decomposition of the normal equations.
        /// </summary>
        /// <param name="x">Predictor matrix X</param>
        /// <param name="y">Response matrix Y</param>
        /// <returns>Best fitting vector for model parameters β</returns>
        public static Matrix<T> NormalEquations<T>(Matrix<T> x, Matrix<T> y)
            where T : struct, IEquatable<T>, IFormattable
        {
            if (x.RowCount != y.RowCount)
            {
                throw new ArgumentException($"All sample vectors must have the same length. However, vectors with disagreeing length {x.RowCount} and {y.RowCount} have been provided. A sample with index i is given by the value at index i of each provided vector.");
            }

            if (x.ColumnCount > y.RowCount)
            {
                throw new ArgumentException($"A regression of the requested order requires at least {x.ColumnCount} samples. Only {y.RowCount} samples have been provided.");
            }

            return x.TransposeThisAndMultiply(x).Cholesky().Solve(x.TransposeThisAndMultiply(y));
        }

        /// <summary>
        /// Find the model parameters β such that their linear combination with all predictor-arrays in X become as close to their response in Y as possible, with least squares residuals.
        /// Uses the cholesky decomposition of the normal equations.
        /// </summary>
        /// <param name="x">List of predictor-arrays.</param>
        /// <param name="y">List of responses</param>
        /// <param name="intercept">True if an intercept should be added as first artificial predictor value. Default = false.</param>
        /// <returns>Best fitting list of model parameters β for each element in the predictor-arrays.</returns>
        public static T[] NormalEquations<T>(T[][] x, T[] y, bool intercept = false)
            where T : struct, IEquatable<T>, IFormattable
        {
            var predictor = Matrix<T>.Build.DenseOfRowArrays(x);
            if (intercept)
            {
                // Prepend a constant-one column so β[0] becomes the intercept term.
                predictor = predictor.InsertColumn(0, Vector<T>.Build.Dense(predictor.RowCount, Vector<T>.One));
            }

            if (predictor.RowCount != y.Length)
            {
                throw new ArgumentException($"All sample vectors must have the same length. However, vectors with disagreeing length {predictor.RowCount} and {y.Length} have been provided. A sample with index i is given by the value at index i of each provided vector.");
            }

            if (predictor.ColumnCount > y.Length)
            {
                throw new ArgumentException($"A regression of the requested order requires at least {predictor.ColumnCount} samples. Only {y.Length} samples have been provided.");
            }

            var response = Vector<T>.Build.Dense(y);
            return predictor.TransposeThisAndMultiply(predictor).Cholesky().Solve(predictor.TransposeThisAndMultiply(response)).ToArray();
        }

        /// <summary>
        /// Find the model parameters β such that their linear combination with all predictor-arrays in X become as close to their response in Y as possible, with least squares residuals.
        /// Uses the cholesky decomposition of the normal equations.
        /// </summary>
        /// <param name="samples">Sequence of predictor-arrays and their response.</param>
        /// <param name="intercept">True if an intercept should be added as first artificial predictor value. Default = false.</param>
        /// <returns>Best fitting list of model parameters β for each element in the predictor-arrays.</returns>
        public static T[] NormalEquations<T>(IEnumerable<Tuple<T[], T>> samples, bool intercept = false)
            where T : struct, IEquatable<T>, IFormattable
        {
            // Materialize the sample stream once into (predictors, responses).
            var xy = samples.UnpackSinglePass();
            return NormalEquations(xy.Item1, xy.Item2, intercept);
        }

        /// <summary>
        /// Find the model parameters β such that X*β with predictor X becomes as close to response Y as possible, with least squares residuals.
        /// Uses an orthogonal decomposition and is therefore more numerically stable than the normal equations but also slower.
        /// </summary>
        /// <param name="x">Predictor matrix X</param>
        /// <param name="y">Response vector Y</param>
        /// <returns>Best fitting vector for model parameters β</returns>
        public static Vector<T> QR<T>(Matrix<T> x, Vector<T> y)
            where T : struct, IEquatable<T>, IFormattable
        {
            if (x.RowCount != y.Count)
            {
                throw new ArgumentException($"All sample vectors must have the same length. However, vectors with disagreeing length {x.RowCount} and {y.Count} have been provided. A sample with index i is given by the value at index i of each provided vector.");
            }

            if (x.ColumnCount > y.Count)
            {
                throw new ArgumentException($"A regression of the requested order requires at least {x.ColumnCount} samples. Only {y.Count} samples have been provided.");
            }

            return x.QR().Solve(y);
        }

        /// <summary>
        /// Find the model parameters β such that X*β with predictor X becomes as close to response Y as possible, with least squares residuals.
        /// Uses an orthogonal decomposition and is therefore more numerically stable than the normal equations but also slower.
        /// </summary>
        /// <param name="x">Predictor matrix X</param>
        /// <param name="y">Response matrix Y</param>
        /// <returns>Best fitting vector for model parameters β</returns>
        public static Matrix<T> QR<T>(Matrix<T> x, Matrix<T> y)
            where T : struct, IEquatable<T>, IFormattable
        {
            if (x.RowCount != y.RowCount)
            {
                throw new ArgumentException($"All sample vectors must have the same length. However, vectors with disagreeing length {x.RowCount} and {y.RowCount} have been provided. A sample with index i is given by the value at index i of each provided vector.");
            }

            if (x.ColumnCount > y.RowCount)
            {
                throw new ArgumentException($"A regression of the requested order requires at least {x.ColumnCount} samples. Only {y.RowCount} samples have been provided.");
            }

            return x.QR().Solve(y);
        }

        /// <summary>
        /// Find the model parameters β such that their linear combination with all predictor-arrays in X become as close to their response in Y as possible, with least squares residuals.
        /// Uses an orthogonal decomposition and is therefore more numerically stable than the normal equations but also slower.
        /// </summary>
        /// <param name="x">List of predictor-arrays.</param>
        /// <param name="y">List of responses</param>
        /// <param name="intercept">True if an intercept should be added as first artificial predictor value. Default = false.</param>
        /// <returns>Best fitting list of model parameters β for each element in the predictor-arrays.</returns>
        public static T[] QR<T>(T[][] x, T[] y, bool intercept = false)
            where T : struct, IEquatable<T>, IFormattable
        {
            var predictor = Matrix<T>.Build.DenseOfRowArrays(x);
            if (intercept)
            {
                // Prepend a constant-one column so β[0] becomes the intercept term.
                predictor = predictor.InsertColumn(0, Vector<T>.Build.Dense(predictor.RowCount, Vector<T>.One));
            }

            if (predictor.RowCount != y.Length)
            {
                throw new ArgumentException($"All sample vectors must have the same length. However, vectors with disagreeing length {predictor.RowCount} and {y.Length} have been provided. A sample with index i is given by the value at index i of each provided vector.");
            }

            if (predictor.ColumnCount > y.Length)
            {
                throw new ArgumentException($"A regression of the requested order requires at least {predictor.ColumnCount} samples. Only {y.Length} samples have been provided.");
            }

            return predictor.QR().Solve(Vector<T>.Build.Dense(y)).ToArray();
        }

        /// <summary>
        /// Find the model parameters β such that their linear combination with all predictor-arrays in X become as close to their response in Y as possible, with least squares residuals.
        /// Uses an orthogonal decomposition and is therefore more numerically stable than the normal equations but also slower.
        /// </summary>
        /// <param name="samples">Sequence of predictor-arrays and their response.</param>
        /// <param name="intercept">True if an intercept should be added as first artificial predictor value. Default = false.</param>
        /// <returns>Best fitting list of model parameters β for each element in the predictor-arrays.</returns>
        public static T[] QR<T>(IEnumerable<Tuple<T[], T>> samples, bool intercept = false)
            where T : struct, IEquatable<T>, IFormattable
        {
            var xy = samples.UnpackSinglePass();
            return QR(xy.Item1, xy.Item2, intercept);
        }

        /// <summary>
        /// Find the model parameters β such that X*β with predictor X becomes as close to response Y as possible, with least squares residuals.
        /// Uses a singular value decomposition and is therefore more numerically stable (especially if ill-conditioned) than the normal equations or QR but also slower.
        /// </summary>
        /// <param name="x">Predictor matrix X</param>
        /// <param name="y">Response vector Y</param>
        /// <returns>Best fitting vector for model parameters β</returns>
        public static Vector<T> Svd<T>(Matrix<T> x, Vector<T> y)
            where T : struct, IEquatable<T>, IFormattable
        {
            if (x.RowCount != y.Count)
            {
                throw new ArgumentException($"All sample vectors must have the same length. However, vectors with disagreeing length {x.RowCount} and {y.Count} have been provided. A sample with index i is given by the value at index i of each provided vector.");
            }

            if (x.ColumnCount > y.Count)
            {
                throw new ArgumentException($"A regression of the requested order requires at least {x.ColumnCount} samples. Only {y.Count} samples have been provided.");
            }

            return x.Svd().Solve(y);
        }

        /// <summary>
        /// Find the model parameters β such that X*β with predictor X becomes as close to response Y as possible, with least squares residuals.
        /// Uses a singular value decomposition and is therefore more numerically stable (especially if ill-conditioned) than the normal equations or QR but also slower.
        /// </summary>
        /// <param name="x">Predictor matrix X</param>
        /// <param name="y">Response matrix Y</param>
        /// <returns>Best fitting vector for model parameters β</returns>
        public static Matrix<T> Svd<T>(Matrix<T> x, Matrix<T> y)
            where T : struct, IEquatable<T>, IFormattable
        {
            if (x.RowCount != y.RowCount)
            {
                throw new ArgumentException($"All sample vectors must have the same length. However, vectors with disagreeing length {x.RowCount} and {y.RowCount} have been provided. A sample with index i is given by the value at index i of each provided vector.");
            }

            if (x.ColumnCount > y.RowCount)
            {
                throw new ArgumentException($"A regression of the requested order requires at least {x.ColumnCount} samples. Only {y.RowCount} samples have been provided.");
            }

            return x.Svd().Solve(y);
        }

        /// <summary>
        /// Find the model parameters β such that their linear combination with all predictor-arrays in X become as close to their response in Y as possible, with least squares residuals.
        /// Uses a singular value decomposition and is therefore more numerically stable (especially if ill-conditioned) than the normal equations or QR but also slower.
        /// </summary>
        /// <param name="x">List of predictor-arrays.</param>
        /// <param name="y">List of responses</param>
        /// <param name="intercept">True if an intercept should be added as first artificial predictor value. Default = false.</param>
        /// <returns>Best fitting list of model parameters β for each element in the predictor-arrays.</returns>
        public static T[] Svd<T>(T[][] x, T[] y, bool intercept = false)
            where T : struct, IEquatable<T>, IFormattable
        {
            var predictor = Matrix<T>.Build.DenseOfRowArrays(x);
            if (intercept)
            {
                // Prepend a constant-one column so β[0] becomes the intercept term.
                predictor = predictor.InsertColumn(0, Vector<T>.Build.Dense(predictor.RowCount, Vector<T>.One));
            }

            if (predictor.RowCount != y.Length)
            {
                throw new ArgumentException($"All sample vectors must have the same length. However, vectors with disagreeing length {predictor.RowCount} and {y.Length} have been provided. A sample with index i is given by the value at index i of each provided vector.");
            }

            if (predictor.ColumnCount > y.Length)
            {
                throw new ArgumentException($"A regression of the requested order requires at least {predictor.ColumnCount} samples. Only {y.Length} samples have been provided.");
            }

            return predictor.Svd().Solve(Vector<T>.Build.Dense(y)).ToArray();
        }

        /// <summary>
        /// Find the model parameters β such that their linear combination with all predictor-arrays in X become as close to their response in Y as possible, with least squares residuals.
        /// Uses a singular value decomposition and is therefore more numerically stable (especially if ill-conditioned) than the normal equations or QR but also slower.
        /// </summary>
        /// <param name="samples">Sequence of predictor-arrays and their response.</param>
        /// <param name="intercept">True if an intercept should be added as first artificial predictor value. Default = false.</param>
        /// <returns>Best fitting list of model parameters β for each element in the predictor-arrays.</returns>
        public static T[] Svd<T>(IEnumerable<Tuple<T[], T>> samples, bool intercept = false)
            where T : struct, IEquatable<T>, IFormattable
        {
            var xy = samples.UnpackSinglePass();
            return Svd(xy.Item1, xy.Item2, intercept);
        }
    }
}