//
// Math.NET Numerics, part of the Math.NET Project
// http://numerics.mathdotnet.com
// http://github.com/mathnet/mathnet-numerics
//
// Copyright (c) 2009-2017 Math.NET
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//
using System;
using IStation.Numerics.LinearAlgebra;
using IStation.Numerics.LinearAlgebra.Double;
using IStation.Numerics.Optimization.LineSearch;

namespace IStation.Numerics.Optimization
{
    /// <summary>
    /// Broyden-Fletcher-Goldfarb-Shanno (BFGS) solver for finding function minima.
    /// See http://en.wikipedia.org/wiki/Broyden%E2%80%93Fletcher%E2%80%93Goldfarb%E2%80%93Shanno_algorithm
    /// Inspired by implementation: https://github.com/PatWie/CppNumericalSolvers/blob/master/src/BfgsSolver.cpp
    /// </summary>
public static class BfgsSolver
{
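        // Convergence thresholds: iterate until the infinity norm of the gradient
        // falls below GradientTolerance, or stop after MaxIterations iterations.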
private const double GradientTolerance = 1e-5;
private const int MaxIterations = 100000;

        /// <summary>
        /// Finds a minimum of a function by the BFGS quasi-Newton method.
        /// This uses the function and its gradient (partial derivatives in each direction) and approximates the Hessian.
        /// </summary>
        /// <param name="initialGuess">An initial guess</param>
        /// <param name="functionValue">Evaluates the function at a point</param>
        /// <param name="functionGradient">Evaluates the gradient of the function at a point</param>
        /// <returns>The minimum found</returns>
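        /// <example>
        /// A minimal usage sketch minimizing the Rosenbrock function; the caller code is
        /// illustrative only and assumes DenseVector from IStation.Numerics.LinearAlgebra.Double:
        /// <code>
        /// var start = DenseVector.OfArray(new[] { -1.2, 1.0 });
        /// var minimum = BfgsSolver.Solve(
        ///     start,
        ///     x => Math.Pow(1 - x[0], 2) + 100 * Math.Pow(x[1] - x[0] * x[0], 2),
        ///     x => DenseVector.OfArray(new[]
        ///     {
        ///         -2 * (1 - x[0]) - 400 * x[0] * (x[1] - x[0] * x[0]),
        ///         200 * (x[1] - x[0] * x[0])
        ///     }));
        /// // minimum should converge to (1, 1), the global minimizer of the Rosenbrock function
        /// </code>
        /// </example>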
        public static Vector<double> Solve(Vector<double> initialGuess, Func<Vector<double>, double> functionValue, Func<Vector<double>, Vector<double>> functionGradient)
{
var objectiveFunction = ObjectiveFunction.Gradient(functionValue, functionGradient);
objectiveFunction.EvaluateAt(initialGuess);
int dim = initialGuess.Count;
int iter = 0;
// H represents the approximation of the inverse hessian matrix
// it is updated via the Sherman–Morrison formula (http://en.wikipedia.org/wiki/Sherman%E2%80%93Morrison_formula)
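            // (each iteration applies the BFGS inverse update
            //  H <- H - rho*(s*y^T*H + H*y*s^T) + rho^2*(y^T*H*y + 1/rho)*(s*s^T), with rho = 1/(y^T*s))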
            Matrix<double> H = DenseMatrix.CreateIdentity(dim);
            Vector<double> x = initialGuess;
            Vector<double> x_old = x;
            Vector<double> grad;
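            // weak Wolfe line search: c1 = 1e-4 (sufficient decrease), c2 = 0.9 (curvature),
            // parameter tolerance 1e-5, at most 200 line-search iterations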
WolfeLineSearch wolfeLineSearch = new WeakWolfeLineSearch(1e-4, 0.9, 1e-5, 200);
do
{
                // search along the quasi-Newton direction p = -H * grad
                grad = objectiveFunction.Gradient;
                Vector<double> p = -1 * H * grad;
var lineSearchResult = wolfeLineSearch.FindConformingStep(objectiveFunction, p, 1.0);
double rate = lineSearchResult.FinalStep;
x = x + rate * p;
                Vector<double> grad_old = grad;
// update the gradient
objectiveFunction.EvaluateAt(x);
                grad = objectiveFunction.Gradient;
                // s = step taken, y = change in gradient; y*s > 0 under the Wolfe conditions,
                // which keeps the updated H positive definite
                Vector<double> s = x - x_old;
                Vector<double> y = grad - grad_old;
                double rho = 1.0 / (y * s);
                if (iter == 0)
                {
                    // scale the initial identity matrix by (y*s)/(y*y),
                    // a standard heuristic for the first inverse Hessian estimate
                    H = (y * s) / (y * y) * DenseMatrix.CreateIdentity(dim);
                }
                var sM = s.ToColumnMatrix();
                var yM = y.ToColumnMatrix();
                // update the estimate of the inverse Hessian
                H = H
                    - rho * (sM * (yM.TransposeThisAndMultiply(H)) + (H * yM).TransposeAndMultiply(sM))
                    + rho * rho * (y.DotProduct(H * y) + 1.0 / rho) * (sM.TransposeAndMultiply(sM));
x_old = x;
iter++;
}
while ((grad.InfinityNorm() > GradientTolerance) && (iter < MaxIterations));
return x;
}
}
}