function [x, cost, info] = mbfgs(problem, x, options)
% Manifold BFGS minimization algorithm for Manopt.
%
% function [x, cost, info] = mbfgs(problem)
% function [x, cost, info] = mbfgs(problem, x0)
% function [x, cost, info] = mbfgs(problem, x0, options)
% function [x, cost, info] = mbfgs(problem, [], options)
%
% Apply the BFGS minimization algorithm to the problem defined in the
% problem structure, starting at x0 if it is provided (otherwise, at a
% random point on the manifold). To specify options whilst not specifying
% an initial guess, give x0 as [] (the empty matrix).
%
% Inputs:
%   problem  Manopt problem structure. problem.M is the manifold; cost and
%            gradient are queried through getCostGrad.
%   x        (optional) Initial point. If absent or empty, problem.M.rand()
%            is used.
%   options  (optional) Structure of options. It is merged on top of the
%            global defaults and of the local defaults listed below.
%
% Outputs:
%   x        Last accepted iterate.
%   cost     Cost value at x.
%   info     Struct array with one entry per iteration (entry 1 describes
%            the initial point). Each entry holds the fields iter, cost,
%            gradnorm, stepsize, time (cumulative, in seconds) and
%            linesearch, after being passed through applyStatsfun.
%
% Local default options:
%   minstepsize = 1e-10   Stop when the last step size falls below this.
%   maxiter     = 1000
%   tolgradnorm = 1e-6
%   numgrad     = 20      Forwarded to mbfgs_update (presumably the number
%                         of past updates kept in memory; see that
%                         function to confirm).
%   linesearch  = @linesearch
%
% For more information about the algorithm see:
%  C. Qi, K. A. Gallivan and P.-A. Absil, Riemannian BFGS Algorithm with
%  Applications, in Recent Advances in Optimization and its Applications
%  in Engineering, Springer, 2010.
%
% None of the options are mandatory. See the documentation for details.
%
% For input/output descriptions, stopping criteria, help on picking a line
% search algorithm etc, see the help for steepestdescent.
%
% See also: steepestdescent linesearch
%

% Original author: Reshad Hosseini, Aug. 30, 2013.
% Contributors: 
% Change log: 
%   Reshad Hosseini, Jun.26,2014: Improving speed when "transpf" is present 


% Verify that the problem description is sufficient for the solver.
if ~canGetCost(problem)
    warning('manopt:getCost', ...
        'No cost provided. The algorithm will likely abort.');
end
if ~canGetGradient(problem)
    warning('manopt:getGradient', ...
        'No gradient provided. The algorithm will likely abort.');
end

% Set local defaults here
localdefaults.minstepsize = 1e-10;
localdefaults.maxiter = 1000;
localdefaults.tolgradnorm = 1e-6;
localdefaults.numgrad = 20;

localdefaults.linesearch = @linesearch; %_adaptive

% Merge global and local defaults, then merge w/ user options, if any.
localdefaults = mergeOptions(getGlobalDefaults(), localdefaults);
if ~exist('options', 'var') || isempty(options)
    options = struct();
end
options = mergeOptions(localdefaults, options);

% Create a store database
storedb = struct();

% Time spent on the initial cost/gradient evaluation is charged to the
% statistics of iteration 0 (see savestats).
timetic = tic();

% If no initial point x is given by the user, generate one at random.
if ~exist('x', 'var') || isempty(x)
    x = problem.M.rand();
end

% Compute objective-related quantities for x
[cost, grad, storedb] = getCostGrad(problem, x, storedb);
gradnorm = problem.M.norm(x, grad);

% Iteration counter (at any point, iter is the number of fully executed
% iterations so far)
iter = 0;

% Save stats in a struct array info, and preallocate
% (see http://people.csail.mit.edu/jskelly/blog/?x=entry:entry091030-033941)
stats = savestats();
info(1) = stats;
info(min(10000, options.maxiter+1)).iter = [];

% Initial line search memory
lsmem = [];

if options.verbosity >= 2
    fprintf(' iter\t    cost val\t grad. norm\n');
end

% Compute a normalized descent direction (first step is steepest descent)
desc_dir = problem.M.lincomb(x, -1/gradnorm, grad);

% The manifold may provide a fast transport ("transpf"). This does not
% change during the run, so the test is done once here.
use_transpf = isfield(problem.M, 'transpf');

% Initialize the Hessian approximation and the BFGS history. These
% containers are only read and written through mbfgs_update and
% desc_dir_cal.
H = 1;
x_all = {x};
desc_dir_all = {};
grad_diff_all = {};
ddgd_all = {};
gd_all = {};
Expc_all = {};
Expci_all = {};

% Start iterating until stopping criterion triggers
while true
    
    % Display iteration information
    if options.verbosity >= 2
        fprintf('%5d\t%+.4e\t%.4e\n', iter, cost, gradnorm);
    end
    
    % Start timing this iteration
    timetic = tic();
    
    % Run standard stopping criterion checks
    [stop, reason] = stoppingcriterion(problem, x, options, ...
        info, iter+1);
    % Run specific stopping criterion check. At iter == 0 the step size is
    % NaN, so this comparison is false and does not trigger.
    if ~stop && stats.stepsize < options.minstepsize
        stop = true;
        reason = 'Last stepsize smaller than minimum allowed.';
    end
    
    if stop
        if options.verbosity >= 1
            % Print the reason as data, not as a format string, so that
            % any '%' or '\' it contains is shown verbatim.
            fprintf('%s\n', reason);
        end
        break;
    end
      
    % The line search algorithms require the directional derivative of the
    % cost at the current point x along the search direction.
    df0 = problem.M.inner(x, grad, desc_dir);
    if df0 > 0
        if options.verbosity >= 1
            fprintf(['Line search warning: got an ascent direction ' ...
                '(df0 = %2e), went the other way.\n'], df0);
        end
        desc_dir = problem.M.lincomb(x, -1, desc_dir);
        df0 = -df0;
    end
    
    % The line search algorithms require a normalized search direction and
    % directional derivative
    desc_dir_norm = problem.M.norm(x, desc_dir);
    normd_desc_dir = problem.M.lincomb(x, 1/desc_dir_norm, desc_dir);
    normd_df0 = df0 / desc_dir_norm;
    
    % Execute line search
    [stepsize, newx, storedb, lsmem, lsstats] = options.linesearch( ...
        problem, x, normd_desc_dir, cost, normd_df0, options, storedb, ...
        lsmem);
    
    % Compute the new objective-related quantities for x
    [newcost, newgrad, storedb] = getCostGrad(problem, newx, storedb);
    newgradnorm = problem.M.norm(newx, newgrad);
    
    % Make sure we don't use too much memory for the store database
    storedb = purgeStoredb(storedb, options.storedepth);
    
    % Transport the old gradient to the new point so that it can be
    % subtracted from the new gradient in the same tangent space.
    if use_transpf
        [gradC, Expc, Expci] = problem.M.transpf(x, newx, grad);
    else
        gradC = problem.M.transp(x, newx, grad);
    end
    grad_diff = problem.M.lincomb(newx, 1, newgrad, -1, gradC);
    
    % Multiplying the stepsize in normalized descent direction
    desc_dir_step = problem.M.lincomb(x, stepsize, normd_desc_dir);
    
    % Transport the step to the new point
    if use_transpf
        % NOTE(review): the presence test is on the field 'transpf' but
        % the call below is to 'transpF' (capital F). Field names are
        % case-sensitive; confirm that manifolds offering transpf also
        % offer transpF with this (Expc, vector) signature.
        desc_dir_step = problem.M.transpF(Expc, desc_dir_step);
    else
        desc_dir_step = problem.M.transp(x, newx, desc_dir_step);
    end
    
    % Update the previous saved info
    if use_transpf
        [grad_diff_all, desc_dir_all, x_all, gd_all, ddgd_all, H, ...
            Expc_all, Expci_all] = ...
            mbfgs_update(newx, problem.M, grad_diff, desc_dir_step, ...
            grad_diff_all, desc_dir_all, options.numgrad, x_all, gd_all, ...
            ddgd_all, H, Expc, Expci, Expc_all, Expci_all);
    else
        [grad_diff_all, desc_dir_all, x_all, gd_all, ddgd_all, H] = ...
            mbfgs_update(newx, problem.M, grad_diff, desc_dir_step, ...
            grad_diff_all, desc_dir_all, options.numgrad, x_all, gd_all, ...
            ddgd_all, H);
    end
    % mbfgs_update returning H == 0 aborts the run. The candidate newx is
    % not accepted in that case: x and cost keep the values of the last
    % completed iteration and no statistics are logged for this attempt.
    if H == 0
        break;
    end
    
    % Update BFGS inverse Hessian matrix and descent direction
    %  It is implemented by unrolling the inverse Hessian update
    if isempty(gd_all)
        % No usable history: fall back to the normalized gradient. newgrad
        % is a tangent vector at newx, so newx is the base point here.
        desc_dir = problem.M.lincomb(newx, 1/newgradnorm, newgrad);
    elseif use_transpf
        desc_dir = desc_dir_cal(newgrad, problem.M, grad_diff_all, ...
            desc_dir_all, x_all, ddgd_all, gd_all, length(gd_all), H, ...
            Expc_all, Expci_all);
    else
        desc_dir = desc_dir_cal(newgrad, problem.M, grad_diff_all, ...
            desc_dir_all, x_all, ddgd_all, gd_all, length(gd_all), H);
    end
    
    % The quantity computed above is negated to obtain the search
    % direction used at the next iteration.
    desc_dir = problem.M.lincomb(newx, -1, desc_dir);
    
    % Update iterate info
    x = newx;
    cost = newcost;
    grad = newgrad;
    gradnorm = newgradnorm;
    
    % iter is the number of iterations we have accomplished.
    iter = iter + 1;
    
    % Log statistics for freshly executed iteration
    stats = savestats();
    info(iter+1) = stats; %#ok<AGROW>
    
end

% Drop the unused preallocated entries.
info = info(1:iter+1);

if options.verbosity >= 1
    fprintf('Total time is %f [s] (excludes statsfun)\n', ...
        info(end).time);
end



% Routine in charge of collecting the current iteration stats.
% This is a nested function: it reads iter, cost, gradnorm, stepsize,
% lsstats, timetic, info, problem, x, storedb and options directly from
% the workspace of mbfgs.
    function stats = savestats()
        stats.iter = iter;
        stats.cost = cost;
        stats.gradnorm = gradnorm;
        if iter == 0
            % No step has been taken yet.
            stats.stepsize = nan;
            stats.time = toc(timetic);
            stats.linesearch = [];
        else
            stats.stepsize = stepsize;
            % info(iter) is the entry of the previous iteration, so time
            % accumulates over iterations.
            stats.time = info(iter).time + toc(timetic);
            stats.linesearch = lsstats;
        end
        stats = applyStatsfun(...
            problem, x, storedb, options, stats);
    end

end
