LBFGS.h
Go to the documentation of this file.
1//===========================================================================
2/*!
3 *
4 *
5 * \brief LBFGS
6 *
7 * The Limited-Memory Broyden, Fletcher, Goldfarb, Shannon (BFGS) algorithm
8 * is a quasi-Newton method for unconstrained real-valued optimization.
9 * See: http://en.wikipedia.org/wiki/LBFGS for details.
10 *
11 *
12 *
13 * \author S. Dahlgaard, O.Krause
14 * \date 2013
15 *
16 *
17 * \par Copyright 1995-2017 Shark Development Team
18 *
19 * <BR><HR>
20 * This file is part of Shark.
21 * <https://shark-ml.github.io/Shark/>
22 *
23 * Shark is free software: you can redistribute it and/or modify
24 * it under the terms of the GNU Lesser General Public License as published
25 * by the Free Software Foundation, either version 3 of the License, or
26 * (at your option) any later version.
27 *
28 * Shark is distributed in the hope that it will be useful,
29 * but WITHOUT ANY WARRANTY; without even the implied warranty of
30 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31 * GNU Lesser General Public License for more details.
32 *
33 * You should have received a copy of the GNU Lesser General Public License
34 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
35 *
36 */
37//===========================================================================
38
39
40#ifndef SHARK_ML_OPTIMIZER_LBFGS_H
41#define SHARK_ML_OPTIMIZER_LBFGS_H
42
44#include <deque>
45
46namespace shark {
47
48/// \brief Limited-Memory Broyden, Fletcher, Goldfarb, Shannon algorithm.
49///
50/// BFGS is one of the best performing quasi-newton methods. However for large scale
51/// optimization, storing the full hessian approximation is infeasible due to O(n^2) memory requirement.
52/// The L-BFGS algorithm does not store the full hessian approximation but only stores the
53/// data used for updating in the last steps. The matrix itself is then regenerated on-the-fly in
54/// an implicit matrix scheme. This brings runtime and memory rquirements
55/// of a single step down to O(n*hist_size).
56///
57/// The number of steps stored can be set with setHistCount. This is 100 as default.
58///
59/// The algorithm is implemented for unconstrained and constrained optimization
60/// in the case of box constraints. When box constraints are present and the algorithm
61/// encounters a constraint, a dog-leg style algorithm is used:
62///
63/// first, all variables with active constraints (e.g. x_i = l_i and g_i > 0)
64/// are fixed, i.e. p_i = 0. For the remaining variables, the unconstrained optimization
65/// problem is solved. If the solution does not statisfy the box constraints, in the next step
66/// the cauchy point is computed. If the cauchy point is feasible, we search the point
67/// along the line between unconstrained optimum and cauchy point that lies exactly on the constraint.
68/// This is the point with smallest value along the path.
69/// This does not find the true optimal step in the unconstrained problem, however a cheap and reasonably good optimum
70/// which often improves over naive coordinate descent.
71/// \ingroup gradientopt
72template<class SearchPointType = RealVector>
73class LBFGS : public AbstractLineSearchOptimizer<SearchPointType>{
74public:
76
77 LBFGS() :m_numHist(100){
78 this->m_features |= this->CAN_SOLVE_CONSTRAINED;
79 }
80
81 /// \brief From INameable: return the class name.
82 std::string name() const
83 { return "LBFGS"; }
84
85 /// \brief Specify the amount of steps to be memorized and used to find the L-BFGS direction.
86 ///
87 ///\param numhist The amount of steps to use.
88 void setHistCount(unsigned int numhist) {
89 SHARK_RUNTIME_CHECK(numhist > 0, "An empty history is not allowed");
90 m_numHist = numhist;
91 }
92
93 //from ISerializable
94 void read(InArchive &archive);
95 void write(OutArchive &archive) const;
96protected: // Methods inherited from AbstractLineSearchOptimizer
97 void initModel();
99private:
100 ///\brief Stores another step and searchDirection, discarding the oldest on if necessary.
101 ///
102 /// \param step Last performed step
103 /// \param y difference in gradients
104 void updateHist(SearchPointType& y, SearchPointType &step);
105 /// \brief Compute B^{-1}x
106 ///
107 /// The history is used to define B which is easy to invert
108 void multBInv(SearchPointType& searchDirection)const;
109
110 /// \brief Compute Bx
111 void multB(SearchPointType& searchDirection)const;
112
113 /// \brief Get the box-constrained LBFGS direction.
114 ///
115 /// Approximately solves the constrained optimization problem
116 /// min_p 1/2 p^TBp +g^Tp
117 /// s.t. l_i <= x_i + p_i <= u_i
118 /// This is done using a constrained dogleg approach.
119 ///
120 /// first, all variables with active constraints (e.g. x_i = l_i and g_i > 0)
121 /// are fixed, i.e. p_i = 0. For the remaining variables, the unconstrained optimization
122 /// problem is solved. If the solution does not statisfy the box constraints, in the next step
123 /// the cauchy point is computed. If the cauchy point is feasible, we search the point
124 /// along the line between unconstrained optimum and cauchy point that lies exactly on the constraint.
125 /// This is the point with smallest value along the path.
126 void getBoxConstrainedDirection(
127 SearchPointType& searchDirection,
128 SearchPointType const& lower,
129 SearchPointType const& upper
130 )const;
131
132 double m_updThres;///<Threshold for when to update history.
133 unsigned int m_numHist; ///< Number of steps to use for LBFGS.
134 // Initial Hessian approximation. We use a diagonal matrix, where each element is
135 // the same, so we only need to store one double.
136 double m_bdiag;
137
138 // Saved steps for creating the approximation.
139 // Use deque as it gives fast pop.front, push.back and access. Supposedly.
140 // steps holds the values x_(k+1) - x_k
141 // gradientDifferences holds the values g_(k+1) - g_k
142 std::deque<SearchPointType> m_steps;
143 std::deque<SearchPointType> m_gradientDifferences;
144};
145
146//implementation is included in the library
147extern template class LBFGS<RealVector>;
148extern template class LBFGS<FloatVector>;
149
150}
151#endif