include/shark/Algorithms/GradientDescent/LBFGS.h
//===========================================================================
/*!
 *
 *
 * \brief LBFGS
 *
 * The limited-memory Broyden-Fletcher-Goldfarb-Shanno (L-BFGS) algorithm
 * is a quasi-Newton method for unconstrained real-valued optimization.
 * See: http://en.wikipedia.org/wiki/LBFGS for details.
 *
 *
 *
 * \author S. Dahlgaard, O.Krause
 * \date 2013
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <https://shark-ml.github.io/Shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================

#ifndef SHARK_ML_OPTIMIZER_LBFGS_H
#define SHARK_ML_OPTIMIZER_LBFGS_H

#include <shark/Algorithms/GradientDescent/AbstractLineSearchOptimizer.h>
#include <deque>

namespace shark{

/// \brief Limited-memory Broyden-Fletcher-Goldfarb-Shanno (L-BFGS) algorithm.
///
/// BFGS is one of the best performing quasi-Newton methods. However, for large scale
/// optimization, storing the full Hessian approximation is infeasible due to its O(n^2) memory requirement.
/// The L-BFGS algorithm does not store the full Hessian approximation but only the
/// data used for updating it in the last steps. The matrix itself is then regenerated on-the-fly in
/// an implicit matrix scheme. This brings runtime and memory requirements
/// of a single step down to O(n*hist_size).
///
/// The number of steps stored can be set with setHistCount. The default is 100.
///
/// The algorithm is implemented for unconstrained optimization as well as constrained
/// optimization with box constraints. When box constraints are present and the algorithm
/// encounters a constraint, a dog-leg style algorithm is used:
///
/// first, all variables with active constraints (e.g. x_i = l_i and g_i > 0)
/// are fixed, i.e. p_i = 0. For the remaining variables, the unconstrained optimization
/// problem is solved. If the solution does not satisfy the box constraints, the Cauchy point
/// is computed in the next step. If the Cauchy point is feasible, we search for the point
/// along the line between the unconstrained optimum and the Cauchy point that lies exactly on the constraint.
/// This is the point with the smallest value along the path.
/// This does not find the exact solution of the subproblem, but a cheap and reasonably good approximation
/// which often improves over naive coordinate descent.
/// \ingroup gradientopt
template<class SearchPointType = RealVector>
class LBFGS : public AbstractLineSearchOptimizer<SearchPointType>{
public:
	typedef typename AbstractLineSearchOptimizer<SearchPointType>::ObjectiveFunctionType ObjectiveFunctionType;

	LBFGS() :m_numHist(100){
		this->m_features |= this->CAN_SOLVE_CONSTRAINED;
	}

	/// \brief From INameable: return the class name.
	std::string name() const
	{ return "LBFGS"; }

	/// \brief Specify the number of steps to be memorized and used to find the L-BFGS direction.
	///
	/// \param numhist The number of steps to use.
	void setHistCount(unsigned int numhist) {
		SHARK_RUNTIME_CHECK(numhist > 0, "An empty history is not allowed");
		m_numHist = numhist;
	}

	// from ISerializable
	void read(InArchive &archive);
	void write(OutArchive &archive) const;
protected: // Methods inherited from AbstractLineSearchOptimizer
	void initModel();
	void computeSearchDirection(ObjectiveFunctionType const&);
private:
	/// \brief Stores another step and gradient difference, discarding the oldest one if necessary.
	///
	/// \param step Last performed step
	/// \param y Difference in gradients
	void updateHist(SearchPointType& y, SearchPointType& step);
	/// \brief Compute B^{-1}x
	///
	/// The history is used to define B, which is easy to invert.
	void multBInv(SearchPointType& searchDirection) const;

	/// \brief Compute Bx
	void multB(SearchPointType& searchDirection) const;

	/// \brief Get the box-constrained L-BFGS direction.
	///
	/// Approximately solves the constrained optimization problem
	/// min_p 1/2 p^T B p + g^T p
	/// s.t. l_i <= x_i + p_i <= u_i
	/// This is done using a constrained dog-leg approach.
	///
	/// first, all variables with active constraints (e.g. x_i = l_i and g_i > 0)
	/// are fixed, i.e. p_i = 0. For the remaining variables, the unconstrained optimization
	/// problem is solved. If the solution does not satisfy the box constraints, the Cauchy point
	/// is computed in the next step. If the Cauchy point is feasible, we search for the point
	/// along the line between the unconstrained optimum and the Cauchy point that lies exactly on the constraint.
	/// This is the point with the smallest value along the path.
	void getBoxConstrainedDirection(
		SearchPointType& searchDirection,
		SearchPointType const& lower,
		SearchPointType const& upper
	) const;

	double m_updThres; ///< Threshold for when to update the history.
	unsigned int m_numHist; ///< Number of steps to use for L-BFGS.
	// Initial Hessian approximation. We use a diagonal matrix where each element is
	// the same, so we only need to store one double.
	double m_bdiag;

	// Saved steps for creating the approximation.
	// A deque is used because it provides fast pop_front, push_back and random access.
	// m_steps holds the values x_(k+1) - x_k
	// m_gradientDifferences holds the values g_(k+1) - g_k
	std::deque<SearchPointType> m_steps;
	std::deque<SearchPointType> m_gradientDifferences;
};
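
// Illustration (not part of the original header): what a routine like multBInv
// conceptually does. L-BFGS never forms the Hessian approximation B; the stored
// steps s_k and gradient differences y_k define it implicitly, and B^{-1}x is
// obtained with the classic two-loop recursion. The free function below is a
// hypothetical sketch of that recursion over the same data layout (two deques plus
// the diagonal scale m_bdiag); Shark's actual member implementation lives in the
// compiled library, not in this header. inner_prod is assumed to be found via
// argument-dependent lookup for the vector type (it is for RealVector).
template<class Vector>
void twoLoopRecursionSketch(
	std::deque<Vector> const& steps,               // s_k = x_(k+1) - x_k, oldest first
	std::deque<Vector> const& gradientDifferences, // y_k = g_(k+1) - g_k, oldest first
	double bdiag,                                  // diagonal initial approximation B_0 = bdiag * I
	Vector& direction                              // in: a vector x (e.g. the gradient); out: B^{-1} x
){
	std::size_t m = steps.size();
	std::deque<double> alpha(m); // <deque> is already included; <vector> would work as well
	// first loop: newest to oldest history entry
	for(std::size_t i = m; i != 0; --i){
		double rho = 1.0 / inner_prod(gradientDifferences[i - 1], steps[i - 1]);
		alpha[i - 1] = rho * inner_prod(steps[i - 1], direction);
		direction -= alpha[i - 1] * gradientDifferences[i - 1];
	}
	// apply the inverse of the diagonal initial approximation
	direction /= bdiag;
	// second loop: oldest to newest history entry
	for(std::size_t i = 0; i != m; ++i){
		double rho = 1.0 / inner_prod(gradientDifferences[i], steps[i]);
		double beta = rho * inner_prod(gradientDifferences[i], direction);
		direction += (alpha[i] - beta) * steps[i];
	}
}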

// implementation is included in the library
extern template class LBFGS<RealVector>;
extern template class LBFGS<FloatVector>;

}
#endif
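
As a usage illustration (not part of the header file), the following minimal sketch drives the optimizer on the Rosenbrock benchmark that ships with Shark. The Rosenbrock header, the shark::benchmarks namespace, and the objective's init() call are assumptions based on recent Shark versions and may differ in older releases; the optimizer calls (init, step, solution) come from the AbstractLineSearchOptimizer interface, and setHistCount is declared above.

#include <shark/Algorithms/GradientDescent/LBFGS.h>
#include <shark/ObjectiveFunctions/Benchmarks/Rosenbrock.h>
#include <iostream>

int main(){
	// 10-dimensional Rosenbrock function; the namespace may be plain shark:: in older versions
	shark::benchmarks::Rosenbrock objective(10);
	objective.init();                // recent Shark versions initialize the objective before use

	shark::LBFGS<> optimizer;        // SearchPointType defaults to RealVector
	optimizer.setHistCount(20);      // keep the last 20 steps instead of the default 100

	shark::RealVector start(10, 0.5);
	optimizer.init(objective, start);

	// every step performs a line search along the current L-BFGS direction
	for(std::size_t i = 0; i != 100; ++i){
		optimizer.step(objective);
	}
	std::cout << "best value found: " << optimizer.solution().value << std::endl;
}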