LMCMA.h
Go to the documentation of this file.
1/*!
2 * \brief -
3 *
4 * \author Thomas Voss and Christian Igel
5 * \date April 2014
6 *
7 * \par Copyright 1995-2017 Shark Development Team
8 *
9 * <BR><HR>
10 * This file is part of Shark.
11 * <https://shark-ml.github.io/Shark/>
12 *
13 * Shark is free software: you can redistribute it and/or modify
14 * it under the terms of the GNU Lesser General Public License as published
15 * by the Free Software Foundation, either version 3 of the License, or
16 * (at your option) any later version.
17 *
18 * Shark is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License for more details.
22 *
23 * You should have received a copy of the GNU Lesser General Public License
24 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
25 *
26 */
27#ifndef SHARK_ALGORITHMS_DIRECT_SEARCH_LM_CMA_H
28#define SHARK_ALGORITHMS_DIRECT_SEARCH_LM_CMA_H
29
33
37
38
39
40namespace shark {
41
42namespace detail{
43
44///\brief Approximates a Limited Memory Cholesky Matrix from a stream of samples.
45///
46/// Given a set of points \f$ v_i\f$, produces an approximation of the cholesky factor of a matrix:
47/// \f[ AA^T=C= (1-\alpha) C^{t-1} + \alpha* x_{j_t} x_{j_t}^T \f]
48/// here the \f$ j_t \f$ are chosen such to have an approximate distance \f$ N_{steps} \f$. It is assumed
49/// that the \f$x_i \f$ are correlated and thus a big \f$ N_{steps} \f$ tris to get points which are less
50/// correlated. The matrix keeps a set of vectors and decides at every step which is will discard.
51///
52/// This is the corrected algorithm as proposed in
53/// Ilya Loshchilov, "A Computationally Efficient Limited Memory CMA-ES for Large Scale Optimization"
54/// \ingroup singledirect
55class IncrementalCholeskyMatrix{
56public:
57 IncrementalCholeskyMatrix(){}
58 void init (double alpha,std::size_t dimensions, std::size_t numVectors, std::size_t Nsteps){
59 m_vArr.resize(numVectors,dimensions);
60 m_pcArr.resize(numVectors,dimensions);
61 m_b.resize(numVectors);
62 m_d.resize(numVectors);
63 m_l.resize(numVectors);
64 m_j.resize(0);//nothing stored at the bginning
65 m_Nsteps = Nsteps;
66 m_maxStoredVectors = numVectors;
67 m_counter = 0;
68 m_alpha = alpha;
69
70 m_vArr.clear();
71 m_pcArr.clear();
72 m_b.clear();
73 m_d.clear();
74 m_l.clear();
75 }
76
77 //computes x = Az
78 template<class T>
79 void prod(RealVector& x, T const& z)const{
80 x = z;
81 double a = std::sqrt(1-m_alpha);
82 for(std::size_t j=0; j != m_j.size(); j++){
83 std::size_t jcur = m_j[j];
84 double k = m_b(jcur) *inner_prod(row(m_vArr,jcur),z);
85 noalias(x) = a*x+k*row(m_pcArr,jcur);
86 }
87 }
88
89 //computes x= A^{-1}z
90 template<class T>
91 void inv(RealVector& x, T const& z)const{
92 inv(x,z,m_j.size());
93 }
94
95 void update(RealVector const& newPc){
96 std::size_t imin = 0;//the index of the removed point
97 if (m_j.size() < m_maxStoredVectors)
98 {
99 std::size_t index = m_j.size();
100 m_j.push_back(index);
101 imin = index;
102 }
103 else
104 {
105 //find the largest "age"gap between neighbouring points (i.e. the time between insertion)
106 //we want to remove the smallest gap as to make the
107 //time distances as equal as possible
108 std::size_t dmin = m_l[m_j[1]] - m_l[m_j[0]];
109 imin = 1;
110 for(std::size_t j=2; j != m_j.size(); j++)
111 {
112 std::size_t dcur = m_l[m_j[j]] - m_l[m_j[j-1]];
113 if (dcur < dmin)
114 {
115 dmin = dcur;
116 imin = j;
117 }
118 }
119 //if the gap is bigger than Nsteps, we remove the oldest point to
120 //shrink it.
121 if (dmin >= m_Nsteps)
122 imin = 0;
123 //we push all points backwards and append the freed index to the end of the list
124 if (imin != m_j.size()-1)
125 {
126 std::size_t sav = m_j[imin];
127 for(std::size_t j = imin; j != m_j.size()-1; j++)
128 m_j[j] = m_j[j+1];
129 m_j.back() = sav;
130 }
131 }
132 //set the values of the new added index
133 int newidx = m_j.back();
134 m_l[newidx] = m_counter;
135 noalias(row(m_pcArr,newidx)) = newPc;
136 ++m_counter;
137
138 // this procedure recomputes v vectors correctly, in the original LM-CMA-ES they were outdated/corrupted.
139 // all vectors v_k,v_{k+1},...,v_m are corrupted where k=j_imin. it also computes the proper v and b/d values for the newest
140 // inserted vector
141 RealVector v;
142 for(std::size_t i = imin; i != m_j.size(); ++i)
143 {
144 int index = m_j[i];
145 inv(v,row(m_pcArr,index),i);
146 noalias(row(m_vArr,index)) = v;
147
148 double normv2 = norm_sqr(row(m_vArr,index));
149 double c = std::sqrt(1.0-m_alpha);
150 double f = std::sqrt(1+m_alpha/(1-m_alpha)*normv2);
151 m_b[index] = c/normv2*(f-1);
152 m_d[index] = 1/(c*normv2)*(1-1/f);
153 }
154 }
155
156private:
157 template<class T>
158 void inv(RealVector& x, T const& z,std::size_t k)const{
159 x = z;
160 double c= 1.0/std::sqrt(1-m_alpha);
161 for(std::size_t j=0; j != k; j++){// O(m*n)
162 std::size_t jcur = m_j[j];
163 double k = m_d(jcur) * inner_prod(row(m_vArr,jcur),x);
164 noalias(x) = c*x - k*row(m_vArr,jcur);
165 }
166 }
167
168 //variables making up A
169 RealMatrix m_vArr;
170 RealMatrix m_pcArr;
171 RealVector m_b;
172 RealVector m_d;
173
174 //index variables for computation of A
175 std::vector<std::size_t> m_j;
176 std::vector<std::size_t> m_l;
177 std::size_t m_Nsteps;
178 std::size_t m_maxStoredVectors;
179 std::size_t m_counter;
180
181 double m_alpha;
182};
183}
184
185/// \brief Implements a Limited-Memory-CMA
186///
187/// This is the algorithm as proposed in
188/// Ilya Loshchilov, "A Computationally Efficient Limited Memory CMA-ES for Large Scale Optimization"
189/// with a few corrections regarding the covariance matrix update.
190///
191/// The algorithm stores a subset of previous evolution path vectors and approximates the covariance
192/// matrix based on this. This algorithm only requires O(nm) memory, where n is the dimensionality
193/// and n the problem dimensionality. To be more exact, 2*m vectors of size n are stored to calculate
194/// the matrix-vector product with the choelsky factor of the covariance matrix in O(mn).
195///
196/// The algorithm uses the population based step size adaptation strategy as proposed in
197/// the same paper.
198class LMCMA: public AbstractSingleObjectiveOptimizer<RealVector >
199{
200public:
201 /// \brief Default c'tor.
202 LMCMA(random::rng_type& rng = random::globalRng):mpe_rng(&rng){
204 }
205
206
207 /// \brief From INameable: return the class name.
208 std::string name() const
209 { return "LMCMA-ES"; }
210
211 /// \brief Calculates lambda for the supplied dimensionality n.
212 unsigned suggestLambda( unsigned int dimension ) {
213 unsigned lambda = unsigned( 4. + ::floor( 3. * ::log( static_cast<double>( dimension ) ) ) );
214 // heuristic for small search spaces
215 lambda = std::max<unsigned int>( 5, std::min( lambda, dimension ) );
216 return( lambda );
217 }
218
219 /// \brief Calculates mu for the supplied lambda and the recombination strategy.
220 double suggestMu( unsigned int lambda) {
221 return lambda / 2.;
222 }
223
225
226 /// \brief Initializes the algorithm for the supplied objective function.
227 void init( ObjectiveFunctionType const& function, SearchPointType const& p) {
228 unsigned int lambda = suggestLambda( p.size() );
229 unsigned int mu = suggestMu( lambda );
230 init( function,
231 p,
232 lambda,
233 mu,
234 1.0/std::sqrt(double(p.size()))
235 );
236 }
237
238 /// \brief Initializes the algorithm for the supplied objective function.
239 void init(
240 ObjectiveFunctionType const& function,
241 SearchPointType const& initialSearchPoint,
242 unsigned int lambda,
243 double mu,
244 double initialSigma
245 ) {
246 checkFeatures(function);
247
248 m_numberOfVariables = function.numberOfVariables();
249 m_lambda = lambda;
250 m_mu = static_cast<unsigned int>(::floor(mu));
251
252 //set initial point
253 m_mean = initialSearchPoint;
254 m_best.point = initialSearchPoint;
255 m_best.value = function(initialSearchPoint);
256
257 //init step size adaptation
258 m_stepSize.init(initialSigma);
259
260 //weighting of the mu-best individuals
261 m_weights.resize(m_mu);
262 for (unsigned int i = 0; i < m_mu; i++){
263 m_weights(i) = ::log(mu + 0.5) - ::log(1. + i);
264 }
265 m_weights /= sum(m_weights);
266
267 // learning rates
268 m_muEff = 1. / sum(sqr(m_weights)); // equal to sum(m_weights)^2 / sum(sqr(m_weights))
269 double c1 = 1/(10*std::log(m_numberOfVariables+1.0));
270 m_cC =1.0/m_lambda;
271
272 //init variables for covariance matrix update
273 m_evolutionPathC = blas::repeat(0.0,m_numberOfVariables);
274 m_A.init(c1,m_numberOfVariables,lambda,lambda);
275 }
276
277 /// \brief Executes one iteration of the algorithm.
278 void step(ObjectiveFunctionType const& function){
280 std::vector< IndividualType > offspring( m_lambda );
281
282 PenalizingEvaluator penalizingEvaluator;
283 for( unsigned int i = 0; i < offspring.size(); i++ ) {
284 createSample(offspring[i].searchPoint(),offspring[i].chromosome());
285 }
286 penalizingEvaluator( function, offspring.begin(), offspring.end() );
287
288 // Selection and parameter update
289 // opposed to normal CMA selection, we don't remove any indidivudals but only order
290 // them by rank to allow the use of the population based strategy.
291 std::vector< IndividualType > parents( lambda() );
293 selection(offspring.begin(),offspring.end(),parents.begin(), parents.end());
294 updateStrategyParameters( parents );
295
296 //update the best solution found so far.
297 m_best.point= parents[ 0 ].searchPoint();
298 m_best.value= parents[ 0 ].unpenalizedFitness();
299 }
300
301 /// \brief Accesses the current step size.
302 double sigma() const {
303 return m_stepSize.stepSize();
304 }
305
306 /// \brief Accesses the current population mean.
307 RealVector const& mean() const {
308 return m_mean;
309 }
310
311 /// \brief Accesses the current weighting vector.
312 RealVector const& weights() const {
313 return m_weights;
314 }
315
316 /// \brief Accesses the evolution path for the covariance matrix update.
317 RealVector const& evolutionPath() const {
318 return m_evolutionPathC;
319 }
320
321 /// \brief Returns the size of the parent population \f$\mu\f$.
322 unsigned int mu() const {
323 return m_mu;
324 }
325
326 /// \brief Returns a mutabl rference to the size of the parent population \f$\mu\f$.
327 unsigned int& mu(){
328 return m_mu;
329 }
330
331 /// \brief Returns a immutable reference to the size of the offspring population \f$\mu\f$.
332 unsigned int lambda()const{
333 return m_lambda;
334 }
335
336 /// \brief Returns a mutable reference to the size of the offspring population \f$\mu\f$.
337 unsigned int & lambda(){
338 return m_lambda;
339 }
340
341private:
342 /// \brief Updates the strategy parameters based on the supplied offspring population.
343 void updateStrategyParameters( std::vector<Individual<RealVector, double, RealVector> > const& offspring ) {
344 //line 8, creation of the new mean (but not updating the mean of the distribution
345 RealVector m( m_numberOfVariables, 0. );
346 for( unsigned int j = 0; j < mu(); j++ ){
347 noalias(m) += m_weights( j ) * offspring[j].searchPoint();
348 }
349
350 //update evolution path, line 9
351 noalias(m_evolutionPathC) = (1. - m_cC ) * m_evolutionPathC + std::sqrt( m_cC * (2. - m_cC) * m_muEff ) * (m - m_mean) / sigma();
352
353 //update mean now, as oldmean is not needed any more (line 8 continued)
354 m_mean = m;
355
356 //corrected version of lines 10-14- the covariance matrix adaptation
357 //we replace one vector that makes up the approximation of A by the newly updated evolution path
358 m_A.update(m_evolutionPathC);
359
360 //update the step size using the population success rule, line 15-18
361 m_stepSize.update(offspring);
362
363 }
364
365 /// \brief Creates a vector-sample pair x=Az, where z is a gaussian random vector.
366 void createSample(RealVector& x,RealVector& z)const{
367 x.resize(m_numberOfVariables);
368 z.resize(m_numberOfVariables);
369 for(std::size_t i = 0; i != m_numberOfVariables; ++i){
370 z(i) = gauss(*mpe_rng,0,1);
371 }
372 m_A.prod(x,z);
373 noalias(x) = sigma()*x +m_mean;
374 }
375
376 unsigned int m_numberOfVariables; ///< Stores the dimensionality of the search space.
377 unsigned int m_mu; ///< The size of the parent population.
378 unsigned int m_lambda; ///< The size of the offspring population, needs to be larger than mu.
379
380 double m_cC;///< learning rate of the evolution path
381
382
383 detail::IncrementalCholeskyMatrix m_A;
384 PopulationBasedStepSizeAdaptation m_stepSize;///< step size adaptation for the step size sigma()
385
386 RealVector m_mean; ///< current mean of the distribution
387 RealVector m_weights;///< weighting for the mu best individuals
388 double m_muEff;///< effective sample size for the weighted samples
389
390 RealVector m_evolutionPathC;///< evolution path
391
392 random::rng_type* mpe_rng;
393
394};
395
396}
397
398#endif