VDCMA.h
/*!
 * \brief Implements the VD-CMA-ES Algorithm
 *
 * \author Oswin Krause
 * \date April 2014
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <https://shark-ml.github.io/Shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */

#ifndef SHARK_ALGORITHMS_DIRECT_SEARCH_VD_CMA_H
#define SHARK_ALGORITHMS_DIRECT_SEARCH_VD_CMA_H

#include <shark/Algorithms/AbstractSingleObjectiveOptimizer.h>
#include <shark/Algorithms/DirectSearch/Operators/Evaluation/PenalizingEvaluator.h>
#include <shark/Algorithms/DirectSearch/Operators/Selection/ElitistSelection.h>
#include <shark/Algorithms/DirectSearch/Individual.h>
#include <shark/Core/Random.h>

/// \brief Implements the VD-CMA-ES Algorithm
///
/// The VD-CMA-ES implements a restricted form of the CMA-ES where the covariance matrix is restricted to the form D(I+vv^T)D,
/// where D is a diagonal matrix and v a single vector. Therefore this variant is capable of large-scale optimisation.
///
/// For more details, see the paper:
/// Akimoto, Y., A. Auger, and N. Hansen (2014). Comparison-Based Natural Gradient Optimization in High Dimension.
/// In Genetic and Evolutionary Computation Conference (GECCO 2014), Proceedings, ACM.
///
/// The implementation differs from the paper in order to stay closer to the reference implementation and to achieve
/// better numerical accuracy.
/// \ingroup singledirect
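///
/// A minimal usage sketch, assuming a Shark benchmark function such as shark::benchmarks::Sphere (the benchmark
/// header and namespace are version-dependent and serve only as an illustration here):
/// \code
/// shark::benchmarks::Sphere objective(100);       // 100-dimensional sphere test problem (namespace may differ by version)
/// objective.init();
/// shark::VDCMA vdcma;
/// vdcma.setInitialSigma(0.1);                     // optional; 0 keeps the default sigma = 1/sqrt(N)
/// vdcma.init(objective, objective.proposeStartingPoint());
/// for(std::size_t i = 0; i != 1000; ++i){
///     vdcma.step(objective);
/// }
/// double best = vdcma.solution().value;           // best objective value found so far
/// \endcode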
namespace shark {
class VDCMA : public AbstractSingleObjectiveOptimizer<RealVector >
{
private:
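    // approximates the expected length E[||N(0,I)||] of an n-dimensional standard normally distributed vector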
    double chi( std::size_t n ) {
        return( std::sqrt( static_cast<double>( n ) )*(1. - 1./(4.*n) + 1./(21.*n*n)) );
    }
public:

    /// \brief Default c'tor.
    VDCMA(random::rng_type& rng = random::globalRng):m_initialSigma(0.0), mpe_rng(&rng){
    }

    /// \brief From INameable: return the class name.
    std::string name() const
    { return "VDCMA-ES"; }

    /// \brief Calculates lambda for the supplied dimensionality n.
    std::size_t suggestLambda( std::size_t dimension ) {
        return std::size_t( 4. + ::floor( 3. * ::log( static_cast<double>( dimension ) ) ) ); // eq. (44)
    }

    /// \brief Calculates mu for the supplied lambda and the recombination strategy.
    std::size_t suggestMu( std::size_t lambda) {
        return lambda / 2; // eq. (44)
    }

    // make the init overloads of the base class visible alongside the overloads defined below
    using AbstractSingleObjectiveOptimizer<RealVector >::init;

    void init( ObjectiveFunctionType const& function, SearchPointType const& p) {
        checkFeatures(function);

        std::size_t lambda = suggestLambda( p.size() );
        std::size_t mu = suggestMu( lambda );
        double sigma = m_initialSigma;
        if(m_initialSigma == 0)
            sigma = 1.0/std::sqrt(double(p.size()));

        init( function,
            p,
            lambda,
            mu,
            sigma
        );
    }

    /// \brief Initializes the algorithm for the supplied objective function.
    void init(
        ObjectiveFunctionType const& function,
        SearchPointType const& initialSearchPoint,
        std::size_t lambda,
        std::size_t mu,
        double initialSigma
    ) {

        m_numberOfVariables = function.numberOfVariables();
        m_lambda = lambda;
        m_mu = mu;
        m_sigma = initialSigma;

        m_mean = blas::repeat(0.0,m_numberOfVariables);
        m_vn.resize(m_numberOfVariables);
        for(std::size_t i = 0; i != m_numberOfVariables;++i){
            m_vn(i) = random::uni(*mpe_rng,0,1.0/m_numberOfVariables);
        }
        m_normv = norm_2(m_vn);
        m_vn /= m_normv;

        m_D = blas::repeat(1.0,m_numberOfVariables);
        m_evolutionPathC = blas::repeat(0.0,m_numberOfVariables);
        m_evolutionPathSigma = blas::repeat(0.0,m_numberOfVariables);

        //set initial point
        m_mean = initialSearchPoint;
        m_best.point = initialSearchPoint;
        m_best.value = function(initialSearchPoint);

        m_counter = 0;//first iteration

        //weighting of the mu-best individuals
        m_weights.resize(m_mu);
        for (std::size_t i = 0; i < m_mu; i++){
            m_weights(i) = ::log(mu + 0.5) - ::log(1. + i);
        }
        m_weights /= sum(m_weights);

        // constants based on (4) and Step 3 in the algorithm
        m_muEff = 1. / sum(sqr(m_weights)); // equal to sum(m_weights)^2 / sum(sqr(m_weights))
        m_cSigma = 0.5/(1+std::sqrt(m_numberOfVariables/m_muEff));
        m_dSigma = 1. + 2. * std::max(0., std::sqrt((m_muEff-1.)/(m_numberOfVariables+1)) - 1.) + m_cSigma;

        m_cC = (4. + m_muEff / m_numberOfVariables) / (m_numberOfVariables + 4. + 2 * m_muEff / m_numberOfVariables);
        double correction = (m_numberOfVariables - 5.0)/6.0;
        m_c1 = correction*2 / (sqr(m_numberOfVariables + 1.3) + m_muEff);
        m_cMu = std::min(1. - m_c1, correction* 2 * (m_muEff - 2. + 1./m_muEff) / (sqr(m_numberOfVariables + 2) + m_muEff));
    }

    /// \brief Executes one iteration of the algorithm.
    void step(ObjectiveFunctionType const& function){
        typedef Individual<RealVector, double, RealVector> IndividualType;
        std::vector< IndividualType > offspring( m_lambda );

        PenalizingEvaluator penalizingEvaluator;
        for( std::size_t i = 0; i < offspring.size(); i++ ) {
            createSample(offspring[i].searchPoint(),offspring[i].chromosome());
        }
        penalizingEvaluator( function, offspring.begin(), offspring.end() );

        // Selection
        std::vector< IndividualType > parents( m_mu );
        ElitistSelection<IndividualType::FitnessOrdering> selection;
        selection(offspring.begin(),offspring.end(),parents.begin(), parents.end());
        // Strategy parameter update
        m_counter++; // increase generation counter
        updateStrategyParameters( parents );

        m_best.point = parents[ 0 ].searchPoint();
        m_best.value = parents[ 0 ].unpenalizedFitness();
    }

    /// \brief Accesses the current step size.
    double sigma() const {
        return m_sigma;
    }

    /// \brief Sets the current step size.
    void setSigma(double sigma) {
        m_sigma = sigma;
    }

    /// \brief Sets the initial step size of the algorithm.
    ///
    /// Sets the initial sigma used by init() to the given value. If this is 0, which it is
    /// by default, the default initialisation sigma = 1/sqrt(N) is used, where N
    /// is the number of variables to optimize.
    ///
    /// Calling this method is preferred over supplying sigma to init() directly.
    void setInitialSigma(double initialSigma){
        m_initialSigma = initialSigma;
    }


    /// \brief Accesses the current population mean.
    RealVector const& mean() const {
        return m_mean;
    }

    /// \brief Accesses the current weighting vector.
    RealVector const& weights() const {
        return m_weights;
    }

    /// \brief Accesses the evolution path for the covariance matrix update.
    RealVector const& evolutionPath() const {
        return m_evolutionPathC;
    }

    /// \brief Accesses the evolution path for the step size update.
    RealVector const& evolutionPathSigma() const {
        return m_evolutionPathSigma;
    }

    ///\brief Returns the size of the parent population \f$\mu\f$.
    std::size_t mu() const {
        return m_mu;
    }

    ///\brief Returns a mutable reference to the size of the parent population \f$\mu\f$.
    std::size_t& mu(){
        return m_mu;
    }

    ///\brief Returns the size of the offspring population \f$\lambda\f$.
    std::size_t lambda()const{
        return m_lambda;
    }

    ///\brief Returns a mutable reference to the size of the offspring population \f$\lambda\f$.
    std::size_t & lambda(){
        return m_lambda;
    }

private:
    /// \brief Updates the strategy parameters based on the supplied offspring population.
    ///
    /// The chromosome stores the y-vector that is the step from the mean in the D=1, sigma=1 space.
    void updateStrategyParameters( std::vector<Individual<RealVector, double, RealVector> >& offspring ) {
        RealVector m( m_numberOfVariables, 0. );
        RealVector z( m_numberOfVariables, 0. );

        for( std::size_t j = 0; j < offspring.size(); j++ ){
            noalias(m) += m_weights( j ) * offspring[j].searchPoint();
            noalias(z) += m_weights( j ) * offspring[j].chromosome();
        }
        //compute z from y = (I+(sqrt(1+||v||^2)-1)v_n v_n^T)z,
        //therefore z = (I+(1/sqrt(1+||v||^2)-1)v_n v_n^T)y
        double b=(1/std::sqrt(1+sqr(m_normv))-1);
        noalias(z)+= b*inner_prod(z,m_vn)*m_vn;

        //update paths
        noalias(m_evolutionPathSigma) = (1. - m_cSigma)*m_evolutionPathSigma + std::sqrt( m_cSigma * (2. - m_cSigma) * m_muEff ) * z;
        // compute h_sigma
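        // h_sigma stalls the rank-one update of the evolution path p_c while the step-size path is still long
        // (e.g. directly after initialisation), preventing an overly fast growth of the (v,D) model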
        double hSigLHS = norm_2( m_evolutionPathSigma ) / std::sqrt(1. - std::pow((1 - m_cSigma), 2.*(m_counter+1)));
        double hSigRHS = (1.4 + 2 / (m_numberOfVariables+1.)) * chi( m_numberOfVariables );
        double hSig = 0;
        if(hSigLHS < hSigRHS) hSig = 1.;
        noalias(m_evolutionPathC) = (1. - m_cC ) * m_evolutionPathC + hSig * std::sqrt( m_cC * (2. - m_cC) * m_muEff ) * (m - m_mean) / m_sigma;

        //we split the computation of s and t in the paper into two parts:
        //we compute the first two steps, then compute the weighted mean over samples and
        //evolution path, and afterwards compute the remaining steps using that mean result.
        //the paper describes this as first computing S and T for all samples and then taking the weighted
        //mean of those, but the reference implementation does it the other way around to prevent numerical instabilities
        RealVector meanS(m_numberOfVariables,0.0);
        RealVector meanT(m_numberOfVariables,0.0);
        for(std::size_t j = 0; j != mu(); ++j){
            computeSAndTFirst(offspring[j].chromosome(),meanS,meanT,m_cMu*m_weights(j));
        }
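        //add the rank-one contribution of the evolution path; dividing by m_D maps the path into the same
        //D=1 coordinate system as the sampled y-vectors, and hSig*m_c1 is the (gated) rank-one learning rate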
        computeSAndTFirst(m_evolutionPathC/m_D,meanS,meanT,hSig*m_c1);

        //compute the remaining mean S and T steps
        computeSAndTSecond(meanS,meanT);

        //compute update to v and d
        noalias(m_D) += m_D*meanS;
        noalias(m_vn) = m_vn*m_normv+meanT/m_normv;//result is v and not vn
        //store the new v separately as vn and its norm
        m_normv = norm_2(m_vn);
        m_vn /= m_normv;

        //update step length
        m_sigma *= std::exp( (m_cSigma / m_dSigma) * (norm_2(m_evolutionPathSigma)/ chi( m_numberOfVariables ) - 1.) ); // eq. (39)

        //update mean
        m_mean = m;
    }

    //samples a point x and additionally stores y=(x-m_mean)/(sigma*D),
    //as this is required for calculations later
    void createSample(RealVector& x,RealVector& y)const{
        x.resize(m_numberOfVariables);
        y.resize(m_numberOfVariables);
        for(std::size_t i = 0; i != m_numberOfVariables; ++i){
            y(i) = random::gauss(*mpe_rng,0,1);
        }
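        //transform z~N(0,I) into y = (I + (sqrt(1+||v||^2)-1) v_n v_n^T) z, so that y ~ N(0, I + v v^T)
        //and hence x = m + sigma*D*y ~ N(m, sigma^2 * D(I + v v^T)D)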
        double a = std::sqrt(1+sqr(m_normv))-1;
        a *= inner_prod(y,m_vn);
        noalias(y) +=a*m_vn;
        noalias(x) = m_mean+ m_sigma*m_D*y;
    }

    ///\brief Computes the sample-wise first two steps of S and T of theorem 3.6 in the paper.
    ///
    /// The weighted contributions are accumulated in the s and t arguments.
    void computeSAndTFirst(RealVector const& y, RealVector& s,RealVector& t, double weight )const{
        if(weight == 0) return;//nothing to do
        double yvn = inner_prod(y,m_vn);
        double normv2 = sqr(m_normv);
        double gammav = 1+normv2;
        //step 1
        noalias(s) += weight*(sqr(y) - (normv2/gammav*yvn)*(y*m_vn)- 1.0);
        //step 2
        noalias(t) += weight*(yvn*y - 0.5*(sqr(yvn)+gammav)*m_vn);
    }

    ///\brief Computes the last three steps of S and T of theorem 3.6 in the paper.
    void computeSAndTSecond(RealVector& s,RealVector& t)const{
        RealVector vn2 = m_vn*m_vn;
        double normv2 = sqr(m_normv);
        double gammav = 1+normv2;
        //alpha of 3.5
        double alpha = sqr(normv2)+(2*gammav - std::sqrt(gammav))/max(vn2);
        alpha=std::sqrt(alpha);
        alpha /= 2+normv2;
        alpha = std::min(alpha,1.0);
        //constants (b,A) of 3.4
        double b=-(1-sqr(alpha))*sqr(normv2)/gammav+2*sqr(alpha);
        RealVector A= 2.0 - (b+2*sqr(alpha))*vn2;
        RealVector invAvn2= vn2/A;

        //step 3
        noalias(s) -= alpha/gammav*((2+normv2)*(m_vn*t)-normv2*inner_prod(m_vn,t)*vn2);
        //step 4
        noalias(s) = s/A -b*inner_prod(s,invAvn2)/(1+b*inner_prod(vn2,invAvn2))*invAvn2;
        //step 5
        noalias(t) -= alpha*((2+normv2)*(m_vn*s)-inner_prod(s,vn2)*m_vn);
    }

    std::size_t m_numberOfVariables; ///< Stores the dimensionality of the search space.
    std::size_t m_mu; ///< The size of the parent population.
    std::size_t m_lambda; ///< The size of the offspring population; needs to be larger than mu.

    double m_initialSigma; ///< 0 by default, which indicates initial sigma = 1/sqrt(N)
    double m_sigma;
    double m_cC;
    double m_c1;
    double m_cMu;
    double m_cSigma;
    double m_dSigma;
    double m_muEff;

    RealVector m_mean;
    RealVector m_weights;

    RealVector m_evolutionPathC;
    RealVector m_evolutionPathSigma;

    ///\brief The normalised vector v.
    RealVector m_vn;
    ///\brief The norm of the vector v, therefore v=m_vn*m_normv.
    double m_normv;

    RealVector m_D;

    unsigned m_counter; ///< counter for generations

    random::rng_type* mpe_rng;

};

}

#endif