TwoPointStepSizeAdaptation.h
/*!
 * \brief Implements a two-point step size adaptation rule based on a line-search type of approach
 *
 * \author O.Krause
 * \date 2014
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <https://shark-ml.github.io/Shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
#ifndef SHARK_ALGORITHMS_DIRECTSEARCH_OPERATORS_TWO_POINT_STEP_SIZE_ADAPTATION_H
#define SHARK_ALGORITHMS_DIRECTSEARCH_OPERATORS_TWO_POINT_STEP_SIZE_ADAPTATION_H

#include <shark/LinAlg/Base.h>
#include <shark/ObjectiveFunctions/AbstractObjectiveFunction.h> //declares SingleObjectiveFunction, used in update() below
#include <cmath>

namespace shark {

/// \brief Step size adaptation based on the success of two trial points sampled along the search direction
///
/// This is the two-point step size adaptation rule of the algorithm proposed in
/// Ilya Loshchilov, "A Computationally Efficient Limited Memory CMA-ES for Large Scale Optimization"
///
/// Given the current point \f$ x \f$, a search direction \f$ v \f$ and the current step size
/// \f$ \sigma_{t-1} \f$, the objective function \f$ f \f$ is evaluated at the two trial points
/// \f[ f^- = f(x + \alpha \sigma_{t-1} v), \quad f^+ = f(x + (1+\alpha) \sigma_{t-1} v) \f]
/// where the step parameter \f$ \alpha \f$ defaults to 0.5. If the shorter step is the better one,
/// i.e. \f$ f^- < f^+ \f$, the instantaneous adaptation signal is \f$ a_t = \log \alpha \f$,
/// which is negative and therefore favours shrinking the step size; otherwise it is
/// \f$ a_t = \log(1+\alpha) \f$, which is positive and favours growing it.
/// This signal is stabilised using an exponential average
/// \f[ s_t \leftarrow (1-c) s_{t-1} + c \cdot a_t \f]
/// where the learning rate c defaults to 0.1. Finally, the step size is adapted by
/// \f[ \sigma_t = \sigma_{t-1} \exp(s_t) \f]
class TwoPointStepSizeAdaptation{
public:
	TwoPointStepSizeAdaptation():m_stepSize(0), m_alpha(0), m_cAlpha(0.1), m_alphaStep(0.5){}

	///\brief Returns the current step size.
	double stepSize()const{
		return m_stepSize;
	}

	///\brief Sets the step parameter alpha which places the two trial points.
	void setAlphaStep(double alphaStep){
		m_alphaStep = alphaStep;
	}

	///\brief Sets the learning rate c of the exponential average.
	void setLearningRate(double learningRate){
		m_cAlpha = learningRate;
	}

	///\brief Initializes a new trial by setting the initial step size and resetting the internal values.
	void init(double initialStepSize){
		m_stepSize = initialStepSize;
		m_alpha = 0;
	}

	///\brief Sets the current step size directly.
	void setStepSize(double stepSize){
		m_stepSize = stepSize;
	}
	/// \brief Updates the step size by probing the objective function at two points along the given direction
	///
	/// The function is evaluated at point + alpha*sigma*direction and at point + (1+alpha)*sigma*direction.
	/// The better of the two points decides whether the smoothed adaptation signal is pulled towards
	/// log(alpha) (shrink) or towards log(1+alpha) (grow), and the step size is scaled accordingly.
	void update(SingleObjectiveFunction const& f, RealVector const& point, RealVector const& direction){
		double fminus = f.eval(point + m_alphaStep * m_stepSize * direction);
		double fplus = f.eval(point + (1 + m_alphaStep) * m_stepSize * direction);

		//log of the step-length factor belonging to the better of the two trial points
		double alphaCurrent = fminus < fplus ? std::log(m_alphaStep) : std::log(1 + m_alphaStep);
		//exponential average of the adaptation signal
		m_alpha = (1 - m_cAlpha) * m_alpha + m_cAlpha * alphaCurrent;
		//adapt the step size multiplicatively
		m_stepSize *= std::exp(m_alpha);
	}
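
	// Worked example of a single update (added for illustration, not part of the original file):
	// with the defaults alpha = 0.5 and c = 0.1, starting from s_0 = 0, suppose the probe at
	// 0.5*sigma is the better one (fminus < fplus). Then a_1 = log(0.5) ~ -0.693,
	// s_1 = 0.9*0 + 0.1*(-0.693) ~ -0.069, and the step size shrinks by the factor
	// exp(-0.069) ~ 0.933, i.e. by roughly 7% in this iteration.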
private:
	double m_stepSize; ///< current value of the step size
	double m_alpha; ///< exponential average of the log step-length adaptation signal

	//hyper parameters
	double m_cAlpha; ///< learning rate c of the exponential average
	double m_alphaStep; ///< step parameter alpha placing the two trial points
};
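
// A minimal usage sketch (added for illustration; not part of the original header).
// It shows the init()/update()/stepSize() protocol on an arbitrary objective function.
// The loop below is hypothetical: in practice an optimizer owns the point and keeps
// updating the search direction, while here the direction stays fixed for brevity.
inline void twoPointStepSizeAdaptationExample(SingleObjectiveFunction const& f){
	std::size_t dim = f.numberOfVariables(); //dimension of the search space
	RealVector point(dim);     //illustrative starting point, set to all ones below
	RealVector direction(dim); //illustrative, fixed descent direction
	for(std::size_t i = 0; i != dim; ++i){
		point(i) = 1.0;
		direction(i) = -0.1;
	}

	TwoPointStepSizeAdaptation rule;
	rule.init(1.0); //sigma_0 = 1
	for(std::size_t t = 0; t != 100; ++t){
		rule.update(f, point, direction);     //two objective evaluations along the direction
		point += rule.stepSize() * direction; //take a step with the adapted step size
	}
}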

}

#endif