MarkovPole.h
Go to the documentation of this file.
1/*!
2 *
3 * \brief Objective function for single and double poles with full state information (Markovian task)
4 *
5 *
6 * Class for balancing one or two poles on a cart using a fitness
7 * function that decreases the longer the pole(s) balance(s). Based
8 * on code written by Verena Heidrich-Meisner for the paper
9 *
10 * V. Heidrich-Meisner and C. Igel. Neuroevolution strategies for
11 * episodic reinforcement learning. Journal of Algorithms,
12 * 64(4):152–168, 2009.
13 *
14 * \author Johan Valentin Damgaard
15 * \date -
16 *
17 *
18 * \par Copyright 1995-2017 Shark Development Team
19 *
20 * This file is part of Shark.
21 * <https://shark-ml.github.io/Shark/>
22 *
23 * Shark is free software: you can redistribute it and/or modify
24 * it under the terms of the GNU Lesser General Public License as published
25 * by the Free Software Foundation, either version 3 of the License, or
26 * (at your option) any later version.
27 *
28 * Shark is distributed in the hope that it will be useful,
29 * but WITHOUT ANY WARRANTY; without even the implied warranty of
30 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31 * GNU Lesser General Public License for more details.
32 *
33 * You should have received a copy of the GNU Lesser General Public License
34 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
35 *
36 */
37#ifndef SHARK_OBJECTIVEFUNCTIONS_BENCHMARKS_POLE_MARKOV_OBJECTIVE_FUNCTION
38#define SHARK_OBJECTIVEFUNCTIONS_BENCHMARKS_POLE_MARKOV_OBJECTIVE_FUNCTION
39
40#include <iostream>
41#include <typeinfo>
42
44#include <shark/LinAlg/Base.h>
45#include <shark/Models/FFNet.h>
46
49
50namespace shark {namespace benchmarks{
51
52
53
54/// Class for balancing one or two poles on a cart using a fitness function
55/// that decreases the longer the pole(s) balance(s).
56/// Based on code written by Verena Heidrich-Meisner for the paper
57///
58/// V. Heidrich-Meisner and C. Igel. Neuroevolution strategies for episodic reinforcement learning. Journal of Algorithms, 64(4):152–168, 2009.
59/// \ingroup benchmarks
60template<class HiddenNeuron,class OutputNeuron>
62public:
/// \brief Configures the pole-balancing task and builds the feed-forward network used as controller.
///
/// Picks the number of network inputs from \p single_pole, derives the expected
/// number of weights from the requested layout, allocates the network, and
/// cross-checks the derived count against the network's own parameter count.
/// On a mismatch an error is printed to std::cerr and the process is ended
/// with exit(EXIT_FAILURE).
///
63 /// \param single_pole Indicates whether the cart has a single pole (true) or two poles (false)
64 /// \param hidden Number of hidden neurons in underlying neural network
65 /// \param shortcuts Whether to use shortcuts in neural network
66 /// \param bias Whether to use bias in neural network
67 /// \param normalize Whether to normalize input before use in neural network
68 /// \param max_pole_evaluations Balance goal of the function, i.e. number of steps that pole should be able to balance without failure
69 MarkovPole(bool single_pole, std::size_t hidden, bool shortcuts, bool bias,
70 bool normalize = true, std::size_t max_pole_evaluations = 100000)
// NOTE(review): the initializers are listed as m_single, m_maxPoleEvals, m_normalize,
// but the members are declared as m_single, m_normalize, m_dimensions, m_maxPoleEvals.
// Initialization follows declaration order; harmless here because each initializer
// only reads a constructor argument, but compilers may warn about the mismatch.
71 : m_single(single_pole),
72 m_maxPoleEvals(max_pole_evaluations),
73 m_normalize(normalize) {
74 // number of inputs should be 4 for single pole, 6 for double.
75 std::size_t inputs = 0;
76 if (single_pole) {
77 inputs = 4;
78 }
79 else {
80 inputs = 6;
81 }
82 // set features
// NOTE(review): no statement follows the comment above in this listing (the embedded
// numbering jumps from 82 to 84). The feature-setting statement appears to have been
// dropped during extraction and must be restored from the upstream header.
84
85 // set number of variables/weights.
86 // number of outputs is always 1.
87 // dimensions depend on whether we use bias and/or shortcuts
// Every variant starts from hidden * (inputs + 1); shortcuts add `inputs` more
// weights and bias adds `hidden + 1` more. Presumably these correspond to the
// input-to-output connections and to one bias per hidden/output neuron — the
// check against numberOfParameters() further down is what actually validates it.
88 if (bias && shortcuts){
89 m_dimensions = hidden * (inputs + 1) + inputs + hidden + 1;
90 }
91 else if (shortcuts) {
92 m_dimensions = hidden * (inputs + 1) + inputs;
93 }
94 else if (bias) {
95 m_dimensions = hidden * (inputs + 1) + hidden + 1;
96 }
97 else {
98 m_dimensions = hidden * (inputs + 1);
99 }
100
101 // make FFNet
// The network is heap-allocated here and owned through the raw pointer mp_net.
102 mp_net = new FFNet<HiddenNeuron, OutputNeuron>();
103 FFNetStructures::ConnectionType type = shortcuts ?
104 FFNetStructures::InputOutputShortcut : FFNetStructures::Normal;
105 mp_net->setStructure(inputs, hidden, 1, type, bias);
106
107 // check dimensions match
// NOTE(review): a mismatch terminates the whole process from inside a constructor;
// callers cannot recover from it.
108 if(m_dimensions != mp_net->numberOfParameters()) {
109 std::cerr << "Markov pole FFNet: Dimensions do not match, " << m_dimensions
110 << " != " << mp_net->numberOfParameters() << std::endl;
111 exit(EXIT_FAILURE);
112 }
113
114 // set eval count
// NOTE(review): no statement follows the comment above in this listing (the embedded
// numbering jumps from 114 to 116). The evaluation-counter statement appears to have
// been dropped during extraction and must be restored from the upstream header.
116 }
117
119 delete mp_net;
120 }
121
122
123 std::string name() {
124 return "Objective Function for Markovian pole balancing.";
125 }
126
127 /// \brief Returns degrees of freedom
128 std::size_t numberOfVariables()const{
129 return m_dimensions;
130 }
131
132 /// \brief Always proposes to start in a zero vector with appropriate degrees of freedom
134 SearchPointType startingPoint(m_dimensions);
135 for(std::size_t i = 0; i != m_dimensions; i++) {
136 startingPoint(i) = 0.0;
137 }
138 return startingPoint;
139 }
140
141 /// \brief Evaluates a weight vector by running one pole-balancing episode.
///
/// Checks that \p input has exactly m_dimensions entries and then forwards to
/// evalSingle() or evalDouble() depending on m_single.
142 /// \param input Vector to be evaluated.
143 /// \return Fitness of vector, as returned by evalSingle() / evalDouble()
144 ResultType eval(const SearchPointType &input) const{
145 SIZE_CHECK(input.size() == m_dimensions);
146
// NOTE(review): the embedded numbering jumps from 146 to 148, so one statement
// appears to have been dropped here during extraction (possibly the evaluation
// counter update — confirm against the upstream header).
148
149 if(m_single) {
150 return evalSingle(input);
151 }
152 else {
153 return evalDouble(input);
154 }
155 }
156
157private:
158
159 /// \brief Converts neural network output for use with pole simulator
160 /// \param output Output of the neural network.
161 /// \return double precision floating point between 0 and 1.
162 double convertToPoleMovement(double output) const{
163 if (typeid(mp_net->outputActivationFunction())
164 == typeid(LogisticNeuron)) {
165 return output;
166 }
167 else if (typeid(mp_net->outputActivationFunction())
168 == typeid(FastSigmoidNeuron)) {
169 return (output + 1.) / 2.;
170 }
171 else if (typeid(mp_net->outputActivationFunction()) == typeid(TanhNeuron)) {
172 return (output + 1.) / 2.;
173 }
174 else {
175 std::cerr << "Unsupported neuron type in Markov pole FFNet." << std::endl;
176 exit(EXIT_FAILURE);
177 }
178 }
179
180 /// \brief Fitness function for single poles. Gets lower as pole balances for longer.
181 /// \param input Vector to be evaluated.
182 /// \return Fitness of vector
183 ResultType evalSingle(const SearchPointType &input) const{
184 double init_angle = 0.07;
185 SinglePole pole(true, m_normalize);
186 RealVector state(4);
187 RealVector output(1);
188 std::size_t eval_count = 0;
189 bool failed = false;
190
191 pole.init(init_angle);
192
193 mp_net->setParameterVector(input);
194
195 while(!failed && eval_count < m_maxPoleEvals) {
196 pole.getState(state);
197 mp_net->eval(state,output);
198 pole.move(convertToPoleMovement(output(0)));
199 failed = pole.failure();
200 eval_count++;
201 }
202
203 // gets lower as number of evaluations grows. min = 0
204 return m_maxPoleEvals - eval_count;
205 }
206
207 /// \brief Fitness function for double poles. Gets lower as poles balance for longer.
208 /// \param input Vector to be evaluated.
209 /// \return Fitness of vector
210 ResultType evalDouble(const SearchPointType &input) const{
211 double init_angle = 0.07;
212 DoublePole pole(true, m_normalize);
213 RealVector state(6);
214 RealVector output(1);
215 std::size_t eval_count = 0;
216 bool failed = false;
217
218 pole.init(init_angle);
219 mp_net->setParameterVector(input);
220
221 while(!failed && eval_count < m_maxPoleEvals) {
222 pole.getState(state);
223 mp_net->eval(state,output);
224 pole.move(convertToPoleMovement(output(0)));
225 failed = pole.failure();
226 eval_count++;
227 }
228
229 // gets lower as number of evaluations grows. min = 0
230 return m_maxPoleEvals - eval_count;
231 }
232
233 /// True if this is a single pole, false if double pole. Selects evalSingle() vs. evalDouble() in eval().
234 bool m_single;
235 /// True if neural network input is normalized, false otherwise. Passed on to the SinglePole / DoublePole simulator constructors.
236 bool m_normalize;
237 /// Degrees of freedom: number of network weights expected in a search point, computed in the constructor.
238 std::size_t m_dimensions;
239 /// Balance goal: upper bound on the number of simulation steps per evaluation.
240 std::size_t m_maxPoleEvals;
241
242 /// Neural network used as controller. Allocated with new in the constructor and released with delete.
/// NOTE(review): owning raw pointer; no copy constructor / assignment is visible in
/// this listing, so copying an instance would presumably lead to a double delete — confirm.
243 FFNet<HiddenNeuron, OutputNeuron> *mp_net;
/// Instances of the neuron types; not referenced by any code visible in this listing.
244 HiddenNeuron m_hiddenNeuron;
245 OutputNeuron m_outputNeuron;
246
247};
248
249}}
250#endif