NonMarkovPole.h
/*!
 *
 * \brief Objective function for single and double poles with partial state information (non-Markovian task)
 *
 *
 * Class for balancing one or two poles on a cart using a fitness
 * function that decreases the longer the pole(s) balance(s). Based
 * on code written by Verena Heidrich-Meisner for the paper
 *
 * V. Heidrich-Meisner and C. Igel. Neuroevolution strategies for
 * episodic reinforcement learning. Journal of Algorithms,
 * 64(4):152–168, 2009.
 *
 * \author Johan Valentin Damgaard
 * \date -
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * This file is part of Shark.
 * <https://shark-ml.github.io/Shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
#ifndef SHARK_OBJECTIVEFUNCTIONS_BENCHMARKS_POLE_NONMARKOV_OBJECTIVE_FUNCTION
#define SHARK_OBJECTIVEFUNCTIONS_BENCHMARKS_POLE_NONMARKOV_OBJECTIVE_FUNCTION

#include <iostream>
#include <exception>

#include <shark/ObjectiveFunctions/AbstractObjectiveFunction.h>
#include <shark/Models/OnlineRNNet.h>
#include <shark/LinAlg/Base.h>

#include <shark/ObjectiveFunctions/Benchmarks/PoleSimulators/SinglePole.h>
#include <shark/ObjectiveFunctions/Benchmarks/PoleSimulators/DoublePole.h>

namespace shark {namespace benchmarks{

/// \brief Objective function for single and double non-Markov poles
///
/// Class for balancing one or two poles on a cart using a fitness function
/// that decreases the longer the pole(s) balance(s).
/// Based on code written by Verena Heidrich-Meisner for the paper
///
/// V. Heidrich-Meisner and C. Igel. Neuroevolution strategies for episodic reinforcement learning. Journal of Algorithms, 64(4):152–168, 2009.
/// \ingroup benchmarks
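///
/// A rough usage sketch (added for illustration, not part of the original header;
/// it assumes Shark's CMA optimizer and the usual init/step loop):
/// \code
/// #include <shark/Algorithms/DirectSearch/CMA.h>
/// #include <shark/ObjectiveFunctions/Benchmarks/NonMarkovPole.h>
///
/// using namespace shark;
/// int main(){
/// 	benchmarks::NonMarkovPole objective(true, 4, true); // single pole, 4 hidden neurons, bias
/// 	objective.init(); // needed in recent Shark versions before the first evaluation
/// 	CMA cma;
/// 	cma.init(objective, objective.proposeStartingPoint());
/// 	for(std::size_t t = 0; t != 1000; ++t){
/// 		cma.step(objective);
/// 		if(cma.solution().value == 0) break; // balanced for the full horizon
/// 	}
/// }
/// \endcode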
class NonMarkovPole : public SingleObjectiveFunction {

public:
	/// \param single Is this an instance of the single pole problem?
	/// \param hidden Number of hidden neurons in the underlying neural network
	/// \param bias Whether to use a bias in the neural network
	/// \param sigmoidType Sigmoid activation function for the neural network
	/// \param normalize Whether to normalize the input before use in the neural network
	/// \param max_pole_evaluations Balance goal of the function, i.e. the number of steps that the pole(s) should balance without failure
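	///
	/// Worked example (for illustration): a single-pole instance has 2 inputs, so with
	/// hidden = 2 and bias = true the number of weights is
	/// (2+1)*(2+1) + 2*(2+1) + 2+1 = 9 + 6 + 3 = 18.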
	NonMarkovPole(bool single, std::size_t hidden, bool bias,
		RecurrentStructure::SigmoidType sigmoidType = RecurrentStructure::FastSigmoid,
		bool normalize = true,
		std::size_t max_pole_evaluations = 100000)
		: m_single(single),
		  m_maxPoleEvals(max_pole_evaluations),
		  m_normalize(normalize) {
		if (sigmoidType == RecurrentStructure::Linear) {
			std::cerr << "Cannot use linear activation function for pole balancing."
				<< std::endl;
			exit(EXIT_FAILURE);
		}

		// number of inputs should be 2 for single pole, 3 for double.
		std::size_t inputs = 0;
		if (single) {
			inputs = 2;
		}
		else {
			inputs = 3;
		}
		// set features
		m_features |= CAN_PROPOSE_STARTING_POINT;

		// set number of variables/weights.
		// number of outputs is always 1.
		// dimensions depend on whether we use bias
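		// the (hidden + 1)^2 term counts connections among the hidden and output units,
		// inputs * (hidden + 1) counts the input connections, and hidden + 1 bias
		// weights are added when bias is enabled.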
		if (bias){
			m_dimensions = (hidden + 1) * (hidden + 1) +
				inputs * (hidden + 1) + hidden + 1;
		}
		else {
			m_dimensions = (hidden + 1) * (hidden + 1) + inputs * (hidden + 1);
		}

		// make RNNet
		mp_struct = new RecurrentStructure();
		mp_struct->setStructure(inputs, hidden, 1, bias, sigmoidType);
		mp_net = new PoleRNNet(mp_struct);

		// check that the dimensions match
		if(m_dimensions != mp_net->numberOfParameters()) {
			std::cerr << "Non-Markov pole RNNet: Dimensions do not match, "
				<< m_dimensions << " != " << mp_net->numberOfParameters() << std::endl;
			exit(EXIT_FAILURE);
		}

		// set eval count
		m_evaluationCounter = 0;

	}

	~NonMarkovPole(){
		delete mp_struct;
		delete mp_net;
	}

	std::string name() const{
		return "Objective Function for Non-Markovian pole balancing.";
	}

	/// \brief Returns the degrees of freedom
	std::size_t numberOfVariables()const{
		return m_dimensions;
	}

	/// \brief Always proposes to start in a zero vector with the appropriate degrees of freedom
	SearchPointType proposeStartingPoint() const{
		SearchPointType startingPoint(m_dimensions);
		for(std::size_t i = 0; i != m_dimensions; i++) {
			startingPoint(i) = 0.0;
		}
		return startingPoint;
	}

	/// \brief Evaluates a weight vector on the fitness function
	/// \param input Vector to be evaluated.
	/// \return Fitness of the vector
	ResultType eval(const SearchPointType &input) const{
		SIZE_CHECK(input.size() == m_dimensions);

		m_evaluationCounter++;

		if(m_single) {
			return evalSingle(input);
		}
		else {
			return evalDouble(input);
		}
	}

private:

	// Private class for the recurrent neural network; not to be used outside the main class.
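	// OnlineRNNet is evaluated one time step at a time and keeps its activations
	// internally, so the batch/State interface is deliberately disabled here.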
	class PoleRNNet : public OnlineRNNet {
	public:
		PoleRNNet(RecurrentStructure* structure) : OnlineRNNet(structure){}
		boost::shared_ptr<State> createState()const{
			throw std::logic_error("State not available for PoleRNNet.");
		}
		void eval(BatchInputType const & patterns, BatchOutputType &outputs,
			State& state) const{
			throw std::logic_error("Batch not available for PoleRNNet.");
		}
	};

	/// \brief Converts the neural network output for use with the pole simulator
	/// \param output Output of the neural network.
	/// \return A double precision floating point value between 0 and 1.
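	///
	/// FastSigmoid and tanh outputs lie in (-1, 1) and are mapped affinely to (0, 1);
	/// the logistic output is already in (0, 1) and is passed through unchanged.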
	double convertToPoleMovement(double output) const{
		switch(mp_struct->sigmoidType())
		{
			case RecurrentStructure::Logistic:
				return output;
			case RecurrentStructure::FastSigmoid:
				return (output + 1.) / 2.;
			case RecurrentStructure::Tanh:
				return (output + 1.) / 2.;
			default:
				std::cerr << "Unsupported activation function for pole balancing." << std::endl;
				exit(EXIT_FAILURE);
		}

	}

	/// \brief Fitness function for single poles. Gets lower the longer the pole balances.
	/// \param input Vector to be evaluated.
	/// \return Fitness of the vector
	ResultType evalSingle(const SearchPointType &input) const{
		double init_angle = 0.07;
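		// non-Markov setting: the simulator exposes only the cart position and the
		// pole angle (no velocities), so the recurrent network has to infer the rest
		// from its internal state.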
		SinglePole pole(false, m_normalize);
		RealVector state(2);
		RealMatrix output(1,1);
		RealMatrix inState(1,2);
		std::size_t eval_count = 0;
		bool failed = false;

		pole.init(init_angle);
		mp_net->resetInternalState();
		mp_net->setParameterVector(input);

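		// control loop: read the partial state, feed it to the recurrent network,
		// convert the output to a push on the cart, and step the simulation until
		// the simulator reports failure or the balance goal is reached.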
		while(!failed && eval_count < m_maxPoleEvals) {
			pole.getState(state);
			row(inState,0) = state;
			mp_net->eval(inState,output);
			pole.move(convertToPoleMovement(output(0,0)));
			failed = pole.failure();
			eval_count++;
		}

		// gets lower as number of evaluations grows. min = 0
		return m_maxPoleEvals - eval_count;
	}

	/// \brief Fitness function for double poles. Gets lower the longer the poles balance.
	/// \param input Vector to be evaluated.
	/// \return Fitness of the vector
	ResultType evalDouble(const SearchPointType &input) const{
		double init_angle = 0.07;
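		// double-pole variant: the partial state holds only the cart position and the
		// two pole angles.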
		DoublePole pole(false, m_normalize);
		RealVector state(3);
		RealMatrix output(1,1);
		RealMatrix inState(1,3);
		std::size_t eval_count = 0;
		bool failed = false;

		pole.init(init_angle);
		mp_net->resetInternalState();
		mp_net->setParameterVector(input);

		while(!failed && eval_count < m_maxPoleEvals) {
			pole.getState(state);
			row(inState,0) = state;
			mp_net->eval(inState,output);
			pole.move(convertToPoleMovement(output(0,0)));
			failed = pole.failure();
			eval_count++;
		}
		// gets lower as number of evaluations grows. min = 0
		return m_maxPoleEvals - eval_count;
	}

	/// True if this is a single pole, false if double pole.
	bool m_single;
	/// True if neural network input is normalized, false otherwise
	bool m_normalize;
	/// Degrees of freedom
	std::size_t m_dimensions;
	/// Balance goal
	std::size_t m_maxPoleEvals;

	/// Neural network
	RecurrentStructure *mp_struct;
	OnlineRNNet *mp_net;

};

}}
#endif