70 RecurrentStructure::SigmoidType sigmoidType = RecurrentStructure::FastSigmoid,
71 bool normalize =
true,
72 std::size_t max_pole_evaluations = 100000)
74 m_maxPoleEvals(max_pole_evaluations),
75 m_normalize(normalize) {
76 if (sigmoidType == RecurrentStructure::Linear) {
77 std::cerr <<
"Cannot use linear activation function for pole balancing."
83 std::size_t inputs = 0;
97 m_dimensions = (hidden + 1) * (hidden + 1) +
98 inputs * (hidden + 1) + hidden + 1;
101 m_dimensions = (hidden + 1) * (hidden + 1) + inputs * (hidden + 1);
105 mp_struct =
new RecurrentStructure();
106 mp_struct->setStructure(inputs, hidden, 1, bias, sigmoidType);
107 mp_net =
new PoleRNNet(mp_struct);
110 if(m_dimensions != mp_net->numberOfParameters()) {
111 std::cerr <<
"Non-Markov pole RNNet: Dimensions do not match, "
112 << m_dimensions <<
" != " << mp_net->numberOfParameters() << std::endl;
127 return "Objective Function for Non-Markovian pole balancing.";
138 for(std::size_t i = 0; i != m_dimensions; i++) {
139 startingPoint(i) = 0.0;
141 return startingPoint;
153 return evalSingle(input);
156 return evalDouble(input);
163 class PoleRNNet :
public OnlineRNNet {
165 PoleRNNet(RecurrentStructure* structure) : OnlineRNNet(structure){}
166 boost::shared_ptr<State> createState()
const{
167 throw std::logic_error(
"State not available for PoleRNNet.");
169 void eval(BatchInputType
const & patterns, BatchOutputType &outputs,
171 throw std::logic_error(
"Batch not available for PoleRNNet.");
178 double convertToPoleMovement(
double output)
const{
179 switch(mp_struct->sigmoidType())
181 case RecurrentStructure::Logistic:
183 case RecurrentStructure::FastSigmoid:
184 return (output + 1.) / 2.;
185 case RecurrentStructure::Tanh:
186 return (output + 1.) / 2.;
188 std::cerr <<
"Unsupported activation function for pole balancing." << std::endl;
198 double init_angle = 0.07;
199 SinglePole pole(
false, m_normalize);
201 RealMatrix output(1,1);
202 RealMatrix inState(1,2);
203 std::size_t eval_count = 0;
206 pole.init(init_angle);
207 mp_net->resetInternalState();
208 mp_net->setParameterVector(input);
210 while(!failed && eval_count < m_maxPoleEvals) {
211 pole.getState(state);
212 row(inState,0) = state;
213 mp_net->eval(inState,output);
214 pole.move(convertToPoleMovement(output(0,0)));
215 failed = pole.failure();
220 return m_maxPoleEvals - eval_count;
227 double init_angle = 0.07;
228 DoublePole pole(
false, m_normalize);
230 RealMatrix output(1,1);
231 RealMatrix inState(1,3);
232 std::size_t eval_count = 0;
235 pole.init(init_angle);
236 mp_net->resetInternalState();
237 mp_net->setParameterVector(input);
239 while(!failed && eval_count < m_maxPoleEvals) {
240 pole.getState(state);
241 row(inState,0) = state;
242 mp_net->eval(inState,output);
243 pole.move(convertToPoleMovement(output(0,0)));
244 failed = pole.failure();
248 return m_maxPoleEvals - eval_count;
256 std::size_t m_dimensions;
258 std::size_t m_maxPoleEvals;
261 RecurrentStructure *mp_struct;