SinglePole.h
Go to the documentation of this file.
1/*!
2 *
3 * \brief Pole balancing simulation for double pole
4 *
5 * Class for simulating a single pole balancing on a cart.
6 * Based on code written by Verena Heidrich-Meisner for the paper
7 *
8 * V. Heidrich-Meisner and C. Igel. Neuroevolution strategies for
9 * episodic reinforcement learning. Journal of Algorithms,
10 * 64(4):152–168, 2009.
11 *
12 * which was in turn based on code available at http://webdocs.cs.ualberta.ca/~sutton/book/code/pole.c
13 * as of 2015/4/19, written by Rich Sutton and Chuck Anderson and later modified.
14 * Faustino Gomez wrote the physics code using the differential equations from
15 * Alexis Weiland's paper and added the Runge-Kutta solver.
16 *
17 * \author Johan Valentin Damgaard
18 * \date -
19 *
20 *
21 * \par Copyright 1995-2017 Shark Development Team
22 *
23 * This file is part of Shark.
24 * <https://shark-ml.github.io/Shark/>
25 *
26 * Shark is free software: you can redistribute it and/or modify
27 * it under the terms of the GNU Lesser General Public License as published
28 * by the Free Software Foundation, either version 3 of the License, or
29 * (at your option) any later version.
30 *
31 * Shark is distributed in the hope that it will be useful,
32 * but WITHOUT ANY WARRANTY; without even the implied warranty of
33 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
34 * GNU Lesser General Public License for more details.
35 *
36 * You should have received a copy of the GNU Lesser General Public License
37 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
38 *
39 */
40
41#ifndef SHARK_OBJECTIVEFUNCTIONS_BENCHMARKS_POLE_SUTTON_ANDERSON
42#define SHARK_OBJECTIVEFUNCTIONS_BENCHMARKS_POLE_SUTTON_ANDERSON
43
44#include <math.h>
45#include <shark/LinAlg/Base.h>
46
47
48namespace shark {
49
51public:
52
53 //! Convert degrees to radians
54 static double degrad(double x) {
55 return x * M_PI / 180;
56 };
57
58 //! Number of variables
59 unsigned noVars() const {
60 if(m_isMarkov)
61 return 4;
62 return 2;
63 }
64
65 //! \param markovian Whether to return velocities in getState
66 //! \param normalize Whether to normalize return values in getState
67 SinglePole(bool markovian, bool normalize = true) {
68 this->m_isMarkov = markovian;
69
70 // current normalization constants are the same as used for
71 // the double pole simulation in Heidrich-Meisner code
72 // perhaps tweak them for use with single pole?
73 if(normalize) {
74 this->m_normal_cart = 4.8;
75 this->m_normal_pole = 0.52;
76 this->m_normal_velo = 2;
77 }
78 else {
79 this->m_normal_cart = 1.;
80 this->m_normal_pole = 1.;
81 this->m_normal_velo = 1.;
82 }
83 }
84
85 //! \brief Initialize with specific pole angle
86 //! \param state2init initial pole angle (in degrees)
87 void initDegree(double state2init) {
88 init(state2init * M_PI / 180);
89 }
90
91
92 //! \brief Initialize with specific pole angle
93 //! \param state2init initial pole angle (in radians)
94 void init(double state2init = 0.07) {
95 m_state[0] = m_state[1] = m_state[3] = 0.0;
96 m_state[2] = state2init;
97 }
98
99 //! \brief Initialize full m_state
100 //! \param a initial cart position
101 //! \param b initial cart velocity
102 //! \param c initial pole angle (in radians)
103 //! \param d initial pole angular velocity
104 void init(double a, double b, double c, double d) {
105 m_state[0] = a;
106 m_state[1] = b;
107 m_state[2] = c;
108 m_state[3] = d;
109 }
110
111 //! \brief Place m_state in a vector
112 //! \param v vector to place m_state in (assumed to be correct size already)
113 void getState(RealVector &v) {
114 // normalize the m_state variables
115 if(m_isMarkov) {
116 v(0) = m_state[0] / m_normal_cart;
117 v(1) = m_state[1] / m_normal_velo;
118 v(2) = m_state[2] / m_normal_pole;
119 v(3) = m_state[3] / m_normal_velo;
120 }
121 else {
122 v(0) = m_state[0] / m_normal_cart;
123 v(1) = m_state[2] / m_normal_pole;
124 }
125 }
126
127 //! Returns true when this pole is in an illegal position
128 bool failure() {
129 const double twelve_degrees = degrad(12);
130 if (m_state[0] < -2.4 || m_state[0] > 2.4 || m_state[2] < -twelve_degrees ||
131 m_state[2] > twelve_degrees) return true;
132 return false;
133 }
134
135 //! \brief Move the pole with some force
136 //! \param output Force to apply. Expects values in [0,1], where values below 0.5 indicate applying force towards the left side and values above indicate force towards the right.
137 void move(double output) {
138 double dydx[4];
139
140 dydx[1] = 0.;
141 dydx[3] = 0.;
142
143 for(int i = 0; i < 2; i++) {
144 dydx[0] = m_state[1];
145 dydx[2] = m_state[3];
146 step(output,m_state,dydx);
147 rk4(output,m_state,dydx,m_state);
148 }
149 }
150
151private:
152 void step(double output, double *st, double *derivs) {
153
154 const double MUP = 0.0;
155 const double GRAVITY = -9.8;
156 const double MASSCART = 1.0;
157 const double MASSPOLE = 0.1;
158 const double LENGTH = 0.5;
159 const double FORCE_MAG = 10.0;
160
161 double force = (output -0.5) * FORCE_MAG * 2;
162 double costheta = cos(st[2]);
163 double sintheta = sin(st[2]);
164 double gsintheta = GRAVITY * sintheta;
165
166 double ml = LENGTH * MASSPOLE;
167 double temp = MUP * st[3] / ml;
168 double fi = (ml * st[3] * st[3] * sintheta) + (0.75 * MASSPOLE * costheta * (temp + gsintheta));
169 double mi = MASSPOLE * (1 - (0.75 * costheta * costheta));
170
171 derivs[1] = (force + fi) / (mi + MASSCART);
172
173 derivs[3] = -0.75 * (derivs[1] * costheta + gsintheta + temp) / LENGTH;
174 }
175
176 void rk4(double f,double y[], double dydx[], double yout[]) {
177 const double TAU = 0.01;
178
179 double dym[4],dyt[4],yt[4];
180
181 double hh = TAU*0.5;
182 double h6 = TAU/6.0;
183 for (int i = 0; i <= 3; i++) {
184 yt[i] = y[i]+hh*dydx[i];
185 }
186 step(f,yt,dyt);
187 dyt[0] = yt[1];
188 dyt[2] = yt[3];
189 for (int i = 0; i <= 3; i++) {
190 yt[i] = y[i]+hh*dyt[i];
191 }
192 step(f,yt,dym);
193 dym[0] = yt[1];
194 dym[2] = yt[3];
195 for (int i = 0; i <= 3; i++) {
196 yt[i] = y[i] + TAU * dym[i];
197 dym[i] += dyt[i];
198 }
199 step(f,yt,dyt);
200 dyt[0] = yt[1];
201 dyt[2] = yt[3];
202 for (int i = 0; i <= 3; i++) {
203 yout[i] = y[i]+h6*(dydx[i]+dyt[i]+2.0*dym[i]);
204 }
205 }
206
207 double m_state[4];
208 bool m_isMarkov;
209 double m_normal_cart;
210 double m_normal_pole;
211 double m_normal_velo;
212
213};
214
215}
216#endif