DoublePole.h
Go to the documentation of this file.
1/*!
2 *
3 * \brief Pole balancing simulation for double poles
4 *
5 * Class for simulating two poles balancing on a cart.
6 * Based on code written by Verena Heidrich-Meisner for the paper
7 *
8 * V. Heidrich-Meisner and C. Igel. Neuroevolution strategies for episodic reinforcement learning. Journal of Algorithms, 64(4):152–168, 2009.
9 *
10 * which was in turn based on code available at http://webdocs.cs.ualberta.ca/~sutton/book/code/pole.c
11 * as of 2015/4/19, written by Rich Sutton and Chuck Anderson and later modified.
12 * Faustino Gomez wrote the physics code using the differential equations from
13 * Alexis Weiland's paper and added the Runge-Kutta solver.
14 *
15 * \author Johan Valentin Damgaard
16 * \date -
17 *
18 *
19 * \par Copyright 1995-2017 Shark Development Team
20 *
21 * This file is part of Shark.
22 * <https://shark-ml.github.io/Shark/>
23 *
24 * Shark is free software: you can redistribute it and/or modify
25 * it under the terms of the GNU Lesser General Public License as published
26 * by the Free Software Foundation, either version 3 of the License, or
27 * (at your option) any later version.
28 *
29 * Shark is distributed in the hope that it will be useful,
30 * but WITHOUT ANY WARRANTY; without even the implied warranty of
31 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
32 * GNU Lesser General Public License for more details.
33 *
34 * You should have received a copy of the GNU Lesser General Public License
35 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
36 *
37 */
38#ifndef SHARK_OBJECTIVEFUNCTIONS_BENCHMARKS_POLE_TEXAS
39#define SHARK_OBJECTIVEFUNCTIONS_BENCHMARKS_POLE_TEXAS
40
41#include <math.h>
42#include <shark/LinAlg/Base.h>
43
44
45
46namespace shark {
47
49public:
50
51 //! Number of variables
52 unsigned noVars() const {
53 if(m_isMarkovian)
54 return 6;
55 return 3;
56 }
57
58 //! \param markovian Whether to return velocities in getState
59 //! \param normalize Whether to normalize return values in getState
60 DoublePole(bool markovian, bool normalize = true) {
61 m_isMarkovian = markovian;
62
63 // current normalization constants are the same as used
64 // in the Heidrich-Meisner code
65 // perhaps tweak them?
66 if(normalize) {
67 this->m_normal_cart = 4.8;
68 this->m_normal_pole = 0.52;
69 this->m_normal_velo = 2;
70 }
71 else {
72 this->m_normal_cart = 1.;
73 this->m_normal_pole = 1.;
74 this->m_normal_velo = 1.;
75 }
76
77 }
78
79
80 //! \brief Place m_state in a vector
81 //! \param v vector to place m_state in (assumed to be correct size already)
82 void getState(RealVector &v) {
83 // normalize the m_state variables.
84 if(m_isMarkovian) {
85 v(0) = m_state[0] / m_normal_cart;
86 v(1) = m_state[1] / m_normal_velo;
87 v(2) = m_state[2] / m_normal_pole;
88 v(3) = m_state[3] / m_normal_velo;
89 v(4) = m_state[4] / m_normal_pole;
90 v(5) = m_state[5] / m_normal_velo;
91 }
92 else {
93 v(0) = m_state[0] / m_normal_cart;
94 v(1) = m_state[2] / m_normal_pole;
95 v(2) = m_state[4] / m_normal_pole;
96 }
97 }
98
99 //! \brief Initialize with specific angle for large pole
100 //! \param state2init initial pole angle (in radians)
101 void init(double state2init = 0.07) {
102 m_state[0] = m_state[1] = m_state[3] = m_state[4] = m_state[5] = 0;
103 m_state[2] = state2init;
104 }
105
106 //! \brief Initialize with specific angle for large pole
107 //! \param state2init initial pole angle (in degrees)
108 void initDegree(double state2init) {
109 init(state2init * M_PI / 180);
110 }
111
112 //! \brief Initialize full m_state
113 //! \param a initial cart position
114 //! \param b initial cart velocity
115 //! \param c initial large pole angle (in radians)
116 //! \param d initial large pole angular velocity
117 //! \param e initial small pole angle (in radians)
118 //! \param f initial small pole angular velocity
119 void init(double a, double b, double c, double d, double e = 0., double f = 0.) {
120 m_state[0] = a;
121 m_state[1] = b;
122 m_state[2] = c;
123 m_state[3] = d;
124 m_state[4] = e;
125 m_state[5] = f;
126 }
127
128 //! Returns true when this pole is in an illegal position
129 bool failure()
130 {
131 const double thirty_six_degrees= 0.628329;
132 const double failureAngle = thirty_six_degrees;
133
134 return(m_state[0] < -2.4 ||
135 m_state[0] > 2.4 ||
136 m_state[2] < -failureAngle ||
137 m_state[2] > failureAngle ||
138 m_state[4] < -failureAngle ||
139 m_state[4] > failureAngle);
140 }
141
142 //! Return "jiggle", abstract representation of how much the the cart oscillates
143 double getJiggle() {
144 return (std::abs(m_state[0]) + std::abs(m_state[1]) + std::abs(m_state[2]) + std::abs(m_state[3]));
145 }
146
147 //! \brief Move the pole with some force
148 //! \param output Force to apply. Expects values in [0,1], where values below 0.5 indicate applying force towards the left side and values above indicate force towards the right.
149 void move(double output)
150 {
151 double dydx[6];
152 const double TAU= 0.01;
153
154 const bool useRK4=true;
155 if(useRK4) {
156 for(int i=0; i<2; ++i) {
157 dydx[0] = m_state[1];
158 dydx[2] = m_state[3];
159 dydx[4] = m_state[5];
160 step(output,m_state,dydx);
161 rk4(output,m_state,dydx,m_state);
162 }
163 }
164 else {
165 const double EULER_STEPS = 4.;
166 const double EULER_TAU= TAU/EULER_STEPS;
167 for(int i=0; i<2 * EULER_STEPS; ++i) {
168 step(output,m_state,dydx);
169 m_state[0] += EULER_TAU * m_state[1];
170 m_state[1] += EULER_TAU * dydx[1];
171 m_state[2] += EULER_TAU * m_state[3];
172 m_state[3] += EULER_TAU * dydx[3];
173 m_state[4] += EULER_TAU * m_state[5];
174 m_state[5] += EULER_TAU * dydx[5];
175 }
176 }
177 }
178
179private:
180 void step(double action, double *st, double *derivs)
181 {
182 const double MUP = 0.000002;
183 const double MUC = 0.0005;
184 const double GRAVITY= -9.8;
185 const double MASSCART= 1.0;
186 const double LENGTH_1 = 0.5;
187 const double MASSPOLE_1 = 0.1;
188 const double LENGTH_2 = 0.1 * LENGTH_1;
189 const double MASSPOLE_2 = 0.1 * MASSPOLE_1;
190 const double FORCE_MAG= 10.0;
191
192 double signum = 0.;
193
194 if (st[1] < 0) {
195 signum = -1;
196 }
197 if (st[1] > 0) {
198 signum = 1;
199 }
200
201 double force = ((action - 0.5) * FORCE_MAG * 2) - (MUC*signum);
202 double costheta_1 = cos(st[2]);
203 double sintheta_1 = sin(st[2]);
204 double gsintheta_1 = GRAVITY * sintheta_1;
205 double costheta_2 = cos(st[4]);
206 double sintheta_2 = sin(st[4]);
207 double gsintheta_2 = GRAVITY * sintheta_2;
208
209 double ml_1 = LENGTH_1 * MASSPOLE_1;
210 double ml_2 = LENGTH_2 * MASSPOLE_2;
211 double temp_1 = MUP * st[3] / ml_1;
212 double temp_2 = MUP * st[5] / ml_2;
213 double fi_1 = (ml_1 * st[3] * st[3] * sintheta_1) +
214 (0.75 * MASSPOLE_1 * costheta_1 * (temp_1 + gsintheta_1));
215 double fi_2 = (ml_2 * st[5] * st[5] * sintheta_2) +
216 (0.75 * MASSPOLE_2 * costheta_2 * (temp_2 + gsintheta_2));
217 double mi_1 = MASSPOLE_1 * (1 - (0.75 * costheta_1 * costheta_1));
218 double mi_2 = MASSPOLE_2 * (1 - (0.75 * costheta_2 * costheta_2));
219
220 derivs[1] = (force + fi_1 + fi_2)
221 / (mi_1 + mi_2 + MASSCART);
222
223 derivs[3] = -0.75 * (derivs[1] * costheta_1 + gsintheta_1 + temp_1)
224 / LENGTH_1;
225 derivs[5] = -0.75 * (derivs[1] * costheta_2 + gsintheta_2 + temp_2)
226 / LENGTH_2;
227
228 }
229
230 void rk4(double f, double y[], double dydx[], double yout[])
231 {
232 const double TAU= 0.01;
233 double dym[6],dyt[6],yt[6];
234
235 double hh=TAU*0.5;
236 double h6=TAU/6.0;
237 for (int i=0; i<=5; i++) yt[i]=y[i]+hh*dydx[i];
238 step(f,yt,dyt);
239 dyt[0] = yt[1];
240 dyt[2] = yt[3];
241 dyt[4] = yt[5];
242 for (int i=0; i<=5; i++) yt[i]=y[i]+hh*dyt[i];
243 step(f,yt,dym);
244 dym[0] = yt[1];
245 dym[2] = yt[3];
246 dym[4] = yt[5];
247 for (int i=0; i<=5; i++) {
248 yt[i]=y[i]+TAU*dym[i];
249 dym[i] += dyt[i];
250 }
251 step(f,yt,dyt);
252 dyt[0] = yt[1];
253 dyt[2] = yt[3];
254 dyt[4] = yt[5];
255 for (int i=0; i<=5; i++)
256 yout[i]=y[i]+h6*(dydx[i]+dyt[i]+2.0*dym[i]);
257 }
258
259 bool m_isMarkovian;
260
261 double m_state[6];
262
263 double m_normal_cart;
264 double m_normal_pole;
265 double m_normal_velo;
266
267};
268}
269#endif