61 m_isMarkovian = markovian;
67 this->m_normal_cart = 4.8;
68 this->m_normal_pole = 0.52;
69 this->m_normal_velo = 2;
72 this->m_normal_cart = 1.;
73 this->m_normal_pole = 1.;
74 this->m_normal_velo = 1.;
85 v(0) = m_state[0] / m_normal_cart;
86 v(1) = m_state[1] / m_normal_velo;
87 v(2) = m_state[2] / m_normal_pole;
88 v(3) = m_state[3] / m_normal_velo;
89 v(4) = m_state[4] / m_normal_pole;
90 v(5) = m_state[5] / m_normal_velo;
93 v(0) = m_state[0] / m_normal_cart;
94 v(1) = m_state[2] / m_normal_pole;
95 v(2) = m_state[4] / m_normal_pole;
101 void init(
double state2init = 0.07) {
102 m_state[0] = m_state[1] = m_state[3] = m_state[4] = m_state[5] = 0;
103 m_state[2] = state2init;
109 init(state2init * M_PI / 180);
119 void init(
double a,
double b,
double c,
double d,
double e = 0.,
double f = 0.) {
131 const double thirty_six_degrees= 0.628329;
132 const double failureAngle = thirty_six_degrees;
134 return(m_state[0] < -2.4 ||
136 m_state[2] < -failureAngle ||
137 m_state[2] > failureAngle ||
138 m_state[4] < -failureAngle ||
139 m_state[4] > failureAngle);
144 return (std::abs(m_state[0]) + std::abs(m_state[1]) + std::abs(m_state[2]) + std::abs(m_state[3]));
152 const double TAU= 0.01;
154 const bool useRK4=
true;
156 for(
int i=0; i<2; ++i) {
157 dydx[0] = m_state[1];
158 dydx[2] = m_state[3];
159 dydx[4] = m_state[5];
160 step(output,m_state,dydx);
161 rk4(output,m_state,dydx,m_state);
165 const double EULER_STEPS = 4.;
166 const double EULER_TAU= TAU/EULER_STEPS;
167 for(
int i=0; i<2 * EULER_STEPS; ++i) {
168 step(output,m_state,dydx);
169 m_state[0] += EULER_TAU * m_state[1];
170 m_state[1] += EULER_TAU * dydx[1];
171 m_state[2] += EULER_TAU * m_state[3];
172 m_state[3] += EULER_TAU * dydx[3];
173 m_state[4] += EULER_TAU * m_state[5];
174 m_state[5] += EULER_TAU * dydx[5];
180 void step(
double action,
double *st,
double *derivs)
182 const double MUP = 0.000002;
183 const double MUC = 0.0005;
184 const double GRAVITY= -9.8;
185 const double MASSCART= 1.0;
186 const double LENGTH_1 = 0.5;
187 const double MASSPOLE_1 = 0.1;
188 const double LENGTH_2 = 0.1 * LENGTH_1;
189 const double MASSPOLE_2 = 0.1 * MASSPOLE_1;
190 const double FORCE_MAG= 10.0;
201 double force = ((action - 0.5) * FORCE_MAG * 2) - (MUC*signum);
202 double costheta_1 = cos(st[2]);
203 double sintheta_1 = sin(st[2]);
204 double gsintheta_1 = GRAVITY * sintheta_1;
205 double costheta_2 = cos(st[4]);
206 double sintheta_2 = sin(st[4]);
207 double gsintheta_2 = GRAVITY * sintheta_2;
209 double ml_1 = LENGTH_1 * MASSPOLE_1;
210 double ml_2 = LENGTH_2 * MASSPOLE_2;
211 double temp_1 = MUP * st[3] / ml_1;
212 double temp_2 = MUP * st[5] / ml_2;
213 double fi_1 = (ml_1 * st[3] * st[3] * sintheta_1) +
214 (0.75 * MASSPOLE_1 * costheta_1 * (temp_1 + gsintheta_1));
215 double fi_2 = (ml_2 * st[5] * st[5] * sintheta_2) +
216 (0.75 * MASSPOLE_2 * costheta_2 * (temp_2 + gsintheta_2));
217 double mi_1 = MASSPOLE_1 * (1 - (0.75 * costheta_1 * costheta_1));
218 double mi_2 = MASSPOLE_2 * (1 - (0.75 * costheta_2 * costheta_2));
220 derivs[1] = (force + fi_1 + fi_2)
221 / (mi_1 + mi_2 + MASSCART);
223 derivs[3] = -0.75 * (derivs[1] * costheta_1 + gsintheta_1 + temp_1)
225 derivs[5] = -0.75 * (derivs[1] * costheta_2 + gsintheta_2 + temp_2)
230 void rk4(
double f,
double y[],
double dydx[],
double yout[])
232 const double TAU= 0.01;
233 double dym[6],dyt[6],yt[6];
237 for (
int i=0; i<=5; i++) yt[i]=y[i]+hh*dydx[i];
242 for (
int i=0; i<=5; i++) yt[i]=y[i]+hh*dyt[i];
247 for (
int i=0; i<=5; i++) {
248 yt[i]=y[i]+TAU*dym[i];
255 for (
int i=0; i<=5; i++)
256 yout[i]=y[i]+h6*(dydx[i]+dyt[i]+2.0*dym[i]);
263 double m_normal_cart;
264 double m_normal_pole;
265 double m_normal_velo;