61        m_isMarkovian = markovian;
 
   67            this->m_normal_cart = 4.8;
 
   68            this->m_normal_pole = 0.52;
 
   69            this->m_normal_velo = 2;
 
   72            this->m_normal_cart = 1.;
 
   73            this->m_normal_pole = 1.;
 
   74            this->m_normal_velo = 1.;
 
 
   85            v(0) = m_state[0] / m_normal_cart;
 
   86            v(1) = m_state[1] / m_normal_velo;
 
   87            v(2) = m_state[2] / m_normal_pole;
 
   88            v(3) = m_state[3] / m_normal_velo;
 
   89            v(4) = m_state[4] / m_normal_pole;
 
   90            v(5) = m_state[5] / m_normal_velo;
 
   93            v(0) = m_state[0] / m_normal_cart;
 
   94            v(1) = m_state[2] / m_normal_pole;
 
   95            v(2) = m_state[4] / m_normal_pole;
 
 
  101    void init(
double state2init = 0.07)  {
 
  102        m_state[0] = m_state[1] = m_state[3] = m_state[4] = m_state[5] = 0;
 
  103        m_state[2] = state2init;
 
 
  109        init(state2init * M_PI / 180);
 
 
  119    void init(
double a, 
double b, 
double c, 
double d, 
double e = 0., 
double f = 0.)  {
 
 
  131        const double thirty_six_degrees= 0.628329;
 
  132        const double failureAngle = thirty_six_degrees;
 
  134        return(m_state[0] < -2.4              ||
 
  136               m_state[2] < -failureAngle     ||
 
  137               m_state[2] > failureAngle      ||
 
  138               m_state[4] < -failureAngle     ||
 
  139               m_state[4] > failureAngle);
 
 
  144        return (std::abs(m_state[0]) + std::abs(m_state[1]) + std::abs(m_state[2]) + std::abs(m_state[3]));
 
 
  152        const double TAU= 0.01;
 
  154        const bool useRK4=
true;
 
  156            for(
int i=0; i<2; ++i) {
 
  157                dydx[0] = m_state[1];
 
  158                dydx[2] = m_state[3];
 
  159                dydx[4] = m_state[5];
 
  160                step(output,m_state,dydx);
 
  161                rk4(output,m_state,dydx,m_state);
 
  165            const double EULER_STEPS = 4.;
 
  166            const double EULER_TAU= TAU/EULER_STEPS;
 
  167            for(
int i=0; i<2 * EULER_STEPS; ++i) {
 
  168                step(output,m_state,dydx);
 
  169                m_state[0] += EULER_TAU * m_state[1];
 
  170                m_state[1] += EULER_TAU * dydx[1];
 
  171                m_state[2] += EULER_TAU * m_state[3];
 
  172                m_state[3] += EULER_TAU * dydx[3];
 
  173                m_state[4] += EULER_TAU * m_state[5];
 
  174                m_state[5] += EULER_TAU * dydx[5];
 
 
  180    void step(
double action, 
double *st, 
double *derivs)
 
  182        const double MUP = 0.000002;
 
  183        const double MUC = 0.0005;
 
  184        const double GRAVITY= -9.8;
 
  185        const double MASSCART= 1.0;
 
  186        const double LENGTH_1   = 0.5;
 
  187        const double MASSPOLE_1 = 0.1;
 
  188        const double LENGTH_2   = 0.1 * LENGTH_1;
 
  189        const double MASSPOLE_2 = 0.1 * MASSPOLE_1;
 
  190        const double FORCE_MAG= 10.0;
 
  201        double force =  ((action - 0.5) * FORCE_MAG * 2) - (MUC*signum);
 
  202        double costheta_1 = cos(st[2]);
 
  203        double sintheta_1 = sin(st[2]);
 
  204        double gsintheta_1 = GRAVITY * sintheta_1;
 
  205        double costheta_2 = cos(st[4]);
 
  206        double sintheta_2 = sin(st[4]);
 
  207        double gsintheta_2 = GRAVITY * sintheta_2;
 
  209        double ml_1 = LENGTH_1 * MASSPOLE_1;
 
  210        double ml_2 = LENGTH_2 * MASSPOLE_2;
 
  211        double temp_1 = MUP * st[3] / ml_1;
 
  212        double temp_2 = MUP * st[5] / ml_2;
 
  213        double fi_1 = (ml_1 * st[3] * st[3] * sintheta_1) +
 
  214               (0.75 * MASSPOLE_1 * costheta_1 * (temp_1 + gsintheta_1));
 
  215        double fi_2 = (ml_2 * st[5] * st[5] * sintheta_2) +
 
  216               (0.75 * MASSPOLE_2 * costheta_2 * (temp_2 + gsintheta_2));
 
  217        double mi_1 = MASSPOLE_1 * (1 - (0.75 * costheta_1 * costheta_1));
 
  218        double mi_2 = MASSPOLE_2 * (1 - (0.75 * costheta_2 * costheta_2));
 
  220        derivs[1] = (force + fi_1 + fi_2)
 
  221                    / (mi_1 + mi_2 + MASSCART);
 
  223        derivs[3] = -0.75 * (derivs[1] * costheta_1 + gsintheta_1 + temp_1)
 
  225        derivs[5] = -0.75 * (derivs[1] * costheta_2 + gsintheta_2 + temp_2)
 
  230    void rk4(
double f, 
double y[], 
double dydx[], 
double yout[])
 
  232        const double TAU= 0.01;
 
  233        double dym[6],dyt[6],yt[6];
 
  237        for (
int i=0; i<=5; i++) yt[i]=y[i]+hh*dydx[i];
 
  242        for (
int i=0; i<=5; i++) yt[i]=y[i]+hh*dyt[i];
 
  247        for (
int i=0; i<=5; i++) {
 
  248            yt[i]=y[i]+TAU*dym[i];
 
  255        for (
int i=0; i<=5; i++)
 
  256            yout[i]=y[i]+h6*(dydx[i]+dyt[i]+2.0*dym[i]);
 
  263    double m_normal_cart;
 
  264    double m_normal_pole;
 
  265    double m_normal_velo;