├── .gitignore ├── Acla.cpp ├── Acla.h ├── Action.h ├── Algorithm.cpp ├── Algorithm.h ├── Arm.cpp ├── Arm.h ├── ArmCfgCacla ├── ArmCfgCaclaSimple ├── ArmInterface.cpp ├── ArmInterface.h ├── CMakeLists.txt ├── Cacla.cpp ├── Cacla.h ├── CartPole.cpp ├── CartPole.h ├── CartPoleCfg ├── CartPoleCfgCacla ├── CartPoleCfgCaclaSimple ├── CartPoleInterface.cpp ├── CartPoleInterface.h ├── Experiment.cpp ├── Experiment.h ├── List.h ├── Makefile ├── Matrix.cpp ├── Matrix.h ├── QVlearning.cpp ├── QVlearning.h ├── Qlearning.cpp ├── Qlearning.h ├── README.md ├── README.org ├── Sarsa.cpp ├── Sarsa.h ├── SmallInterface.cpp ├── SmallInterface.h ├── SmallMaze.cpp ├── SmallMaze.h ├── SmallMazeCfg ├── State.h ├── StateActionAlgorithm.cpp ├── StateActionAlgorithm.h ├── StateActionUtils.cpp ├── StateActionUtils.h ├── World.h ├── cFunction.h ├── cLinear.cpp ├── cLinear.h ├── cNeuralNetwork.cpp ├── cNeuralNetwork.h ├── cTanH.cpp ├── cTanH.h ├── cThreshold.cpp └── cThreshold.h /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | 3 | # Object files 4 | *.o 5 | *.ko 6 | *.obj 7 | *.elf 8 | 9 | # Precompiled Headers 10 | *.gch 11 | *.pch 12 | 13 | # Libraries 14 | *.lib 15 | *.a 16 | *.la 17 | *.lo 18 | 19 | # Shared objects (inc. Windows DLLs) 20 | *.dll 21 | *.so 22 | *.so.* 23 | *.dylib 24 | 25 | # Executables 26 | *.exe 27 | *.out 28 | *.app 29 | *.i*86 30 | *.x86_64 31 | *.hex 32 | 33 | # Debug files 34 | *.dSYM/ 35 | -------------------------------------------------------------------------------- /Acla.cpp: -------------------------------------------------------------------------------- 1 | #ifndef ACLA 2 | #define ACLA 3 | 4 | # include "Acla.h" 5 | 6 | 7 | Acla::Acla( const char * parameterFile, World * w ) { 8 | 9 | discreteStates = w->getDiscreteStates() ; 10 | stateDimension = w->getStateDimension() ; 11 | 12 | if ( not w->getDiscreteActions() ) { 13 | 14 | cout << "Acla does not support continuous actions." 
<< endl ; 15 | cout << "Please check which MDP you are using it on." << endl ; 16 | exit(0) ; 17 | 18 | } else { 19 | 20 | numberOfActions = w->getNumberOfActions() ; 21 | 22 | } 23 | 24 | srand48( clock() ) ; 25 | 26 | readParameterFile( parameterFile ) ; 27 | 28 | if ( discreteStates ) { 29 | 30 | numberOfStates = w->getNumberOfStates() ; 31 | 32 | Q = new double*[ numberOfStates ] ; 33 | V = new double[ numberOfStates ] ; 34 | 35 | for ( int s = 0 ; s < numberOfStates ; s++ ) { 36 | 37 | Q[s] = new double[ numberOfActions ] ; 38 | V[s] = 0.0 ; 39 | 40 | for ( int a = 0 ; a < numberOfActions ; a++ ) { 41 | Q[s][a] = 0.0 ; 42 | } 43 | 44 | } 45 | 46 | } else { 47 | 48 | int layerSizesA[] = { stateDimension, nHiddenQ, 1 } ; 49 | int layerSizesV[] = { stateDimension, nHiddenV, 1 } ; 50 | 51 | for ( int a = 0 ; a < numberOfActions ; a++ ) { 52 | QNN.push_back( new cNeuralNetwork( 1, layerSizesA ) ) ; 53 | } 54 | VNN = new cNeuralNetwork( 1, layerSizesV ) ; 55 | 56 | 57 | VTarget = new double[ 1 ] ; 58 | QTarget = new double[ 1 ] ; 59 | 60 | Qs = new double[ numberOfActions ] ; 61 | 62 | } 63 | 64 | policy = new double[ numberOfActions ] ; 65 | 66 | } 67 | 68 | Acla::~Acla() { 69 | 70 | if ( discreteStates ) { 71 | 72 | for ( int s = 0 ; s < numberOfStates ; s++ ) { 73 | 74 | delete [] Q[s] ; 75 | 76 | } 77 | 78 | delete [] Q ; 79 | delete [] V ; 80 | 81 | } else { 82 | 83 | for ( int a = 0 ; a < numberOfActions ; a++ ) { 84 | delete QNN[a] ; 85 | } 86 | QNN.clear() ; 87 | delete VNN ; 88 | 89 | delete [] QTarget ; 90 | delete [] VTarget ; 91 | 92 | delete [] Qs ; 93 | 94 | } 95 | 96 | delete [] policy ; 97 | 98 | } 99 | 100 | 101 | void Acla::readParameterFile( const char * parameterFile ) { 102 | 103 | if ( not discreteStates ) { 104 | 105 | ifstream ifile ; 106 | 107 | ifile.open( parameterFile, ifstream::in ) ; 108 | 109 | read_moveTo( &ifile, "nn" ) ; 110 | 111 | read_moveTo( &ifile, "nHiddenQ" ) ; 112 | ifile >> nHiddenQ ; 113 | read_moveTo( &ifile, "nHiddenV" ) 
; 114 | ifile >> nHiddenV ; 115 | 116 | ifile.close() ; 117 | 118 | } 119 | 120 | } 121 | 122 | void Acla::update( State * state, Action * action, double rt, State * nextState, bool endOfEpisode, double * learningRate, double gamma ) { 123 | 124 | int at = action->discreteAction ; 125 | 126 | if ( state->discrete ) { 127 | 128 | int st = state->discreteState ; 129 | int st_ = nextState->discreteState; 130 | 131 | double Vt = V[ st ] ; 132 | 133 | if ( endOfEpisode ) { 134 | 135 | V[ st ] += learningRate[1]*( rt - V[ st ] ) ; 136 | 137 | } else { 138 | 139 | V[ st ] += learningRate[1]*( rt + gamma*V[ st_ ] - V[ st ] ) ; 140 | 141 | } 142 | 143 | if ( V[ st ] > Vt ) { 144 | 145 | Q[ st ][ at ] += learningRate[0]*( 1.0 - Q[ st ][ at ] ) ; 146 | 147 | } else { 148 | 149 | Q[ st ][ at ] += learningRate[2]*( 0.0 - Q[ st ][ at ] ) ; 150 | 151 | } 152 | 153 | } else { 154 | 155 | double * st = state->continuousState ; 156 | double * st_ = nextState->continuousState ; 157 | 158 | if ( endOfEpisode ) { 159 | 160 | VTarget[ 0 ] = rt ; 161 | 162 | } else { 163 | 164 | double Vs_ = VNN->forwardPropagate( st_ )[0] ; 165 | 166 | VTarget[ 0 ] = rt + gamma*Vs_ ; 167 | 168 | } 169 | 170 | double Vt = VNN->forwardPropagate( st )[0] ; 171 | 172 | VNN->backPropagate( st, VTarget, learningRate[1] ) ; 173 | 174 | if ( VTarget[0] > Vt ) { 175 | 176 | QTarget[ 0 ] = 1.0 ; 177 | QNN[ at ]->backPropagate( st, QTarget, learningRate[0] ) ; 178 | 179 | } else { 180 | 181 | QTarget[ 0 ] = 0.0 ; 182 | QNN[ at ]->backPropagate( st, QTarget, learningRate[2] ) ; 183 | 184 | } 185 | } 186 | } 187 | 188 | unsigned int Acla::getNumberOfLearningRates() { 189 | 190 | return 3 ; 191 | 192 | } 193 | 194 | const char * Acla::getName() { 195 | 196 | return "Acla" ; 197 | 198 | } 199 | 200 | #endif //ACLA 201 | -------------------------------------------------------------------------------- /Acla.h: -------------------------------------------------------------------------------- 1 | #ifndef ACLA_H 2 | #define 
ACLA_H 3 | 4 | # include "StateActionAlgorithm.h" 5 | 6 | class Acla : public StateActionAlgorithm { 7 | public: 8 | Acla( const char * parameterFile, World * w ) ; 9 | ~Acla() ; 10 | void readParameterFile( const char * parameterFile ) ; 11 | void update( State * st, Action * action, double rt, State * st_, bool endOfEpisode, double * learningRate, double gamma ) ; 12 | unsigned int getNumberOfLearningRates() ; 13 | const char * getName() ; 14 | 15 | private: 16 | int nHiddenV ; 17 | double * V ; 18 | cNeuralNetwork * VNN ; 19 | double * VTarget ; 20 | 21 | }; 22 | 23 | #endif //ACLA_H 24 | -------------------------------------------------------------------------------- /Action.h: -------------------------------------------------------------------------------- 1 | #ifndef ACTION 2 | #define ACTION 3 | 4 | struct Action { 5 | bool continuous ; 6 | bool discrete ; 7 | 8 | int actionDimension ; 9 | int numberOfActions ; 10 | 11 | int discreteAction ; 12 | double * continuousAction ; 13 | }; 14 | 15 | #endif //Action 16 | -------------------------------------------------------------------------------- /Algorithm.cpp: -------------------------------------------------------------------------------- 1 | #ifndef ALGORITHM 2 | #define ALGORITHM 3 | 4 | # include "Algorithm.h" 5 | 6 | using namespace std ; 7 | 8 | Algorithm::Algorithm() { 9 | discreteStates = false ; 10 | continuousStates = false ; 11 | discreteActions = false ; 12 | continuousActions = false ; 13 | } 14 | 15 | Algorithm::~Algorithm() {} 16 | 17 | double Algorithm::max( double * array, int n ) { 18 | 19 | maxX = array[ 0 ] ; 20 | 21 | for ( int i = 1 ; i < n; i++ ) { 22 | 23 | if ( array[i] > maxX ) { 24 | maxX = array[i] ; 25 | } 26 | 27 | } 28 | 29 | return maxX ; 30 | 31 | } 32 | 33 | 34 | int Algorithm::argmax( double * array, int n ) { 35 | 36 | maxX = array[ 0 ] ; 37 | maxI = 0 ; 38 | 39 | for ( int i = 1 ; i < n; i++ ) { 40 | 41 | X = array[ i ] ; 42 | 43 | if ( X > maxX ) { 44 | maxX = X ; 45 | maxI 
= i ; 46 | } 47 | 48 | } 49 | 50 | return maxI ; 51 | 52 | } 53 | 54 | std::vector Algorithm::argmaxAll( double * array, int n ) { 55 | 56 | maxX = array[ 0 ] ; 57 | std::vector maxI ; 58 | maxI.push_back( 0 ) ; 59 | 60 | for ( int i = 1 ; i < n; i++ ) { 61 | 62 | X = array[ i ] ; 63 | 64 | if ( X > maxX ) { 65 | 66 | maxX = X ; 67 | maxI.clear() ; 68 | maxI.push_back( i ) ; 69 | 70 | } else if ( X == maxX ) { 71 | 72 | maxI.push_back( i ) ; 73 | 74 | } 75 | 76 | } 77 | 78 | return maxI ; 79 | 80 | } 81 | 82 | void Algorithm::egreedy( State * state, Action * action, double epsilon ) { 83 | 84 | if ( drand48() < epsilon ) { 85 | 86 | getMaxAction( state, action ) ; 87 | 88 | } else { 89 | 90 | getRandomAction( state, action ) ; 91 | 92 | } 93 | 94 | } 95 | 96 | void Algorithm::read_moveTo( ifstream * ifile, string label ) { 97 | string temp ; 98 | 99 | while ( temp.compare( label ) != 0 and ifile ) { 100 | 101 | *ifile >> temp ; 102 | 103 | if ( ifile->eof() ) { 104 | cout << "Read error: Could not find label '" << label << "' while reading parameter file." 
<< endl ; 105 | exit(0) ; 106 | } 107 | 108 | } 109 | } 110 | 111 | 112 | 113 | #endif //ALGORITHM 114 | -------------------------------------------------------------------------------- /Algorithm.h: -------------------------------------------------------------------------------- 1 | #ifndef ALGORITHM_H 2 | #define ALGORITHM_H 3 | 4 | # include 5 | # include 6 | # include 7 | # include 8 | # include 9 | # include 10 | 11 | # include "State.h" 12 | # include "Action.h" 13 | # include "World.h" 14 | 15 | using namespace std ; 16 | 17 | class Algorithm { 18 | public: 19 | Algorithm() ; 20 | virtual ~Algorithm() ; 21 | double max( double * array, int n ) ; 22 | int argmax( double * array, int n ) ; 23 | std::vector< int > argmaxAll( double * array, int n ) ; 24 | void egreedy( State * state, Action * action, double epsilon ) ; 25 | 26 | virtual void getMaxAction( State * state, Action * action ) =0 ; 27 | virtual void getRandomAction( State * state, Action * action ) =0 ; 28 | virtual void explore( State * state, Action * action, double explorationRate, std::string explorationType, bool endOfEpisode ) =0 ; 29 | virtual void update( State * st, Action * action, double rt, State * st_, bool endOfEpisode, double * learningRate, double gamma ) =0 ; 30 | virtual unsigned int getNumberOfLearningRates() =0; 31 | virtual bool getContinuousStates() =0; 32 | virtual bool getDiscreteStates() =0; 33 | virtual bool getContinuousActions() =0; 34 | virtual bool getDiscreteActions() =0; 35 | virtual const char * getName() =0; 36 | 37 | protected: 38 | void read_moveTo( ifstream *, string ) ; 39 | 40 | bool continuousStates, discreteStates, continuousActions, discreteActions ; 41 | int stateDimension, numberOfStates, actionDimension, numberOfActions ; 42 | double X, maxX ; 43 | int maxI ; 44 | }; 45 | 46 | #endif //ALGORITHM_H 47 | -------------------------------------------------------------------------------- /Arm.cpp: 
-------------------------------------------------------------------------------- 1 | #ifndef ARM 2 | #define ARM 3 | # include 4 | # include 5 | # include 6 | # include 7 | # include "Arm.h" 8 | #include 9 | #include "std_msgs/MultiArrayLayout.h" 10 | #include "std_msgs/MultiArrayDimension.h" 11 | #include 12 | #include 13 | #include 14 | 15 | using namespace std; 16 | 17 | Arm::Arm() { 18 | srand((unsigned)time(0)); 19 | MINACTION = -1.0; 20 | MAXACTION = 1.0; 21 | 22 | continuousActions = true ; 23 | discreteActions = true ; 24 | continuousStates = true ; 25 | discreteStates = false ; 26 | 27 | stateDimension = 4; 28 | actionDimension = 3; 29 | numberOfActions = 21; 30 | 31 | cnt = 0; 32 | 33 | char * p=0; 34 | int argc=0; 35 | ros::init(argc, &p, getName()); 36 | 37 | ros::NodeHandle n; 38 | pub_reset = n.advertise("/robot/0/reset", 1); 39 | pub_target = n.advertise("/robot/0/target", 1); 40 | pub_motor = n.advertise("/robot/0/motors", 1); 41 | pub_reward = n.advertise("/robot/0/perf", 1); 42 | sub_sensor = n.subscribe("/robot/0/sensors", 1, &Arm::sensorCallback, this); 43 | sensorValues = (sensor*)malloc(sizeof(sensor)*actionDimension); 44 | memset(sensorValues,0,sizeof(double)*actionDimension); 45 | // ros::R 46 | usleep(1000000); 47 | // loop_rate = new ros::Rate(10); 48 | 49 | gotsensor = false; 50 | 51 | // Mc = 1.0; 52 | // mp = 0.1; 53 | // l = 0.5; 54 | // t = 0.02; 55 | // g = 9.81; 56 | 57 | // totalM = Mc + mp; 58 | // ml = mp*l; 59 | 60 | // maxPhi = M_PI / 15.0; 61 | // maxX = 2.4; 62 | // maxDPhi = M_PI / 15.0; 63 | // maxDX = 2.4; 64 | t = 0.; 65 | 66 | // target 67 | tx = 0.; 68 | ty = 0.; 69 | 70 | // l0 = 0.69230769; 71 | // l1 = 0.23076923; 72 | // l2 = 0.07692308; 73 | 74 | reset() ; 75 | } 76 | 77 | Arm::~Arm() { 78 | } 79 | 80 | void Arm::sensorCallback(const std_msgs::Float32MultiArray::ConstPtr& sensormsg) { 81 | // std::cerr << "got something: [" << sensormsg->data[0] << ", " << sensormsg->data[1] << "]" << std::endl; 82 | // std::cerr 
<< "data size: " << sensormsg->data.size() << ", " << stateDimension << std::endl; 83 | int len=std::min((int)sensormsg->data.size(), stateDimension); 84 | for(int k=0;kdata[k]; 86 | // std::cout << "sensors: " << sensorValues[k] << std::endl; 87 | } 88 | // error is the input 89 | sensorValues[0] -= tx; 90 | sensorValues[1] -= ty; 91 | gotsensor = true; 92 | // cout << "gotsensor? cb " << gotsensor << endl; 93 | } 94 | 95 | void Arm::settarget() { 96 | double rnd = ((double) rand()/RAND_MAX); 97 | double phi = rnd * 0.5 * M_PI; 98 | double r = 0.8 + rnd * 0.2; 99 | tx = r * cos(phi); 100 | ty = r * sin(phi); 101 | cout << "rnd = " << rnd << ", phi = " << phi << ", r = " << r << ", tx " << tx << ", ty = " << ty << endl; 102 | 103 | std_msgs::Float32MultiArray msg_target; 104 | msg_target.data.clear(); 105 | msg_target.data.push_back(tx); 106 | msg_target.data.push_back(ty); 107 | msg_target.data.push_back(0.); 108 | pub_target.publish(msg_target); 109 | } 110 | 111 | void Arm::reset() { 112 | std_msgs::Int32 msg; 113 | msg.data = 23; 114 | pub_reset.publish(msg); 115 | 116 | settarget(); 117 | // cout << "tx " << tx << ", ty = " << ty << endl; 118 | cnt = 0; 119 | // motor 120 | eoe = true ; 121 | } 122 | 123 | void Arm::step( double h, double *force ) { 124 | std_msgs::Float32MultiArray msg; 125 | // msg.layout.dim_length = 1; 126 | msg.data.clear(); 127 | for(int k=0;k(sensors[0], sensors[sensornumber-1]); 132 | // memcpy(msg.data,sensors, msg.data); 133 | // for (int i = 0; i < 10; i++) { 134 | // cout << "publishing " << i << endl; 135 | pub_motor.publish(msg); 136 | // cout << "published " << i << endl; 137 | // ros::spinOnce(); 138 | // cout << "spun " << i << endl; 139 | // loop_rate->sleep(); 140 | // } 141 | // spin once so to harvest incoming data 142 | // cout << "step" << endl; 143 | } 144 | 145 | double Arm::reward() { 146 | std_msgs::Float32MultiArray msg_reward; 147 | double reward = 1.0; 148 | double err = 0., xerr = 0, yerr = 0; 149 | // double tx 
= 1.; 150 | // double ty = 0.; 151 | 152 | xerr = fabs(sensorValues[0] - tx); 153 | yerr = fabs(sensorValues[1] - ty); 154 | err = sqrt(pow(xerr + yerr, 2)); 155 | // cout << "sensors: " << sensorValues[0] << ", " << sensorValues[1] << endl; 156 | // cout << "errx " << fabs(sensorValues[0] - tx) << ", " << "erry " << fabs(sensorValues[1] - ty) << endl; 157 | // if (err < 0.2) { 158 | // reward = 1.0; 159 | // } 160 | // else { 161 | // reward = -1.; 162 | // } 163 | // if (reward > 0) { 164 | // // cout << "reward = " << reward << endl; 165 | // cout << reward << flush; 166 | // } 167 | reward = -err; 168 | 169 | msg_reward.data.clear(); 170 | msg_reward.data.push_back(reward); 171 | pub_reward.publish(msg_reward); 172 | 173 | // cout << "reward" << endl; 174 | return reward; 175 | } 176 | 177 | void Arm::update(double deltaTime, double *force) { 178 | int n = 1; 179 | double h = deltaTime/n; 180 | gotsensor = false; 181 | for( int i = 0 ; i < n ; i++ ) { 182 | step(h, force); 183 | } 184 | // cout << "gotsensor? pre" << gotsensor << endl; 185 | while (!gotsensor && ros::ok()) { 186 | // wait 187 | // cout << "gotsensor? inner " << gotsensor << endl; 188 | ros::spinOnce(); 189 | usleep(100); 190 | // // loop_rate->sleep(); 191 | } 192 | // cout << "gotsensor? 
post" << gotsensor << endl; 193 | std_msgs::Float32MultiArray msg_target; 194 | msg_target.data.clear(); 195 | msg_target.data.push_back(tx); 196 | msg_target.data.push_back(ty); 197 | msg_target.data.push_back(0.); 198 | pub_target.publish(msg_target); 199 | cnt++; 200 | 201 | if (cnt % 500 == 0) 202 | settarget(); 203 | // cout << "update" << endl; 204 | } 205 | 206 | double Arm::act( Action * action ) { 207 | 208 | double force = 0.0 ; 209 | 210 | if ( action->continuous ) { 211 | // cout << "a0: " << action->continuousAction[0] << ", a1: " << action->continuousAction[1] << endl; 212 | // This version of cart pole scales continuous actions in [-1.0, 1.0] to the nearest 213 | // discrete action in the set { -10, -9, ..., 9, 10 } 214 | 215 | // force = action->continuousAction[0] ; 216 | 217 | // if ( force < -1 ) { 218 | // force = -1.0 ; 219 | // } else if ( force > 1 ) { 220 | // force = 1.0 ; 221 | // } 222 | 223 | // force += 1.0 ; // range: [0.0, 2.0] 224 | // force *= 0.5 ; // range: [0.0, 1.0] 225 | // force *= numberOfActions - 1 ; // range: [0.0, numberOfActions - 1] 226 | // force = floor( force + 0.5 ) ; // set: { 0, 1, ..., numberOfActions - 1 } 227 | 228 | } else if ( action->discrete ) { 229 | 230 | force = action->discreteAction ; 231 | } 232 | 233 | // //Scale force from { 0, 1, ..., numberOfActions - 1 } to { -10, 10 }: 234 | // force /= numberOfActions - 1 ; // set: {0.0, 1/(numberOfActions-1), ..., 1.0 } 235 | // force *= MAXACTION - MINACTION ; // set: {0.0, 20.0/(numberOfActions-1), ..., 20.0 } 236 | // force += MINACTION ; // set: {-10.0, -10.0 + 20.0/(numberOfActions-1), ..., 10.0 } 237 | 238 | // return act( force ) ; 239 | update(t, action->continuousAction); 240 | // cout << "act" << endl; 241 | double r = reward(); 242 | 243 | // if (r <= -1.5) { 244 | // reset(); 245 | // } 246 | // else { 247 | // eoe = false; 248 | // } 249 | return r; 250 | } 251 | 252 | // double Arm::act( double force ) { 253 | // // send out command 254 | // 
update( t, force ); 255 | // double r = reward(); 256 | 257 | // if (r < 0) { 258 | // reset(); 259 | // } else { 260 | // eoe = false ; 261 | // } 262 | // return r; 263 | // } 264 | 265 | bool Arm::endOfEpisode() { 266 | return eoe ; 267 | } 268 | 269 | void Arm::getState( State * state ) { 270 | int i; 271 | for (i = 0; i < stateDimension; i++) { 272 | state->continuousState[i] = sensorValues[i]; 273 | // state->continuousState[1] = 0.; 274 | // state->continuousState[2] = 0.; 275 | // state->continuousState[3] = 0.; 276 | } 277 | // cout << "getState" << endl; 278 | 279 | } 280 | 281 | void Arm::setState( State * state ) { 282 | 283 | // phi = state->continuousState[0]; 284 | // dphi = state->continuousState[1]; 285 | // x = state->continuousState[2]; 286 | // dx = state->continuousState[3]; 287 | 288 | } 289 | 290 | bool Arm::getDiscreteStates() { 291 | return false ; 292 | } 293 | 294 | bool Arm::getContinuousStates() { 295 | return true ; 296 | } 297 | 298 | int Arm::getStateDimension() { 299 | return stateDimension ; 300 | } 301 | 302 | bool Arm::getDiscreteActions() { 303 | return true ; 304 | } 305 | 306 | bool Arm::getContinuousActions() { 307 | return true ; 308 | } 309 | 310 | int Arm::getActionDimension() { 311 | return actionDimension ; 312 | } 313 | 314 | int Arm::getNumberOfActions() { 315 | return numberOfActions; 316 | } 317 | 318 | const char * Arm::getName() { 319 | return "Arm" ; 320 | } 321 | 322 | #endif //ARM 323 | -------------------------------------------------------------------------------- /Arm.h: -------------------------------------------------------------------------------- 1 | #ifndef ARM_H 2 | #define ARM_H 3 | # include "State.h" 4 | # include "StateActionUtils.h" 5 | # include "World.h" 6 | #include 7 | #include 8 | #include 9 | 10 | typedef float motor; 11 | typedef float sensor; 12 | 13 | class Arm : public World { 14 | public: 15 | Arm(); 16 | ~Arm(); 17 | 18 | double act( Action * ); 19 | void getState( State * ); 20 | void 
setState( State * ); 21 | bool endOfEpisode(); 22 | void reset(); 23 | 24 | bool getDiscreteStates() ; 25 | bool getContinuousStates() ; 26 | int getStateDimension() ; 27 | bool getDiscreteActions() ; 28 | bool getContinuousActions() ; 29 | int getActionDimension() ; 30 | int getNumberOfActions() ; 31 | 32 | const char * getName() ; 33 | // ros 34 | void settarget(); 35 | void sensorCallback(const std_msgs::Float32MultiArray::ConstPtr& motormsg); 36 | 37 | private: 38 | double act( double ) ; 39 | void accel(); 40 | void step( double, double* ); 41 | void update(double , double* ); 42 | double reward(); 43 | // double phi, dphi, x, dx; 44 | // double maxPhi, maxDPhi, maxX, maxDX; 45 | double MINACTION, MAXACTION; 46 | double t; 47 | // double Mc, mp, l, t, g; 48 | // double totalM, ml, NORMANGLE; 49 | double theta0, theta1, theta2; 50 | double l0, l1, l2; 51 | bool eoe ; 52 | // ros 53 | // int number_sensors; 54 | // int number_motors; 55 | int cnt; 56 | bool gotsensor; 57 | ros::Publisher pub_reset; 58 | ros::Publisher pub_motor; 59 | ros::Subscriber sub_sensor; 60 | ros::Publisher pub_target; 61 | ros::Publisher pub_reward; 62 | // ros::Rate *loop_rate; 63 | motor* motorValues; 64 | sensor* sensorValues; 65 | double tx, ty; 66 | //double ty; 67 | 68 | }; 69 | 70 | #endif // ARM_H 71 | -------------------------------------------------------------------------------- /ArmCfgCacla: -------------------------------------------------------------------------------- 1 | nExperiments 10 2 | 3 | 4 | steps 5 | nTrainSteps 30000 6 | trainStorePer 3000 7 | nTestSteps 15000 8 | testStorePer 15000 9 | nTrainEpisodes 0 10 | nTestEpisodes 0 11 | nMaxStepsPerTrainEpisode 0 12 | nMaxStepsPerTestEpisode 0 13 | 14 | algorithm 15 | algorithms Cacla 16 | tau 0.001 0.01 0.1 1.0 10.0 17 | epsilon 0.001 0.01 0.1 1.0 18 | sigma 0.01 0.1 1.0 10.0 19 | 20 | learningRates 21 | decreaseType none 22 | nLearningRates 3 23 | learningRate 0.0001 0.001 0.01 0.1 24 | learningRate 0.0 0.001 0.01 0.1 
25 | learningRate 0.0 0.001 0.01 0.1 26 | 27 | discount 28 | gamma 0.99 29 | 30 | nn 31 | nHiddenQ 20 32 | nHiddenV 20 33 | -------------------------------------------------------------------------------- /ArmCfgCaclaSimple: -------------------------------------------------------------------------------- 1 | nExperiments 10 2 | 3 | 4 | steps 5 | nTrainSteps 30000 6 | trainStorePer 3000 7 | nTestSteps 15000 8 | testStorePer 15000 9 | nTrainEpisodes 0 10 | nTestEpisodes 0 11 | nMaxStepsPerTrainEpisode 0 12 | nMaxStepsPerTestEpisode 0 13 | 14 | algorithm 15 | algorithms Cacla 16 | tau 17 | epsilon 18 | sigma 0.05 19 | 20 | learningRates 21 | decreaseType none 22 | nLearningRates 3 23 | learningRate 0.1 24 | learningRate 0.1 25 | learningRate 0.1 26 | 27 | discount 28 | gamma 0. 29 | 30 | nn 31 | nHiddenQ 20 32 | nHiddenV 20 33 | -------------------------------------------------------------------------------- /ArmInterface.cpp: -------------------------------------------------------------------------------- 1 | # ifndef ARMINTERFACE 2 | # define ARMINTERFACE 3 | 4 | # include "ArmInterface.h" 5 | # include "Arm.h" 6 | 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | ArmInterface::ArmInterface( const char * pFile ) { 15 | 16 | parameterFile = pFile ; 17 | 18 | parameterPath.assign( parameterFile ) ; 19 | 20 | readParameterFile() ; 21 | 22 | } 23 | 24 | 25 | // void my_handler1(sig_t s){ 26 | // printf("Caught signal %d\n",s); 27 | // ros::shutdown(); 28 | // exit(1); 29 | // } 30 | 31 | void my_handler(int s){ 32 | printf("Caught signal %d\n",s); 33 | ros::shutdown(); 34 | exit(1); 35 | } 36 | 37 | int main( int argn, char *argv[] ) { 38 | 39 | if ( argn != 2 ) { 40 | 41 | cout << "You need to specify exactly one configuration (parameter) file." 
<< endl ; 42 | return 0 ; 43 | 44 | } 45 | 46 | // struct sigaction sigIntHandler; 47 | 48 | // sigIntHandler.sa_handler = my_handler; 49 | // sigemptyset(&sigIntHandler.sa_mask); 50 | // sigIntHandler.sa_flags = 0; 51 | 52 | // sigaction(SIGINT, &sigIntHandler, NULL); 53 | signal (SIGINT,my_handler); 54 | 55 | ArmInterface * interface = new ArmInterface( argv[1] ) ; 56 | 57 | Arm * arm = new Arm() ; 58 | 59 | // while(ros::ok()) { 60 | interface->getParametersAndRunExperiments( arm ) ; 61 | // } 62 | 63 | delete arm ; 64 | delete interface ; 65 | cout << "ending" << endl; 66 | //pause(); 67 | } 68 | 69 | # endif //ARMINTERFACE 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /ArmInterface.h: -------------------------------------------------------------------------------- 1 | # ifndef ARMINTERFACE_H 2 | # define ARMINTERFACE_H 3 | 4 | # include "Experiment.h" 5 | # include "Arm.h" 6 | 7 | class ArmInterface : public Experiment { 8 | public: 9 | ArmInterface( const char * parameterFile ) ; 10 | ~ArmInterface() {}; 11 | 12 | }; 13 | 14 | # endif //ARMINTERFACE_H 15 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.3) 2 | 3 | project(rlcpp) 4 | 5 | # find_package(catkin REQUIRED) 6 | 7 | link_directories(/opt/ros/kinetic/lib) 8 | 9 | add_library(rlcpp_algorithms SHARED Sarsa.cpp Qlearning.cpp QVlearning.cpp Acla.cpp Cacla.cpp) 10 | 11 | add_library(rlcpp_util SHARED StateActionUtils.cpp cNeuralNetwork.cpp Algorithm.cpp StateActionAlgorithm.cpp Experiment.cpp) 12 | 13 | # this doesn't work as ROS code needs to be included conditionally 14 | # add_executable(CartPole CartPole.cpp CartPoleInterface.cpp) 15 | # target_link_libraries(CartPole rlcpp_util rlcpp_algorithms) 16 | 17 | add_executable(CartPoleRos CartPole.cpp CartPoleInterface.cpp) 18 | 
target_link_libraries(CartPoleRos rlcpp_util rlcpp_algorithms roscpp rosconsole roscpp_serialization roslib rostime) 19 | 20 | add_executable(ArmRos Arm.cpp ArmInterface.cpp) 21 | target_link_libraries(ArmRos rlcpp_util rlcpp_algorithms roscpp rosconsole roscpp_serialization roslib rostime) 22 | 23 | -------------------------------------------------------------------------------- /Cacla.cpp: -------------------------------------------------------------------------------- 1 | #ifndef CACLA 2 | #define CACLA 3 | 4 | # include "Cacla.h" 5 | 6 | 7 | Cacla::Cacla( const char * parameterFile, World * w ) { 8 | 9 | discreteStates = w->getDiscreteStates() ; 10 | 11 | actionDimension = w->getActionDimension() ; 12 | 13 | srand48( clock() ) ; 14 | 15 | readParameterFile( parameterFile ) ; 16 | 17 | if ( discreteStates ) { 18 | 19 | numberOfStates = w->getNumberOfStates() ; 20 | 21 | A = new double*[ numberOfStates ] ; 22 | V = new double[ numberOfStates ] ; 23 | 24 | for ( int s = 0 ; s < numberOfStates ; s++ ) { 25 | 26 | A[ s ] = new double[ actionDimension ] ; 27 | 28 | for ( int a = 0 ; a < actionDimension ; a++ ) { 29 | A[ s ][ a ] = 0.0 ; 30 | } 31 | 32 | V[ s ] = 0.0 ; 33 | 34 | } 35 | 36 | } else { 37 | 38 | stateDimension = w->getStateDimension() ; 39 | 40 | int layerSizesA[] = { stateDimension, nHiddenQ, actionDimension } ; 41 | int layerSizesV[] = { stateDimension, nHiddenV, 1 } ; 42 | 43 | ANN = new cNeuralNetwork( 1, layerSizesA ) ; 44 | VNN = new cNeuralNetwork( 1, layerSizesV ) ; 45 | 46 | VTarget = new double[ 1 ] ; 47 | 48 | } 49 | 50 | storedGauss = false ; 51 | 52 | } 53 | 54 | Cacla::~Cacla() { 55 | 56 | if ( discreteStates ) { 57 | 58 | for ( int s = 0 ; s < numberOfStates ; s++ ) { 59 | delete[] A[s] ; 60 | } 61 | 62 | delete [] A ; 63 | delete [] V ; 64 | 65 | } else { 66 | 67 | delete ANN ; 68 | delete VNN ; 69 | 70 | delete [] VTarget ; 71 | 72 | } 73 | 74 | } 75 | 76 | 77 | void Cacla::readParameterFile( const char * parameterFile ) { 78 | 79 | if ( 
not discreteStates ) { 80 | 81 | ifstream ifile ; 82 | 83 | ifile.open( parameterFile, ifstream::in ) ; 84 | 85 | read_moveTo( &ifile, "nn" ) ; 86 | 87 | read_moveTo( &ifile, "nHiddenQ" ) ; 88 | ifile >> nHiddenQ ; 89 | read_moveTo( &ifile, "nHiddenV" ) ; 90 | ifile >> nHiddenV ; 91 | 92 | ifile.close() ; 93 | 94 | } 95 | 96 | } 97 | 98 | void Cacla::getMaxAction( State * state, Action * action ) { 99 | 100 | double * As ; 101 | 102 | if ( state->discrete ) { 103 | 104 | As = A[ state->discreteState ] ; 105 | 106 | } else { 107 | 108 | As = ANN->forwardPropagate( state->continuousState ); 109 | 110 | } 111 | 112 | for ( int a = 0 ; a < actionDimension ; a++ ) { 113 | 114 | action->continuousAction[a] = As[a] ; 115 | 116 | } 117 | 118 | } 119 | 120 | void Cacla::getRandomAction( State * state, Action * action ) { 121 | 122 | for ( int a = 0 ; a < actionDimension ; a++ ) { 123 | 124 | action->continuousAction[a] = 2.0*drand48() - 1.0 ; 125 | 126 | } 127 | 128 | } 129 | 130 | 131 | void Cacla::explore( State * state, Action * action, double explorationRate, string explorationType, bool endOfEpisode ) { 132 | 133 | if ( explorationType.compare("boltzmann") == 0 ) { 134 | 135 | cout << "Boltzmann exploration is as of yet undefined for Cacla." << endl ; 136 | exit(0) ; 137 | 138 | } else if ( explorationType.compare("egreedy") == 0 ) { 139 | 140 | egreedy( state, action, explorationRate ) ; 141 | 142 | } else if ( explorationType.compare("gaussian") == 0 ) { 143 | 144 | gaussian( state, action, explorationRate ) ; 145 | 146 | } else { 147 | 148 | cout << "Warning, no exploreType: " << explorationType << endl ; 149 | exit(0) ; 150 | 151 | } 152 | 153 | } 154 | 155 | double Cacla::gaussianRandom() { 156 | // Generates gaussian (or normal) random numbers, with mean = 0 and 157 | // std dev = 1. Used for gaussian exploration. 
158 | 159 | if ( storedGauss ) { 160 | 161 | storedGauss = false ; 162 | 163 | return g2 ; 164 | 165 | } else { 166 | 167 | double x, y ; 168 | 169 | double z = 1.0 ; 170 | 171 | while ( z >= 1.0 ) { 172 | x = 2.0*drand48() - 1.0; 173 | y = 2.0*drand48() - 1.0; 174 | z = x * x + y * y; 175 | } 176 | 177 | z = sqrt( -2.0 * log( z ) / z ); 178 | 179 | g1 = x * z; 180 | g2 = y * z; 181 | 182 | storedGauss = true ; 183 | 184 | return g1 ; 185 | 186 | } 187 | 188 | } 189 | 190 | void Cacla::gaussian( State * state, Action * action, double sigma ) { 191 | 192 | getMaxAction( state, action ) ; 193 | 194 | for ( int a = 0 ; a < actionDimension ; a++ ) { 195 | 196 | action->continuousAction[a] += sigma*gaussianRandom() ; 197 | 198 | } 199 | 200 | } 201 | 202 | void Cacla::update( State * state, Action * action, double rt, State * nextState, bool endOfEpisode, double * learningRate, double gamma ) { 203 | 204 | double * at = action->continuousAction ; 205 | 206 | // cout << "Cacla.cpp:learningRate[0] = " << learningRate[0] << ", [1] = " << learningRate[1] << endl; 207 | if ( state->discrete ) { 208 | 209 | int st = state->discreteState ; 210 | int st_ = nextState->discreteState ; 211 | 212 | double Vt = V[ st ] ; 213 | 214 | if ( endOfEpisode ) { 215 | 216 | V[ st ] += learningRate[1]*( rt - V[ st ] ) ; 217 | 218 | } else { 219 | 220 | V[ st ] += learningRate[1]*( rt + gamma*V[ st_ ] - V[ st ] ) ; 221 | 222 | } 223 | 224 | if ( V[ st ] > Vt ) { 225 | 226 | for ( int a = 0 ; a < actionDimension ; a++ ) { 227 | 228 | A[ st ][ a ] += learningRate[0]*( at[ a ] - A[ st ][ a ] ) ; 229 | 230 | } 231 | 232 | } 233 | 234 | } else { 235 | 236 | double * st = state->continuousState ; 237 | double * st_ = nextState->continuousState ; 238 | 239 | if ( endOfEpisode ) { 240 | 241 | VTarget[ 0 ] = rt ; 242 | 243 | } else { 244 | 245 | double Vs_ = VNN->forwardPropagate( st_ )[0] ; 246 | 247 | VTarget[ 0 ] = rt + gamma*Vs_ ; 248 | 249 | } 250 | 251 | double Vt = VNN->forwardPropagate( st 
)[0] ; 252 | 253 | VNN->backPropagate( st, VTarget, learningRate[1] ) ; 254 | 255 | if ( VTarget[0] > Vt ) { 256 | // cout << "st: " << *st << ", at: " << *at << endl; 257 | ANN->backPropagate( st, at, learningRate[0] ) ; 258 | 259 | } 260 | 261 | } 262 | 263 | } 264 | 265 | 266 | unsigned int Cacla::getNumberOfLearningRates() { 267 | 268 | return 2 ; 269 | 270 | } 271 | 272 | bool Cacla::getContinuousStates() { 273 | 274 | return true ; 275 | 276 | } 277 | 278 | bool Cacla::getDiscreteStates() { 279 | 280 | return true ; 281 | 282 | } 283 | 284 | bool Cacla::getContinuousActions() { 285 | 286 | return true ; 287 | 288 | } 289 | 290 | bool Cacla::getDiscreteActions() { 291 | 292 | return false ; 293 | 294 | } 295 | 296 | const char * Cacla::getName() { 297 | 298 | return "Cacla" ; 299 | 300 | } 301 | 302 | #endif //CACLA 303 | -------------------------------------------------------------------------------- /Cacla.h: -------------------------------------------------------------------------------- 1 | #ifndef CACLA_H 2 | #define CACLA_H 3 | 4 | # include 5 | # include 6 | # include 7 | # include 8 | # include "cNeuralNetwork.h" 9 | # include "Algorithm.h" 10 | # include "StateActionUtils.h" 11 | 12 | class Cacla : public Algorithm { 13 | public: 14 | Cacla( const char * parameterFile, World * w ) ; 15 | ~Cacla() ; 16 | void readParameterFile( const char * parameterFile ) ; 17 | void getMaxAction( State * state, Action * action ) ; 18 | void getRandomAction( State * state, Action * action ) ; 19 | void explore( State * state, Action * action, double explorationRate, string explorationType, bool endOfEpisode ) ; 20 | void update( State * st, Action * action, double rt, State * st_, bool endOfEpisode, double * learningRate, double gamma ) ; 21 | unsigned int getNumberOfLearningRates() ; 22 | bool getContinuousStates() ; 23 | bool getDiscreteStates() ; 24 | bool getContinuousActions() ; 25 | bool getDiscreteActions() ; 26 | const char * getName() ; 27 | 28 | private: 29 
| void gaussian( State * state, Action * action, double sigma ) ; 30 | 31 | double gaussianRandom() ; 32 | double g1, g2 ; 33 | bool storedGauss ; 34 | 35 | int nHiddenQ, nHiddenV ; 36 | double epsilon, sigma ; 37 | double * V ; 38 | double ** A ; 39 | cNeuralNetwork * ANN ; 40 | cNeuralNetwork * VNN ; 41 | double * VTarget ; 42 | }; 43 | 44 | #endif //CACLA_H 45 | -------------------------------------------------------------------------------- /CartPole.cpp: -------------------------------------------------------------------------------- 1 | #ifndef CARTPOLE 2 | #define CARTPOLE 3 | # include 4 | # include 5 | # include 6 | # include 7 | # include "CartPole.h" 8 | #include 9 | #include "std_msgs/MultiArrayLayout.h" 10 | #include "std_msgs/MultiArrayDimension.h" 11 | #include 12 | #include 13 | #include 14 | 15 | using namespace std; 16 | 17 | CartPole::CartPole() { 18 | srand((unsigned)time(0)); 19 | MINACTION = -10.0; 20 | MAXACTION = 10.0; 21 | 22 | continuousActions = true ; 23 | discreteActions = true ; 24 | continuousStates = true ; 25 | discreteStates = false ; 26 | 27 | stateDimension = 4; 28 | actionDimension = 1; 29 | numberOfActions = 21 ; 30 | 31 | Mc = 1.0; 32 | mp = 0.1; 33 | l = 0.5; 34 | t = 0.02; 35 | g = 9.81; 36 | 37 | totalM = Mc + mp; 38 | ml = mp*l; 39 | 40 | maxPhi = M_PI / 15.0; 41 | maxX = 2.4; 42 | maxDPhi = M_PI / 15.0; 43 | maxDX = 2.4; 44 | 45 | char * p=0; 46 | int argc=0; 47 | ros::init(argc, &p, getName()); 48 | 49 | ros::NodeHandle n; 50 | // pub_reset = n.advertise("/arm/reset", 1); 51 | //pub_target = n.advertise("/arm/target", 1); 52 | pub_motor = n.advertise("/robot/0/motors", 1); 53 | pub_sensor1 = n.advertise("/robot/0/pos", 1); 54 | pub_sensor2 = n.advertise("/robot/1/pos", 1); 55 | usleep(1000000); 56 | loop_rate = new ros::Rate(1000); 57 | 58 | // msg.layout.dim = (std_msgs::MultiArrayDimension *) 59 | // malloc(sizeof(std_msgs::MultiArrayDimension) * 1); 60 | // msg.layout.dim[0].label = "msg"; 61 | // 
msg.layout.dim[0].size = 4; 62 | // msg.layout.dim[0].stride = 1*4; 63 | // msg.layout.dim_length = 1; 64 | // msg.layout.data_offset = 0; 65 | 66 | // msg.data = (float *)malloc(sizeof(float)*3); 67 | // msg.data_length = 3; 68 | 69 | reset() ; 70 | } 71 | 72 | CartPole::~CartPole() { 73 | } 74 | 75 | void CartPole::reset() { 76 | double rnd = ((double) rand()/RAND_MAX); 77 | 78 | phi = 0.1*rnd - 0.05; 79 | dphi = 0.0; 80 | x = 0.0; 81 | dx = 0.0; 82 | 83 | eoe = true ; 84 | } 85 | 86 | void CartPole::step( double h, double force ) { 87 | std_msgs::Float32MultiArray msg; 88 | double cos_phi = cos( phi ); 89 | double sin_phi = sin( phi ); 90 | double dphi_sq = dphi*dphi; 91 | double cos_phi_sq = cos_phi*cos_phi; 92 | 93 | double ddphi = force*cos_phi - totalM*g*sin_phi + ml*(cos_phi*sin_phi)*dphi*dphi ; 94 | ddphi /= ml*cos_phi*cos_phi - totalM*l ; 95 | 96 | double ddx = force + ml*sin_phi*dphi_sq - mp*g*cos_phi*sin_phi ; 97 | ddx /= totalM - mp*cos_phi_sq ; 98 | 99 | phi += h*dphi; 100 | dphi += h*ddphi; 101 | x += h*dx; 102 | dx += h*ddx; 103 | 104 | cos_phi = cos( phi ); 105 | sin_phi = sin( phi ); 106 | 107 | // do ros pubishing 108 | // motors / action 109 | msg.data.clear(); 110 | for(int k=0;k maxPhi) { 140 | reward = -1.0; 141 | } else if (x > maxX) { 142 | reward = -1.0; 143 | } else if (x < -maxX) { 144 | reward = -1.0; 145 | } 146 | return reward; 147 | } 148 | 149 | void CartPole::update(double deltaTime, double force) { 150 | int n = 10; 151 | double h = deltaTime/n; 152 | for( int i = 0 ; i < n ; i++ ) { 153 | step(h, force); 154 | } 155 | loop_rate->sleep(); 156 | } 157 | 158 | double CartPole::act( Action * action ) { 159 | 160 | double force = 0.0 ; 161 | 162 | if ( action->continuous ) { 163 | // This version of cart pole scales continuous actions in [-1.0, 1.0] to the nearest 164 | // discrete action in the set { -10, -9, ..., 9, 10 } 165 | 166 | force = action->continuousAction[0] ; 167 | 168 | if ( force < -1 ) { 169 | force = -1.0 ; 170 | } 
else if ( force > 1 ) { 171 | force = 1.0 ; 172 | } 173 | 174 | force += 1.0 ; // range: [0.0, 2.0] 175 | force *= 0.5 ; // range: [0.0, 1.0] 176 | force *= numberOfActions - 1 ; // range: [0.0, numberOfActions - 1] 177 | force = floor( force + 0.5 ) ; // set: { 0, 1, ..., numberOfActions - 1 } 178 | 179 | } else if ( action->discrete ) { 180 | 181 | force = action->discreteAction ; 182 | } 183 | 184 | //Scale force from { 0, 1, ..., numberOfActions - 1 } to { -10, 10 }: 185 | force /= numberOfActions - 1 ; // set: {0.0, 1/(numberOfActions-1), ..., 1.0 } 186 | force *= MAXACTION - MINACTION ; // set: {0.0, 20.0/(numberOfActions-1), ..., 20.0 } 187 | force += MINACTION ; // set: {-10.0, -10.0 + 20.0/(numberOfActions-1), ..., 10.0 } 188 | 189 | return act( force ) ; 190 | 191 | } 192 | 193 | double CartPole::act( double force ) { 194 | 195 | update( t, force ); 196 | double r = reward(); 197 | 198 | if (r < 0) { 199 | reset(); 200 | } else { 201 | eoe = false ; 202 | } 203 | return r; 204 | } 205 | 206 | bool CartPole::endOfEpisode() { 207 | return eoe ; 208 | } 209 | 210 | void CartPole::getState( State * state ) { 211 | 212 | state->continuousState[0] = phi /maxPhi; 213 | state->continuousState[1] = dphi/maxDPhi; 214 | state->continuousState[2] = x /maxX; 215 | state->continuousState[3] = dx /maxDX; 216 | 217 | } 218 | 219 | void CartPole::setState( State * state ) { 220 | 221 | phi = state->continuousState[0]*maxPhi; 222 | dphi = state->continuousState[1]*maxDPhi; 223 | x = state->continuousState[2]*maxX; 224 | dx = state->continuousState[3]*maxDX; 225 | 226 | } 227 | 228 | bool CartPole::getDiscreteStates() { 229 | return false ; 230 | } 231 | 232 | bool CartPole::getContinuousStates() { 233 | return true ; 234 | } 235 | 236 | int CartPole::getStateDimension() { 237 | return stateDimension ; 238 | } 239 | 240 | bool CartPole::getDiscreteActions() { 241 | return true ; 242 | } 243 | 244 | bool CartPole::getContinuousActions() { 245 | return true ; 246 | } 247 | 
248 | int CartPole::getActionDimension() { 249 | return actionDimension ; 250 | } 251 | 252 | int CartPole::getNumberOfActions() { 253 | return numberOfActions; 254 | } 255 | 256 | const char * CartPole::getName() { 257 | return "CartPole" ; 258 | } 259 | 260 | #endif //CARTPOLE 261 | -------------------------------------------------------------------------------- /CartPole.h: -------------------------------------------------------------------------------- 1 | #ifndef CARTPOLE_H 2 | #define CARTPOLE_H 3 | # include "State.h" 4 | # include "StateActionUtils.h" 5 | # include "World.h" 6 | #include 7 | #include 8 | 9 | class CartPole : public World { 10 | public: 11 | CartPole(); 12 | ~CartPole(); 13 | 14 | double act( Action * ); 15 | void getState( State * ); 16 | void setState( State * ); 17 | bool endOfEpisode(); 18 | void reset(); 19 | 20 | bool getDiscreteStates() ; 21 | bool getContinuousStates() ; 22 | int getStateDimension() ; 23 | bool getDiscreteActions() ; 24 | bool getContinuousActions() ; 25 | int getActionDimension() ; 26 | int getNumberOfActions() ; 27 | 28 | const char * getName() ; 29 | 30 | private: 31 | double act( double ) ; 32 | void accel(); 33 | void step( double, double ); 34 | void update(double , double ); 35 | double reward(); 36 | double phi, dphi, x, dx; 37 | double maxPhi, maxDPhi, maxX, maxDX; 38 | double MINACTION, MAXACTION; 39 | double Mc, mp, l, t, g; 40 | double totalM, ml, NORMANGLE; 41 | bool eoe ; 42 | ros::Rate *loop_rate; 43 | ros::Publisher pub_motor; 44 | ros::Publisher pub_sensor1; 45 | ros::Publisher pub_sensor2; 46 | }; 47 | 48 | #endif // CARTPOLE_H 49 | -------------------------------------------------------------------------------- /CartPoleCfg: -------------------------------------------------------------------------------- 1 | nExperiments 10 2 | 3 | 4 | steps 5 | nTrainSteps 30000 6 | trainStorePer 3000 7 | nTestSteps 15000 8 | testStorePer 15000 9 | nTrainEpisodes 0 10 | nTestEpisodes 0 11 | 
nMaxStepsPerTrainEpisode 0 12 | nMaxStepsPerTestEpisode 0 13 | 14 | algorithm 15 | algorithms Q Sarsa QV Acla Cacla 16 | tau 0.001 0.01 0.1 1.0 10.0 17 | epsilon 0.001 0.01 0.1 1.0 18 | sigma 0.01 0.1 1.0 10.0 19 | 20 | learningRates 21 | decreaseType none 22 | nLearningRates 3 23 | learningRate 0.0001 0.001 0.01 0.1 24 | learningRate 0.0 0.001 0.01 0.1 25 | learningRate 0.0 0.001 0.01 0.1 26 | 27 | discount 28 | gamma 0.99 29 | 30 | nn 31 | nHiddenQ 20 32 | nHiddenV 20 33 | -------------------------------------------------------------------------------- /CartPoleCfgCacla: -------------------------------------------------------------------------------- 1 | nExperiments 10 2 | 3 | 4 | steps 5 | nTrainSteps 30000 6 | trainStorePer 3000 7 | nTestSteps 15000 8 | testStorePer 15000 9 | nTrainEpisodes 0 10 | nTestEpisodes 0 11 | nMaxStepsPerTrainEpisode 0 12 | nMaxStepsPerTestEpisode 0 13 | 14 | algorithm 15 | algorithms Cacla 16 | tau 0.001 0.01 0.1 1.0 10.0 17 | epsilon 0.001 0.01 0.1 1.0 18 | sigma 0.01 0.1 1.0 10.0 19 | 20 | learningRates 21 | decreaseType none 22 | nLearningRates 3 23 | learningRate 0.0001 0.001 0.01 0.1 24 | learningRate 0.0 0.001 0.01 0.1 25 | learningRate 0.0 0.001 0.01 0.1 26 | 27 | discount 28 | gamma 0.99 29 | 30 | nn 31 | nHiddenQ 20 32 | nHiddenV 20 33 | -------------------------------------------------------------------------------- /CartPoleCfgCaclaSimple: -------------------------------------------------------------------------------- 1 | nExperiments 10 2 | 3 | 4 | steps 5 | nTrainSteps 30000 6 | trainStorePer 3000 7 | nTestSteps 15000 8 | testStorePer 15000 9 | nTrainEpisodes 0 10 | nTestEpisodes 0 11 | nMaxStepsPerTrainEpisode 0 12 | nMaxStepsPerTestEpisode 0 13 | 14 | algorithm 15 | algorithms Cacla 16 | tau 17 | epsilon 18 | sigma 0.01 0.1 1.0 10.0 19 | 20 | learningRates 21 | decreaseType none 22 | nLearningRates 3 23 | learningRate 0.0001 0.001 0.01 0.1 24 | learningRate 0.0 0.001 0.01 0.1 25 | learningRate 0.0 0.001 0.01 0.1 26 | 
27 | discount 28 | gamma 0.99 29 | 30 | nn 31 | nHiddenQ 20 32 | nHiddenV 20 33 | -------------------------------------------------------------------------------- /CartPoleInterface.cpp: -------------------------------------------------------------------------------- 1 | # ifndef CARTPOLEINTERFACE 2 | # define CARTPOLEINTERFACE 3 | 4 | # include "CartPoleInterface.h" 5 | # include "CartPole.h" 6 | 7 | CartPoleInterface::CartPoleInterface( const char * pFile ) { 8 | 9 | parameterFile = pFile ; 10 | 11 | parameterPath.assign( parameterFile ) ; 12 | 13 | readParameterFile() ; 14 | 15 | } 16 | 17 | int main( int argn, char *argv[] ) { 18 | 19 | if ( argn != 2 ) { 20 | 21 | cout << "You need to specify exactly one configuration (parameter) file." << endl ; 22 | return 0 ; 23 | 24 | } 25 | 26 | CartPoleInterface * interface = new CartPoleInterface( argv[1] ) ; 27 | 28 | CartPole * cartpole = new CartPole() ; 29 | 30 | interface->getParametersAndRunExperiments( cartpole ) ; 31 | 32 | delete cartpole ; 33 | delete interface ; 34 | } 35 | 36 | # endif //CARTPOLEINTERFACE 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /CartPoleInterface.h: -------------------------------------------------------------------------------- 1 | # ifndef CARTPOLEINTERFACE_H 2 | # define CARTPOLEINTERFACE_H 3 | 4 | # include "Experiment.h" 5 | # include "CartPole.h" 6 | 7 | class CartPoleInterface : public Experiment { 8 | public: 9 | CartPoleInterface( const char * parameterFile ) ; 10 | ~CartPoleInterface() {}; 11 | 12 | }; 13 | 14 | # endif //CARTPOLEINTERFACE_H 15 | -------------------------------------------------------------------------------- /Experiment.cpp: -------------------------------------------------------------------------------- 1 | #ifndef EXPERIMENT 2 | #define EXPERIMENT 3 | # include "Experiment.h" 4 | 5 | #include 6 | 7 | using namespace std; 8 | 9 | bool Experiment::initializeState( State * state, Algorithm * algorithm, 
World * world ) { 10 | 11 | if ( algorithm->getContinuousStates() and world->getContinuousStates() ) { 12 | 13 | state->stateDimension = world->getStateDimension() ; 14 | state->continuousState = new double[ state->stateDimension ] ; 15 | state->continuous = true ; 16 | state->discrete = false ; 17 | 18 | } else if ( algorithm->getDiscreteStates() and world->getDiscreteStates() ) { 19 | 20 | state->numberOfStates = world->getNumberOfStates() ; 21 | state->continuous = false ; 22 | state->discrete = true ; 23 | 24 | } else { 25 | 26 | cout << "World " << world->getName() << " and algorithm " << algorithm->getName() << " are incompatible, because of different state types (discrete vs continuous). Skipping..." << endl ; 27 | 28 | return true ; 29 | 30 | } 31 | 32 | return false ; 33 | 34 | } 35 | 36 | bool Experiment::initializeAction( Action * action, Algorithm * algorithm, World * world ) { 37 | 38 | if ( algorithm->getContinuousActions() and world->getContinuousActions() ) { 39 | 40 | action->actionDimension = world->getActionDimension() ; 41 | action->continuousAction = new double[ action->actionDimension ] ; 42 | action->continuous = true ; 43 | action->discrete = false ; 44 | 45 | } else if ( algorithm->getDiscreteActions() and world->getDiscreteActions() ) { 46 | 47 | action->numberOfActions = world->getNumberOfActions() ; 48 | action->continuous = false ; 49 | action->discrete = true ; 50 | 51 | } else { 52 | 53 | cout << "World " << world->getName() << " and algorithm " << algorithm->getName() << " are incompatible, because of different action types (discrete vs continuous). Skipping..." 
<< endl ; 54 | 55 | return true ; 56 | 57 | } 58 | 59 | return false ; 60 | 61 | } 62 | 63 | Experiment::~Experiment() { 64 | 65 | } 66 | 67 | 68 | void Experiment::setAlgorithm( string algorithmName, World * world ) { 69 | 70 | if ( algorithmName.compare("Q") == 0 ) { 71 | 72 | algorithm = new Qlearning( parameterFile, world ) ; 73 | 74 | } else if ( algorithmName.compare("Sarsa") == 0 ) { 75 | 76 | algorithm = new Sarsa( parameterFile, world ) ; 77 | 78 | } else if ( algorithmName.compare("QV") == 0 ) { 79 | 80 | algorithm = new QVlearning( parameterFile, world ) ; 81 | 82 | } else if ( algorithmName.compare("Acla") == 0 ) { 83 | 84 | algorithm = new Acla( parameterFile, world ) ; 85 | 86 | } else if ( algorithmName.compare("Cacla") == 0 ) { 87 | 88 | algorithm = new Cacla( parameterFile, world ) ; 89 | 90 | } else { 91 | 92 | cout << "Unknown algorithm: " << algorithmName << endl ; 93 | exit(-1) ; 94 | 95 | } 96 | 97 | } 98 | 99 | void Experiment::getParametersAndRunExperiments( World * world ) { 100 | int nTau = taus.size() ; 101 | int nEpsilon = epsilons.size() ; 102 | int nSigma = sigmas.size() ; 103 | 104 | int nAlgorithms = algorithms.size() ; 105 | 106 | int nExploration = nTau + nEpsilon + nSigma ; 107 | 108 | int nTotalLearningRates = 1 ; 109 | 110 | for ( unsigned int l = 0 ; l < learningRates.size() ; l++ ){ 111 | 112 | nTotalLearningRates *= learningRates[l].size() ; 113 | 114 | } 115 | 116 | int nTotalExperiments = nExperiments*nAlgorithms*nExploration*nTotalLearningRates*gammas.size() ; 117 | 118 | cout << "nTotalExperiments " << nTotalExperiments << endl ; 119 | 120 | bool printedWarning1 = false ; 121 | 122 | for ( int i = 0 ; i < nTotalExperiments ; i++ ) { 123 | 124 | bool skip = false ; 125 | 126 | int N = i ; 127 | 128 | algorithmName = algorithms[ N % nAlgorithms ] ; 129 | N /= nAlgorithms ; 130 | 131 | setAlgorithm( algorithmName, world ) ; 132 | 133 | State s1, s2 ; 134 | Action a1, a2 ; 135 | 136 | state = &s1 ; 137 | nextState = &s2 ; 138 
| action = &a1 ; 139 | nextAction = &a2 ; 140 | 141 | bool f1 = initializeState( state, algorithm, world ) ; 142 | bool f2 = initializeState( nextState, algorithm, world ) ; 143 | bool f3 = initializeAction( action, algorithm, world ) ; 144 | bool f4 = initializeAction( nextAction, algorithm, world ) ; 145 | 146 | skip = f1 or f2 or f3 or f4 ; 147 | 148 | unsigned int nAlgorithmLearningRates = algorithm->getNumberOfLearningRates() ; 149 | 150 | learningRate = new double[ nAlgorithmLearningRates ] ; 151 | 152 | for ( unsigned int l = 0 ; l < learningRates.size() ; l++ ) { 153 | 154 | unsigned int L = learningRates[l].size() ; 155 | 156 | if ( l < nAlgorithmLearningRates ) { 157 | 158 | learningRate[l] = learningRates[l][ N % L ] ; 159 | 160 | } else if ( N % L > 0 ) { 161 | //if we have more different learning rates than used (i.e. a second 162 | //or third learning rate for an algorithm that only uses one, such as Sarsa) 163 | //we skip all redundant experiments. 164 | 165 | skip = true ; 166 | 167 | } 168 | 169 | N /= L ; 170 | 171 | } 172 | 173 | for ( unsigned int l = learningRates.size() ; l < nAlgorithmLearningRates ; l++ ) { 174 | //In case that there are less learning rates specified than needed 175 | //for the current algorithm, reuse the first learning rate for all 176 | //missing values. 177 | 178 | if ( not printedWarning1 ) { 179 | cout << "Warning: underspecified number of learning rates for algorithm " << algorithmName << ". " ; 180 | cout << "(" << learningRates.size() << " learning rate(s) specified, " << nAlgorithmLearningRates << " needed.)" << endl ; 181 | cout << "Using first specified learning rate for missing values." 
<< endl ; 182 | printedWarning1 = true ; 183 | } 184 | 185 | learningRate[l] = learningRate[0] ; 186 | 187 | } 188 | 189 | int exp = N % nExploration ; 190 | 191 | if ( exp < nTau ) { 192 | 193 | boltzmann = true ; 194 | tau = taus[ N % nTau ] ; 195 | 196 | egreedy = false ; 197 | epsilon = 0.0 ; 198 | 199 | gaussian = false ; 200 | sigma = 0.0 ; 201 | 202 | 203 | } else if ( exp < (nTau + nEpsilon) ) { 204 | 205 | boltzmann = false ; 206 | tau = 0.0 ; 207 | 208 | egreedy = true; 209 | epsilon = epsilons[ ( N - nTau ) % nEpsilon ] ; 210 | 211 | gaussian = false ; 212 | sigma = 0.0 ; 213 | 214 | } else { 215 | 216 | boltzmann = false ; 217 | tau = 0.0 ; 218 | 219 | egreedy = false ; 220 | epsilon = 0.0 ; 221 | 222 | gaussian = true ; 223 | sigma = sigmas[ ( N - nTau - nEpsilon ) % nSigma ] ; 224 | 225 | } 226 | 227 | if ( algorithmName.compare("Cacla") == 0 ) { 228 | 229 | if ( boltzmann ) { 230 | //~ cout << "Boltzmann exploration is undefined for Cacla. Skipping..." << endl ; 231 | skip = true ; 232 | } 233 | 234 | } else { 235 | 236 | if ( gaussian ) { 237 | //~ cout << "Gaussian exploration is as of yet undefined for " << algorithmName << ". Skipping..." << endl ; 238 | skip = true ; 239 | } 240 | } 241 | 242 | if ( not skip ) { 243 | //At this point only skips when: 244 | // 1) considering learning rates that aren't used by 245 | // the current algorithm. 246 | // 2) initialization of actions and/or states failed. 247 | // This happens when the MDP only allows discrete 248 | // or continuous spaces and the algorithm vice- 249 | // versa. 250 | //Later may add skipping in other cases, such as when 251 | //an incompatible exploration scheme is used (such as 252 | //boltzmann for Cacla). 
253 | 254 | N /= nExploration ; 255 | 256 | gamma = gammas[ N % gammas.size() ] ; 257 | N /= gammas.size() ; 258 | 259 | ostringstream os ; 260 | 261 | os << parameterPath << "_d/" ; 262 | 263 | os << algorithmName << '_' ; 264 | if ( boltzmann ) { 265 | os << "B" << tau << '_' ; 266 | } else if ( egreedy ) { 267 | os << "E" << epsilon << '_' ; 268 | } else if ( gaussian ) { 269 | os << "G" << sigma << '_' ; 270 | } 271 | os << 'L' ; 272 | for ( unsigned int l = 0 ; l < nAlgorithmLearningRates ; l++ ) { 273 | os << learningRate[l] << '_' ; 274 | } 275 | os << 'D' ; 276 | os << gamma ; 277 | 278 | cout << "Storing in : " << os.str() << endl ; 279 | 280 | train = true ; 281 | nSteps = nTrainSteps ; 282 | nEpisodes = nTrainEpisodes ; 283 | nMaxStepsPerEpisode = nMaxStepsPerTrainEpisode ; 284 | nResults = nTrainResults ; 285 | 286 | 287 | double * results = runExperiment( world ) ; 288 | 289 | ostringstream ostemp ("") ; 290 | ostemp << os.str().c_str() << ".onresults" ; 291 | storeRewards( (ostemp.str()).c_str(), results, nTrainResults ) ; 292 | 293 | cout << "Final train results: " << results[ nTrainResults - 1 ] << endl ; 294 | delete [] results ; 295 | 296 | train = false ; 297 | nSteps = nTestSteps ; 298 | nEpisodes = nTestEpisodes ; 299 | nMaxStepsPerEpisode = nMaxStepsPerTestEpisode ; 300 | nResults = nTestResults ; 301 | 302 | results = runExperiment( world ) ; 303 | 304 | ostemp.str("") ; 305 | ostemp << os.str().c_str() << ".offresults" ; 306 | storeRewards( (ostemp.str()).c_str(), results, nTestResults ) ; 307 | 308 | cout << "Final test results: " << results[ nTestResults - 1 ] << endl ; 309 | delete [] results ; 310 | if (!ros::ok()) { 311 | cout << "ROS failed, exiting" << endl; 312 | exit(1); 313 | } 314 | } 315 | 316 | delete [] learningRate; 317 | 318 | if ( state->continuous ) { 319 | delete [] state->continuousState ; 320 | delete [] nextState->continuousState ; 321 | } 322 | 323 | if ( action->continuous ) { 324 | delete [] action->continuousAction 
; 325 | delete [] nextAction->continuousAction ; 326 | } 327 | 328 | delete algorithm ; 329 | 330 | } 331 | 332 | //delete world ; 333 | 334 | } 335 | 336 | void Experiment::storeRewards( const char * filepath, double * rewards, int nRewards ) { 337 | 338 | ifstream ifile ; 339 | 340 | ifile.open( filepath, ifstream::in ) ; 341 | 342 | int N = 0 ; 343 | 344 | if (ifile) { 345 | ifile >> N ; 346 | } 347 | 348 | ofstream ofile ; 349 | 350 | ofile.open( filepath, ofstream::out ) ; 351 | ofile << (N+1) << '\n' ; 352 | 353 | double last, lastvar, lastdif ; 354 | 355 | for ( int e = 0 ; e < nRewards ; e++ ) { 356 | 357 | 358 | if ( N > 0 ) { 359 | 360 | ifile >> last ; 361 | ifile >> lastvar ; 362 | 363 | lastdif = (rewards[e] - last) ; 364 | lastvar = N*lastvar/(N+1.0) + lastdif*lastdif/N ; 365 | 366 | last = (N*last + rewards[e])/(N+1.0) ; 367 | 368 | } else { 369 | 370 | lastvar = 0 ; 371 | last = rewards[e] ; 372 | 373 | } 374 | 375 | ofile << last << " " << lastvar << '\n' ; 376 | 377 | } 378 | 379 | ifile.close() ; 380 | 381 | ofile.close() ; 382 | } 383 | 384 | double * Experiment::runExperiment( World * world ) { 385 | 386 | Action * actions = new Action[2] ; //For Sarsa, that needs both the present and next action. 
387 | 388 | double reward ; 389 | 390 | //* Training *// 391 | double * results = new double[ nResults ] ; 392 | 393 | int episode = 0 ; 394 | int step = 0 ; 395 | int result = 0 ; 396 | double rewardSum = 0.0 ; 397 | 398 | world->reset() ; 399 | 400 | world->getState( state ) ; 401 | 402 | explore( state, action ) ; 403 | 404 | endOfEpisode = true ; 405 | 406 | int storePer ; 407 | 408 | if ( train ) { 409 | storePer = trainStorePer ; 410 | } else { 411 | storePer = testStorePer ; 412 | } 413 | 414 | for ( step = 0 ; (step < nSteps) and (episode < nEpisodes) ; step++ ) { 415 | 416 | if(!ros::ok()) return results; 417 | reward = world->act( action ) ; 418 | 419 | world->getState( nextState ) ; 420 | 421 | explore( nextState, nextAction ) ; 422 | 423 | rewardSum += reward ; 424 | 425 | endOfEpisode = world->endOfEpisode() ; 426 | 427 | if ( train ) { 428 | 429 | if ( algorithmName.compare("Sarsa") == 0 ) { 430 | 431 | actions[0] = *action ; 432 | actions[1] = *nextAction ; 433 | algorithm->update( state, actions, reward, nextState, endOfEpisode, learningRate, gamma ) ; 434 | 435 | } else { 436 | 437 | algorithm->update( state, action, reward, nextState, endOfEpisode, learningRate, gamma ) ; 438 | 439 | } 440 | 441 | } 442 | 443 | copyState( nextState, state ) ; 444 | copyAction( nextAction, action ) ; 445 | 446 | if ( endOfEpisode ) { 447 | 448 | episode++ ; 449 | 450 | } 451 | 452 | // Store results : 453 | bool store = false ; 454 | 455 | if ( storePerEpisode and ( episode % storePer == 0 ) and endOfEpisode ) { 456 | 457 | store = true ; 458 | 459 | } else if ( storePerStep and ( (step + 1) % storePer == 0 ) ) { 460 | 461 | store = true ; 462 | 463 | } 464 | 465 | if ( store ) { 466 | 467 | results[ result ] = rewardSum/storePer ; 468 | rewardSum = 0.0 ; 469 | result++ ; 470 | 471 | } 472 | 473 | } 474 | 475 | delete [] actions ; 476 | 477 | return results ; 478 | 479 | } 480 | 481 | void Experiment::explore( State * state, Action * action ) { 482 | 483 | if ( not 
train ) { 484 | 485 | algorithm->getMaxAction( state, action ) ; 486 | 487 | } else if ( boltzmann ) { 488 | 489 | algorithm->explore( state, action, tau, "boltzmann", endOfEpisode ) ; 490 | 491 | } else if ( egreedy ) { 492 | 493 | algorithm->explore( state, action, epsilon, "egreedy", endOfEpisode ) ; 494 | 495 | } else if ( gaussian ) { 496 | 497 | algorithm->explore( state, action, sigma, "gaussian", endOfEpisode ) ; 498 | 499 | } else { 500 | 501 | algorithm->getMaxAction( state, action ) ; 502 | 503 | } 504 | 505 | } 506 | 507 | void Experiment::read_moveTo( ifstream * ifile, string label ) { 508 | string temp ; 509 | 510 | while ( temp.compare( label ) != 0 and ifile ) { 511 | 512 | *ifile >> temp ; 513 | 514 | if ( ifile->eof() ) { 515 | cout << "Read error: Could not find label '" << label << "' while reading parameter file '" << parameterFile << "'" << endl ; 516 | exit(0) ; 517 | } 518 | 519 | } 520 | } 521 | 522 | vector< double > Experiment::read_doubleArray( string temp ) { 523 | 524 | vector< double > parameters ; 525 | 526 | istringstream iss( temp ) ; 527 | 528 | double parameter ; 529 | 530 | while( iss >> parameter ) { 531 | 532 | parameters.push_back( parameter ) ; 533 | 534 | } 535 | 536 | return parameters ; 537 | 538 | } 539 | 540 | void Experiment::readParameterFile( ) { 541 | 542 | ifstream ifile ; 543 | 544 | ifile.open( parameterFile, ifstream::in ) ; 545 | 546 | string temp ; 547 | 548 | read_moveTo( &ifile, "nExperiments" ) ; 549 | ifile >> nExperiments ; 550 | 551 | read_moveTo( &ifile, "steps" ) ; 552 | 553 | read_moveTo( &ifile, "nTrainSteps" ) ; 554 | ifile >> nTrainSteps ; 555 | 556 | read_moveTo( &ifile, "trainStorePer" ) ; 557 | ifile >> trainStorePer ; 558 | 559 | read_moveTo( &ifile, "nTestSteps" ) ; 560 | ifile >> nTestSteps ; 561 | 562 | read_moveTo( &ifile, "testStorePer" ) ; 563 | ifile >> testStorePer ; 564 | 565 | read_moveTo( &ifile, "nTrainEpisodes" ) ; 566 | ifile >> nTrainEpisodes ; 567 | 568 | read_moveTo( &ifile, 
"nTestEpisodes" ) ; 569 | ifile >> nTestEpisodes ; 570 | 571 | read_moveTo( &ifile, "nMaxStepsPerTrainEpisode" ) ; 572 | ifile >> nMaxStepsPerTrainEpisode ; 573 | 574 | read_moveTo( &ifile, "nMaxStepsPerTestEpisode" ) ; 575 | ifile >> nMaxStepsPerTestEpisode ; 576 | 577 | 578 | if ( nTrainSteps == 0 ) { 579 | 580 | nTrainSteps = nTrainEpisodes*nMaxStepsPerTrainEpisode ; 581 | nTrainResults = nTrainSteps/trainStorePer ; 582 | storePerStep = false ; 583 | storePerEpisode = true ; 584 | 585 | } else if ( nMaxStepsPerTrainEpisode == 0 ) { 586 | 587 | nTrainEpisodes = nTrainSteps ; 588 | nTrainResults = nTrainSteps/trainStorePer ; 589 | storePerStep = true ; 590 | storePerEpisode = false ; 591 | 592 | } 593 | 594 | if ( nTestSteps == 0 ) { 595 | 596 | nTestSteps = nTestEpisodes*nMaxStepsPerTestEpisode ; 597 | nTestResults = nTestSteps/testStorePer ; 598 | storePerStep = false ; 599 | storePerEpisode = true ; 600 | 601 | } else if ( nMaxStepsPerTestEpisode == 0 ) { 602 | 603 | nTestEpisodes = nTestSteps ; 604 | nTestResults = nTestSteps/testStorePer ; 605 | storePerStep = true ; 606 | storePerEpisode = false ; 607 | 608 | } 609 | 610 | 611 | read_moveTo( &ifile, "algorithm" ) ; 612 | 613 | read_moveTo( &ifile, "algorithms" ) ; 614 | getline( ifile, temp ) ; 615 | istringstream iss( temp ) ; 616 | 617 | while ( iss >> temp ) { 618 | algorithms.push_back( temp ) ; 619 | } 620 | 621 | read_moveTo( &ifile, "tau" ) ; 622 | getline( ifile, temp ) ; 623 | taus = read_doubleArray( temp ) ; 624 | 625 | read_moveTo( &ifile, "epsilon" ) ; 626 | getline( ifile, temp ) ; 627 | epsilons = read_doubleArray( temp ) ; 628 | 629 | read_moveTo( &ifile, "sigma" ) ; 630 | getline( ifile, temp ) ; 631 | sigmas = read_doubleArray( temp ) ; 632 | 633 | read_moveTo( &ifile, "learningRates" ) ; 634 | 635 | read_moveTo( &ifile, "decreaseType" ) ; 636 | ifile >> learningRateDecreaseType ; 637 | 638 | read_moveTo( &ifile, "nLearningRates" ) ; 639 | ifile >> nLearningRates ; 640 | 641 | for ( int l = 
0 ; l < nLearningRates ; l++ ) { 642 | 643 | read_moveTo( &ifile, "learningRate" ) ; 644 | getline( ifile, temp ) ; 645 | learningRates.push_back( read_doubleArray( temp ) ) ; 646 | 647 | } 648 | 649 | read_moveTo( &ifile, "discount" ) ; 650 | 651 | read_moveTo( &ifile, "gamma" ) ; 652 | getline( ifile, temp ) ; 653 | gammas = read_doubleArray( temp ) ; 654 | 655 | } 656 | 657 | #endif //EXPERIMENT 658 | -------------------------------------------------------------------------------- /Experiment.h: -------------------------------------------------------------------------------- 1 | #ifndef EXPERIMENT_H 2 | #define EXPERIMENT_H 3 | 4 | # include 5 | # include 6 | # include 7 | # include 8 | 9 | # include "State.h" 10 | # include "Action.h" 11 | # include "World.h" 12 | # include "Algorithm.h" 13 | 14 | # include "Qlearning.h" 15 | # include "QVlearning.h" 16 | # include "Sarsa.h" 17 | # include "Acla.h" 18 | # include "Cacla.h" 19 | 20 | class Experiment { 21 | 22 | public: 23 | virtual ~Experiment() ; 24 | bool initializeState( State * state, Algorithm * algorithm, World * world ) ; 25 | bool initializeAction( Action * action, Algorithm * algorithm, World * world ) ; 26 | void getParametersAndRunExperiments( World * world ) ; 27 | 28 | protected: 29 | double * runExperiment( World * world ) ; 30 | void explore( State * state, Action * action ) ; 31 | void storeRewards( const char *, double * , int ) ; 32 | void setAlgorithm( string algorithmName, World * world ) ; 33 | void readParameterFile( ) ; 34 | void read_moveTo( ifstream *, string ) ; 35 | vector< double > read_doubleArray( string ) ; 36 | 37 | Algorithm * algorithm ; 38 | World * world ; 39 | 40 | State * state ; 41 | State * nextState ; 42 | Action * action ; 43 | Action * nextAction ; 44 | 45 | int nExperiments, nAlgorithms ; 46 | int nSteps, nEpisodes, nMaxStepsPerEpisode, nResults ; 47 | int nTrainSteps, nTrainEpisodes, nMaxStepsPerTrainEpisode, nTrainResults, trainStorePer ; 48 | int nTestSteps, 
nTestEpisodes, nMaxStepsPerTestEpisode, nTestResults, testStorePer ; 49 | int stateDimension, actionDimension ; 50 | bool discreteStates, discreteActions, endOfEpisode ; 51 | bool storePerStep, storePerEpisode ; 52 | bool boltzmann, egreedy, gaussian ; 53 | int nLearningRates ; 54 | 55 | const char * parameterFile ; 56 | string parameterPath ; 57 | string algorithmName ; 58 | vector< string > algorithms ; 59 | vector< double > taus ; 60 | vector< double > epsilons ; 61 | vector< double > sigmas ; 62 | string learningRateDecreaseType ; 63 | double * learningRate ; 64 | vector< vector < double > > learningRates ; 65 | vector< double > gammas ; 66 | double tau, epsilon, sigma, gamma ; 67 | 68 | bool train ; 69 | 70 | 71 | }; 72 | 73 | #endif //EXPERIMENT_H 74 | 75 | -------------------------------------------------------------------------------- /List.h: -------------------------------------------------------------------------------- 1 | #ifndef LIST 2 | #define LIST 3 | 4 | struct List { 5 | int length; 6 | double * contents; 7 | }; 8 | 9 | #endif //LIST 10 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CFLAGS=-Wall -O4 2 | 3 | # World.h 4 | UTILFILES = StateActionUtils.cpp cNeuralNetwork.cpp Algorithm.cpp StateActionAlgorithm.cpp Experiment.cpp 5 | ALGORITHMS = Sarsa.cpp Qlearning.cpp QVlearning.cpp Acla.cpp Cacla.cpp 6 | CPWORLD = CartPole.cpp 7 | CPINTERFACE = CartPoleInterface.cpp 8 | SMALLWORLD = SmallMaze.cpp 9 | SMALLINTERFACE = SmallInterface.cpp 10 | ARMWORLD = Arm.cpp 11 | ARMINTERFACE = ArmInterface.cpp 12 | 13 | LDFLAGS = -L/usr/lib -L/usr/lib/gcc/x86_64-linux-gnu/4.8 -lstdc++ 14 | # ROS_DEFS = -DUSE_ROS 15 | ROS_LDFLAGS = -L/opt/ros/jade/lib -lroscpp -lrosconsole -lroscpp_serialization -lroslib -lrostime 16 | # ROS_LDFLAGS = -L/home/src/ros/hydro/catkin_ws/install_isolated/lib -lroscpp -lrosconsole -lroscpp_serialization 
-lroslib -lrostime 17 | # ROS_LDFLAGS = -L/opt/ros/jade/lib -lroscpp_serialization -lrostime -l:/usr/lib/x86_64-linux-gnu/libboost_date_time.so -l:/usr/lib/x86_64-linux-gnu/libboost_system.so -l:/usr/lib/x86_64-linux-gnu/libboost_thread.so -l:/usr/lib/x86_64-linux-gnu/libpthread.so -lcpp_common -l:/usr/lib/x86_64-linux-gnu/libboost_system.so -l:/usr/lib/x86_64-linux-gnu/libboost_thread.so -l:/usr/lib/x86_64-linux-gnu/libpthread.so -l:/usr/lib/x86_64-linux-gnu/libconsole_bridge.so 18 | # ROS_LDFLAGS = -L/opt/ros/jade/lib -lroscpp_serialization -lrostime -l:/usr/lib/x86_64-linux-gnu/libboost_date_time.so -l:/usr/lib/x86_64-linux-gnu/libboost_system.so -l:/usr/lib/x86_64-linux-gnu/libboost_thread.so -l:/usr/lib/x86_64-linux-gnu/libpthread.so -lcpp_common -l:/usr/lib/x86_64-linux-gnu/libboost_system.so -l:/usr/lib/x86_64-linux-gnu/libboost_thread.so -l:/usr/lib/x86_64-linux-gnu/libpthread.so -l:/usr/lib/x86_64-linux-gnu/libconsole_bridge.so 19 | 20 | # objects := $(patsubst %.cpp,%.o,$(wildcard *.cpp)) 21 | objects := $(patsubst %.cpp,%.o,$(UTILFILES)) 22 | objects += $(patsubst %.cpp,%.o,$(ALGORITHMS)) 23 | 24 | cp_objs = $(patsubst %.cpp,%.o,$(CPWORLD)) 25 | cp_objs += $(patsubst %.cpp,%.o,$(CPINTERFACE)) 26 | 27 | cp_objs_inc = $(patsubst %.cpp,%.h,$(CPWORLD)) 28 | cp_objs_inc += $(patsubst %.cpp,%.h,$(CPINTERFACE)) 29 | 30 | arm_objs = $(patsubst %.cpp,%.o,$(ARMWORLD)) 31 | arm_objs += $(patsubst %.cpp,%.o,$(ARMINTERFACE)) 32 | 33 | arm_objs_inc = $(patsubst %.cpp,%.h,$(ARMWORLD)) 34 | arm_objs_inc += $(patsubst %.cpp,%.h,$(ARMINTERFACE)) 35 | 36 | all: cp small arm 37 | 38 | # cp 39 | cp: 40 | c++ $(CFLAGS) $(UTILFILES) $(ALGORITHMS) $(CPWORLD) $(CPINTERFACE) $(ROS_INCLUDES) -o CartPole 41 | @echo 42 | @echo Done compiling CartPole. 43 | @echo Run with: ./CartPole \'cfg\' 44 | @echo where \'cfg\' is a configuration file. 
45 | @echo 46 | 47 | cp_objs: $(cp_objs_inc) 48 | cp_ros: $(objects) $(cp_objs) 49 | # c++ $(CFLAGS) $(UTILFILES) $(ALGORITHMS) $(CPWORLD) $(CPINTERFACE) -o CartPole 50 | g++ $(objects) $(cp_objs) -o CartPoleRos $(LDFLAGS) $(ROS_LDFLAGS) $(ROS_INCLUDES) 51 | @echo 52 | @echo Done compiling CartPoleRos. 53 | @echo Run with: ./CartPoleRos \'cfg\' 54 | @echo where \'cfg\' is a configuration file. 55 | @echo 56 | 57 | # small 58 | small: 59 | c++ $(CFLAGS) $(UTILFILES) $(ALGORITHMS) $(SMALLWORLD) $(SMALLINTERFACE) -o SmallMaze 60 | @echo 61 | @echo Done compiling SmallMaze. 62 | @echo Run with: ./SmallMaze \'cfg\' 63 | @echo where \'cfg\' is a configuration file. 64 | @echo 65 | 66 | arm_objs: $(arm_objs_inc) 67 | 68 | arm: $(objects) $(arm_objs) 69 | # c++ $(CFLAGS) $(ARM_LDFLAGS) $(UTILFILES) $(ALGORITHMS) $(ARMWORLD) $(ARMINTERFACE) -o Arm 70 | g++ $(objects) $(arm_objs) -o Arm $(LDFLAGS) $(ROS_LDFLAGS) $(ROS_INCLUDES) 71 | @echo 72 | @echo Done compiling Arm. 73 | @echo Run with: ./Arm \'cfg\' 74 | @echo where \'cfg\' is a configuration file. 
75 | @echo 76 | 77 | # foo : $(objects) 78 | # ld -o foo $(ARM_LDFLAGS) $(objects) 79 | 80 | clean: 81 | rm -rf Arm CartPole CartPoleRos SmallMaze 82 | rm -rf *.o 83 | rm -f *.out 84 | @echo done removing 85 | 86 | -------------------------------------------------------------------------------- /Matrix.cpp: -------------------------------------------------------------------------------- 1 | #ifndef MATRIX 2 | #define MATRIX 3 | #include 4 | #include 5 | using namespace std ; 6 | 7 | #include "Matrix.h" 8 | 9 | Matrix::Matrix() { 10 | _data = new double[1] ; 11 | _nRows = 0 ; 12 | _nCols = 0 ; 13 | } 14 | 15 | Matrix::Matrix(int size) { 16 | setSize( size ) ; 17 | } 18 | 19 | Matrix::Matrix(int nRows, int nCols) { 20 | setSize( nRows, nCols ) ; 21 | } 22 | 23 | Matrix::Matrix(int nRows, int nCols, double *data) { 24 | setSize( nRows, nCols ) ; 25 | for ( int r = 0 ; r < nRows ; r++ ){ 26 | for ( int c = 0 ; c < nCols ; c++ ){ 27 | _data[ r*nCols + c ] = data[ r*nCols + c ] ; 28 | } 29 | } 30 | } 31 | 32 | Matrix::~Matrix() { 33 | delete [] _data ; 34 | } 35 | 36 | void Matrix::setSize( int size ) { 37 | _size = size ; 38 | _data = new double[size] ; 39 | int x ; 40 | for ( x = 0 ; x < size ; x++ ){ 41 | _data[ x ] = 0.0 ; 42 | } 43 | _nRows = size ; 44 | _nCols = 1 ; 45 | } 46 | 47 | void Matrix::setSize( int nRows, int nCols ) { 48 | _size = nRows * nCols ; 49 | _data = new double[ _size ] ; 50 | int x ; 51 | for ( x = 0 ; x < _size ; x++ ){ 52 | _data[ x ] = 0.0 ; 53 | } 54 | _nRows = nRows ; 55 | _nCols = nCols ; 56 | } 57 | 58 | int Matrix::getSize( ) { 59 | return _size ; 60 | } 61 | 62 | double * Matrix::getPtr(int i) { 63 | return (_data + i ) ; 64 | } 65 | 66 | double * Matrix::getPtr( ) { 67 | return (_data) ; 68 | } 69 | 70 | double Matrix::get(int i) const { 71 | return _data[ i ] ; 72 | } 73 | 74 | double Matrix::get(int i, int j) const { 75 | return _data[ i*_nCols + j ] ; 76 | } 77 | 78 | void Matrix::set(int i, double val) { 79 | _data[ i ] = val ; 80 
| } 81 | 82 | void Matrix::set(int i, int j, double val) { 83 | _data[ i*_nCols + j ] = val ; 84 | } 85 | 86 | void Matrix::add(int i, double val) { 87 | _data[ i ] += val ; 88 | } 89 | 90 | void Matrix::add(int i, int j, double val) { 91 | _data[ i*_nCols + j ] += val ; 92 | } 93 | 94 | //~ void Matrix::increaseCols() { 95 | //~ nCols = _nCols + 1 ; 96 | //~ _size += _nRows ; 97 | //~ data = new double[ _size ] ; 98 | 99 | //~ for ( int r = 0 ; r < _nRows ; r++ ){ 100 | //~ for ( int c = 0 ; c < _nCols ; c++ ){ 101 | //~ data[ r*nCols + c ] = _data[ r*_nCols + c ] ; 102 | //~ } 103 | //~ data[ r*nCols + _nCols ] = 0.0 ; 104 | //~ } 105 | 106 | //~ delete [] _data ; 107 | //~ _data = data ; 108 | //~ _nCols = nCols 109 | //~ } 110 | 111 | //~ void Matrix::increaseRows() { 112 | //~ nRows = _nRows + 1 ; 113 | //~ _size += _nCols ; 114 | //~ data = new double[ _size ] ; 115 | 116 | //~ for ( int r = 0 ; r < _nRows ; r++ ){ 117 | //~ for ( int c = 0 ; c < _nCols ; c++ ){ 118 | //~ data[ r*_nCols + c ] = _data[ r*_nCols + c ] ; 119 | //~ } 120 | //~ } 121 | //~ for ( int c = 0 ; c < _nCols ; c++ ){ 122 | //~ data[ _nRows*_nCols + c ] = 0.0 ; 123 | //~ } 124 | 125 | //~ delete [] _data ; 126 | //~ _data = data ; 127 | //~ _nRows = nRows 128 | //~ } 129 | 130 | //~ Matrix Matrix::multiply( Matrix M, int nRows, int nCols ) { 131 | //~ if ( _nCols != nRows ) { 132 | //~ throw "Error in Matrix multiplication: _nCols != nRows." 
; 133 | //~ } 134 | 135 | //~ int i,j,k ; 136 | //~ double sum ; 137 | 138 | //~ Matrix Result = new Matrix( _nRows, nCols ) ; 139 | 140 | //~ for ( i = 0 ; i < _nRows ; i++ ) { 141 | //~ for ( j = 0 ; j < nCols ; j++ ) { 142 | //~ sum = 0 ; 143 | 144 | //~ for ( k = 0 ; k < _nCols ; k++ ) { 145 | //~ sum += get( i, k ) * M.get( k, j ) ; 146 | //~ } 147 | 148 | //~ Result.set( i, j, sum ) ; 149 | //~ } 150 | //~ } 151 | 152 | //~ return Result ; 153 | //~ } 154 | 155 | #endif //MATRIX 156 | -------------------------------------------------------------------------------- /Matrix.h: -------------------------------------------------------------------------------- 1 | #ifndef MATRIX_H 2 | #define MATRIX_H 3 | 4 | class Matrix { 5 | public: 6 | Matrix() ; 7 | Matrix( int ); 8 | Matrix( int , int ); 9 | Matrix( int , int, double* ); 10 | ~Matrix(); 11 | 12 | void setSize( int ) ; 13 | void setSize( int, int ) ; 14 | int getSize() ; 15 | 16 | double * getPtr( int i ) ; 17 | double * getPtr() ; 18 | 19 | double get( int ) const ; 20 | void set( int, double ); 21 | void add( int, double ); 22 | double get( int, int ) const ; 23 | void set( int, int , double ); 24 | void add( int, int , double ); 25 | //~ Matrix multiply( Matrix, int, int ); 26 | 27 | private: 28 | int _nRows, _nCols, _size, _id; 29 | double* _data; 30 | }; 31 | #endif // MATRIX_H 32 | -------------------------------------------------------------------------------- /QVlearning.cpp: -------------------------------------------------------------------------------- 1 | #ifndef QVLEARNING 2 | #define QVLEARNING 3 | 4 | # include "QVlearning.h" 5 | 6 | 7 | QVlearning::QVlearning( const char * parameterFile, World * w ) { 8 | 9 | discreteStates = w->getDiscreteStates() ; 10 | stateDimension = w->getStateDimension() ; 11 | 12 | if ( not w->getDiscreteActions() ) { 13 | 14 | cout << "QV-learning does not support continuous actions." << endl ; 15 | cout << "Please check which MDP you are using it on." 
// Frees the tabular Q/V arrays or the per-action Q networks and the V
// network, depending on which representation the constructor built.
QVlearning::~QVlearning() {

	if ( discreteStates ) {

		for ( int s = 0 ; s < numberOfStates ; s++ ) {

			delete [] Q[s] ;

		}

		delete [] Q ;
		delete [] V ;

	} else {

		for ( int a = 0 ; a < numberOfActions ; a++ ) {
			delete QNN[a] ;
		}
		QNN.clear() ;
		delete VNN ;

		delete [] QTarget ;
		delete [] VTarget ;

		delete [] Qs ;

	}

	delete [] policy ;

}


// Reads the network sizes (nHiddenQ, nHiddenV) from the "nn" section of the
// parameter file; only needed for the continuous-state (neural net) case.
void QVlearning::readParameterFile( const char * parameterFile ) {

	if ( not discreteStates ) {

		ifstream ifile ;

		ifile.open( parameterFile, ifstream::in ) ;

		read_moveTo( &ifile, "nn" ) ;

		read_moveTo( &ifile, "nHiddenQ" ) ;
		ifile >> nHiddenQ ;
		read_moveTo( &ifile, "nHiddenV" ) ;
		ifile >> nHiddenV ;

		ifile.close() ;

	}

}

// One QV-learning step: both Q(s,a) and V(s) are moved toward the SAME
// target r + gamma*V(s') (the bootstrap uses V, not max Q -- this is what
// distinguishes QV-learning from Q-learning). At the end of an episode the
// target is just r. learningRate[0] is used for Q, learningRate[1] for V.
void QVlearning::update( State * state, Action * action, double rt, State * nextState, bool endOfEpisode, double * learningRate, double gamma ) {

	int at = action->discreteAction ;

	if ( state->discrete ) {

		// Tabular case: in-place TD updates on the Q and V tables.
		int st = state->discreteState ;
		int st_ = nextState->discreteState ;

		if ( endOfEpisode ) {

			Q[ st ][ at ] += learningRate[0]*( rt - Q[ st ][ at ] ) ;
			V[ st ] += learningRate[1]*( rt - V[ st ] ) ;

		} else {

			Q[ st ][ at ] += learningRate[0]*( rt + gamma*V[ st_ ] - Q[ st ][ at ] ) ;
			V[ st ] += learningRate[1]*( rt + gamma*V[ st_ ] - V[ st ] ) ;

		}

	} else {

		// Continuous case: compute the shared scalar target, then backprop
		// it through the taken action's Q network and through the V network.
		double * st = state->continuousState ;
		double * st_ = nextState->continuousState ;

		if ( endOfEpisode ) {

			QTarget[ 0 ] = rt ;
			VTarget[ 0 ] = rt ;

		} else {

			double Vs_ = VNN->forwardPropagate( st_ )[0] ;

			QTarget[ 0 ] = rt + gamma*Vs_ ;
			VTarget[ 0 ] = rt + gamma*Vs_ ;

		}

		QNN[ at ]->backPropagate( st, QTarget, learningRate[0] ) ;
		VNN->backPropagate( st, VTarget, learningRate[1] ) ;

	}

}

// Two learning rates: [0] for Q, [1] for V (see update()).
unsigned int QVlearning::getNumberOfLearningRates() {

	return 2 ;

}

const char * QVlearning::getName() {

	return "QVlearning" ;

}

#endif //QVLEARNING
#ifndef QLEARNING
#define QLEARNING

# include "Qlearning.h"
# include <cstdlib>  // NOTE(review): the include target was stripped in the source dump; drand48()/exit() suggest <cstdlib> -- confirm against the original file

using namespace std ;

// Q-learning (off-policy TD control): bootstraps on max_a' Q(s',a').
// Supports a tabular Q for discrete states or one neural network per action
// for continuous states. Discrete-action worlds only.
Qlearning::Qlearning( const char * parameterFile, World * w ) {

	discreteStates = w->getDiscreteStates() ;

	if ( not w->getDiscreteActions() ) {

		// One value per (state, action) is stored; a continuous action space
		// cannot be represented, so bail out early.
		cout << "Q-learning does not support continuous actions." << endl ;
		cout << "Please check which MDP you are using it on." << endl ;
		exit(0) ;

	} else {

		numberOfActions = w->getNumberOfActions() ;

	}

	srand48( clock() ) ;  // seed the exploration RNG

	readParameterFile( parameterFile ) ;

	if ( discreteStates ) {

		// Tabular case: |S| x |A| table, zero-initialised.
		numberOfStates = w->getNumberOfStates() ;

		Q = new double*[ numberOfStates ] ;

		for ( int s = 0 ; s < numberOfStates ; s++ ) {

			Q[s] = new double[ numberOfActions ] ;

			for ( int a = 0 ; a < numberOfActions ; a++ ) {
				Q[s][a] = 0.0 ;
			}

		}

	} else {

		// Continuous-state case: one small feed-forward net per action, each
		// mapping the state vector to a scalar Q-value.
		stateDimension = w->getStateDimension() ;

		int layerSizesA[] = { stateDimension, nHiddenQ, 1 } ;

		for ( int a = 0 ; a < numberOfActions ; a++ ) {

			QNN.push_back( new cNeuralNetwork( 1, layerSizesA ) ) ;

		}

		Qs = new double[ numberOfActions ] ;  // scratch: per-action Q-values

	}

	QTarget = new double[ 1 ] ;               // one-element backprop target
	policy = new double[ numberOfActions ] ;  // scratch: action probabilities

}

// Releases whichever Q representation the constructor allocated.
Qlearning::~Qlearning() {

	if ( discreteStates ) {

		for ( int s = 0 ; s < numberOfStates ; s++ ) {

			delete [] Q[s] ;

		}

		delete [] Q ;

	} else {

		for ( int a = 0 ; a < numberOfActions ; a++ ) {

			delete QNN[a] ;

		}

		QNN.clear() ;

		delete [] Qs ;

	}

	delete [] QTarget ;
	delete [] policy ;

}

// Reads the hidden-layer size (nHiddenQ) from the "nn" section of the
// parameter file; only needed for the continuous-state (neural net) case.
void Qlearning::readParameterFile( const char * parameterFile ) {

	if ( not discreteStates ) {

		ifstream ifile ;

		ifile.open( parameterFile, ifstream::in ) ;

		read_moveTo( &ifile, "nn" ) ;

		read_moveTo( &ifile, "nHiddenQ" ) ;
		ifile >> nHiddenQ ;

		ifile.close() ;

	}

}

// One Q-learning step on transition (s, a, r, s'):
//   Q(s,a) <- Q(s,a) + alpha * ( r + gamma * max_a' Q(s',a') - Q(s,a) )
// with the bootstrap term dropped when the episode ended at s'.
void Qlearning::update( State * state, Action * action, double rt, State * nextState, bool endOfEpisode, double * learningRate, double gamma ) {

	int at = action->discreteAction ;

	if ( state->discrete ) {

		int st = state->discreteState ;
		int st_ = nextState->discreteState ;

		if ( endOfEpisode ) {

			// Terminal transition: target is the bare reward.
			Q[ st ][ at ] += learningRate[0]*( rt - Q[ st ][ at ] ) ;

		} else {

			double maxQs = max( Q[ st_ ], numberOfActions ) ;

			Q[ st ][ at ] += learningRate[0]*( rt + gamma*maxQs - Q[ st ][ at ] ) ;

		}

	} else {

		double * st = state->continuousState ;
		double * st_ = nextState->continuousState ;

		if ( endOfEpisode ) {

			QTarget[ 0 ] = rt ;

		} else {

			// Evaluate all action networks at s' to find the greedy value.
			for ( int a = 0 ; a < numberOfActions ; a++ ) {
				Qs[a] = QNN[a]->forwardPropagate( st_ )[0] ;
			}

			double maxQs = max( Qs, numberOfActions ) ;

			QTarget[ 0 ] = rt + gamma*maxQs ;

		}

		// Only the taken action's network is trained toward the target.
		QNN[ at ]->backPropagate( st, QTarget, learningRate[0] ) ;

	}

}

// Single learning rate, used for the Q update.
unsigned int Qlearning::getNumberOfLearningRates() {

	return 1 ;

}

const char * Qlearning::getName() {

	return "Qlearning" ;

}

#endif //QLEARNING
#ifndef SARSA
#define SARSA

# include "Sarsa.h"


// SARSA (on-policy TD control): bootstraps on Q(s',a') for the action a'
// that was actually selected, so update() receives BOTH actions of the
// transition. Supports a tabular Q for discrete states or one neural
// network per action for continuous states. Discrete-action worlds only.
Sarsa::Sarsa( const char * parameterFile, World * w ) {

	discreteStates = w->getDiscreteStates() ;

	if ( not w->getDiscreteActions() ) {

		// One value per (state, action) is stored; continuous action spaces
		// cannot be represented, so bail out early.
		cout << "Sarsa does not support continuous actions." << endl ;
		cout << "Please check which MDP you are using it on." << endl ;
		exit(0) ;

	} else {

		numberOfActions = w->getNumberOfActions() ;

	}

	srand48( clock() ) ;  // seed the exploration RNG

	readParameterFile( parameterFile ) ;

	if ( discreteStates ) {

		// Tabular case: |S| x |A| table, zero-initialised.
		numberOfStates = w->getNumberOfStates() ;

		Q = new double*[ numberOfStates ] ;

		for ( int s = 0 ; s < numberOfStates ; s++ ) {

			Q[s] = new double[ numberOfActions ] ;

			for ( int a = 0 ; a < numberOfActions ; a++ ) {
				Q[s][a] = 0.0 ;
			}

		}

	} else {

		// Continuous-state case: one small feed-forward net per action, each
		// mapping the state vector to a scalar Q-value.
		stateDimension = w->getStateDimension() ;

		int layerSizesA[] = { stateDimension, nHiddenQ, 1 } ;

		for ( int a = 0 ; a < numberOfActions ; a++ ) {

			QNN.push_back( new cNeuralNetwork( 1, layerSizesA ) ) ;

		}

		Qs = new double[ numberOfActions ] ;  // scratch: per-action Q-values

	}

	QTarget = new double[ 1 ] ;               // one-element backprop target
	policy = new double[ numberOfActions ] ;  // scratch: action probabilities

}

// Releases whichever Q representation the constructor allocated.
Sarsa::~Sarsa() {

	if ( discreteStates ) {

		for ( int s = 0 ; s < numberOfStates ; s++ ) {

			delete [] Q[s] ;

		}

		delete [] Q ;

	} else {

		for ( int a = 0 ; a < numberOfActions ; a++ ) {

			delete QNN[a] ;

		}

		QNN.clear() ;

		delete [] Qs ;

	}

	delete [] QTarget ;
	delete [] policy ;

}

// Reads the hidden-layer size (nHiddenQ) from the "nn" section of the
// parameter file; only needed for the continuous-state (neural net) case.
void Sarsa::readParameterFile( const char * parameterFile ) {

	if ( not discreteStates ) {

		ifstream ifile ;

		ifile.open( parameterFile, ifstream::in ) ;

		read_moveTo( &ifile, "nn" ) ;

		read_moveTo( &ifile, "nHiddenQ" ) ;
		ifile >> nHiddenQ ;

		ifile.close() ;

	}

}

// One SARSA step on transition (s, a, r, s', a'):
//   Q(s,a) <- Q(s,a) + alpha * ( r + gamma * Q(s',a') - Q(s,a) )
// actions[0] is the action taken in s, actions[1] the action selected in s'.
// The bootstrap term is dropped when the episode ended at s'.
void Sarsa::update( State * state, Action * actions, double rt, State * nextState, bool endOfEpisode, double * learningRate, double gamma ) {

	int at = actions[0].discreteAction ;
	int at_ = actions[1].discreteAction ;

	if ( state->discrete ) {

		int st = state->discreteState ;
		int st_ = nextState->discreteState ;

		if ( endOfEpisode ) {

			// Terminal transition: target is the bare reward.
			Q[ st ][ at ] += learningRate[0]*( rt - Q[ st ][ at ] ) ;

		} else {

			Q[ st ][ at ] += learningRate[0]*( rt + gamma*Q[ st_ ][ at_ ] - Q[ st ][ at ] ) ;

		}

	} else {

		double * st = state->continuousState ;
		double * st_ = nextState->continuousState ;

		if ( endOfEpisode ) {

			QTarget[ 0 ] = rt ;

		} else {

			// On-policy bootstrap: evaluate only the network of the action
			// actually chosen in s'.
			double Qsa_ = QNN[ at_ ]->forwardPropagate( st_ )[0] ;

			QTarget[ 0 ] = rt + gamma*Qsa_ ;

		}

		// Only the taken action's network is trained toward the target.
		QNN[ at ]->backPropagate( st, QTarget, learningRate[0] ) ;

	}

}

// Single learning rate, used for the Q update.
unsigned int Sarsa::getNumberOfLearningRates() {

	return 1 ;

}

const char * Sarsa::getName() {

	return "Sarsa" ;

}

#endif //SARSA
# ifndef SMALLINTERFACE
# define SMALLINTERFACE

# include "SmallInterface.h"
# include "SmallMaze.h"

// Thin Experiment subclass that binds the generic experiment driver to the
// SmallMaze world: it only records the parameter-file path and parses it;
// all experiment behaviour comes from the Experiment base class.
SmallInterface::SmallInterface( const char * pFile ) {

	parameterFile = pFile ;

	parameterPath.assign( parameterFile ) ;

	readParameterFile() ;

}

// Entry point. Usage: ./SmallMaze <cfg>, where <cfg> is a configuration
// (parameter) file; exactly one argument is required.
int main( int argn, char *argv[] ) {

	if ( argn != 2 ) {

		cout << "You need to specify exactly one configuration (parameter) file." << endl ;
		return 0 ;

	}

	SmallInterface * interface = new SmallInterface( argv[1] ) ;

	SmallMaze * maze = new SmallMaze() ;

	// Runs the full experiment sweep defined by the configuration file.
	interface->getParametersAndRunExperiments( maze ) ;

	delete interface ;

	delete maze ;

	// Falling off the end of main() implicitly returns 0.

}

# endif //SMALLINTERFACE
// 3x3 grid world (see the layout sketch above): states 0-8, column-major,
// start in state 0, goal in state 8. Every step yields reward -1.0 except
// reaching the goal, which yields 0.0 and ends the episode.
SmallMaze::SmallMaze() {
	state = 0 ;              // episodes begin in the bottom-left cell

	discreteStates = true ;
	numberOfStates = 9 ;

	discreteActions = true ;
	numberOfActions = 4 ;    // up, left, down, right (see act())

	// The maze also accepts continuous 2-D actions, which act() maps to the
	// nearest of the four discrete moves.
	continuousActions = true ;
	actionDimension = 2 ;
}

SmallMaze::~SmallMaze() {

}

// Executes one action and returns the reward (-1.0, or 0.0 on reaching the
// goal). Moves that would leave the grid are ignored (the agent stays put).
double SmallMaze::act( Action * action ) {

	int direction ;

	if ( action->continuous ) {

		// The continuous action is interpreted as a vector and is mapped to the closest of the available vectors:
		double d1 = action->continuousAction[0] ; //up-down dimension
		double d2 = action->continuousAction[1] ; //left-right dimension

		if ( d1*d1 > d2*d2 ) { //if up-down is the largest dimension

			if ( d1 > 0 ) { //determine up or down
				direction = 0 ; //up
			} else {
				direction = 2 ; //down
			}

		} else {

			if ( d2 > 0 ) {
				direction = 3 ; //right
			} else {
				direction = 1 ; //left
			}

		}

		//cout << d1 << " " << d2 << ": " << direction << endl ;

	} else {

		direction = action->discreteAction ;

	}

	double reward = -1.0 ;

	eoe = false ; //We assume the episode does not end.

	// States are numbered column-wise: +1/-1 moves within a column (up/down),
	// +3/-3 moves between columns (right/left); the guards keep the agent
	// inside the 3x3 grid.
	if ( direction == 0 ) { // up

		if ( state % 3 < 2 ) {
			state++ ;
		}

	} else if ( direction == 1 ) { // left

		if ( state > 2 ) {
			state -= 3 ;
		}

	} else if ( direction == 2 ) { // down

		if ( state % 3 > 0 ) {
			state-- ;
		}

	} else { // right

		if ( state < 6 ) {
			state += 3 ;
		}

	}

	if ( state == 8 ) {
		// If state == 8, end episode and return to 0.
		reward = 0.0 ;
		eoe = true ;

		state = 0 ;
	}

	return reward ;
}

// Copies the current discrete state into the caller's State struct.
void SmallMaze::getState( State * s ) {

	s->discreteState = state ;

}

// Overwrites the maze's state (used by the experiment driver to reset).
void SmallMaze::setState( State * s ) {

	state = s->discreteState ;

}

// True iff the last act() reached the goal state.
bool SmallMaze::endOfEpisode() {

	return eoe ;

}

const char * SmallMaze::getName() {

	return "SmallMaze" ;

}
nMaxStepsPerTrainEpisode 0 12 | nMaxStepsPerTestEpisode 0 13 | 14 | algorithm 15 | algorithms Q 16 | tau 0.001 0.01 0.1 1.0 10.0 17 | epsilon 0.001 0.01 0.1 1.0 18 | sigma 0.001 0.01 0.1 1.0 19 | 20 | learningRates 21 | decreaseType none 22 | nLearningRates 3 23 | learningRate 0.0001 0.001 0.01 0.1 24 | learningRate 0.0 0.001 0.01 0.1 25 | learningRate 0.0 0.001 0.01 0.1 26 | 27 | discount 28 | gamma 0.99 29 | 30 | nn 31 | nHiddenQ 20 32 | nHiddenV 20 33 | -------------------------------------------------------------------------------- /State.h: -------------------------------------------------------------------------------- 1 | #ifndef STATE 2 | #define STATE 3 | 4 | struct State { 5 | bool continuous ; 6 | bool discrete ; 7 | 8 | int stateDimension ; 9 | int numberOfStates ; 10 | 11 | int discreteState ; 12 | double * continuousState ; 13 | }; 14 | 15 | #endif //STATE 16 | -------------------------------------------------------------------------------- /StateActionAlgorithm.cpp: -------------------------------------------------------------------------------- 1 | #ifndef QALGORITHM 2 | #define QALGORITHM 3 | 4 | # include "StateActionAlgorithm.h" 5 | 6 | StateActionAlgorithm::StateActionAlgorithm() { 7 | continuousStates = true ; 8 | discreteStates = true ; 9 | continuousActions = false ; 10 | discreteActions = true ; 11 | } 12 | 13 | void StateActionAlgorithm::getMaxActionFirst( State * state, Action * action ) { 14 | 15 | if ( state->discrete ) { 16 | 17 | Qs = Q[ state->discreteState ] ; 18 | 19 | } else { 20 | 21 | for ( int a = 0 ; a < numberOfActions ; a++ ) { 22 | 23 | Qs[a] = QNN[a]->forwardPropagate( state->continuousState )[0] ; 24 | 25 | } 26 | 27 | } 28 | 29 | action->discreteAction = argmax( Qs, numberOfActions ) ; 30 | 31 | } 32 | 33 | void StateActionAlgorithm::getMaxActionRandom( State * state, Action * action ) { 34 | 35 | if ( state->discrete ) { 36 | 37 | Qs = Q[ state->discreteState ] ; 38 | 39 | } else { 40 | 41 | for ( int a = 0 ; a < 
numberOfActions ; a++ ) { 42 | 43 | Qs[a] = QNN[a]->forwardPropagate( state->continuousState )[0] ; 44 | 45 | } 46 | 47 | } 48 | 49 | vector maxA = argmaxAll( Qs, numberOfActions ) ; 50 | 51 | int n = maxA.size() ; 52 | 53 | action->discreteAction = maxA[ (int) ( n*drand48() ) ] ; 54 | 55 | } 56 | 57 | void StateActionAlgorithm::getMaxAction( State * state, Action * action ) { 58 | 59 | getMaxActionRandom( state, action ) ; 60 | 61 | } 62 | 63 | void StateActionAlgorithm::getRandomAction( State * state, Action * action ) { 64 | 65 | action->discreteAction = (int) ( numberOfActions*drand48() ) ; 66 | 67 | } 68 | 69 | 70 | void StateActionAlgorithm::explore( State * state, Action * action, double explorationRate, string explorationType, bool endOfEpisode ) { 71 | 72 | if ( explorationType.compare("boltzmann") == 0 ) { 73 | 74 | boltzmann( state, action, explorationRate ) ; 75 | 76 | } else if ( explorationType.compare("egreedy") == 0 ) { 77 | 78 | egreedy( state, action, explorationRate ) ; 79 | 80 | } else if ( explorationType.compare("gaussian") == 0 ) { 81 | 82 | cout << "You are trying to use gaussian exploration for an algorithm that" << endl ; 83 | cout << "does not support it. Please check your parameter file." 
<< endl ; 84 | exit(0) ; 85 | 86 | } else { 87 | 88 | cout << "Warning, no exploreType: " << explorationType << endl ; 89 | exit(0) ; 90 | 91 | } 92 | 93 | action->continuous = false ; 94 | action->discrete = true ; 95 | 96 | } 97 | void StateActionAlgorithm::egreedy( State * state, Action * action, double epsilon ) { 98 | 99 | if ( drand48() < epsilon ) { 100 | 101 | getMaxAction( state, action ) ; 102 | 103 | } else { 104 | 105 | getRandomAction( state, action ) ; 106 | 107 | } 108 | 109 | } 110 | 111 | void StateActionAlgorithm::boltzmann( State * state, Action * action, double tau ) { 112 | 113 | setQs( state, action ) ; 114 | 115 | boltzmann( action, tau ) ; 116 | 117 | } 118 | 119 | void StateActionAlgorithm::setQs( State * state, Action * action ) { 120 | 121 | if ( state->discrete ) { 122 | 123 | Qs = Q[state->discreteState] ; 124 | 125 | } else { 126 | 127 | for ( int a = 0 ; a < numberOfActions ; a++ ) { 128 | Qs[a] = QNN[a]->forwardPropagate( state->continuousState )[0] ; 129 | } 130 | 131 | } 132 | 133 | } 134 | 135 | 136 | void StateActionAlgorithm::boltzmann( Action * action, double tau ) { 137 | 138 | double sumQs = 0 ; 139 | double maxQs = max( Qs, numberOfActions ) ; 140 | 141 | for ( int a = 0 ; a < numberOfActions ; a++ ) { 142 | policy[a] = exp( (Qs[a]-maxQs)/tau ) ; 143 | sumQs += policy[a] ; 144 | } 145 | 146 | for ( int a = 0 ; a < numberOfActions ; a++ ) { 147 | policy[a] /= sumQs ; 148 | } 149 | 150 | double rnd = drand48() ; 151 | 152 | double total = policy[0] ; 153 | int a = 0 ; 154 | 155 | while ( total < rnd ) { 156 | a++ ; 157 | total += policy[ a ] ; 158 | if ( a >= numberOfActions ) { 159 | // Something went wrong... 
160 | cout << total << " " << rnd << endl ; 161 | for ( int _a = 0 ; _a < numberOfActions ; _a++ ) { 162 | cout << policy[_a] << " " ; 163 | } 164 | cout << endl ; 165 | for ( int _a = 0 ; _a < numberOfActions ; _a++ ) { 166 | cout << Qs[_a] << " " ; 167 | } 168 | cout << endl ; 169 | exit(0) ; 170 | } 171 | } 172 | 173 | action->discreteAction = a ; 174 | 175 | } 176 | 177 | bool StateActionAlgorithm::getContinuousStates() { 178 | return continuousStates ; 179 | } 180 | 181 | bool StateActionAlgorithm::getDiscreteStates() { 182 | return discreteStates ; 183 | } 184 | 185 | bool StateActionAlgorithm::getContinuousActions() { 186 | return continuousActions ; 187 | } 188 | 189 | bool StateActionAlgorithm::getDiscreteActions() { 190 | return discreteActions ; 191 | } 192 | 193 | #endif //QALGORITHM 194 | -------------------------------------------------------------------------------- /StateActionAlgorithm.h: -------------------------------------------------------------------------------- 1 | #ifndef QALGORITHM_H 2 | #define QALGORITHM_H 3 | 4 | # include 5 | # include 6 | # include 7 | # include "cNeuralNetwork.h" 8 | # include "Algorithm.h" 9 | # include "StateActionUtils.h" 10 | 11 | class StateActionAlgorithm : public Algorithm { 12 | public: 13 | StateActionAlgorithm() ; 14 | virtual ~StateActionAlgorithm() {} 15 | void getMaxAction( State * state, Action * action ) ; 16 | void explore( State * state, Action * action, double explorationRate, string explorationType, bool endOfEpisode ) ; 17 | bool getContinuousStates() ; 18 | bool getDiscreteStates() ; 19 | bool getContinuousActions() ; 20 | bool getDiscreteActions() ; 21 | 22 | protected: 23 | void getMaxActionFirst( State *, Action * action ) ; 24 | void getMaxActionRandom( State *, Action * action ) ; 25 | void getRandomAction( State * state, Action * action ) ; 26 | 27 | void setQs( State * state, Action * action ) ; 28 | 29 | void egreedy( State * state, Action * action, double epsilon ) ; 30 | void boltzmann( 
State * state, Action * action, double tau ) ; 31 | void boltzmann( Action * action, double tau ) ; 32 | void gaussian( State * state, Action * action, double tau ) ; 33 | 34 | int nHiddenQ ; 35 | double ** Q ; 36 | vector< cNeuralNetwork * > QNN ; 37 | double * Qs ; 38 | double * QTarget ; 39 | double * policy ; 40 | }; 41 | 42 | #endif //QALGORITHM_H 43 | -------------------------------------------------------------------------------- /StateActionUtils.cpp: -------------------------------------------------------------------------------- 1 | #ifndef STATEACTIONUTILS 2 | #define STATEACTIONUTILS 3 | 4 | # include "StateActionUtils.h" 5 | 6 | 7 | void copyAction( Action * aFROM, Action * aTO ) { 8 | 9 | if ( aFROM->discrete and aTO->discrete ) { 10 | 11 | aTO->discreteAction = aFROM->discreteAction ; 12 | 13 | } 14 | 15 | if ( aFROM->continuous and aTO->continuous and ( aFROM->actionDimension == aTO->actionDimension ) ) { 16 | 17 | for ( int a = 0 ; a < aFROM->actionDimension ; a++ ) { 18 | 19 | aTO->continuousAction[a] = aFROM->continuousAction[a] ; 20 | 21 | } 22 | 23 | } 24 | 25 | } 26 | 27 | void copyState( State * sFROM, State * sTO ) { 28 | 29 | if ( sFROM->discrete and sTO->discrete ) { 30 | 31 | sTO->discreteState = sFROM->discreteState ; 32 | 33 | } 34 | 35 | if ( sFROM->continuous and sTO->continuous and ( sFROM->stateDimension == sTO->stateDimension ) ) { 36 | 37 | for ( int s = 0 ; s < sFROM->stateDimension ; s++ ) { 38 | 39 | sTO->continuousState[s] = sFROM->continuousState[s] ; 40 | 41 | } 42 | 43 | } 44 | 45 | } 46 | 47 | #endif //STATEACTIONUTILS 48 | -------------------------------------------------------------------------------- /StateActionUtils.h: -------------------------------------------------------------------------------- 1 | #ifndef STATEACTIONUTILS_H 2 | #define STATEACTIONUTILS_H 3 | 4 | # include "State.h" 5 | # include "Action.h" 6 | # include 7 | 8 | void copyState( State * sFROM, State * sTO ) ; 9 | void copyAction( Action * aFROM, 
Action * aTO ) ; 10 | 11 | #endif //STATEACTIONUTILS_H 12 | -------------------------------------------------------------------------------- /World.h: -------------------------------------------------------------------------------- 1 | #ifndef WORLD_H 2 | #define WORLD_H 3 | 4 | # include "State.h" 5 | # include "Action.h" 6 | 7 | class World { 8 | public: 9 | World() { 10 | continuousStates = false ; 11 | discreteStates = false ; 12 | continuousActions = false ; 13 | discreteActions = false ; 14 | } 15 | virtual ~World() { } 16 | virtual bool getContinuousStates() { return continuousStates ; } 17 | virtual bool getDiscreteStates() { return discreteStates ; } 18 | virtual int getStateDimension() { return stateDimension ; } 19 | virtual int getNumberOfStates() { return numberOfStates ; } 20 | virtual bool getContinuousActions() { return continuousActions ; } 21 | virtual bool getDiscreteActions() { return discreteActions ; } 22 | virtual int getActionDimension() { return actionDimension ; } 23 | virtual int getNumberOfActions() { return numberOfActions ; } 24 | virtual void reset() {} 25 | virtual bool endOfEpisode() { return false ; } 26 | 27 | virtual double act( Action * ) =0; 28 | 29 | virtual const char * getName() =0; 30 | virtual void getState( State * ) =0 ; 31 | virtual void setState( State * ) =0 ; 32 | 33 | protected: 34 | bool discreteStates, discreteActions, continuousStates, continuousActions ; 35 | int stateDimension, actionDimension, numberOfStates, numberOfActions ; 36 | 37 | }; 38 | 39 | #endif //WORLD_H 40 | -------------------------------------------------------------------------------- /cFunction.h: -------------------------------------------------------------------------------- 1 | #ifndef CFUNCTION_H 2 | #define CFUNCTION_H 3 | 4 | #include 5 | using namespace std; 6 | 7 | 8 | class cFunction { 9 | public: 10 | cFunction() {} 11 | virtual ~cFunction() {} 12 | virtual int printName() =0; 13 | virtual double output( double input ) =0; 14 | 
virtual void output( double * input, double * output, int n ) =0; 15 | virtual double derivative( double input, double output ) =0; 16 | }; 17 | 18 | #endif //CFUNCTION_H 19 | -------------------------------------------------------------------------------- /cLinear.cpp: -------------------------------------------------------------------------------- 1 | #ifndef CLINEAR 2 | #define CLINEAR 3 | 4 | #include "cLinear.h" 5 | using namespace std; 6 | 7 | cLinear::cLinear() : cFunction() { 8 | //~ cout << "constructor Linear\n" ; 9 | } 10 | 11 | cLinear::~cLinear() { 12 | //~ cout << "destructor Linear\n" ; 13 | } 14 | 15 | int cLinear::printName() { 16 | cout << "cLinear\n" ; 17 | return 0 ; 18 | } 19 | 20 | double cLinear::output( double input ) { 21 | return input ; 22 | 23 | } 24 | 25 | void cLinear::output( double * inp, double * outp, int n) { 26 | for ( int i = 0 ; i < n ; i++ ) { 27 | outp[i] = inp[i] ; 28 | } 29 | } 30 | 31 | double cLinear::derivative( double input, double output ) { 32 | return 1.0 ; 33 | } 34 | 35 | void derivative( double * input, double * output, double * derivative, int n ) { 36 | for ( int i = 0 ; i < n ; i++ ) { 37 | derivative[i] = 1.0 ; 38 | } 39 | } 40 | 41 | 42 | #endif //CLINEAR 43 | -------------------------------------------------------------------------------- /cLinear.h: -------------------------------------------------------------------------------- 1 | #ifndef CLINEAR_H 2 | #define CLINEAR_H 3 | 4 | #include "cFunction.h" 5 | using namespace std; 6 | 7 | class cLinear : public cFunction { 8 | public: 9 | cLinear() ; 10 | ~cLinear() ; 11 | int printName() ; 12 | double output( double input ); 13 | void output( double * input, double * output, int n ); 14 | double derivative( double input, double output ); 15 | void derivative( double * input, double * output, double * derivative, int n ); 16 | }; 17 | 18 | #endif //CLINEAR_H 19 | -------------------------------------------------------------------------------- 
/cNeuralNetwork.cpp: -------------------------------------------------------------------------------- 1 | #ifndef CNEURALNETWORK 2 | #define CNEURALNETWORK 3 | #define THRESHOLD 2 4 | #define TANH 1 5 | #define LINEAR 0 6 | # include 7 | # include 8 | # include 9 | # include "cNeuralNetwork.h" 10 | # include "cFunction.h" 11 | # include "cLinear.cpp" 12 | # include "cThreshold.cpp" 13 | # include "cTanH.cpp" 14 | # include "Matrix.cpp" 15 | 16 | using namespace std ; 17 | 18 | cNeuralNetwork::cNeuralNetwork( const char * nnFile ) { 19 | SIZEOFDOUBLE = sizeof( double ) ; 20 | readNetwork( nnFile ) ; 21 | } 22 | 23 | cNeuralNetwork::cNeuralNetwork( int nLayersInit, int * layerSizeInit ) { 24 | int * layerFunctions = new int[ nLayersInit + 2 ] ; 25 | for ( int l = 1 ; l < nLayersInit + 1 ; l++ ) { 26 | layerFunctions[ l ] = TANH ; 27 | } 28 | layerFunctions[ 0 ] = LINEAR ; 29 | layerFunctions[ nLayersInit + 1 ] = LINEAR ; 30 | 31 | init( nLayersInit, layerSizeInit, layerFunctions ) ; 32 | 33 | delete [] layerFunctions ; 34 | 35 | } 36 | 37 | cNeuralNetwork::cNeuralNetwork( int nLayersInit, int * layerSizeInit, int * layerFunctionInit ) { 38 | // int nLayersInit : the number of hidden layers (So for input, hidden, output -> 1) 39 | // int * layerSizeInit : the number of nodes per layer (# nodes layers = # weights layers + 1) 40 | // (size should be nLayersInit + 2) 41 | // int * layerFunctionInit : code, specifying the function per (node) layer 42 | 43 | //~ cout << "constructor NN L\n" ; 44 | init( nLayersInit, layerSizeInit, layerFunctionInit ) ; 45 | } 46 | 47 | void cNeuralNetwork::init( int nLayersInit, int * layerSizeInit, int * layerFunctionInit ) { 48 | srand48(time(0)); 49 | 50 | SIZEOFDOUBLE = sizeof( double ) ; 51 | 52 | nLayers = nLayersInit + 2 ; // # node layers == # hidden layers + 2 53 | nInput = layerSizeInit[ 0 ] ; 54 | nOutput = layerSizeInit[ nLayers - 1 ] ; 55 | 56 | //~ cout << "nInput" << nInput << endl ; 57 | //~ cout << "nOutput" << nOutput << 
endl ; 58 | //~ cout << "nLayers" << nLayers << endl ; 59 | 60 | layerFunction = new cFunction*[ nLayers ] ; 61 | layerFunctionInts = new int[ nLayers ] ; 62 | weights = new Matrix*[ nLayers - 1 ] ; // # weights layers = # node layers - 1 63 | layerIn = new Matrix*[ nLayers ] ; 64 | layerOut = new Matrix*[ nLayers ] ; 65 | layerSize = new int[ nLayers ] ; 66 | 67 | for( l = 0 ; l < ( nLayers - 1 ) ; l++ ) { 68 | weights[ l ] = new Matrix( layerSizeInit[ l ] + 1, layerSizeInit[ l + 1 ] ) ; // layerSize[ l ] + 1, for bias 69 | } 70 | for( l = 0 ; l < nLayers ; l++ ) { 71 | layerSize[ l ] = layerSizeInit[ l ] ; 72 | layerIn[ l ] = new Matrix( layerSize[ l ] ) ; 73 | layerOut[ l ] = new Matrix( layerSize[ l ] ) ; 74 | 75 | layerFunctionInts[ l ] = layerFunctionInit[ l ] ; 76 | if ( layerFunctionInit[ l ] == TANH ) { 77 | layerFunction[ l ] = new cTanH() ; 78 | } else if ( layerFunctionInit[ l ] == LINEAR ) { 79 | layerFunction[ l ] = new cLinear() ; 80 | } else { 81 | cout << "WARNING: Unknown layer function type: " ; 82 | cout << layerFunctionInit[ l ] << '\n' ; 83 | cout << "layer: " << l << '\n' ; 84 | exit(1) ; 85 | } 86 | } 87 | 88 | // Initialise the weights randomly between -0.3 and 0.3 89 | // randomizeWeights( -0.3, 0.3 ) ; 90 | randomizeWeights( -0.03, 0.03 ) ; 91 | recentlyUpdated = true ; 92 | } 93 | 94 | cNeuralNetwork::~cNeuralNetwork( ) { 95 | //~ cout << "destructor NN\n" ; 96 | for( l = 0 ; l < nLayers - 1 ; l++ ) { 97 | delete weights[ l ] ; 98 | } 99 | for( l = 0 ; l < nLayers ; l++ ) { 100 | delete layerIn[ l ] ; 101 | delete layerOut[ l ] ; 102 | delete layerFunction[ l ] ; 103 | } 104 | delete [] layerIn ; 105 | delete [] layerOut ; 106 | delete [] layerFunction ; 107 | delete [] layerFunctionInts ; 108 | delete [] layerSize ; 109 | delete [] weights ; 110 | } 111 | 112 | void cNeuralNetwork::changeFunction( int layer, int function ) { 113 | delete layerFunction[ layer ] ; 114 | 115 | if ( function == TANH ) { 116 | 117 | layerFunction[ layer ] = 
new cTanH() ; 118 | 119 | } else if ( function == LINEAR ) { 120 | 121 | layerFunction[ layer ] = new cLinear() ; 122 | 123 | } else if ( function == THRESHOLD ) { 124 | 125 | layerFunction[ layer ] = new cThreshold() ; 126 | 127 | } 128 | 129 | recentlyUpdated = true ; 130 | } 131 | 132 | void cNeuralNetwork::_forwardPropLayer( int layer ) { 133 | 134 | w = weights[ layer ]->getPtr() ; // The weights of the current layer 135 | 136 | nFormer = layerSize[ layer ] ; // # nodes in the former nodes layer 137 | nNext = layerSize[ layer + 1 ] ; // # nodes in the next nodes layer 138 | 139 | formerOut = layerOut[ layer ]->getPtr() ; // The output of the former layer 140 | nextIn = layerIn[ layer + 1 ]->getPtr() ; // The input of the next layer (to be set) 141 | nextOut = layerOut[ layer + 1 ]->getPtr() ; // The output of the next layer (follows from the input and function) 142 | 143 | cFunction * f = layerFunction[layer+1] ; // The function on the next layer 144 | 145 | // Initialise inputs to the next layer to zero 146 | for( o = 0 ; o < nNext ; o++ ) { 147 | nextIn[ o ] = 0.0 ; 148 | } 149 | 150 | // initialise counter for the weights 151 | int wPos = 0 ; 152 | // add weighted inputs 153 | for( i = 0 ; i < nFormer ; i++ ) { 154 | if ( formerOut[ i ] != 0.0) { 155 | for( o = 0 ; o < nNext ; o++ ) { 156 | nextIn[ o ] += w[ wPos ] * formerOut[ i ] ; 157 | wPos++ ; 158 | } 159 | } else { 160 | wPos += nNext ; 161 | } 162 | } 163 | // add bias and calculate output 164 | for( o = 0 ; o < nNext ; o++ ) { 165 | nextIn[ o ] += w[ wPos ] ; 166 | wPos++ ; 167 | } 168 | f->output( nextIn, nextOut, nNext ) ; 169 | } 170 | 171 | double * cNeuralNetwork::_backPropLayer( int layer, double * oError, double learningSpeed ) { 172 | 173 | w = weights[ layer ]->getPtr() ; // The weights of the current layer 174 | 175 | nFormer = layerSize[ layer ] ; // # nodes in the former nodes layer 176 | nNext = layerSize[ layer + 1 ] ; // # nodes in the next nodes layer 177 | 178 | formerOut = 
layerOut[ layer ]->getPtr() ; // The output of the former layer 179 | formerIn = layerIn[ layer ]->getPtr() ; // The input of the former layer 180 | 181 | cFunction * f = layerFunction[ layer ] ; // Function of the former layer 182 | 183 | wPos = 0 ; 184 | 185 | iError = new double[ nFormer ] ; // Error of the input of the former layer 186 | 187 | if ( layer > 0 ) { 188 | 189 | for( i = 0 ; i < nFormer ; i++ ) { 190 | 191 | iError[ i ] = 0.0 ; 192 | 193 | if ( formerOut[ i ] != 0.0 ) { 194 | 195 | for ( o = 0 ; o < nNext ; o++ ) { 196 | 197 | // First get error: 198 | iError[ i ] += w[ wPos ] * oError[ o ] ; 199 | 200 | // Then update the weight: 201 | w[ wPos ] += formerOut[ i ] * oError[ o ] ; 202 | 203 | wPos++ ; 204 | 205 | } 206 | 207 | } else { 208 | 209 | for ( o = 0 ; o < nNext ; o++ ) { 210 | 211 | // Only get error: 212 | iError[ i ] += w[ wPos ] * oError[ o ] ; 213 | 214 | wPos++ ; 215 | 216 | } 217 | 218 | } 219 | // Pass the error through the layers function: 220 | iError[ i ] *= f->derivative( formerIn[ i ], formerOut[ i ] ) ; 221 | 222 | } 223 | 224 | } else { 225 | 226 | for( i = 0 ; i < nFormer ; i++ ) { 227 | 228 | if ( formerOut[ i ] != 0.0 ) { 229 | 230 | for ( o = 0 ; o < nNext ; o++ ) { 231 | 232 | // Only update weight: 233 | w[ wPos ] += formerOut[ i ] * oError[ o ] ; 234 | 235 | wPos++ ; 236 | 237 | } 238 | 239 | } else { 240 | 241 | wPos += nNext ; 242 | 243 | } 244 | 245 | } 246 | } 247 | 248 | for ( o = 0 ; o < nNext ; o++ ) { 249 | 250 | // Update the bias 251 | w[ wPos ] += oError[ o ] ; 252 | 253 | wPos++ ; 254 | 255 | } 256 | 257 | 258 | 259 | return iError ; 260 | } 261 | 262 | void cNeuralNetwork::backPropagate( double *input, double *target, double learningSpeed ) { 263 | //If the network has been adapted after the last forward propagation, 264 | //forwardPropagate first to correctly set the hidden activations: 265 | if ( recentlyUpdated ) { 266 | forwardPropagate( input ) ; 267 | } else { 268 | //Check whether the activations of 
the layers correspond with the present input 269 | inputIn = layerIn[ 0 ]->getPtr() ; 270 | bool useLastActivations = true ; 271 | for ( i = 0 ; ( i < nInput ) & useLastActivations ; i++ ){ 272 | if ( input[i] != inputIn[i] ) { 273 | useLastActivations = false ; 274 | } 275 | } 276 | if ( !useLastActivations ) { 277 | //If the activations don't correspond to the last input (and the network 278 | //has been adapted in the meantime) set the activations by a forward 279 | //propagation 280 | forwardPropagate( input ) ; 281 | } 282 | } 283 | error = new double[ nOutput ] ; //error of the output layer. 284 | outputOut = layerOut[ nLayers - 1 ]->getPtr() ;// Output of the output layer 285 | 286 | for( o = 0 ; o < nOutput ; o++) { 287 | error[ o ] = target[ o ] - outputOut[ o ] ; 288 | } 289 | //backPropagate the error 290 | backPropagateError( input, error, learningSpeed ) ; 291 | delete[] error ; 292 | } 293 | 294 | void cNeuralNetwork::backPropagateError( double *input, double *error, double learningSpeed ) { 295 | //If the network has been adapted after the last forward propagation, 296 | //forwardPropagate first to correctly set the hidden activations: 297 | if ( recentlyUpdated ) { 298 | forwardPropagate( input ) ; 299 | } else { 300 | //Check whether the activations of the layers correspond with the present input 301 | inputIn = layerIn[ 0 ]->getPtr() ; 302 | bool useLastActivations = true ; 303 | for ( i = 0 ; ( i < nInput ) & useLastActivations ; i++ ){ 304 | if ( input[i] != inputIn[i] ) { 305 | useLastActivations = false ; 306 | } 307 | } 308 | if ( !useLastActivations ) { 309 | //If the activations don't correspond to the last input (and the network 310 | //has been adapted in the meantime) set the activations by a forward 311 | //propagation 312 | forwardPropagate( input ) ; 313 | } 314 | } 315 | oError = new double[ nOutput ] ; //error of the output layer. 
316 | 317 | outputIn = layerIn[ nLayers - 1 ]->getPtr() ; // Input of the output layer 318 | outputOut = layerOut[ nLayers - 1 ]->getPtr() ;// Output of the output layer 319 | cFunction * f = layerFunction[ nLayers - 1 ] ; // Function of the output layer 320 | 321 | //First calculate the error of the input of the output layer: 322 | for( o = 0 ; o < nOutput ; o++) { 323 | oError[ o ] = learningSpeed*error[ o ] ; 324 | oError[ o ] *= f->derivative( outputIn[ o ], outputOut[ o ] ) ; 325 | } 326 | 327 | //Now propagate until reaching the input layer: 328 | for ( l = nLayers - 2 ; l >= 0 ; l-- ) { 329 | iError = _backPropLayer( l, oError, learningSpeed ) ; 330 | delete [] oError ; 331 | oError = iError ; 332 | } 333 | 334 | delete [] iError ; 335 | 336 | recentlyUpdated = true ; 337 | } 338 | 339 | 340 | 341 | double * cNeuralNetwork::forwardPropagate( double *input ) { 342 | cFunction * f = layerFunction[ 0 ] ; // Function of the input layer 343 | 344 | double * inputIn = layerIn[ 0 ]->getPtr() ; // Input of the first layer (to be set) 345 | double * inputOut = layerOut[ 0 ]->getPtr() ; // Output of the first layer (to be set) 346 | 347 | //First set the first layer 348 | for( i = 0 ; i < nInput ; i++ ) { 349 | inputIn[ i ] = input[ i ] ; 350 | } 351 | 352 | f->output( input, inputOut, nInput ) ; 353 | 354 | //Now propagate (sets layerIn and layerOut for each layer) 355 | for ( l = 0 ; l < (nLayers - 1) ; l++ ) { 356 | _forwardPropLayer( l ) ; 357 | } 358 | 359 | //Set recentlyUpdated to false to show that the activations where set after the last change to the network 360 | recentlyUpdated = false ; 361 | 362 | return layerOut[ nLayers - 1 ]->getPtr() ; // Output of the last layer 363 | 364 | } 365 | 366 | double * cNeuralNetwork::forwardPropagate( vector *input_vector ) { 367 | if ( (int) input_vector->size() != nInput ) { 368 | cerr << "Vector input is incorrect size: " << input_vector->size() << " instead of " << nInput << endl ; 369 | throw -1 ; 370 | } 371 | 372 
| double * input = new double[ nInput ] ; 373 | for ( i = 0 ; i < nInput ; i++ ) { 374 | input[ i ] = input_vector->at( i ) ; 375 | } 376 | 377 | double * output = forwardPropagate( input ) ; 378 | 379 | delete [] input ; 380 | 381 | return output ; 382 | } 383 | 384 | 385 | void cNeuralNetwork::forwardPropagate( double *input, List * output ) { 386 | cFunction * f = layerFunction[ 0 ] ; // Function of the input layer 387 | 388 | double * inputIn = layerIn[ 0 ]->getPtr() ; // Input of the first layer (to be set) 389 | double * inputOut = layerOut[ 0 ]->getPtr() ; // Output of the first layer (to be set) 390 | 391 | //~ cout << "nInput" << nInput << endl ; 392 | //~ cout << "nOutput" << nOutput << endl ; 393 | //~ cout << "nLayers" << nLayers << endl ; 394 | 395 | //First set the first layer 396 | for( i = 0 ; i < nInput ; i++ ) { 397 | inputIn[ i ] = input[ i ] ; 398 | } 399 | 400 | f->output( input, inputOut, nInput ) ; 401 | 402 | //Now propagate (sets layerIn and layerOut for each layer) 403 | for ( l = 0 ; l < (nLayers - 1) ; l++ ) { 404 | _forwardPropLayer( l ) ; 405 | } 406 | 407 | double * outputOut = layerOut[ nLayers - 1 ]->getPtr() ; // Output of the last layer 408 | 409 | //Finally, return the output of the last layer through the argument 410 | output->contents = new double[ nOutput ] ; 411 | for ( o = 0 ; o < nOutput ; o++ ) { 412 | output->contents[ o ] = outputOut[ o ] ; 413 | } 414 | output->length = nOutput ; 415 | //Set recentlyUpdated to false to show that the activations where set after the last change to the network 416 | recentlyUpdated = false ; 417 | } 418 | 419 | double cNeuralNetwork::getActivation( int layer, int node ) { 420 | return layerOut[ layer ]->get( node ) ; 421 | } 422 | 423 | void cNeuralNetwork::getActivations( int layer, List * activations ) { 424 | double * layerActivation = layerOut[ layer ]->getPtr() ; // Output of the requested layer 425 | 426 | //Return the output of the requested layer through the argument 427 | int 
nActivations = layerSize[ layer ] ; 428 | activations->contents = new double[ nActivations ] ; 429 | for ( o = 0 ; o < nActivations ; o++ ) { 430 | activations->contents[ o ] = layerActivation[ o ] ; 431 | } 432 | activations->length = nActivations ; 433 | } 434 | 435 | double * cNeuralNetwork::getActivations( int layer ) { 436 | return layerOut[ layer ]->getPtr() ; // Output of the requested layer 437 | } 438 | 439 | double cNeuralNetwork::getWeights( int layer, int i, int j ) { 440 | return weights[ layer ]->get( i*(layerSize[ layer + 1 ]) + j ) ; 441 | } 442 | 443 | void cNeuralNetwork::setWeights( int layer, int i, int j, double val ) { 444 | recentlyUpdated = true ; 445 | weights[ layer ]->set( i*(layerSize[ layer + 1 ]) + j, val ) ; 446 | } 447 | 448 | void cNeuralNetwork::randomizeWeights( double min, double max ) { 449 | for ( l = 0 ; l < ( nLayers - 1 ) ; l++ ) { 450 | for ( i = 0 ; i < ( layerSize[l] + 1 ) ; i++ ) { 451 | for ( o = 0 ; o < layerSize[ l + 1 ] ; o++ ) { 452 | setWeights( l, i, o, min + (max - min)*drand48() ) ; 453 | } 454 | } 455 | } 456 | } 457 | 458 | void cNeuralNetwork::randomizeWeights( double min, double max, int seed ) { 459 | srand( seed ) ; 460 | for ( l = 0 ; l < ( nLayers - 1 ) ; l++ ) { 461 | for ( i = 0 ; i < ( layerSize[l] + 1 ) ; i++ ) { 462 | for ( o = 0 ; o < layerSize[ l + 1 ] ; o++ ) { 463 | setWeights( l, i, o, min + (max - min)*drand48() ) ; 464 | } 465 | } 466 | } 467 | } 468 | 469 | void cNeuralNetwork::printNetwork() { 470 | for ( l = 0 ; l < (nLayers - 1) ; l++ ) { 471 | _printLayer( l ) ; 472 | } 473 | } 474 | 475 | void cNeuralNetwork::_printLayer( int layer ) { 476 | double * w = weights[ layer ]->getPtr() ; // The weights of the current layer 477 | cout << "layer " << layer ; 478 | 479 | int nFormer = layerSize[ layer ] ; // # nodes in the former nodes layer 480 | int nNext = layerSize[ layer + 1 ] ; // # nodes in the next nodes layer 481 | 482 | cout << " nFormer " << nFormer ; 483 | cout << " nNext " << nNext 
<< endl ; 484 | 485 | int wPos = 0 ; 486 | for( i = 0 ; i < nFormer ; i++ ) { 487 | for( o = 0 ; o < nNext ; o++ ) { 488 | cout << w[ wPos ] << ", " ; 489 | wPos++ ; 490 | } 491 | cout << '\n' ; 492 | } 493 | // add bias and calculate output 494 | for( o = 0 ; o < nNext ; o++ ) { 495 | cout << " B " << w[ wPos ] << '\n' ; 496 | wPos++ ; 497 | } 498 | } 499 | 500 | int cNeuralNetwork::read_int(istream &is) { 501 | int result; 502 | char *s = (char *) &result; 503 | is.read(s, sizeof(result)); 504 | return result; 505 | } 506 | 507 | void cNeuralNetwork::write_int(ostream &is, int result) { 508 | char *s = (char *) &result; 509 | is.write(s, sizeof(result)); 510 | } 511 | 512 | double cNeuralNetwork::read_double(istream &is) { 513 | double result; 514 | char *s = (char *) &result; 515 | is.read(s, sizeof(result)); 516 | return result; 517 | } 518 | 519 | void cNeuralNetwork::write_double(ostream &is, double result) { 520 | char *s = (char *) &result; 521 | is.write(s, sizeof(result)); 522 | } 523 | 524 | void cNeuralNetwork::readNetwork( const char * file ) { 525 | ifstream ifile ; 526 | 527 | ifile.open( file, ifstream::in ) ; 528 | 529 | nLayers = read_int( ifile ) ; 530 | 531 | int * layerSizeInit = new int[nLayers] ; 532 | int * layerFunctionInit = new int[nLayers] ; 533 | 534 | for ( int l = 0 ; l < nLayers ; l++ ) { 535 | layerSizeInit[l] = read_int( ifile ) ; 536 | layerFunctionInit[l] = read_int( ifile ) ; 537 | } 538 | 539 | nInput = layerSizeInit[ 0 ] ; 540 | nOutput = layerSizeInit[ nLayers - 1 ] ; 541 | //~ cout << "nInput" << nInput << endl ; 542 | //~ cout << "nOutput" << nOutput << endl ; 543 | //~ cout << "nLayers" << nLayers << endl ; 544 | 545 | layerFunction = new cFunction*[ nLayers ] ; 546 | layerFunctionInts = new int[ nLayers ] ; 547 | weights = new Matrix*[ nLayers - 1 ] ; // # weights layers = # node layers - 1 548 | layerIn = new Matrix*[ nLayers ] ; 549 | layerOut = new Matrix*[ nLayers ] ; 550 | layerSize = new int[ nLayers ] ; 551 | 552 | 
for( int l = 0 ; l < ( nLayers - 1 ) ; l++ ) { 553 | weights[ l ] = new Matrix( layerSizeInit[ l ] + 1, layerSizeInit[ l + 1 ] ) ; // layerSize[ l ] + 1, for bias 554 | } 555 | for( int l = 0 ; l < nLayers ; l++ ) { 556 | layerSize[ l ] = layerSizeInit[ l ] ; 557 | layerIn[ l ] = new Matrix( layerSize[ l ] ) ; 558 | layerOut[ l ] = new Matrix( layerSize[ l ] ) ; 559 | 560 | layerFunctionInts[ l ] = layerFunctionInit[ l ] ; 561 | if ( layerFunctionInit[ l ] == TANH ) { 562 | layerFunction[ l ] = new cTanH() ; 563 | } else if ( layerFunctionInit[ l ] == LINEAR ) { 564 | layerFunction[ l ] = new cLinear() ; 565 | } else { 566 | cout << "WARNING: Unknown layer function type: " ; 567 | cout << layerFunctionInit[ l ] << '\n' ; 568 | cout << "layer: " << l << '\n' ; 569 | exit(1) ; 570 | } 571 | } 572 | // Get weights 573 | for ( int l = 0 ; l < ( nLayers - 1 ) ; l++ ) { 574 | for ( int i = 0 ; i < ( layerSize[l] + 1 ) ; i++ ) { 575 | for ( int o = 0 ; o < layerSize[ l + 1 ] ; o++ ) { 576 | setWeights( l, i, o, read_double( ifile ) ) ; 577 | } 578 | } 579 | } 580 | recentlyUpdated = true ; 581 | 582 | delete [] layerSizeInit ; 583 | delete [] layerFunctionInit ; 584 | } 585 | 586 | void cNeuralNetwork::writeNetwork( const char * file ) { 587 | ofstream ofile ; 588 | 589 | ofile.open( file, ofstream::out ) ; 590 | 591 | write_int( ofile, nLayers ) ; 592 | 593 | for ( int l = 0 ; l < nLayers ; l++ ) { 594 | write_int( ofile, layerSize[l] ) ; 595 | write_int( ofile, layerFunctionInts[l] ) ; 596 | } 597 | 598 | // Weights 599 | for ( int l = 0 ; l < ( nLayers - 1 ) ; l++ ) { 600 | for ( int i = 0 ; i < ( layerSize[l] + 1 ) ; i++ ) { 601 | for ( int o = 0 ; o < layerSize[ l + 1 ] ; o++ ) { 602 | write_double( ofile, getWeights( l, i, o ) ) ; 603 | } 604 | } 605 | } 606 | } 607 | 608 | #endif //CNEURALNETWORK 609 | 610 | -------------------------------------------------------------------------------- /cNeuralNetwork.h: 
-------------------------------------------------------------------------------- 1 | #ifndef CNEURALNETWORK_H 2 | #define CNEURALNETWORK_H 3 | # include 4 | # include 5 | # include 6 | # include "cFunction.h" 7 | # include "cTanH.h" 8 | # include "cLinear.h" 9 | # include "Matrix.h" 10 | # include "List.h" 11 | 12 | 13 | using namespace std; 14 | 15 | class cNeuralNetwork { 16 | public: 17 | cNeuralNetwork( const char * ); 18 | cNeuralNetwork( int, int * ); 19 | cNeuralNetwork( int, int *, int * ); 20 | ~cNeuralNetwork(); 21 | 22 | void init( int, int *, int * ) ; 23 | 24 | void forwardPropagate( double *, List * ); 25 | double * forwardPropagate( double * ); 26 | double * forwardPropagate( vector * ); 27 | 28 | void backPropagate( double *, double *, double ); 29 | void backPropagateError( double *, double *, double ); 30 | 31 | void changeFunction( int, int ) ; 32 | 33 | double getActivation( int, int ) ; 34 | void getActivations( int, List * ) ; 35 | double * getActivations( int ) ; 36 | double getWeights( int, int, int ) ; 37 | void setWeights( int, int, int, double ) ; 38 | void randomizeWeights( double, double ) ; 39 | void randomizeWeights( double, double, int ) ; 40 | void printNetwork() ; 41 | void readNetwork( const char * ) ; 42 | void writeNetwork( const char * ) ; 43 | private: 44 | int nLayers, nInput, nOutput, nFormer, nNext ; 45 | unsigned int wPos ; 46 | int * layerSize ; 47 | int * layerFunctionInts ; 48 | double * formerOut ; 49 | double * formerIn ; 50 | double * nextIn ; 51 | double * nextOut ; 52 | double * iError ; 53 | double * oError ; 54 | double * w ; 55 | double * inputIn ; 56 | double * inputOut ; 57 | double * outputIn ; 58 | double * outputOut ; 59 | double * error ; 60 | cFunction ** layerFunction ; 61 | Matrix ** weights ; 62 | Matrix ** layerIn ; 63 | Matrix ** layerOut ; 64 | int i, h, o, l ; //Indices 65 | int SIZEOFDOUBLE ; 66 | void _forwardPropLayer( int ) ; 67 | double * _backPropLayer( int layer, double * outError, double 
learningSpeed ) ; 68 | bool recentlyUpdated ; 69 | void _printLayer( int ) ; 70 | int read_int( istream & ) ; 71 | void write_int( ostream &, int ) ; 72 | double read_double( istream & ) ; 73 | void write_double( ostream &, double ) ; 74 | }; 75 | 76 | #endif // CNEURALNETWORK_H 77 | -------------------------------------------------------------------------------- /cTanH.cpp: -------------------------------------------------------------------------------- 1 | #ifndef CTANH 2 | #define CTANH 3 | 4 | using namespace std; 5 | //# include 6 | # include "cTanH.h" 7 | 8 | cTanH::cTanH() : cFunction() { 9 | //~ cout << "constructor TanH\n" ; 10 | } 11 | 12 | cTanH::~cTanH() { 13 | //~ cout << "destructor TanH\n" ; 14 | } 15 | 16 | int cTanH::printName() { 17 | cout << "cTanH\n" ; 18 | return 0; 19 | } 20 | 21 | double cTanH::output( double input ) { 22 | //~ //cout << "tanh in;" ; 23 | double output = -1 ; 24 | if (input > 1.92032) { 25 | output = 0.96016; 26 | } else if (input > 0.0) { 27 | output = -0.260373271*input*input + input; 28 | } else if (input > -1.92032) { 29 | output = 0.260373271*input*input + input; 30 | } else { 31 | output = -0.96016; 32 | } 33 | return output; 34 | //~ return tanh( input ) ; 35 | } 36 | 37 | void cTanH::output( double * input , double * output , int n ) { 38 | 39 | for ( int i = 0 ; i < n ; i++ ) { 40 | output[i] = -1 ; 41 | if (input[i] > 1.92032) { 42 | output[i] = 0.96016; 43 | } else if (input[i] > 0.0) { 44 | output[i] = -0.260373271*input[i]*input[i] + input[i]; 45 | } else if (input[i] > -1.92032) { 46 | output[i] = 0.260373271*input[i]*input[i] + input[i]; 47 | } else { 48 | output[i] = -0.96016; 49 | } 50 | } 51 | } 52 | 53 | double cTanH::derivative( double input, double output ) { 54 | return 1.0 - (output*output) ; 55 | } 56 | 57 | void cTanH::derivative( double * input, double * output, double * derivative, int n ) { 58 | for ( int i = 0 ; i < n ; i++ ) { 59 | derivative[i] = 1.0 - (output[i]*output[i]) ; 60 | } 61 | } 62 | 
63 | #endif //CTANH 64 | -------------------------------------------------------------------------------- /cTanH.h: -------------------------------------------------------------------------------- 1 | #ifndef CTANH_H 2 | #define CTANH_H 3 | 4 | #include "cFunction.h" 5 | using namespace std; 6 | class cTanH : public cFunction { 7 | public: 8 | cTanH() ; 9 | ~cTanH() ; 10 | int printName() ; 11 | double output( double input ); 12 | void output( double * input, double * output, int n ); 13 | double derivative( double input, double output ); 14 | void derivative( double * input, double * output, double * derivative, int n ); 15 | }; 16 | 17 | #endif //CTANH_H 18 | -------------------------------------------------------------------------------- /cThreshold.cpp: -------------------------------------------------------------------------------- 1 | #ifndef CTHRESHOLD 2 | #define CTHRESHOLD 3 | 4 | #include "cThreshold.h" 5 | using namespace std; 6 | 7 | cThreshold::cThreshold() : cFunction() { 8 | //~ cout << "constructor Threshold\n" ; 9 | } 10 | 11 | cThreshold::~cThreshold() { 12 | //~ cout << "destructor Threshold\n" ; 13 | } 14 | 15 | int cThreshold::printName() { 16 | cout << "cThreshold\n" ; 17 | return 0 ; 18 | } 19 | 20 | double cThreshold::output( double input ) { 21 | if ( input > 0 ) { 22 | return 1.0 ; 23 | } else { 24 | return -1.0 ; 25 | } 26 | } 27 | 28 | void cThreshold::output( double * inp, double * outp, int n) { 29 | for ( int i = 0 ; i < n ; i++ ) { 30 | if ( inp[i] > 0 ) { 31 | outp[i] = 1.0 ; 32 | } else { 33 | outp[i] = -1.0 ; 34 | } 35 | } 36 | } 37 | 38 | double cThreshold::derivative( double input, double output ) { 39 | return 1.0 ; 40 | } 41 | 42 | 43 | #endif //CTHRESHOLD 44 | -------------------------------------------------------------------------------- /cThreshold.h: -------------------------------------------------------------------------------- 1 | #ifndef CTHRESHOLD_H 2 | #define CTHRESHOLD_H 3 | 4 | #include "cFunction.h" 5 | using 
namespace std; 6 | 7 | class cThreshold : public cFunction { 8 | public: 9 | cThreshold() ; 10 | ~cThreshold() ; 11 | int printName() ; 12 | double output( double input ); 13 | void output( double * input, double * output, int n ); 14 | double derivative( double input, double output ); 15 | }; 16 | 17 | #endif //CTHRESHOLD_H 18 | --------------------------------------------------------------------------------