├── .gitignore ├── Acla.cpp ├── Acla.h ├── Action.h ├── Algorithm.cpp ├── Algorithm.h ├── Arm.cpp ├── Arm.h ├── ArmCfgCacla ├── ArmCfgCaclaSimple ├── ArmInterface.cpp ├── ArmInterface.h ├── CMakeLists.txt ├── Cacla.cpp ├── Cacla.h ├── CartPole.cpp ├── CartPole.h ├── CartPoleCfg ├── CartPoleCfgCacla ├── CartPoleCfgCaclaSimple ├── CartPoleInterface.cpp ├── CartPoleInterface.h ├── Experiment.cpp ├── Experiment.h ├── List.h ├── Makefile ├── Matrix.cpp ├── Matrix.h ├── QVlearning.cpp ├── QVlearning.h ├── Qlearning.cpp ├── Qlearning.h ├── README.md ├── README.org ├── Sarsa.cpp ├── Sarsa.h ├── SmallInterface.cpp ├── SmallInterface.h ├── SmallMaze.cpp ├── SmallMaze.h ├── SmallMazeCfg ├── State.h ├── StateActionAlgorithm.cpp ├── StateActionAlgorithm.h ├── StateActionUtils.cpp ├── StateActionUtils.h ├── World.h ├── cFunction.h ├── cLinear.cpp ├── cLinear.h ├── cNeuralNetwork.cpp ├── cNeuralNetwork.h ├── cTanH.cpp ├── cTanH.h ├── cThreshold.cpp └── cThreshold.h /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | 3 | # Object files 4 | *.o 5 | *.ko 6 | *.obj 7 | *.elf 8 | 9 | # Precompiled Headers 10 | *.gch 11 | *.pch 12 | 13 | # Libraries 14 | *.lib 15 | *.a 16 | *.la 17 | *.lo 18 | 19 | # Shared objects (inc. Windows DLLs) 20 | *.dll 21 | *.so 22 | *.so.* 23 | *.dylib 24 | 25 | # Executables 26 | *.exe 27 | *.out 28 | *.app 29 | *.i*86 30 | *.x86_64 31 | *.hex 32 | 33 | # Debug files 34 | *.dSYM/ 35 | -------------------------------------------------------------------------------- /Acla.cpp: -------------------------------------------------------------------------------- 1 | #ifndef ACLA 2 | #define ACLA 3 | 4 | # include "Acla.h" 5 | 6 | 7 | Acla::Acla( const char * parameterFile, World * w ) { 8 | 9 | discreteStates = w->getDiscreteStates() ; 10 | stateDimension = w->getStateDimension() ; 11 | 12 | if ( not w->getDiscreteActions() ) { 13 | 14 | cout << "Acla does not support continuous actions." 
<< endl ; 15 | cout << "Please check which MDP you are using it on." << endl ; 16 | exit(0) ; 17 | 18 | } else { 19 | 20 | numberOfActions = w->getNumberOfActions() ; 21 | 22 | } 23 | 24 | srand48( clock() ) ; 25 | 26 | readParameterFile( parameterFile ) ; 27 | 28 | if ( discreteStates ) { 29 | 30 | numberOfStates = w->getNumberOfStates() ; 31 | 32 | Q = new double*[ numberOfStates ] ; 33 | V = new double[ numberOfStates ] ; 34 | 35 | for ( int s = 0 ; s < numberOfStates ; s++ ) { 36 | 37 | Q[s] = new double[ numberOfActions ] ; 38 | V[s] = 0.0 ; 39 | 40 | for ( int a = 0 ; a < numberOfActions ; a++ ) { 41 | Q[s][a] = 0.0 ; 42 | } 43 | 44 | } 45 | 46 | } else { 47 | 48 | int layerSizesA[] = { stateDimension, nHiddenQ, 1 } ; 49 | int layerSizesV[] = { stateDimension, nHiddenV, 1 } ; 50 | 51 | for ( int a = 0 ; a < numberOfActions ; a++ ) { 52 | QNN.push_back( new cNeuralNetwork( 1, layerSizesA ) ) ; 53 | } 54 | VNN = new cNeuralNetwork( 1, layerSizesV ) ; 55 | 56 | 57 | VTarget = new double[ 1 ] ; 58 | QTarget = new double[ 1 ] ; 59 | 60 | Qs = new double[ numberOfActions ] ; 61 | 62 | } 63 | 64 | policy = new double[ numberOfActions ] ; 65 | 66 | } 67 | 68 | Acla::~Acla() { 69 | 70 | if ( discreteStates ) { 71 | 72 | for ( int s = 0 ; s < numberOfStates ; s++ ) { 73 | 74 | delete [] Q[s] ; 75 | 76 | } 77 | 78 | delete [] Q ; 79 | delete [] V ; 80 | 81 | } else { 82 | 83 | for ( int a = 0 ; a < numberOfActions ; a++ ) { 84 | delete QNN[a] ; 85 | } 86 | QNN.clear() ; 87 | delete VNN ; 88 | 89 | delete [] QTarget ; 90 | delete [] VTarget ; 91 | 92 | delete [] Qs ; 93 | 94 | } 95 | 96 | delete [] policy ; 97 | 98 | } 99 | 100 | 101 | void Acla::readParameterFile( const char * parameterFile ) { 102 | 103 | if ( not discreteStates ) { 104 | 105 | ifstream ifile ; 106 | 107 | ifile.open( parameterFile, ifstream::in ) ; 108 | 109 | read_moveTo( &ifile, "nn" ) ; 110 | 111 | read_moveTo( &ifile, "nHiddenQ" ) ; 112 | ifile >> nHiddenQ ; 113 | read_moveTo( &ifile, "nHiddenV" ) 
; 114 | ifile >> nHiddenV ; 115 | 116 | ifile.close() ; 117 | 118 | } 119 | 120 | } 121 | 122 | void Acla::update( State * state, Action * action, double rt, State * nextState, bool endOfEpisode, double * learningRate, double gamma ) { 123 | 124 | int at = action->discreteAction ; 125 | 126 | if ( state->discrete ) { 127 | 128 | int st = state->discreteState ; 129 | int st_ = nextState->discreteState; 130 | 131 | double Vt = V[ st ] ; 132 | 133 | if ( endOfEpisode ) { 134 | 135 | V[ st ] += learningRate[1]*( rt - V[ st ] ) ; 136 | 137 | } else { 138 | 139 | V[ st ] += learningRate[1]*( rt + gamma*V[ st_ ] - V[ st ] ) ; 140 | 141 | } 142 | 143 | if ( V[ st ] > Vt ) { 144 | 145 | Q[ st ][ at ] += learningRate[0]*( 1.0 - Q[ st ][ at ] ) ; 146 | 147 | } else { 148 | 149 | Q[ st ][ at ] += learningRate[2]*( 0.0 - Q[ st ][ at ] ) ; 150 | 151 | } 152 | 153 | } else { 154 | 155 | double * st = state->continuousState ; 156 | double * st_ = nextState->continuousState ; 157 | 158 | if ( endOfEpisode ) { 159 | 160 | VTarget[ 0 ] = rt ; 161 | 162 | } else { 163 | 164 | double Vs_ = VNN->forwardPropagate( st_ )[0] ; 165 | 166 | VTarget[ 0 ] = rt + gamma*Vs_ ; 167 | 168 | } 169 | 170 | double Vt = VNN->forwardPropagate( st )[0] ; 171 | 172 | VNN->backPropagate( st, VTarget, learningRate[1] ) ; 173 | 174 | if ( VTarget[0] > Vt ) { 175 | 176 | QTarget[ 0 ] = 1.0 ; 177 | QNN[ at ]->backPropagate( st, QTarget, learningRate[0] ) ; 178 | 179 | } else { 180 | 181 | QTarget[ 0 ] = 0.0 ; 182 | QNN[ at ]->backPropagate( st, QTarget, learningRate[2] ) ; 183 | 184 | } 185 | } 186 | } 187 | 188 | unsigned int Acla::getNumberOfLearningRates() { 189 | 190 | return 3 ; 191 | 192 | } 193 | 194 | const char * Acla::getName() { 195 | 196 | return "Acla" ; 197 | 198 | } 199 | 200 | #endif //ACLA 201 | -------------------------------------------------------------------------------- /Acla.h: -------------------------------------------------------------------------------- 1 | #ifndef ACLA_H 2 | #define 
ACLA_H 3 | 4 | # include "StateActionAlgorithm.h" 5 | 6 | class Acla : public StateActionAlgorithm { 7 | public: 8 | Acla( const char * parameterFile, World * w ) ; 9 | ~Acla() ; 10 | void readParameterFile( const char * parameterFile ) ; 11 | void update( State * st, Action * action, double rt, State * st_, bool endOfEpisode, double * learningRate, double gamma ) ; 12 | unsigned int getNumberOfLearningRates() ; 13 | const char * getName() ; 14 | 15 | private: 16 | int nHiddenV ; 17 | double * V ; 18 | cNeuralNetwork * VNN ; 19 | double * VTarget ; 20 | 21 | }; 22 | 23 | #endif //ACLA_H 24 | -------------------------------------------------------------------------------- /Action.h: -------------------------------------------------------------------------------- 1 | #ifndef ACTION 2 | #define ACTION 3 | 4 | struct Action { 5 | bool continuous ; 6 | bool discrete ; 7 | 8 | int actionDimension ; 9 | int numberOfActions ; 10 | 11 | int discreteAction ; 12 | double * continuousAction ; 13 | }; 14 | 15 | #endif //Action 16 | -------------------------------------------------------------------------------- /Algorithm.cpp: -------------------------------------------------------------------------------- 1 | #ifndef ALGORITHM 2 | #define ALGORITHM 3 | 4 | # include "Algorithm.h" 5 | 6 | using namespace std ; 7 | 8 | Algorithm::Algorithm() { 9 | discreteStates = false ; 10 | continuousStates = false ; 11 | discreteActions = false ; 12 | continuousActions = false ; 13 | } 14 | 15 | Algorithm::~Algorithm() {} 16 | 17 | double Algorithm::max( double * array, int n ) { 18 | 19 | maxX = array[ 0 ] ; 20 | 21 | for ( int i = 1 ; i < n; i++ ) { 22 | 23 | if ( array[i] > maxX ) { 24 | maxX = array[i] ; 25 | } 26 | 27 | } 28 | 29 | return maxX ; 30 | 31 | } 32 | 33 | 34 | int Algorithm::argmax( double * array, int n ) { 35 | 36 | maxX = array[ 0 ] ; 37 | maxI = 0 ; 38 | 39 | for ( int i = 1 ; i < n; i++ ) { 40 | 41 | X = array[ i ] ; 42 | 43 | if ( X > maxX ) { 44 | maxX = X ; 45 | maxI 
= i ; 46 | } 47 | 48 | } 49 | 50 | return maxI ; 51 | 52 | } 53 | 54 | std::vector Algorithm::argmaxAll( double * array, int n ) { 55 | 56 | maxX = array[ 0 ] ; 57 | std::vector maxI ; 58 | maxI.push_back( 0 ) ; 59 | 60 | for ( int i = 1 ; i < n; i++ ) { 61 | 62 | X = array[ i ] ; 63 | 64 | if ( X > maxX ) { 65 | 66 | maxX = X ; 67 | maxI.clear() ; 68 | maxI.push_back( i ) ; 69 | 70 | } else if ( X == maxX ) { 71 | 72 | maxI.push_back( i ) ; 73 | 74 | } 75 | 76 | } 77 | 78 | return maxI ; 79 | 80 | } 81 | 82 | void Algorithm::egreedy( State * state, Action * action, double epsilon ) { 83 | 84 | if ( drand48() < epsilon ) { 85 | 86 | getMaxAction( state, action ) ; 87 | 88 | } else { 89 | 90 | getRandomAction( state, action ) ; 91 | 92 | } 93 | 94 | } 95 | 96 | void Algorithm::read_moveTo( ifstream * ifile, string label ) { 97 | string temp ; 98 | 99 | while ( temp.compare( label ) != 0 and ifile ) { 100 | 101 | *ifile >> temp ; 102 | 103 | if ( ifile->eof() ) { 104 | cout << "Read error: Could not find label '" << label << "' while reading parameter file." 
<< endl ; 105 | exit(0) ; 106 | } 107 | 108 | } 109 | } 110 | 111 | 112 | 113 | #endif //ALGORITHM 114 | -------------------------------------------------------------------------------- /Algorithm.h: -------------------------------------------------------------------------------- 1 | #ifndef ALGORITHM_H 2 | #define ALGORITHM_H 3 | 4 | # include 5 | # include 6 | # include 7 | # include 8 | # include 9 | # include 10 | 11 | # include "State.h" 12 | # include "Action.h" 13 | # include "World.h" 14 | 15 | using namespace std ; 16 | 17 | class Algorithm { 18 | public: 19 | Algorithm() ; 20 | virtual ~Algorithm() ; 21 | double max( double * array, int n ) ; 22 | int argmax( double * array, int n ) ; 23 | std::vector< int > argmaxAll( double * array, int n ) ; 24 | void egreedy( State * state, Action * action, double epsilon ) ; 25 | 26 | virtual void getMaxAction( State * state, Action * action ) =0 ; 27 | virtual void getRandomAction( State * state, Action * action ) =0 ; 28 | virtual void explore( State * state, Action * action, double explorationRate, std::string explorationType, bool endOfEpisode ) =0 ; 29 | virtual void update( State * st, Action * action, double rt, State * st_, bool endOfEpisode, double * learningRate, double gamma ) =0 ; 30 | virtual unsigned int getNumberOfLearningRates() =0; 31 | virtual bool getContinuousStates() =0; 32 | virtual bool getDiscreteStates() =0; 33 | virtual bool getContinuousActions() =0; 34 | virtual bool getDiscreteActions() =0; 35 | virtual const char * getName() =0; 36 | 37 | protected: 38 | void read_moveTo( ifstream *, string ) ; 39 | 40 | bool continuousStates, discreteStates, continuousActions, discreteActions ; 41 | int stateDimension, numberOfStates, actionDimension, numberOfActions ; 42 | double X, maxX ; 43 | int maxI ; 44 | }; 45 | 46 | #endif //ALGORITHM_H 47 | -------------------------------------------------------------------------------- /Arm.cpp: 
-------------------------------------------------------------------------------- 1 | #ifndef ARM 2 | #define ARM 3 | # include 4 | # include 5 | # include 6 | # include 7 | # include "Arm.h" 8 | #include 9 | #include "std_msgs/MultiArrayLayout.h" 10 | #include "std_msgs/MultiArrayDimension.h" 11 | #include 12 | #include 13 | #include 14 | 15 | using namespace std; 16 | 17 | Arm::Arm() { 18 | srand((unsigned)time(0)); 19 | MINACTION = -1.0; 20 | MAXACTION = 1.0; 21 | 22 | continuousActions = true ; 23 | discreteActions = true ; 24 | continuousStates = true ; 25 | discreteStates = false ; 26 | 27 | stateDimension = 4; 28 | actionDimension = 3; 29 | numberOfActions = 21; 30 | 31 | cnt = 0; 32 | 33 | char * p=0; 34 | int argc=0; 35 | ros::init(argc, &p, getName()); 36 | 37 | ros::NodeHandle n; 38 | pub_reset = n.advertise("/robot/0/reset", 1); 39 | pub_target = n.advertise("/robot/0/target", 1); 40 | pub_motor = n.advertise("/robot/0/motors", 1); 41 | pub_reward = n.advertise("/robot/0/perf", 1); 42 | sub_sensor = n.subscribe("/robot/0/sensors", 1, &Arm::sensorCallback, this); 43 | sensorValues = (sensor*)malloc(sizeof(sensor)*actionDimension); 44 | memset(sensorValues,0,sizeof(double)*actionDimension); 45 | // ros::R 46 | usleep(1000000); 47 | // loop_rate = new ros::Rate(10); 48 | 49 | gotsensor = false; 50 | 51 | // Mc = 1.0; 52 | // mp = 0.1; 53 | // l = 0.5; 54 | // t = 0.02; 55 | // g = 9.81; 56 | 57 | // totalM = Mc + mp; 58 | // ml = mp*l; 59 | 60 | // maxPhi = M_PI / 15.0; 61 | // maxX = 2.4; 62 | // maxDPhi = M_PI / 15.0; 63 | // maxDX = 2.4; 64 | t = 0.; 65 | 66 | // target 67 | tx = 0.; 68 | ty = 0.; 69 | 70 | // l0 = 0.69230769; 71 | // l1 = 0.23076923; 72 | // l2 = 0.07692308; 73 | 74 | reset() ; 75 | } 76 | 77 | Arm::~Arm() { 78 | } 79 | 80 | void Arm::sensorCallback(const std_msgs::Float32MultiArray::ConstPtr& sensormsg) { 81 | // std::cerr << "got something: [" << sensormsg->data[0] << ", " << sensormsg->data[1] << "]" << std::endl; 82 | // std::cerr 
<< "data size: " << sensormsg->data.size() << ", " << stateDimension << std::endl; 83 | int len=std::min((int)sensormsg->data.size(), stateDimension); 84 | for(int k=0;kdata[k]; 86 | // std::cout << "sensors: " << sensorValues[k] << std::endl; 87 | } 88 | // error is the input 89 | sensorValues[0] -= tx; 90 | sensorValues[1] -= ty; 91 | gotsensor = true; 92 | // cout << "gotsensor? cb " << gotsensor << endl; 93 | } 94 | 95 | void Arm::settarget() { 96 | double rnd = ((double) rand()/RAND_MAX); 97 | double phi = rnd * 0.5 * M_PI; 98 | double r = 0.8 + rnd * 0.2; 99 | tx = r * cos(phi); 100 | ty = r * sin(phi); 101 | cout << "rnd = " << rnd << ", phi = " << phi << ", r = " << r << ", tx " << tx << ", ty = " << ty << endl; 102 | 103 | std_msgs::Float32MultiArray msg_target; 104 | msg_target.data.clear(); 105 | msg_target.data.push_back(tx); 106 | msg_target.data.push_back(ty); 107 | msg_target.data.push_back(0.); 108 | pub_target.publish(msg_target); 109 | } 110 | 111 | void Arm::reset() { 112 | std_msgs::Int32 msg; 113 | msg.data = 23; 114 | pub_reset.publish(msg); 115 | 116 | settarget(); 117 | // cout << "tx " << tx << ", ty = " << ty << endl; 118 | cnt = 0; 119 | // motor 120 | eoe = true ; 121 | } 122 | 123 | void Arm::step( double h, double *force ) { 124 | std_msgs::Float32MultiArray msg; 125 | // msg.layout.dim_length = 1; 126 | msg.data.clear(); 127 | for(int k=0;k(sensors[0], sensors[sensornumber-1]); 132 | // memcpy(msg.data,sensors, msg.data); 133 | // for (int i = 0; i < 10; i++) { 134 | // cout << "publishing " << i << endl; 135 | pub_motor.publish(msg); 136 | // cout << "published " << i << endl; 137 | // ros::spinOnce(); 138 | // cout << "spun " << i << endl; 139 | // loop_rate->sleep(); 140 | // } 141 | // spin once so to harvest incoming data 142 | // cout << "step" << endl; 143 | } 144 | 145 | double Arm::reward() { 146 | std_msgs::Float32MultiArray msg_reward; 147 | double reward = 1.0; 148 | double err = 0., xerr = 0, yerr = 0; 149 | // double tx 
= 1.; 150 | // double ty = 0.; 151 | 152 | xerr = fabs(sensorValues[0] - tx); 153 | yerr = fabs(sensorValues[1] - ty); 154 | err = sqrt(pow(xerr + yerr, 2)); 155 | // cout << "sensors: " << sensorValues[0] << ", " << sensorValues[1] << endl; 156 | // cout << "errx " << fabs(sensorValues[0] - tx) << ", " << "erry " << fabs(sensorValues[1] - ty) << endl; 157 | // if (err < 0.2) { 158 | // reward = 1.0; 159 | // } 160 | // else { 161 | // reward = -1.; 162 | // } 163 | // if (reward > 0) { 164 | // // cout << "reward = " << reward << endl; 165 | // cout << reward << flush; 166 | // } 167 | reward = -err; 168 | 169 | msg_reward.data.clear(); 170 | msg_reward.data.push_back(reward); 171 | pub_reward.publish(msg_reward); 172 | 173 | // cout << "reward" << endl; 174 | return reward; 175 | } 176 | 177 | void Arm::update(double deltaTime, double *force) { 178 | int n = 1; 179 | double h = deltaTime/n; 180 | gotsensor = false; 181 | for( int i = 0 ; i < n ; i++ ) { 182 | step(h, force); 183 | } 184 | // cout << "gotsensor? pre" << gotsensor << endl; 185 | while (!gotsensor && ros::ok()) { 186 | // wait 187 | // cout << "gotsensor? inner " << gotsensor << endl; 188 | ros::spinOnce(); 189 | usleep(100); 190 | // // loop_rate->sleep(); 191 | } 192 | // cout << "gotsensor? 
post" << gotsensor << endl; 193 | std_msgs::Float32MultiArray msg_target; 194 | msg_target.data.clear(); 195 | msg_target.data.push_back(tx); 196 | msg_target.data.push_back(ty); 197 | msg_target.data.push_back(0.); 198 | pub_target.publish(msg_target); 199 | cnt++; 200 | 201 | if (cnt % 500 == 0) 202 | settarget(); 203 | // cout << "update" << endl; 204 | } 205 | 206 | double Arm::act( Action * action ) { 207 | 208 | double force = 0.0 ; 209 | 210 | if ( action->continuous ) { 211 | // cout << "a0: " << action->continuousAction[0] << ", a1: " << action->continuousAction[1] << endl; 212 | // This version of cart pole scales continuous actions in [-1.0, 1.0] to the nearest 213 | // discrete action in the set { -10, -9, ..., 9, 10 } 214 | 215 | // force = action->continuousAction[0] ; 216 | 217 | // if ( force < -1 ) { 218 | // force = -1.0 ; 219 | // } else if ( force > 1 ) { 220 | // force = 1.0 ; 221 | // } 222 | 223 | // force += 1.0 ; // range: [0.0, 2.0] 224 | // force *= 0.5 ; // range: [0.0, 1.0] 225 | // force *= numberOfActions - 1 ; // range: [0.0, numberOfActions - 1] 226 | // force = floor( force + 0.5 ) ; // set: { 0, 1, ..., numberOfActions - 1 } 227 | 228 | } else if ( action->discrete ) { 229 | 230 | force = action->discreteAction ; 231 | } 232 | 233 | // //Scale force from { 0, 1, ..., numberOfActions - 1 } to { -10, 10 }: 234 | // force /= numberOfActions - 1 ; // set: {0.0, 1/(numberOfActions-1), ..., 1.0 } 235 | // force *= MAXACTION - MINACTION ; // set: {0.0, 20.0/(numberOfActions-1), ..., 20.0 } 236 | // force += MINACTION ; // set: {-10.0, -10.0 + 20.0/(numberOfActions-1), ..., 10.0 } 237 | 238 | // return act( force ) ; 239 | update(t, action->continuousAction); 240 | // cout << "act" << endl; 241 | double r = reward(); 242 | 243 | // if (r <= -1.5) { 244 | // reset(); 245 | // } 246 | // else { 247 | // eoe = false; 248 | // } 249 | return r; 250 | } 251 | 252 | // double Arm::act( double force ) { 253 | // // send out command 254 | // 
update( t, force ); 255 | // double r = reward(); 256 | 257 | // if (r < 0) { 258 | // reset(); 259 | // } else { 260 | // eoe = false ; 261 | // } 262 | // return r; 263 | // } 264 | 265 | bool Arm::endOfEpisode() { 266 | return eoe ; 267 | } 268 | 269 | void Arm::getState( State * state ) { 270 | int i; 271 | for (i = 0; i < stateDimension; i++) { 272 | state->continuousState[i] = sensorValues[i]; 273 | // state->continuousState[1] = 0.; 274 | // state->continuousState[2] = 0.; 275 | // state->continuousState[3] = 0.; 276 | } 277 | // cout << "getState" << endl; 278 | 279 | } 280 | 281 | void Arm::setState( State * state ) { 282 | 283 | // phi = state->continuousState[0]; 284 | // dphi = state->continuousState[1]; 285 | // x = state->continuousState[2]; 286 | // dx = state->continuousState[3]; 287 | 288 | } 289 | 290 | bool Arm::getDiscreteStates() { 291 | return false ; 292 | } 293 | 294 | bool Arm::getContinuousStates() { 295 | return true ; 296 | } 297 | 298 | int Arm::getStateDimension() { 299 | return stateDimension ; 300 | } 301 | 302 | bool Arm::getDiscreteActions() { 303 | return true ; 304 | } 305 | 306 | bool Arm::getContinuousActions() { 307 | return true ; 308 | } 309 | 310 | int Arm::getActionDimension() { 311 | return actionDimension ; 312 | } 313 | 314 | int Arm::getNumberOfActions() { 315 | return numberOfActions; 316 | } 317 | 318 | const char * Arm::getName() { 319 | return "Arm" ; 320 | } 321 | 322 | #endif //ARM 323 | -------------------------------------------------------------------------------- /Arm.h: -------------------------------------------------------------------------------- 1 | #ifndef ARM_H 2 | #define ARM_H 3 | # include "State.h" 4 | # include "StateActionUtils.h" 5 | # include "World.h" 6 | #include 7 | #include 8 | #include 9 | 10 | typedef float motor; 11 | typedef float sensor; 12 | 13 | class Arm : public World { 14 | public: 15 | Arm(); 16 | ~Arm(); 17 | 18 | double act( Action * ); 19 | void getState( State * ); 20 | void 
setState( State * ); 21 | bool endOfEpisode(); 22 | void reset(); 23 | 24 | bool getDiscreteStates() ; 25 | bool getContinuousStates() ; 26 | int getStateDimension() ; 27 | bool getDiscreteActions() ; 28 | bool getContinuousActions() ; 29 | int getActionDimension() ; 30 | int getNumberOfActions() ; 31 | 32 | const char * getName() ; 33 | // ros 34 | void settarget(); 35 | void sensorCallback(const std_msgs::Float32MultiArray::ConstPtr& motormsg); 36 | 37 | private: 38 | double act( double ) ; 39 | void accel(); 40 | void step( double, double* ); 41 | void update(double , double* ); 42 | double reward(); 43 | // double phi, dphi, x, dx; 44 | // double maxPhi, maxDPhi, maxX, maxDX; 45 | double MINACTION, MAXACTION; 46 | double t; 47 | // double Mc, mp, l, t, g; 48 | // double totalM, ml, NORMANGLE; 49 | double theta0, theta1, theta2; 50 | double l0, l1, l2; 51 | bool eoe ; 52 | // ros 53 | // int number_sensors; 54 | // int number_motors; 55 | int cnt; 56 | bool gotsensor; 57 | ros::Publisher pub_reset; 58 | ros::Publisher pub_motor; 59 | ros::Subscriber sub_sensor; 60 | ros::Publisher pub_target; 61 | ros::Publisher pub_reward; 62 | // ros::Rate *loop_rate; 63 | motor* motorValues; 64 | sensor* sensorValues; 65 | double tx, ty; 66 | //double ty; 67 | 68 | }; 69 | 70 | #endif // ARM_H 71 | -------------------------------------------------------------------------------- /ArmCfgCacla: -------------------------------------------------------------------------------- 1 | nExperiments 10 2 | 3 | 4 | steps 5 | nTrainSteps 30000 6 | trainStorePer 3000 7 | nTestSteps 15000 8 | testStorePer 15000 9 | nTrainEpisodes 0 10 | nTestEpisodes 0 11 | nMaxStepsPerTrainEpisode 0 12 | nMaxStepsPerTestEpisode 0 13 | 14 | algorithm 15 | algorithms Cacla 16 | tau 0.001 0.01 0.1 1.0 10.0 17 | epsilon 0.001 0.01 0.1 1.0 18 | sigma 0.01 0.1 1.0 10.0 19 | 20 | learningRates 21 | decreaseType none 22 | nLearningRates 3 23 | learningRate 0.0001 0.001 0.01 0.1 24 | learningRate 0.0 0.001 0.01 0.1 
25 | learningRate 0.0 0.001 0.01 0.1 26 | 27 | discount 28 | gamma 0.99 29 | 30 | nn 31 | nHiddenQ 20 32 | nHiddenV 20 33 | -------------------------------------------------------------------------------- /ArmCfgCaclaSimple: -------------------------------------------------------------------------------- 1 | nExperiments 10 2 | 3 | 4 | steps 5 | nTrainSteps 30000 6 | trainStorePer 3000 7 | nTestSteps 15000 8 | testStorePer 15000 9 | nTrainEpisodes 0 10 | nTestEpisodes 0 11 | nMaxStepsPerTrainEpisode 0 12 | nMaxStepsPerTestEpisode 0 13 | 14 | algorithm 15 | algorithms Cacla 16 | tau 17 | epsilon 18 | sigma 0.05 19 | 20 | learningRates 21 | decreaseType none 22 | nLearningRates 3 23 | learningRate 0.1 24 | learningRate 0.1 25 | learningRate 0.1 26 | 27 | discount 28 | gamma 0. 29 | 30 | nn 31 | nHiddenQ 20 32 | nHiddenV 20 33 | -------------------------------------------------------------------------------- /ArmInterface.cpp: -------------------------------------------------------------------------------- 1 | # ifndef ARMINTERFACE 2 | # define ARMINTERFACE 3 | 4 | # include "ArmInterface.h" 5 | # include "Arm.h" 6 | 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | ArmInterface::ArmInterface( const char * pFile ) { 15 | 16 | parameterFile = pFile ; 17 | 18 | parameterPath.assign( parameterFile ) ; 19 | 20 | readParameterFile() ; 21 | 22 | } 23 | 24 | 25 | // void my_handler1(sig_t s){ 26 | // printf("Caught signal %d\n",s); 27 | // ros::shutdown(); 28 | // exit(1); 29 | // } 30 | 31 | void my_handler(int s){ 32 | printf("Caught signal %d\n",s); 33 | ros::shutdown(); 34 | exit(1); 35 | } 36 | 37 | int main( int argn, char *argv[] ) { 38 | 39 | if ( argn != 2 ) { 40 | 41 | cout << "You need to specify exactly one configuration (parameter) file." 
<< endl ; 42 | return 0 ; 43 | 44 | } 45 | 46 | // struct sigaction sigIntHandler; 47 | 48 | // sigIntHandler.sa_handler = my_handler; 49 | // sigemptyset(&sigIntHandler.sa_mask); 50 | // sigIntHandler.sa_flags = 0; 51 | 52 | // sigaction(SIGINT, &sigIntHandler, NULL); 53 | signal (SIGINT,my_handler); 54 | 55 | ArmInterface * interface = new ArmInterface( argv[1] ) ; 56 | 57 | Arm * arm = new Arm() ; 58 | 59 | // while(ros::ok()) { 60 | interface->getParametersAndRunExperiments( arm ) ; 61 | // } 62 | 63 | delete arm ; 64 | delete interface ; 65 | cout << "ending" << endl; 66 | //pause(); 67 | } 68 | 69 | # endif //ARMINTERFACE 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /ArmInterface.h: -------------------------------------------------------------------------------- 1 | # ifndef ARMINTERFACE_H 2 | # define ARMINTERFACE_H 3 | 4 | # include "Experiment.h" 5 | # include "Arm.h" 6 | 7 | class ArmInterface : public Experiment { 8 | public: 9 | ArmInterface( const char * parameterFile ) ; 10 | ~ArmInterface() {}; 11 | 12 | }; 13 | 14 | # endif //ARMINTERFACE_H 15 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.3) 2 | 3 | project(rlcpp) 4 | 5 | # find_package(catkin REQUIRED) 6 | 7 | link_directories(/opt/ros/kinetic/lib) 8 | 9 | add_library(rlcpp_algorithms SHARED Sarsa.cpp Qlearning.cpp QVlearning.cpp Acla.cpp Cacla.cpp) 10 | 11 | add_library(rlcpp_util SHARED StateActionUtils.cpp cNeuralNetwork.cpp Algorithm.cpp StateActionAlgorithm.cpp Experiment.cpp) 12 | 13 | # this doesn't work as ROS code needs to be included conditionally 14 | # add_executable(CartPole CartPole.cpp CartPoleInterface.cpp) 15 | # target_link_libraries(CartPole rlcpp_util rlcpp_algorithms) 16 | 17 | add_executable(CartPoleRos CartPole.cpp CartPoleInterface.cpp) 18 | 
target_link_libraries(CartPoleRos rlcpp_util rlcpp_algorithms roscpp rosconsole roscpp_serialization roslib rostime) 19 | 20 | add_executable(ArmRos Arm.cpp ArmInterface.cpp) 21 | target_link_libraries(ArmRos rlcpp_util rlcpp_algorithms roscpp rosconsole roscpp_serialization roslib rostime) 22 | 23 | -------------------------------------------------------------------------------- /Cacla.cpp: -------------------------------------------------------------------------------- 1 | #ifndef CACLA 2 | #define CACLA 3 | 4 | # include "Cacla.h" 5 | 6 | 7 | Cacla::Cacla( const char * parameterFile, World * w ) { 8 | 9 | discreteStates = w->getDiscreteStates() ; 10 | 11 | actionDimension = w->getActionDimension() ; 12 | 13 | srand48( clock() ) ; 14 | 15 | readParameterFile( parameterFile ) ; 16 | 17 | if ( discreteStates ) { 18 | 19 | numberOfStates = w->getNumberOfStates() ; 20 | 21 | A = new double*[ numberOfStates ] ; 22 | V = new double[ numberOfStates ] ; 23 | 24 | for ( int s = 0 ; s < numberOfStates ; s++ ) { 25 | 26 | A[ s ] = new double[ actionDimension ] ; 27 | 28 | for ( int a = 0 ; a < actionDimension ; a++ ) { 29 | A[ s ][ a ] = 0.0 ; 30 | } 31 | 32 | V[ s ] = 0.0 ; 33 | 34 | } 35 | 36 | } else { 37 | 38 | stateDimension = w->getStateDimension() ; 39 | 40 | int layerSizesA[] = { stateDimension, nHiddenQ, actionDimension } ; 41 | int layerSizesV[] = { stateDimension, nHiddenV, 1 } ; 42 | 43 | ANN = new cNeuralNetwork( 1, layerSizesA ) ; 44 | VNN = new cNeuralNetwork( 1, layerSizesV ) ; 45 | 46 | VTarget = new double[ 1 ] ; 47 | 48 | } 49 | 50 | storedGauss = false ; 51 | 52 | } 53 | 54 | Cacla::~Cacla() { 55 | 56 | if ( discreteStates ) { 57 | 58 | for ( int s = 0 ; s < numberOfStates ; s++ ) { 59 | delete[] A[s] ; 60 | } 61 | 62 | delete [] A ; 63 | delete [] V ; 64 | 65 | } else { 66 | 67 | delete ANN ; 68 | delete VNN ; 69 | 70 | delete [] VTarget ; 71 | 72 | } 73 | 74 | } 75 | 76 | 77 | void Cacla::readParameterFile( const char * parameterFile ) { 78 | 79 | if ( 
not discreteStates ) { 80 | 81 | ifstream ifile ; 82 | 83 | ifile.open( parameterFile, ifstream::in ) ; 84 | 85 | read_moveTo( &ifile, "nn" ) ; 86 | 87 | read_moveTo( &ifile, "nHiddenQ" ) ; 88 | ifile >> nHiddenQ ; 89 | read_moveTo( &ifile, "nHiddenV" ) ; 90 | ifile >> nHiddenV ; 91 | 92 | ifile.close() ; 93 | 94 | } 95 | 96 | } 97 | 98 | void Cacla::getMaxAction( State * state, Action * action ) { 99 | 100 | double * As ; 101 | 102 | if ( state->discrete ) { 103 | 104 | As = A[ state->discreteState ] ; 105 | 106 | } else { 107 | 108 | As = ANN->forwardPropagate( state->continuousState ); 109 | 110 | } 111 | 112 | for ( int a = 0 ; a < actionDimension ; a++ ) { 113 | 114 | action->continuousAction[a] = As[a] ; 115 | 116 | } 117 | 118 | } 119 | 120 | void Cacla::getRandomAction( State * state, Action * action ) { 121 | 122 | for ( int a = 0 ; a < actionDimension ; a++ ) { 123 | 124 | action->continuousAction[a] = 2.0*drand48() - 1.0 ; 125 | 126 | } 127 | 128 | } 129 | 130 | 131 | void Cacla::explore( State * state, Action * action, double explorationRate, string explorationType, bool endOfEpisode ) { 132 | 133 | if ( explorationType.compare("boltzmann") == 0 ) { 134 | 135 | cout << "Boltzmann exploration is as of yet undefined for Cacla." << endl ; 136 | exit(0) ; 137 | 138 | } else if ( explorationType.compare("egreedy") == 0 ) { 139 | 140 | egreedy( state, action, explorationRate ) ; 141 | 142 | } else if ( explorationType.compare("gaussian") == 0 ) { 143 | 144 | gaussian( state, action, explorationRate ) ; 145 | 146 | } else { 147 | 148 | cout << "Warning, no exploreType: " << explorationType << endl ; 149 | exit(0) ; 150 | 151 | } 152 | 153 | } 154 | 155 | double Cacla::gaussianRandom() { 156 | // Generates gaussian (or normal) random numbers, with mean = 0 and 157 | // std dev = 1. Used for gaussian exploration. 
158 | 159 | if ( storedGauss ) { 160 | 161 | storedGauss = false ; 162 | 163 | return g2 ; 164 | 165 | } else { 166 | 167 | double x, y ; 168 | 169 | double z = 1.0 ; 170 | 171 | while ( z >= 1.0 ) { 172 | x = 2.0*drand48() - 1.0; 173 | y = 2.0*drand48() - 1.0; 174 | z = x * x + y * y; 175 | } 176 | 177 | z = sqrt( -2.0 * log( z ) / z ); 178 | 179 | g1 = x * z; 180 | g2 = y * z; 181 | 182 | storedGauss = true ; 183 | 184 | return g1 ; 185 | 186 | } 187 | 188 | } 189 | 190 | void Cacla::gaussian( State * state, Action * action, double sigma ) { 191 | 192 | getMaxAction( state, action ) ; 193 | 194 | for ( int a = 0 ; a < actionDimension ; a++ ) { 195 | 196 | action->continuousAction[a] += sigma*gaussianRandom() ; 197 | 198 | } 199 | 200 | } 201 | 202 | void Cacla::update( State * state, Action * action, double rt, State * nextState, bool endOfEpisode, double * learningRate, double gamma ) { 203 | 204 | double * at = action->continuousAction ; 205 | 206 | // cout << "Cacla.cpp:learningRate[0] = " << learningRate[0] << ", [1] = " << learningRate[1] << endl; 207 | if ( state->discrete ) { 208 | 209 | int st = state->discreteState ; 210 | int st_ = nextState->discreteState ; 211 | 212 | double Vt = V[ st ] ; 213 | 214 | if ( endOfEpisode ) { 215 | 216 | V[ st ] += learningRate[1]*( rt - V[ st ] ) ; 217 | 218 | } else { 219 | 220 | V[ st ] += learningRate[1]*( rt + gamma*V[ st_ ] - V[ st ] ) ; 221 | 222 | } 223 | 224 | if ( V[ st ] > Vt ) { 225 | 226 | for ( int a = 0 ; a < actionDimension ; a++ ) { 227 | 228 | A[ st ][ a ] += learningRate[0]*( at[ a ] - A[ st ][ a ] ) ; 229 | 230 | } 231 | 232 | } 233 | 234 | } else { 235 | 236 | double * st = state->continuousState ; 237 | double * st_ = nextState->continuousState ; 238 | 239 | if ( endOfEpisode ) { 240 | 241 | VTarget[ 0 ] = rt ; 242 | 243 | } else { 244 | 245 | double Vs_ = VNN->forwardPropagate( st_ )[0] ; 246 | 247 | VTarget[ 0 ] = rt + gamma*Vs_ ; 248 | 249 | } 250 | 251 | double Vt = VNN->forwardPropagate( st 
)[0] ; 252 | 253 | VNN->backPropagate( st, VTarget, learningRate[1] ) ; 254 | 255 | if ( VTarget[0] > Vt ) { 256 | // cout << "st: " << *st << ", at: " << *at << endl; 257 | ANN->backPropagate( st, at, learningRate[0] ) ; 258 | 259 | } 260 | 261 | } 262 | 263 | } 264 | 265 | 266 | unsigned int Cacla::getNumberOfLearningRates() { 267 | 268 | return 2 ; 269 | 270 | } 271 | 272 | bool Cacla::getContinuousStates() { 273 | 274 | return true ; 275 | 276 | } 277 | 278 | bool Cacla::getDiscreteStates() { 279 | 280 | return true ; 281 | 282 | } 283 | 284 | bool Cacla::getContinuousActions() { 285 | 286 | return true ; 287 | 288 | } 289 | 290 | bool Cacla::getDiscreteActions() { 291 | 292 | return false ; 293 | 294 | } 295 | 296 | const char * Cacla::getName() { 297 | 298 | return "Cacla" ; 299 | 300 | } 301 | 302 | #endif //CACLA 303 | -------------------------------------------------------------------------------- /Cacla.h: -------------------------------------------------------------------------------- 1 | #ifndef CACLA_H 2 | #define CACLA_H 3 | 4 | # include 5 | # include 6 | # include 7 | # include 8 | # include "cNeuralNetwork.h" 9 | # include "Algorithm.h" 10 | # include "StateActionUtils.h" 11 | 12 | class Cacla : public Algorithm { 13 | public: 14 | Cacla( const char * parameterFile, World * w ) ; 15 | ~Cacla() ; 16 | void readParameterFile( const char * parameterFile ) ; 17 | void getMaxAction( State * state, Action * action ) ; 18 | void getRandomAction( State * state, Action * action ) ; 19 | void explore( State * state, Action * action, double explorationRate, string explorationType, bool endOfEpisode ) ; 20 | void update( State * st, Action * action, double rt, State * st_, bool endOfEpisode, double * learningRate, double gamma ) ; 21 | unsigned int getNumberOfLearningRates() ; 22 | bool getContinuousStates() ; 23 | bool getDiscreteStates() ; 24 | bool getContinuousActions() ; 25 | bool getDiscreteActions() ; 26 | const char * getName() ; 27 | 28 | private: 29 
| void gaussian( State * state, Action * action, double sigma ) ; 30 | 31 | double gaussianRandom() ; 32 | double g1, g2 ; 33 | bool storedGauss ; 34 | 35 | int nHiddenQ, nHiddenV ; 36 | double epsilon, sigma ; 37 | double * V ; 38 | double ** A ; 39 | cNeuralNetwork * ANN ; 40 | cNeuralNetwork * VNN ; 41 | double * VTarget ; 42 | }; 43 | 44 | #endif //CACLA_H 45 | -------------------------------------------------------------------------------- /CartPole.cpp: -------------------------------------------------------------------------------- 1 | #ifndef CARTPOLE 2 | #define CARTPOLE 3 | # include 4 | # include 5 | # include 6 | # include 7 | # include "CartPole.h" 8 | #include 9 | #include "std_msgs/MultiArrayLayout.h" 10 | #include "std_msgs/MultiArrayDimension.h" 11 | #include 12 | #include 13 | #include 14 | 15 | using namespace std; 16 | 17 | CartPole::CartPole() { 18 | srand((unsigned)time(0)); 19 | MINACTION = -10.0; 20 | MAXACTION = 10.0; 21 | 22 | continuousActions = true ; 23 | discreteActions = true ; 24 | continuousStates = true ; 25 | discreteStates = false ; 26 | 27 | stateDimension = 4; 28 | actionDimension = 1; 29 | numberOfActions = 21 ; 30 | 31 | Mc = 1.0; 32 | mp = 0.1; 33 | l = 0.5; 34 | t = 0.02; 35 | g = 9.81; 36 | 37 | totalM = Mc + mp; 38 | ml = mp*l; 39 | 40 | maxPhi = M_PI / 15.0; 41 | maxX = 2.4; 42 | maxDPhi = M_PI / 15.0; 43 | maxDX = 2.4; 44 | 45 | char * p=0; 46 | int argc=0; 47 | ros::init(argc, &p, getName()); 48 | 49 | ros::NodeHandle n; 50 | // pub_reset = n.advertise("/arm/reset", 1); 51 | //pub_target = n.advertise("/arm/target", 1); 52 | pub_motor = n.advertise("/robot/0/motors", 1); 53 | pub_sensor1 = n.advertise("/robot/0/pos", 1); 54 | pub_sensor2 = n.advertise("/robot/1/pos", 1); 55 | usleep(1000000); 56 | loop_rate = new ros::Rate(1000); 57 | 58 | // msg.layout.dim = (std_msgs::MultiArrayDimension *) 59 | // malloc(sizeof(std_msgs::MultiArrayDimension) * 1); 60 | // msg.layout.dim[0].label = "msg"; 61 | // 
msg.layout.dim[0].size = 4; 62 | // msg.layout.dim[0].stride = 1*4; 63 | // msg.layout.dim_length = 1; 64 | // msg.layout.data_offset = 0; 65 | 66 | // msg.data = (float *)malloc(sizeof(float)*3); 67 | // msg.data_length = 3; 68 | 69 | reset() ; 70 | } 71 | 72 | CartPole::~CartPole() { 73 | } 74 | 75 | void CartPole::reset() { 76 | double rnd = ((double) rand()/RAND_MAX); 77 | 78 | phi = 0.1*rnd - 0.05; 79 | dphi = 0.0; 80 | x = 0.0; 81 | dx = 0.0; 82 | 83 | eoe = true ; 84 | } 85 | 86 | void CartPole::step( double h, double force ) { 87 | std_msgs::Float32MultiArray msg; 88 | double cos_phi = cos( phi ); 89 | double sin_phi = sin( phi ); 90 | double dphi_sq = dphi*dphi; 91 | double cos_phi_sq = cos_phi*cos_phi; 92 | 93 | double ddphi = force*cos_phi - totalM*g*sin_phi + ml*(cos_phi*sin_phi)*dphi*dphi ; 94 | ddphi /= ml*cos_phi*cos_phi - totalM*l ; 95 | 96 | double ddx = force + ml*sin_phi*dphi_sq - mp*g*cos_phi*sin_phi ; 97 | ddx /= totalM - mp*cos_phi_sq ; 98 | 99 | phi += h*dphi; 100 | dphi += h*ddphi; 101 | x += h*dx; 102 | dx += h*ddx; 103 | 104 | cos_phi = cos( phi ); 105 | sin_phi = sin( phi ); 106 | 107 | // do ros pubishing 108 | // motors / action 109 | msg.data.clear(); 110 | for(int k=0;k maxPhi) { 140 | reward = -1.0; 141 | } else if (x > maxX) { 142 | reward = -1.0; 143 | } else if (x < -maxX) { 144 | reward = -1.0; 145 | } 146 | return reward; 147 | } 148 | 149 | void CartPole::update(double deltaTime, double force) { 150 | int n = 10; 151 | double h = deltaTime/n; 152 | for( int i = 0 ; i < n ; i++ ) { 153 | step(h, force); 154 | } 155 | loop_rate->sleep(); 156 | } 157 | 158 | double CartPole::act( Action * action ) { 159 | 160 | double force = 0.0 ; 161 | 162 | if ( action->continuous ) { 163 | // This version of cart pole scales continuous actions in [-1.0, 1.0] to the nearest 164 | // discrete action in the set { -10, -9, ..., 9, 10 } 165 | 166 | force = action->continuousAction[0] ; 167 | 168 | if ( force < -1 ) { 169 | force = -1.0 ; 170 | } 
else if ( force > 1 ) { 171 | force = 1.0 ; 172 | } 173 | 174 | force += 1.0 ; // range: [0.0, 2.0] 175 | force *= 0.5 ; // range: [0.0, 1.0] 176 | force *= numberOfActions - 1 ; // range: [0.0, numberOfActions - 1] 177 | force = floor( force + 0.5 ) ; // set: { 0, 1, ..., numberOfActions - 1 } 178 | 179 | } else if ( action->discrete ) { 180 | 181 | force = action->discreteAction ; 182 | } 183 | 184 | //Scale force from { 0, 1, ..., numberOfActions - 1 } to { -10, 10 }: 185 | force /= numberOfActions - 1 ; // set: {0.0, 1/(numberOfActions-1), ..., 1.0 } 186 | force *= MAXACTION - MINACTION ; // set: {0.0, 20.0/(numberOfActions-1), ..., 20.0 } 187 | force += MINACTION ; // set: {-10.0, -10.0 + 20.0/(numberOfActions-1), ..., 10.0 } 188 | 189 | return act( force ) ; 190 | 191 | } 192 | 193 | double CartPole::act( double force ) { 194 | 195 | update( t, force ); 196 | double r = reward(); 197 | 198 | if (r < 0) { 199 | reset(); 200 | } else { 201 | eoe = false ; 202 | } 203 | return r; 204 | } 205 | 206 | bool CartPole::endOfEpisode() { 207 | return eoe ; 208 | } 209 | 210 | void CartPole::getState( State * state ) { 211 | 212 | state->continuousState[0] = phi /maxPhi; 213 | state->continuousState[1] = dphi/maxDPhi; 214 | state->continuousState[2] = x /maxX; 215 | state->continuousState[3] = dx /maxDX; 216 | 217 | } 218 | 219 | void CartPole::setState( State * state ) { 220 | 221 | phi = state->continuousState[0]*maxPhi; 222 | dphi = state->continuousState[1]*maxDPhi; 223 | x = state->continuousState[2]*maxX; 224 | dx = state->continuousState[3]*maxDX; 225 | 226 | } 227 | 228 | bool CartPole::getDiscreteStates() { 229 | return false ; 230 | } 231 | 232 | bool CartPole::getContinuousStates() { 233 | return true ; 234 | } 235 | 236 | int CartPole::getStateDimension() { 237 | return stateDimension ; 238 | } 239 | 240 | bool CartPole::getDiscreteActions() { 241 | return true ; 242 | } 243 | 244 | bool CartPole::getContinuousActions() { 245 | return true ; 246 | } 247 | 
248 | int CartPole::getActionDimension() { 249 | return actionDimension ; 250 | } 251 | 252 | int CartPole::getNumberOfActions() { 253 | return numberOfActions; 254 | } 255 | 256 | const char * CartPole::getName() { 257 | return "CartPole" ; 258 | } 259 | 260 | #endif //CARTPOLE 261 | -------------------------------------------------------------------------------- /CartPole.h: -------------------------------------------------------------------------------- 1 | #ifndef CARTPOLE_H 2 | #define CARTPOLE_H 3 | # include "State.h" 4 | # include "StateActionUtils.h" 5 | # include "World.h" 6 | #include 7 | #include 8 | 9 | class CartPole : public World { 10 | public: 11 | CartPole(); 12 | ~CartPole(); 13 | 14 | double act( Action * ); 15 | void getState( State * ); 16 | void setState( State * ); 17 | bool endOfEpisode(); 18 | void reset(); 19 | 20 | bool getDiscreteStates() ; 21 | bool getContinuousStates() ; 22 | int getStateDimension() ; 23 | bool getDiscreteActions() ; 24 | bool getContinuousActions() ; 25 | int getActionDimension() ; 26 | int getNumberOfActions() ; 27 | 28 | const char * getName() ; 29 | 30 | private: 31 | double act( double ) ; 32 | void accel(); 33 | void step( double, double ); 34 | void update(double , double ); 35 | double reward(); 36 | double phi, dphi, x, dx; 37 | double maxPhi, maxDPhi, maxX, maxDX; 38 | double MINACTION, MAXACTION; 39 | double Mc, mp, l, t, g; 40 | double totalM, ml, NORMANGLE; 41 | bool eoe ; 42 | ros::Rate *loop_rate; 43 | ros::Publisher pub_motor; 44 | ros::Publisher pub_sensor1; 45 | ros::Publisher pub_sensor2; 46 | }; 47 | 48 | #endif // CARTPOLE_H 49 | -------------------------------------------------------------------------------- /CartPoleCfg: -------------------------------------------------------------------------------- 1 | nExperiments 10 2 | 3 | 4 | steps 5 | nTrainSteps 30000 6 | trainStorePer 3000 7 | nTestSteps 15000 8 | testStorePer 15000 9 | nTrainEpisodes 0 10 | nTestEpisodes 0 11 | 
nMaxStepsPerTrainEpisode 0 12 | nMaxStepsPerTestEpisode 0 13 | 14 | algorithm 15 | algorithms Q Sarsa QV Acla Cacla 16 | tau 0.001 0.01 0.1 1.0 10.0 17 | epsilon 0.001 0.01 0.1 1.0 18 | sigma 0.01 0.1 1.0 10.0 19 | 20 | learningRates 21 | decreaseType none 22 | nLearningRates 3 23 | learningRate 0.0001 0.001 0.01 0.1 24 | learningRate 0.0 0.001 0.01 0.1 25 | learningRate 0.0 0.001 0.01 0.1 26 | 27 | discount 28 | gamma 0.99 29 | 30 | nn 31 | nHiddenQ 20 32 | nHiddenV 20 33 | -------------------------------------------------------------------------------- /CartPoleCfgCacla: -------------------------------------------------------------------------------- 1 | nExperiments 10 2 | 3 | 4 | steps 5 | nTrainSteps 30000 6 | trainStorePer 3000 7 | nTestSteps 15000 8 | testStorePer 15000 9 | nTrainEpisodes 0 10 | nTestEpisodes 0 11 | nMaxStepsPerTrainEpisode 0 12 | nMaxStepsPerTestEpisode 0 13 | 14 | algorithm 15 | algorithms Cacla 16 | tau 0.001 0.01 0.1 1.0 10.0 17 | epsilon 0.001 0.01 0.1 1.0 18 | sigma 0.01 0.1 1.0 10.0 19 | 20 | learningRates 21 | decreaseType none 22 | nLearningRates 3 23 | learningRate 0.0001 0.001 0.01 0.1 24 | learningRate 0.0 0.001 0.01 0.1 25 | learningRate 0.0 0.001 0.01 0.1 26 | 27 | discount 28 | gamma 0.99 29 | 30 | nn 31 | nHiddenQ 20 32 | nHiddenV 20 33 | -------------------------------------------------------------------------------- /CartPoleCfgCaclaSimple: -------------------------------------------------------------------------------- 1 | nExperiments 10 2 | 3 | 4 | steps 5 | nTrainSteps 30000 6 | trainStorePer 3000 7 | nTestSteps 15000 8 | testStorePer 15000 9 | nTrainEpisodes 0 10 | nTestEpisodes 0 11 | nMaxStepsPerTrainEpisode 0 12 | nMaxStepsPerTestEpisode 0 13 | 14 | algorithm 15 | algorithms Cacla 16 | tau 17 | epsilon 18 | sigma 0.01 0.1 1.0 10.0 19 | 20 | learningRates 21 | decreaseType none 22 | nLearningRates 3 23 | learningRate 0.0001 0.001 0.01 0.1 24 | learningRate 0.0 0.001 0.01 0.1 25 | learningRate 0.0 0.001 0.01 0.1 26 | 
27 | discount 28 | gamma 0.99 29 | 30 | nn 31 | nHiddenQ 20 32 | nHiddenV 20 33 | -------------------------------------------------------------------------------- /CartPoleInterface.cpp: -------------------------------------------------------------------------------- 1 | # ifndef CARTPOLEINTERFACE 2 | # define CARTPOLEINTERFACE 3 | 4 | # include "CartPoleInterface.h" 5 | # include "CartPole.h" 6 | 7 | CartPoleInterface::CartPoleInterface( const char * pFile ) { 8 | 9 | parameterFile = pFile ; 10 | 11 | parameterPath.assign( parameterFile ) ; 12 | 13 | readParameterFile() ; 14 | 15 | } 16 | 17 | int main( int argn, char *argv[] ) { 18 | 19 | if ( argn != 2 ) { 20 | 21 | cout << "You need to specify exactly one configuration (parameter) file." << endl ; 22 | return 0 ; 23 | 24 | } 25 | 26 | CartPoleInterface * interface = new CartPoleInterface( argv[1] ) ; 27 | 28 | CartPole * cartpole = new CartPole() ; 29 | 30 | interface->getParametersAndRunExperiments( cartpole ) ; 31 | 32 | delete cartpole ; 33 | delete interface ; 34 | } 35 | 36 | # endif //CARTPOLEINTERFACE 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /CartPoleInterface.h: -------------------------------------------------------------------------------- 1 | # ifndef CARTPOLEINTERFACE_H 2 | # define CARTPOLEINTERFACE_H 3 | 4 | # include "Experiment.h" 5 | # include "CartPole.h" 6 | 7 | class CartPoleInterface : public Experiment { 8 | public: 9 | CartPoleInterface( const char * parameterFile ) ; 10 | ~CartPoleInterface() {}; 11 | 12 | }; 13 | 14 | # endif //CARTPOLEINTERFACE_H 15 | -------------------------------------------------------------------------------- /Experiment.cpp: -------------------------------------------------------------------------------- 1 | #ifndef EXPERIMENT 2 | #define EXPERIMENT 3 | # include "Experiment.h" 4 | 5 | #include 6 | 7 | using namespace std; 8 | 9 | bool Experiment::initializeState( State * state, Algorithm * algorithm, 
World * world ) { 10 | 11 | if ( algorithm->getContinuousStates() and world->getContinuousStates() ) { 12 | 13 | state->stateDimension = world->getStateDimension() ; 14 | state->continuousState = new double[ state->stateDimension ] ; 15 | state->continuous = true ; 16 | state->discrete = false ; 17 | 18 | } else if ( algorithm->getDiscreteStates() and world->getDiscreteStates() ) { 19 | 20 | state->numberOfStates = world->getNumberOfStates() ; 21 | state->continuous = false ; 22 | state->discrete = true ; 23 | 24 | } else { 25 | 26 | cout << "World " << world->getName() << " and algorithm " << algorithm->getName() << " are incompatible, because of different state types (discrete vs continuous). Skipping..." << endl ; 27 | 28 | return true ; 29 | 30 | } 31 | 32 | return false ; 33 | 34 | } 35 | 36 | bool Experiment::initializeAction( Action * action, Algorithm * algorithm, World * world ) { 37 | 38 | if ( algorithm->getContinuousActions() and world->getContinuousActions() ) { 39 | 40 | action->actionDimension = world->getActionDimension() ; 41 | action->continuousAction = new double[ action->actionDimension ] ; 42 | action->continuous = true ; 43 | action->discrete = false ; 44 | 45 | } else if ( algorithm->getDiscreteActions() and world->getDiscreteActions() ) { 46 | 47 | action->numberOfActions = world->getNumberOfActions() ; 48 | action->continuous = false ; 49 | action->discrete = true ; 50 | 51 | } else { 52 | 53 | cout << "World " << world->getName() << " and algorithm " << algorithm->getName() << " are incompatible, because of different action types (discrete vs continuous). Skipping..." 
<< endl ; 54 | 55 | return true ; 56 | 57 | } 58 | 59 | return false ; 60 | 61 | } 62 | 63 | Experiment::~Experiment() { 64 | 65 | } 66 | 67 | 68 | void Experiment::setAlgorithm( string algorithmName, World * world ) { 69 | 70 | if ( algorithmName.compare("Q") == 0 ) { 71 | 72 | algorithm = new Qlearning( parameterFile, world ) ; 73 | 74 | } else if ( algorithmName.compare("Sarsa") == 0 ) { 75 | 76 | algorithm = new Sarsa( parameterFile, world ) ; 77 | 78 | } else if ( algorithmName.compare("QV") == 0 ) { 79 | 80 | algorithm = new QVlearning( parameterFile, world ) ; 81 | 82 | } else if ( algorithmName.compare("Acla") == 0 ) { 83 | 84 | algorithm = new Acla( parameterFile, world ) ; 85 | 86 | } else if ( algorithmName.compare("Cacla") == 0 ) { 87 | 88 | algorithm = new Cacla( parameterFile, world ) ; 89 | 90 | } else { 91 | 92 | cout << "Unknown algorithm: " << algorithmName << endl ; 93 | exit(-1) ; 94 | 95 | } 96 | 97 | } 98 | 99 | void Experiment::getParametersAndRunExperiments( World * world ) { 100 | int nTau = taus.size() ; 101 | int nEpsilon = epsilons.size() ; 102 | int nSigma = sigmas.size() ; 103 | 104 | int nAlgorithms = algorithms.size() ; 105 | 106 | int nExploration = nTau + nEpsilon + nSigma ; 107 | 108 | int nTotalLearningRates = 1 ; 109 | 110 | for ( unsigned int l = 0 ; l < learningRates.size() ; l++ ){ 111 | 112 | nTotalLearningRates *= learningRates[l].size() ; 113 | 114 | } 115 | 116 | int nTotalExperiments = nExperiments*nAlgorithms*nExploration*nTotalLearningRates*gammas.size() ; 117 | 118 | cout << "nTotalExperiments " << nTotalExperiments << endl ; 119 | 120 | bool printedWarning1 = false ; 121 | 122 | for ( int i = 0 ; i < nTotalExperiments ; i++ ) { 123 | 124 | bool skip = false ; 125 | 126 | int N = i ; 127 | 128 | algorithmName = algorithms[ N % nAlgorithms ] ; 129 | N /= nAlgorithms ; 130 | 131 | setAlgorithm( algorithmName, world ) ; 132 | 133 | State s1, s2 ; 134 | Action a1, a2 ; 135 | 136 | state = &s1 ; 137 | nextState = &s2 ; 138 
| action = &a1 ; 139 | nextAction = &a2 ; 140 | 141 | bool f1 = initializeState( state, algorithm, world ) ; 142 | bool f2 = initializeState( nextState, algorithm, world ) ; 143 | bool f3 = initializeAction( action, algorithm, world ) ; 144 | bool f4 = initializeAction( nextAction, algorithm, world ) ; 145 | 146 | skip = f1 or f2 or f3 or f4 ; 147 | 148 | unsigned int nAlgorithmLearningRates = algorithm->getNumberOfLearningRates() ; 149 | 150 | learningRate = new double[ nAlgorithmLearningRates ] ; 151 | 152 | for ( unsigned int l = 0 ; l < learningRates.size() ; l++ ) { 153 | 154 | unsigned int L = learningRates[l].size() ; 155 | 156 | if ( l < nAlgorithmLearningRates ) { 157 | 158 | learningRate[l] = learningRates[l][ N % L ] ; 159 | 160 | } else if ( N % L > 0 ) { 161 | //if we have more different learning rates than used (i.e. a second 162 | //or third learning rate for an algorithm that only uses one, such as Sarsa) 163 | //we skip all redundant experiments. 164 | 165 | skip = true ; 166 | 167 | } 168 | 169 | N /= L ; 170 | 171 | } 172 | 173 | for ( unsigned int l = learningRates.size() ; l < nAlgorithmLearningRates ; l++ ) { 174 | //In case that there are less learning rates specified than needed 175 | //for the current algorithm, reuse the first learning rate for all 176 | //missing values. 177 | 178 | if ( not printedWarning1 ) { 179 | cout << "Warning: underspecified number of learning rates for algorithm " << algorithmName << ". " ; 180 | cout << "(" << learningRates.size() << " learning rate(s) specified, " << nAlgorithmLearningRates << " needed.)" << endl ; 181 | cout << "Using first specified learning rate for missing values." 
<< endl ; 182 | printedWarning1 = true ; 183 | } 184 | 185 | learningRate[l] = learningRate[0] ; 186 | 187 | } 188 | 189 | int exp = N % nExploration ; 190 | 191 | if ( exp < nTau ) { 192 | 193 | boltzmann = true ; 194 | tau = taus[ N % nTau ] ; 195 | 196 | egreedy = false ; 197 | epsilon = 0.0 ; 198 | 199 | gaussian = false ; 200 | sigma = 0.0 ; 201 | 202 | 203 | } else if ( exp < (nTau + nEpsilon) ) { 204 | 205 | boltzmann = false ; 206 | tau = 0.0 ; 207 | 208 | egreedy = true; 209 | epsilon = epsilons[ ( N - nTau ) % nEpsilon ] ; 210 | 211 | gaussian = false ; 212 | sigma = 0.0 ; 213 | 214 | } else { 215 | 216 | boltzmann = false ; 217 | tau = 0.0 ; 218 | 219 | egreedy = false ; 220 | epsilon = 0.0 ; 221 | 222 | gaussian = true ; 223 | sigma = sigmas[ ( N - nTau - nEpsilon ) % nSigma ] ; 224 | 225 | } 226 | 227 | if ( algorithmName.compare("Cacla") == 0 ) { 228 | 229 | if ( boltzmann ) { 230 | //~ cout << "Boltzmann exploration is undefined for Cacla. Skipping..." << endl ; 231 | skip = true ; 232 | } 233 | 234 | } else { 235 | 236 | if ( gaussian ) { 237 | //~ cout << "Gaussian exploration is as of yet undefined for " << algorithmName << ". Skipping..." << endl ; 238 | skip = true ; 239 | } 240 | } 241 | 242 | if ( not skip ) { 243 | //At this point only skips when: 244 | // 1) considering learning rates that aren't used by 245 | // the current algorithm. 246 | // 2) initialization of actions and/or states failed. 247 | // This happens when the MDP only allows discrete 248 | // or continuous spaces and the algorithm vice- 249 | // versa. 250 | //Later may add skipping in other cases, such as when 251 | //an incompatible exploration scheme is used (such as 252 | //boltzmann for Cacla). 
253 | 254 | N /= nExploration ; 255 | 256 | gamma = gammas[ N % gammas.size() ] ; 257 | N /= gammas.size() ; 258 | 259 | ostringstream os ; 260 | 261 | os << parameterPath << "_d/" ; 262 | 263 | os << algorithmName << '_' ; 264 | if ( boltzmann ) { 265 | os << "B" << tau << '_' ; 266 | } else if ( egreedy ) { 267 | os << "E" << epsilon << '_' ; 268 | } else if ( gaussian ) { 269 | os << "G" << sigma << '_' ; 270 | } 271 | os << 'L' ; 272 | for ( unsigned int l = 0 ; l < nAlgorithmLearningRates ; l++ ) { 273 | os << learningRate[l] << '_' ; 274 | } 275 | os << 'D' ; 276 | os << gamma ; 277 | 278 | cout << "Storing in : " << os.str() << endl ; 279 | 280 | train = true ; 281 | nSteps = nTrainSteps ; 282 | nEpisodes = nTrainEpisodes ; 283 | nMaxStepsPerEpisode = nMaxStepsPerTrainEpisode ; 284 | nResults = nTrainResults ; 285 | 286 | 287 | double * results = runExperiment( world ) ; 288 | 289 | ostringstream ostemp ("") ; 290 | ostemp << os.str().c_str() << ".onresults" ; 291 | storeRewards( (ostemp.str()).c_str(), results, nTrainResults ) ; 292 | 293 | cout << "Final train results: " << results[ nTrainResults - 1 ] << endl ; 294 | delete [] results ; 295 | 296 | train = false ; 297 | nSteps = nTestSteps ; 298 | nEpisodes = nTestEpisodes ; 299 | nMaxStepsPerEpisode = nMaxStepsPerTestEpisode ; 300 | nResults = nTestResults ; 301 | 302 | results = runExperiment( world ) ; 303 | 304 | ostemp.str("") ; 305 | ostemp << os.str().c_str() << ".offresults" ; 306 | storeRewards( (ostemp.str()).c_str(), results, nTestResults ) ; 307 | 308 | cout << "Final test results: " << results[ nTestResults - 1 ] << endl ; 309 | delete [] results ; 310 | if (!ros::ok()) { 311 | cout << "ROS failed, exiting" << endl; 312 | exit(1); 313 | } 314 | } 315 | 316 | delete [] learningRate; 317 | 318 | if ( state->continuous ) { 319 | delete [] state->continuousState ; 320 | delete [] nextState->continuousState ; 321 | } 322 | 323 | if ( action->continuous ) { 324 | delete [] action->continuousAction 
; 325 | delete [] nextAction->continuousAction ; 326 | } 327 | 328 | delete algorithm ; 329 | 330 | } 331 | 332 | //delete world ; 333 | 334 | } 335 | 336 | void Experiment::storeRewards( const char * filepath, double * rewards, int nRewards ) { 337 | 338 | ifstream ifile ; 339 | 340 | ifile.open( filepath, ifstream::in ) ; 341 | 342 | int N = 0 ; 343 | 344 | if (ifile) { 345 | ifile >> N ; 346 | } 347 | 348 | ofstream ofile ; 349 | 350 | ofile.open( filepath, ofstream::out ) ; 351 | ofile << (N+1) << '\n' ; 352 | 353 | double last, lastvar, lastdif ; 354 | 355 | for ( int e = 0 ; e < nRewards ; e++ ) { 356 | 357 | 358 | if ( N > 0 ) { 359 | 360 | ifile >> last ; 361 | ifile >> lastvar ; 362 | 363 | lastdif = (rewards[e] - last) ; 364 | lastvar = N*lastvar/(N+1.0) + lastdif*lastdif/N ; 365 | 366 | last = (N*last + rewards[e])/(N+1.0) ; 367 | 368 | } else { 369 | 370 | lastvar = 0 ; 371 | last = rewards[e] ; 372 | 373 | } 374 | 375 | ofile << last << " " << lastvar << '\n' ; 376 | 377 | } 378 | 379 | ifile.close() ; 380 | 381 | ofile.close() ; 382 | } 383 | 384 | double * Experiment::runExperiment( World * world ) { 385 | 386 | Action * actions = new Action[2] ; //For Sarsa, that needs both the present and next action. 
387 | 388 | double reward ; 389 | 390 | //* Training *// 391 | double * results = new double[ nResults ] ; 392 | 393 | int episode = 0 ; 394 | int step = 0 ; 395 | int result = 0 ; 396 | double rewardSum = 0.0 ; 397 | 398 | world->reset() ; 399 | 400 | world->getState( state ) ; 401 | 402 | explore( state, action ) ; 403 | 404 | endOfEpisode = true ; 405 | 406 | int storePer ; 407 | 408 | if ( train ) { 409 | storePer = trainStorePer ; 410 | } else { 411 | storePer = testStorePer ; 412 | } 413 | 414 | for ( step = 0 ; (step < nSteps) and (episode < nEpisodes) ; step++ ) { 415 | 416 | if(!ros::ok()) return results; 417 | reward = world->act( action ) ; 418 | 419 | world->getState( nextState ) ; 420 | 421 | explore( nextState, nextAction ) ; 422 | 423 | rewardSum += reward ; 424 | 425 | endOfEpisode = world->endOfEpisode() ; 426 | 427 | if ( train ) { 428 | 429 | if ( algorithmName.compare("Sarsa") == 0 ) { 430 | 431 | actions[0] = *action ; 432 | actions[1] = *nextAction ; 433 | algorithm->update( state, actions, reward, nextState, endOfEpisode, learningRate, gamma ) ; 434 | 435 | } else { 436 | 437 | algorithm->update( state, action, reward, nextState, endOfEpisode, learningRate, gamma ) ; 438 | 439 | } 440 | 441 | } 442 | 443 | copyState( nextState, state ) ; 444 | copyAction( nextAction, action ) ; 445 | 446 | if ( endOfEpisode ) { 447 | 448 | episode++ ; 449 | 450 | } 451 | 452 | // Store results : 453 | bool store = false ; 454 | 455 | if ( storePerEpisode and ( episode % storePer == 0 ) and endOfEpisode ) { 456 | 457 | store = true ; 458 | 459 | } else if ( storePerStep and ( (step + 1) % storePer == 0 ) ) { 460 | 461 | store = true ; 462 | 463 | } 464 | 465 | if ( store ) { 466 | 467 | results[ result ] = rewardSum/storePer ; 468 | rewardSum = 0.0 ; 469 | result++ ; 470 | 471 | } 472 | 473 | } 474 | 475 | delete [] actions ; 476 | 477 | return results ; 478 | 479 | } 480 | 481 | void Experiment::explore( State * state, Action * action ) { 482 | 483 | if ( not 
train ) { 484 | 485 | algorithm->getMaxAction( state, action ) ; 486 | 487 | } else if ( boltzmann ) { 488 | 489 | algorithm->explore( state, action, tau, "boltzmann", endOfEpisode ) ; 490 | 491 | } else if ( egreedy ) { 492 | 493 | algorithm->explore( state, action, epsilon, "egreedy", endOfEpisode ) ; 494 | 495 | } else if ( gaussian ) { 496 | 497 | algorithm->explore( state, action, sigma, "gaussian", endOfEpisode ) ; 498 | 499 | } else { 500 | 501 | algorithm->getMaxAction( state, action ) ; 502 | 503 | } 504 | 505 | } 506 | 507 | void Experiment::read_moveTo( ifstream * ifile, string label ) { 508 | string temp ; 509 | 510 | while ( temp.compare( label ) != 0 and ifile ) { 511 | 512 | *ifile >> temp ; 513 | 514 | if ( ifile->eof() ) { 515 | cout << "Read error: Could not find label '" << label << "' while reading parameter file '" << parameterFile << "'" << endl ; 516 | exit(0) ; 517 | } 518 | 519 | } 520 | } 521 | 522 | vector< double > Experiment::read_doubleArray( string temp ) { 523 | 524 | vector< double > parameters ; 525 | 526 | istringstream iss( temp ) ; 527 | 528 | double parameter ; 529 | 530 | while( iss >> parameter ) { 531 | 532 | parameters.push_back( parameter ) ; 533 | 534 | } 535 | 536 | return parameters ; 537 | 538 | } 539 | 540 | void Experiment::readParameterFile( ) { 541 | 542 | ifstream ifile ; 543 | 544 | ifile.open( parameterFile, ifstream::in ) ; 545 | 546 | string temp ; 547 | 548 | read_moveTo( &ifile, "nExperiments" ) ; 549 | ifile >> nExperiments ; 550 | 551 | read_moveTo( &ifile, "steps" ) ; 552 | 553 | read_moveTo( &ifile, "nTrainSteps" ) ; 554 | ifile >> nTrainSteps ; 555 | 556 | read_moveTo( &ifile, "trainStorePer" ) ; 557 | ifile >> trainStorePer ; 558 | 559 | read_moveTo( &ifile, "nTestSteps" ) ; 560 | ifile >> nTestSteps ; 561 | 562 | read_moveTo( &ifile, "testStorePer" ) ; 563 | ifile >> testStorePer ; 564 | 565 | read_moveTo( &ifile, "nTrainEpisodes" ) ; 566 | ifile >> nTrainEpisodes ; 567 | 568 | read_moveTo( &ifile, 
"nTestEpisodes" ) ; 569 | ifile >> nTestEpisodes ; 570 | 571 | read_moveTo( &ifile, "nMaxStepsPerTrainEpisode" ) ; 572 | ifile >> nMaxStepsPerTrainEpisode ; 573 | 574 | read_moveTo( &ifile, "nMaxStepsPerTestEpisode" ) ; 575 | ifile >> nMaxStepsPerTestEpisode ; 576 | 577 | 578 | if ( nTrainSteps == 0 ) { 579 | 580 | nTrainSteps = nTrainEpisodes*nMaxStepsPerTrainEpisode ; 581 | nTrainResults = nTrainSteps/trainStorePer ; 582 | storePerStep = false ; 583 | storePerEpisode = true ; 584 | 585 | } else if ( nMaxStepsPerTrainEpisode == 0 ) { 586 | 587 | nTrainEpisodes = nTrainSteps ; 588 | nTrainResults = nTrainSteps/trainStorePer ; 589 | storePerStep = true ; 590 | storePerEpisode = false ; 591 | 592 | } 593 | 594 | if ( nTestSteps == 0 ) { 595 | 596 | nTestSteps = nTestEpisodes*nMaxStepsPerTestEpisode ; 597 | nTestResults = nTestSteps/testStorePer ; 598 | storePerStep = false ; 599 | storePerEpisode = true ; 600 | 601 | } else if ( nMaxStepsPerTestEpisode == 0 ) { 602 | 603 | nTestEpisodes = nTestSteps ; 604 | nTestResults = nTestSteps/testStorePer ; 605 | storePerStep = true ; 606 | storePerEpisode = false ; 607 | 608 | } 609 | 610 | 611 | read_moveTo( &ifile, "algorithm" ) ; 612 | 613 | read_moveTo( &ifile, "algorithms" ) ; 614 | getline( ifile, temp ) ; 615 | istringstream iss( temp ) ; 616 | 617 | while ( iss >> temp ) { 618 | algorithms.push_back( temp ) ; 619 | } 620 | 621 | read_moveTo( &ifile, "tau" ) ; 622 | getline( ifile, temp ) ; 623 | taus = read_doubleArray( temp ) ; 624 | 625 | read_moveTo( &ifile, "epsilon" ) ; 626 | getline( ifile, temp ) ; 627 | epsilons = read_doubleArray( temp ) ; 628 | 629 | read_moveTo( &ifile, "sigma" ) ; 630 | getline( ifile, temp ) ; 631 | sigmas = read_doubleArray( temp ) ; 632 | 633 | read_moveTo( &ifile, "learningRates" ) ; 634 | 635 | read_moveTo( &ifile, "decreaseType" ) ; 636 | ifile >> learningRateDecreaseType ; 637 | 638 | read_moveTo( &ifile, "nLearningRates" ) ; 639 | ifile >> nLearningRates ; 640 | 641 | for ( int l = 
0 ; l < nLearningRates ; l++ ) { 642 | 643 | read_moveTo( &ifile, "learningRate" ) ; 644 | getline( ifile, temp ) ; 645 | learningRates.push_back( read_doubleArray( temp ) ) ; 646 | 647 | } 648 | 649 | read_moveTo( &ifile, "discount" ) ; 650 | 651 | read_moveTo( &ifile, "gamma" ) ; 652 | getline( ifile, temp ) ; 653 | gammas = read_doubleArray( temp ) ; 654 | 655 | } 656 | 657 | #endif //EXPERIMENT 658 | -------------------------------------------------------------------------------- /Experiment.h: -------------------------------------------------------------------------------- 1 | #ifndef EXPERIMENT_H 2 | #define EXPERIMENT_H 3 | 4 | # include 5 | # include 6 | # include 7 | # include 8 | 9 | # include "State.h" 10 | # include "Action.h" 11 | # include "World.h" 12 | # include "Algorithm.h" 13 | 14 | # include "Qlearning.h" 15 | # include "QVlearning.h" 16 | # include "Sarsa.h" 17 | # include "Acla.h" 18 | # include "Cacla.h" 19 | 20 | class Experiment { 21 | 22 | public: 23 | virtual ~Experiment() ; 24 | bool initializeState( State * state, Algorithm * algorithm, World * world ) ; 25 | bool initializeAction( Action * action, Algorithm * algorithm, World * world ) ; 26 | void getParametersAndRunExperiments( World * world ) ; 27 | 28 | protected: 29 | double * runExperiment( World * world ) ; 30 | void explore( State * state, Action * action ) ; 31 | void storeRewards( const char *, double * , int ) ; 32 | void setAlgorithm( string algorithmName, World * world ) ; 33 | void readParameterFile( ) ; 34 | void read_moveTo( ifstream *, string ) ; 35 | vector< double > read_doubleArray( string ) ; 36 | 37 | Algorithm * algorithm ; 38 | World * world ; 39 | 40 | State * state ; 41 | State * nextState ; 42 | Action * action ; 43 | Action * nextAction ; 44 | 45 | int nExperiments, nAlgorithms ; 46 | int nSteps, nEpisodes, nMaxStepsPerEpisode, nResults ; 47 | int nTrainSteps, nTrainEpisodes, nMaxStepsPerTrainEpisode, nTrainResults, trainStorePer ; 48 | int nTestSteps, 
nTestEpisodes, nMaxStepsPerTestEpisode, nTestResults, testStorePer ; 49 | int stateDimension, actionDimension ; 50 | bool discreteStates, discreteActions, endOfEpisode ; 51 | bool storePerStep, storePerEpisode ; 52 | bool boltzmann, egreedy, gaussian ; 53 | int nLearningRates ; 54 | 55 | const char * parameterFile ; 56 | string parameterPath ; 57 | string algorithmName ; 58 | vector< string > algorithms ; 59 | vector< double > taus ; 60 | vector< double > epsilons ; 61 | vector< double > sigmas ; 62 | string learningRateDecreaseType ; 63 | double * learningRate ; 64 | vector< vector < double > > learningRates ; 65 | vector< double > gammas ; 66 | double tau, epsilon, sigma, gamma ; 67 | 68 | bool train ; 69 | 70 | 71 | }; 72 | 73 | #endif //EXPERIMENT_H 74 | 75 | -------------------------------------------------------------------------------- /List.h: -------------------------------------------------------------------------------- 1 | #ifndef LIST 2 | #define LIST 3 | 4 | struct List { 5 | int length; 6 | double * contents; 7 | }; 8 | 9 | #endif //LIST 10 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CFLAGS=-Wall -O4 2 | 3 | # World.h 4 | UTILFILES = StateActionUtils.cpp cNeuralNetwork.cpp Algorithm.cpp StateActionAlgorithm.cpp Experiment.cpp 5 | ALGORITHMS = Sarsa.cpp Qlearning.cpp QVlearning.cpp Acla.cpp Cacla.cpp 6 | CPWORLD = CartPole.cpp 7 | CPINTERFACE = CartPoleInterface.cpp 8 | SMALLWORLD = SmallMaze.cpp 9 | SMALLINTERFACE = SmallInterface.cpp 10 | ARMWORLD = Arm.cpp 11 | ARMINTERFACE = ArmInterface.cpp 12 | 13 | LDFLAGS = -L/usr/lib -L/usr/lib/gcc/x86_64-linux-gnu/4.8 -lstdc++ 14 | # ROS_DEFS = -DUSE_ROS 15 | ROS_LDFLAGS = -L/opt/ros/jade/lib -lroscpp -lrosconsole -lroscpp_serialization -lroslib -lrostime 16 | # ROS_LDFLAGS = -L/home/src/ros/hydro/catkin_ws/install_isolated/lib -lroscpp -lrosconsole -lroscpp_serialization 
-lroslib -lrostime 17 | # ROS_LDFLAGS = -L/opt/ros/jade/lib -lroscpp_serialization -lrostime -l:/usr/lib/x86_64-linux-gnu/libboost_date_time.so -l:/usr/lib/x86_64-linux-gnu/libboost_system.so -l:/usr/lib/x86_64-linux-gnu/libboost_thread.so -l:/usr/lib/x86_64-linux-gnu/libpthread.so -lcpp_common -l:/usr/lib/x86_64-linux-gnu/libboost_system.so -l:/usr/lib/x86_64-linux-gnu/libboost_thread.so -l:/usr/lib/x86_64-linux-gnu/libpthread.so -l:/usr/lib/x86_64-linux-gnu/libconsole_bridge.so 18 | # ROS_LDFLAGS = -L/opt/ros/jade/lib -lroscpp_serialization -lrostime -l:/usr/lib/x86_64-linux-gnu/libboost_date_time.so -l:/usr/lib/x86_64-linux-gnu/libboost_system.so -l:/usr/lib/x86_64-linux-gnu/libboost_thread.so -l:/usr/lib/x86_64-linux-gnu/libpthread.so -lcpp_common -l:/usr/lib/x86_64-linux-gnu/libboost_system.so -l:/usr/lib/x86_64-linux-gnu/libboost_thread.so -l:/usr/lib/x86_64-linux-gnu/libpthread.so -l:/usr/lib/x86_64-linux-gnu/libconsole_bridge.so 19 | 20 | # objects := $(patsubst %.cpp,%.o,$(wildcard *.cpp)) 21 | objects := $(patsubst %.cpp,%.o,$(UTILFILES)) 22 | objects += $(patsubst %.cpp,%.o,$(ALGORITHMS)) 23 | 24 | cp_objs = $(patsubst %.cpp,%.o,$(CPWORLD)) 25 | cp_objs += $(patsubst %.cpp,%.o,$(CPINTERFACE)) 26 | 27 | cp_objs_inc = $(patsubst %.cpp,%.h,$(CPWORLD)) 28 | cp_objs_inc += $(patsubst %.cpp,%.h,$(CPINTERFACE)) 29 | 30 | arm_objs = $(patsubst %.cpp,%.o,$(ARMWORLD)) 31 | arm_objs += $(patsubst %.cpp,%.o,$(ARMINTERFACE)) 32 | 33 | arm_objs_inc = $(patsubst %.cpp,%.h,$(ARMWORLD)) 34 | arm_objs_inc += $(patsubst %.cpp,%.h,$(ARMINTERFACE)) 35 | 36 | all: cp small arm 37 | 38 | # cp 39 | cp: 40 | c++ $(CFLAGS) $(UTILFILES) $(ALGORITHMS) $(CPWORLD) $(CPINTERFACE) $(ROS_INCLUDES) -o CartPole 41 | @echo 42 | @echo Done compiling CartPole. 43 | @echo Run with: ./CartPole \'cfg\' 44 | @echo where \'cfg\' is a configuration file. 
45 | @echo 46 | 47 | cp_objs: $(cp_objs_inc) 48 | cp_ros: $(objects) $(cp_objs) 49 | # c++ $(CFLAGS) $(UTILFILES) $(ALGORITHMS) $(CPWORLD) $(CPINTERFACE) -o CartPole 50 | g++ $(objects) $(cp_objs) -o CartPoleRos $(LDFLAGS) $(ROS_LDFLAGS) $(ROS_INCLUDES) 51 | @echo 52 | @echo Done compiling CartPoleRos. 53 | @echo Run with: ./CartPoleRos \'cfg\' 54 | @echo where \'cfg\' is a configuration file. 55 | @echo 56 | 57 | # small 58 | small: 59 | c++ $(CFLAGS) $(UTILFILES) $(ALGORITHMS) $(SMALLWORLD) $(SMALLINTERFACE) -o SmallMaze 60 | @echo 61 | @echo Done compiling SmallMaze. 62 | @echo Run with: ./SmallMaze \'cfg\' 63 | @echo where \'cfg\' is a configuration file. 64 | @echo 65 | 66 | arm_objs: $(arm_objs_inc) 67 | 68 | arm: $(objects) $(arm_objs) 69 | # c++ $(CFLAGS) $(ARM_LDFLAGS) $(UTILFILES) $(ALGORITHMS) $(ARMWORLD) $(ARMINTERFACE) -o Arm 70 | g++ $(objects) $(arm_objs) -o Arm $(LDFLAGS) $(ROS_LDFLAGS) $(ROS_INCLUDES) 71 | @echo 72 | @echo Done compiling Arm. 73 | @echo Run with: ./Arm \'cfg\' 74 | @echo where \'cfg\' is a configuration file. 
75 | @echo 76 | 77 | # foo : $(objects) 78 | # ld -o foo $(ARM_LDFLAGS) $(objects) 79 | 80 | clean: 81 | rm -rf Arm CartPole CartPoleRos SmallMaze 82 | rm -rf *.o 83 | rm -f *.out 84 | @echo done removing 85 | 86 | -------------------------------------------------------------------------------- /Matrix.cpp: -------------------------------------------------------------------------------- 1 | #ifndef MATRIX 2 | #define MATRIX 3 | #include 4 | #include 5 | using namespace std ; 6 | 7 | #include "Matrix.h" 8 | 9 | Matrix::Matrix() { 10 | _data = new double[1] ; 11 | _nRows = 0 ; 12 | _nCols = 0 ; 13 | } 14 | 15 | Matrix::Matrix(int size) { 16 | setSize( size ) ; 17 | } 18 | 19 | Matrix::Matrix(int nRows, int nCols) { 20 | setSize( nRows, nCols ) ; 21 | } 22 | 23 | Matrix::Matrix(int nRows, int nCols, double *data) { 24 | setSize( nRows, nCols ) ; 25 | for ( int r = 0 ; r < nRows ; r++ ){ 26 | for ( int c = 0 ; c < nCols ; c++ ){ 27 | _data[ r*nCols + c ] = data[ r*nCols + c ] ; 28 | } 29 | } 30 | } 31 | 32 | Matrix::~Matrix() { 33 | delete [] _data ; 34 | } 35 | 36 | void Matrix::setSize( int size ) { 37 | _size = size ; 38 | _data = new double[size] ; 39 | int x ; 40 | for ( x = 0 ; x < size ; x++ ){ 41 | _data[ x ] = 0.0 ; 42 | } 43 | _nRows = size ; 44 | _nCols = 1 ; 45 | } 46 | 47 | void Matrix::setSize( int nRows, int nCols ) { 48 | _size = nRows * nCols ; 49 | _data = new double[ _size ] ; 50 | int x ; 51 | for ( x = 0 ; x < _size ; x++ ){ 52 | _data[ x ] = 0.0 ; 53 | } 54 | _nRows = nRows ; 55 | _nCols = nCols ; 56 | } 57 | 58 | int Matrix::getSize( ) { 59 | return _size ; 60 | } 61 | 62 | double * Matrix::getPtr(int i) { 63 | return (_data + i ) ; 64 | } 65 | 66 | double * Matrix::getPtr( ) { 67 | return (_data) ; 68 | } 69 | 70 | double Matrix::get(int i) const { 71 | return _data[ i ] ; 72 | } 73 | 74 | double Matrix::get(int i, int j) const { 75 | return _data[ i*_nCols + j ] ; 76 | } 77 | 78 | void Matrix::set(int i, double val) { 79 | _data[ i ] = val ; 80 
| } 81 | 82 | void Matrix::set(int i, int j, double val) { 83 | _data[ i*_nCols + j ] = val ; 84 | } 85 | 86 | void Matrix::add(int i, double val) { 87 | _data[ i ] += val ; 88 | } 89 | 90 | void Matrix::add(int i, int j, double val) { 91 | _data[ i*_nCols + j ] += val ; 92 | } 93 | 94 | //~ void Matrix::increaseCols() { 95 | //~ nCols = _nCols + 1 ; 96 | //~ _size += _nRows ; 97 | //~ data = new double[ _size ] ; 98 | 99 | //~ for ( int r = 0 ; r < _nRows ; r++ ){ 100 | //~ for ( int c = 0 ; c < _nCols ; c++ ){ 101 | //~ data[ r*nCols + c ] = _data[ r*_nCols + c ] ; 102 | //~ } 103 | //~ data[ r*nCols + _nCols ] = 0.0 ; 104 | //~ } 105 | 106 | //~ delete [] _data ; 107 | //~ _data = data ; 108 | //~ _nCols = nCols 109 | //~ } 110 | 111 | //~ void Matrix::increaseRows() { 112 | //~ nRows = _nRows + 1 ; 113 | //~ _size += _nCols ; 114 | //~ data = new double[ _size ] ; 115 | 116 | //~ for ( int r = 0 ; r < _nRows ; r++ ){ 117 | //~ for ( int c = 0 ; c < _nCols ; c++ ){ 118 | //~ data[ r*_nCols + c ] = _data[ r*_nCols + c ] ; 119 | //~ } 120 | //~ } 121 | //~ for ( int c = 0 ; c < _nCols ; c++ ){ 122 | //~ data[ _nRows*_nCols + c ] = 0.0 ; 123 | //~ } 124 | 125 | //~ delete [] _data ; 126 | //~ _data = data ; 127 | //~ _nRows = nRows 128 | //~ } 129 | 130 | //~ Matrix Matrix::multiply( Matrix M, int nRows, int nCols ) { 131 | //~ if ( _nCols != nRows ) { 132 | //~ throw "Error in Matrix multiplication: _nCols != nRows." 
; 133 | //~ } 134 | 135 | //~ int i,j,k ; 136 | //~ double sum ; 137 | 138 | //~ Matrix Result = new Matrix( _nRows, nCols ) ; 139 | 140 | //~ for ( i = 0 ; i < _nRows ; i++ ) { 141 | //~ for ( j = 0 ; j < nCols ; j++ ) { 142 | //~ sum = 0 ; 143 | 144 | //~ for ( k = 0 ; k < _nCols ; k++ ) { 145 | //~ sum += get( i, k ) * M.get( k, j ) ; 146 | //~ } 147 | 148 | //~ Result.set( i, j, sum ) ; 149 | //~ } 150 | //~ } 151 | 152 | //~ return Result ; 153 | //~ } 154 | 155 | #endif //MATRIX 156 | -------------------------------------------------------------------------------- /Matrix.h: -------------------------------------------------------------------------------- 1 | #ifndef MATRIX_H 2 | #define MATRIX_H 3 | 4 | class Matrix { 5 | public: 6 | Matrix() ; 7 | Matrix( int ); 8 | Matrix( int , int ); 9 | Matrix( int , int, double* ); 10 | ~Matrix(); 11 | 12 | void setSize( int ) ; 13 | void setSize( int, int ) ; 14 | int getSize() ; 15 | 16 | double * getPtr( int i ) ; 17 | double * getPtr() ; 18 | 19 | double get( int ) const ; 20 | void set( int, double ); 21 | void add( int, double ); 22 | double get( int, int ) const ; 23 | void set( int, int , double ); 24 | void add( int, int , double ); 25 | //~ Matrix multiply( Matrix, int, int ); 26 | 27 | private: 28 | int _nRows, _nCols, _size, _id; 29 | double* _data; 30 | }; 31 | #endif // MATRIX_H 32 | -------------------------------------------------------------------------------- /QVlearning.cpp: -------------------------------------------------------------------------------- 1 | #ifndef QVLEARNING 2 | #define QVLEARNING 3 | 4 | # include "QVlearning.h" 5 | 6 | 7 | QVlearning::QVlearning( const char * parameterFile, World * w ) { 8 | 9 | discreteStates = w->getDiscreteStates() ; 10 | stateDimension = w->getStateDimension() ; 11 | 12 | if ( not w->getDiscreteActions() ) { 13 | 14 | cout << "QV-learning does not support continuous actions." << endl ; 15 | cout << "Please check which MDP you are using it on." 
// Frees the tabular Q/V arrays or the per-action Q networks and the V
// network, depending on which representation the constructor built.
QVlearning::~QVlearning() {

	if ( discreteStates ) {

		for ( int s = 0 ; s < numberOfStates ; s++ ) {

			delete [] Q[s] ;

		}

		delete [] Q ;
		delete [] V ;

	} else {

		for ( int a = 0 ; a < numberOfActions ; a++ ) {
			delete QNN[a] ;
		}
		QNN.clear() ;
		delete VNN ;

		delete [] QTarget ;
		delete [] VTarget ;

		delete [] Qs ;

	}

	delete [] policy ;

}


// Reads the network sizes (nHiddenQ, nHiddenV) from the "nn" section of the
// parameter file; only needed for the continuous-state (neural net) case.
void QVlearning::readParameterFile( const char * parameterFile ) {

	if ( not discreteStates ) {

		ifstream ifile ;

		ifile.open( parameterFile, ifstream::in ) ;

		read_moveTo( &ifile, "nn" ) ;

		read_moveTo( &ifile, "nHiddenQ" ) ;
		ifile >> nHiddenQ ;
		read_moveTo( &ifile, "nHiddenV" ) ;
		ifile >> nHiddenV ;

		ifile.close() ;

	}

}

// One QV-learning step: both Q(s,a) and V(s) are moved toward the SAME
// target r + gamma*V(s') (the bootstrap uses V, not max Q -- this is what
// distinguishes QV-learning from Q-learning). At the end of an episode the
// target is just r. learningRate[0] is used for Q, learningRate[1] for V.
void QVlearning::update( State * state, Action * action, double rt, State * nextState, bool endOfEpisode, double * learningRate, double gamma ) {

	int at = action->discreteAction ;

	if ( state->discrete ) {

		// Tabular case: in-place TD updates on the Q and V tables.
		int st = state->discreteState ;
		int st_ = nextState->discreteState ;

		if ( endOfEpisode ) {

			Q[ st ][ at ] += learningRate[0]*( rt - Q[ st ][ at ] ) ;
			V[ st ] += learningRate[1]*( rt - V[ st ] ) ;

		} else {

			Q[ st ][ at ] += learningRate[0]*( rt + gamma*V[ st_ ] - Q[ st ][ at ] ) ;
			V[ st ] += learningRate[1]*( rt + gamma*V[ st_ ] - V[ st ] ) ;

		}

	} else {

		// Continuous case: compute the shared scalar target, then backprop
		// it through the taken action's Q network and through the V network.
		double * st = state->continuousState ;
		double * st_ = nextState->continuousState ;

		if ( endOfEpisode ) {

			QTarget[ 0 ] = rt ;
			VTarget[ 0 ] = rt ;

		} else {

			double Vs_ = VNN->forwardPropagate( st_ )[0] ;

			QTarget[ 0 ] = rt + gamma*Vs_ ;
			VTarget[ 0 ] = rt + gamma*Vs_ ;

		}

		QNN[ at ]->backPropagate( st, QTarget, learningRate[0] ) ;
		VNN->backPropagate( st, VTarget, learningRate[1] ) ;

	}

}

// Two learning rates: [0] for Q, [1] for V (see update()).
unsigned int QVlearning::getNumberOfLearningRates() {

	return 2 ;

}

const char * QVlearning::getName() {

	return "QVlearning" ;

}

#endif //QVLEARNING
#ifndef QLEARNING
#define QLEARNING

# include "Qlearning.h"
# include <cstdlib>  // NOTE(review): the include target was stripped in the source dump; drand48()/exit() suggest <cstdlib> -- confirm against the original file

using namespace std ;

// Q-learning (off-policy TD control): bootstraps on max_a' Q(s',a').
// Supports a tabular Q for discrete states or one neural network per action
// for continuous states. Discrete-action worlds only.
Qlearning::Qlearning( const char * parameterFile, World * w ) {

	discreteStates = w->getDiscreteStates() ;

	if ( not w->getDiscreteActions() ) {

		// One value per (state, action) is stored; a continuous action space
		// cannot be represented, so bail out early.
		cout << "Q-learning does not support continuous actions." << endl ;
		cout << "Please check which MDP you are using it on." << endl ;
		exit(0) ;

	} else {

		numberOfActions = w->getNumberOfActions() ;

	}

	srand48( clock() ) ;  // seed the exploration RNG

	readParameterFile( parameterFile ) ;

	if ( discreteStates ) {

		// Tabular case: |S| x |A| table, zero-initialised.
		numberOfStates = w->getNumberOfStates() ;

		Q = new double*[ numberOfStates ] ;

		for ( int s = 0 ; s < numberOfStates ; s++ ) {

			Q[s] = new double[ numberOfActions ] ;

			for ( int a = 0 ; a < numberOfActions ; a++ ) {
				Q[s][a] = 0.0 ;
			}

		}

	} else {

		// Continuous-state case: one small feed-forward net per action, each
		// mapping the state vector to a scalar Q-value.
		stateDimension = w->getStateDimension() ;

		int layerSizesA[] = { stateDimension, nHiddenQ, 1 } ;

		for ( int a = 0 ; a < numberOfActions ; a++ ) {

			QNN.push_back( new cNeuralNetwork( 1, layerSizesA ) ) ;

		}

		Qs = new double[ numberOfActions ] ;  // scratch: per-action Q-values

	}

	QTarget = new double[ 1 ] ;               // one-element backprop target
	policy = new double[ numberOfActions ] ;  // scratch: action probabilities

}

// Releases whichever Q representation the constructor allocated.
Qlearning::~Qlearning() {

	if ( discreteStates ) {

		for ( int s = 0 ; s < numberOfStates ; s++ ) {

			delete [] Q[s] ;

		}

		delete [] Q ;

	} else {

		for ( int a = 0 ; a < numberOfActions ; a++ ) {

			delete QNN[a] ;

		}

		QNN.clear() ;

		delete [] Qs ;

	}

	delete [] QTarget ;
	delete [] policy ;

}

// Reads the hidden-layer size (nHiddenQ) from the "nn" section of the
// parameter file; only needed for the continuous-state (neural net) case.
void Qlearning::readParameterFile( const char * parameterFile ) {

	if ( not discreteStates ) {

		ifstream ifile ;

		ifile.open( parameterFile, ifstream::in ) ;

		read_moveTo( &ifile, "nn" ) ;

		read_moveTo( &ifile, "nHiddenQ" ) ;
		ifile >> nHiddenQ ;

		ifile.close() ;

	}

}

// One Q-learning step on transition (s, a, r, s'):
//   Q(s,a) <- Q(s,a) + alpha * ( r + gamma * max_a' Q(s',a') - Q(s,a) )
// with the bootstrap term dropped when the episode ended at s'.
void Qlearning::update( State * state, Action * action, double rt, State * nextState, bool endOfEpisode, double * learningRate, double gamma ) {

	int at = action->discreteAction ;

	if ( state->discrete ) {

		int st = state->discreteState ;
		int st_ = nextState->discreteState ;

		if ( endOfEpisode ) {

			// Terminal transition: target is the bare reward.
			Q[ st ][ at ] += learningRate[0]*( rt - Q[ st ][ at ] ) ;

		} else {

			double maxQs = max( Q[ st_ ], numberOfActions ) ;

			Q[ st ][ at ] += learningRate[0]*( rt + gamma*maxQs - Q[ st ][ at ] ) ;

		}

	} else {

		double * st = state->continuousState ;
		double * st_ = nextState->continuousState ;

		if ( endOfEpisode ) {

			QTarget[ 0 ] = rt ;

		} else {

			// Evaluate all action networks at s' to find the greedy value.
			for ( int a = 0 ; a < numberOfActions ; a++ ) {
				Qs[a] = QNN[a]->forwardPropagate( st_ )[0] ;
			}

			double maxQs = max( Qs, numberOfActions ) ;

			QTarget[ 0 ] = rt + gamma*maxQs ;

		}

		// Only the taken action's network is trained toward the target.
		QNN[ at ]->backPropagate( st, QTarget, learningRate[0] ) ;

	}

}

// Single learning rate, used for the Q update.
unsigned int Qlearning::getNumberOfLearningRates() {

	return 1 ;

}

const char * Qlearning::getName() {

	return "Qlearning" ;

}

#endif //QLEARNING
#ifndef SARSA
#define SARSA

# include "Sarsa.h"


// SARSA (on-policy TD control): bootstraps on Q(s',a') for the action a'
// that was actually selected, so update() receives BOTH actions of the
// transition. Supports a tabular Q for discrete states or one neural
// network per action for continuous states. Discrete-action worlds only.
Sarsa::Sarsa( const char * parameterFile, World * w ) {

	discreteStates = w->getDiscreteStates() ;

	if ( not w->getDiscreteActions() ) {

		// One value per (state, action) is stored; continuous action spaces
		// cannot be represented, so bail out early.
		cout << "Sarsa does not support continuous actions." << endl ;
		cout << "Please check which MDP you are using it on." << endl ;
		exit(0) ;

	} else {

		numberOfActions = w->getNumberOfActions() ;

	}

	srand48( clock() ) ;  // seed the exploration RNG

	readParameterFile( parameterFile ) ;

	if ( discreteStates ) {

		// Tabular case: |S| x |A| table, zero-initialised.
		numberOfStates = w->getNumberOfStates() ;

		Q = new double*[ numberOfStates ] ;

		for ( int s = 0 ; s < numberOfStates ; s++ ) {

			Q[s] = new double[ numberOfActions ] ;

			for ( int a = 0 ; a < numberOfActions ; a++ ) {
				Q[s][a] = 0.0 ;
			}

		}

	} else {

		// Continuous-state case: one small feed-forward net per action, each
		// mapping the state vector to a scalar Q-value.
		stateDimension = w->getStateDimension() ;

		int layerSizesA[] = { stateDimension, nHiddenQ, 1 } ;

		for ( int a = 0 ; a < numberOfActions ; a++ ) {

			QNN.push_back( new cNeuralNetwork( 1, layerSizesA ) ) ;

		}

		Qs = new double[ numberOfActions ] ;  // scratch: per-action Q-values

	}

	QTarget = new double[ 1 ] ;               // one-element backprop target
	policy = new double[ numberOfActions ] ;  // scratch: action probabilities

}

// Releases whichever Q representation the constructor allocated.
Sarsa::~Sarsa() {

	if ( discreteStates ) {

		for ( int s = 0 ; s < numberOfStates ; s++ ) {

			delete [] Q[s] ;

		}

		delete [] Q ;

	} else {

		for ( int a = 0 ; a < numberOfActions ; a++ ) {

			delete QNN[a] ;

		}

		QNN.clear() ;

		delete [] Qs ;

	}

	delete [] QTarget ;
	delete [] policy ;

}

// Reads the hidden-layer size (nHiddenQ) from the "nn" section of the
// parameter file; only needed for the continuous-state (neural net) case.
void Sarsa::readParameterFile( const char * parameterFile ) {

	if ( not discreteStates ) {

		ifstream ifile ;

		ifile.open( parameterFile, ifstream::in ) ;

		read_moveTo( &ifile, "nn" ) ;

		read_moveTo( &ifile, "nHiddenQ" ) ;
		ifile >> nHiddenQ ;

		ifile.close() ;

	}

}

// One SARSA step on transition (s, a, r, s', a'):
//   Q(s,a) <- Q(s,a) + alpha * ( r + gamma * Q(s',a') - Q(s,a) )
// actions[0] is the action taken in s, actions[1] the action selected in s'.
// The bootstrap term is dropped when the episode ended at s'.
void Sarsa::update( State * state, Action * actions, double rt, State * nextState, bool endOfEpisode, double * learningRate, double gamma ) {

	int at = actions[0].discreteAction ;
	int at_ = actions[1].discreteAction ;

	if ( state->discrete ) {

		int st = state->discreteState ;
		int st_ = nextState->discreteState ;

		if ( endOfEpisode ) {

			// Terminal transition: target is the bare reward.
			Q[ st ][ at ] += learningRate[0]*( rt - Q[ st ][ at ] ) ;

		} else {

			Q[ st ][ at ] += learningRate[0]*( rt + gamma*Q[ st_ ][ at_ ] - Q[ st ][ at ] ) ;

		}

	} else {

		double * st = state->continuousState ;
		double * st_ = nextState->continuousState ;

		if ( endOfEpisode ) {

			QTarget[ 0 ] = rt ;

		} else {

			// On-policy bootstrap: evaluate only the network of the action
			// actually chosen in s'.
			double Qsa_ = QNN[ at_ ]->forwardPropagate( st_ )[0] ;

			QTarget[ 0 ] = rt + gamma*Qsa_ ;

		}

		// Only the taken action's network is trained toward the target.
		QNN[ at ]->backPropagate( st, QTarget, learningRate[0] ) ;

	}

}

// Single learning rate, used for the Q update.
unsigned int Sarsa::getNumberOfLearningRates() {

	return 1 ;

}

const char * Sarsa::getName() {

	return "Sarsa" ;

}

#endif //SARSA
# ifndef SMALLINTERFACE
# define SMALLINTERFACE

# include "SmallInterface.h"
# include "SmallMaze.h"

// Thin Experiment subclass that binds the generic experiment driver to the
// SmallMaze world: it only records the parameter-file path and parses it;
// all experiment behaviour comes from the Experiment base class.
SmallInterface::SmallInterface( const char * pFile ) {

	parameterFile = pFile ;

	parameterPath.assign( parameterFile ) ;

	readParameterFile() ;

}

// Entry point. Usage: ./SmallMaze <cfg>, where <cfg> is a configuration
// (parameter) file; exactly one argument is required.
int main( int argn, char *argv[] ) {

	if ( argn != 2 ) {

		cout << "You need to specify exactly one configuration (parameter) file." << endl ;
		return 0 ;

	}

	SmallInterface * interface = new SmallInterface( argv[1] ) ;

	SmallMaze * maze = new SmallMaze() ;

	// Runs the full experiment sweep defined by the configuration file.
	interface->getParametersAndRunExperiments( maze ) ;

	delete interface ;

	delete maze ;

	// Falling off the end of main() implicitly returns 0.

}

# endif //SMALLINTERFACE
// 3x3 grid world (see the layout sketch above): states 0-8, column-major,
// start in state 0, goal in state 8. Every step yields reward -1.0 except
// reaching the goal, which yields 0.0 and ends the episode.
SmallMaze::SmallMaze() {
	state = 0 ;              // episodes begin in the bottom-left cell

	discreteStates = true ;
	numberOfStates = 9 ;

	discreteActions = true ;
	numberOfActions = 4 ;    // up, left, down, right (see act())

	// The maze also accepts continuous 2-D actions, which act() maps to the
	// nearest of the four discrete moves.
	continuousActions = true ;
	actionDimension = 2 ;
}

SmallMaze::~SmallMaze() {

}

// Executes one action and returns the reward (-1.0, or 0.0 on reaching the
// goal). Moves that would leave the grid are ignored (the agent stays put).
double SmallMaze::act( Action * action ) {

	int direction ;

	if ( action->continuous ) {

		// The continuous action is interpreted as a vector and is mapped to the closest of the available vectors:
		double d1 = action->continuousAction[0] ; //up-down dimension
		double d2 = action->continuousAction[1] ; //left-right dimension

		if ( d1*d1 > d2*d2 ) { //if up-down is the largest dimension

			if ( d1 > 0 ) { //determine up or down
				direction = 0 ; //up
			} else {
				direction = 2 ; //down
			}

		} else {

			if ( d2 > 0 ) {
				direction = 3 ; //right
			} else {
				direction = 1 ; //left
			}

		}

		//cout << d1 << " " << d2 << ": " << direction << endl ;

	} else {

		direction = action->discreteAction ;

	}

	double reward = -1.0 ;

	eoe = false ; //We assume the episode does not end.

	// States are numbered column-wise: +1/-1 moves within a column (up/down),
	// +3/-3 moves between columns (right/left); the guards keep the agent
	// inside the 3x3 grid.
	if ( direction == 0 ) { // up

		if ( state % 3 < 2 ) {
			state++ ;
		}

	} else if ( direction == 1 ) { // left

		if ( state > 2 ) {
			state -= 3 ;
		}

	} else if ( direction == 2 ) { // down

		if ( state % 3 > 0 ) {
			state-- ;
		}

	} else { // right

		if ( state < 6 ) {
			state += 3 ;
		}

	}

	if ( state == 8 ) {
		// If state == 8, end episode and return to 0.
		reward = 0.0 ;
		eoe = true ;

		state = 0 ;
	}

	return reward ;
}

// Copies the current discrete state into the caller's State struct.
void SmallMaze::getState( State * s ) {

	s->discreteState = state ;

}

// Overwrites the maze's state (used by the experiment driver to reset).
void SmallMaze::setState( State * s ) {

	state = s->discreteState ;

}

// True iff the last act() reached the goal state.
bool SmallMaze::endOfEpisode() {

	return eoe ;

}

const char * SmallMaze::getName() {

	return "SmallMaze" ;

}
nMaxStepsPerTrainEpisode 0 12 | nMaxStepsPerTestEpisode 0 13 | 14 | algorithm 15 | algorithms Q 16 | tau 0.001 0.01 0.1 1.0 10.0 17 | epsilon 0.001 0.01 0.1 1.0 18 | sigma 0.001 0.01 0.1 1.0 19 | 20 | learningRates 21 | decreaseType none 22 | nLearningRates 3 23 | learningRate 0.0001 0.001 0.01 0.1 24 | learningRate 0.0 0.001 0.01 0.1 25 | learningRate 0.0 0.001 0.01 0.1 26 | 27 | discount 28 | gamma 0.99 29 | 30 | nn 31 | nHiddenQ 20 32 | nHiddenV 20 33 | -------------------------------------------------------------------------------- /State.h: -------------------------------------------------------------------------------- 1 | #ifndef STATE 2 | #define STATE 3 | 4 | struct State { 5 | bool continuous ; 6 | bool discrete ; 7 | 8 | int stateDimension ; 9 | int numberOfStates ; 10 | 11 | int discreteState ; 12 | double * continuousState ; 13 | }; 14 | 15 | #endif //STATE 16 | -------------------------------------------------------------------------------- /StateActionAlgorithm.cpp: -------------------------------------------------------------------------------- 1 | #ifndef QALGORITHM 2 | #define QALGORITHM 3 | 4 | # include "StateActionAlgorithm.h" 5 | 6 | StateActionAlgorithm::StateActionAlgorithm() { 7 | continuousStates = true ; 8 | discreteStates = true ; 9 | continuousActions = false ; 10 | discreteActions = true ; 11 | } 12 | 13 | void StateActionAlgorithm::getMaxActionFirst( State * state, Action * action ) { 14 | 15 | if ( state->discrete ) { 16 | 17 | Qs = Q[ state->discreteState ] ; 18 | 19 | } else { 20 | 21 | for ( int a = 0 ; a < numberOfActions ; a++ ) { 22 | 23 | Qs[a] = QNN[a]->forwardPropagate( state->continuousState )[0] ; 24 | 25 | } 26 | 27 | } 28 | 29 | action->discreteAction = argmax( Qs, numberOfActions ) ; 30 | 31 | } 32 | 33 | void StateActionAlgorithm::getMaxActionRandom( State * state, Action * action ) { 34 | 35 | if ( state->discrete ) { 36 | 37 | Qs = Q[ state->discreteState ] ; 38 | 39 | } else { 40 | 41 | for ( int a = 0 ; a < 
numberOfActions ; a++ ) { 42 | 43 | Qs[a] = QNN[a]->forwardPropagate( state->continuousState )[0] ; 44 | 45 | } 46 | 47 | } 48 | 49 | vector maxA = argmaxAll( Qs, numberOfActions ) ; 50 | 51 | int n = maxA.size() ; 52 | 53 | action->discreteAction = maxA[ (int) ( n*drand48() ) ] ; 54 | 55 | } 56 | 57 | void StateActionAlgorithm::getMaxAction( State * state, Action * action ) { 58 | 59 | getMaxActionRandom( state, action ) ; 60 | 61 | } 62 | 63 | void StateActionAlgorithm::getRandomAction( State * state, Action * action ) { 64 | 65 | action->discreteAction = (int) ( numberOfActions*drand48() ) ; 66 | 67 | } 68 | 69 | 70 | void StateActionAlgorithm::explore( State * state, Action * action, double explorationRate, string explorationType, bool endOfEpisode ) { 71 | 72 | if ( explorationType.compare("boltzmann") == 0 ) { 73 | 74 | boltzmann( state, action, explorationRate ) ; 75 | 76 | } else if ( explorationType.compare("egreedy") == 0 ) { 77 | 78 | egreedy( state, action, explorationRate ) ; 79 | 80 | } else if ( explorationType.compare("gaussian") == 0 ) { 81 | 82 | cout << "You are trying to use gaussian exploration for an algorithm that" << endl ; 83 | cout << "does not support it. Please check your parameter file." 
<< endl ; 84 | exit(0) ; 85 | 86 | } else { 87 | 88 | cout << "Warning, no exploreType: " << explorationType << endl ; 89 | exit(0) ; 90 | 91 | } 92 | 93 | action->continuous = false ; 94 | action->discrete = true ; 95 | 96 | } 97 | void StateActionAlgorithm::egreedy( State * state, Action * action, double epsilon ) { 98 | 99 | if ( drand48() < epsilon ) { 100 | 101 | getMaxAction( state, action ) ; 102 | 103 | } else { 104 | 105 | getRandomAction( state, action ) ; 106 | 107 | } 108 | 109 | } 110 | 111 | void StateActionAlgorithm::boltzmann( State * state, Action * action, double tau ) { 112 | 113 | setQs( state, action ) ; 114 | 115 | boltzmann( action, tau ) ; 116 | 117 | } 118 | 119 | void StateActionAlgorithm::setQs( State * state, Action * action ) { 120 | 121 | if ( state->discrete ) { 122 | 123 | Qs = Q[state->discreteState] ; 124 | 125 | } else { 126 | 127 | for ( int a = 0 ; a < numberOfActions ; a++ ) { 128 | Qs[a] = QNN[a]->forwardPropagate( state->continuousState )[0] ; 129 | } 130 | 131 | } 132 | 133 | } 134 | 135 | 136 | void StateActionAlgorithm::boltzmann( Action * action, double tau ) { 137 | 138 | double sumQs = 0 ; 139 | double maxQs = max( Qs, numberOfActions ) ; 140 | 141 | for ( int a = 0 ; a < numberOfActions ; a++ ) { 142 | policy[a] = exp( (Qs[a]-maxQs)/tau ) ; 143 | sumQs += policy[a] ; 144 | } 145 | 146 | for ( int a = 0 ; a < numberOfActions ; a++ ) { 147 | policy[a] /= sumQs ; 148 | } 149 | 150 | double rnd = drand48() ; 151 | 152 | double total = policy[0] ; 153 | int a = 0 ; 154 | 155 | while ( total < rnd ) { 156 | a++ ; 157 | total += policy[ a ] ; 158 | if ( a >= numberOfActions ) { 159 | // Something went wrong... 
160 | cout << total << " " << rnd << endl ; 161 | for ( int _a = 0 ; _a < numberOfActions ; _a++ ) { 162 | cout << policy[_a] << " " ; 163 | } 164 | cout << endl ; 165 | for ( int _a = 0 ; _a < numberOfActions ; _a++ ) { 166 | cout << Qs[_a] << " " ; 167 | } 168 | cout << endl ; 169 | exit(0) ; 170 | } 171 | } 172 | 173 | action->discreteAction = a ; 174 | 175 | } 176 | 177 | bool StateActionAlgorithm::getContinuousStates() { 178 | return continuousStates ; 179 | } 180 | 181 | bool StateActionAlgorithm::getDiscreteStates() { 182 | return discreteStates ; 183 | } 184 | 185 | bool StateActionAlgorithm::getContinuousActions() { 186 | return continuousActions ; 187 | } 188 | 189 | bool StateActionAlgorithm::getDiscreteActions() { 190 | return discreteActions ; 191 | } 192 | 193 | #endif //QALGORITHM 194 | -------------------------------------------------------------------------------- /StateActionAlgorithm.h: -------------------------------------------------------------------------------- 1 | #ifndef QALGORITHM_H 2 | #define QALGORITHM_H 3 | 4 | # include 5 | # include 6 | # include 7 | # include "cNeuralNetwork.h" 8 | # include "Algorithm.h" 9 | # include "StateActionUtils.h" 10 | 11 | class StateActionAlgorithm : public Algorithm { 12 | public: 13 | StateActionAlgorithm() ; 14 | virtual ~StateActionAlgorithm() {} 15 | void getMaxAction( State * state, Action * action ) ; 16 | void explore( State * state, Action * action, double explorationRate, string explorationType, bool endOfEpisode ) ; 17 | bool getContinuousStates() ; 18 | bool getDiscreteStates() ; 19 | bool getContinuousActions() ; 20 | bool getDiscreteActions() ; 21 | 22 | protected: 23 | void getMaxActionFirst( State *, Action * action ) ; 24 | void getMaxActionRandom( State *, Action * action ) ; 25 | void getRandomAction( State * state, Action * action ) ; 26 | 27 | void setQs( State * state, Action * action ) ; 28 | 29 | void egreedy( State * state, Action * action, double epsilon ) ; 30 | void boltzmann( 
State * state, Action * action, double tau ) ; 31 | void boltzmann( Action * action, double tau ) ; 32 | void gaussian( State * state, Action * action, double tau ) ; 33 | 34 | int nHiddenQ ; 35 | double ** Q ; 36 | vector< cNeuralNetwork * > QNN ; 37 | double * Qs ; 38 | double * QTarget ; 39 | double * policy ; 40 | }; 41 | 42 | #endif //QALGORITHM_H 43 | -------------------------------------------------------------------------------- /StateActionUtils.cpp: -------------------------------------------------------------------------------- 1 | #ifndef STATEACTIONUTILS 2 | #define STATEACTIONUTILS 3 | 4 | # include "StateActionUtils.h" 5 | 6 | 7 | void copyAction( Action * aFROM, Action * aTO ) { 8 | 9 | if ( aFROM->discrete and aTO->discrete ) { 10 | 11 | aTO->discreteAction = aFROM->discreteAction ; 12 | 13 | } 14 | 15 | if ( aFROM->continuous and aTO->continuous and ( aFROM->actionDimension == aTO->actionDimension ) ) { 16 | 17 | for ( int a = 0 ; a < aFROM->actionDimension ; a++ ) { 18 | 19 | aTO->continuousAction[a] = aFROM->continuousAction[a] ; 20 | 21 | } 22 | 23 | } 24 | 25 | } 26 | 27 | void copyState( State * sFROM, State * sTO ) { 28 | 29 | if ( sFROM->discrete and sTO->discrete ) { 30 | 31 | sTO->discreteState = sFROM->discreteState ; 32 | 33 | } 34 | 35 | if ( sFROM->continuous and sTO->continuous and ( sFROM->stateDimension == sTO->stateDimension ) ) { 36 | 37 | for ( int s = 0 ; s < sFROM->stateDimension ; s++ ) { 38 | 39 | sTO->continuousState[s] = sFROM->continuousState[s] ; 40 | 41 | } 42 | 43 | } 44 | 45 | } 46 | 47 | #endif //STATEACTIONUTILS 48 | -------------------------------------------------------------------------------- /StateActionUtils.h: -------------------------------------------------------------------------------- 1 | #ifndef STATEACTIONUTILS_H 2 | #define STATEACTIONUTILS_H 3 | 4 | # include "State.h" 5 | # include "Action.h" 6 | # include 7 | 8 | void copyState( State * sFROM, State * sTO ) ; 9 | void copyAction( Action * aFROM, 
Action * aTO ) ; 10 | 11 | #endif //STATEACTIONUTILS_H 12 | -------------------------------------------------------------------------------- /World.h: -------------------------------------------------------------------------------- 1 | #ifndef WORLD_H 2 | #define WORLD_H 3 | 4 | # include "State.h" 5 | # include "Action.h" 6 | 7 | class World { 8 | public: 9 | World() { 10 | continuousStates = false ; 11 | discreteStates = false ; 12 | continuousActions = false ; 13 | discreteActions = false ; 14 | } 15 | virtual ~World() { } 16 | virtual bool getContinuousStates() { return continuousStates ; } 17 | virtual bool getDiscreteStates() { return discreteStates ; } 18 | virtual int getStateDimension() { return stateDimension ; } 19 | virtual int getNumberOfStates() { return numberOfStates ; } 20 | virtual bool getContinuousActions() { return continuousActions ; } 21 | virtual bool getDiscreteActions() { return discreteActions ; } 22 | virtual int getActionDimension() { return actionDimension ; } 23 | virtual int getNumberOfActions() { return numberOfActions ; } 24 | virtual void reset() {} 25 | virtual bool endOfEpisode() { return false ; } 26 | 27 | virtual double act( Action * ) =0; 28 | 29 | virtual const char * getName() =0; 30 | virtual void getState( State * ) =0 ; 31 | virtual void setState( State * ) =0 ; 32 | 33 | protected: 34 | bool discreteStates, discreteActions, continuousStates, continuousActions ; 35 | int stateDimension, actionDimension, numberOfStates, numberOfActions ; 36 | 37 | }; 38 | 39 | #endif //WORLD_H 40 | -------------------------------------------------------------------------------- /cFunction.h: -------------------------------------------------------------------------------- 1 | #ifndef CFUNCTION_H 2 | #define CFUNCTION_H 3 | 4 | #include 5 | using namespace std; 6 | 7 | 8 | class cFunction { 9 | public: 10 | cFunction() {} 11 | virtual ~cFunction() {} 12 | virtual int printName() =0; 13 | virtual double output( double input ) =0; 14 | 
virtual void output( double * input, double * output, int n ) =0; 15 | virtual double derivative( double input, double output ) =0; 16 | }; 17 | 18 | #endif //CFUNCTION_H 19 | -------------------------------------------------------------------------------- /cLinear.cpp: -------------------------------------------------------------------------------- 1 | #ifndef CLINEAR 2 | #define CLINEAR 3 | 4 | #include "cLinear.h" 5 | using namespace std; 6 | 7 | cLinear::cLinear() : cFunction() { 8 | //~ cout << "constructor Linear\n" ; 9 | } 10 | 11 | cLinear::~cLinear() { 12 | //~ cout << "destructor Linear\n" ; 13 | } 14 | 15 | int cLinear::printName() { 16 | cout << "cLinear\n" ; 17 | return 0 ; 18 | } 19 | 20 | double cLinear::output( double input ) { 21 | return input ; 22 | 23 | } 24 | 25 | void cLinear::output( double * inp, double * outp, int n) { 26 | for ( int i = 0 ; i < n ; i++ ) { 27 | outp[i] = inp[i] ; 28 | } 29 | } 30 | 31 | double cLinear::derivative( double input, double output ) { 32 | return 1.0 ; 33 | } 34 | 35 | void derivative( double * input, double * output, double * derivative, int n ) { 36 | for ( int i = 0 ; i < n ; i++ ) { 37 | derivative[i] = 1.0 ; 38 | } 39 | } 40 | 41 | 42 | #endif //CLINEAR 43 | -------------------------------------------------------------------------------- /cLinear.h: -------------------------------------------------------------------------------- 1 | #ifndef CLINEAR_H 2 | #define CLINEAR_H 3 | 4 | #include "cFunction.h" 5 | using namespace std; 6 | 7 | class cLinear : public cFunction { 8 | public: 9 | cLinear() ; 10 | ~cLinear() ; 11 | int printName() ; 12 | double output( double input ); 13 | void output( double * input, double * output, int n ); 14 | double derivative( double input, double output ); 15 | void derivative( double * input, double * output, double * derivative, int n ); 16 | }; 17 | 18 | #endif //CLINEAR_H 19 | -------------------------------------------------------------------------------- 
/cNeuralNetwork.cpp: -------------------------------------------------------------------------------- 1 | #ifndef CNEURALNETWORK 2 | #define CNEURALNETWORK 3 | #define THRESHOLD 2 4 | #define TANH 1 5 | #define LINEAR 0 6 | # include 7 | # include 8 | # include 9 | # include "cNeuralNetwork.h" 10 | # include "cFunction.h" 11 | # include "cLinear.cpp" 12 | # include "cThreshold.cpp" 13 | # include "cTanH.cpp" 14 | # include "Matrix.cpp" 15 | 16 | using namespace std ; 17 | 18 | cNeuralNetwork::cNeuralNetwork( const char * nnFile ) { 19 | SIZEOFDOUBLE = sizeof( double ) ; 20 | readNetwork( nnFile ) ; 21 | } 22 | 23 | cNeuralNetwork::cNeuralNetwork( int nLayersInit, int * layerSizeInit ) { 24 | int * layerFunctions = new int[ nLayersInit + 2 ] ; 25 | for ( int l = 1 ; l < nLayersInit + 1 ; l++ ) { 26 | layerFunctions[ l ] = TANH ; 27 | } 28 | layerFunctions[ 0 ] = LINEAR ; 29 | layerFunctions[ nLayersInit + 1 ] = LINEAR ; 30 | 31 | init( nLayersInit, layerSizeInit, layerFunctions ) ; 32 | 33 | delete [] layerFunctions ; 34 | 35 | } 36 | 37 | cNeuralNetwork::cNeuralNetwork( int nLayersInit, int * layerSizeInit, int * layerFunctionInit ) { 38 | // int nLayersInit : the number of hidden layers (So for input, hidden, output -> 1) 39 | // int * layerSizeInit : the number of nodes per layer (# nodes layers = # weights layers + 1) 40 | // (size should be nLayersInit + 2) 41 | // int * layerFunctionInit : code, specifying the function per (node) layer 42 | 43 | //~ cout << "constructor NN L\n" ; 44 | init( nLayersInit, layerSizeInit, layerFunctionInit ) ; 45 | } 46 | 47 | void cNeuralNetwork::init( int nLayersInit, int * layerSizeInit, int * layerFunctionInit ) { 48 | srand48(time(0)); 49 | 50 | SIZEOFDOUBLE = sizeof( double ) ; 51 | 52 | nLayers = nLayersInit + 2 ; // # node layers == # hidden layers + 2 53 | nInput = layerSizeInit[ 0 ] ; 54 | nOutput = layerSizeInit[ nLayers - 1 ] ; 55 | 56 | //~ cout << "nInput" << nInput << endl ; 57 | //~ cout << "nOutput" << nOutput << 
endl ; 58 | //~ cout << "nLayers" << nLayers << endl ; 59 | 60 | layerFunction = new cFunction*[ nLayers ] ; 61 | layerFunctionInts = new int[ nLayers ] ; 62 | weights = new Matrix*[ nLayers - 1 ] ; // # weights layers = # node layers - 1 63 | layerIn = new Matrix*[ nLayers ] ; 64 | layerOut = new Matrix*[ nLayers ] ; 65 | layerSize = new int[ nLayers ] ; 66 | 67 | for( l = 0 ; l < ( nLayers - 1 ) ; l++ ) { 68 | weights[ l ] = new Matrix( layerSizeInit[ l ] + 1, layerSizeInit[ l + 1 ] ) ; // layerSize[ l ] + 1, for bias 69 | } 70 | for( l = 0 ; l < nLayers ; l++ ) { 71 | layerSize[ l ] = layerSizeInit[ l ] ; 72 | layerIn[ l ] = new Matrix( layerSize[ l ] ) ; 73 | layerOut[ l ] = new Matrix( layerSize[ l ] ) ; 74 | 75 | layerFunctionInts[ l ] = layerFunctionInit[ l ] ; 76 | if ( layerFunctionInit[ l ] == TANH ) { 77 | layerFunction[ l ] = new cTanH() ; 78 | } else if ( layerFunctionInit[ l ] == LINEAR ) { 79 | layerFunction[ l ] = new cLinear() ; 80 | } else { 81 | cout << "WARNING: Unknown layer function type: " ; 82 | cout << layerFunctionInit[ l ] << '\n' ; 83 | cout << "layer: " << l << '\n' ; 84 | exit(1) ; 85 | } 86 | } 87 | 88 | // Initialise the weights randomly between -0.3 and 0.3 89 | // randomizeWeights( -0.3, 0.3 ) ; 90 | randomizeWeights( -0.03, 0.03 ) ; 91 | recentlyUpdated = true ; 92 | } 93 | 94 | cNeuralNetwork::~cNeuralNetwork( ) { 95 | //~ cout << "destructor NN\n" ; 96 | for( l = 0 ; l < nLayers - 1 ; l++ ) { 97 | delete weights[ l ] ; 98 | } 99 | for( l = 0 ; l < nLayers ; l++ ) { 100 | delete layerIn[ l ] ; 101 | delete layerOut[ l ] ; 102 | delete layerFunction[ l ] ; 103 | } 104 | delete [] layerIn ; 105 | delete [] layerOut ; 106 | delete [] layerFunction ; 107 | delete [] layerFunctionInts ; 108 | delete [] layerSize ; 109 | delete [] weights ; 110 | } 111 | 112 | void cNeuralNetwork::changeFunction( int layer, int function ) { 113 | delete layerFunction[ layer ] ; 114 | 115 | if ( function == TANH ) { 116 | 117 | layerFunction[ layer ] = 
new cTanH() ; 118 | 119 | } else if ( function == LINEAR ) { 120 | 121 | layerFunction[ layer ] = new cLinear() ; 122 | 123 | } else if ( function == THRESHOLD ) { 124 | 125 | layerFunction[ layer ] = new cThreshold() ; 126 | 127 | } 128 | 129 | recentlyUpdated = true ; 130 | } 131 | 132 | void cNeuralNetwork::_forwardPropLayer( int layer ) { 133 | 134 | w = weights[ layer ]->getPtr() ; // The weights of the current layer 135 | 136 | nFormer = layerSize[ layer ] ; // # nodes in the former nodes layer 137 | nNext = layerSize[ layer + 1 ] ; // # nodes in the next nodes layer 138 | 139 | formerOut = layerOut[ layer ]->getPtr() ; // The output of the former layer 140 | nextIn = layerIn[ layer + 1 ]->getPtr() ; // The input of the next layer (to be set) 141 | nextOut = layerOut[ layer + 1 ]->getPtr() ; // The output of the next layer (follows from the input and function) 142 | 143 | cFunction * f = layerFunction[layer+1] ; // The function on the next layer 144 | 145 | // Initialise inputs to the next layer to zero 146 | for( o = 0 ; o < nNext ; o++ ) { 147 | nextIn[ o ] = 0.0 ; 148 | } 149 | 150 | // initialise counter for the weights 151 | int wPos = 0 ; 152 | // add weighted inputs 153 | for( i = 0 ; i < nFormer ; i++ ) { 154 | if ( formerOut[ i ] != 0.0) { 155 | for( o = 0 ; o < nNext ; o++ ) { 156 | nextIn[ o ] += w[ wPos ] * formerOut[ i ] ; 157 | wPos++ ; 158 | } 159 | } else { 160 | wPos += nNext ; 161 | } 162 | } 163 | // add bias and calculate output 164 | for( o = 0 ; o < nNext ; o++ ) { 165 | nextIn[ o ] += w[ wPos ] ; 166 | wPos++ ; 167 | } 168 | f->output( nextIn, nextOut, nNext ) ; 169 | } 170 | 171 | double * cNeuralNetwork::_backPropLayer( int layer, double * oError, double learningSpeed ) { 172 | 173 | w = weights[ layer ]->getPtr() ; // The weights of the current layer 174 | 175 | nFormer = layerSize[ layer ] ; // # nodes in the former nodes layer 176 | nNext = layerSize[ layer + 1 ] ; // # nodes in the next nodes layer 177 | 178 | formerOut = 
layerOut[ layer ]->getPtr() ; // The output of the former layer 179 | formerIn = layerIn[ layer ]->getPtr() ; // The input of the former layer 180 | 181 | cFunction * f = layerFunction[ layer ] ; // Function of the former layer 182 | 183 | wPos = 0 ; 184 | 185 | iError = new double[ nFormer ] ; // Error of the input of the former layer 186 | 187 | if ( layer > 0 ) { 188 | 189 | for( i = 0 ; i < nFormer ; i++ ) { 190 | 191 | iError[ i ] = 0.0 ; 192 | 193 | if ( formerOut[ i ] != 0.0 ) { 194 | 195 | for ( o = 0 ; o < nNext ; o++ ) { 196 | 197 | // First get error: 198 | iError[ i ] += w[ wPos ] * oError[ o ] ; 199 | 200 | // Then update the weight: 201 | w[ wPos ] += formerOut[ i ] * oError[ o ] ; 202 | 203 | wPos++ ; 204 | 205 | } 206 | 207 | } else { 208 | 209 | for ( o = 0 ; o < nNext ; o++ ) { 210 | 211 | // Only get error: 212 | iError[ i ] += w[ wPos ] * oError[ o ] ; 213 | 214 | wPos++ ; 215 | 216 | } 217 | 218 | } 219 | // Pass the error through the layers function: 220 | iError[ i ] *= f->derivative( formerIn[ i ], formerOut[ i ] ) ; 221 | 222 | } 223 | 224 | } else { 225 | 226 | for( i = 0 ; i < nFormer ; i++ ) { 227 | 228 | if ( formerOut[ i ] != 0.0 ) { 229 | 230 | for ( o = 0 ; o < nNext ; o++ ) { 231 | 232 | // Only update weight: 233 | w[ wPos ] += formerOut[ i ] * oError[ o ] ; 234 | 235 | wPos++ ; 236 | 237 | } 238 | 239 | } else { 240 | 241 | wPos += nNext ; 242 | 243 | } 244 | 245 | } 246 | } 247 | 248 | for ( o = 0 ; o < nNext ; o++ ) { 249 | 250 | // Update the bias 251 | w[ wPos ] += oError[ o ] ; 252 | 253 | wPos++ ; 254 | 255 | } 256 | 257 | 258 | 259 | return iError ; 260 | } 261 | 262 | void cNeuralNetwork::backPropagate( double *input, double *target, double learningSpeed ) { 263 | //If the network has been adapted after the last forward propagation, 264 | //forwardPropagate first to correctly set the hidden activations: 265 | if ( recentlyUpdated ) { 266 | forwardPropagate( input ) ; 267 | } else { 268 | //Check whether the activations of 
the layers correspond with the present input 269 | inputIn = layerIn[ 0 ]->getPtr() ; 270 | bool useLastActivations = true ; 271 | for ( i = 0 ; ( i < nInput ) & useLastActivations ; i++ ){ 272 | if ( input[i] != inputIn[i] ) { 273 | useLastActivations = false ; 274 | } 275 | } 276 | if ( !useLastActivations ) { 277 | //If the activations don't correspond to the last input (and the network 278 | //has been adapted in the meantime) set the activations by a forward 279 | //propagation 280 | forwardPropagate( input ) ; 281 | } 282 | } 283 | error = new double[ nOutput ] ; //error of the output layer. 284 | outputOut = layerOut[ nLayers - 1 ]->getPtr() ;// Output of the output layer 285 | 286 | for( o = 0 ; o < nOutput ; o++) { 287 | error[ o ] = target[ o ] - outputOut[ o ] ; 288 | } 289 | //backPropagate the error 290 | backPropagateError( input, error, learningSpeed ) ; 291 | delete[] error ; 292 | } 293 | 294 | void cNeuralNetwork::backPropagateError( double *input, double *error, double learningSpeed ) { 295 | //If the network has been adapted after the last forward propagation, 296 | //forwardPropagate first to correctly set the hidden activations: 297 | if ( recentlyUpdated ) { 298 | forwardPropagate( input ) ; 299 | } else { 300 | //Check whether the activations of the layers correspond with the present input 301 | inputIn = layerIn[ 0 ]->getPtr() ; 302 | bool useLastActivations = true ; 303 | for ( i = 0 ; ( i < nInput ) & useLastActivations ; i++ ){ 304 | if ( input[i] != inputIn[i] ) { 305 | useLastActivations = false ; 306 | } 307 | } 308 | if ( !useLastActivations ) { 309 | //If the activations don't correspond to the last input (and the network 310 | //has been adapted in the meantime) set the activations by a forward 311 | //propagation 312 | forwardPropagate( input ) ; 313 | } 314 | } 315 | oError = new double[ nOutput ] ; //error of the output layer. 
316 | 317 | outputIn = layerIn[ nLayers - 1 ]->getPtr() ; // Input of the output layer 318 | outputOut = layerOut[ nLayers - 1 ]->getPtr() ;// Output of the output layer 319 | cFunction * f = layerFunction[ nLayers - 1 ] ; // Function of the output layer 320 | 321 | //First calculate the error of the input of the output layer: 322 | for( o = 0 ; o < nOutput ; o++) { 323 | oError[ o ] = learningSpeed*error[ o ] ; 324 | oError[ o ] *= f->derivative( outputIn[ o ], outputOut[ o ] ) ; 325 | } 326 | 327 | //Now propagate until reaching the input layer: 328 | for ( l = nLayers - 2 ; l >= 0 ; l-- ) { 329 | iError = _backPropLayer( l, oError, learningSpeed ) ; 330 | delete [] oError ; 331 | oError = iError ; 332 | } 333 | 334 | delete [] iError ; 335 | 336 | recentlyUpdated = true ; 337 | } 338 | 339 | 340 | 341 | double * cNeuralNetwork::forwardPropagate( double *input ) { 342 | cFunction * f = layerFunction[ 0 ] ; // Function of the input layer 343 | 344 | double * inputIn = layerIn[ 0 ]->getPtr() ; // Input of the first layer (to be set) 345 | double * inputOut = layerOut[ 0 ]->getPtr() ; // Output of the first layer (to be set) 346 | 347 | //First set the first layer 348 | for( i = 0 ; i < nInput ; i++ ) { 349 | inputIn[ i ] = input[ i ] ; 350 | } 351 | 352 | f->output( input, inputOut, nInput ) ; 353 | 354 | //Now propagate (sets layerIn and layerOut for each layer) 355 | for ( l = 0 ; l < (nLayers - 1) ; l++ ) { 356 | _forwardPropLayer( l ) ; 357 | } 358 | 359 | //Set recentlyUpdated to false to show that the activations where set after the last change to the network 360 | recentlyUpdated = false ; 361 | 362 | return layerOut[ nLayers - 1 ]->getPtr() ; // Output of the last layer 363 | 364 | } 365 | 366 | double * cNeuralNetwork::forwardPropagate( vector *input_vector ) { 367 | if ( (int) input_vector->size() != nInput ) { 368 | cerr << "Vector input is incorrect size: " << input_vector->size() << " instead of " << nInput << endl ; 369 | throw -1 ; 370 | } 371 | 372 
| double * input = new double[ nInput ] ; 373 | for ( i = 0 ; i < nInput ; i++ ) { 374 | input[ i ] = input_vector->at( i ) ; 375 | } 376 | 377 | double * output = forwardPropagate( input ) ; 378 | 379 | delete [] input ; 380 | 381 | return output ; 382 | } 383 | 384 | 385 | void cNeuralNetwork::forwardPropagate( double *input, List * output ) { 386 | cFunction * f = layerFunction[ 0 ] ; // Function of the input layer 387 | 388 | double * inputIn = layerIn[ 0 ]->getPtr() ; // Input of the first layer (to be set) 389 | double * inputOut = layerOut[ 0 ]->getPtr() ; // Output of the first layer (to be set) 390 | 391 | //~ cout << "nInput" << nInput << endl ; 392 | //~ cout << "nOutput" << nOutput << endl ; 393 | //~ cout << "nLayers" << nLayers << endl ; 394 | 395 | //First set the first layer 396 | for( i = 0 ; i < nInput ; i++ ) { 397 | inputIn[ i ] = input[ i ] ; 398 | } 399 | 400 | f->output( input, inputOut, nInput ) ; 401 | 402 | //Now propagate (sets layerIn and layerOut for each layer) 403 | for ( l = 0 ; l < (nLayers - 1) ; l++ ) { 404 | _forwardPropLayer( l ) ; 405 | } 406 | 407 | double * outputOut = layerOut[ nLayers - 1 ]->getPtr() ; // Output of the last layer 408 | 409 | //Finally, return the output of the last layer through the argument 410 | output->contents = new double[ nOutput ] ; 411 | for ( o = 0 ; o < nOutput ; o++ ) { 412 | output->contents[ o ] = outputOut[ o ] ; 413 | } 414 | output->length = nOutput ; 415 | //Set recentlyUpdated to false to show that the activations where set after the last change to the network 416 | recentlyUpdated = false ; 417 | } 418 | 419 | double cNeuralNetwork::getActivation( int layer, int node ) { 420 | return layerOut[ layer ]->get( node ) ; 421 | } 422 | 423 | void cNeuralNetwork::getActivations( int layer, List * activations ) { 424 | double * layerActivation = layerOut[ layer ]->getPtr() ; // Output of the requested layer 425 | 426 | //Return the output of the requested layer through the argument 427 | int 
nActivations = layerSize[ layer ] ; 428 | activations->contents = new double[ nActivations ] ; 429 | for ( o = 0 ; o < nActivations ; o++ ) { 430 | activations->contents[ o ] = layerActivation[ o ] ; 431 | } 432 | activations->length = nActivations ; 433 | } 434 | 435 | double * cNeuralNetwork::getActivations( int layer ) { 436 | return layerOut[ layer ]->getPtr() ; // Output of the requested layer 437 | } 438 | 439 | double cNeuralNetwork::getWeights( int layer, int i, int j ) { 440 | return weights[ layer ]->get( i*(layerSize[ layer + 1 ]) + j ) ; 441 | } 442 | 443 | void cNeuralNetwork::setWeights( int layer, int i, int j, double val ) { 444 | recentlyUpdated = true ; 445 | weights[ layer ]->set( i*(layerSize[ layer + 1 ]) + j, val ) ; 446 | } 447 | 448 | void cNeuralNetwork::randomizeWeights( double min, double max ) { 449 | for ( l = 0 ; l < ( nLayers - 1 ) ; l++ ) { 450 | for ( i = 0 ; i < ( layerSize[l] + 1 ) ; i++ ) { 451 | for ( o = 0 ; o < layerSize[ l + 1 ] ; o++ ) { 452 | setWeights( l, i, o, min + (max - min)*drand48() ) ; 453 | } 454 | } 455 | } 456 | } 457 | 458 | void cNeuralNetwork::randomizeWeights( double min, double max, int seed ) { 459 | srand( seed ) ; 460 | for ( l = 0 ; l < ( nLayers - 1 ) ; l++ ) { 461 | for ( i = 0 ; i < ( layerSize[l] + 1 ) ; i++ ) { 462 | for ( o = 0 ; o < layerSize[ l + 1 ] ; o++ ) { 463 | setWeights( l, i, o, min + (max - min)*drand48() ) ; 464 | } 465 | } 466 | } 467 | } 468 | 469 | void cNeuralNetwork::printNetwork() { 470 | for ( l = 0 ; l < (nLayers - 1) ; l++ ) { 471 | _printLayer( l ) ; 472 | } 473 | } 474 | 475 | void cNeuralNetwork::_printLayer( int layer ) { 476 | double * w = weights[ layer ]->getPtr() ; // The weights of the current layer 477 | cout << "layer " << layer ; 478 | 479 | int nFormer = layerSize[ layer ] ; // # nodes in the former nodes layer 480 | int nNext = layerSize[ layer + 1 ] ; // # nodes in the next nodes layer 481 | 482 | cout << " nFormer " << nFormer ; 483 | cout << " nNext " << nNext 
<< endl ; 484 | 485 | int wPos = 0 ; 486 | for( i = 0 ; i < nFormer ; i++ ) { 487 | for( o = 0 ; o < nNext ; o++ ) { 488 | cout << w[ wPos ] << ", " ; 489 | wPos++ ; 490 | } 491 | cout << '\n' ; 492 | } 493 | // add bias and calculate output 494 | for( o = 0 ; o < nNext ; o++ ) { 495 | cout << " B " << w[ wPos ] << '\n' ; 496 | wPos++ ; 497 | } 498 | } 499 | 500 | int cNeuralNetwork::read_int(istream &is) { 501 | int result; 502 | char *s = (char *) &result; 503 | is.read(s, sizeof(result)); 504 | return result; 505 | } 506 | 507 | void cNeuralNetwork::write_int(ostream &is, int result) { 508 | char *s = (char *) &result; 509 | is.write(s, sizeof(result)); 510 | } 511 | 512 | double cNeuralNetwork::read_double(istream &is) { 513 | double result; 514 | char *s = (char *) &result; 515 | is.read(s, sizeof(result)); 516 | return result; 517 | } 518 | 519 | void cNeuralNetwork::write_double(ostream &is, double result) { 520 | char *s = (char *) &result; 521 | is.write(s, sizeof(result)); 522 | } 523 | 524 | void cNeuralNetwork::readNetwork( const char * file ) { 525 | ifstream ifile ; 526 | 527 | ifile.open( file, ifstream::in ) ; 528 | 529 | nLayers = read_int( ifile ) ; 530 | 531 | int * layerSizeInit = new int[nLayers] ; 532 | int * layerFunctionInit = new int[nLayers] ; 533 | 534 | for ( int l = 0 ; l < nLayers ; l++ ) { 535 | layerSizeInit[l] = read_int( ifile ) ; 536 | layerFunctionInit[l] = read_int( ifile ) ; 537 | } 538 | 539 | nInput = layerSizeInit[ 0 ] ; 540 | nOutput = layerSizeInit[ nLayers - 1 ] ; 541 | //~ cout << "nInput" << nInput << endl ; 542 | //~ cout << "nOutput" << nOutput << endl ; 543 | //~ cout << "nLayers" << nLayers << endl ; 544 | 545 | layerFunction = new cFunction*[ nLayers ] ; 546 | layerFunctionInts = new int[ nLayers ] ; 547 | weights = new Matrix*[ nLayers - 1 ] ; // # weights layers = # node layers - 1 548 | layerIn = new Matrix*[ nLayers ] ; 549 | layerOut = new Matrix*[ nLayers ] ; 550 | layerSize = new int[ nLayers ] ; 551 | 552 | 
for( int l = 0 ; l < ( nLayers - 1 ) ; l++ ) { 553 | weights[ l ] = new Matrix( layerSizeInit[ l ] + 1, layerSizeInit[ l + 1 ] ) ; // layerSize[ l ] + 1, for bias 554 | } 555 | for( int l = 0 ; l < nLayers ; l++ ) { 556 | layerSize[ l ] = layerSizeInit[ l ] ; 557 | layerIn[ l ] = new Matrix( layerSize[ l ] ) ; 558 | layerOut[ l ] = new Matrix( layerSize[ l ] ) ; 559 | 560 | layerFunctionInts[ l ] = layerFunctionInit[ l ] ; 561 | if ( layerFunctionInit[ l ] == TANH ) { 562 | layerFunction[ l ] = new cTanH() ; 563 | } else if ( layerFunctionInit[ l ] == LINEAR ) { 564 | layerFunction[ l ] = new cLinear() ; 565 | } else { 566 | cout << "WARNING: Unknown layer function type: " ; 567 | cout << layerFunctionInit[ l ] << '\n' ; 568 | cout << "layer: " << l << '\n' ; 569 | exit(1) ; 570 | } 571 | } 572 | // Get weights 573 | for ( int l = 0 ; l < ( nLayers - 1 ) ; l++ ) { 574 | for ( int i = 0 ; i < ( layerSize[l] + 1 ) ; i++ ) { 575 | for ( int o = 0 ; o < layerSize[ l + 1 ] ; o++ ) { 576 | setWeights( l, i, o, read_double( ifile ) ) ; 577 | } 578 | } 579 | } 580 | recentlyUpdated = true ; 581 | 582 | delete [] layerSizeInit ; 583 | delete [] layerFunctionInit ; 584 | } 585 | 586 | void cNeuralNetwork::writeNetwork( const char * file ) { 587 | ofstream ofile ; 588 | 589 | ofile.open( file, ofstream::out ) ; 590 | 591 | write_int( ofile, nLayers ) ; 592 | 593 | for ( int l = 0 ; l < nLayers ; l++ ) { 594 | write_int( ofile, layerSize[l] ) ; 595 | write_int( ofile, layerFunctionInts[l] ) ; 596 | } 597 | 598 | // Weights 599 | for ( int l = 0 ; l < ( nLayers - 1 ) ; l++ ) { 600 | for ( int i = 0 ; i < ( layerSize[l] + 1 ) ; i++ ) { 601 | for ( int o = 0 ; o < layerSize[ l + 1 ] ; o++ ) { 602 | write_double( ofile, getWeights( l, i, o ) ) ; 603 | } 604 | } 605 | } 606 | } 607 | 608 | #endif //CNEURALNETWORK 609 | 610 | -------------------------------------------------------------------------------- /cNeuralNetwork.h: 
-------------------------------------------------------------------------------- 1 | #ifndef CNEURALNETWORK_H 2 | #define CNEURALNETWORK_H 3 | # include 4 | # include 5 | # include 6 | # include "cFunction.h" 7 | # include "cTanH.h" 8 | # include "cLinear.h" 9 | # include "Matrix.h" 10 | # include "List.h" 11 | 12 | 13 | using namespace std; 14 | 15 | class cNeuralNetwork { 16 | public: 17 | cNeuralNetwork( const char * ); 18 | cNeuralNetwork( int, int * ); 19 | cNeuralNetwork( int, int *, int * ); 20 | ~cNeuralNetwork(); 21 | 22 | void init( int, int *, int * ) ; 23 | 24 | void forwardPropagate( double *, List * ); 25 | double * forwardPropagate( double * ); 26 | double * forwardPropagate( vector * ); 27 | 28 | void backPropagate( double *, double *, double ); 29 | void backPropagateError( double *, double *, double ); 30 | 31 | void changeFunction( int, int ) ; 32 | 33 | double getActivation( int, int ) ; 34 | void getActivations( int, List * ) ; 35 | double * getActivations( int ) ; 36 | double getWeights( int, int, int ) ; 37 | void setWeights( int, int, int, double ) ; 38 | void randomizeWeights( double, double ) ; 39 | void randomizeWeights( double, double, int ) ; 40 | void printNetwork() ; 41 | void readNetwork( const char * ) ; 42 | void writeNetwork( const char * ) ; 43 | private: 44 | int nLayers, nInput, nOutput, nFormer, nNext ; 45 | unsigned int wPos ; 46 | int * layerSize ; 47 | int * layerFunctionInts ; 48 | double * formerOut ; 49 | double * formerIn ; 50 | double * nextIn ; 51 | double * nextOut ; 52 | double * iError ; 53 | double * oError ; 54 | double * w ; 55 | double * inputIn ; 56 | double * inputOut ; 57 | double * outputIn ; 58 | double * outputOut ; 59 | double * error ; 60 | cFunction ** layerFunction ; 61 | Matrix ** weights ; 62 | Matrix ** layerIn ; 63 | Matrix ** layerOut ; 64 | int i, h, o, l ; //Indices 65 | int SIZEOFDOUBLE ; 66 | void _forwardPropLayer( int ) ; 67 | double * _backPropLayer( int layer, double * outError, double 
learningSpeed ) ; 68 | bool recentlyUpdated ; 69 | void _printLayer( int ) ; 70 | int read_int( istream & ) ; 71 | void write_int( ostream &, int ) ; 72 | double read_double( istream & ) ; 73 | void write_double( ostream &, double ) ; 74 | }; 75 | 76 | #endif // CNEURALNETWORK_H 77 | -------------------------------------------------------------------------------- /cTanH.cpp: -------------------------------------------------------------------------------- 1 | #ifndef CTANH 2 | #define CTANH 3 | 4 | using namespace std; 5 | //# include 6 | # include "cTanH.h" 7 | 8 | cTanH::cTanH() : cFunction() { 9 | //~ cout << "constructor TanH\n" ; 10 | } 11 | 12 | cTanH::~cTanH() { 13 | //~ cout << "destructor TanH\n" ; 14 | } 15 | 16 | int cTanH::printName() { 17 | cout << "cTanH\n" ; 18 | return 0; 19 | } 20 | 21 | double cTanH::output( double input ) { 22 | //~ //cout << "tanh in;" ; 23 | double output = -1 ; 24 | if (input > 1.92032) { 25 | output = 0.96016; 26 | } else if (input > 0.0) { 27 | output = -0.260373271*input*input + input; 28 | } else if (input > -1.92032) { 29 | output = 0.260373271*input*input + input; 30 | } else { 31 | output = -0.96016; 32 | } 33 | return output; 34 | //~ return tanh( input ) ; 35 | } 36 | 37 | void cTanH::output( double * input , double * output , int n ) { 38 | 39 | for ( int i = 0 ; i < n ; i++ ) { 40 | output[i] = -1 ; 41 | if (input[i] > 1.92032) { 42 | output[i] = 0.96016; 43 | } else if (input[i] > 0.0) { 44 | output[i] = -0.260373271*input[i]*input[i] + input[i]; 45 | } else if (input[i] > -1.92032) { 46 | output[i] = 0.260373271*input[i]*input[i] + input[i]; 47 | } else { 48 | output[i] = -0.96016; 49 | } 50 | } 51 | } 52 | 53 | double cTanH::derivative( double input, double output ) { 54 | return 1.0 - (output*output) ; 55 | } 56 | 57 | void cTanH::derivative( double * input, double * output, double * derivative, int n ) { 58 | for ( int i = 0 ; i < n ; i++ ) { 59 | derivative[i] = 1.0 - (output[i]*output[i]) ; 60 | } 61 | } 62 | 
63 | #endif //CTANH 64 | -------------------------------------------------------------------------------- /cTanH.h: -------------------------------------------------------------------------------- 1 | #ifndef CTANH_H 2 | #define CTANH_H 3 | 4 | #include "cFunction.h" 5 | using namespace std; 6 | class cTanH : public cFunction { 7 | public: 8 | cTanH() ; 9 | ~cTanH() ; 10 | int printName() ; 11 | double output( double input ); 12 | void output( double * input, double * output, int n ); 13 | double derivative( double input, double output ); 14 | void derivative( double * input, double * output, double * derivative, int n ); 15 | }; 16 | 17 | #endif //CTANH_H 18 | -------------------------------------------------------------------------------- /cThreshold.cpp: -------------------------------------------------------------------------------- 1 | #ifndef CTHRESHOLD 2 | #define CTHRESHOLD 3 | 4 | #include "cThreshold.h" 5 | using namespace std; 6 | 7 | cThreshold::cThreshold() : cFunction() { 8 | //~ cout << "constructor Threshold\n" ; 9 | } 10 | 11 | cThreshold::~cThreshold() { 12 | //~ cout << "destructor Threshold\n" ; 13 | } 14 | 15 | int cThreshold::printName() { 16 | cout << "cThreshold\n" ; 17 | return 0 ; 18 | } 19 | 20 | double cThreshold::output( double input ) { 21 | if ( input > 0 ) { 22 | return 1.0 ; 23 | } else { 24 | return -1.0 ; 25 | } 26 | } 27 | 28 | void cThreshold::output( double * inp, double * outp, int n) { 29 | for ( int i = 0 ; i < n ; i++ ) { 30 | if ( inp[i] > 0 ) { 31 | outp[i] = 1.0 ; 32 | } else { 33 | outp[i] = -1.0 ; 34 | } 35 | } 36 | } 37 | 38 | double cThreshold::derivative( double input, double output ) { 39 | return 1.0 ; 40 | } 41 | 42 | 43 | #endif //CTHRESHOLD 44 | -------------------------------------------------------------------------------- /cThreshold.h: -------------------------------------------------------------------------------- 1 | #ifndef CTHRESHOLD_H 2 | #define CTHRESHOLD_H 3 | 4 | #include "cFunction.h" 5 | using 
namespace std; 6 | 7 | class cThreshold : public cFunction { 8 | public: 9 | cThreshold() ; 10 | ~cThreshold() ; 11 | int printName() ; 12 | double output( double input ); 13 | void output( double * input, double * output, int n ); 14 | double derivative( double input, double output ); 15 | }; 16 | 17 | #endif //CTHRESHOLD_H 18 | --------------------------------------------------------------------------------