├── .vscode └── settings.json ├── Agent.cpp ├── Makefile ├── NeuralNetwork ├── NeuralNet.cpp └── NeuralNet.hpp ├── Pong.cpp ├── README.md ├── main ├── model.txt ├── plot.py ├── reward.txt ├── test.cpp └── train.cpp

/.vscode/settings.json:
--------------------------------------------------------------------------------
{
    "files.associations": {
        "fstream": "cpp"
    }
}

/Agent.cpp:
--------------------------------------------------------------------------------
// Agent.cpp — DQN agent with epsilon-greedy exploration and experience replay.
// NOTE(review): the angle-bracket include names and template arguments were
// lost when this file was dumped; they are reconstructed below from the
// identifiers actually used — verify against the original source.
#include <vector>
#include <string>
#include <cstdlib>
#include <ctime>
#include <stdexcept>
#include "NeuralNetwork/NeuralNet.hpp"
using namespace std;

// Fixed-capacity FIFO replay buffer. Each stored transition is packed as
// {state, {action}, {reward}, next_state, {done}} in nested vectors so the
// heterogeneous fields can share a single container.
class ReplayMemory {
private:
    vector<vector<vector<double>>> mem;
    // Fix: the default constructor used to leave this uninitialized; a
    // default-constructed buffer now simply never evicts.
    size_t capacity = 0;
public:
    // Output fields populated by random(); read back by the caller.
    vector<double> current_state;
    int action;
    int reward;
    vector<double> next_state;
    bool is_done;
    ReplayMemory () {}
    ReplayMemory (int capacity) {
        this->capacity = capacity;
        srand(time(NULL));
    }
    // Append one transition, evicting the oldest entry once full.
    void store (vector<double> current_state,
                int action,
                int reward,
                vector<double> next_state,
                bool is_done) {
        if (this->capacity > 0 && this->mem.size() >= this->capacity) {
            this->mem.erase(this->mem.begin());
        }

        vector<vector<double>> append_mem;
        append_mem.push_back(current_state);
        append_mem.push_back({(double)action});
        append_mem.push_back({(double)reward});
        append_mem.push_back(next_state);
        append_mem.push_back({(double)is_done});
        this->mem.push_back(append_mem);
    }
    // Sample one stored transition uniformly at random into the public fields.
    void random () {
        // Fix: rand() % 0 was undefined behavior when the buffer was empty.
        if (this->mem.empty()) {
            throw invalid_argument("cannot sample from an empty replay memory");
        }
        int random_num = rand() % this->mem.size();
        vector<vector<double>> mem_read = this->mem[random_num];
        this->current_state = mem_read[0];
        this->action = (int)mem_read[1][0];
        this->reward = (int)mem_read[2][0];
        this->next_state = mem_read[3];
        this->is_done = (bool)mem_read[4][0];
    }
};

// DQN agent: an online net, a target net, and a replay buffer.
// NOTE(review): NeuralNet has no user-defined copy, so `target_net = net`
// copies the raw `layers` pointer and both nets alias the same weights —
// confirm whether a deep copy (true lagging target network) was intended.
class Agent {
private:
    NeuralNet net = NeuralNet();
    NeuralNet target_net = NeuralNet();
    ReplayMemory mem = ReplayMemory();
    int frameReachProb;   // frame count at which epsilon bottoms out at 0.05
    int batches;          // transitions replayed per train() call
    int targetFreqUpdate; // frames between target-net syncs / checkpoints
    bool is_testing = false;
public:
    vector<double> last_prediction; // net output, or {-1,-1,-1} for a random move
    int frames = 0;

    // Inference-only agent: loads saved parameters and disables training.
    Agent (vector<int> layout, string filename) {
        this->net = NeuralNet(layout, -1); // the learning rate doesn't matter
        this->net.open_params(filename);
        this->target_net = this->net;
        this->is_testing = true;
    }

    // Training agent.
    Agent (vector<int> layout, double lr, int mem_capacity, int frameReachProb, int targetFreqUpdate, int batches) {
        this->mem = ReplayMemory(mem_capacity);
        this->net = NeuralNet(layout, lr);
        this->target_net = net;
        this->frameReachProb = frameReachProb;
        this->targetFreqUpdate = targetFreqUpdate;
        this->batches = batches;
        srand(time(NULL));
    }

    // Index of the largest element; the first index wins on ties.
    int argmax (vector<double> array) {
        int index = 0;
        double max_value = array[0];
        for (size_t i = 1; i < array.size(); i++) {
            if (array[i] > max_value) {
                max_value = array[i];
                index = (int)i;
            }
        }
        return index;
    }

    // Epsilon-greedy selection while training, pure greedy while testing.
    // Epsilon decays linearly from 1.0 to 0.05 over frameReachProb frames.
    int action (vector<double> input) {
        if (is_testing) {
            last_prediction = this->net.predict(input);
            return argmax(last_prediction);
        }
        this->frames++;
        double probability = 0.05;
        if (frames <= frameReachProb) {
            probability = (-0.95 / double(frameReachProb)) * frames + 1;
        }
        bool isRandom = (rand() % 100) < (probability * 100);
        int action;
        if (isRandom) {
            action = rand() % 3;
            last_prediction = vector<double>({-1, -1, -1});
        } else {
            last_prediction = this->net.predict(input);
            action = argmax(last_prediction);
        }
        return action;
    }

    // Record one transition in replay memory (training mode only).
    void store_mem (vector<double> current_state, int action, int reward, vector<double> next_state, bool is_done) {
        if (is_testing) throw invalid_argument("Cannot use this function while testing");
        mem.store(current_state, action, reward, next_state, is_done);
    }

    // Largest element of a non-empty vector.
    double max (vector<double> array) {
        double max_val = array[0];
        for (size_t i = 1; i < array.size(); i++) {
            if (array[i] > max_val) {
                max_val = array[i];
            }
        }
        return max_val;
    }

    // One DQN update: replay `batches` random transitions, regress Q(s,a)
    // toward r + 0.99 * max_a' Q_target(s',a'), then periodically sync the
    // target net and checkpoint the weights.
    void train () {
        if (is_testing) throw invalid_argument("Cannot use this function while testing");
        // sample minibatch
        for (int i = 0; i < batches; i++) {
            mem.random();
            vector<double> current_state = mem.current_state;
            int action = mem.action;
            int reward = mem.reward;
            vector<double> next_state = mem.next_state;
            bool is_done = mem.is_done;
            // Terminal transitions contribute only the immediate reward.
            double y;
            if (is_done) {
                y = reward;
            } else {
                y = reward + (0.99 * max(this->target_net.predict(next_state)));
            }
            // Only the taken action's output is pushed toward y; the other
            // outputs keep their current prediction (zero gradient).
            vector<double> target = this->net.predict(current_state);
            target[action] = y;
            this->net.backprop(current_state, target);
        }
        if (frames % this->targetFreqUpdate == 0) {
            this->target_net = this->net;
            this->net.save_params("model.txt"); // yes im using .txt file dont bully me
        }
    }
};

/Makefile:
--------------------------------------------------------------------------------
train:
	@c++ -std=c++11 train.cpp NeuralNetwork/NeuralNet.cpp -o main
	@./main
	@python plot.py
	@rm main

test:
	@c++ -std=c++11 test.cpp NeuralNetwork/NeuralNet.cpp -o main
	@./main

/NeuralNetwork/NeuralNet.cpp:
--------------------------------------------------------------------------------
//
// NeuralNet.cpp
// NeuralNet
//
// Created by Eshaan Arnav on 9/7/20.
// Copyright © 2020 Eshaan. All rights reserved.
7 | // 8 | 9 | #include "NeuralNet.hpp" 10 | 11 | Layer :: Layer(int starting_node, int ending_node, double lr) { 12 | this->lr = lr; 13 | this->ending_node = ending_node; 14 | this->starting_node = starting_node; 15 | // random num 16 | srand(time(NULL)); 17 | // init weight 18 | this->weights = (double*)malloc(sizeof(double) * ending_node * starting_node); 19 | for (int i = 0; i < ending_node * starting_node; i++) { 20 | double random_num = (rand() % 1000000) / double(1000000) - 0.5; 21 | weights[i] = random_num; 22 | } 23 | // init bias 24 | this->bias = (double*)malloc(sizeof(double) * ending_node); 25 | for (int i = 0; i < ending_node; i++) { 26 | double random_num = (rand() % 1000000) / double(1000000) - 0.5; 27 | bias[i] = random_num; 28 | } 29 | } 30 | 31 | double Layer :: sigmoid (double x) { 32 | return 1 / (1 + pow(this->e, -x)); 33 | } 34 | 35 | vector Layer :: predict (vector input) { 36 | if (input.size() != this->starting_node) { 37 | throw invalid_argument("Input size not equal to input array. 
Expected " + to_string(this->starting_node) + " but got: " + to_string(input.size())); 38 | } 39 | 40 | vector output; 41 | // fill output with zero 42 | for (int i = 0; i < this->ending_node; i++) { 43 | output.push_back(0); 44 | } 45 | // weight 46 | for (int x = 0; x < this->ending_node; x++) { 47 | for (int i = 0; i < this->starting_node; i++) { 48 | output[x] += input[i] * weights[(i * this->ending_node) + x]; 49 | } 50 | } 51 | // bias & sigmoid 52 | for (int i = 0; i < this->ending_node; i++) { 53 | output[i] += bias[i]; // bias 54 | output[i] = this->sigmoid(output[i]); 55 | } 56 | return output; 57 | } 58 | 59 | 60 | vector Layer :: backprop(vector inputs, vector outputs, vector target) { 61 | vector return_array; 62 | if (inputs.size() != this->starting_node) throw invalid_argument("input size not valid"); 63 | if (outputs.size() != this->ending_node) throw invalid_argument("output size not valid"); 64 | if (target.size() != this->ending_node) throw invalid_argument("target size not valid"); 65 | 66 | for (int i = 0; i < starting_node; i++) { 67 | return_array.push_back(0); 68 | } 69 | for (int x = 0; x < this->ending_node; x++) { 70 | double output = outputs[x]; 71 | for (int i = 0; i < this->starting_node; i++) { 72 | double input = inputs[i]; 73 | 74 | // derivative of the weights 75 | double dir_w = input * output * (1 - output) * target[x]; 76 | // applying dir to weights 77 | weights[(i * this->ending_node) + x] -= this->lr * dir_w; 78 | 79 | // derivative of "first layer" (the derivative to be passed down to the next layer) 80 | double dir_a_l_1 = output * (1 - output) * target[x]; 81 | // multiply by weights 82 | dir_a_l_1 *= weights[(i * this->ending_node) + x]; 83 | // accumulate the derivative 84 | return_array[i] += dir_a_l_1; 85 | } 86 | 87 | // derivative of the bias 88 | double dir_b = output * (1 - output) * target[x]; 89 | // apply der to bias 90 | bias[x] -= this->lr * dir_b; 91 | } 92 | return return_array; 93 | } 94 | 95 | void Layer 
:: free_vars() { 96 | free(this->weights); 97 | free(this->bias); 98 | } 99 | 100 | NeuralNet :: NeuralNet () { 101 | 102 | } 103 | 104 | void split_string (string const &str, const char* delim, vector &out) { 105 | char *token = strtok(const_cast(str.c_str()), delim); 106 | while (token != nullptr) { 107 | out.push_back(stod(std::string(token))); 108 | token = strtok(nullptr, delim); 109 | } 110 | } 111 | 112 | void NeuralNet :: save_params (string filename) { 113 | ofstream file; 114 | file.open(filename); 115 | file << this->layout_size - 1 << endl; 116 | // iterate over the layers 117 | for (int i = 0; i < this->layout_size - 1; i++) { 118 | file << this->layers[i].starting_node << endl; 119 | file << this->layers[i].ending_node << endl; 120 | 121 | // iterate over the weights 122 | for (int weight_iter = 0; weight_iter < this->layers[i].starting_node * this->layers[i].ending_node; weight_iter++) { 123 | file << this->layers[i].weights[weight_iter] << " "; 124 | } 125 | file << endl; 126 | // iterate over the bias 127 | for (int bias_iter = 0; bias_iter < this->layers[i].ending_node; bias_iter++) { 128 | file << this->layers[i].bias[bias_iter] << " "; 129 | } 130 | file << endl; 131 | } 132 | file.close(); 133 | } 134 | 135 | void NeuralNet :: open_params (string filename) { 136 | // get contents of file 137 | int file_iter = -1; 138 | int layer_iter = 0; 139 | ifstream file; 140 | file.open(filename); 141 | string contents; 142 | while (getline(file, contents)) { 143 | // if beginning of file 144 | if (file_iter == -1) { 145 | if (stoi(contents)+1 != this->layout_size) { 146 | throw invalid_argument("file has different neural net architecture. Layout size is different."); 147 | } 148 | file_iter = 0; 149 | } 150 | // first input, stores starting node 151 | else if (file_iter == 0) { 152 | if (stoi(contents) != this->layers[layer_iter].starting_node) { 153 | throw invalid_argument("file has different neural net architecture. 
Starting node is different."); 154 | } 155 | file_iter = 1; 156 | } 157 | // second input, stores ending_node 158 | else if (file_iter == 1) { 159 | if (stoi(contents) != this->layers[layer_iter].ending_node) { 160 | throw invalid_argument("file has different neural net architecture. Ending node is different."); 161 | } 162 | file_iter = 2; 163 | } 164 | // third input, stores weights 165 | else if (file_iter == 2) { 166 | vector weight_array; 167 | split_string(contents, " ", weight_array); 168 | this->layers[layer_iter].weights = &weight_array[0]; 169 | file_iter = 3; 170 | } 171 | // fourth input, stores bias 172 | else if (file_iter == 3) { 173 | vector bias_array; 174 | split_string(contents, " ", bias_array); 175 | this->layers[layer_iter].bias = &bias_array[0]; 176 | file_iter = 0; 177 | layer_iter += 1; 178 | } 179 | } 180 | file.close(); 181 | 182 | } 183 | 184 | NeuralNet :: NeuralNet(vector layout, double lr) { 185 | this->lr = lr; 186 | this->layout_size = int(layout.size()); 187 | this->output_layout = layout[layout_size - 1]; 188 | // malloc layers 189 | layers = (Layer*)malloc(sizeof(Layer) * layout_size); 190 | // init layers 191 | for (int i = 0; i < this->layout_size - 1; i++) { 192 | layers[i] = Layer(layout[i], layout[i+1], this->lr); 193 | } 194 | if (this->layout_size == 0) throw invalid_argument("layout size 0"); 195 | } 196 | 197 | vector NeuralNet :: predict (vector input) { 198 | vector return_array = input; 199 | for (int i = 0; i < this->layout_size - 1; i++) { 200 | return_array = this->layers[i].predict(return_array); 201 | } 202 | return return_array; 203 | } 204 | 205 | double NeuralNet:: mean (vector array) { 206 | double return_num = 0; 207 | for (int i = 0; i < array.size(); i++) { 208 | return_num += array[i]; 209 | } 210 | return return_num / array.size(); 211 | } 212 | 213 | vector NeuralNet :: subtract (vector arrayOne, vector arrayTwo) { 214 | vector return_array; 215 | for (int i = 0; i < arrayOne.size(); i++) { 216 | 
return_array.push_back(arrayOne[i] - arrayTwo[i]); 217 | } 218 | return return_array; 219 | } 220 | 221 | vector NeuralNet:: square (vector arrayOne) { 222 | vector return_array; 223 | for (int i = 0; i < arrayOne.size(); i++) { 224 | return_array.push_back(pow(arrayOne[i], 2)); 225 | } 226 | return return_array; 227 | } 228 | 229 | vector NeuralNet:: multiply (vector arrayOne, double two) { 230 | vector return_arr; 231 | for (int i = 0; i < arrayOne.size(); i++) { 232 | return_arr.push_back(arrayOne[i] * two); 233 | } 234 | return return_arr; 235 | } 236 | 237 | double NeuralNet:: backprop (vector input, vector expected_output) { 238 | vector final_prediction = input; 239 | vector prediction_array[this->layout_size]; 240 | prediction_array[0] = input; 241 | for (int i = 0; i < this->layout_size - 1; i++) { 242 | final_prediction = this->layers[i].predict(final_prediction); 243 | prediction_array[i+1] = final_prediction; 244 | } 245 | // MSE 246 | vector error = multiply(subtract(final_prediction, expected_output),2); 247 | for (int i = this->layout_size - 2; i > -1; i--) { 248 | error = this->layers[i].backprop(prediction_array[i], prediction_array[i+1], error); 249 | } 250 | return mean(square(subtract(final_prediction, expected_output))); 251 | } 252 | 253 | // basically the same as NeuralNet::backprop except the error is the grad 254 | void NeuralNet:: apply_grad (vector input, vector grad) { 255 | // forward propagate 256 | vector output_array[this->layout_size]; 257 | output_array[0] = input; 258 | for (int i = 0; i < this->layout_size - 1; i++) { 259 | output_array[i+1] = this->layers[i].predict(output_array[i]); 260 | } 261 | // backward propagate 262 | vector error = grad; 263 | for (int i = this->layout_size - 2; i > -1; i--) { 264 | error = this->layers[i].backprop(output_array[i], output_array[i+1], error); 265 | } 266 | } -------------------------------------------------------------------------------- /NeuralNetwork/NeuralNet.hpp: 
-------------------------------------------------------------------------------- 1 | // 2 | // NeuralNet.hpp 3 | // NeuralNet 4 | // 5 | // Created by Eshaan Arnav on 9/7/20. 6 | // Copyright © 2020 Eshaan. All rights reserved. 7 | // 8 | 9 | #ifndef NeuralNetwork_hpp 10 | #define NeuralNetwork_hpp 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | using namespace std; 20 | 21 | #endif /* NeuralNet_hpp */ 22 | 23 | class Layer { 24 | public: 25 | double* weights; 26 | double* bias; 27 | int starting_node; 28 | int ending_node; 29 | double e = 2.7182818; 30 | double lr; 31 | 32 | Layer (int starting_node, int ending_node, double lr); 33 | vector predict (vector input); 34 | vector backprop(vector inputs, vector outputs, vector target); 35 | 36 | void free_vars (); 37 | double sigmoid (double x); 38 | }; 39 | 40 | class NeuralNet { 41 | private: 42 | double mean (vector array); 43 | vector subtract (vector arrayOne, vector arrayTwo); 44 | vector square (vector arrayOne); 45 | vector multiply (vector arrayOne, double two); 46 | public: 47 | Layer *layers; 48 | int layout_size = 0; 49 | int output_layout; 50 | double lr; 51 | // class / vars initialization 52 | NeuralNet (vector layout, double lr); 53 | 54 | NeuralNet (); 55 | 56 | // Forward propagate 57 | vector predict (vector input); 58 | 59 | // Performs stochastic gradient descent and applies the gradient to the weights 60 | double backprop (vector input, vector expected_output); 61 | 62 | // If you can find the gradient of whatever function you have with respect to the output of the neural network, this function will figure out the derivative of the weights and bias and apply them } 63 | void apply_grad (vector input, vector grad); 64 | 65 | void save_params (string filename); 66 | 67 | void open_params (string filename); 68 | }; -------------------------------------------------------------------------------- /Pong.cpp: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | using namespace std; 8 | class Pong { 9 | private: 10 | int pongX; 11 | int pongY; 12 | int pongVelX; 13 | int pongVelY; 14 | int width; 15 | int height; 16 | int playerX = 0; 17 | int paddleWidth; 18 | public: 19 | int score = 0; 20 | int episodes = 0; 21 | bool is_done = false; 22 | void reset () { 23 | this->pongX = 0; 24 | this->pongY = 0; 25 | this->playerX = 0; 26 | bool isPongVelX = rand() % 2; 27 | bool isPongVelY = rand() % 2; 28 | if (isPongVelX) { 29 | this->pongVelX = 1; 30 | } else { 31 | this->pongVelX = -1; 32 | } 33 | if (isPongVelY) { 34 | this->pongVelY = 1; 35 | } else { 36 | this->pongVelY = -1; 37 | } 38 | episodes++; 39 | score = 0; 40 | } 41 | Pong (int width, int height, int paddleWidth = 1) { 42 | reset(); 43 | this->width = width; 44 | this->height = height; 45 | this->paddleWidth = paddleWidth; 46 | } 47 | // 0 : Left 48 | // 1 : Stay 49 | // 2 : Right 50 | int act (int action) { 51 | int reward = 0; 52 | is_done = false; 53 | if (action > 2) { 54 | throw invalid_argument("invalid action"); 55 | } 56 | if (action == 0 && this->playerX - this->paddleWidth != -width + 1) { 57 | this->playerX -= 1; 58 | } 59 | if (action == 2 && this->playerX + this->paddleWidth != width - 1) { 60 | this->playerX += 1; 61 | } 62 | if (action == 0 && this->playerX - this->paddleWidth == -width + 1) { 63 | reward = 0; 64 | } 65 | if (action == 2 && this->playerX + this->paddleWidth == width - 1) { 66 | reward = 0; 67 | } 68 | // update pong position 69 | this->pongX += this->pongVelX; 70 | this->pongY += this->pongVelY; 71 | // boundry 72 | if (pongX == -width + 1 && pongVelX < 0) { 73 | pongVelX *= -1; 74 | } 75 | if (pongX == width - 1 && pongVelX > 0) { 76 | pongVelX *= -1; 77 | } 78 | // hits player paddle 79 | if (abs(pongX - playerX) <= paddleWidth && pongY == -height + 2 && pongVelY < 0) { 80 
| pongVelY *= -1; 81 | reward = 5; 82 | score++; 83 | } 84 | if (pongY == -height + 1) { 85 | reward = -10; 86 | is_done = true; 87 | this->reset(); 88 | } 89 | // hit enemy paddle 90 | if (pongY == height - 2 && pongVelY > 0) { 91 | pongVelY *= -1; 92 | } 93 | return reward; 94 | } 95 | int coordX (int x) { 96 | int widthOffset = this->width; 97 | return x + widthOffset; 98 | } 99 | int coordY (int y) { 100 | int heightOffset = this->height; 101 | return (y * -1) + heightOffset; 102 | } 103 | void print_state () { 104 | string a[(height * 2)+1][(width * 2)+1]; 105 | a[coordY(pongY)][coordX(pongX)] = "O"; 106 | a[coordY(height - 1)][coordX(pongX)] = "-"; 107 | a[coordY(-height + 1)][coordX(playerX)] = "-"; 108 | for (int i = 0; i <= this->paddleWidth; i++) { 109 | a[coordY(-height + 1)][coordX(playerX + i)] = "-"; 110 | a[coordY(-height + 1)][coordX(playerX - i)] = "-"; 111 | } 112 | for (int i = 0; i < (width * 2) + 3; i++) { 113 | cout<<"#"; 114 | } 115 | cout< return_big_state () { 133 | double a[(height * 2)+1][(width * 2)+1]; 134 | a[coordY(pongY)][coordX(pongX)] = 255; 135 | a[coordY(height - 0)][coordX(pongX)] = 100; 136 | a[coordY(-height + 1)][coordX(playerX)] = 100; 137 | vector state; 138 | for (int i = 0; i < (height * 2) + 1; i++) { 139 | for (int x = 0; x < (width * 2) + 1; x++) { 140 | state.push_back(a[i][x]); 141 | } 142 | } 143 | return state; 144 | } 145 | vector return_state () { 146 | vector state; 147 | state.push_back(pongX); 148 | state.push_back(pongY); 149 | state.push_back(pongVelX); 150 | state.push_back(pongVelY); 151 | state.push_back(width); 152 | state.push_back(height); 153 | state.push_back(playerX); 154 | state.push_back(paddleWidth); 155 | return state; 156 | } 157 | }; 158 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DQN 2 | My implementation of DQN. 
3 | 4 | To test, simply run `make test` in this directory. To train, simply run `make train` in this directory. 5 | Email: eshaanbarkataki@gmail.com 6 | 7 | Video: https://youtu.be/NDrtKeFO38U 8 | -------------------------------------------------------------------------------- /main: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Eshaancoding/DQN/ca022c3b8269cf6c8a5f1dc9043875f011e51120/main -------------------------------------------------------------------------------- /model.txt: -------------------------------------------------------------------------------- 1 | 2 2 | 8 3 | 50 4 | 0.307126 -0.126629 0.248805 0.237812 0.201558 -0.0856388 -0.358904 0.107981 0.304269 -0.173557 -0.350979 -0.0917283 -0.242868 -0.351931 0.40381 -0.44489 0.338332 -0.449317 0.018838 -0.490421 -0.0687647 0.381202 0.165018 -0.00337168 0.252811 0.393945 -0.385272 -0.0498808 0.288083 0.0625708 -0.34751 0.19254 0.393551 -0.457588 0.38073 0.12152 0.0594065 -0.453509 -0.26685 0.40171 0.423671 0.343228 0.179016 0.199992 -0.382127 0.218085 -0.192435 0.392089 -0.147976 0.115293 0.25101 0.349611 -0.29851 0.18718 -0.363605 -0.664946 -0.360653 0.597188 -0.0385892 -0.0784038 -0.0203299 -0.344043 -0.330767 0.161437 -0.0752496 -0.417714 -0.0362941 -0.18027 -0.317207 0.293649 0.022914 -0.617287 -0.579606 -0.0207907 0.029144 -0.341818 0.155161 0.566339 0.220844 -0.386077 0.103537 -0.473094 0.401136 0.524724 0.0951419 0.368077 -0.190364 0.350788 0.0235048 0.456237 0.32481 0.429128 -0.637424 0.478613 0.0958714 0.605135 0.705039 -0.535919 -0.0219827 -0.307588 -0.183916 0.107566 0.128537 -0.0693159 -0.370329 0.310521 0.0632282 -0.0921419 0.236846 -0.334511 -0.00579967 0.481511 0.190248 0.424843 -0.0503003 0.404803 -0.341281 0.468033 0.248046 -0.153934 0.425307 -0.453182 0.494212 0.0194459 0.388841 0.0424251 -0.249529 0.311645 -0.358287 -0.268023 0.3907 -0.0339957 0.362992 -0.492787 0.395019 -0.00586251 0.339672 0.471154 0.371912 0.0697029 
0.139271 0.390602 -0.449947 -0.157989 0.326345 0.00369501 0.276568 0.0120228 -0.0707132 -0.425011 0.410534 0.482202 -0.589449 0.308631 -0.232186 0.361319 0.229665 -0.297646 -0.244706 -0.514091 0.397148 -0.426677 -0.253642 0.34271 -0.381144 0.348069 0.277146 0.489981 0.17335 0.159582 -0.444744 -0.100802 -0.0199931 -0.38108 -0.17405 0.379182 -0.411198 -0.298659 -0.061262 0.0441602 0.163849 -0.350746 0.421615 0.2119 0.00743659 0.350173 -0.507164 -0.071306 -0.0222672 -0.116432 0.446154 -0.007062 -0.0227495 0.395483 0.153875 -0.192142 0.298349 -0.163527 -0.28822 -0.573608 -0.403513 0.42913 -0.379606 -0.336725 -0.157288 -0.133358 0.0512874 0.183093 -0.365981 0.413694 0.126595 0.250191 0.109167 -0.0512167 0.0912215 0.234607 0.338177 -0.433865 0.048965 -0.100415 0.0326458 -0.164973 -0.161085 -0.437239 -0.0272674 0.459579 0.393272 0.323045 -0.1138 0.0252149 0.303603 -0.434239 -0.176687 0.371691 0.271893 0.183344 0.33085 -0.243368 0.412505 -0.489241 0.306047 -0.0247218 -0.24422 0.305446 -0.339819 0.310502 -0.0544908 -0.288067 -0.49658 -0.382517 0.276647 -0.24093 0.135923 -0.441467 -0.042121 -0.166113 0.0114596 0.140193 -0.327693 -0.0919918 0.46096 -0.159765 -0.102576 0.131803 -0.0376737 0.265366 -0.494562 0.0731674 -0.109176 0.477305 0.241906 -0.0840239 0.00293569 -0.41321 0.232416 0.44432 0.135157 -0.0534234 -0.176793 0.49805 0.393886 0.230586 -0.467507 -0.0598604 0.334555 0.0200626 -0.0790502 -0.268068 0.22755 -0.311722 -0.139074 0.0791024 -0.00838255 0.0421417 0.632033 -0.0922762 0.161785 0.118424 -0.301768 0.279832 0.382861 -0.049943 0.0891369 0.313648 0.103274 -0.311777 0.221461 -0.00175168 0.247842 0.113201 0.0134314 0.0800661 -0.281906 -0.394092 -0.0426525 -0.044234 -0.236529 -0.488346 -0.238464 0.39163 -0.263907 -0.0228377 0.470157 -0.113338 0.149889 0.344254 -0.0922071 -0.476223 -0.164956 0.0992082 0.314364 -0.303246 -0.435011 -0.130719 -0.443051 -0.372081 0.421169 -0.191034 0.141208 0.232294 -0.0716055 0.192982 0.305704 -0.336945 0.276017 -0.121793 -0.329987 
0.052916 0.344657 0.370193 0.453098 0.102069 0.364518 0.427804 -0.0264144 -0.510297 -0.206824 0.39872 -0.488589 0.117874 -0.0164789 0.345997 -0.147585 -0.449979 0.253659 0.41574 -0.312226 -0.268148 0.214138 -0.12769 -0.00523146 0.126328 -0.434537 -0.205407 0.337314 -0.163345 -0.361001 0.0283506 0.380894 0.473542 -0.0800711 -0.140123 -0.426756 -0.223026 0.307819 0.049304 -0.222441 -0.408513 -0.0477303 0.311878 -0.257234 -0.0569745 -0.328847 -0.406485 0.0267781 0.419598 0.0121013 -0.31235 0.185915 -0.256188 5 | -0.399563 -0.301854 0.39696 0.174044 0.477387 -0.279259 -0.491703 -0.365577 0.239508 -0.0943313 0.106889 0.1599 0.25615 0.166439 -0.0256516 0.0662744 0.252097 -0.248528 -0.337762 0.222001 -0.422991 0.391263 -0.331174 -0.23892 0.0481437 -0.316543 0.218324 -0.410358 0.413214 -0.108471 -0.180086 -0.476816 0.0709572 0.225753 0.201014 0.050818 -0.0262729 -0.283845 -0.294162 -0.280209 0.138877 0.303609 -0.107031 0.393361 0.520132 0.362104 0.462481 -0.254673 -0.368312 0.158644 6 | 50 7 | 3 8 | 0.414719 -0.017116 0.476243 0.344408 0.399265 0.166117 -0.735082 -0.26202 -0.0518411 -0.150106 -0.358275 -0.0864257 -0.55981 -0.616685 0.163889 -0.852513 0.0263847 -0.866736 -0.0643916 -0.585684 -0.142488 0.65436 0.426644 0.177789 0.235425 0.397765 -0.386845 -0.335819 0.108176 -0.213773 -0.581107 0.0136555 0.167327 -0.82717 0.125919 -0.264669 -0.178796 -0.716511 -0.580599 0.247235 0.332504 0.140424 0.164859 0.00615249 -0.552883 0.0190522 -0.457 0.0975591 -0.258272 0.248959 -0.11853 0.209817 -0.322262 -0.0208425 -0.33838 -0.593057 -0.0424966 0.404167 -0.0993841 -0.119461 -0.188092 -0.531476 -0.0661353 -0.415958 -0.186859 -0.71525 -0.342695 -0.605461 -0.161756 0.228811 0.146195 -0.458833 -0.50521 -0.0884274 0.177824 -0.579945 0.098576 0.18863 0.00798734 -0.469792 0.171853 -0.388456 0.525807 0.526201 0.192602 0.226864 0.0667864 0.0841 -0.0104881 0.239378 0.0877448 0.0108136 -0.58966 0.0223682 -0.361852 -0.200525 0.446225 -0.414638 -0.00851323 0.14482 -0.0447374 0.271652 -0.0762383 
-0.173561 -0.587382 0.326854 0.150753 -0.177298 -0.052223 -0.59146 -0.255742 0.454257 0.219964 0.40816 -0.267156 0.259272 -0.5382 0.453296 0.233886 -0.154149 0.553438 -0.328858 0.657628 0.0986781 0.541066 0.17684 -0.642235 0.0193824 -0.744923 -0.247755 0.498362 -0.0859763 0.296492 -0.59495 0.367687 0.157128 0.541059 0.615838 0.613866 0.254322 0.333709 0.0876428 -0.752033 -0.424977 0.312049 -0.0237831 0.273725 -0.331224 -0.334779 -0.740585 9 | 0.0903879 0.205413 -0.700831 10 | -------------------------------------------------------------------------------- /plot.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | 3 | # read file from reward.txt 4 | rewards = [] 5 | with open('reward.txt', 'r') as f: 6 | lines = f.readlines() 7 | for line in lines: 8 | line = line.strip() 9 | rewards.append(float(line)) 10 | 11 | plt.plot(rewards) 12 | plt.ylabel('Reward') 13 | plt.xlabel('Iteration') 14 | plt.show() -------------------------------------------------------------------------------- /reward.txt: -------------------------------------------------------------------------------- 1 | -10 2 | -10 3 | -10 4 | -10 5 | -10 6 | -10 7 | -10 8 | -2.5 9 | -10 10 | -10 11 | -10 12 | -10 13 | -10 14 | -10 15 | -10 16 | -10 17 | -10 18 | -10 19 | -2.5 20 | -10 21 | -10 22 | -10 23 | -10 24 | -10 25 | -10 26 | -10 27 | -2.5 28 | -10 29 | -2.5 30 | -10 31 | -2.5 32 | -10 33 | -10 34 | -10 35 | -10 36 | -2.5 37 | -10 38 | -2.5 39 | -10 40 | -2.5 41 | -10 42 | -10 43 | -10 44 | -10 45 | -10 46 | -10 47 | -10 48 | -10 49 | -10 50 | -10 51 | -2.5 52 | -10 53 | -10 54 | -10 55 | -10 56 | -10 57 | -10 58 | -10 59 | -10 60 | -10 61 | -10 62 | -10 63 | -2.5 64 | -10 65 | -10 66 | -10 67 | -2.5 68 | -10 69 | -10 70 | -2.5 71 | -10 72 | -10 73 | -10 74 | -2.5 75 | -10 76 | -10 77 | -10 78 | -10 79 | -10 80 | -10 81 | -10 82 | -10 83 | -2.5 84 | -10 85 | -2.5 86 | -10 87 | -10 88 | -2.5 89 | -2.5 90 | -2.5 91 | -10 92 | -10 93 
| -10 94 | -10 95 | -10 96 | -10 97 | -10 98 | -10 99 | -10 100 | -2.5 101 | -10 102 | -10 103 | -10 104 | -10 105 | -10 106 | -10 107 | -10 108 | -10 109 | -2.5 110 | -10 111 | -10 112 | -10 113 | -10 114 | -10 115 | -10 116 | -10 117 | -10 118 | -2.5 119 | -2.5 120 | -2.5 121 | -10 122 | -10 123 | -10 124 | -10 125 | -10 126 | -10 127 | -10 128 | -2.5 129 | -10 130 | -2.5 131 | -10 132 | -10 133 | -10 134 | -10 135 | -2.5 136 | -10 137 | -2.5 138 | -10 139 | -10 140 | -10 141 | -10 142 | -10 143 | -2.5 144 | -10 145 | -10 146 | -10 147 | -10 148 | -10 149 | -10 150 | -2.5 151 | -10 152 | -10 153 | -2.5 154 | -2.5 155 | -2.5 156 | -2.5 157 | -2.5 158 | -10 159 | -10 160 | -2.5 161 | -2.5 162 | -2.5 163 | -10 164 | -10 165 | -10 166 | -10 167 | -10 168 | -2.5 169 | -2.5 170 | -2.5 171 | -10 172 | -10 173 | -10 174 | -2.5 175 | -10 176 | -10 177 | -10 178 | -10 179 | -10 180 | -10 181 | -10 182 | -10 183 | -10 184 | -10 185 | -2.5 186 | -10 187 | -2.5 188 | -10 189 | -2.5 190 | -10 191 | -10 192 | -10 193 | -2.5 194 | -10 195 | -10 196 | -10 197 | -10 198 | -10 199 | -10 200 | -10 201 | -10 202 | -2.5 203 | -10 204 | -2.5 205 | -10 206 | -2.5 207 | -2.5 208 | -10 209 | -10 210 | -10 211 | -10 212 | -10 213 | -10 214 | -10 215 | -10 216 | -10 217 | -10 218 | -10 219 | -10 220 | -10 221 | -10 222 | -10 223 | -10 224 | -10 225 | -2.5 226 | -2.5 227 | -10 228 | -10 229 | -------------------------------------------------------------------------------- /test.cpp: -------------------------------------------------------------------------------- 1 | #include "Agent.cpp" 2 | #include "Pong.cpp" 3 | #include 4 | #include 5 | using namespace std; 6 | 7 | void gotoxy(int x,int y) 8 | { 9 | printf("%c[%d;%df",0x1B,y,x); 10 | } 11 | 12 | int gameWidth = 7; 13 | int gameHeight = 12; // if you want to alter width or height try to play around with the reward system 14 | vector layout ({8, 50, 3}); // Neural Network layout 15 | // main 16 | int main () { 17 | Pong game = 
Pong(gameWidth, gameHeight); // PASS 18 | Agent agent = Agent(layout, "model.txt"); 19 | vector current_state = game.return_state(); 20 | int max_score = 0; 21 | system("clear"); 22 | while (true) { 23 | int action = agent.action(current_state); 24 | cout<<"Action: "< next_state = game.return_state(); 27 | // print 28 | gotoxy(0,0); 29 | game.print_state(); 30 | current_state = next_state; 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /train.cpp: -------------------------------------------------------------------------------- 1 | #include "Agent.cpp" 2 | #include "Pong.cpp" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | using namespace std; 10 | 11 | void gotoxy(int x,int y) 12 | { 13 | printf("%c[%d;%df",0x1B,y,x); 14 | } 15 | 16 | // Parameters 17 | int BATCHES = 32; // number of training data trained per frame 18 | double LR = 0.001; // 0.001 Learning rate for the neural networks 19 | int MEM_CAP = 10000; // 1 000 000 replay mem capacity 20 | int FRAME_REACH = 10000; // 5 000 frames till epsilon reaches 0.05 21 | int TARGET_UPDATE = 5000; // 1000 frames for target net to update with net, and also saves neural network parameters every 5000 iter 22 | vector layout ({8, 50, 3}); // Neural Network layout 23 | int gameWidth = 7; 24 | int gameHeight = 12; // if you want to alter width or height try to play around with the reward system 25 | int episodes = 0; 26 | // main 27 | int main () { 28 | Pong game = Pong(gameWidth, gameHeight); // PASS 29 | Agent agent = Agent(layout, LR, MEM_CAP, FRAME_REACH, TARGET_UPDATE, BATCHES); 30 | vector current_state = game.return_state(); 31 | int max_score = 0; 32 | int reward_count = 0; 33 | float avg_reward = 0; 34 | ofstream ofs; 35 | ofs.open("reward.txt", std::ofstream::out | std::ofstream::trunc); // clear txt file 36 | ofs.close(); 37 | system("clear"); 38 | while (episodes < 20000) { 39 | int action = agent.action(current_state); 40 | 
int reward = game.act(action); 41 | vector next_state = game.return_state(); 42 | if (reward != 0) { 43 | avg_reward += reward; 44 | reward_count += 1; 45 | } 46 | if (game.is_done) { 47 | episodes += 1; 48 | // save reward to .txt 49 | if (avg_reward != 0) { 50 | avg_reward /= reward_count; 51 | ofstream myfile; 52 | myfile.open ("reward.txt", ios::app); 53 | myfile << avg_reward << endl; 54 | myfile.close(); 55 | } 56 | // show progress 57 | gotoxy(0,0); 58 | cout << "Episodes: " << episodes << " | Max Score: " << max_score << " | Avg Reward: " << avg_reward << " " << endl; 59 | avg_reward = 0; 60 | reward_count = 0; 61 | } 62 | 63 | // train 64 | if (game.score > max_score) { 65 | max_score = game.score; 66 | } 67 | agent.store_mem(current_state, action, reward, next_state, game.is_done); 68 | agent.train(); 69 | current_state = next_state; 70 | } 71 | } 72 | --------------------------------------------------------------------------------