├── clean_project.sh
├── env.sh
├── controllers
    ├── global_parameters.hpp
    ├── vpg_controller
    │   ├── vpg_parameters.hpp
    │   ├── vpg_controller.cpp
    │   ├── Makefile
    │   ├── vpg_arbitrator.hpp
    │   └── vpg_arbitrator.cpp
    └── khepera_test
    │   ├── khepera_test.cpp
    │   └── Makefile
├── unit_test
    ├── makefile
    └── vpg_test.cpp
├── libraries
    ├── networks
    │   └── vpg_network
    │   │   ├── build.sh
    │   │   ├── vpg_episode.cpp
    │   │   ├── vpg_episode.hpp
    │   │   ├── CMakeLists.txt
    │   │   ├── vpg_wrapper.hpp
    │   │   ├── vpg_network.hpp
    │   │   └── vpg_network.cpp
    └── utils
    │   ├── training_handler.hpp
    │   ├── state.hpp
    │   ├── state.cpp
    │   ├── reward.hpp
    │   ├── reward.cpp
    │   ├── khepera.hpp
    │   ├── data_logger.hpp
    │   ├── training_handler.cpp
    │   ├── khepera.cpp
    │   └── data_logger.cpp
├── .gitignore
├── LICENSE
├── README.md
├── log_analyzer
    ├── vpg_analyzer.py
    └── Analyzer.py
└── worlds
    └── learned_obstacle_avoidance.wbt


/clean_project.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | echo ""
 3 | echo -e "\033[1;33mCleaning the PyTorch shared object...\033[0m"
 4 | cd - 
 5 | cd ./libraries/networks/vpg_network
 6 | rm -r build
 7 | cd ../..
 8 | rm libvpg_network.so
 9 | rm vpg_episode.hpp
10 | rm vpg_wrapper.hpp
11 | 
12 | echo ""
13 | echo -e "\033[1;33mCleaning the webots controller...\033[0m"
14 | cd ../controllers/vpg_controller
15 | make clean 
16 | 
17 | echo "done" 
18 | 


--------------------------------------------------------------------------------
/env.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash 
 2 | # Setup for running khepera IV robots and RL
 3 | 
 4 | echo Exporting environment variables
 5 | export WEBOTS_HOME=/usr/local/webots ; echo WEBOTS_HOME=$WEBOTS_HOME
 6 | export WEBOTS_HOME_PATH=$WEBOTS_HOME 
 7 | 
 8 | # Change this path to point to your libtorch installation 
 9 | export LIBTORCH_PATH=/home/$(whoami)/Documents/PyTorch/libtorch # <- CHANGE ME! 
10 | echo LIBTORCH_PATH=$LIBTORCH_PATH
11 | 


--------------------------------------------------------------------------------
/controllers/global_parameters.hpp:
--------------------------------------------------------------------------------
 1 | # pragma once
 2 | 
 3 | #define ROBOT_BASIC_TIMESTEP 8      // Time step in ms 
 4 | 
 5 | // State parameters
 6 | #define STATE_DIM           10      // Dimension of the state (8 sensors, 2 wheel speeds)
 7 | #define STATE_NUM_IR         8
 8 | #define STATE_LEFT           8
 9 | #define STATE_RIGHT          9
10 | 
11 | // Action parameters
12 | #define ACTION_DIM           2      // Dimension of the action space
13 | 
14 | 


--------------------------------------------------------------------------------
/unit_test/makefile:
--------------------------------------------------------------------------------
 1 | # See: http://www.cs.colby.edu/maxwell/courses/tutorials/maketutor/
 2 | # Note -Wl,rpath,... links for the dynamic linker 
 3 | 
 4 | CC = g++
 5 | LIBS_PATH = ../libraries
 6 | LIBS = -lvpg_network 
 7 | CPPFLAGS = -I. -I$(LIBS_PATH) -I../controllers/ -I../controllers/vpg_controller -L$(LIBS_PATH) -Wl,-rpath,$(LIBS_PATH)
 8 | 
 9 | DEPS = vpg_test.hpp
10 | OBJ = vpg_test.o 
11 | 
12 | %.o: %.c $(DEPS)
13 | 	$(CC) -Wall -c -o $@ $(DEPS) $(CPPFLAGS)
14 | 
15 | dqn_test: $(OBJ) 
16 | 	$(CC) -Wall -o vpg_test vpg_test.o $(CPPFLAGS) $(LIBS)
17 | 
18 | .PHONY: clean # do not do anything with files named "clean" 
19 | clean: 
20 | 	rm -f *.o *~ vpg_test 


--------------------------------------------------------------------------------
/libraries/networks/vpg_network/build.sh:
--------------------------------------------------------------------------------
 1 | # Build the target 
 2 | mkdir build 
 3 | cd build
 4 | cmake -DCMAKE_PREFIX_PATH=$LIBTORCH_PATH ..
 5 | make
 6 | 
 7 | # Move the generated shared object to the libraries folder of the Webots project
 8 | cp ./libvpg_network.so ../../..
 9 | 
10 | # Move the required headers as well
11 | cp ../vpg_wrapper.hpp ../../..
12 | echo '// Do not modify! This file is generated when compiling the `libvpg_network.so` file.' | cat - ../../../vpg_wrapper.hpp > temp && mv temp ../../../vpg_wrapper.hpp
13 | cp ../vpg_episode.hpp ../../..
14 | echo '// Do not modify! This file is generated when compiling the `libvpg_network.so` file.' | cat - ../../../vpg_episode.hpp > temp && mv temp ../../../vpg_episode.hpp
15 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # No files without file extension
 2 | *
 3 | !*/
 4 | !*.*
 5 | !Makefile
 6 | !makefile
 7 | 
 8 | # Shared object build
 9 | build/
10 | libraries/*.hpp
11 | 
12 | # Prerequisites
13 | *.d
14 | 
15 | # Compiled Object files
16 | *.slo
17 | *.lo
18 | *.o
19 | *.obj
20 | 
21 | # Precompiled Headers
22 | *.gch
23 | *.pch
24 | 
25 | # Compiled Dynamic libraries
26 | *.so
27 | *.dylib
28 | *.dll
29 | 
30 | # Fortran module files
31 | *.mod
32 | *.smod
33 | 
34 | # Compiled Static libraries
35 | *.lai
36 | *.la
37 | *.a
38 | *.lib
39 | 
40 | # Executables
41 | *.exe
42 | *.out
43 | *.app
44 | 
45 | # editor related files
46 | *.vscode/
47 | 
48 | # python 
49 | __pycache__/
50 | 
51 | # Logging
52 | log/
53 | 
54 | # Trained models
55 | model/
56 | 
57 | # webots
58 | *.wbproj
59 | 


--------------------------------------------------------------------------------
/libraries/networks/vpg_network/vpg_episode.cpp:
--------------------------------------------------------------------------------
 1 | #include "vpg_episode.hpp"
 2 | 
 3 | std::string VPG_episode::to_string() const{
 4 |     std::string output = "Episode " + std::to_string(this->t) + ":\n\tstate: ";
 5 |     for(unsigned int i=0; i<this->state.size(); i++){
 6 |         output += std::to_string(this->state[i]) + " ";
 7 |     }
 8 |     output += ",\n\taction: " + std::to_string(this->action[0]) + ", " + std::to_string(this->action[1]) + ",\n";
 9 |     output += "\treward: " + std::to_string(this->reward) + ",\n";
10 |     output += "\treward to go: " + std::to_string(this->reward_to_go) + ",\n\tnext state: ";
11 |     for(unsigned int i=0; i<this->state_next.size(); i++){
12 |         output += std::to_string(this->state_next[i]) + " ";
13 |     }
14 |     return output;
15 | }
16 | 


--------------------------------------------------------------------------------
/libraries/utils/training_handler.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <iostream>
 4 | #include <numeric>
 5 | #include <random>
 6 | #include <vector>
 7 | 
 8 | #include <time.h>
 9 | 
10 | #include <webots/supervisor.h>
11 | 
12 | /**
13 |  * Detect if the robot hit something. 
14 |  * @output (bool) Whether the robot is currently touching something. 
15 |  */
16 | bool detect_collision();
17 | 
18 | /**
19 |  * Detect if the simulation exploded. Problem occuring in 
20 |  * Webots version R2019b and previous. Solved in R2020a. 
21 |  */
22 | bool simulation_exploded(); 
23 | 
24 | /**
25 |  * Adds the speeds set to compute the "distance" travelled. 
26 |  */
27 | void measure_travelled_distance(double left, double right);
28 | 
29 | /**
30 |  * Reset the measured travelled distance. 
31 |  */
32 | void reset_distance();
33 | 
34 | /**
35 |  * Get the measured travelled distance. 
36 |  */
37 | double get_distance();
38 | 
39 | /**
40 |  * Move the robot at a pre-defined random location. 
41 |  */
42 | void supervisor_move_robot();


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 Lucas W.
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/libraries/utils/state.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once 
 2 | 
 3 | #include <iostream>
 4 | #include <vector>
 5 | #include <memory> 
 6 | #include <math.h>
 7 | 
 8 | // Kephera API
 9 | #include <khepera.hpp>
10 | 
11 | #include <global_parameters.hpp>
12 | 
13 | #define PREV_STATE      0
14 | #define CURRENT_STATE   1
15 | 
16 | typedef std::vector<std::vector<double>> t_state; 
17 | 
18 | /**
19 |  * The State class stores the two last observed states
20 |  * as 1D vectors. 8 IR values, 2 wheel speeds. 
21 |  */
22 | class State{
23 |     public:
24 |         State();
25 | 
26 |         /**
27 |          * Make state measurment. 
28 |          */
29 |         void measure();
30 | 
31 |         /**
32 |          * Convert the state to a string. 
33 |          * t = 0 (PREV_STATE) is previous, = 1 (CURRENT_STATE) is last measured
34 |          */
35 |         std::string to_string(uint t = CURRENT_STATE);
36 | 
37 |         /**
38 |          * t = 0 (PREV_STATE) is previous, = 1 (CURRENT_STATE) is last measured
39 |          */
40 |         std::vector<double> get_state(uint t = CURRENT_STATE);
41 |         t_state get_states();
42 |         t_state step();
43 | 
44 |         void reset();
45 | 
46 |     protected: 
47 | 
48 |     private: 
49 |         t_state m_states;
50 | };
51 | 


--------------------------------------------------------------------------------
/libraries/networks/vpg_network/vpg_episode.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once 
 2 | 
 3 | #include <iostream>
 4 | #include <string>
 5 | #include <vector>
 6 | 
 7 | /**
 8 | * Structure storing an Episode 
 9 | *   {state, action, reward, reward_to_go, state_next}
10 | * At time t, `state` and `action` are sampled. 
11 | * At time t+1, `reward` and `state_next` are sampled. 
12 | * Once a rollout is complete, the reward_to_go can be computed. 
13 | */
14 | struct VPG_episode{
15 |     int t;                          // Timestamp. 
16 |     std::vector<double> action;     // Action taken by the agent when in `state`. 
17 |     double reward;                  // Reward perceived by the agent for performing `action` in `state`. 
18 |     double reward_to_go;            // Cumulated reward evaluated from time t. 
19 |     std::vector<double> state;      // State when taking the action. 
20 |     std::vector<double> state_next; // State after taking the action. 
21 | 
22 |     /**
23 |      * Convert Episode to string:
24 |      * "Episode x:
25 |      *      state: x x x x x x 
26 |      *      action: x
27 |      *      reward: x 
28 |      *      reward to go: x
29 |      *      next state: x x x x x x 
30 |      * "
31 |      */
32 |     std::string to_string() const;
33 | 
34 | } typedef DQN_episode;


--------------------------------------------------------------------------------
/libraries/networks/vpg_network/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.0 FATAL_ERROR)
 2 | project(vpg_network)
 3 | 
 4 | find_package(Torch REQUIRED)
 5 | 
 6 | # Paths to include vpg_parameters.hpp and global_parameters.hpp for global settings
 7 | set(CONTROLLER_PATH ../../../controllers/vpg_controller)
 8 | set(PARAMETERS_PATH ../../../controllers)
 9 | 
10 | # Specify target as shared object 
11 | add_library(vpg_network SHARED vpg_network.cpp vpg_episode.cpp) 
12 | 
13 | # Include the directory of the controller 
14 | target_include_directories(vpg_network PRIVATE ${CONTROLLER_PATH} ${PARAMETERS_PATH}) 
15 | 
16 | # Link the PyTorch libraries
17 | target_link_libraries(vpg_network "${TORCH_LIBRARIES}")
18 | 
19 | set_property(TARGET vpg_network PROPERTY CXX_STANDARD 11)
20 | 
21 | # The following code block is suggested to be used on Windows.
22 | # According to https://github.com/pytorch/pytorch/issues/25457,
23 | # the DLLs need to be copied to avoid memory errors.
24 | if (MSVC)
25 |   file(GLOB TORCH_DLLS "${TORCH_INSTALL_PREFIX}/lib/*.dll")
26 |   add_custom_command(TARGET vpg_network
27 |                      POST_BUILD
28 |                      COMMAND ${CMAKE_COMMAND} -E copy_if_different
29 |                      ${TORCH_DLLS}
30 |                      $<TARGET_FILE_DIR:vpg_network>)
31 | endif (MSVC)
32 | 


--------------------------------------------------------------------------------
/controllers/vpg_controller/vpg_parameters.hpp:
--------------------------------------------------------------------------------
 1 | # pragma once
 2 | 
 3 | // Simulation parameters
 4 | #define TIMEOUT             1*30    // Seconds before starting a new rollout 
 5 | #define QUERY_PERIOD        0.01    // Period in seconds between each run of the step method (min is 0.008) 
 6 | #define WAIT_STEPS          floor(QUERY_PERIOD/(double)ROBOT_BASIC_TIMESTEP*1000.0)
 7 | #define ROLLOUTS            40000   // Number of rollouts to execute in total. 
 8 | #define ROLL_PER_SESSION    100     // Restart the simulation after 100 rollouts. 
 9 | 
10 | // Training parameters
11 | #define GAMMA               0.99    // Define the discount factor
12 | #define BATCH_SIZE          500     // Batch size to train the network 
13 | #define USE_VALUE_FUNCTION  true    // Whether to use a Value Function network to support the policy 
14 | #define VALUE_UPDATE_RATE   20      // Number of rollouts before updating the weights of the value function
15 | 
16 | // Model saving/loading file
17 | #define MODEL_PATH          "model/"
18 | #define MODEL_POLICY_FILE   "model/vpg_policy"
19 | #define MODEL_VALUE_FILE    "model/vpg_value" 
20 | #define OPTIM_PATH          "optim/"
21 | #define OPTIM_POLICY_FILE   "optim/optim_policy"
22 | #define OPTIM_VALUE_FILE    "optim/optim_value"
23 | 
24 | // Data logging destination
25 | #define LOG_PATH            "log/"
26 | #define LOG_FILE            "log/log.csv"


--------------------------------------------------------------------------------
/libraries/utils/state.cpp:
--------------------------------------------------------------------------------
 1 | #include "state.hpp"
 2 | 
 3 | State::State(){
 4 | 
 5 |     // Set the default starting states (all false)
 6 |     std::vector<double> v(STATE_DIM,0.);
 7 |     this->m_states.push_back(v);
 8 |     this->m_states.push_back(v);
 9 | }
10 | 
11 | void State::measure(){
12 |     // Roll states
13 |     m_states[PREV_STATE] = m_states[CURRENT_STATE];
14 | 
15 |     // Get the sensor readings
16 |     std::vector<double> state = k4::get_normalized_ranges();
17 |     std::vector<double> speeds = k4::get_normalized_motors_speed();
18 |     state.push_back(speeds[0]);
19 |     state.push_back(speeds[1]);
20 |     m_states[CURRENT_STATE] = state;
21 | }
22 | 
23 | std::string State::to_string(uint t){
24 |     std::string string = "";
25 |     for(uint i=0; i<this->m_states[t].size(); i++){
26 |         string += std::to_string(this->m_states[t][i]) + " ";
27 |     }
28 |     return string;
29 | }
30 | 
31 | std::vector<double> State::get_state(uint t){
32 |     return this->m_states.at(t);
33 | }
34 | 
35 | t_state State::get_states(){
36 |     return this->m_states;
37 | }
38 | 
39 | t_state State::step(){
40 |     this->measure();
41 |     return this->get_states();
42 | }
43 | 
44 | void State::reset(){
45 |     // Set the default starting state (all 0)
46 |     for(uint i=0; i<STATE_DIM; i++){
47 |         this->m_states[0].at(i) = 0.;
48 |         this->m_states[1].at(i) = 0.;
49 |     }
50 | }
51 | 


--------------------------------------------------------------------------------
/libraries/utils/reward.hpp:
--------------------------------------------------------------------------------
 1 | #include <math.h>
 2 | 
 3 | #include "state.hpp"
 4 | #include "training_handler.hpp"
 5 | 
 6 | /**
 7 |  * The Reward class enables to compute the reward associtated
 8 |  * to a given State. It stores the current (last evaluated) reward, 
 9 |  * and the cumulated reward. 
10 |  */
11 | class Reward{
12 |     public:
13 |         /**
14 |          * Reward constructor. 
15 |          */
16 |         Reward();
17 | 
18 |         /**
19 |          * Compute the reward selected when instantiating the class. 
20 |          * @param state (std::vector<double>) Robot's current state. 
21 |          * @param timestamp (int) Rollout time stamp at which the reward is computed. If equal to 0, no decay is applied. 
22 |          */ 
23 |         double compute_reward(t_state state, int timestamp = 0);
24 | 
25 |         /**
26 |          * Get the last computed reward. 
27 |          * @output (double) reward. 
28 |          */
29 |         double get_reward();
30 | 
31 |         /**
32 |          * Get the cumulated computed reward. 
33 |          * @output (double) cumulated reward. 
34 |          */
35 |         double get_cumulated_reward();
36 | 
37 |         /**
38 |          * Reset the currently stored values. 
39 |          */
40 |         void reset();
41 | 
42 |     private: 
43 |         double m_reward;
44 |         double m_cumulated_reward;
45 |         double m_decay;
46 |         std::string m_type;
47 | };


--------------------------------------------------------------------------------
/controllers/vpg_controller/vpg_controller.cpp:
--------------------------------------------------------------------------------
 1 | // File:          vpg_controller.cpp
 2 | // Date:          28.10.2019
 3 | // Description:   Main file of the VPG implementation
 4 | // Author:        Lucas Wälti
 5 | // Modifications:
 6 | 
 7 | #include <vpg_arbitrator.hpp>
 8 | 
 9 | // The arguments of the main function can be specified by the
10 | // "controllerArgs" field of the Robot node
11 | int main(int argc, char **argv) {
12 | 
13 |   // Listen to arguments for mode 
14 |   bool eval = true; 
15 |   if(argc == 1){
16 |     std::cout << "No argument, using default 'eval' mode. Usage is 'training' or 'eval'." << std::endl; 
17 |   }else if(argc == 2){
18 |     std::string mode = argv[1]; 
19 |     if(mode == "training")
20 |       eval = false; 
21 |     else
22 |       eval = true; 
23 |   }
24 | 
25 |   // Create an arbitrator instance
26 |   VPG_arbitrator arbitrator = VPG_arbitrator(); 
27 |   arbitrator.mode(/*evaluating*/ eval, /*verbose*/ true);
28 |   arbitrator.reset();
29 | 
30 |   std::cout << "Arbitrator launched in " << (eval ? "evaluation" : "training") << " mode" << std::endl;
31 |   
32 |   while (arbitrator.step()) {
33 |     // Check if simulation break
34 |     if(simulation_exploded()){
35 |       std::cout << "Simulation exploded." << std::endl;
36 |       wb_supervisor_simulation_reset();
37 |     }
38 |   };
39 | 
40 |   if(arbitrator.is_training())
41 |     wb_supervisor_simulation_quit(EXIT_SUCCESS); // ask Webots to terminate
42 | 
43 |   // cleanup code 
44 |   return 0;
45 | }
46 | 


--------------------------------------------------------------------------------
/controllers/khepera_test/khepera_test.cpp:
--------------------------------------------------------------------------------
 1 | // File:        khepera_test.cpp
 2 | // Date:        28.03.2020  
 3 | // Description: Simple demo controller showing basic controls 
 4 | // Author:      Lucas Waelti
 5 | // Modifications:
 6 | 
 7 | /*
 8 |  * This controller was readapted and modified from the default 
 9 |  * khepera.c controller provided by Cyberbotics. 
10 |  */
11 | 
12 | #include <khepera.hpp> 
13 | 
14 | int main(int argc, char **argv) {
15 |   
16 |     // Initililaize the robot 
17 |     int time_step = k4::init_robot(); 
18 | 
19 |     // store the last time a message was displayed
20 |     int last_display_second = 0;
21 | 
22 |     std::vector<double> ranges, normalized_ranges; 
23 | 
24 |     // main loop
25 |     while (wb_robot_step(time_step) != -1) {
26 | 
27 |         // Get infrared data 
28 |         normalized_ranges = k4::get_normalized_ranges(); 
29 |         ranges = k4::get_ranges(); 
30 | 
31 |         // Display the normalized front sensor reading
32 |         if(last_display_second != (int)wb_robot_get_time()){
33 |             last_display_second = (int)wb_robot_get_time();
34 |             std::cout << "Distance front: " << normalized_ranges[3] << std::endl;
35 |         }
36 |         
37 |         // simple obstacle avoidance algorithm
38 |         // based on the front infrared sensors
39 |         double speed_offset = 0.2 * (MAX_SPEED - 0.03 * ranges[3]);
40 |         double speed_delta = 0.03 * ranges[2] - 0.03 * ranges[4];
41 |         k4::set_motors_speed(speed_offset + speed_delta, speed_offset - speed_delta);
42 |     };
43 | 
44 |     wb_robot_cleanup();
45 | 
46 |     return EXIT_SUCCESS;
47 | }
48 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # RL_Webots
 2 | This repository shows how Deep Reinforcement Learning can be used within Webots. This project was developed under Linux. While it should be possible to use it with other operating systems, this was not tested. 
 3 | 
 4 | The code presented in this repository was adapted from a semester project at the [Distributed Intelligent Systems and Algorithms Laboratory (DISAL)](https://www.epfl.ch/labs/disal/). A summary of the project can be found [here](https://disalw3.epfl.ch/teaching/student_projects/ay_2019-20/ws/DISAL-SP133_summary.pdf). The part presented here is about **Deep Reinforcement Learning**, leveraging **Policy Gradient** to train a fully connected, feed forward neural network to solve a simple **obstacle avoidance task** on a **continuous action space** with a Khepera IV robot. 
 5 | 
 6 | The project was entirely implemented in C++ and relies on [libtorch](https://pytorch.org/cppdocs/), [PyTorch](https://pytorch.org/)'s C++ API. 
 7 | 
 8 | ## Install
 9 | Download [PyTorch's C++ API](https://pytorch.org/). Choose following options: 
10 | - Build: Stable
11 | - OS: Linux
12 | - Package: LibTorch
13 | - Language: C++
14 | - CUDA: None
15 | 
16 | and click on the second link (cxx11 ABI, 87.4M). Extract the zip file wherever you like on your computer. You need to edit the `env.sh` script and adapt the path to your libtorch installation folder: 
17 | ```bash
18 | export LIBTORCH_PATH=/path/to/libtorch 
19 | ```
20 | 
21 | Open up a terminal, go to the project folder and run the following commands to build the project: 
22 | ```bash
23 | source env.sh
24 | ./build_project.sh
25 | ```
26 | ## Documentation
27 | Please visit the repository's [wiki](https://github.com/LucasWaelti/RL_Webots/wiki) for more details. 


--------------------------------------------------------------------------------
/libraries/utils/reward.cpp:
--------------------------------------------------------------------------------
 1 | #include "reward.hpp"
 2 | 
 3 | /**
 4 |  * Verify whether one if the IR sensor is seeing an 
 5 |  * obstacle. 
 6 |  */
 7 | bool _obstacle_present(const std::vector<double>& a){
 8 |     for(int i=0; i<STATE_NUM_IR; i++){
 9 |         if(a[i] > 0.0)
10 |             return true; 
11 |     }
12 |     return false; 
13 | }
14 | 
15 | Reward::Reward(){
16 |     reset(); 
17 | }
18 | 
19 | double Reward::compute_reward(t_state state, int timestamp){
20 | 
21 |     m_reward = 0.; 
22 | 
23 |     // Adapt speed representation
24 |     state[CURRENT_STATE][STATE_LEFT]  = state[CURRENT_STATE][STATE_LEFT] *2.0-1.0; 
25 |     state[CURRENT_STATE][STATE_RIGHT] = state[CURRENT_STATE][STATE_RIGHT]*2.0-1.0; 
26 | 
27 |     // Reward driving forward when no obstacle
28 |     if(!_obstacle_present(state.at(CURRENT_STATE))){
29 |         if(state.at(CURRENT_STATE)[STATE_LEFT] > 0.0
30 |             && state.at(CURRENT_STATE)[STATE_RIGHT] > 0.0){
31 |             m_reward += 0.01;
32 |             if(abs(state.at(CURRENT_STATE)[STATE_LEFT] - state.at(CURRENT_STATE)[STATE_RIGHT]) < 0.1)
33 |                 m_reward += 0.01;
34 |             else 
35 |                 m_reward -= 0.02;
36 |         }
37 |         else 
38 |             m_reward -= 0.01; 
39 |     }
40 | 
41 |     // Reward clearing obstacles
42 |     if(_obstacle_present(state.at(PREV_STATE)) && 
43 |         !_obstacle_present(state.at(CURRENT_STATE)))
44 |         m_reward += 1.0; 
45 | 
46 |     // Penalize collisions (override reward)
47 |     if(detect_collision())
48 |         m_reward = -1.0; 
49 |     
50 |     m_cumulated_reward += m_reward;
51 | 
52 |     return m_reward;
53 | }
54 | 
55 | double Reward::get_reward(){
56 |     return m_reward;
57 | }
58 | 
59 | double Reward::get_cumulated_reward(){
60 |     return m_cumulated_reward;
61 | }
62 | 
63 | void Reward::reset(){
64 |     m_reward = 0.;
65 |     m_cumulated_reward = 0.;
66 | }


--------------------------------------------------------------------------------
/log_analyzer/vpg_analyzer.py:
--------------------------------------------------------------------------------
 1 | import numpy as np 
 2 | import pandas as pd
 3 | from matplotlib import pyplot as plt 
 4 | import time 
 5 | 
 6 | from Analyzer import Analyzer
 7 | 
 8 | class Vpg_analyzer(Analyzer):
 9 |     '''
10 |     Child class of Analyzer. 
11 |     Implement functions specialized for the given subject. 
12 |     '''
13 | 
14 |     def __init__(self, loc = '../controllers/vpg_controller/log/', file = 'log.csv'):
15 |         super(Vpg_analyzer, self).__init__(loc, file)
16 | 
17 |     def results(self):
18 |         fig, axes = plt.subplots(4,1,figsize=(12,8))
19 |         fig.canvas.set_window_title('Statistics for {} rollouts'.format(self.m_data_df.shape[0]))
20 | 
21 |         # plot the loss
22 |         sample_dim = 100 
23 |         self._sub_plot_single(axes[0], 'Max_loss',legend='Max loss')
24 |         self._sub_plot_single_from_array(axes[0], 
25 |             self.compute_moving_average('Max_loss', sample_dim), 
26 |             'rollouts', 'Max loss Moving Average (N={})'.format(sample_dim),legend='Max loss moving average')
27 |         axes[0].legend()
28 |         axes[0].grid()
29 |         # Plot the cumulated reward 
30 |         self._sub_plot_single_from_array(axes[1], 
31 |             self.compute_moving_average('Cumulated_reward', sample_dim), 
32 |             'rollouts', 'Cumulated Reward Moving Average (N={})'.format(sample_dim))
33 |         axes[1].grid()
34 |         # PLot the average success rate
35 |         self._sub_plot_single_from_array(axes[2], 
36 |             self.compute_moving_average('Distance', sample_dim), 
37 |             'rollouts', 'Distance Moving Average (N={})'.format(sample_dim))
38 |         axes[2].grid()
39 |         # Plot the number of timesteps 
40 |         self._sub_plot_single(axes[3], 'Num_timesteps') 
41 | 
42 |         fig.tight_layout()
43 | 
44 | if __name__ == "__main__":
45 |     a = Vpg_analyzer(loc = '../controllers/vpg_controller/log/', file = 'log.csv')
46 |     a.results()
47 |     #a.plot_distance()
48 |     a.show()
49 | 
50 | 


--------------------------------------------------------------------------------
/libraries/utils/khepera.hpp:
--------------------------------------------------------------------------------
 1 | #include <webots/motor.h>
 2 | #include <webots/robot.h>
 3 | 
 4 | #include <webots/camera.h>
 5 | #include <webots/distance_sensor.h>
 6 | #include <webots/led.h>
 7 | 
 8 | #include <iostream>
 9 | #include <vector> 
10 | 
11 | namespace k4{
12 | 
13 |     #define MAX_SPEED 47.6
14 |     #define REDUCER   0.5 // Only use 50% of max speed 
15 | 
16 |     #define IR_MIN 100.0  // No obstacle
17 |     #define IR_MAX 1000.0 // Almost touching obstacle 
18 | 
19 |     #define NUMBER_OF_INFRARED_SENSORS 12
20 |     #define NUMBER_OF_PERIMETER_IR_SENSORS 8
21 | 
22 |     /**
23 |      * Initialize the robot's infrared sensors, motors and LEDs 
24 |      */
25 |     int init_robot();
26 | 
27 |     /**
28 |      * Perform a time step 
29 |      */
30 |     int khepera_running();
31 | 
32 |     /**
33 |      * Set the left and right motor speeds. 
34 |      */
35 |     void set_motors_speed(double left, double right);
36 | 
37 |     /**
38 |      * Get the left and right motor speeds. 
39 |      */
40 |     std::vector<double> get_motors_speed();
41 | 
42 |     /**
43 |      * Set the left and right motor speeds. Input must be 
44 |      * normalized between [-1,1]. Backward: -1, Forward: +1
45 |      */
46 |     void set_normalized_motors_speed(double left, double right);
47 | 
48 |     /**
49 |      * Get the left and right normalized motor speeds. 
50 |      */
51 |     std::vector<double> get_normalized_motors_speed();
52 | 
53 |     /**
54 |      * Normalize a range reading between 0 and 1. 
55 |      * 0 (no obstacle), 1 (obstacle very close)
56 |      */
57 |     double normalize_range(double range);
58 | 
59 |     /**
60 |      * Get the range of infrared sensor i
61 |      */
62 |     double get_range(int i);
63 | 
64 |     /**
65 |      * Get the range of each infrared sensor 
66 |      */
67 |     std::vector<double> get_ranges();
68 | 
69 |     /**
70 |      * Get the normalized range of each infrared sensor. 
71 |      * Returns values between 0 (no obstacle) and 1 (obstacle very close)
72 |      */
73 |     std::vector<double> get_normalized_ranges();
74 | 
75 | }


--------------------------------------------------------------------------------
/libraries/utils/data_logger.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <iostream>
 4 | #include <fstream>
 5 | #include <memory>
 6 | #include <string.h>
 7 | #include <sstream>
 8 | #include <sys/stat.h>
 9 | #include <vector>
10 | 
11 | /**
12 |  * Create a csv file in the specified folder if it does not exist yet. 
13 |  * @param dir (std::string) Path to the file. 
14 |  * @param file (std::string) The file name must include the path to it. 
15 |  * @param headers (std::vector<std::string>) Name of the columns. 
16 |  */
17 | int create_log(std::string dir, std::string file, std::vector<std::string> headers);
18 | 
19 | /**
20 |  * Write a line into the specified file (path to file included).
21 |  * Each element of the vector will be written on the same line, 
22 |  * separated by a comma. 
23 |  * @param file (std::string) the file name must include the path to it. 
24 |  * @param values (std::vector<std::string>) vector containing the values to write. 
25 |  */
26 | void write_line(std::string file, std::vector<std::string> values);
27 | 
28 | /**
29 |  * Write a line into the specified file (path to file included).
30 |  * Each element of the vector will be written on the same line, 
31 |  * separated by a comma. 
32 |  * @overload void write_line(std::string file, std::vector<std::string> values);
33 |  * @param file (std::string) the file name must include the path to it. 
34 |  * @param values (std::vector<double>) vector containing the values to write.
35 |  * The values are converted to std::string.  
36 |  */
37 | void write_line(std::string file, std::vector<double> values);
38 | 
39 | /**
40 |  * Read the value of the last stored rollout. Defaults to 0 if no file
41 |  * or if no rollout was previously logged. 
42 |  * @param file (std::string) the file name must include the path to it. 
43 |  * @output (int) value of the last rollout. 
44 |  */
45 | int read_last_rollout(std::string file);
46 | 
47 | /**
48 |  * Read the index of the last stored particle. Defaults to 0 if no file
49 |  * or if no particle was previously logged. 
50 |  * @param file (std::string) the file name must include the path to it. 
51 |  * @output (int) index of the last logged particle. 
52 |  */
53 | int read_last_particle(std::string file);


--------------------------------------------------------------------------------
/libraries/utils/training_handler.cpp:
--------------------------------------------------------------------------------
 1 | #include "training_handler.hpp"
 2 | 
 3 | static const char *def = "K4"; // Robot def 
 4 | 
 5 | bool detect_collision(){
 6 |     // Supervisor - get handle on robot position 
 7 |     WbNodeRef robot_node_ref = wb_supervisor_node_get_from_def(def);
 8 |     int num_contact_points = wb_supervisor_node_get_number_of_contact_points(robot_node_ref);
 9 |     if(num_contact_points > 0)
10 |         return true; 
11 |     else
12 |         return false; 
13 | }
14 | 
15 | bool simulation_exploded(){
16 |     // Supervisor - get handle on robot position 
17 |     WbNodeRef robot_node_ref = wb_supervisor_node_get_from_def(def);
18 |     WbFieldRef position_field = wb_supervisor_node_get_field(robot_node_ref, "translation");
19 |     double y_pose =  wb_supervisor_field_get_sf_vec3f(position_field)[1];
20 |     return (y_pose > 0.01);
21 | }
22 | 
23 | static double distance = 0.0; 
24 | 
25 | void measure_travelled_distance(double left, double right){
26 |     distance += (left + right)/1000.0; 
27 | }
28 | 
29 | void reset_distance(){
30 |     distance = 0.0; 
31 | }
32 | 
33 | double get_distance(){
34 |     return distance; 
35 | }
36 | 
37 | static std::default_random_engine _generator;
38 | void supervisor_move_robot(){
39 | 
40 |     // Supervisor - get handle on robot position 
41 |     WbNodeRef robot_node_ref = wb_supervisor_node_get_from_def(def);
42 |     WbFieldRef position_field = wb_supervisor_node_get_field(robot_node_ref, "translation");
43 |     //WbFieldRef rotation_field = wb_supervisor_node_get_field(robot_node_ref, "rotation");
44 | 
45 |     // Define some locations 
46 |     std::vector<double*> locations;
47 |     double loc0[3] = {-0.66,0,-0.68};   // Upper left
48 |     double loc1[3] = {-0.79,0,0.79};    // Lower left
49 |     double loc2[3] = {0,0,0};           // Middle
50 |     double loc3[3] = {0.71,0,0.72};     // In White area
51 |     locations.push_back(loc0);
52 |     locations.push_back(loc1);
53 |     locations.push_back(loc2);
54 |     locations.push_back(loc3);
55 | 
56 |     // Sample a location
57 |     std::uniform_int_distribution<> dis(0, locations.size()-1);
58 |     int ind = dis(_generator);
59 | 
60 |     // Spawn the robot at chosen location
61 |     wb_supervisor_field_set_sf_vec3f(position_field, locations[ind]);
62 | }


--------------------------------------------------------------------------------
/controllers/khepera_test/Makefile:
--------------------------------------------------------------------------------
 1 | # Copyright 1996-2020 Cyberbotics Ltd.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | ### Generic Makefile.include for Webots controllers, physics plugins, robot
16 | ### window libraries, remote control libraries and other libraries
17 | ### to be used with GNU make
18 | ###
19 | ### Platforms: Windows, macOS, Linux
20 | ### Languages: C, C++
21 | ###
22 | ### Authors: Olivier Michel, Yvan Bourquin, Fabien Rohrer
23 | ###          Edmund Ronald, Sergei Poskriakov
24 | ###
25 | ###-----------------------------------------------------------------------------
26 | ###
27 | ### This file is meant to be included from the Makefile files located in the
28 | ### Webots projects subdirectories. It is possible to set a number of variables
29 | ### to customize the build process, i.e., add source files, compilation flags,
30 | ### include paths, libraries, etc. These variables should be set in your local
31 | ### Makefile just before including this Makefile.include. This Makefile.include
32 | ### should never be modified.
33 | ###
34 | ### Here is a description of the variables you may set in your local Makefile:
35 | ###
36 | ### ---- C Sources ----
37 | ### if your program uses several C source files:
38 | ### C_SOURCES = my_plugin.c my_clever_algo.c my_graphics.c
39 | ###
40 | ### ---- C++ Sources ----
41 | ### if your program uses several C++ source files:
42 | ### CXX_SOURCES = my_plugin.cc my_clever_algo.cpp my_graphics.c++
43 | ###
44 | ### ---- Compilation options ----
45 | ### if special compilation flags are necessary:
46 | ### CFLAGS = -Wno-unused-result
47 | ###
48 | ### ---- Linked libraries ----
49 | ### if your program needs additional libraries:
50 | ### INCLUDE = -I"/my_library_path/include"
51 | ### LIBRARIES = -L"/path/to/my/library" -lmy_library -lmy_other_library
52 | ###
53 | ### ---- Linking options ----
54 | ### if special linking flags are needed:
55 | ### LFLAGS = -s
56 | ###
57 | ### ---- Webots included libraries ----
58 | ### if you want to use the Webots C API in your C++ controller program:
59 | ### USE_C_API = true
60 | ###
61 | ### ---- Debug mode ----
62 | ### if you want to display the gcc command line for compilation and link, as
63 | ### well as the rm command details used for cleaning:
64 | ### VERBOSE = 1
65 | ###
66 | ###-----------------------------------------------------------------------------
67 | 
68 | USE_C_API = true
69 | 
70 | UTILS_PATH = ../../libraries/utils/
71 | 
72 | INCLUDE = -I. -I$(UTILS_PATH)
73 | CXX_SOURCES = khepera_test.cpp $(UTILS_PATH)khepera.cpp 
74 | 
75 | ### Do not modify: this includes Webots global Makefile.include
76 | space :=
77 | space +=
78 | WEBOTS_HOME_PATH=$(subst $(space),\ ,$(strip $(subst \,/,$(WEBOTS_HOME))))
79 | include $(WEBOTS_HOME_PATH)/resources/Makefile.include
80 | 


--------------------------------------------------------------------------------
/libraries/networks/vpg_network/vpg_wrapper.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once 
  2 | 
  3 | #include <vector>
  4 | #include <sstream>
  5 | #include <sys/stat.h>
  6 | 
  7 | #include "vpg_episode.hpp" 
  8 | 
  9 | enum SAVE_ARGS {POLICY,VALUE,POLICY_OPTIM,VALUE_OPTIM};
 10 | 
 11 | // Namespace for the functions of the VPG wrapper 
 12 | namespace vpg_w{
 13 | 
 14 |     /**
 15 |      * Initialize the networks and other parameters. 
 16 |      */
 17 |     void init();
 18 | 
 19 |     /**
 20 |      * Store provided episode to use later for training. 
 21 |      * @param episode (Episode) {state, action, reward, next state} tuple. 
 22 |      */
 23 |     void store_episode(VPG_episode episode);
 24 | 
 25 |     /**
 26 |      * Compute the reward_to_go of the currently stored episodes. 
 27 |      */
 28 |     void compute_reward_to_go();
 29 | 
 30 |     /**
 31 |      * @return (uint) number of episodes currently stored. 
 32 |      */
 33 |     uint get_memory_size();
 34 | 
 35 |     /**
 36 |      * @return (std::vector<VPG_episode>) get the memory's reference. 
 37 |      */
 38 |     const std::vector<VPG_episode>& get_memory(); 
 39 | 
 40 |     /**
 41 |      * Dump the currently stored episodes. 
 42 |      */
 43 |     void reset_memory();
 44 | 
 45 |     /**
 46 |      * Run inference on the policy network and return the action probabilities. 
 47 |      * @param state (std::vector<double>) Robot's state. 
 48 |      * @return (std::vector<float>) Each action's probability. 
 49 |      */
 50 |     std::vector<float> eval_policy(std::vector<double> state);
 51 | 
 52 |     /**
 53 |      * Run inference on the value function network and return the state's value. 
 54 |      * @param state (std::vector<double>) Robot's state. 
 55 |      * @return (float) State's estimated value.  
 56 |      */
 57 |     float eval_value_function(std::vector<double> state);
 58 | 
 59 |     /**
 60 |      * Train the network based on the currently stored episodes. 
 61 |      * @param batch_size (uint) Number of episodes to use in a single batch. 
 62 |      */
 63 |     void train(uint batch_size=100);
 64 | 
 65 |     /**
 66 |      * Get the maximum loss. 
 67 |      */
 68 |     float get_max_loss();
 69 |     
 70 |     /**
 71 |      * Reset the maximum loss. 
 72 |      */
 73 |     void reset_max_loss();
 74 | 
 75 |     /**
 76 |      * Check if the specified directory exists.
 77 |      * @param path (std::string)
 78 |      * @return (bool)
 79 |      */
 80 |     bool _dir_exists(std::string path);
 81 | 
 82 |     /**
 83 |      * Create the specified directory exists.
 84 |      * @param path (std::string)
 85 |      */
 86 |     void _create_path(std::string path);
 87 | 
 88 |     /**
 89 |      * Save the model into the specified file
 90 |      * @param path (std::string) path to the file
 91 |      * @param file (std::string) path to the file + file name
 92 |      * @param model (uint) whether to save the policy network (POLICY=0) or the value function network (VALUE=1)
 93 |      */
 94 |     void save(std::string path, std::string file, uint model);
 95 | 
 96 |     /**
 97 |      * Load the model from the specified file
 98 |      * @param file (std::string) path to the file + file name
 99 |      * @param model (uint) whether to load the policy network (POLICY=0) or the value function network (VALUE=1)
100 |      * @return (bool) whether loading has succeeded. 
101 |      */
102 |     bool load(std::string file, uint model);
103 | }
104 | 


--------------------------------------------------------------------------------
/libraries/networks/vpg_network/vpg_network.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once 
  2 | #include <torch/torch.h>
  3 | #include <torch/script.h>
  4 | #include <iostream>
  5 | #include <random>
  6 | #include <vector>
  7 | 
  8 | #include "vpg_episode.hpp" 
  9 | 
 10 | #include <vpg_parameters.hpp>
 11 | #include <global_parameters.hpp>
 12 | 
 13 | // Generator's seed
 14 | #define SEED 12345 
 15 | 
 16 | 
 17 | /**
 18 |  * Namespace "vpg" containing the Vanilla Policy Gradient implementation. 
 19 |  */
 20 | namespace vpg{
 21 | 
 22 |     // Random generator
 23 |     std::default_random_engine generator;
 24 |     // Seeds the generator for reproducibility 
 25 |     void seed_generator();
 26 |     
 27 |     // --------------------- Policy Network ---------------------
 28 |     struct PolicyImpl: torch::nn::Module{
 29 |         PolicyImpl();
 30 |         torch::Tensor forward(torch::Tensor x);
 31 |         torch::nn::Linear l1{nullptr},l2{nullptr},l3{nullptr};
 32 |     };
 33 |     TORCH_MODULE(Policy);
 34 |     // Network instance
 35 |     static Policy policy_model; 
 36 |     // Optimizer used to train the network
 37 |     static torch::optim::Adam* p_optimizer;
 38 | 
 39 |     // --------------------- Value function Network ---------------------
 40 |     struct ValueImpl: torch::nn::Module{
 41 |         ValueImpl();
 42 |         torch::Tensor forward(torch::Tensor x);
 43 |         torch::nn::Linear l1{nullptr},l2{nullptr},l3{nullptr};
 44 |     };
 45 |     TORCH_MODULE(Value);
 46 |     // Network instance
 47 |     static Value value_model; 
 48 |     // Optimizer used to train the network
 49 |     static torch::optim::Adam* v_optimizer; 
 50 | 
 51 |     // Rollout counter 
 52 |     static int value_rollout_count = 0;
 53 |     // -----------------------------------------------------------------
 54 | 
 55 | 
 56 |     // Vector storing the generated episodes 
 57 |     static std::vector<VPG_episode> memory;
 58 |     static std::vector<VPG_episode> memory_batch;
 59 | 
 60 |     /**
 61 |      * Extract the states stored by the episodes inside the memory. 
 62 |      * @param next (bool) If false, the state of the episode is queried. 
 63 |      *              Otherwise the next state of the episode is returned. 
 64 |      */
 65 |     std::vector<std::vector<double>> get_state_from_memory_batch(bool next = false);
 66 | 
 67 |     torch::Tensor get_action_from_memory_batch();
 68 | 
 69 |     torch::Tensor get_reward_from_memory_batch();
 70 | 
 71 |     torch::Tensor get_reward_to_go_from_memory_batch();
 72 | 
 73 |     /**
 74 |      * Utility method to convert a state vector into a Tensor including a dimension for the batch. 
 75 |      * @param state (std::vector<double>) 
 76 |      */
 77 |     torch::Tensor convert_state_to_batch(std::vector<double> state);
 78 | 
 79 |     /**
 80 |      * Utility method to convert a vector of state vectors into a Tensor generating an actual batch.
 81 |      * @overload convert_state_to_batch(std::vector<double> state)
 82 |      * @param states (std::vector<std::vector<double>>) 
 83 |      */
 84 |     torch::Tensor convert_state_to_batch(std::vector<std::vector<double>> states);
 85 | 
 86 |     /**
 87 |      * Compute parameters gradients based on the current memory state when called. 
 88 |      */
 89 |     int compute_gradients();
 90 | 
 91 |     // Maximum loss encountered during a rollout 
 92 |     static float max_loss = 0.0F;
 93 | 
 94 |     /**
 95 |      * Update the maximum loss. 
 96 |      */
 97 |     void update_max_loss(float loss);
 98 | }
 99 | 
100 | 


--------------------------------------------------------------------------------
/controllers/vpg_controller/Makefile:
--------------------------------------------------------------------------------
 1 | # Copyright 1996-2019 Cyberbotics Ltd.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | ### Generic Makefile.include for Webots controllers, physics plugins, robot
16 | ### window libraries, remote control libraries and other libraries
17 | ### to be used with GNU make
18 | ###
19 | ### Platforms: Windows, macOS, Linux
20 | ### Languages: C, C++
21 | ###
22 | ### Authors: Olivier Michel, Yvan Bourquin, Fabien Rohrer
23 | ###          Edmund Ronald, Sergei Poskriakov
24 | ###
25 | ###-----------------------------------------------------------------------------
26 | ###
27 | ### This file is meant to be included from the Makefile files located in the
28 | ### Webots projects subdirectories. It is possible to set a number of variables
29 | ### to customize the build process, i.e., add source files, compilation flags,
30 | ### include paths, libraries, etc. These variables should be set in your local
31 | ### Makefile just before including this Makefile.include. This Makefile.include
32 | ### should never be modified.
33 | ###
34 | ### Here is a description of the variables you may set in your local Makefile:
35 | ###
36 | ### ---- C Sources ----
37 | ### if your program uses several C source files:
38 | ### C_SOURCES = my_plugin.c my_clever_algo.c my_graphics.c
39 | ###
40 | ### ---- C++ Sources ----
41 | ### if your program uses several C++ source files:
42 | ### CXX_SOURCES = my_plugin.cc my_clever_algo.cpp my_graphics.c++
43 | ###
44 | ### ---- Compilation options ----
45 | ### if special compilation flags are necessary:
46 | ### CFLAGS = -Wno-unused-result
47 | ###
48 | ### ---- Linked libraries ----
49 | ### if your program needs additional libraries:
50 | ### INCLUDE = -I"/my_library_path/include"
51 | ### LIBRARIES = -L"/path/to/my/library" -lmy_library -lmy_other_library
52 | ###
53 | ### ---- Linking options ----
54 | ### if special linking flags are needed:
55 | ### LFLAGS = -s
56 | ###
57 | ### ---- Webots included libraries ----
58 | ### if you want to use the Webots C API in your C++ controller program:
59 | ### USE_C_API = true
60 | ### if you want to link with the Qt framework embedded in Webots:
61 | ### QT = core gui widgets network
62 | ###
63 | ### ---- Debug mode ----
64 | ### if you want to display the gcc command line for compilation and link, as
65 | ### well as the rm command details used for cleaning:
66 | ### VERBOSE = 1
67 | ###
68 | ###-----------------------------------------------------------------------------
69 | 
70 | USE_C_API = true 
71 | 
72 | # Utility files (world modification, data logging, state and reward computations)
73 | UTILS = ../../libraries/utils/
74 | # Global parameters path
75 | GLOBAL_PARAM = ../
76 | 
77 | CXX_SOURCES = $(wildcard *.cpp) $(wildcard $(UTILS)*.cpp)
78 | 
79 | INCLUDE = -I$(GLOBAL_PARAM) -I$(UTILS) -I../../libraries 
80 | 
81 | LIBRARIES= -L../../libraries -lvpg_network # -L $(WK4_PORTABLE) -lk4_webots -lgsl -lgslcblas
82 | 
83 | ### Do not modify: this includes Webots global Makefile.include
84 | space :=
85 | space +=
86 | WEBOTS_HOME_PATH=$(subst $(space),\ ,$(strip $(subst \,/,$(WEBOTS_HOME))))
87 | include $(WEBOTS_HOME_PATH)/resources/Makefile.include
88 | 


--------------------------------------------------------------------------------
/worlds/learned_obstacle_avoidance.wbt:
--------------------------------------------------------------------------------
  1 | #VRML_SIM R2020a utf8
  2 | WorldInfo {
  3 |   basicTimeStep 8
  4 |   lineScale 0.05
  5 |   contactProperties [
  6 |     ContactProperties {
  7 |       material2 "khepera4 caster wheel"
  8 |       coulombFriction [
  9 |         0.01
 10 |       ]
 11 |     }
 12 |   ]
 13 | }
 14 | Viewpoint {
 15 |   orientation 0.00032259533168366796 0.7031169061809662 0.7110741959702646 3.1422306242127007
 16 |   position 0.032327726197468154 2.6940283690489477 -0.0019851336983633394
 17 |   follow "Khepera_IV"
 18 |   followType "None"
 19 | }
 20 | RectangleArena {
 21 |   floorSize 2 2
 22 |   floorTileSize 4 4
 23 |   floorAppearance Appearance {
 24 |     material Material {
 25 |       diffuseColor 0.2 0.2 0.2
 26 |     }
 27 |   }
 28 |   wallHeight 0.3
 29 | }
 30 | Background {
 31 |   skyColor [
 32 |     0.2 0.2 0.2
 33 |     0 0 0
 34 |     0 0 0
 35 |   ]
 36 | }
 37 | DirectionalLight {
 38 |   ambientIntensity 1
 39 |   direction 0 -1 0
 40 |   intensity 1.5
 41 | }
 42 | DEF K4 Khepera4 {
 43 |   name "Khepera_IV"
 44 |   controller "vpg_controller"
 45 |   controllerArgs "training"
 46 |   supervisor TRUE
 47 | }
 48 | DEF OBST_0 Solid {
 49 |   translation -0.4 0.04955854302621261 -0.3
 50 |   rotation -1.435231588108557e-17 1 -2.118236274989482e-18 0
 51 |   children [
 52 |     Shape {
 53 |       appearance Appearance {
 54 |         material Material {
 55 |           diffuseColor 0.8 0.72 0.529
 56 |         }
 57 |       }
 58 |       geometry Box {
 59 |         size 0.16 0.16 0.16
 60 |       }
 61 |     }
 62 |   ]
 63 |   boundingObject Box {
 64 |     size 0.15 0.1 0.15
 65 |   }
 66 | }
 67 | DEF OBST_1 Solid {
 68 |   translation 0.35 0.04955854302621261 0.59
 69 |   rotation 0.25163385176495073 0.9678225067882721 -5.028279834967654e-18 0
 70 |   children [
 71 |     Shape {
 72 |       appearance Appearance {
 73 |         material Material {
 74 |           diffuseColor 0.8 0.72 0.529
 75 |         }
 76 |       }
 77 |       geometry Box {
 78 |         size 0.16 0.16 0.16
 79 |       }
 80 |     }
 81 |   ]
 82 |   name "solid(1)"
 83 |   boundingObject Box {
 84 |     size 0.15 0.1 0.15
 85 |   }
 86 | }
 87 | DEF OBST_2 Solid {
 88 |   translation 0.26 0.04955854302621261 -0.75
 89 |   rotation 2.31139824055369e-15 1 -1.3894918185828773e-15 -0.005936323319313137
 90 |   children [
 91 |     Shape {
 92 |       appearance Appearance {
 93 |         material Material {
 94 |           diffuseColor 0.8 0.72 0.529
 95 |         }
 96 |       }
 97 |       geometry Box {
 98 |         size 0.16 0.16 0.16
 99 |       }
100 |     }
101 |   ]
102 |   name "solid(5)"
103 |   boundingObject Box {
104 |     size 0.15 0.1 0.15
105 |   }
106 | }
107 | DEF OBST_3 Solid {
108 |   translation 0.58 0.0495585430262156 0
109 |   rotation -7.621102877601795e-10 1 -2.02435756457418e-09 0
110 |   children [
111 |     Shape {
112 |       appearance Appearance {
113 |         material Material {
114 |           diffuseColor 0.8 0.72 0.529
115 |         }
116 |       }
117 |       geometry Box {
118 |         size 0.16 0.16 0.16
119 |       }
120 |     }
121 |   ]
122 |   name "solid(2)"
123 |   boundingObject Box {
124 |     size 0.15 0.1 0.15
125 |   }
126 | }
127 | DEF OBST_4 Solid {
128 |   translation -0.75 0.04955854302621261 0.46
129 |   rotation -2.7321952489371356e-17 1 1.7619336741772742e-17 0
130 |   children [
131 |     Shape {
132 |       appearance Appearance {
133 |         material Material {
134 |           diffuseColor 0.8 0.72 0.529
135 |         }
136 |       }
137 |       geometry Box {
138 |         size 0.16 0.16 0.16
139 |       }
140 |     }
141 |   ]
142 |   name "solid(3)"
143 |   boundingObject Box {
144 |     size 0.15 0.1 0.15
145 |   }
146 | }
147 | 


--------------------------------------------------------------------------------
/libraries/utils/khepera.cpp:
--------------------------------------------------------------------------------
  1 | #include "khepera.hpp"
  2 | 
  3 | static int time_step = 8; 
  4 | 
  5 | static const char *infrared_sensors_names[NUMBER_OF_INFRARED_SENSORS] = {
  6 |   // turret sensors
  7 |   "rear left infrared sensor", "left infrared sensor", "front left infrared sensor", "front infrared sensor",
  8 |   "front right infrared sensor", "right infrared sensor", "rear right infrared sensor", "rear infrared sensor",
  9 |   // ground sensors
 10 |   "ground left infrared sensor", "ground front left infrared sensor", "ground front right infrared sensor",
 11 |   "ground right infrared sensor"};
 12 | 
 13 | // Declare required device tags 
 14 | static WbDeviceTag leds[3]; 
 15 | static WbDeviceTag infrared_sensors[12];
 16 | static WbDeviceTag left_motor, right_motor;
 17 | 
 18 | 
 19 | int k4::init_robot(){
 20 |     wb_robot_init();
 21 | 
 22 |     time_step = (int)wb_robot_get_basic_time_step();
 23 | 
 24 |     // get and enable the infrared sensors
 25 |     for (int i = 0; i < 12; ++i) {
 26 |         infrared_sensors[i] = wb_robot_get_device(infrared_sensors_names[i]);
 27 |         wb_distance_sensor_enable(infrared_sensors[i], time_step);
 28 |     }
 29 | 
 30 |     // get the led actuators
 31 |     leds[0] = wb_robot_get_device("front left led"); 
 32 |     leds[1] = wb_robot_get_device("front right led");
 33 |     leds[2] = wb_robot_get_device("rear led");
 34 |     for (int i = 0; i < 3; ++i)
 35 |         wb_led_set(leds[i], 0xFFFFFF & rand());
 36 | 
 37 |     // get the motors and set target position to infinity (speed control)
 38 |     left_motor = wb_robot_get_device("left wheel motor");
 39 |     right_motor = wb_robot_get_device("right wheel motor");
 40 |     wb_motor_set_position(left_motor, INFINITY);
 41 |     wb_motor_set_position(right_motor, INFINITY);
 42 |     wb_motor_set_velocity(left_motor, 0.0);
 43 |     wb_motor_set_velocity(right_motor, 0.0);
 44 | 
 45 |     return time_step; 
 46 | }
 47 | 
 48 | int k4::khepera_running(){
 49 |     return (wb_robot_step(time_step) != -1) ? 1 : 0; 
 50 | }
 51 | 
 52 | void k4::set_motors_speed(double left, double right){
 53 |     wb_motor_set_velocity(left_motor, left);
 54 |     wb_motor_set_velocity(right_motor, right);
 55 | }
 56 | 
 57 | std::vector<double> k4::get_motors_speed(){
 58 |     std::vector<double> speed; 
 59 |     speed.push_back(wb_motor_get_velocity(left_motor));
 60 |     speed.push_back(wb_motor_get_velocity(right_motor));
 61 |     return speed; 
 62 | }
 63 | 
 64 | void k4::set_normalized_motors_speed(double left, double right){
 65 |     if(abs(left)>1 || abs(right)>1){
 66 |         std::cout << "ERROR in set_normalized_motors_speed: input must be in [-1,1]" <<std::endl; 
 67 |         set_motors_speed(0.0,0.0);
 68 |         return; 
 69 |     }
 70 |     set_motors_speed(left*MAX_SPEED*REDUCER,right*MAX_SPEED*REDUCER);
 71 | }
 72 | 
 73 | std::vector<double> k4::get_normalized_motors_speed(){
 74 |     std::vector<double> speed = get_motors_speed(); 
 75 |     speed[0] /= (MAX_SPEED*REDUCER);
 76 |     speed[1] /= (MAX_SPEED*REDUCER);
 77 |     return speed; 
 78 | }
 79 | 
 80 | double k4::normalize_range(double range){
 81 |     double offset = 50.0; 
 82 |     double value = (range - IR_MIN - offset)/(IR_MAX - IR_MIN - offset);
 83 |     if(value < 0.0)
 84 |         value = 0.0; 
 85 |     else if(value > 1.0)
 86 |         value = 1.0; 
 87 |     return value; 
 88 | }
 89 | 
 90 | double k4::get_range(int i){
 91 |     return wb_distance_sensor_get_value(infrared_sensors[i]);
 92 | }
 93 | 
 94 | std::vector<double> k4::get_ranges(){
 95 |     std::vector<double> ranges; 
 96 |     for(int i=0; i<NUMBER_OF_PERIMETER_IR_SENSORS; i++)
 97 |         ranges.push_back(wb_distance_sensor_get_value(infrared_sensors[i]));
 98 |     return ranges; 
 99 | }
100 | 
101 | std::vector<double> k4::get_normalized_ranges(){
102 |     std::vector<double> ranges; 
103 |     for(int i=0; i<NUMBER_OF_PERIMETER_IR_SENSORS; i++)
104 |         ranges.push_back(normalize_range(wb_distance_sensor_get_value(infrared_sensors[i])));
105 |     return ranges;
106 | }


--------------------------------------------------------------------------------
/controllers/vpg_controller/vpg_arbitrator.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once 
  2 | 
  3 | // Standard library 
  4 | #include <iostream>
  5 | #include <vector>
  6 | #include <memory>
  7 | #include <iomanip>
  8 | #include <random>
  9 | 
 10 | // Utils
 11 | #include <khepera.hpp>
 12 | #include <state.hpp>
 13 | #include <reward.hpp>
 14 | #include <training_handler.hpp>
 15 | 
 16 | // Network 
 17 | #include <vpg_wrapper.hpp>  
 18 | #include "vpg_parameters.hpp"
 19 | 
 20 | #include <global_parameters.hpp> 
 21 | 
 22 | #include <data_logger.hpp>
 23 | // Log file column names
 24 | const std::vector<std::string> log_columns = {"Rollout", "Num_episodes", 
 25 |             "Num_timesteps", "Cumulated_reward", "Max_loss", "Distance"};
 26 | 
 27 | /**
 28 |  * VPG_arbitrator orchestrates the learning and evaluation of the network and
 29 |  * handles state and reward computations. 
 30 |  */
 31 | class VPG_arbitrator{
 32 |     public:
 33 |         /**
 34 |          * Instantiate the robot. 
 35 |          */
 36 |         VPG_arbitrator();
 37 | 
 38 |         /**
 39 |          * Cleanup webots resources. 
 40 |          */
 41 |         ~VPG_arbitrator();
 42 | 
 43 |         /**
 44 |          * Step the robot once to let the sensors settle (executes STOP behavior)
 45 |          * @param steps (int) number of steps to idle for. 
 46 |          */
 47 |         int idle(int steps=1);
 48 | 
 49 |         /**
 50 |          * Step the robot. Action depends on mode (train/eval). 
 51 |          */
 52 |         int step();
 53 | 
 54 |         /**
 55 |          * Reset the arbitrator. 
 56 |          */
 57 |         void reset();
 58 | 
 59 |         /**
 60 |          * Detect task completion
 61 |          * @return (bool) indicating wether task is complete or not. 
 62 |          */
 63 |         bool detect_task_completion();
 64 | 
 65 |         /**
 66 |          * Build the episodes that are required to train the network.
 67 |          * @param action (int) Action taken at time t.  
 68 |          */
 69 |         void build_episodes(std::vector<float> action);
 70 | 
 71 |         /**
 72 |          * Build the final epsiode containing the final reward.
 73 |          */
 74 |         void build_final_episode();
 75 | 
 76 |         /**
 77 |          * Transfer data between current and previous episodes for next iteration. 
 78 |          */
 79 |         void roll_episodes();
 80 | 
 81 |         /**
 82 |          * Set the arbitrator in evaluation or training mode. 
 83 |          * @param evaluating (bool) evaluation mode if true, training mode otherwise (default). 
 84 |          * @param verbose (bool) print the action probabilities after evaluating the state when in evaluation mode. 
 85 |          */
 86 |         void mode(bool evaluating = true, bool verbose = false);
 87 | 
 88 |         /**
 89 |          * Train the network on the precedent roll out.  
 90 |          */
 91 |         void train();
 92 | 
 93 |         /**
 94 |          * Get the current timestamp of the arbutrator. 
 95 |          */
 96 |         int get_timestamp();
 97 | 
 98 |         /**
 99 |          * Get the current rollout count. 
100 |          */
101 |         int get_rollout_count();
102 | 
103 |         /**
104 |          * Tells if in training mode or eval mode. 
105 |          * @return (bool) training
106 |          */
107 |         bool is_training();
108 | 
109 |         /**
110 |          * Retreive all the stats relative to the rollout. 
111 |          * @return (std::vector<std::string>) vector of values. 
112 |          */
113 |         std::vector<std::string> gather_log_data();
114 |     
115 |     protected:  
116 | 
117 |     private:
118 |         int _training_step(); 
119 |         int _eval_step();
120 |         int m_timestamp;
121 |         int m_rollout_count;
122 |         int m_rollout_local;
123 |         bool m_is_training; // default is true
124 |         bool m_verbose_eval;
125 |         std::vector<float> m_current_action; // Left and right wheels velocity 
126 |         State m_last_states;
127 |         Reward m_reward;
128 |         VPG_episode m_current_episode;
129 |         VPG_episode m_previous_episode; 
130 | };


--------------------------------------------------------------------------------
/libraries/utils/data_logger.cpp:
--------------------------------------------------------------------------------
  1 | #include "data_logger.hpp"
  2 | 
  3 | char* _string_to_char(std::string s){
  4 |     char* p = new char[s.length()+1];
  5 |     memcpy(p, s.c_str(), s.length()+1);
  6 |     return p;
  7 | }
  8 | 
  9 | bool _dir_exists(std::string path){
 10 |     struct stat sb;
 11 |     return (stat(_string_to_char(path), &sb) == 0 && S_ISDIR(sb.st_mode));
 12 | }
 13 | 
 14 | bool _file_exists(const std::string& file) {
 15 |     struct stat buffer;   
 16 |     return (stat (file.c_str(), &buffer) == 0); 
 17 | }
 18 | 
 19 | int create_log(std::string dir, std::string file, std::vector<std::string> headers){
 20 |     if(_file_exists(file))
 21 |         return 0;
 22 |     int r = 0;
 23 |     std::string command = "";
 24 |     // Create directory
 25 |     if(dir != "./" || dir == ""){
 26 |         command = "mkdir -p " + dir;
 27 |         r = system(_string_to_char(command));
 28 |     }
 29 |     command = "touch " + file;
 30 |     r = system(_string_to_char(command));
 31 | 
 32 |     write_line(file, headers);
 33 |      
 34 |     return r; 
 35 | }
 36 | 
 37 | void write_line(std::string file, std::vector<std::string> values){
 38 |     try{
 39 |         std::ofstream outfile;
 40 |         outfile.open(file, std::ios::app); // append
 41 |         
 42 |         std::string line = "";
 43 |         for(uint i=0; i<values.size(); i++){
 44 |             line += values.at(i);
 45 |             if(i<values.size()-1)
 46 |                 line += ",";
 47 |         }
 48 |         outfile << line << std::endl; 
 49 |         outfile.close();
 50 |     }catch(const std::exception& e){
 51 |         const std::string exception = "\n\033[1;33mFollowing exception occured:\033[0m\n";
 52 |         std::cout << exception << e.what() << std::endl;
 53 |     }
 54 | }
 55 | void write_line(std::string file, std::vector<double> values){
 56 |     std::vector<std::string> str_values;
 57 |     for(uint i=0; i<values.size(); i++){
 58 |         str_values.push_back(std::to_string(values[i]));
 59 |     }
 60 |     write_line(file, str_values);
 61 | }
 62 | 
 63 | int read_last_rollout(std::string file){
 64 |     if(!_file_exists(file))
 65 |         return 0;
 66 |     
 67 |     // Find the last line
 68 |     std::ifstream fin;
 69 |     fin.open(file);
 70 |     if(fin.is_open()){
 71 |         uint num_lines = 0;
 72 |         // Find the last line
 73 |         std::string prev_line = "";
 74 |         std::string line = "";
 75 |         while(! fin.eof()){
 76 |             prev_line = line;
 77 |             std::getline(fin, line);
 78 |             num_lines++;
 79 |         }
 80 |         fin.close();
 81 |         if(line == "")
 82 |             line = prev_line;
 83 | 
 84 |         if(num_lines == 2){
 85 |             // There is only a header. No rollout logged yet. 
 86 |             return 0;
 87 |         }
 88 | 
 89 |         // Extract the first value
 90 |         char del = ',';
 91 |         int c = 0;
 92 |         for(uint i=0; i<line.length(); i++){
 93 |             if(line[i] == del){
 94 |                 c = i;
 95 |                 break;
 96 |             }
 97 |         }
 98 |         return std::stoi(line.substr(0,c));
 99 |     }
100 |     return 0;
101 | }
102 | 
103 | int read_last_particle(std::string file){
104 |     if(!_file_exists(file))
105 |         return 0;
106 |     
107 |     // Find the last line
108 |     std::ifstream fin;
109 |     fin.open(file);
110 |     if(fin.is_open()){
111 |         uint num_lines = 0;
112 |         // Find the last line
113 |         std::string prev_line = "";
114 |         std::string line = "";
115 |         while(! fin.eof()){
116 |             prev_line = line;
117 |             std::getline(fin, line);
118 |             num_lines++;
119 |         }
120 |         fin.close();
121 |         if(line == "")
122 |             line = prev_line;
123 | 
124 |         if(num_lines == 2){
125 |             // There is only a header. No rollout logged yet. 
126 |             return 0;
127 |         }
128 | 
129 |         // Extract the last evaluated particle
130 |         // Find second and third delimiter locations
131 |         char del = ',';
132 |         uint del_count = 0;
133 |         uint first_del = 0;
134 |         uint sec_del = 0;
135 |         for(uint i=0; i<line.length(); i++){
136 |             if(line[i] == del){
137 |                 del_count++;
138 |                 if(del_count == 2)
139 |                     first_del = i+1;
140 |                 else if (del_count == 3){
141 |                     sec_del = i;
142 |                     break;
143 |                 }
144 |                 
145 |             }
146 |         }
147 |         return std::stoi(line.substr(first_del,sec_del));
148 |     }
149 |     return 0;
150 | }


--------------------------------------------------------------------------------
/log_analyzer/Analyzer.py:
--------------------------------------------------------------------------------
  1 | import numpy as np 
  2 | import pandas as pd
  3 | from matplotlib import pyplot as plt 
  4 | 
  5 | 
  6 | class Analyzer:
  7 |     '''
  8 |     Base class for log analysis. 
  9 |     It provides tools for rapid analysis and plotting of the data. 
 10 |     '''
 11 |     def __init__(self, loc : str, file : str):
 12 |         ''' Init class members '''
 13 |         self.m_file_name = file 
 14 |         self.m_loc_name = loc
 15 |         self.read_log()
 16 |     
 17 |     ### Pandas -> #####################################################
 18 | 
 19 |     def read_log(self):
 20 |         self.m_data_df = pd.read_csv(self.m_loc_name+self.m_file_name, sep=',')
 21 |     
 22 |     def col_to_int_(self, columns:list):
 23 |         ''' Convert provided columns to int (in place). '''
 24 |         for c in columns:
 25 |             self.m_data_df[c] = self.m_data_df[c].apply(lambda x : int(x))
 26 | 
 27 |     def compute_quantity_rate(self, quantity:str, sample_dim=100):
 28 |         ''' Return the rates of a given quantity (DataFrame column) '''
 29 |         results = np.array(self.m_data_df[quantity].values)
 30 |         dim = len(results)
 31 |         rate = []
 32 |         for i in range(0,dim,sample_dim):
 33 |             if(i+sample_dim > dim):
 34 |                 # Ignore computation for subset smaller than sample_dim
 35 |                 break 
 36 |             sample = results[i:i+sample_dim]
 37 |             rate.append(np.sum(sample)/len(sample))
 38 |         return rate
 39 |     
 40 |     def compute_moving_average(self, quantity:str, sample_dim=100):
 41 |         ''' Return the rates of a given quantity (DataFrame column) as a moving average '''
 42 |         results = np.array(self.m_data_df[quantity].values)
 43 |         dim = len(results)
 44 |         rate = []
 45 |         for i in range(0,dim-sample_dim,1):
 46 |             #print('{} % done'.format(i/dim),end='\r')
 47 |             if(i+sample_dim > dim):
 48 |                 # Ignore computation for subset smaller than sample_dim
 49 |                 break 
 50 |             sample = results[i:i+sample_dim]
 51 |             rate.append(np.sum(sample)/len(sample))
 52 |         return rate
 53 | 
 54 |     def estimated_run_time(self):
 55 |         c = 'Num_timesteps'
 56 |         T = 0.008/10 # 8 ms, accelerated by ~23 ()fast mode
 57 |         return self.m_data_df[c].sum() * T / 3600 # Estimated run time in hour
 58 |         
 59 |     def show_head(self):
 60 |         print(self.m_data_df.head())
 61 | 
 62 |     ### -> Pandas / Pyplot -> ##########################################
 63 |     
 64 |     def _sub_plot_single(self, ax:plt.axis, column:str, legend=None):
 65 |         '''
 66 |         Create a sub-plot for a given column. 
 67 | 
 68 |         Parameters
 69 |         ----------
 70 |         ax : :plt.axis
 71 |             figure axis
 72 |         
 73 |         column : str
 74 |             column name
 75 |         '''
 76 |         if legend == None:
 77 |             ax.plot(self.m_data_df[column],linestyle='-',linewidth=0.1)
 78 |         else:
 79 |             ax.plot(self.m_data_df[column],linestyle='-',linewidth=0.1,label=legend)
 80 |         
 81 |         ax.set_xlabel('Rollouts')
 82 |         if False and column == 'Num_timesteps':
 83 |             column += ' (approx. run time: {0:.2f} hours)'.format(self.estimated_run_time())
 84 |         ax.set_title(column) 
 85 | 
 86 |     def _sub_plot_single_from_array(self, ax:plt.axis, data:np.array, x_title:str, title:str, legend=None):
 87 |         '''
 88 |         Create a sub-plot for a given column. 
 89 | 
 90 |         Parameters
 91 |         ----------
 92 |         ax : plt.axis
 93 |             figure axis
 94 |         
 95 |         data : np.array
 96 |             data to plot
 97 |         '''
 98 |         if legend == None:
 99 |             ax.plot(data,linestyle='-',linewidth=1)
100 |         else:
101 |             ax.plot(data,linestyle='-',linewidth=1,label=legend)
102 |         ax.set_xlabel(x_title)
103 |         ax.set_title(title) 
104 | 
105 |     def _sub_plot_multiple(self, axis:plt.axis, columns:tuple):
106 |         '''
107 |         Create a sub-plot for the given columns. 
108 | 
109 |         Parameters
110 |         ----------
111 |         ax : plt.axis
112 |             figure axis
113 |         columns : tuple
114 |             column names
115 |         '''
116 |         title = ''
117 |         for col in columns:
118 |             axis.plot(self.m_data_df[col], label=col,linestyle='-',linewidth=0.1)
119 |             title += col + ', '
120 |         axis.set_title(title) 
121 |         axis.set_xlabel('Rollouts')
122 |         axis.legend()
123 | 
124 |     def _sub_plot_multiple_twin(self, axis:plt.axis, columns:tuple):
125 |         '''
126 |         Create a sub-plot for the given columns. 
127 |         Create a twin x-axis with new y scale for the second column. 
128 | 
129 |         Parameters
130 |         ----------
131 |         ax : plt.axis
132 |             figure axis
133 |         column : tuple
134 |             2 column names
135 |         twin : bool
136 |             
137 |         '''
138 |         # PLot first column
139 |         axis.plot(self.m_data_df[columns[0]], label=columns[0], color='tab:blue',linestyle='-',linewidth=0.1)
140 |         axis.tick_params(axis='y', labelcolor='tab:blue')
141 |         axis.set_xlabel('Rollouts')
142 |         axis.set_ylabel(columns[0])
143 |         axis.set_title(columns[0] + ' ~ ' + columns[1])
144 | 
145 |         # PLot second column 
146 |         ax = axis.twinx()
147 |         ax.plot(self.m_data_df[columns[1]], label=columns[1], color='tab:red',linestyle='-',linewidth=1)
148 |         ax.set_ylabel(columns[1])
149 |         ax.tick_params(axis='y', labelcolor='tab:red')
150 | 
151 |     def show(self):
152 |         plt.tight_layout()
153 |         plt.show()
154 | 
155 |     def save(self, file = "fig.png"):
156 |         plt.savefig(file, bbox_inches='tight') 
157 |     
158 |     def close_all(self):
159 |         plt.close('all')
160 | 
161 | 
162 | 
163 | 


--------------------------------------------------------------------------------
/unit_test/vpg_test.cpp:
--------------------------------------------------------------------------------
  1 | #include <math.h>
  2 | #include <iostream>
  3 | #include <vector>
  4 | #include <vpg_wrapper.hpp>
  5 | #include <global_parameters.hpp>
  6 | #include <vpg_parameters.hpp>
  7 | 
  8 | static const std::string passed = "[\033[1;4;32mPASSED\033[0m]\n";
  9 | static const std::string failed = "[\033[1;4;31mFAILED\033[0m]\n";
 10 | static const std::string exception = "\n\033[1;33mFollowing exception occured:\033[0m\n";
 11 | 
 12 | void store_some_episodes(){
 13 |     VPG_episode e0;
 14 |     VPG_episode e1;
 15 |     VPG_episode e2;
 16 | 
 17 |     e0.action = {0,0};
 18 |     e1.action = {0.5,-0.5};
 19 |     e2.action = {1,1};
 20 | 
 21 |     e0.reward = 0.01;
 22 |     e1.reward = 0.02;
 23 |     e2.reward = 0.03;
 24 | 
 25 |     std::vector<double> state0(STATE_DIM, 1.);
 26 |     std::vector<double> state1(STATE_DIM, 0.);
 27 |     std::vector<double> state2(STATE_DIM, 0.5);
 28 |     e0.state = state0;
 29 |     e1.state = state1;
 30 |     e2.state = state2;
 31 |     e0.state_next = state2;
 32 |     e1.state_next = state1;
 33 |     e2.state_next = state0;
 34 | 
 35 |     // Push the episodes "randomly"
 36 |     vpg_w::store_episode(e0);
 37 |     vpg_w::store_episode(e1);
 38 |     vpg_w::store_episode(e2);
 39 | }
 40 | 
 41 | void test_memory_handling(){
 42 |     store_some_episodes();
 43 |     std::cout << "Memory storing test: " + (vpg_w::get_memory_size() != 3 ? failed : passed);
 44 | }
 45 | 
 46 | void test_reward_to_go(){
 47 |     bool success = true;
 48 |     double tol = 1e-10;
 49 |     vpg_w::reset_memory();
 50 |     store_some_episodes();
 51 |     
 52 |     // Test the reward_to_go computation
 53 |     vpg_w::compute_reward_to_go();
 54 |     const std::vector<VPG_episode> memory = vpg_w::get_memory();
 55 |     
 56 |     //std::cout << memory[0].reward_to_go << " " << memory[1].reward_to_go << " " << memory[2].reward_to_go << " "  << std::endl;
 57 |     std::cout << "Reward to go test: " + (success &&
 58 |         (abs(memory[0].reward_to_go - 0.01 - GAMMA*0.02 - GAMMA*GAMMA*0.03)>tol  || 
 59 |          abs(memory[1].reward_to_go - 0.02 - GAMMA*0.03)>tol || 
 60 |          abs(memory[2].reward_to_go - 0.03)>tol) ? failed : passed);
 61 | }
 62 | 
 63 | void test_eval_policy(){
 64 |     std::cout << "Policy evaluation test: " << std::flush;
 65 |     std::vector<float> p_actions;
 66 |     bool success = true;
 67 | 
 68 |     // Create a dummy state
 69 |     std::vector<double> v(STATE_DIM,0.);
 70 |     v.at(3) = 1.;
 71 |     v.at(2) = 1.;
 72 | 
 73 |     // Run a forward pass
 74 |     try{
 75 |         p_actions = vpg_w::eval_policy(v);
 76 |     }catch(const std::exception& e){
 77 |         success = false;
 78 |         std::cout << exception << e.what() << std::endl;
 79 |     }
 80 | 
 81 |     std::cout << (success ? passed : failed);
 82 | }
 83 | 
 84 | void test_value_function(){
 85 |     std::cout << "Value function test: " << std::flush;
 86 |     bool success = true;
 87 |     float value = 0.1234;
 88 |     std::vector<double> state(STATE_DIM,0.);
 89 |     state[0] = 1.;
 90 |     state[1] = 1.;
 91 | 
 92 |     try{
 93 |         value = vpg_w::eval_value_function(state);
 94 |     }catch(const std::exception& e){
 95 |         success = false;
 96 |         std::cout << exception << e.what() << std::endl;
 97 |     }
 98 |     if(value == 0.1234) success = false;
 99 | 
100 |     std::cout << (success ? passed : failed);
101 | }
102 | 
103 | void test_model_saving(std::string path, std::string file){
104 |     std::cout << "Saving test: " << std::flush; 
105 |     int success = true;
106 |     try{
107 |         vpg_w::save(path,file,POLICY);
108 |     }catch(const std::exception& e){
109 |         success = false;
110 |         std::cout << exception << e.what() << std::endl;
111 |     }
112 |     std::cout << (success ? passed : failed);
113 | }
114 | 
115 | void test_model_loading(std::string file){
116 |     std::cout << "Loading test: " << std::flush; 
117 |     int success = true;
118 |     try{
119 |         vpg_w::load(file,POLICY);
120 |     }catch(const std::exception& e){
121 |         success = false;
122 |         std::cout << exception << e.what() << std::endl;
123 |     }
124 |     std::cout << (success ? passed : failed);
125 | }
126 | 
127 | void test_training(){
128 |     bool success = true;
129 | 
130 |     // Empty the memory
131 |     vpg_w::reset_memory();
132 |     // Push some new episodes
133 |     store_some_episodes();
134 | 
135 |     // Run a training sequence
136 |     try{
137 |         vpg_w::train();
138 |     }catch(const std::exception& e){
139 |         success = false;
140 |         std::cout << exception << e.what() << std::endl;
141 |     }
142 |     std::cout << "Training test: " + (success ? passed : failed);
143 | }
144 | 
145 | void test_training_heavy(){
146 |     std::cout << "Heavy training test: " << std::flush;
147 |     bool success = true;
148 | 
149 |     // Empty the memory
150 |     vpg_w::reset_memory();
151 |     // Prepare a dummy episode
152 |     VPG_episode e0;
153 |     e0.action = {0,0};
154 |     e0.reward = 0.01;
155 |     std::vector<double> state0(STATE_DIM, 1.);
156 |     e0.state = state0;
157 |     e0.state_next = state0;
158 |     // Store it N times
159 |     for(int i=0; i<1000; i++){
160 |         vpg_w::store_episode(e0);
161 |     }
162 |     vpg_w::compute_reward_to_go();
163 | 
164 |     // Run a training sequence
165 |     try{
166 |         vpg_w::train(100);
167 |     }catch(const std::exception& e){
168 |         success = false;
169 |         std::cout << exception << e.what() << std::endl;
170 |     }
171 |     std::cout << (success ? passed : failed);
172 | }
173 | 
174 | int main(int argc, char* argv[]){
175 | 
176 |     // Initilize the network
177 |     vpg_w::init();
178 | 
179 |     // Test the memory for episodes handling 
180 |     test_memory_handling();
181 | 
182 |     test_reward_to_go();
183 | 
184 |     // Test the proba generation
185 |     test_eval_policy();
186 | 
187 |     // Test value function evaluation
188 |     test_value_function();
189 | 
190 |     // Test loading and saving the model
191 |     system("rm -r model");
192 |     test_model_loading("model/vpg_policy");
193 |     test_model_saving("model/","model/vpg_policy");
194 |     test_model_loading("model/vpg_policy");
195 | 
196 |     // Test vector constructor's properties
197 |     /*std::vector<int> v1 = {0,1,2,3,4};
198 |     std::vector<int> v2 = std::vector<int>(v1.begin(), v1.begin()+v1.size());
199 |     std::cout << "v1 == v2 ? " << (v1==v2) << std::endl; // true
200 |     for(int i=0; i<v2.size(); i++)
201 |         std::cout << v2[i];
202 |     std::cout << std::endl;*/
203 | 
204 |     // Test the training procedure
205 |     test_training();
206 | 
207 |     // Test the training procedure with lots of epsiodes
208 |     test_training_heavy();
209 | 
210 |     return 0;
211 | }
212 | 


--------------------------------------------------------------------------------
/controllers/vpg_controller/vpg_arbitrator.cpp:
--------------------------------------------------------------------------------
  1 | #include "vpg_arbitrator.hpp"
  2 | 
  3 | VPG_arbitrator::VPG_arbitrator(){
  4 | 
  5 |     // Robot Initialization
  6 |     k4::init_robot(); 
  7 | 
  8 |     // Initialize the time stamp
  9 |     this->m_timestamp = 0; 
 10 | 
 11 |     // Initilaize the current action
 12 |     this->m_current_action = {0.0,0.0};
 13 |     
 14 |     // Set invalide time to episodes
 15 |     this->m_current_episode.t = -1; 
 16 |     this->m_previous_episode.t = -1;
 17 | 
 18 |     // Default mode is training 
 19 |     this->m_is_training = true;
 20 |     this->m_verbose_eval = false;
 21 | 
 22 |     // Init the reward
 23 |     this->m_reward = Reward();
 24 | 
 25 |     // Initialize the wrapper for the network
 26 |     vpg_w::init(); 
 27 | 
 28 |     // Create the log file if not existing 
 29 |     create_log(LOG_PATH, LOG_FILE, log_columns);
 30 | 
 31 |     // Initialize the rollout count
 32 |     std::cout << "Reading the log. " << std::endl;
 33 |     this->m_rollout_count = read_last_rollout(LOG_FILE);
 34 |     this->m_rollout_local = 0;
 35 | }
 36 | 
 37 | VPG_arbitrator::~VPG_arbitrator(){}
 38 | 
 39 | int VPG_arbitrator::idle(int steps){
 40 |     int status = 1;
 41 |     for(int i=0; i<steps && status; i++){
 42 |         k4::set_normalized_motors_speed(this->m_current_action[0]*2.0-1.0,
 43 |                                         this->m_current_action[1]*2.0-1.0);
 44 |         status = k4::khepera_running();
 45 |     }
 46 |     return status;
 47 | }
 48 | 
 49 | int VPG_arbitrator::_training_step(){
 50 | 
 51 |     // Handle number of rollouts
 52 |     if(this->m_rollout_count >= ROLLOUTS){
 53 |         // All rollouts executed, stop controller
 54 |         std::cout << "All rollouts executed. Terminating." << std::endl;
 55 |         return 0;
 56 |     }
 57 |     
 58 |     // Check if time out is met or if collision occured
 59 |     bool timed_out = (TIMEOUT <= this->get_timestamp()*WAIT_STEPS*((double)ROBOT_BASIC_TIMESTEP)*0.001);
 60 |     bool collided = detect_collision();
 61 | 
 62 |     if(timed_out || collided){
 63 | 
 64 |         std::cout << "Time stamp: " << m_timestamp << ", duration in seconds: " 
 65 |             << (double)m_timestamp*WAIT_STEPS*(double)ROBOT_BASIC_TIMESTEP/1000.0 << std::endl;
 66 | 
 67 |         // Store the episode with the final reward
 68 |         this->build_final_episode();
 69 |         vpg_w::store_episode(this->m_previous_episode);
 70 | 
 71 |         this->m_rollout_count++;
 72 |         this->m_rollout_local++;
 73 | 
 74 |         std::cout << "Terminated rollout [" << this->m_rollout_count << "/" << ROLLOUTS << "]"  << std::endl;
 75 |         vpg_w::compute_reward_to_go();
 76 |         this->train();
 77 |         
 78 |         vpg_w::save(MODEL_PATH,MODEL_POLICY_FILE,POLICY);
 79 |         vpg_w::save(MODEL_PATH,MODEL_VALUE_FILE, VALUE);
 80 |         write_line(LOG_FILE, VPG_arbitrator::gather_log_data());
 81 |         
 82 |         this->reset();
 83 | 
 84 |         supervisor_move_robot();
 85 |         wb_supervisor_simulation_reset_physics(); 
 86 |     }
 87 | 
 88 |     // Increment timestamp for next call 
 89 |     this->m_timestamp++;
 90 |     
 91 |     // Exploit policy
 92 |     this->m_current_action = vpg_w::eval_policy(this->m_last_states.get_state(CURRENT_STATE));
 93 |     //std::cout << "New state encountered, trying action: " << this->m_current_action << std::endl; 
 94 | 
 95 |     // Apply action (make sure it is properly applied)
 96 |     k4::set_normalized_motors_speed(this->m_current_action[0]*2.0-1.0,
 97 |                                     this->m_current_action[1]*2.0-1.0);
 98 |     
 99 |     // Build episode 
100 |     this->build_episodes(this->m_current_action); 
101 | 
102 |     // Send the previous_state if valid to the network implementation for storing
103 |     if(this->m_previous_episode.t > -1){
104 |         vpg_w::store_episode(m_previous_episode);
105 |     }
106 | 
107 |     // Roll episodes for next step
108 |     this->roll_episodes();
109 | 
110 |     // Add to travelled distance
111 |     measure_travelled_distance(this->m_current_action[0]*2.0-1.0,
112 |                                 this->m_current_action[1]*2.0-1.0);
113 | 
114 |     return k4::khepera_running(); 
115 | }
116 | 
117 | int VPG_arbitrator::_eval_step(){
118 |     // Increment timestamp for next call 
119 |     this->m_timestamp++;
120 | 
121 |     // Run inference on the network 
122 |     this->m_current_action = vpg_w::eval_policy(m_last_states.get_state(CURRENT_STATE));
123 | 
124 |     // Print the action probabilities and the selected action
125 |     if(this->m_verbose_eval){
126 |         std::cout << std::fixed;
127 |         std::cout << std::setprecision(2);
128 |         std::cout << "State: " << this->m_last_states.to_string(CURRENT_STATE) << std::endl;
129 |         std::cout << "Action: ";
130 |         for(uint i=0; i<ACTION_DIM; i++){
131 |             std::cout << this->m_current_action[i] << ",";
132 |         }
133 |         if(USE_VALUE_FUNCTION)
134 |             std::cout << " - state value: " << vpg_w::eval_value_function(m_last_states.get_state(CURRENT_STATE));
135 |         std::cout << std::endl;
136 |     } 
137 | 
138 |     // Apply action
139 |     k4::set_normalized_motors_speed(this->m_current_action[0]*2.0-1.0, 
140 |                                     this->m_current_action[1]*2.0-1.0);
141 | 
142 |     return k4::khepera_running();
143 | }
144 | 
145 | int VPG_arbitrator::step(){
146 | 
147 |     // Set the query period 
148 |     for(int i=0; i<WAIT_STEPS; i++){
149 |         if(!k4::khepera_running())
150 |             return 0;
151 |     }
152 | 
153 |     // Observe current state
154 |     this->m_last_states.step();
155 |     
156 |     // Get current reward
157 |     this->m_reward.compute_reward(this->m_last_states.get_states(), this->m_timestamp);
158 |     
159 |     if(this->m_is_training)
160 |         return this->_training_step();
161 |     else
162 |         return this->_eval_step(); 
163 | }
164 | 
165 | void VPG_arbitrator::reset(){
166 |     // Initialize the time stamp
167 |     this->m_timestamp = 0; 
168 |     // Initialize the current action 
169 |     this->m_current_action = {0.0,0.0};
170 |     // Set invalide time to episodes
171 |     this->m_current_episode.t = -1;
172 |     this->m_previous_episode.t = -1;
173 | 
174 |     this->m_last_states.reset();
175 | 
176 |     this->m_reward.reset();
177 | 
178 |     reset_distance(); 
179 | 
180 |     vpg_w::reset_memory();
181 |     vpg_w::reset_max_loss(); 
182 | }
183 | 
184 | void VPG_arbitrator::build_episodes(std::vector<float> action){
185 |     this->m_current_episode.t = this->m_timestamp;
186 |     this->m_current_episode.state = this->m_last_states.get_state(CURRENT_STATE);
187 |     std::vector<double> to_double(action.begin(), action.end());
188 |     this->m_current_episode.action = to_double; 
189 |     this->m_previous_episode.reward = this->m_reward.get_reward();
190 |     this->m_previous_episode.state_next = this->m_last_states.get_state(CURRENT_STATE);
191 | }
192 | 
193 | void VPG_arbitrator::build_final_episode(){
194 |     this->m_previous_episode.reward = this->m_reward.get_reward();
195 |     this->m_previous_episode.state_next = this->m_last_states.get_state(CURRENT_STATE);
196 | }
197 | 
198 | void VPG_arbitrator::roll_episodes(){
199 |     this->m_previous_episode.t = this->m_timestamp;
200 |     this->m_previous_episode.state = this->m_last_states.get_state(CURRENT_STATE);
201 |     this->m_previous_episode.action = this->m_current_episode.action; 
202 | }
203 | 
204 | void VPG_arbitrator::mode(bool evaluating, bool verbose){
205 |     this->m_is_training = !evaluating;
206 |     this->m_verbose_eval = verbose;
207 | }
208 | 
209 | void VPG_arbitrator::train(){
210 |     vpg_w::train();
211 | }
212 | 
213 | int VPG_arbitrator::get_timestamp(){
214 |     return this->m_timestamp;
215 | }
216 | 
217 | int VPG_arbitrator::get_rollout_count(){
218 |     return this->m_rollout_count;
219 | }
220 | 
221 | bool VPG_arbitrator::is_training(){
222 |     return this->m_is_training;
223 | }
224 | 
225 | // {"Rollout", "Num_episodes", "Num_timesteps", "Cumulated_reward", "Max_loss", "Distance"}
226 | 
227 | std::vector<std::string> VPG_arbitrator::gather_log_data(){
228 |     std::vector<std::string> line;
229 |     // Get the rollout info
230 |     line.push_back(std::to_string(this->m_rollout_count));
231 |     // Get the number of used episodes
232 |     line.push_back(std::to_string(vpg_w::get_memory_size()));
233 |     // Get the number of timesteps
234 |     line.push_back(std::to_string(this->m_timestamp));
235 |     // Get the cumulated reward
236 |     line.push_back(std::to_string(this->m_reward.get_cumulated_reward()));
237 |     // Get the maximum loss during rollout
238 |     line.push_back(std::to_string(vpg_w::get_max_loss()));
239 |     // Get the travelled distance 
240 |     line.push_back(std::to_string(get_distance()));
241 |     return line;
242 | }
243 | 


--------------------------------------------------------------------------------
/libraries/networks/vpg_network/vpg_network.cpp:
--------------------------------------------------------------------------------
  1 | #include "vpg_network.hpp"
  2 | #include "vpg_wrapper.hpp"
  3 | 
  4 | /////////////////////////////////////////////////////////////////////
  5 | // WRAPPER METHODS IMPLEMENTATION
  6 | /////////////////////////////////////////////////////////////////////
  7 | 
  8 | void vpg_w::init(){
  9 | 
 10 |     //vpg::seed_generator();
 11 | 
 12 |     // Instantiate the models
 13 |     if(!vpg_w::load(MODEL_POLICY_FILE,POLICY)){
 14 |         vpg::policy_model = vpg::Policy();
 15 |     }
 16 |     if(!vpg_w::load(MODEL_VALUE_FILE,VALUE)){
 17 |         vpg::value_model = vpg::Value();
 18 |     }
 19 | 
 20 |     // Set the uninitialised policy optimizer pointer
 21 |     static torch::optim::Adam p_adam(vpg::policy_model->parameters(), 
 22 |         torch::optim::AdamOptions(1e-3)); // .beta1(0.5)
 23 |     vpg::p_optimizer = &p_adam;
 24 | 
 25 |     // Set the uninitialised value function optimizer pointer
 26 |     static torch::optim::Adam v_adam(vpg::value_model->parameters(), 
 27 |         torch::optim::AdamOptions(1e-4)); // .beta1(0.5)
 28 |     vpg::v_optimizer = &v_adam;
 29 | 
 30 |     // Set the maximum loss to zero
 31 |     vpg_w::reset_max_loss();
 32 | }
 33 | 
 34 | void vpg_w::store_episode(VPG_episode episode){
 35 |     vpg::memory.push_back(episode);
 36 | }
 37 | 
 38 | void vpg_w::compute_reward_to_go(){
 39 |     double rtg = 0.;
 40 |     if(vpg::memory.empty())
 41 |         return;
 42 |     for(uint i=vpg::memory.size()-1; i>=0; i--){
 43 |         rtg = vpg::memory[i].reward + GAMMA*rtg;
 44 |         vpg::memory[i].reward_to_go = rtg;
 45 |         if(i==0) break;
 46 |     }
 47 | }
 48 | 
 49 | uint vpg_w::get_memory_size(){
 50 |     return vpg::memory.size();
 51 | }
 52 | 
 53 | const std::vector<VPG_episode>& vpg_w::get_memory(){
 54 |     return vpg::memory;
 55 | }
 56 | 
 57 | void vpg_w::reset_memory(){
 58 |     vpg::memory.clear();
 59 | }
 60 | 
 61 | std::vector<float> vpg_w::eval_policy(std::vector<double> state){
 62 |     torch::Tensor proba = vpg::policy_model->forward(
 63 |                 torch::_cast_Float(vpg::convert_state_to_batch(state)));
 64 |     std::vector<float> action(ACTION_DIM, 0.0F);
 65 |     for(uint i=0; i<ACTION_DIM; i++){
 66 |         action.at(i) = proba[0][i].item().toFloat();
 67 |     }
 68 |     return action;
 69 | }
 70 | 
 71 | float vpg_w::eval_value_function(std::vector<double> state){
 72 |     return vpg::value_model->forward(torch::_cast_Float(vpg::convert_state_to_batch(state))).item().toFloat();
 73 | }
 74 | 
 75 | void vpg_w::train(uint batch_size){
 76 |     if(vpg::memory.size() == 0){
 77 |         std::cout << "\033[1;33m[WARNING] memory empty, training aborted.\033[0m" << std::endl; 
 78 |         return;
 79 |     }
 80 | 
 81 |     // Clear the gradients of the models' parameters
 82 |     vpg::policy_model->zero_grad();
 83 |     vpg::value_rollout_count++; 
 84 |     if(vpg::value_rollout_count > VALUE_UPDATE_RATE){
 85 |         vpg::value_rollout_count = 0;
 86 |         vpg::value_model->zero_grad();
 87 |     }
 88 | 
 89 |     // Split the memory into batches and compute the gradients on each batch sequentially 
 90 |     for(uint i=0 ;; i++){
 91 |         //std::cout << "Processing batch: " << i << std::endl;
 92 |         if(batch_size >= vpg::memory.size()){ // Batch size is at least as large as the memory
 93 |             vpg::memory_batch = std::vector<VPG_episode>(vpg::memory.begin(), vpg::memory.end());
 94 |             vpg::compute_gradients();
 95 |             break;
 96 |         }else if((i+1)*batch_size <= vpg::memory.size()){ // Extract a batch from the memory
 97 |             vpg::memory_batch = std::vector<VPG_episode>(vpg::memory.begin() + i*batch_size, vpg::memory.begin() + (i+1)*batch_size);
 98 |             vpg::compute_gradients();
 99 |         }else if(i*batch_size < vpg::memory.size() && (i+1)*batch_size >= vpg::memory.size()){ // If the last batch overshoots the memory's end
100 |             vpg::memory_batch = std::vector<VPG_episode>(vpg::memory.begin() + i*batch_size, vpg::memory.end());
101 |             vpg::compute_gradients();
102 |             break;
103 |         }else{ // Reached end of memory (i*batch_size == memory.size()) -> stop (was already handled in first "else if")
104 |             //std::cout << "\033[1;33mConditional Miss\033[0m, processing aborted. Status:" << std::endl;
105 |             //std::cout << "Memory size: " << vpg::memory.size() << std::endl;
106 |             //std::cout << "Start: " << i*batch_size << "\t End: " << (i+1)*batch_size << std::endl;
107 |             break; 
108 |         }
109 |     }
110 | 
111 |     // Update the parameters
112 |     vpg::p_optimizer->step();
113 |     if(vpg::value_rollout_count == VALUE_UPDATE_RATE && USE_VALUE_FUNCTION){
114 |         std::cout << "Cumulated gradients for " << 
115 |             VALUE_UPDATE_RATE << " rollouts. Updating value function weights." << std::endl;
116 |         vpg::v_optimizer->step();
117 |     }
118 | }
119 | 
120 | float vpg_w::get_max_loss(){
121 |     return vpg::max_loss;
122 | }
123 | 
124 | void vpg_w::reset_max_loss(){
125 |     vpg::max_loss = 0.0F;
126 | }
127 | 
128 | bool vpg_w::_dir_exists(std::string path){
129 |     struct stat sb;
130 |     if (stat(path.c_str(), &sb) == 0 && S_ISDIR(sb.st_mode))
131 |         return true;
132 |     return false;
133 | }
134 | 
135 | void vpg_w::_create_path(std::string path){
136 |     const std::string command = "mkdir -p " + path;
137 |     system(command.c_str());
138 | }
139 | 
140 | void vpg_w::save(std::string path, std::string file, uint model){
141 |     if(!_dir_exists(path))
142 |         _create_path(path);
143 |     
144 |     switch(model){
145 |         case POLICY: 
146 |             torch::save(vpg::policy_model,file); 
147 |             break;
148 |         case VALUE:
149 |             torch::save(vpg::value_model,file); 
150 |             break;
151 |         /*case POLICY_OPTIM:
152 |             torch::save(vpg::p_optimizer,file);
153 |             break;
154 |         case VALUE_OPTIM:
155 |             torch::save(vpg::v_optimizer,file);
156 |             break;*/
157 |     }
158 | }
159 | 
160 | bool vpg_w::load(std::string file, uint model){
161 |     // Load the model
162 |     try{
163 |         if(model == POLICY){
164 |             torch::load(vpg::policy_model,file); 
165 |             return true;
166 |         }else if(model == VALUE){
167 |             torch::load(vpg::value_model,file); 
168 |             return true;
169 |         }
170 |     }catch(const std::exception& e){
171 |         std::cout << "\n\033[1;33m[WARNING]:\033[0m Could not load specified model. Using untrained version.\n";
172 |         return false;
173 |     }
174 | }
175 | 
176 | /////////////////////////////////////////////////////////////////////
177 | // CORE METHODS IMPLEMENTATION
178 | /////////////////////////////////////////////////////////////////////
179 | 
180 | void vpg::seed_generator(){
181 |     vpg::generator.seed(SEED);
182 | }
183 | 
184 | vpg::PolicyImpl::PolicyImpl(){
185 |     // Constructor - build the network's layers
186 |     this->l1 = register_module("l1",torch::nn::Linear(STATE_DIM,8)); 
187 |     this->l2 = register_module("l2",torch::nn::Linear(8,4));
188 |     this->l3 = register_module("l3",torch::nn::Linear(4,ACTION_DIM));
189 | }
190 | 
191 | torch::Tensor vpg::PolicyImpl::forward(torch::Tensor x){
192 |     // apply ReLU activations and sigmoid on output
193 |     x = torch::relu(this->l1->forward(x));
194 |     x = torch::relu(this->l2->forward(x));
195 |     x = torch::sigmoid(this->l3->forward(x)); 
196 | 
197 |     return x;
198 | }
199 | 
200 | vpg::ValueImpl::ValueImpl(){
201 |     // Constructor - build the network's layers
202 |     this->l1 = register_module("l1",torch::nn::Linear(STATE_DIM,5));
203 |     this->l2 = register_module("l2",torch::nn::Linear(5,3));
204 |     this->l3 = register_module("l3",torch::nn::Linear(3,1));
205 | }
206 | 
207 | torch::Tensor vpg::ValueImpl::forward(torch::Tensor x){
208 |     // apply ReLU activations and leave the output linear
209 |     x = torch::relu(this->l1->forward(x));
210 |     x = torch::relu(this->l2->forward(x));
211 |     x = this->l3->forward(x); 
212 | 
213 |     return x;
214 | }
215 | 
216 | std::vector<std::vector<double>> vpg::get_state_from_memory_batch(bool next){
217 |     std::vector<std::vector<double>> states;
218 |     for(uint i=0; i<vpg::memory_batch.size(); i++){
219 |         if(!next){
220 |             states.push_back(vpg::memory_batch[i].state);
221 |         }else{
222 |             states.push_back(vpg::memory_batch[i].state_next);
223 |         }
224 |     }
225 |     return states;
226 | }
227 | 
228 | torch::Tensor vpg::get_action_from_memory_batch(){
229 |     std::vector<double> actions;
230 |     for(uint i=0; i<vpg::memory_batch.size(); i++){
231 |         actions.push_back(vpg::memory_batch[i].action[0]);
232 |         actions.push_back(vpg::memory_batch[i].action[1]);
233 |     }
234 |     return torch::_cast_Float(torch::tensor(actions).view({-1,2}));
235 | }
236 | 
237 | torch::Tensor vpg::get_reward_from_memory_batch(){
238 |     std::vector<double> rewards;
239 |     for(uint i=0; i<vpg::memory_batch.size(); i++){
240 |         rewards.push_back(vpg::memory_batch[i].reward);
241 |     }
242 |     return torch::_cast_Float(torch::tensor(rewards).view({-1,1}));
243 | }
244 | 
245 | torch::Tensor vpg::get_reward_to_go_from_memory_batch(){
246 |     std::vector<double> rtg;
247 |     for(uint i=0; i<vpg::memory_batch.size(); i++){
248 |         rtg.push_back(vpg::memory_batch[i].reward_to_go);
249 |     }
250 |     return torch::_cast_Float(torch::tensor(rtg).view({-1,1}));
251 | }
252 | 
253 | torch::Tensor vpg::convert_state_to_batch(std::vector<double> state){
254 |     return torch::tensor(state).view({-1,(long)state.size()});
255 | }
256 | torch::Tensor vpg::convert_state_to_batch(std::vector<std::vector<double>> states){
257 |     torch::Tensor t = vpg::convert_state_to_batch(states[0]);
258 |     for(uint i=1; i<states.size(); i++){
259 |         t = torch::cat({t,vpg::convert_state_to_batch(states[i])}, /*dim=*/0);
260 |     }
261 |     return t;
262 | }
263 | 
264 | bool _has_nan(torch::Tensor t){
265 |     torch::Tensor b = at::isnan(t);
266 |     return at::any(b).item().toBool();
267 |     c10::IntArrayRef sizes = b.sizes(); 
268 | 
269 |     for(int i=0; i<sizes[0]; i++){
270 |         for(int j=0; j<sizes[1]; j++){
271 |             if(b[i][j].item().toBool())
272 |                 return true; 
273 |         }
274 |     }
275 |     return false; 
276 | }
277 | 
278 | int vpg::compute_gradients(){
279 |     // Get the states (network's input)
280 |     torch::Tensor states = torch::_cast_Float(vpg::convert_state_to_batch(vpg::get_state_from_memory_batch()));
281 |     // Get the next states that were the result of the chosen action
282 |     torch::Tensor next_states = torch::_cast_Float(vpg::convert_state_to_batch(vpg::get_state_from_memory_batch(true)));
283 |     // Get the actions (network's output)
284 |     torch::Tensor actions = vpg::policy_model->forward(states);
285 |     // Get the actually taken actions
286 |     //torch::Tensor actions = torch::_cast_Int(vpg::get_action_from_memory_batch());
287 |     // Get the rewards
288 |     torch::Tensor rewards = vpg::get_reward_from_memory_batch();
289 |     // Get the rewards to go
290 |     torch::Tensor rewards_to_go = vpg::get_reward_to_go_from_memory_batch();
291 | 
292 |     ////////////////////////////////////////////////////////////////////////////
293 |     // Train the Policy
294 |     ////////////////////////////////////////////////////////////////////////////
295 | 
296 |     // Compute the advantage
297 |     torch::Tensor advantage, s_values;
298 |     if(USE_VALUE_FUNCTION){
299 |         // Get the states values
300 |         s_values = vpg::value_model->forward(states);
301 |         advantage = rewards_to_go - s_values; 
302 |     }else{
303 |         // Compute the advantage without support from the value function
304 |         advantage = rewards_to_go;
305 |     }
306 |     
307 |     // Compute the loss (negative sign to perform gradient ascent) 
308 |     torch::Tensor policy_loss = -torch::log(actions) * advantage; 
309 | 
310 |     // Compute the backward pass on each component of the loss {N,1}
311 |     for(uint i=0; i<policy_loss.size(0); i++){
312 |         policy_loss[i][0].backward(/*gradient*/{}, /*keep_graph*/true);
313 |         policy_loss[i][1].backward(/*gradient*/{}, /*keep_graph*/true);
314 |     }
315 |     
316 |     ////////////////////////////////////////////////////////////////////////////
317 |     // Train the Value Function
318 |     ////////////////////////////////////////////////////////////////////////////
319 |     if(USE_VALUE_FUNCTION){
320 |         // Compute the Value function's loss based on the reward to go 
321 |         torch::Tensor value_loss = torch::mse_loss(s_values, rewards_to_go, /*reduction*/1L);
322 | 
323 |         // Update the maximum loss
324 |         vpg::update_max_loss(value_loss.max().item().toFloat());
325 | 
326 |         // Compute the gradients
327 |         value_loss.backward();
328 |     }
329 |     return 0;
330 | }
331 | 
332 | void vpg::update_max_loss(float loss){
333 |     if(max_loss < loss)
334 |         max_loss = loss;
335 | }


--------------------------------------------------------------------------------