├── clean_project.sh ├── env.sh ├── controllers ├── global_parameters.hpp ├── vpg_controller │ ├── vpg_parameters.hpp │ ├── vpg_controller.cpp │ ├── Makefile │ ├── vpg_arbitrator.hpp │ └── vpg_arbitrator.cpp └── khepera_test │ ├── khepera_test.cpp │ └── Makefile ├── unit_test ├── makefile └── vpg_test.cpp ├── libraries ├── networks │ └── vpg_network │ │ ├── build.sh │ │ ├── vpg_episode.cpp │ │ ├── vpg_episode.hpp │ │ ├── CMakeLists.txt │ │ ├── vpg_wrapper.hpp │ │ ├── vpg_network.hpp │ │ └── vpg_network.cpp └── utils │ ├── training_handler.hpp │ ├── state.hpp │ ├── state.cpp │ ├── reward.hpp │ ├── reward.cpp │ ├── khepera.hpp │ ├── data_logger.hpp │ ├── training_handler.cpp │ ├── khepera.cpp │ └── data_logger.cpp ├── .gitignore ├── LICENSE ├── README.md ├── log_analyzer ├── vpg_analyzer.py └── Analyzer.py └── worlds └── learned_obstacle_avoidance.wbt /clean_project.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo "" 3 | echo -e "\033[1;33mCleaning the PyTorch shared object...\033[0m" 4 | cd - 5 | cd ./libraries/networks/vpg_network 6 | rm -r build 7 | cd ../.. 8 | rm libvpg_network.so 9 | rm vpg_episode.hpp 10 | rm vpg_wrapper.hpp 11 | 12 | echo "" 13 | echo -e "\033[1;33mCleaning the webots controller...\033[0m" 14 | cd ../controllers/vpg_controller 15 | make clean 16 | 17 | echo "done" 18 | -------------------------------------------------------------------------------- /env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Setup for running khepera IV robots and RL 3 | 4 | echo Exporting environment variables 5 | export WEBOTS_HOME=/usr/local/webots ; echo WEBOTS_HOME=$WEBOTS_HOME 6 | export WEBOTS_HOME_PATH=$WEBOTS_HOME 7 | 8 | # Change this path to point to your libtorch installation 9 | export LIBTORCH_PATH=/home/$(whoami)/Documents/PyTorch/libtorch # <- CHANGE ME! 10 | echo LIBTORCH_PATH=$LIBTORCH_PATH 11 | -------------------------------------------------------------------------------- /controllers/global_parameters.hpp: -------------------------------------------------------------------------------- 1 | # pragma once 2 | 3 | #define ROBOT_BASIC_TIMESTEP 8 // Time step in ms 4 | 5 | // State parameters 6 | #define STATE_DIM 10 // Dimension of the state (8 sensors, 2 wheel speeds) 7 | #define STATE_NUM_IR 8 8 | #define STATE_LEFT 8 9 | #define STATE_RIGHT 9 10 | 11 | // Action parameters 12 | #define ACTION_DIM 2 // Dimension of the action space 13 | 14 | -------------------------------------------------------------------------------- /unit_test/makefile: -------------------------------------------------------------------------------- 1 | # See: http://www.cs.colby.edu/maxwell/courses/tutorials/maketutor/ 2 | # Note -Wl,rpath,... links for the dynamic linker 3 | 4 | CC = g++ 5 | LIBS_PATH = ../libraries 6 | LIBS = -lvpg_network 7 | CPPFLAGS = -I. 
-I$(LIBS_PATH) -I../controllers/ -I../controllers/vpg_controller -L$(LIBS_PATH) -Wl,-rpath,$(LIBS_PATH) 8 | 9 | DEPS = vpg_test.hpp 10 | OBJ = vpg_test.o 11 | 12 | %.o: %.c $(DEPS) 13 | $(CC) -Wall -c -o $@ $(DEPS) $(CPPFLAGS) 14 | 15 | dqn_test: $(OBJ) 16 | $(CC) -Wall -o vpg_test vpg_test.o $(CPPFLAGS) $(LIBS) 17 | 18 | .PHONY: clean # do not do anything with files named "clean" 19 | clean: 20 | rm -f *.o *~ vpg_test -------------------------------------------------------------------------------- /libraries/networks/vpg_network/build.sh: -------------------------------------------------------------------------------- 1 | # Build the target 2 | mkdir build 3 | cd build 4 | cmake -DCMAKE_PREFIX_PATH=$LIBTORCH_PATH .. 5 | make 6 | 7 | # Move the generated shared object to the libraries folder of the Webots project 8 | cp ./libvpg_network.so ../../.. 9 | 10 | # Move the required headers as well 11 | cp ../vpg_wrapper.hpp ../../.. 12 | echo '// Do not modify! This file is generated when compiling the `libvpg_network.so` file.' | cat - ../../../vpg_wrapper.hpp > temp && mv temp ../../../vpg_wrapper.hpp 13 | cp ../vpg_episode.hpp ../../.. 14 | echo '// Do not modify! This file is generated when compiling the `libvpg_network.so` file.' | cat - ../../../vpg_episode.hpp > temp && mv temp ../../../vpg_episode.hpp 15 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # No files without file extension 2 | * 3 | !*/ 4 | !*.* 5 | !Makefile 6 | !makefile 7 | 8 | # Shared object build 9 | build/ 10 | libraries/*.hpp 11 | 12 | # Prerequisites 13 | *.d 14 | 15 | # Compiled Object files 16 | *.slo 17 | *.lo 18 | *.o 19 | *.obj 20 | 21 | # Precompiled Headers 22 | *.gch 23 | *.pch 24 | 25 | # Compiled Dynamic libraries 26 | *.so 27 | *.dylib 28 | *.dll 29 | 30 | # Fortran module files 31 | *.mod 32 | *.smod 33 | 34 | # Compiled Static libraries 35 | *.lai 36 | *.la 37 | *.a 38 | *.lib 39 | 40 | # Executables 41 | *.exe 42 | *.out 43 | *.app 44 | 45 | # editor related files 46 | *.vscode/ 47 | 48 | # python 49 | __pycache__/ 50 | 51 | # Logging 52 | log/ 53 | 54 | # Trained models 55 | model/ 56 | 57 | # webots 58 | *.wbproj 59 | -------------------------------------------------------------------------------- /libraries/networks/vpg_network/vpg_episode.cpp: -------------------------------------------------------------------------------- 1 | #include "vpg_episode.hpp" 2 | 3 | std::string VPG_episode::to_string() const{ 4 | std::string output = "Episode " + std::to_string(this->t) + ":\n\tstate: "; 5 | for(unsigned int i=0; istate.size(); i++){ 6 | output += std::to_string(this->state[i]) + " "; 7 | } 8 | output += ",\n\taction: " + std::to_string(this->action[0]) + ", " + std::to_string(this->action[1]) + ",\n"; 9 | output += "\treward: " + std::to_string(this->reward) + ",\n"; 10 | output += "\treward to go: " + std::to_string(this->reward_to_go) + ",\n\tnext state: "; 11 | for(unsigned int i=0; istate_next.size(); i++){ 12 | output += std::to_string(this->state_next[i]) + " "; 13 | } 14 | return output; 15 | } 16 | -------------------------------------------------------------------------------- /libraries/utils/training_handler.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | #include 11 | 12 | /** 13 | * Detect if the robot hit something. 
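 * Implemented via the Webots supervisor API: the number of contact points reported for the robot node (DEF K4) is queried, and any contact counts as a collision (see training_handler.cpp).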
14 | * @output (bool) Whether the robot is currently touching something. 15 | */ 16 | bool detect_collision(); 17 | 18 | /** 19 | * Detect if the simulation exploded. Problem occuring in 20 | * Webots version R2019b and previous. Solved in R2020a. 21 | */ 22 | bool simulation_exploded(); 23 | 24 | /** 25 | * Adds the speeds set to compute the "distance" travelled. 26 | */ 27 | void measure_travelled_distance(double left, double right); 28 | 29 | /** 30 | * Reset the measured travelled distance. 31 | */ 32 | void reset_distance(); 33 | 34 | /** 35 | * Get the measured travelled distance. 36 | */ 37 | double get_distance(); 38 | 39 | /** 40 | * Move the robot at a pre-defined random location. 41 | */ 42 | void supervisor_move_robot(); -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Lucas W. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /libraries/utils/state.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | // Kephera API 9 | #include 10 | 11 | #include 12 | 13 | #define PREV_STATE 0 14 | #define CURRENT_STATE 1 15 | 16 | typedef std::vector> t_state; 17 | 18 | /** 19 | * The State class stores the two last observed states 20 | * as 1D vectors. 8 IR values, 2 wheel speeds. 21 | */ 22 | class State{ 23 | public: 24 | State(); 25 | 26 | /** 27 | * Make state measurment. 28 | */ 29 | void measure(); 30 | 31 | /** 32 | * Convert the state to a string. 
33 | * t = 0 (PREV_STATE) is previous, = 1 (CURRENT_STATE) is last measured 34 | */ 35 | std::string to_string(uint t = CURRENT_STATE); 36 | 37 | /** 38 | * t = 0 (PREV_STATE) is previous, = 1 (CURRENT_STATE) is last measured 39 | */ 40 | std::vector get_state(uint t = CURRENT_STATE); 41 | t_state get_states(); 42 | t_state step(); 43 | 44 | void reset(); 45 | 46 | protected: 47 | 48 | private: 49 | t_state m_states; 50 | }; 51 | -------------------------------------------------------------------------------- /libraries/networks/vpg_network/vpg_episode.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | /** 8 | * Structure storing an Episode 9 | * {state, action, reward, reward_to_go, state_next} 10 | * At time t, `state` and `action` are sampled. 11 | * At time t+1, `reward` and `state_next` are sampled. 12 | * Once a rollout is complete, the reward_to_go can be computed. 13 | */ 14 | struct VPG_episode{ 15 | int t; // Timestamp. 16 | std::vector action; // Action taken by the agent when in `state`. 17 | double reward; // Reward perceived by the agent for performing `action` in `state`. 18 | double reward_to_go; // Cumulated reward evaluated from time t. 19 | std::vector state; // State when taking the action. 20 | std::vector state_next; // State after taking the action. 21 | 22 | /** 23 | * Convert Episode to string: 24 | * "Episode x: 25 | * state: x x x x x x 26 | * action: x 27 | * reward: x 28 | * reward to go: x 29 | * next state: x x x x x x 30 | * " 31 | */ 32 | std::string to_string() const; 33 | 34 | } typedef DQN_episode; -------------------------------------------------------------------------------- /libraries/networks/vpg_network/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.0 FATAL_ERROR) 2 | project(vpg_network) 3 | 4 | find_package(Torch REQUIRED) 5 | 6 | # Paths to include vpg_parameters.hpp and global_parameters.hpp for global settings 7 | set(CONTROLLER_PATH ../../../controllers/vpg_controller) 8 | set(PARAMETERS_PATH ../../../controllers) 9 | 10 | # Specify target as shared object 11 | add_library(vpg_network SHARED vpg_network.cpp vpg_episode.cpp) 12 | 13 | # Include the directory of the controller 14 | target_include_directories(vpg_network PRIVATE ${CONTROLLER_PATH} ${PARAMETERS_PATH}) 15 | 16 | # Link the PyTorch libraries 17 | target_link_libraries(vpg_network "${TORCH_LIBRARIES}") 18 | 19 | set_property(TARGET vpg_network PROPERTY CXX_STANDARD 11) 20 | 21 | # The following code block is suggested to be used on Windows. 22 | # According to https://github.com/pytorch/pytorch/issues/25457, 23 | # the DLLs need to be copied to avoid memory errors. 
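# (On Linux this copy step is not needed: build.sh instead copies the generated libvpg_network.so, vpg_wrapper.hpp and vpg_episode.hpp up into the project's libraries folder after the build.)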
24 | if (MSVC) 25 | file(GLOB TORCH_DLLS "${TORCH_INSTALL_PREFIX}/lib/*.dll") 26 | add_custom_command(TARGET vpg_network 27 | POST_BUILD 28 | COMMAND ${CMAKE_COMMAND} -E copy_if_different 29 | ${TORCH_DLLS} 30 | $) 31 | endif (MSVC) 32 | -------------------------------------------------------------------------------- /controllers/vpg_controller/vpg_parameters.hpp: -------------------------------------------------------------------------------- 1 | # pragma once 2 | 3 | // Simulation parameters 4 | #define TIMEOUT 1*30 // Seconds before starting a new rollout 5 | #define QUERY_PERIOD 0.01 // Period in seconds between each run of the step method (min is 0.008) 6 | #define WAIT_STEPS floor(QUERY_PERIOD/(double)ROBOT_BASIC_TIMESTEP*1000.0) 7 | #define ROLLOUTS 40000 // Number of rollouts to execute in total. 8 | #define ROLL_PER_SESSION 100 // Restart the simulation after 100 rollouts. 9 | 10 | // Training parameters 11 | #define GAMMA 0.99 // Define the discount factor 12 | #define BATCH_SIZE 500 // Batch size to train the network 13 | #define USE_VALUE_FUNCTION true // Whether to use a Value Function network to support the policy 14 | #define VALUE_UPDATE_RATE 20 // Number of rollouts before updating the weights of the value function 15 | 16 | // Model saving/loading file 17 | #define MODEL_PATH "model/" 18 | #define MODEL_POLICY_FILE "model/vpg_policy" 19 | #define MODEL_VALUE_FILE "model/vpg_value" 20 | #define OPTIM_PATH "optim/" 21 | #define OPTIM_POLICY_FILE "optim/optim_policy" 22 | #define OPTIM_VALUE_FILE "optim/optim_value" 23 | 24 | // Data logging destination 25 | #define LOG_PATH "log/" 26 | #define LOG_FILE "log/log.csv" -------------------------------------------------------------------------------- /libraries/utils/state.cpp: -------------------------------------------------------------------------------- 1 | #include "state.hpp" 2 | 3 | State::State(){ 4 | 5 | // Set the default starting states (all false) 6 | std::vector v(STATE_DIM,0.); 7 | this->m_states.push_back(v); 8 | this->m_states.push_back(v); 9 | } 10 | 11 | void State::measure(){ 12 | // Roll states 13 | m_states[PREV_STATE] = m_states[CURRENT_STATE]; 14 | 15 | // Get the sensor readings 16 | std::vector state = k4::get_normalized_ranges(); 17 | std::vector speeds = k4::get_normalized_motors_speed(); 18 | state.push_back(speeds[0]); 19 | state.push_back(speeds[1]); 20 | m_states[CURRENT_STATE] = state; 21 | } 22 | 23 | std::string State::to_string(uint t){ 24 | std::string string = ""; 25 | for(uint i=0; im_states[t].size(); i++){ 26 | string += std::to_string(this->m_states[t][i]) + " "; 27 | } 28 | return string; 29 | } 30 | 31 | std::vector State::get_state(uint t){ 32 | return this->m_states.at(t); 33 | } 34 | 35 | t_state State::get_states(){ 36 | return this->m_states; 37 | } 38 | 39 | t_state State::step(){ 40 | this->measure(); 41 | return this->get_states(); 42 | } 43 | 44 | void State::reset(){ 45 | // Set the default starting state (all 0) 46 | for(uint i=0; im_states[0].at(i) = 0.; 48 | this->m_states[1].at(i) = 0.; 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /libraries/utils/reward.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "state.hpp" 4 | #include "training_handler.hpp" 5 | 6 | /** 7 | * The Reward class enables to compute the reward associtated 8 | * to a given State. It stores the current (last evaluated) reward, 9 | * and the cumulated reward. 
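 *
 * Minimal usage sketch (the names `state` and `timestamp` are illustrative; it assumes a
 * State instance that is stepped every control cycle):
 *   Reward reward;
 *   double r = reward.compute_reward(state.get_states(), timestamp);
 *   double total = reward.get_cumulated_reward();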
10 | */ 11 | class Reward{ 12 | public: 13 | /** 14 | * Reward constructor. 15 | */ 16 | Reward(); 17 | 18 | /** 19 | * Compute the reward selected when instantiating the class. 20 | * @param state (std::vector) Robot's current state. 21 | * @param timestamp (int) Rollout time stamp at which the reward is computed. If equal to 0, no decay is applied. 22 | */ 23 | double compute_reward(t_state state, int timestamp = 0); 24 | 25 | /** 26 | * Get the last computed reward. 27 | * @output (double) reward. 28 | */ 29 | double get_reward(); 30 | 31 | /** 32 | * Get the cumulated computed reward. 33 | * @output (double) cumulated reward. 34 | */ 35 | double get_cumulated_reward(); 36 | 37 | /** 38 | * Reset the currently stored values. 39 | */ 40 | void reset(); 41 | 42 | private: 43 | double m_reward; 44 | double m_cumulated_reward; 45 | double m_decay; 46 | std::string m_type; 47 | }; -------------------------------------------------------------------------------- /controllers/vpg_controller/vpg_controller.cpp: -------------------------------------------------------------------------------- 1 | // File: vpg_controller.cpp 2 | // Date: 28.10.2019 3 | // Description: Main file of the VPG implementation 4 | // Author: Lucas Wälti 5 | // Modifications: 6 | 7 | #include 8 | 9 | // The arguments of the main function can be specified by the 10 | // "controllerArgs" field of the Robot node 11 | int main(int argc, char **argv) { 12 | 13 | // Listen to arguments for mode 14 | bool eval = true; 15 | if(argc == 1){ 16 | std::cout << "No argument, using default 'eval' mode. Usage is 'training' or 'eval'." << std::endl; 17 | }else if(argc == 2){ 18 | std::string mode = argv[1]; 19 | if(mode == "training") 20 | eval = false; 21 | else 22 | eval = true; 23 | } 24 | 25 | // Create an arbitrator instance 26 | VPG_arbitrator arbitrator = VPG_arbitrator(); 27 | arbitrator.mode(/*evaluating*/ eval, /*verbose*/ true); 28 | arbitrator.reset(); 29 | 30 | std::cout << "Arbitrator launched in " << (eval ? "evaluation" : "training") << " mode" << std::endl; 31 | 32 | while (arbitrator.step()) { 33 | // Check if simulation break 34 | if(simulation_exploded()){ 35 | std::cout << "Simulation exploded." << std::endl; 36 | wb_supervisor_simulation_reset(); 37 | } 38 | }; 39 | 40 | if(arbitrator.is_training()) 41 | wb_supervisor_simulation_quit(EXIT_SUCCESS); // ask Webots to terminate 42 | 43 | // cleanup code 44 | return 0; 45 | } 46 | -------------------------------------------------------------------------------- /controllers/khepera_test/khepera_test.cpp: -------------------------------------------------------------------------------- 1 | // File: khepera_test.cpp 2 | // Date: 28.03.2020 3 | // Description: Simple demo controller showing basic controls 4 | // Author: Lucas Waelti 5 | // Modifications: 6 | 7 | /* 8 | * This controller was readapted and modified from the default 9 | * khepera.c controller provided by Cyberbotics. 
10 | */ 11 | 12 | #include 13 | 14 | int main(int argc, char **argv) { 15 | 16 | // Initililaize the robot 17 | int time_step = k4::init_robot(); 18 | 19 | // store the last time a message was displayed 20 | int last_display_second = 0; 21 | 22 | std::vector ranges, normalized_ranges; 23 | 24 | // main loop 25 | while (wb_robot_step(time_step) != -1) { 26 | 27 | // Get infrared data 28 | normalized_ranges = k4::get_normalized_ranges(); 29 | ranges = k4::get_ranges(); 30 | 31 | // Display the normalized front sensor reading 32 | if(last_display_second != (int)wb_robot_get_time()){ 33 | last_display_second = (int)wb_robot_get_time(); 34 | std::cout << "Distance front: " << normalized_ranges[3] << std::endl; 35 | } 36 | 37 | // simple obstacle avoidance algorithm 38 | // based on the front infrared sensors 39 | double speed_offset = 0.2 * (MAX_SPEED - 0.03 * ranges[3]); 40 | double speed_delta = 0.03 * ranges[2] - 0.03 * ranges[4]; 41 | k4::set_motors_speed(speed_offset + speed_delta, speed_offset - speed_delta); 42 | }; 43 | 44 | wb_robot_cleanup(); 45 | 46 | return EXIT_SUCCESS; 47 | } 48 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RL_Webots 2 | This repository shows how Deep Reinforcement Learning can be used within Webots. This project was developed under Linux. While it should be possible to use it with other operating systems, this was not tested. 3 | 4 | The code presented in this repository was adapted from a semester project at the [Distributed Intelligent Systems and Algorithms Laboratory (DISAL)](https://www.epfl.ch/labs/disal/). A summary of the project can be found [here](https://disalw3.epfl.ch/teaching/student_projects/ay_2019-20/ws/DISAL-SP133_summary.pdf). The part presented here is about **Deep Reinforcement Learning**, leveraging **Policy Gradient** to train a fully connected, feed forward neural network to solve a simple **obstacle avoidance task** on a **continuous action space** with a Khepera IV robot. 5 | 6 | The project was entirely implemented in C++ and relies on [libtorch](https://pytorch.org/cppdocs/), [PyTorch](https://pytorch.org/)'s C++ API. 7 | 8 | ## Install 9 | Download [PyTorch's C++ API](https://pytorch.org/). Choose following options: 10 | - Build: Stable 11 | - OS: Linux 12 | - Package: LibTorch 13 | - Language: C++ 14 | - CUDA: None 15 | 16 | and click on the second link (cxx11 ABI, 87.4M). Extract the zip file wherever you like on your computer. You need to edit the `env.sh` script and adapt the path to your libtorch installation folder: 17 | ```bash 18 | export LIBTORCH_PATH=/path/to/libtorch 19 | ``` 20 | 21 | Open up a terminal, go to the project folder and run the following commands to build the project: 22 | ```bash 23 | source env.sh 24 | ./build_project.sh 25 | ``` 26 | ## Documentation 27 | Please visit the repository's [wiki](https://github.com/LucasWaelti/RL_Webots/wiki) for more details. -------------------------------------------------------------------------------- /libraries/utils/reward.cpp: -------------------------------------------------------------------------------- 1 | #include "reward.hpp" 2 | 3 | /** 4 | * Verify whether one if the IR sensor is seeing an 5 | * obstacle. 
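 * Example: a normalized reading whose first STATE_NUM_IR entries are all zero yields false; any strictly positive IR entry yields true (the wheel-speed entries are not checked).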
6 | */ 7 | bool _obstacle_present(const std::vector& a){ 8 | for(int i=0; i 0.0) 10 | return true; 11 | } 12 | return false; 13 | } 14 | 15 | Reward::Reward(){ 16 | reset(); 17 | } 18 | 19 | double Reward::compute_reward(t_state state, int timestamp){ 20 | 21 | m_reward = 0.; 22 | 23 | // Adapt speed representation 24 | state[CURRENT_STATE][STATE_LEFT] = state[CURRENT_STATE][STATE_LEFT] *2.0-1.0; 25 | state[CURRENT_STATE][STATE_RIGHT] = state[CURRENT_STATE][STATE_RIGHT]*2.0-1.0; 26 | 27 | // Reward driving forward when no obstacle 28 | if(!_obstacle_present(state.at(CURRENT_STATE))){ 29 | if(state.at(CURRENT_STATE)[STATE_LEFT] > 0.0 30 | && state.at(CURRENT_STATE)[STATE_RIGHT] > 0.0){ 31 | m_reward += 0.01; 32 | if(abs(state.at(CURRENT_STATE)[STATE_LEFT] - state.at(CURRENT_STATE)[STATE_RIGHT]) < 0.1) 33 | m_reward += 0.01; 34 | else 35 | m_reward -= 0.02; 36 | } 37 | else 38 | m_reward -= 0.01; 39 | } 40 | 41 | // Reward clearing obstacles 42 | if(_obstacle_present(state.at(PREV_STATE)) && 43 | !_obstacle_present(state.at(CURRENT_STATE))) 44 | m_reward += 1.0; 45 | 46 | // Penalize collisions (override reward) 47 | if(detect_collision()) 48 | m_reward = -1.0; 49 | 50 | m_cumulated_reward += m_reward; 51 | 52 | return m_reward; 53 | } 54 | 55 | double Reward::get_reward(){ 56 | return m_reward; 57 | } 58 | 59 | double Reward::get_cumulated_reward(){ 60 | return m_cumulated_reward; 61 | } 62 | 63 | void Reward::reset(){ 64 | m_reward = 0.; 65 | m_cumulated_reward = 0.; 66 | } -------------------------------------------------------------------------------- /log_analyzer/vpg_analyzer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from matplotlib import pyplot as plt 4 | import time 5 | 6 | from Analyzer import Analyzer 7 | 8 | class Vpg_analyzer(Analyzer): 9 | ''' 10 | Child class of Analyzer. 11 | Implement functions specialized for the given subject. 
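    Typical usage (mirrors the __main__ block at the bottom of this file):
        a = Vpg_analyzer(loc='../controllers/vpg_controller/log/', file='log.csv')
        a.results()
        a.show()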
12 | ''' 13 | 14 | def __init__(self, loc = '../controllers/vpg_controller/log/', file = 'log.csv'): 15 | super(Vpg_analyzer, self).__init__(loc, file) 16 | 17 | def results(self): 18 | fig, axes = plt.subplots(4,1,figsize=(12,8)) 19 | fig.canvas.set_window_title('Statistics for {} rollouts'.format(self.m_data_df.shape[0])) 20 | 21 | # plot the loss 22 | sample_dim = 100 23 | self._sub_plot_single(axes[0], 'Max_loss',legend='Max loss') 24 | self._sub_plot_single_from_array(axes[0], 25 | self.compute_moving_average('Max_loss', sample_dim), 26 | 'rollouts', 'Max loss Moving Average (N={})'.format(sample_dim),legend='Max loss moving average') 27 | axes[0].legend() 28 | axes[0].grid() 29 | # Plot the cumulated reward 30 | self._sub_plot_single_from_array(axes[1], 31 | self.compute_moving_average('Cumulated_reward', sample_dim), 32 | 'rollouts', 'Cumulated Reward Moving Average (N={})'.format(sample_dim)) 33 | axes[1].grid() 34 | # PLot the average success rate 35 | self._sub_plot_single_from_array(axes[2], 36 | self.compute_moving_average('Distance', sample_dim), 37 | 'rollouts', 'Distance Moving Average (N={})'.format(sample_dim)) 38 | axes[2].grid() 39 | # Plot the number of timesteps 40 | self._sub_plot_single(axes[3], 'Num_timesteps') 41 | 42 | fig.tight_layout() 43 | 44 | if __name__ == "__main__": 45 | a = Vpg_analyzer(loc = '../controllers/vpg_controller/log/', file = 'log.csv') 46 | a.results() 47 | #a.plot_distance() 48 | a.show() 49 | 50 | -------------------------------------------------------------------------------- /libraries/utils/khepera.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | 11 | namespace k4{ 12 | 13 | #define MAX_SPEED 47.6 14 | #define REDUCER 0.5 // Only use 50% of max speed 15 | 16 | #define IR_MIN 100.0 // No obstacle 17 | #define IR_MAX 1000.0 // Almost touching obstacle 18 | 19 | #define NUMBER_OF_INFRARED_SENSORS 12 20 | #define NUMBER_OF_PERIMETER_IR_SENSORS 8 21 | 22 | /** 23 | * Initialize the robot's infrared sensors, motors and LEDs 24 | */ 25 | int init_robot(); 26 | 27 | /** 28 | * Perform a time step 29 | */ 30 | int khepera_running(); 31 | 32 | /** 33 | * Set the left and right motor speeds. 34 | */ 35 | void set_motors_speed(double left, double right); 36 | 37 | /** 38 | * Get the left and right motor speeds. 39 | */ 40 | std::vector get_motors_speed(); 41 | 42 | /** 43 | * Set the left and right motor speeds. Input must be 44 | * normalized between [-1,1]. Backward: -1, Forward: +1 45 | */ 46 | void set_normalized_motors_speed(double left, double right); 47 | 48 | /** 49 | * Get the left and right normalized motor speeds. 50 | */ 51 | std::vector get_normalized_motors_speed(); 52 | 53 | /** 54 | * Normalize a range reading between 0 and 1. 55 | * 0 (no obstacle), 1 (obstacle very close) 56 | */ 57 | double normalize_range(double range); 58 | 59 | /** 60 | * Get the range of infrared sensor i 61 | */ 62 | double get_range(int i); 63 | 64 | /** 65 | * Get the range of each infrared sensor 66 | */ 67 | std::vector get_ranges(); 68 | 69 | /** 70 | * Get the normalized range of each infrared sensor. 
71 | * Returns values between 0 (no obstacle) and 1 (obstacle very close) 72 | */ 73 | std::vector get_normalized_ranges(); 74 | 75 | } -------------------------------------------------------------------------------- /libraries/utils/data_logger.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | /** 12 | * Create a csv file in the specified folder if it does not exist yet. 13 | * @param dir (std::string) Path to the file. 14 | * @param file (std::string) The file name must include the path to it. 15 | * @param headers (std::vector) Name of the columns. 16 | */ 17 | int create_log(std::string dir, std::string file, std::vector headers); 18 | 19 | /** 20 | * Write a line into the specified file (path to file included). 21 | * Each element of the vector will be written on the same line, 22 | * separated by a comma. 23 | * @param file (std::string) the file name must include the path to it. 24 | * @param values (std::vector) vector containing the values to write. 25 | */ 26 | void write_line(std::string file, std::vector values); 27 | 28 | /** 29 | * Write a line into the specified file (path to file included). 30 | * Each element of the vector will be written on the same line, 31 | * separated by a comma. 32 | * @overload void write_line(std::string file, std::vector values); 33 | * @param file (std::string) the file name must include the path to it. 34 | * @param values (std::vector) vector containing the values to write. 35 | * The values are converted to std::string. 36 | */ 37 | void write_line(std::string file, std::vector values); 38 | 39 | /** 40 | * Read the value of the last stored rollout. Defaults to 0 if no file 41 | * or if no rollout was previously logged. 42 | * @param file (std::string) the file name must include the path to it. 43 | * @output (int) value of the last rollout. 44 | */ 45 | int read_last_rollout(std::string file); 46 | 47 | /** 48 | * Read the index of the last stored particle. Defaults to 0 if no file 49 | * or if no particle was previously logged. 50 | * @param file (std::string) the file name must include the path to it. 51 | * @output (int) index of the last logged particle. 
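 * Example (sketch; the path is illustrative): int last = read_last_particle("log/log.csv"); // 0 on a fresh log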
52 | */ 53 | int read_last_particle(std::string file); -------------------------------------------------------------------------------- /libraries/utils/training_handler.cpp: -------------------------------------------------------------------------------- 1 | #include "training_handler.hpp" 2 | 3 | static const char *def = "K4"; // Robot def 4 | 5 | bool detect_collision(){ 6 | // Supervisor - get handle on robot position 7 | WbNodeRef robot_node_ref = wb_supervisor_node_get_from_def(def); 8 | int num_contact_points = wb_supervisor_node_get_number_of_contact_points(robot_node_ref); 9 | if(num_contact_points > 0) 10 | return true; 11 | else 12 | return false; 13 | } 14 | 15 | bool simulation_exploded(){ 16 | // Supervisor - get handle on robot position 17 | WbNodeRef robot_node_ref = wb_supervisor_node_get_from_def(def); 18 | WbFieldRef position_field = wb_supervisor_node_get_field(robot_node_ref, "translation"); 19 | double y_pose = wb_supervisor_field_get_sf_vec3f(position_field)[1]; 20 | return (y_pose > 0.01); 21 | } 22 | 23 | static double distance = 0.0; 24 | 25 | void measure_travelled_distance(double left, double right){ 26 | distance += (left + right)/1000.0; 27 | } 28 | 29 | void reset_distance(){ 30 | distance = 0.0; 31 | } 32 | 33 | double get_distance(){ 34 | return distance; 35 | } 36 | 37 | static std::default_random_engine _generator; 38 | void supervisor_move_robot(){ 39 | 40 | // Supervisor - get handle on robot position 41 | WbNodeRef robot_node_ref = wb_supervisor_node_get_from_def(def); 42 | WbFieldRef position_field = wb_supervisor_node_get_field(robot_node_ref, "translation"); 43 | //WbFieldRef rotation_field = wb_supervisor_node_get_field(robot_node_ref, "rotation"); 44 | 45 | // Define some locations 46 | std::vector locations; 47 | double loc0[3] = {-0.66,0,-0.68}; // Upper left 48 | double loc1[3] = {-0.79,0,0.79}; // Lower left 49 | double loc2[3] = {0,0,0}; // Middle 50 | double loc3[3] = {0.71,0,0.72}; // In White area 51 | locations.push_back(loc0); 52 | locations.push_back(loc1); 53 | locations.push_back(loc2); 54 | locations.push_back(loc3); 55 | 56 | // Sample a location 57 | std::uniform_int_distribution<> dis(0, locations.size()-1); 58 | int ind = dis(_generator); 59 | 60 | // Spawn the robot at chosen location 61 | wb_supervisor_field_set_sf_vec3f(position_field, locations[ind]); 62 | } -------------------------------------------------------------------------------- /controllers/khepera_test/Makefile: -------------------------------------------------------------------------------- 1 | # Copyright 1996-2020 Cyberbotics Ltd. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | ### Generic Makefile.include for Webots controllers, physics plugins, robot 16 | ### window libraries, remote control libraries and other libraries 17 | ### to be used with GNU make 18 | ### 19 | ### Platforms: Windows, macOS, Linux 20 | ### Languages: C, C++ 21 | ### 22 | ### Authors: Olivier Michel, Yvan Bourquin, Fabien Rohrer 23 | ### Edmund Ronald, Sergei Poskriakov 24 | ### 25 | ###----------------------------------------------------------------------------- 26 | ### 27 | ### This file is meant to be included from the Makefile files located in the 28 | ### Webots projects subdirectories. It is possible to set a number of variables 29 | ### to customize the build process, i.e., add source files, compilation flags, 30 | ### include paths, libraries, etc. These variables should be set in your local 31 | ### Makefile just before including this Makefile.include. This Makefile.include 32 | ### should never be modified. 33 | ### 34 | ### Here is a description of the variables you may set in your local Makefile: 35 | ### 36 | ### ---- C Sources ---- 37 | ### if your program uses several C source files: 38 | ### C_SOURCES = my_plugin.c my_clever_algo.c my_graphics.c 39 | ### 40 | ### ---- C++ Sources ---- 41 | ### if your program uses several C++ source files: 42 | ### CXX_SOURCES = my_plugin.cc my_clever_algo.cpp my_graphics.c++ 43 | ### 44 | ### ---- Compilation options ---- 45 | ### if special compilation flags are necessary: 46 | ### CFLAGS = -Wno-unused-result 47 | ### 48 | ### ---- Linked libraries ---- 49 | ### if your program needs additional libraries: 50 | ### INCLUDE = -I"/my_library_path/include" 51 | ### LIBRARIES = -L"/path/to/my/library" -lmy_library -lmy_other_library 52 | ### 53 | ### ---- Linking options ---- 54 | ### if special linking flags are needed: 55 | ### LFLAGS = -s 56 | ### 57 | ### ---- Webots included libraries ---- 58 | ### if you want to use the Webots C API in your C++ controller program: 59 | ### USE_C_API = true 60 | ### 61 | ### ---- Debug mode ---- 62 | ### if you want to display the gcc command line for compilation and link, as 63 | ### well as the rm command details used for cleaning: 64 | ### VERBOSE = 1 65 | ### 66 | ###----------------------------------------------------------------------------- 67 | 68 | USE_C_API = true 69 | 70 | UTILS_PATH = ../../libraries/utils/ 71 | 72 | INCLUDE = -I. -I$(UTILS_PATH) 73 | CXX_SOURCES = khepera_test.cpp $(UTILS_PATH)khepera.cpp 74 | 75 | ### Do not modify: this includes Webots global Makefile.include 76 | space := 77 | space += 78 | WEBOTS_HOME_PATH=$(subst $(space),\ ,$(strip $(subst \,/,$(WEBOTS_HOME)))) 79 | include $(WEBOTS_HOME_PATH)/resources/Makefile.include 80 | -------------------------------------------------------------------------------- /libraries/networks/vpg_network/vpg_wrapper.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "vpg_episode.hpp" 8 | 9 | enum SAVE_ARGS {POLICY,VALUE,POLICY_OPTIM,VALUE_OPTIM}; 10 | 11 | // Namespace for the functions of the VPG wrapper 12 | namespace vpg_w{ 13 | 14 | /** 15 | * Initialize the networks and other parameters. 16 | */ 17 | void init(); 18 | 19 | /** 20 | * Store provided episode to use later for training. 21 | * @param episode (Episode) {state, action, reward, next state} tuple. 22 | */ 23 | void store_episode(VPG_episode episode); 24 | 25 | /** 26 | * Compute the reward_to_go of the currently stored episodes. 
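 * Intended computation (as checked in unit_test/vpg_test.cpp): for the episode at index t of a rollout of length T, reward_to_go[t] = reward[t] + GAMMA*reward[t+1] + ... + GAMMA^(T-1-t)*reward[T-1].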
27 | */ 28 | void compute_reward_to_go(); 29 | 30 | /** 31 | * @return (uint) number of episodes currently stored. 32 | */ 33 | uint get_memory_size(); 34 | 35 | /** 36 | * @return (std::vector) get the memory's reference. 37 | */ 38 | const std::vector& get_memory(); 39 | 40 | /** 41 | * Dump the currently stored episodes. 42 | */ 43 | void reset_memory(); 44 | 45 | /** 46 | * Run inference on the policy network and return the action probabilities. 47 | * @param state (std::vector) Robot's state. 48 | * @return (std::vector) Each action's probability. 49 | */ 50 | std::vector eval_policy(std::vector state); 51 | 52 | /** 53 | * Run inference on the value function network and return the state's value. 54 | * @param state (std::vector) Robot's state. 55 | * @return (float) State's estimated value. 56 | */ 57 | float eval_value_function(std::vector state); 58 | 59 | /** 60 | * Train the network based on the currently stored episodes. 61 | * @param batch_size (uint) Number of episodes to use in a single batch. 62 | */ 63 | void train(uint batch_size=100); 64 | 65 | /** 66 | * Get the maximum loss. 67 | */ 68 | float get_max_loss(); 69 | 70 | /** 71 | * Reset the maximum loss. 72 | */ 73 | void reset_max_loss(); 74 | 75 | /** 76 | * Check if the specified directory exists. 77 | * @param path (std::string) 78 | * @return (bool) 79 | */ 80 | bool _dir_exists(std::string path); 81 | 82 | /** 83 | * Create the specified directory exists. 84 | * @param path (std::string) 85 | */ 86 | void _create_path(std::string path); 87 | 88 | /** 89 | * Save the model into the specified file 90 | * @param path (std::string) path to the file 91 | * @param file (std::string) path to the file + file name 92 | * @param model (uint) whether to save the policy network (POLICY=0) or the value function network (VALUE=1) 93 | */ 94 | void save(std::string path, std::string file, uint model); 95 | 96 | /** 97 | * Load the model from the specified file 98 | * @param file (std::string) path to the file + file name 99 | * @param model (uint) whether to load the policy network (POLICY=0) or the value function network (VALUE=1) 100 | * @return (bool) whether loading has succeeded. 101 | */ 102 | bool load(std::string file, uint model); 103 | } 104 | -------------------------------------------------------------------------------- /libraries/networks/vpg_network/vpg_network.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "vpg_episode.hpp" 9 | 10 | #include 11 | #include 12 | 13 | // Generator's seed 14 | #define SEED 12345 15 | 16 | 17 | /** 18 | * Namespace "vpg" containing the Vanilla Policy Gradient implementation. 
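 * It holds the Policy and Value networks with their Adam optimizers, the episode memory and the gradient computation, which are exposed through the vpg_w wrapper (see vpg_wrapper.hpp).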
19 | */ 20 | namespace vpg{ 21 | 22 | // Random generator 23 | std::default_random_engine generator; 24 | // Seeds the generator for reproducibility 25 | void seed_generator(); 26 | 27 | // --------------------- Policy Network --------------------- 28 | struct PolicyImpl: torch::nn::Module{ 29 | PolicyImpl(); 30 | torch::Tensor forward(torch::Tensor x); 31 | torch::nn::Linear l1{nullptr},l2{nullptr},l3{nullptr}; 32 | }; 33 | TORCH_MODULE(Policy); 34 | // Network instance 35 | static Policy policy_model; 36 | // Optimizer used to train the network 37 | static torch::optim::Adam* p_optimizer; 38 | 39 | // --------------------- Value function Network --------------------- 40 | struct ValueImpl: torch::nn::Module{ 41 | ValueImpl(); 42 | torch::Tensor forward(torch::Tensor x); 43 | torch::nn::Linear l1{nullptr},l2{nullptr},l3{nullptr}; 44 | }; 45 | TORCH_MODULE(Value); 46 | // Network instance 47 | static Value value_model; 48 | // Optimizer used to train the network 49 | static torch::optim::Adam* v_optimizer; 50 | 51 | // Rollout counter 52 | static int value_rollout_count = 0; 53 | // ----------------------------------------------------------------- 54 | 55 | 56 | // Vector storing the generated episodes 57 | static std::vector memory; 58 | static std::vector memory_batch; 59 | 60 | /** 61 | * Extract the states stored by the episodes inside the memory. 62 | * @param next (bool) If false, the state of the episode is queried. 63 | * Otherwise the next state of the episode is returned. 64 | */ 65 | std::vector> get_state_from_memory_batch(bool next = false); 66 | 67 | torch::Tensor get_action_from_memory_batch(); 68 | 69 | torch::Tensor get_reward_from_memory_batch(); 70 | 71 | torch::Tensor get_reward_to_go_from_memory_batch(); 72 | 73 | /** 74 | * Utility method to convert a state vector into a Tensor including a dimension for the batch. 75 | * @param state (std::vector) 76 | */ 77 | torch::Tensor convert_state_to_batch(std::vector state); 78 | 79 | /** 80 | * Utility method to convert a vector of state vectors into a Tensor generating an actual batch. 81 | * @overload convert_state_to_batch(std::vector state) 82 | * @param states (std::vector>) 83 | */ 84 | torch::Tensor convert_state_to_batch(std::vector> states); 85 | 86 | /** 87 | * Compute parameters gradients based on the current memory state when called. 88 | */ 89 | int compute_gradients(); 90 | 91 | // Maximum loss encountered during a rollout 92 | static float max_loss = 0.0F; 93 | 94 | /** 95 | * Update the maximum loss. 96 | */ 97 | void update_max_loss(float loss); 98 | } 99 | 100 | -------------------------------------------------------------------------------- /controllers/vpg_controller/Makefile: -------------------------------------------------------------------------------- 1 | # Copyright 1996-2019 Cyberbotics Ltd. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | ### Generic Makefile.include for Webots controllers, physics plugins, robot 16 | ### window libraries, remote control libraries and other libraries 17 | ### to be used with GNU make 18 | ### 19 | ### Platforms: Windows, macOS, Linux 20 | ### Languages: C, C++ 21 | ### 22 | ### Authors: Olivier Michel, Yvan Bourquin, Fabien Rohrer 23 | ### Edmund Ronald, Sergei Poskriakov 24 | ### 25 | ###----------------------------------------------------------------------------- 26 | ### 27 | ### This file is meant to be included from the Makefile files located in the 28 | ### Webots projects subdirectories. It is possible to set a number of variables 29 | ### to customize the build process, i.e., add source files, compilation flags, 30 | ### include paths, libraries, etc. These variables should be set in your local 31 | ### Makefile just before including this Makefile.include. This Makefile.include 32 | ### should never be modified. 33 | ### 34 | ### Here is a description of the variables you may set in your local Makefile: 35 | ### 36 | ### ---- C Sources ---- 37 | ### if your program uses several C source files: 38 | ### C_SOURCES = my_plugin.c my_clever_algo.c my_graphics.c 39 | ### 40 | ### ---- C++ Sources ---- 41 | ### if your program uses several C++ source files: 42 | ### CXX_SOURCES = my_plugin.cc my_clever_algo.cpp my_graphics.c++ 43 | ### 44 | ### ---- Compilation options ---- 45 | ### if special compilation flags are necessary: 46 | ### CFLAGS = -Wno-unused-result 47 | ### 48 | ### ---- Linked libraries ---- 49 | ### if your program needs additional libraries: 50 | ### INCLUDE = -I"/my_library_path/include" 51 | ### LIBRARIES = -L"/path/to/my/library" -lmy_library -lmy_other_library 52 | ### 53 | ### ---- Linking options ---- 54 | ### if special linking flags are needed: 55 | ### LFLAGS = -s 56 | ### 57 | ### ---- Webots included libraries ---- 58 | ### if you want to use the Webots C API in your C++ controller program: 59 | ### USE_C_API = true 60 | ### if you want to link with the Qt framework embedded in Webots: 61 | ### QT = core gui widgets network 62 | ### 63 | ### ---- Debug mode ---- 64 | ### if you want to display the gcc command line for compilation and link, as 65 | ### well as the rm command details used for cleaning: 66 | ### VERBOSE = 1 67 | ### 68 | ###----------------------------------------------------------------------------- 69 | 70 | USE_C_API = true 71 | 72 | # Utility files (world modification, data logging, state and reward computations) 73 | UTILS = ../../libraries/utils/ 74 | # Global parameters path 75 | GLOBAL_PARAM = ../ 76 | 77 | CXX_SOURCES = $(wildcard *.cpp) $(wildcard $(UTILS)*.cpp) 78 | 79 | INCLUDE = -I$(GLOBAL_PARAM) -I$(UTILS) -I../../libraries 80 | 81 | LIBRARIES= -L../../libraries -lvpg_network # -L $(WK4_PORTABLE) -lk4_webots -lgsl -lgslcblas 82 | 83 | ### Do not modify: this includes Webots global Makefile.include 84 | space := 85 | space += 86 | WEBOTS_HOME_PATH=$(subst $(space),\ ,$(strip $(subst \,/,$(WEBOTS_HOME)))) 87 | include $(WEBOTS_HOME_PATH)/resources/Makefile.include 88 | -------------------------------------------------------------------------------- /worlds/learned_obstacle_avoidance.wbt: -------------------------------------------------------------------------------- 1 | #VRML_SIM R2020a utf8 2 | WorldInfo { 3 | basicTimeStep 8 4 | lineScale 0.05 5 | contactProperties [ 6 | ContactProperties { 7 | material2 "khepera4 caster wheel" 8 | coulombFriction [ 9 | 0.01 10 | ] 11 | } 12 | ] 13 | } 14 | Viewpoint { 15 | orientation 
0.00032259533168366796 0.7031169061809662 0.7110741959702646 3.1422306242127007 16 | position 0.032327726197468154 2.6940283690489477 -0.0019851336983633394 17 | follow "Khepera_IV" 18 | followType "None" 19 | } 20 | RectangleArena { 21 | floorSize 2 2 22 | floorTileSize 4 4 23 | floorAppearance Appearance { 24 | material Material { 25 | diffuseColor 0.2 0.2 0.2 26 | } 27 | } 28 | wallHeight 0.3 29 | } 30 | Background { 31 | skyColor [ 32 | 0.2 0.2 0.2 33 | 0 0 0 34 | 0 0 0 35 | ] 36 | } 37 | DirectionalLight { 38 | ambientIntensity 1 39 | direction 0 -1 0 40 | intensity 1.5 41 | } 42 | DEF K4 Khepera4 { 43 | name "Khepera_IV" 44 | controller "vpg_controller" 45 | controllerArgs "training" 46 | supervisor TRUE 47 | } 48 | DEF OBST_0 Solid { 49 | translation -0.4 0.04955854302621261 -0.3 50 | rotation -1.435231588108557e-17 1 -2.118236274989482e-18 0 51 | children [ 52 | Shape { 53 | appearance Appearance { 54 | material Material { 55 | diffuseColor 0.8 0.72 0.529 56 | } 57 | } 58 | geometry Box { 59 | size 0.16 0.16 0.16 60 | } 61 | } 62 | ] 63 | boundingObject Box { 64 | size 0.15 0.1 0.15 65 | } 66 | } 67 | DEF OBST_1 Solid { 68 | translation 0.35 0.04955854302621261 0.59 69 | rotation 0.25163385176495073 0.9678225067882721 -5.028279834967654e-18 0 70 | children [ 71 | Shape { 72 | appearance Appearance { 73 | material Material { 74 | diffuseColor 0.8 0.72 0.529 75 | } 76 | } 77 | geometry Box { 78 | size 0.16 0.16 0.16 79 | } 80 | } 81 | ] 82 | name "solid(1)" 83 | boundingObject Box { 84 | size 0.15 0.1 0.15 85 | } 86 | } 87 | DEF OBST_2 Solid { 88 | translation 0.26 0.04955854302621261 -0.75 89 | rotation 2.31139824055369e-15 1 -1.3894918185828773e-15 -0.005936323319313137 90 | children [ 91 | Shape { 92 | appearance Appearance { 93 | material Material { 94 | diffuseColor 0.8 0.72 0.529 95 | } 96 | } 97 | geometry Box { 98 | size 0.16 0.16 0.16 99 | } 100 | } 101 | ] 102 | name "solid(5)" 103 | boundingObject Box { 104 | size 0.15 0.1 0.15 105 | } 106 | } 107 | DEF OBST_3 Solid { 108 | translation 0.58 0.0495585430262156 0 109 | rotation -7.621102877601795e-10 1 -2.02435756457418e-09 0 110 | children [ 111 | Shape { 112 | appearance Appearance { 113 | material Material { 114 | diffuseColor 0.8 0.72 0.529 115 | } 116 | } 117 | geometry Box { 118 | size 0.16 0.16 0.16 119 | } 120 | } 121 | ] 122 | name "solid(2)" 123 | boundingObject Box { 124 | size 0.15 0.1 0.15 125 | } 126 | } 127 | DEF OBST_4 Solid { 128 | translation -0.75 0.04955854302621261 0.46 129 | rotation -2.7321952489371356e-17 1 1.7619336741772742e-17 0 130 | children [ 131 | Shape { 132 | appearance Appearance { 133 | material Material { 134 | diffuseColor 0.8 0.72 0.529 135 | } 136 | } 137 | geometry Box { 138 | size 0.16 0.16 0.16 139 | } 140 | } 141 | ] 142 | name "solid(3)" 143 | boundingObject Box { 144 | size 0.15 0.1 0.15 145 | } 146 | } 147 | -------------------------------------------------------------------------------- /libraries/utils/khepera.cpp: -------------------------------------------------------------------------------- 1 | #include "khepera.hpp" 2 | 3 | static int time_step = 8; 4 | 5 | static const char *infrared_sensors_names[NUMBER_OF_INFRARED_SENSORS] = { 6 | // turret sensors 7 | "rear left infrared sensor", "left infrared sensor", "front left infrared sensor", "front infrared sensor", 8 | "front right infrared sensor", "right infrared sensor", "rear right infrared sensor", "rear infrared sensor", 9 | // ground sensors 10 | "ground left infrared sensor", "ground front left infrared sensor", 
"ground front right infrared sensor", 11 | "ground right infrared sensor"}; 12 | 13 | // Declare required device tags 14 | static WbDeviceTag leds[3]; 15 | static WbDeviceTag infrared_sensors[12]; 16 | static WbDeviceTag left_motor, right_motor; 17 | 18 | 19 | int k4::init_robot(){ 20 | wb_robot_init(); 21 | 22 | time_step = (int)wb_robot_get_basic_time_step(); 23 | 24 | // get and enable the infrared sensors 25 | for (int i = 0; i < 12; ++i) { 26 | infrared_sensors[i] = wb_robot_get_device(infrared_sensors_names[i]); 27 | wb_distance_sensor_enable(infrared_sensors[i], time_step); 28 | } 29 | 30 | // get the led actuators 31 | leds[0] = wb_robot_get_device("front left led"); 32 | leds[1] = wb_robot_get_device("front right led"); 33 | leds[2] = wb_robot_get_device("rear led"); 34 | for (int i = 0; i < 3; ++i) 35 | wb_led_set(leds[i], 0xFFFFFF & rand()); 36 | 37 | // get the motors and set target position to infinity (speed control) 38 | left_motor = wb_robot_get_device("left wheel motor"); 39 | right_motor = wb_robot_get_device("right wheel motor"); 40 | wb_motor_set_position(left_motor, INFINITY); 41 | wb_motor_set_position(right_motor, INFINITY); 42 | wb_motor_set_velocity(left_motor, 0.0); 43 | wb_motor_set_velocity(right_motor, 0.0); 44 | 45 | return time_step; 46 | } 47 | 48 | int k4::khepera_running(){ 49 | return (wb_robot_step(time_step) != -1) ? 1 : 0; 50 | } 51 | 52 | void k4::set_motors_speed(double left, double right){ 53 | wb_motor_set_velocity(left_motor, left); 54 | wb_motor_set_velocity(right_motor, right); 55 | } 56 | 57 | std::vector k4::get_motors_speed(){ 58 | std::vector speed; 59 | speed.push_back(wb_motor_get_velocity(left_motor)); 60 | speed.push_back(wb_motor_get_velocity(right_motor)); 61 | return speed; 62 | } 63 | 64 | void k4::set_normalized_motors_speed(double left, double right){ 65 | if(abs(left)>1 || abs(right)>1){ 66 | std::cout << "ERROR in set_normalized_motors_speed: input must be in [-1,1]" < k4::get_normalized_motors_speed(){ 74 | std::vector speed = get_motors_speed(); 75 | speed[0] /= (MAX_SPEED*REDUCER); 76 | speed[1] /= (MAX_SPEED*REDUCER); 77 | return speed; 78 | } 79 | 80 | double k4::normalize_range(double range){ 81 | double offset = 50.0; 82 | double value = (range - IR_MIN - offset)/(IR_MAX - IR_MIN - offset); 83 | if(value < 0.0) 84 | value = 0.0; 85 | else if(value > 1.0) 86 | value = 1.0; 87 | return value; 88 | } 89 | 90 | double k4::get_range(int i){ 91 | return wb_distance_sensor_get_value(infrared_sensors[i]); 92 | } 93 | 94 | std::vector k4::get_ranges(){ 95 | std::vector ranges; 96 | for(int i=0; i k4::get_normalized_ranges(){ 102 | std::vector ranges; 103 | for(int i=0; i 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | // Utils 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | // Network 17 | #include 18 | #include "vpg_parameters.hpp" 19 | 20 | #include 21 | 22 | #include 23 | // Log file column names 24 | const std::vector log_columns = {"Rollout", "Num_episodes", 25 | "Num_timesteps", "Cumulated_reward", "Max_loss", "Distance"}; 26 | 27 | /** 28 | * VPG_arbitrator orchestrates the learning and evaluation of the network and 29 | * handles state and reward computations. 30 | */ 31 | class VPG_arbitrator{ 32 | public: 33 | /** 34 | * Instantiate the robot. 35 | */ 36 | VPG_arbitrator(); 37 | 38 | /** 39 | * Cleanup webots resources. 
40 | */ 41 | ~VPG_arbitrator(); 42 | 43 | /** 44 | * Step the robot once to let the sensors settle (executes STOP behavior). 45 | * @param steps (int) number of steps to idle for. 46 | */ 47 | int idle(int steps=1); 48 | 49 | /** 50 | * Step the robot. Action depends on mode (train/eval). 51 | */ 52 | int step(); 53 | 54 | /** 55 | * Reset the arbitrator. 56 | */ 57 | void reset(); 58 | 59 | /** 60 | * Detect task completion. 61 | * @return (bool) indicating whether the task is complete or not. 62 | */ 63 | bool detect_task_completion(); 64 | 65 | /** 66 | * Build the episodes that are required to train the network. 67 | * @param action (std::vector) Action taken at time t (left and right wheel velocities). 68 | */ 69 | void build_episodes(std::vector action); 70 | 71 | /** 72 | * Build the final episode containing the final reward. 73 | */ 74 | void build_final_episode(); 75 | 76 | /** 77 | * Transfer data between current and previous episodes for next iteration. 78 | */ 79 | void roll_episodes(); 80 | 81 | /** 82 | * Set the arbitrator in evaluation or training mode. 83 | * @param evaluating (bool) evaluation mode if true, training mode otherwise (default). 84 | * @param verbose (bool) print the action probabilities after evaluating the state when in evaluation mode. 85 | */ 86 | void mode(bool evaluating = true, bool verbose = false); 87 | 88 | /** 89 | * Train the network on the preceding rollout. 90 | */ 91 | void train(); 92 | 93 | /** 94 | * Get the current timestamp of the arbitrator. 95 | */ 96 | int get_timestamp(); 97 | 98 | /** 99 | * Get the current rollout count. 100 | */ 101 | int get_rollout_count(); 102 | 103 | /** 104 | * Tells if in training mode or eval mode. 105 | * @return (bool) training 106 | */ 107 | bool is_training(); 108 | 109 | /** 110 | * Retrieve all the stats relative to the rollout. 111 | * @return (std::vector) vector of values.
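 * The values are expected to line up with the log_columns headers declared at the top of this file (Rollout, Num_episodes, Num_timesteps, Cumulated_reward, Max_loss, Distance).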
112 | */ 113 | std::vector gather_log_data(); 114 | 115 | protected: 116 | 117 | private: 118 | int _training_step(); 119 | int _eval_step(); 120 | int m_timestamp; 121 | int m_rollout_count; 122 | int m_rollout_local; 123 | bool m_is_training; // default is true 124 | bool m_verbose_eval; 125 | std::vector m_current_action; // Left and right wheels velocity 126 | State m_last_states; 127 | Reward m_reward; 128 | VPG_episode m_current_episode; 129 | VPG_episode m_previous_episode; 130 | }; -------------------------------------------------------------------------------- /libraries/utils/data_logger.cpp: -------------------------------------------------------------------------------- 1 | #include "data_logger.hpp" 2 | 3 | char* _string_to_char(std::string s){ 4 | char* p = new char[s.length()+1]; 5 | memcpy(p, s.c_str(), s.length()+1); 6 | return p; 7 | } 8 | 9 | bool _dir_exists(std::string path){ 10 | struct stat sb; 11 | return (stat(_string_to_char(path), &sb) == 0 && S_ISDIR(sb.st_mode)); 12 | } 13 | 14 | bool _file_exists(const std::string& file) { 15 | struct stat buffer; 16 | return (stat (file.c_str(), &buffer) == 0); 17 | } 18 | 19 | int create_log(std::string dir, std::string file, std::vector headers){ 20 | if(_file_exists(file)) 21 | return 0; 22 | int r = 0; 23 | std::string command = ""; 24 | // Create directory 25 | if(dir != "./" || dir == ""){ 26 | command = "mkdir -p " + dir; 27 | r = system(_string_to_char(command)); 28 | } 29 | command = "touch " + file; 30 | r = system(_string_to_char(command)); 31 | 32 | write_line(file, headers); 33 | 34 | return r; 35 | } 36 | 37 | void write_line(std::string file, std::vector values){ 38 | try{ 39 | std::ofstream outfile; 40 | outfile.open(file, std::ios::app); // append 41 | 42 | std::string line = ""; 43 | for(uint i=0; i values){ 56 | std::vector str_values; 57 | for(uint i=0; i ##################################################### 18 | 19 | def read_log(self): 20 | self.m_data_df = pd.read_csv(self.m_loc_name+self.m_file_name, sep=',') 21 | 22 | def col_to_int_(self, columns:list): 23 | ''' Convert provided columns to int (in place). 
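        Example (column names taken from this project's log file): self.col_to_int_(['Rollout', 'Num_timesteps'])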
''' 24 | for c in columns: 25 | self.m_data_df[c] = self.m_data_df[c].apply(lambda x : int(x)) 26 | 27 | def compute_quantity_rate(self, quantity:str, sample_dim=100): 28 | ''' Return the rates of a given quantity (DataFrame column) ''' 29 | results = np.array(self.m_data_df[quantity].values) 30 | dim = len(results) 31 | rate = [] 32 | for i in range(0,dim,sample_dim): 33 | if(i+sample_dim > dim): 34 | # Ignore computation for subset smaller than sample_dim 35 | break 36 | sample = results[i:i+sample_dim] 37 | rate.append(np.sum(sample)/len(sample)) 38 | return rate 39 | 40 | def compute_moving_average(self, quantity:str, sample_dim=100): 41 | ''' Return the rates of a given quantity (DataFrame column) as a moving average ''' 42 | results = np.array(self.m_data_df[quantity].values) 43 | dim = len(results) 44 | rate = [] 45 | for i in range(0,dim-sample_dim,1): 46 | #print('{} % done'.format(i/dim),end='\r') 47 | if(i+sample_dim > dim): 48 | # Ignore computation for subset smaller than sample_dim 49 | break 50 | sample = results[i:i+sample_dim] 51 | rate.append(np.sum(sample)/len(sample)) 52 | return rate 53 | 54 | def estimated_run_time(self): 55 | c = 'Num_timesteps' 56 | T = 0.008/10 # 8 ms, accelerated by ~23 ()fast mode 57 | return self.m_data_df[c].sum() * T / 3600 # Estimated run time in hour 58 | 59 | def show_head(self): 60 | print(self.m_data_df.head()) 61 | 62 | ### -> Pandas / Pyplot -> ########################################## 63 | 64 | def _sub_plot_single(self, ax:plt.axis, column:str, legend=None): 65 | ''' 66 | Create a sub-plot for a given column. 67 | 68 | Parameters 69 | ---------- 70 | ax : :plt.axis 71 | figure axis 72 | 73 | column : str 74 | column name 75 | ''' 76 | if legend == None: 77 | ax.plot(self.m_data_df[column],linestyle='-',linewidth=0.1) 78 | else: 79 | ax.plot(self.m_data_df[column],linestyle='-',linewidth=0.1,label=legend) 80 | 81 | ax.set_xlabel('Rollouts') 82 | if False and column == 'Num_timesteps': 83 | column += ' (approx. run time: {0:.2f} hours)'.format(self.estimated_run_time()) 84 | ax.set_title(column) 85 | 86 | def _sub_plot_single_from_array(self, ax:plt.axis, data:np.array, x_title:str, title:str, legend=None): 87 | ''' 88 | Create a sub-plot for a given column. 89 | 90 | Parameters 91 | ---------- 92 | ax : plt.axis 93 | figure axis 94 | 95 | data : np.array 96 | data to plot 97 | ''' 98 | if legend == None: 99 | ax.plot(data,linestyle='-',linewidth=1) 100 | else: 101 | ax.plot(data,linestyle='-',linewidth=1,label=legend) 102 | ax.set_xlabel(x_title) 103 | ax.set_title(title) 104 | 105 | def _sub_plot_multiple(self, axis:plt.axis, columns:tuple): 106 | ''' 107 | Create a sub-plot for the given columns. 108 | 109 | Parameters 110 | ---------- 111 | ax : plt.axis 112 | figure axis 113 | columns : tuple 114 | column names 115 | ''' 116 | title = '' 117 | for col in columns: 118 | axis.plot(self.m_data_df[col], label=col,linestyle='-',linewidth=0.1) 119 | title += col + ', ' 120 | axis.set_title(title) 121 | axis.set_xlabel('Rollouts') 122 | axis.legend() 123 | 124 | def _sub_plot_multiple_twin(self, axis:plt.axis, columns:tuple): 125 | ''' 126 | Create a sub-plot for the given columns. 127 | Create a twin x-axis with new y scale for the second column. 
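        The first column is drawn against the left y-axis (blue); the second is drawn on a twin axis with its own scale on the right y-axis (red).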
128 | 129 | Parameters 130 | ---------- 131 | ax : plt.axis 132 | figure axis 133 | column : tuple 134 | 2 column names 135 | twin : bool 136 | 137 | ''' 138 | # PLot first column 139 | axis.plot(self.m_data_df[columns[0]], label=columns[0], color='tab:blue',linestyle='-',linewidth=0.1) 140 | axis.tick_params(axis='y', labelcolor='tab:blue') 141 | axis.set_xlabel('Rollouts') 142 | axis.set_ylabel(columns[0]) 143 | axis.set_title(columns[0] + ' ~ ' + columns[1]) 144 | 145 | # PLot second column 146 | ax = axis.twinx() 147 | ax.plot(self.m_data_df[columns[1]], label=columns[1], color='tab:red',linestyle='-',linewidth=1) 148 | ax.set_ylabel(columns[1]) 149 | ax.tick_params(axis='y', labelcolor='tab:red') 150 | 151 | def show(self): 152 | plt.tight_layout() 153 | plt.show() 154 | 155 | def save(self, file = "fig.png"): 156 | plt.savefig(file, bbox_inches='tight') 157 | 158 | def close_all(self): 159 | plt.close('all') 160 | 161 | 162 | 163 | -------------------------------------------------------------------------------- /unit_test/vpg_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | static const std::string passed = "[\033[1;4;32mPASSED\033[0m]\n"; 9 | static const std::string failed = "[\033[1;4;31mFAILED\033[0m]\n"; 10 | static const std::string exception = "\n\033[1;33mFollowing exception occured:\033[0m\n"; 11 | 12 | void store_some_episodes(){ 13 | VPG_episode e0; 14 | VPG_episode e1; 15 | VPG_episode e2; 16 | 17 | e0.action = {0,0}; 18 | e1.action = {0.5,-0.5}; 19 | e2.action = {1,1}; 20 | 21 | e0.reward = 0.01; 22 | e1.reward = 0.02; 23 | e2.reward = 0.03; 24 | 25 | std::vector state0(STATE_DIM, 1.); 26 | std::vector state1(STATE_DIM, 0.); 27 | std::vector state2(STATE_DIM, 0.5); 28 | e0.state = state0; 29 | e1.state = state1; 30 | e2.state = state2; 31 | e0.state_next = state2; 32 | e1.state_next = state1; 33 | e2.state_next = state0; 34 | 35 | // Push the episodes "randomly" 36 | vpg_w::store_episode(e0); 37 | vpg_w::store_episode(e1); 38 | vpg_w::store_episode(e2); 39 | } 40 | 41 | void test_memory_handling(){ 42 | store_some_episodes(); 43 | std::cout << "Memory storing test: " + (vpg_w::get_memory_size() != 3 ? failed : passed); 44 | } 45 | 46 | void test_reward_to_go(){ 47 | bool success = true; 48 | double tol = 1e-10; 49 | vpg_w::reset_memory(); 50 | store_some_episodes(); 51 | 52 | // Test the reward_to_go computation 53 | vpg_w::compute_reward_to_go(); 54 | const std::vector memory = vpg_w::get_memory(); 55 | 56 | //std::cout << memory[0].reward_to_go << " " << memory[1].reward_to_go << " " << memory[2].reward_to_go << " " << std::endl; 57 | std::cout << "Reward to go test: " + (success && 58 | (abs(memory[0].reward_to_go - 0.01 - GAMMA*0.02 - GAMMA*GAMMA*0.03)>tol || 59 | abs(memory[1].reward_to_go - 0.02 - GAMMA*0.03)>tol || 60 | abs(memory[2].reward_to_go - 0.03)>tol) ? failed : passed); 61 | } 62 | 63 | void test_eval_policy(){ 64 | std::cout << "Policy evaluation test: " << std::flush; 65 | std::vector p_actions; 66 | bool success = true; 67 | 68 | // Create a dummy state 69 | std::vector v(STATE_DIM,0.); 70 | v.at(3) = 1.; 71 | v.at(2) = 1.; 72 | 73 | // Run a forward pass 74 | try{ 75 | p_actions = vpg_w::eval_policy(v); 76 | }catch(const std::exception& e){ 77 | success = false; 78 | std::cout << exception << e.what() << std::endl; 79 | } 80 | 81 | std::cout << (success ? 
passed : failed); 82 | } 83 | 84 | void test_value_function(){ 85 | std::cout << "Value function test: " << std::flush; 86 | bool success = true; 87 | float value = 0.1234; 88 | std::vector state(STATE_DIM,0.); 89 | state[0] = 1.; 90 | state[1] = 1.; 91 | 92 | try{ 93 | value = vpg_w::eval_value_function(state); 94 | }catch(const std::exception& e){ 95 | success = false; 96 | std::cout << exception << e.what() << std::endl; 97 | } 98 | if(value == 0.1234) success = false; 99 | 100 | std::cout << (success ? passed : failed); 101 | } 102 | 103 | void test_model_saving(std::string path, std::string file){ 104 | std::cout << "Saving test: " << std::flush; 105 | int success = true; 106 | try{ 107 | vpg_w::save(path,file,POLICY); 108 | }catch(const std::exception& e){ 109 | success = false; 110 | std::cout << exception << e.what() << std::endl; 111 | } 112 | std::cout << (success ? passed : failed); 113 | } 114 | 115 | void test_model_loading(std::string file){ 116 | std::cout << "Loading test: " << std::flush; 117 | int success = true; 118 | try{ 119 | vpg_w::load(file,POLICY); 120 | }catch(const std::exception& e){ 121 | success = false; 122 | std::cout << exception << e.what() << std::endl; 123 | } 124 | std::cout << (success ? passed : failed); 125 | } 126 | 127 | void test_training(){ 128 | bool success = true; 129 | 130 | // Empty the memory 131 | vpg_w::reset_memory(); 132 | // Push some new episodes 133 | store_some_episodes(); 134 | 135 | // Run a training sequence 136 | try{ 137 | vpg_w::train(); 138 | }catch(const std::exception& e){ 139 | success = false; 140 | std::cout << exception << e.what() << std::endl; 141 | } 142 | std::cout << "Training test: " + (success ? passed : failed); 143 | } 144 | 145 | void test_training_heavy(){ 146 | std::cout << "Heavy training test: " << std::flush; 147 | bool success = true; 148 | 149 | // Empty the memory 150 | vpg_w::reset_memory(); 151 | // Prepare a dummy episode 152 | VPG_episode e0; 153 | e0.action = {0,0}; 154 | e0.reward = 0.01; 155 | std::vector state0(STATE_DIM, 1.); 156 | e0.state = state0; 157 | e0.state_next = state0; 158 | // Store it N times 159 | for(int i=0; i<1000; i++){ 160 | vpg_w::store_episode(e0); 161 | } 162 | vpg_w::compute_reward_to_go(); 163 | 164 | // Run a training sequence 165 | try{ 166 | vpg_w::train(100); 167 | }catch(const std::exception& e){ 168 | success = false; 169 | std::cout << exception << e.what() << std::endl; 170 | } 171 | std::cout << (success ? passed : failed); 172 | } 173 | 174 | int main(int argc, char* argv[]){ 175 | 176 | // Initilize the network 177 | vpg_w::init(); 178 | 179 | // Test the memory for episodes handling 180 | test_memory_handling(); 181 | 182 | test_reward_to_go(); 183 | 184 | // Test the proba generation 185 | test_eval_policy(); 186 | 187 | // Test value function evaluation 188 | test_value_function(); 189 | 190 | // Test loading and saving the model 191 | system("rm -r model"); 192 | test_model_loading("model/vpg_policy"); 193 | test_model_saving("model/","model/vpg_policy"); 194 | test_model_loading("model/vpg_policy"); 195 | 196 | // Test vector constructor's properties 197 | /*std::vector v1 = {0,1,2,3,4}; 198 | std::vector v2 = std::vector(v1.begin(), v1.begin()+v1.size()); 199 | std::cout << "v1 == v2 ? 
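// (Kept commented out as a sanity check: the iterator-range constructor shown here is
//  the same idiom vpg_network.cpp's train() uses to slice vpg::memory into batches.)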
" << (v1==v2) << std::endl; // true 200 | for(int i=0; im_timestamp = 0; 10 | 11 | // Initilaize the current action 12 | this->m_current_action = {0.0,0.0}; 13 | 14 | // Set invalide time to episodes 15 | this->m_current_episode.t = -1; 16 | this->m_previous_episode.t = -1; 17 | 18 | // Default mode is training 19 | this->m_is_training = true; 20 | this->m_verbose_eval = false; 21 | 22 | // Init the reward 23 | this->m_reward = Reward(); 24 | 25 | // Initialize the wrapper for the network 26 | vpg_w::init(); 27 | 28 | // Create the log file if not existing 29 | create_log(LOG_PATH, LOG_FILE, log_columns); 30 | 31 | // Initialize the rollout count 32 | std::cout << "Reading the log. " << std::endl; 33 | this->m_rollout_count = read_last_rollout(LOG_FILE); 34 | this->m_rollout_local = 0; 35 | } 36 | 37 | VPG_arbitrator::~VPG_arbitrator(){} 38 | 39 | int VPG_arbitrator::idle(int steps){ 40 | int status = 1; 41 | for(int i=0; im_current_action[0]*2.0-1.0, 43 | this->m_current_action[1]*2.0-1.0); 44 | status = k4::khepera_running(); 45 | } 46 | return status; 47 | } 48 | 49 | int VPG_arbitrator::_training_step(){ 50 | 51 | // Handle number of rollouts 52 | if(this->m_rollout_count >= ROLLOUTS){ 53 | // All rollouts executed, stop controller 54 | std::cout << "All rollouts executed. Terminating." << std::endl; 55 | return 0; 56 | } 57 | 58 | // Check if time out is met or if collision occured 59 | bool timed_out = (TIMEOUT <= this->get_timestamp()*WAIT_STEPS*((double)ROBOT_BASIC_TIMESTEP)*0.001); 60 | bool collided = detect_collision(); 61 | 62 | if(timed_out || collided){ 63 | 64 | std::cout << "Time stamp: " << m_timestamp << ", duration in seconds: " 65 | << (double)m_timestamp*WAIT_STEPS*(double)ROBOT_BASIC_TIMESTEP/1000.0 << std::endl; 66 | 67 | // Store the episode with the final reward 68 | this->build_final_episode(); 69 | vpg_w::store_episode(this->m_previous_episode); 70 | 71 | this->m_rollout_count++; 72 | this->m_rollout_local++; 73 | 74 | std::cout << "Terminated rollout [" << this->m_rollout_count << "/" << ROLLOUTS << "]" << std::endl; 75 | vpg_w::compute_reward_to_go(); 76 | this->train(); 77 | 78 | vpg_w::save(MODEL_PATH,MODEL_POLICY_FILE,POLICY); 79 | vpg_w::save(MODEL_PATH,MODEL_VALUE_FILE, VALUE); 80 | write_line(LOG_FILE, VPG_arbitrator::gather_log_data()); 81 | 82 | this->reset(); 83 | 84 | supervisor_move_robot(); 85 | wb_supervisor_simulation_reset_physics(); 86 | } 87 | 88 | // Increment timestamp for next call 89 | this->m_timestamp++; 90 | 91 | // Exploit policy 92 | this->m_current_action = vpg_w::eval_policy(this->m_last_states.get_state(CURRENT_STATE)); 93 | //std::cout << "New state encountered, trying action: " << this->m_current_action << std::endl; 94 | 95 | // Apply action (make sure it is properly applied) 96 | k4::set_normalized_motors_speed(this->m_current_action[0]*2.0-1.0, 97 | this->m_current_action[1]*2.0-1.0); 98 | 99 | // Build episode 100 | this->build_episodes(this->m_current_action); 101 | 102 | // Send the previous_state if valid to the network implementation for storing 103 | if(this->m_previous_episode.t > -1){ 104 | vpg_w::store_episode(m_previous_episode); 105 | } 106 | 107 | // Roll episodes for next step 108 | this->roll_episodes(); 109 | 110 | // Add to travelled distance 111 | measure_travelled_distance(this->m_current_action[0]*2.0-1.0, 112 | this->m_current_action[1]*2.0-1.0); 113 | 114 | return k4::khepera_running(); 115 | } 116 | 117 | int VPG_arbitrator::_eval_step(){ 118 | // Increment timestamp for next call 119 | 
this->m_timestamp++; 120 | 121 | // Run inference on the network 122 | this->m_current_action = vpg_w::eval_policy(m_last_states.get_state(CURRENT_STATE)); 123 | 124 | // Print the action probabilities and the selected action 125 | if(this->m_verbose_eval){ 126 | std::cout << std::fixed; 127 | std::cout << std::setprecision(2); 128 | std::cout << "State: " << this->m_last_states.to_string(CURRENT_STATE) << std::endl; 129 | std::cout << "Action: "; 130 | for(uint i=0; im_current_action[i] << ","; 132 | } 133 | if(USE_VALUE_FUNCTION) 134 | std::cout << " - state value: " << vpg_w::eval_value_function(m_last_states.get_state(CURRENT_STATE)); 135 | std::cout << std::endl; 136 | } 137 | 138 | // Apply action 139 | k4::set_normalized_motors_speed(this->m_current_action[0]*2.0-1.0, 140 | this->m_current_action[1]*2.0-1.0); 141 | 142 | return k4::khepera_running(); 143 | } 144 | 145 | int VPG_arbitrator::step(){ 146 | 147 | // Set the query period 148 | for(int i=0; im_last_states.step(); 155 | 156 | // Get current reward 157 | this->m_reward.compute_reward(this->m_last_states.get_states(), this->m_timestamp); 158 | 159 | if(this->m_is_training) 160 | return this->_training_step(); 161 | else 162 | return this->_eval_step(); 163 | } 164 | 165 | void VPG_arbitrator::reset(){ 166 | // Initialize the time stamp 167 | this->m_timestamp = 0; 168 | // Initialize the current action 169 | this->m_current_action = {0.0,0.0}; 170 | // Set invalide time to episodes 171 | this->m_current_episode.t = -1; 172 | this->m_previous_episode.t = -1; 173 | 174 | this->m_last_states.reset(); 175 | 176 | this->m_reward.reset(); 177 | 178 | reset_distance(); 179 | 180 | vpg_w::reset_memory(); 181 | vpg_w::reset_max_loss(); 182 | } 183 | 184 | void VPG_arbitrator::build_episodes(std::vector action){ 185 | this->m_current_episode.t = this->m_timestamp; 186 | this->m_current_episode.state = this->m_last_states.get_state(CURRENT_STATE); 187 | std::vector to_double(action.begin(), action.end()); 188 | this->m_current_episode.action = to_double; 189 | this->m_previous_episode.reward = this->m_reward.get_reward(); 190 | this->m_previous_episode.state_next = this->m_last_states.get_state(CURRENT_STATE); 191 | } 192 | 193 | void VPG_arbitrator::build_final_episode(){ 194 | this->m_previous_episode.reward = this->m_reward.get_reward(); 195 | this->m_previous_episode.state_next = this->m_last_states.get_state(CURRENT_STATE); 196 | } 197 | 198 | void VPG_arbitrator::roll_episodes(){ 199 | this->m_previous_episode.t = this->m_timestamp; 200 | this->m_previous_episode.state = this->m_last_states.get_state(CURRENT_STATE); 201 | this->m_previous_episode.action = this->m_current_episode.action; 202 | } 203 | 204 | void VPG_arbitrator::mode(bool evaluating, bool verbose){ 205 | this->m_is_training = !evaluating; 206 | this->m_verbose_eval = verbose; 207 | } 208 | 209 | void VPG_arbitrator::train(){ 210 | vpg_w::train(); 211 | } 212 | 213 | int VPG_arbitrator::get_timestamp(){ 214 | return this->m_timestamp; 215 | } 216 | 217 | int VPG_arbitrator::get_rollout_count(){ 218 | return this->m_rollout_count; 219 | } 220 | 221 | bool VPG_arbitrator::is_training(){ 222 | return this->m_is_training; 223 | } 224 | 225 | // {"Rollout", "Num_episodes", "Num_timesteps", "Cumulated_reward", "Max_loss", "Distance"} 226 | 227 | std::vector VPG_arbitrator::gather_log_data(){ 228 | std::vector line; 229 | // Get the rollout info 230 | line.push_back(std::to_string(this->m_rollout_count)); 231 | // Get the number of used episodes 232 | 
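// "Num_episodes" is the number of transitions currently held in the wrapper's memory;
// since reset() clears that memory after every rollout, this roughly equals the length
// of the rollout that was just used for training.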
line.push_back(std::to_string(vpg_w::get_memory_size())); 233 | // Get the number of timesteps 234 | line.push_back(std::to_string(this->m_timestamp)); 235 | // Get the cumulated reward 236 | line.push_back(std::to_string(this->m_reward.get_cumulated_reward())); 237 | // Get the maximum loss during rollout 238 | line.push_back(std::to_string(vpg_w::get_max_loss())); 239 | // Get the travelled distance 240 | line.push_back(std::to_string(get_distance())); 241 | return line; 242 | } 243 | -------------------------------------------------------------------------------- /libraries/networks/vpg_network/vpg_network.cpp: -------------------------------------------------------------------------------- 1 | #include "vpg_network.hpp" 2 | #include "vpg_wrapper.hpp" 3 | 4 | ///////////////////////////////////////////////////////////////////// 5 | // WRAPPER METHODS IMPLEMENTATION 6 | ///////////////////////////////////////////////////////////////////// 7 | 8 | void vpg_w::init(){ 9 | 10 | //vpg::seed_generator(); 11 | 12 | // Instantiate the models 13 | if(!vpg_w::load(MODEL_POLICY_FILE,POLICY)){ 14 | vpg::policy_model = vpg::Policy(); 15 | } 16 | if(!vpg_w::load(MODEL_VALUE_FILE,VALUE)){ 17 | vpg::value_model = vpg::Value(); 18 | } 19 | 20 | // Set the uninitialised policy optimizer pointer 21 | static torch::optim::Adam p_adam(vpg::policy_model->parameters(), 22 | torch::optim::AdamOptions(1e-3)); // .beta1(0.5) 23 | vpg::p_optimizer = &p_adam; 24 | 25 | // Set the uninitialised value function optimizer pointer 26 | static torch::optim::Adam v_adam(vpg::value_model->parameters(), 27 | torch::optim::AdamOptions(1e-4)); // .beta1(0.5) 28 | vpg::v_optimizer = &v_adam; 29 | 30 | // Set the maximum loss to zero 31 | vpg_w::reset_max_loss(); 32 | } 33 | 34 | void vpg_w::store_episode(VPG_episode episode){ 35 | vpg::memory.push_back(episode); 36 | } 37 | 38 | void vpg_w::compute_reward_to_go(){ 39 | double rtg = 0.; 40 | if(vpg::memory.empty()) 41 | return; 42 | for(uint i=vpg::memory.size()-1; i>=0; i--){ 43 | rtg = vpg::memory[i].reward + GAMMA*rtg; 44 | vpg::memory[i].reward_to_go = rtg; 45 | if(i==0) break; 46 | } 47 | } 48 | 49 | uint vpg_w::get_memory_size(){ 50 | return vpg::memory.size(); 51 | } 52 | 53 | const std::vector& vpg_w::get_memory(){ 54 | return vpg::memory; 55 | } 56 | 57 | void vpg_w::reset_memory(){ 58 | vpg::memory.clear(); 59 | } 60 | 61 | std::vector vpg_w::eval_policy(std::vector state){ 62 | torch::Tensor proba = vpg::policy_model->forward( 63 | torch::_cast_Float(vpg::convert_state_to_batch(state))); 64 | std::vector action(ACTION_DIM, 0.0F); 65 | for(uint i=0; i state){ 72 | return vpg::value_model->forward(torch::_cast_Float(vpg::convert_state_to_batch(state))).item().toFloat(); 73 | } 74 | 75 | void vpg_w::train(uint batch_size){ 76 | if(vpg::memory.size() == 0){ 77 | std::cout << "\033[1;33m[WARNING] memory empty, training aborted.\033[0m" << std::endl; 78 | return; 79 | } 80 | 81 | // Clear the gradients of the models' parameters 82 | vpg::policy_model->zero_grad(); 83 | vpg::value_rollout_count++; 84 | if(vpg::value_rollout_count > VALUE_UPDATE_RATE){ 85 | vpg::value_rollout_count = 0; 86 | vpg::value_model->zero_grad(); 87 | } 88 | 89 | // Split the memory into batches and compute the gradients on each batch sequentially 90 | for(uint i=0 ;; i++){ 91 | //std::cout << "Processing batch: " << i << std::endl; 92 | if(batch_size >= vpg::memory.size()){ // Batch size is at least as large as the memory 93 | vpg::memory_batch = std::vector(vpg::memory.begin(), 
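// (Whole memory fits in one batch: copy it as-is. Each batch then goes through
//  compute_gradients(), which accumulates the policy loss -log(policy output) * advantage,
//  where advantage = reward_to_go - V(s) if USE_VALUE_FUNCTION is set, else reward_to_go alone.)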
vpg::memory.end()); 94 | vpg::compute_gradients(); 95 | break; 96 | }else if((i+1)*batch_size <= vpg::memory.size()){ // Extract a batch from the memory 97 | vpg::memory_batch = std::vector(vpg::memory.begin() + i*batch_size, vpg::memory.begin() + (i+1)*batch_size); 98 | vpg::compute_gradients(); 99 | }else if(i*batch_size < vpg::memory.size() && (i+1)*batch_size >= vpg::memory.size()){ // If the last batch overshoots the memory's end 100 | vpg::memory_batch = std::vector(vpg::memory.begin() + i*batch_size, vpg::memory.end()); 101 | vpg::compute_gradients(); 102 | break; 103 | }else{ // Reached end of memory (i*batch_size == memory.size()) -> stop (was already handled in first "else if") 104 | //std::cout << "\033[1;33mConditional Miss\033[0m, processing aborted. Status:" << std::endl; 105 | //std::cout << "Memory size: " << vpg::memory.size() << std::endl; 106 | //std::cout << "Start: " << i*batch_size << "\t End: " << (i+1)*batch_size << std::endl; 107 | break; 108 | } 109 | } 110 | 111 | // Update the parameters 112 | vpg::p_optimizer->step(); 113 | if(vpg::value_rollout_count == VALUE_UPDATE_RATE && USE_VALUE_FUNCTION){ 114 | std::cout << "Cumulated gradients for " << 115 | VALUE_UPDATE_RATE << " rollouts. Updating value function weights." << std::endl; 116 | vpg::v_optimizer->step(); 117 | } 118 | } 119 | 120 | float vpg_w::get_max_loss(){ 121 | return vpg::max_loss; 122 | } 123 | 124 | void vpg_w::reset_max_loss(){ 125 | vpg::max_loss = 0.0F; 126 | } 127 | 128 | bool vpg_w::_dir_exists(std::string path){ 129 | struct stat sb; 130 | if (stat(path.c_str(), &sb) == 0 && S_ISDIR(sb.st_mode)) 131 | return true; 132 | return false; 133 | } 134 | 135 | void vpg_w::_create_path(std::string path){ 136 | const std::string command = "mkdir -p " + path; 137 | system(command.c_str()); 138 | } 139 | 140 | void vpg_w::save(std::string path, std::string file, uint model){ 141 | if(!_dir_exists(path)) 142 | _create_path(path); 143 | 144 | switch(model){ 145 | case POLICY: 146 | torch::save(vpg::policy_model,file); 147 | break; 148 | case VALUE: 149 | torch::save(vpg::value_model,file); 150 | break; 151 | /*case POLICY_OPTIM: 152 | torch::save(vpg::p_optimizer,file); 153 | break; 154 | case VALUE_OPTIM: 155 | torch::save(vpg::v_optimizer,file); 156 | break;*/ 157 | } 158 | } 159 | 160 | bool vpg_w::load(std::string file, uint model){ 161 | // Load the model 162 | try{ 163 | if(model == POLICY){ 164 | torch::load(vpg::policy_model,file); 165 | return true; 166 | }else if(model == VALUE){ 167 | torch::load(vpg::value_model,file); 168 | return true; 169 | } 170 | }catch(const std::exception& e){ 171 | std::cout << "\n\033[1;33m[WARNING]:\033[0m Could not load specified model. 
Using untrained version.\n"; 172 | return false; 173 | } 174 | } 175 | 176 | ///////////////////////////////////////////////////////////////////// 177 | // CORE METHODS IMPLEMENTATION 178 | ///////////////////////////////////////////////////////////////////// 179 | 180 | void vpg::seed_generator(){ 181 | vpg::generator.seed(SEED); 182 | } 183 | 184 | vpg::PolicyImpl::PolicyImpl(){ 185 | // Constructor - build the network's layers 186 | this->l1 = register_module("l1",torch::nn::Linear(STATE_DIM,8)); 187 | this->l2 = register_module("l2",torch::nn::Linear(8,4)); 188 | this->l3 = register_module("l3",torch::nn::Linear(4,ACTION_DIM)); 189 | } 190 | 191 | torch::Tensor vpg::PolicyImpl::forward(torch::Tensor x){ 192 | // apply ReLU activations and sigmoid on output 193 | x = torch::relu(this->l1->forward(x)); 194 | x = torch::relu(this->l2->forward(x)); 195 | x = torch::sigmoid(this->l3->forward(x)); 196 | 197 | return x; 198 | } 199 | 200 | vpg::ValueImpl::ValueImpl(){ 201 | // Constructor - build the network's layers 202 | this->l1 = register_module("l1",torch::nn::Linear(STATE_DIM,5)); 203 | this->l2 = register_module("l2",torch::nn::Linear(5,3)); 204 | this->l3 = register_module("l3",torch::nn::Linear(3,1)); 205 | } 206 | 207 | torch::Tensor vpg::ValueImpl::forward(torch::Tensor x){ 208 | // apply ReLU activations and leave the output linear 209 | x = torch::relu(this->l1->forward(x)); 210 | x = torch::relu(this->l2->forward(x)); 211 | x = this->l3->forward(x); 212 | 213 | return x; 214 | } 215 | 216 | std::vector> vpg::get_state_from_memory_batch(bool next){ 217 | std::vector> states; 218 | for(uint i=0; i actions; 230 | for(uint i=0; i rewards; 239 | for(uint i=0; i rtg; 247 | for(uint i=0; i state){ 254 | return torch::tensor(state).view({-1,(long)state.size()}); 255 | } 256 | torch::Tensor vpg::convert_state_to_batch(std::vector> states){ 257 | torch::Tensor t = vpg::convert_state_to_batch(states[0]); 258 | for(uint i=1; iforward(states); 285 | // Get the actually taken actions 286 | //torch::Tensor actions = torch::_cast_Int(vpg::get_action_from_memory_batch()); 287 | // Get the rewards 288 | torch::Tensor rewards = vpg::get_reward_from_memory_batch(); 289 | // Get the rewards to go 290 | torch::Tensor rewards_to_go = vpg::get_reward_to_go_from_memory_batch(); 291 | 292 | //////////////////////////////////////////////////////////////////////////// 293 | // Train the Policy 294 | //////////////////////////////////////////////////////////////////////////// 295 | 296 | // Compute the advantage 297 | torch::Tensor advantage, s_values; 298 | if(USE_VALUE_FUNCTION){ 299 | // Get the states values 300 | s_values = vpg::value_model->forward(states); 301 | advantage = rewards_to_go - s_values; 302 | }else{ 303 | // Compute the advantage without support from the value function 304 | advantage = rewards_to_go; 305 | } 306 | 307 | // Compute the loss (negative sign to perform gradient ascent) 308 | torch::Tensor policy_loss = -torch::log(actions) * advantage; 309 | 310 | // Compute the backward pass on each component of the loss {N,1} 311 | for(uint i=0; i