├── rl_studio ├── envs │ ├── __init__.py │ ├── gazebo │ │ ├── __init__.py │ │ ├── autoparking │ │ │ ├── ._models │ │ │ ├── env_type.py │ │ │ ├── exceptions.py │ │ │ ├── models │ │ │ │ ├── __init__.py │ │ │ │ └── autoparking_env.py │ │ │ └── image_f1.py │ │ ├── f1 │ │ │ ├── README.md │ │ │ ├── exceptions.py │ │ │ ├── env_type.py │ │ │ └── models │ │ │ │ ├── utils.py │ │ │ │ ├── followlane_qlearn.py │ │ │ │ ├── followline_qlearn.py │ │ │ │ ├── f1_env.py │ │ │ │ ├── settings.py │ │ │ │ └── reset.py │ │ ├── mountain_car │ │ │ ├── env_type.py │ │ │ ├── exceptions.py │ │ │ └── __init__.py │ │ ├── robot_mesh │ │ │ ├── env_type.py │ │ │ ├── exceptions.py │ │ │ └── __init__.py │ │ ├── gazebo_utils.py │ │ ├── real_env_ros2.py │ │ └── real_env.py │ ├── openai_gym │ │ └── requirements.txt │ ├── carla │ │ ├── utils │ │ │ ├── constants.py │ │ │ ├── colors.py │ │ │ ├── weather.py │ │ │ ├── synchronous_mode.py │ │ │ └── plot_topology.py │ │ ├── __init__.py │ │ └── followlane │ │ │ ├── utils.py │ │ │ ├── reset.py │ │ │ └── settings.py │ └── envs_type.py ├── visual │ ├── __init__.py │ └── ascii │ │ ├── __init__.py │ │ ├── images.py │ │ └── text.py ├── agents │ ├── f1 │ │ ├── __init__.py │ │ ├── settings.py │ │ ├── manual_pilot.py │ │ └── utils.py │ ├── turtlebot │ │ ├── __init__.py │ │ ├── turtlebot_trainer.py │ │ ├── sarsa.py │ │ ├── liveplot.py │ │ ├── qlearn.py │ │ ├── memory.py │ │ └── circuit2_turtlebot_lidar_qlearn.py │ ├── utilities │ │ ├── __init__.py │ │ ├── render.py │ │ ├── push_git_repo.py │ │ ├── plot_npy_dataset.py │ │ ├── camera_visualizer.py │ │ ├── averaged_table.py │ │ ├── plot_ECDF_template.py │ │ ├── plot_reward_function.py │ │ └── plot_multiple_graphs_template.py │ ├── mountain_car │ │ ├── __init__.py │ │ ├── manual_pilot.py │ │ └── utils.py │ ├── robot_mesh │ │ ├── __init__.py │ │ ├── manual_pilot.py │ │ └── utils.py │ ├── frameworks_type.py │ ├── pendulum │ │ └── requirements.txt │ ├── tasks_type.py │ ├── agents_type.py │ ├── exceptions.py │ ├── trainer.py │ ├── ros_gazebo_pose.py │ ├── liveplot.py │ └── cartpole │ │ ├── requirements.txt │ │ └── README.md ├── scripts │ ├── __init__.py │ └── benchmark_runner ├── installation │ ├── __init__.py │ ├── formula1_laser_setup.bash │ ├── robot_mesh_setup.bash │ ├── mountain_car_setup.bash │ ├── formula1_setup.bash │ ├── setup_noetic.bash │ └── gazebo_ros_noetic.repos ├── wrappers │ ├── __init__.py │ ├── tests │ │ └── __init__.py │ ├── monitoring │ │ ├── __init__.py │ │ └── tests │ │ │ ├── __init__.py │ │ │ ├── helpers.py │ │ │ └── test_video_recorder.py │ ├── dict.py │ ├── README.md │ ├── inference_rlstudio.py │ └── time_limit.py ├── algorithms │ ├── models │ │ ├── __init__.py │ │ └── qlearn.py │ ├── requirements.txt │ ├── exceptions.py │ ├── algorithms_type.py │ ├── utils.py │ ├── README.md │ ├── memory.py │ ├── qlearn_multiple_states.py │ ├── __init__.py │ └── ppo.py ├── docs │ ├── carla_board.png │ ├── Town01_topology.png │ ├── Town01_waypoints.png │ ├── gazebo_screenshot.png │ ├── rlstudio-diagram.png │ └── carla_spectator_camera.png ├── birdview_v2_cache │ └── Carla │ │ └── Maps │ │ └── .gitignore ├── rl-studio.py ├── config │ ├── config_pendulum_ddpg.yaml │ ├── config_pendulum_ppo.yaml │ ├── config_cartpole_programmatic.yaml │ ├── config_cartpole_ppo.yaml │ ├── config_cartpole_ppo_continuous.yaml │ ├── config_cartpole_ddpg.yaml │ ├── config_cartpole_dqn.yaml │ ├── config_mountain_car_qlearn.yaml │ ├── config_cartpole_qlearn.yaml │ ├── config_robot_mesh_qlearn.yaml │ ├── config_training_followline_qlearn_f1_gazebo.yaml │ ├── 
config_inference_followline_qlearn_f1_gazebo.yaml │ ├── config_inference_followline_dqn_f1_gazebo.yaml │ ├── config_training_followlane_qlearn_carla.yaml │ ├── config_training_followlane_qlearn_f1_gazebo.yaml │ ├── config_training_followline_dqn_f1_gazebo.yaml │ ├── config_inference_followlane_qlearn_f1_gazebo.yaml │ └── config_inference_followline_ddpg_f1_gazebo.yaml ├── requirements.txt ├── __init__.py └── rl-studio-tuning-qlearn-followlane.py ├── scripts ├── stop.sh ├── load_env.sh └── camera_view.sh ├── .github └── dependanbot.yml ├── .disable-pre-commit-config.yaml ├── CHANGELOG.md ├── FAQ.md ├── LICENSE.md ├── CODING.md └── CONTRIBUTING.md /rl_studio/envs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rl_studio/visual/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rl_studio/agents/f1/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rl_studio/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rl_studio/agents/turtlebot/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rl_studio/agents/utilities/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rl_studio/installation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rl_studio/visual/ascii/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rl_studio/wrappers/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rl_studio/wrappers/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rl_studio/agents/mountain_car/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rl_studio/agents/robot_mesh/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rl_studio/algorithms/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rl_studio/wrappers/monitoring/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/rl_studio/wrappers/monitoring/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/__init__.py: -------------------------------------------------------------------------------- 1 | from rl_studio.envs.gazebo.real_env import RealEnv 2 | -------------------------------------------------------------------------------- /rl_studio/envs/openai_gym/requirements.txt: -------------------------------------------------------------------------------- 1 | gym==0.25.0 2 | numpy==1.17.4 3 | pygame==2.1.2 4 | -------------------------------------------------------------------------------- /rl_studio/docs/carla_board.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JdeRobot/RL-Studio/HEAD/rl_studio/docs/carla_board.png -------------------------------------------------------------------------------- /rl_studio/docs/Town01_topology.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JdeRobot/RL-Studio/HEAD/rl_studio/docs/Town01_topology.png -------------------------------------------------------------------------------- /rl_studio/agents/turtlebot/turtlebot_trainer.py: -------------------------------------------------------------------------------- 1 | class TurtlebotTrainer: 2 | def __init__(self, params): 3 | pass 4 | -------------------------------------------------------------------------------- /rl_studio/docs/Town01_waypoints.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JdeRobot/RL-Studio/HEAD/rl_studio/docs/Town01_waypoints.png -------------------------------------------------------------------------------- /rl_studio/docs/gazebo_screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JdeRobot/RL-Studio/HEAD/rl_studio/docs/gazebo_screenshot.png -------------------------------------------------------------------------------- /rl_studio/docs/rlstudio-diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JdeRobot/RL-Studio/HEAD/rl_studio/docs/rlstudio-diagram.png -------------------------------------------------------------------------------- /rl_studio/envs/carla/utils/constants.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | 4 | ROOT_PATH = str(Path(__file__).parent.parent) 5 | -------------------------------------------------------------------------------- /rl_studio/birdview_v2_cache/Carla/Maps/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore everything in this directory 2 | * 3 | # Except this file 4 | !.gitignore 5 | -------------------------------------------------------------------------------- /rl_studio/docs/carla_spectator_camera.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JdeRobot/RL-Studio/HEAD/rl_studio/docs/carla_spectator_camera.png -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/autoparking/._models: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/JdeRobot/RL-Studio/HEAD/rl_studio/envs/gazebo/autoparking/._models -------------------------------------------------------------------------------- /rl_studio/algorithms/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.17.4 2 | pydantic==1.9.1 3 | tensorflow==2.10.0 4 | tensorflow_gpu==2.9.1 5 | torch==1.12.1 6 | -------------------------------------------------------------------------------- /rl_studio/agents/frameworks_type.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class FrameworksType(Enum): 5 | TF = "TensorFlow" 6 | PYTORCH = "Pytorch" 7 | -------------------------------------------------------------------------------- /scripts/stop.sh: -------------------------------------------------------------------------------- 1 | killall -9 rosmaster 2 | killall -9 gzserver 3 | killall -9 gzclient 4 | killall -9 python 5 | killall -9 roscore 6 | killall -9 rosout 7 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/f1/README.md: -------------------------------------------------------------------------------- 1 | # F1 Environment 2 | 3 | ## Main class 4 | 5 | `env_f1.py` 6 | 7 | ## Useful programs 8 | 9 | `image_f1.py` 10 | -------------------------------------------------------------------------------- /rl_studio/envs/envs_type.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class EnvsType(Enum): 5 | GAZEBO = "gazebo" 6 | CARLA = "carla" 7 | OPENAI = "openai" 8 | -------------------------------------------------------------------------------- /scripts/load_env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | cd gym_gazebo/envs/installation 3 | rm -rf catkin_ws 4 | bash setup_noetic.bash 5 | bash formula1_laser_setup.bash 6 | cd ~/BehaviorStudio/ 7 | -------------------------------------------------------------------------------- /rl_studio/agents/pendulum/requirements.txt: -------------------------------------------------------------------------------- 1 | gym==0.26.2 2 | gymnasium==0.27.0 3 | markdownTable==6.0.0 4 | matplotlib==3.3.2 5 | numpy==1.17.4 6 | torch==1.12.1 7 | tqdm==4.64.0 8 | -------------------------------------------------------------------------------- /rl_studio/algorithms/models/qlearn.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | 3 | 4 | class QlearnValidator(BaseModel): 5 | alpha: float 6 | epsilon: float 7 | gamma: float 8 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/autoparking/env_type.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class EnvironmentType(Enum): 5 | qlearn_env = "qlearn" 6 | dqn_env = "dqn" 7 | manual_env = "manual" 8 | ddpg_env = "ddpg" 9 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/mountain_car/env_type.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class TrainingType(Enum): 5 | qlearn_env_camera = "qlearn_camera" 6 | qlearn_env_laser = "qlearn_laser" 7 | dqn_env = "dqn" 8 | manual_env = "manual" 9 | 
-------------------------------------------------------------------------------- /rl_studio/envs/gazebo/robot_mesh/env_type.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class TrainingType(Enum): 5 | qlearn_env_camera = "qlearn_camera" 6 | qlearn_env_laser = "qlearn_laser" 7 | dqn_env = "dqn" 8 | manual_env = "manual" 9 | -------------------------------------------------------------------------------- /rl_studio/wrappers/monitoring/tests/helpers.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import shutil 3 | import tempfile 4 | 5 | 6 | @contextlib.contextmanager 7 | def tempdir(): 8 | temp = tempfile.mkdtemp() 9 | yield temp 10 | shutil.rmtree(temp) 11 | -------------------------------------------------------------------------------- /rl_studio/agents/tasks_type.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class TasksType(Enum): 5 | FOLLOWLINEGAZEBO = "follow_line_gazebo" 6 | FOLLOWLANEGAZEBO = "follow_lane_gazebo" 7 | FOLLOWLANECARLA = "follow_lane_carla" 8 | AUTOPARKINGGAZEBO = "autoparking_gazebo" 9 | -------------------------------------------------------------------------------- /.github/dependanbot.yml: -------------------------------------------------------------------------------- 1 | # https://docs.github.com/en/code-security/supply-chain-security/configuration-options-for-dependency-updates#about-the-dependabotyml-file 2 | 3 | version: 2 4 | 5 | updates: 6 | - package-ecosystem: "pip" 7 | directory: "/" 8 | schedule: 9 | interval: "weekly" 10 | commit-message: 11 | prefix: ":arrow_up:" 12 | -------------------------------------------------------------------------------- /scripts/camera_view.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | TOPIC="" 4 | 5 | if [ "$1" == "f1" ]; then 6 | TOPIC="/F1ROS/cameraL/image_raw" 7 | elif [ "$1" == "turtlebot" ]; then 8 | TOPIC="/camera/rgb/image_raw" 9 | else 10 | printf "\n[ERROR] - No robot selected (use: 'f1' or 'turtlebot')\n\n" 11 | exit 1 12 | fi 13 | 14 | rosrun image_view image_view image:=$TOPIC 15 | -------------------------------------------------------------------------------- /rl_studio/agents/agents_type.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class AgentsType(Enum): 5 | F1 = "f1" 6 | F1GAZEBO = "f1" 7 | TURTLEBOT = "turtlebot" 8 | ROBOT_MESH = "robot_mesh" 9 | MOUNTAIN_CAR = "mountain_car" 10 | CARTPOLE = "cartpole" 11 | PENDULUM = "pendulum" 12 | AUTOPARKINGGAZEBO = "autoparkingRL" 13 | AUTOCARLA = "auto_carla" 14 | -------------------------------------------------------------------------------- /rl_studio/algorithms/exceptions.py: -------------------------------------------------------------------------------- 1 | class CreationError(Exception): 2 | ... 3 | 4 | 5 | class NoValidAlgorithmType(CreationError): 6 | def __init__(self, algorithm): 7 | self.algorithm_type = algorithm 8 | self.message = f"[ERROR] No valid training type ({algorithm}) in your config.yml file or is missing." 9 | super().__init__(self.message) 10 | -------------------------------------------------------------------------------- /rl_studio/agents/exceptions.py: -------------------------------------------------------------------------------- 1 | class CreationError(Exception): 2 | ... 
3 | 4 | 5 | class NoValidTrainingType(CreationError): 6 | def __init__(self, training_type): 7 | self.training_type = training_type 8 | self.message = f"[ERROR] No valid training type ({training_type}) in your config.yml file or is missing." 9 | super().__init__(self.message) 10 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/robot_mesh/exceptions.py: -------------------------------------------------------------------------------- 1 | class CreationError(Exception): 2 | ... 3 | 4 | 5 | class NoValidTrainingType(CreationError): 6 | def __init__(self, training_type): 7 | self.training_type = training_type 8 | self.message = f"[MESSAGE] No valid training type ({training_type}) in your settings.py file" 9 | super().__init__(self.message) 10 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/f1/exceptions.py: -------------------------------------------------------------------------------- 1 | class CreationError(Exception): 2 | ... 3 | 4 | 5 | class NoValidEnvironmentType(CreationError): 6 | def __init__(self, environment_type): 7 | self.training_type = environment_type 8 | self.message = f"[MESSAGE] No valid training type ({environment_type}) in your settings.py file" 9 | super().__init__(self.message) 10 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/mountain_car/exceptions.py: -------------------------------------------------------------------------------- 1 | class CreationError(Exception): 2 | ... 3 | 4 | 5 | class NoValidTrainingType(CreationError): 6 | def __init__(self, training_type): 7 | self.training_type = training_type 8 | self.message = f"[MESSAGE] No valid training type ({training_type}) in your settings.py file" 9 | super().__init__(self.message) 10 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/autoparking/exceptions.py: -------------------------------------------------------------------------------- 1 | class CreationError(Exception): 2 | ... 
3 | 4 | 5 | class NoValidEnvironmentType(CreationError): 6 | def __init__(self, environment_type): 7 | self.traning_type = environment_type 8 | self.message = f"[MESSAGE] No valid training type ({environment_type}) in your settings.py file" 9 | super().__init__(self.message) 10 | -------------------------------------------------------------------------------- /rl_studio/algorithms/algorithms_type.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class AlgorithmsType(Enum): 5 | PROGRAMMATIC = 'programmatic' 6 | QLEARN = "qlearn" 7 | DEPRECATED_QLEARN = "qlearn_deprecated" 8 | DQN = "dqn" 9 | DDPG = "ddpg" 10 | SAC = "sac" 11 | DDPG_TORCH = "ddpg_torch" 12 | PPO = "ppo" 13 | PPO_CONTINIUOUS = 'ppo_continuous' 14 | MANUAL = "manual" 15 | -------------------------------------------------------------------------------- /rl_studio/agents/trainer.py: -------------------------------------------------------------------------------- 1 | from abc import ABC 2 | 3 | from pydantic import BaseModel 4 | 5 | 6 | class TrainerValidator(BaseModel): 7 | settings: dict 8 | agent: dict 9 | environment: dict 10 | algorithm: dict 11 | 12 | 13 | class InferenceExecutorValidator(BaseModel): 14 | settings: dict 15 | agent: dict 16 | environment: dict 17 | algorithm: dict 18 | inference: dict 19 | # gazebo: dict 20 | 21 | 22 | class AgentTrainer(ABC): 23 | pass 24 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/f1/env_type.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class EnvironmentType(Enum): 5 | qlearn_env_camera_follow_line = "qlearn_camera_follow_line" 6 | qlearn_env_camera_follow_lane = "qlearn_camera_follow_lane" 7 | qlearn_env_laser_follow_line = "qlearn_laser_follow_line" 8 | dqn_env_follow_line = "dqn_follow_line" 9 | dqn_env_follow_lane = "dqn_follow_lane" 10 | manual_env = "manual" 11 | ddpg_env_follow_line = "ddpg_follow_line" 12 | ddpg_env_follow_lane = "ddpg_follow_lane" 13 | -------------------------------------------------------------------------------- /.disable-pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v2.3.0 4 | hooks: 5 | - id: check-yaml 6 | - id: end-of-file-fixer 7 | - id: trailing-whitespace 8 | - repo: https://github.com/psf/black 9 | rev: 20.8b1 10 | hooks: 11 | - id: black 12 | language_version: python3 13 | #- repo: https://gitlab.com/pycqa/flake8 14 | # rev: 3.9.0 15 | # hooks: 16 | # - id: flake8 17 | # additional_dependencies: [flake8-bugbear==21.4.3] 18 | - repo: https://github.com/asottile/blacken-docs 19 | rev: v1.10.0 20 | hooks: 21 | - id: blacken-docs 22 | additional_dependencies: [black==20.8b1] 23 | -------------------------------------------------------------------------------- /rl_studio/agents/utilities/render.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import argparse 3 | 4 | import gym 5 | 6 | parser = argparse.ArgumentParser( 7 | description="Renders a Gym environment for quick inspection." 8 | ) 9 | parser.add_argument( 10 | "env_id", 11 | type=str, 12 | help="the ID of the environment to be rendered (e.g. 
HalfCheetah-v1)", 13 | ) 14 | parser.add_argument("--step", type=int, default=1) 15 | args = parser.parse_args() 16 | 17 | env = gym.make(args.env_id) 18 | env.reset() 19 | 20 | step = 0 21 | while True: 22 | if args.step: 23 | env.step(env.action_space.sample()) 24 | env.render() 25 | if step % 10 == 0: 26 | env.reset() 27 | step += 1 28 | -------------------------------------------------------------------------------- /rl_studio/agents/utilities/push_git_repo.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | 3 | def git_add_commit_push(commit_message): 4 | # Change this to your repository directory 5 | repo_directory = "/home/ruben/Desktop/2020-phd-ruben-lucas" 6 | 7 | # Change to the repository directory 8 | subprocess.run(["git", "add", "."], cwd=repo_directory) 9 | 10 | # Commit changes with the given commit message 11 | subprocess.run(["git", "commit", "-m", commit_message], cwd=repo_directory) 12 | 13 | # Push changes to the remote repository 14 | subprocess.run(["git", "push"], cwd=repo_directory) 15 | 16 | if __name__ == "__main__": 17 | commit_message = "Your commit message here." 18 | git_add_commit_push(commit_message) -------------------------------------------------------------------------------- /rl_studio/installation/formula1_laser_setup.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ -z "$GAZEBO_MODEL_PATH" ]; then 4 | bash -c 'echo "export GAZEBO_MODEL_PATH=$GAZEBO_MODEL_PATH:"`pwd`/../assets/models >> ~/.bashrc' 5 | else 6 | bash -c 'sed "s,GAZEBO_MODEL_PATH=[^;]*,'GAZEBO_MODEL_PATH=`pwd`/../assets/models'," -i ~/.bashrc' 7 | fi 8 | 9 | bash -c 'echo "export GYM_GAZEBO_WORLD_CIRCUIT_F1="`pwd`/../assets/worlds/f1_1_simplecircuit.world >> ~/.bashrc' 10 | bash -c 'echo "export GYM_GAZEBO_WORLD_CIRCUIT_F1_LASER="`pwd`/../assets/worlds/f1_1_simplecircuit_laser.world >> ~/.bashrc' 11 | #export GYM_GAZEBO_WORLD_CIRCUIT_F1=`pwd`/../assets/worlds/f1_1_simplecircuit.world 12 | echo 'Formula 1 env variables loaded successfully' 13 | 14 | exec bash # reload bash 15 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | 3 | Here you can find all important changes in this project. RL-Studio adheres to [Semantic Versioning](http://semver.org/). 4 | ## [Unreleased] 5 | 6 | ## [1.2.0] - 2022-01-11 7 | ### Added: 8 | 9 | - Add new files in envs/gazebo/f1/models/: original files are split into different modular files such as images.py, reset.py, step.py, settings.py, simplified_perception.py ([PR #122](https://github.com/JdeRobot/RL-Studio/pull/122)) 10 | 11 | ### Changed: 12 | - entry file name: `rl-studio.py` 13 | - config file names: now they follow naming `config_mode_task_algorithm_agent_simulator` 14 | - files structure in envs/gazebo/f1/models/: files are named `task_algorithm.py` 15 | 16 | 17 | 18 | [1.2.0]: https://github.com/JdeRobot/RL-Studio/pull/122 19 | -------------------------------------------------------------------------------- /FAQ.md: -------------------------------------------------------------------------------- 1 | # DOUBTS GLOSSARY 2 | 3 | This document collects the team's doubts and the solutions we agreed on. 4 | 5 | ## MDP rule related 6 | 7 | - **DOUBT:** If we use the distance traveled since the last state as an input, does that go against the MDP principle? 
8 | " *Just the current state must be considered to perform an action since the previous state does not affect the current 9 | decision* " 10 | - **RESPONSE:** Once you are in a given state, it doesn't matter how you got there. The current state contains all the information required to make a prediction or a decision. This simplifies the decision algorithm a lot, because it does not need any memory. 11 | That said, your input could be any kind of information that gives you an idea of the environment state, so, as long as this input does not imply a state-space explosion, it should be fine. -------------------------------------------------------------------------------- /rl_studio/rl-studio.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import yaml 4 | 5 | from rl_studio.agents import TrainerFactory, InferencerFactory 6 | 7 | 8 | def main(): 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument( 11 | "-f", 12 | "--file", 13 | type=argparse.FileType("r"), 14 | required=True, 15 | default="config/config.yaml", 16 | help="In /config dir you will find .yaml example files", 17 | ) 18 | 19 | args = parser.parse_args() 20 | config_file = yaml.load(args.file, Loader=yaml.FullLoader) 21 | 22 | if config_file["settings"]["mode"] == "inference": 23 | inferencer = InferencerFactory(config_file) 24 | inferencer.main() 25 | else: 26 | trainer = TrainerFactory(config_file) 27 | trainer.main() 28 | 29 | 30 | if __name__ == "__main__": 31 | main() -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/mountain_car/__init__.py: -------------------------------------------------------------------------------- 1 | from .env_type import TrainingType 2 | from .exceptions import NoValidTrainingType 3 | 4 | 5 | class MountainCarEnv: 6 | def __new__(cls, **config): 7 | cls.circuit = None 8 | cls.vel_pub = None 9 | cls.unpause = None 10 | cls.pause = None 11 | cls.reset_proxy = None 12 | cls.action_space = None 13 | cls.reward_range = None 14 | cls.model_coordinates = None 15 | cls.position = None 16 | 17 | training_type = config["environments"]["training_type"] 18 | print(config.get("launchfile")) 19 | if training_type == TrainingType.qlearn_env_camera.value: 20 | from .mountain_car_env import MountainCarEnv 21 | 22 | return MountainCarEnv(**config) 23 | 24 | else: 25 | raise NoValidTrainingType(training_type) 26 | -------------------------------------------------------------------------------- /rl_studio/config/config_pendulum_ddpg.yaml: -------------------------------------------------------------------------------- 1 | settings: 2 | output_dir: "./logs/" 3 | save_model: True 4 | save_positions: True 5 | telemetry: False 6 | logging_level: info 7 | mode: inference 8 | agent: pendulum 9 | algorithm: ddpg_torch 10 | 11 | # TODO make this section optional 12 | actions: 13 | available_actions: 14 | simple: 15 | 16 | agent: 17 | # TODO To be removed 18 | camera_params: 19 | witdh: 640 20 | height: 480 21 | 22 | environments: 23 | env_name: Pendulum-v1 24 | environment_folder: pendulum 25 | # runs: 20000 26 | runs: 20000 27 | full_experimentation_runs: 0 28 | update_every: 20 29 | show_every: 50 30 | objective_reward: -430 31 | 32 | inference: 33 | inference_file: /home/ruben/Desktop/my-RL-Studio/rl_studio/checkpoints/pendulum/ddpg/20221231_0100_actor_avg_-392.54141588266396.pkl 34 | 35 | algorithm: 36 | gamma: 0.99 37 | hidden_size: 512 38 | batch_size: 128 
-------------------------------------------------------------------------------- /rl_studio/installation/robot_mesh_setup.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #TODO now, if it is executed twice, the $GAZEBO_MODEL_PATH and $GAZEBO_RESOURCE_PATH environment variables will be added twice. 4 | #TODO Avoid this without removing different installations environment variables. 5 | 6 | if [ -z "$GAZEBO_MODEL_PATH" ]; then 7 | bash -c 'echo "export GAZEBO_MODEL_PATH=$GAZEBO_MODEL_PATH:"`pwd`/../CustomRobots/robot_mesh/model >> ~/.bashrc' 8 | else 9 | bash -c 'sed "s,GAZEBO_MODEL_PATH=[^;]*,'GAZEBO_MODEL_PATH=$GAZEBO_MODEL_PATH:`pwd`/../CustomRobots/robot_mesh/model'," -i ~/.bashrc' 10 | fi 11 | 12 | if [ -z "$GAZEBO_RESOURCE_PATH" ]; then 13 | bash -c 'echo "export GAZEBO_RESOURCE_PATH=$GAZEBO_RESOURCE_PATH:"`pwd`/../CustomRobots/robot_mesh/world >> ~/.bashrc' 14 | else 15 | bash -c 'sed "s,GAZEBO_RESOURCE_PATH=[^;]*,'GAZEBO_RESOURCE_PATH=$GAZEBO_RESOURCE_PATH:`pwd`/../CustomRobots/robot_mesh/world'," -i ~/.bashrc' 16 | fi 17 | 18 | 19 | # Reload bash 20 | exec bash 21 | -------------------------------------------------------------------------------- /rl_studio/installation/mountain_car_setup.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #TODO now, if it is executed twice, the $GAZEBO_MODEL_PATH and $GAZEBO_RESOURCE_PATH environment variables will be added twice. 4 | #TODO Avoid this without removing different installations environment variables. 5 | 6 | if [ -z "$GAZEBO_MODEL_PATH" ]; then 7 | bash -c 'echo "export GAZEBO_MODEL_PATH=$GAZEBO_MODEL_PATH:"`pwd`/../CustomRobots/mountain_car/model >> ~/.bashrc' 8 | else 9 | bash -c 'sed "s,GAZEBO_MODEL_PATH=[^;]*,'GAZEBO_MODEL_PATH=$GAZEBO_MODEL_PATH:`pwd`/../CustomRobots/mountain_car/model'," -i ~/.bashrc' 10 | fi 11 | 12 | if [ -z "$GAZEBO_RESOURCE_PATH" ]; then 13 | bash -c 'echo "export GAZEBO_RESOURCE_PATH=$GAZEBO_RESOURCE_PATH:"`pwd`/../CustomRobots/mountain_car/world >> ~/.bashrc' 14 | else 15 | bash -c 'sed "s,GAZEBO_RESOURCE_PATH=[^;]*,'GAZEBO_RESOURCE_PATH=$GAZEBO_RESOURCE_PATH:`pwd`/../CustomRobots/mountain_car/world'," -i ~/.bashrc' 16 | fi 17 | 18 | 19 | # Reload bash 20 | exec bash 21 | -------------------------------------------------------------------------------- /rl_studio/agents/ros_gazebo_pose.py: -------------------------------------------------------------------------------- 1 | import rospy 2 | from gazebo_msgs.msg import ModelState 3 | from gazebo_msgs.srv import SetModelState 4 | 5 | 6 | def main(): 7 | rospy.init_node("set_pose") 8 | state_msg = ModelState() 9 | state_msg.model_name = "f1_renault" 10 | state_msg.pose.position.x = 0 11 | state_msg.pose.position.y = 0 12 | state_msg.pose.position.z = 0.004 13 | state_msg.pose.orientation.x = 0 14 | state_msg.pose.orientation.y = 0 15 | state_msg.pose.orientation.z = 0 16 | state_msg.pose.orientation.w = 0 17 | 18 | rospy.wait_for_service("/gazebo/set_model_state") 19 | try: 20 | set_state = rospy.ServiceProxy("/gazebo/set_model_state", SetModelState) 21 | resp = set_state(state_msg) 22 | 23 | except rospy.ServiceException as e: 24 | print(f"Service call failed: {e}") 25 | 26 | 27 | if __name__ == "__main__": 28 | try: 29 | main() 30 | except rospy.ROSInterruptException: 31 | pass 32 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/robot_mesh/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .env_type import TrainingType 2 | from .exceptions import NoValidTrainingType 3 | 4 | 5 | class RobotMeshEnv: 6 | def __new__(cls, **config): 7 | cls.circuit = None 8 | cls.vel_pub = None 9 | cls.unpause = None 10 | cls.pause = None 11 | cls.reset_proxy = None 12 | cls.action_space = None 13 | cls.reward_range = None 14 | cls.model_coordinates = None 15 | cls.position = None 16 | 17 | training_type = config["environments"].get("training_type") 18 | print(config["environments"].get("launchfile")) 19 | if ( 20 | training_type == TrainingType.qlearn_env_camera.value 21 | or training_type == TrainingType.manual_env.value 22 | ): 23 | from .robot_mesh_position_env import RobotMeshEnv 24 | 25 | return RobotMeshEnv(**config) 26 | 27 | else: 28 | raise NoValidTrainingType(training_type) 29 | -------------------------------------------------------------------------------- /rl_studio/wrappers/dict.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | 4 | 5 | __all__ = ["FlattenDictWrapper"] 6 | 7 | 8 | class FlattenDictWrapper(gym.ObservationWrapper): 9 | """Flattens selected keys of a Dict observation space into 10 | an array. 11 | """ 12 | 13 | def __init__(self, env, dict_keys): 14 | super(FlattenDictWrapper, self).__init__(env) 15 | self.dict_keys = dict_keys 16 | 17 | # Figure out observation_space dimension. 18 | size = 0 19 | for key in dict_keys: 20 | shape = self.env.observation_space.spaces[key].shape 21 | size += np.prod(shape) 22 | self.observation_space = gym.spaces.Box( 23 | -np.inf, np.inf, shape=(size,), dtype="float32" 24 | ) 25 | 26 | def observation(self, observation): 27 | assert isinstance(observation, dict) 28 | obs = [] 29 | for key in self.dict_keys: 30 | obs.append(observation[key].ravel()) 31 | return np.concatenate(obs) 32 | -------------------------------------------------------------------------------- /rl_studio/algorithms/utils.py: -------------------------------------------------------------------------------- 1 | ###################################### 2 | # 3 | # Common functions and classes for /algoritmhs: DDPG, Qlearn, DQN.. 
4 | # 5 | ###################################### 6 | import time 7 | 8 | 9 | def save_actorcritic_model( 10 | agent, global_params, algoritmhs_params, environment, cumulated_reward, episode, text 11 | ): 12 | 13 | timestamp = time.strftime('%Y%m%d-%H%M%S') 14 | agent.actor_model.save( 15 | f"{global_params.models_dir}/{timestamp}_{text}_" 16 | f"C-{environment['circuit_name']}_" 17 | f"S-{environment['states']}_" 18 | f"A-{environment['action_space']}_" 19 | f"MR-{int(cumulated_reward)}_" 20 | f"E-{episode}/ACTOR" 21 | ) 22 | agent.critic_model.save( 23 | f"{global_params.models_dir}/{timestamp}_{text}_" 24 | f"C-{environment['circuit_name']}_" 25 | f"S-{environment['states']}_" 26 | f"A-{environment['action_space']}_" 27 | f"MR-{int(cumulated_reward)}_" 28 | f"E-{episode}/CRITIC" 29 | ) 30 | 31 | 32 | -------------------------------------------------------------------------------- /rl_studio/wrappers/README.md: -------------------------------------------------------------------------------- 1 | # Wrappers 2 | 3 | Wrappers are used to transform an environment in a modular way: 4 | 5 | ``` 6 | env = gym.make('Pong-v0') 7 | env = MyWrapper(env) 8 | ``` 9 | 10 | Note that we may later restructure any of the files in this directory, 11 | but will keep the wrappers available at the wrappers' top-level 12 | folder. So for example, you should access `MyWrapper` as follows: 13 | 14 | ``` 15 | # Will be supported in future releases 16 | from gym_gazebo.wrappers import MyWrapper 17 | ``` 18 | 19 | ## Quick tips for writing your own wrapper 20 | 21 | - Don't forget to call super(class_name, self).__init__(env) if you override the wrapper's __init__ function 22 | - You can access the inner environment with `self.unwrapped` 23 | - You can access the previous layer using `self.env` 24 | - The variables `metadata`, `action_space`, `observation_space`, `reward_range`, and `spec` are copied to `self` from the previous layer 25 | - Create a wrapped function for at least one of the following: `__init__(self, env)`, `step`, `reset`, `_render`, `_close`, or `_seed` 26 | - Your layered function should take its input from the previous layer (`self.env`) and/or the inner layer (`self.unwrapped`) 27 | -------------------------------------------------------------------------------- /rl_studio/agents/utilities/plot_npy_dataset.py: -------------------------------------------------------------------------------- 1 | import matplotlib as pltlib 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | def load_rewards(file_path): 6 | rewards = np.load(file_path, allow_pickle=True).item() 7 | return rewards 8 | 9 | 10 | def plot_rewards(base_directory_name, file_name): 11 | pltlib.rcParams.update({'font.size': 15}) 12 | fig, ax1 = plt.subplots() 13 | 14 | file_path = base_directory_name + "/" + file_name 15 | rewards = load_rewards(file_path) 16 | ax1.set_xscale('log') 17 | ax1.plot(range(len(rewards["avg"])), rewards["avg"], color='purple', label='ddpg') 18 | 19 | # fig.canvas.manager.full_screen_toggle() 20 | 21 | plt.legend() 22 | plt.ylabel("cun_reward") 23 | plt.xlabel("episode") 24 | # plt.show() 25 | 26 | base_path = '/home/ruben/Desktop/2020-phd-ruben-lucas/docs/assets/images/results_images/f1-follow-line/gazebo/ddpg/sp10/' 27 | ax1.figure.savefig(base_path + 'trainings.png') 28 | 29 | if __name__ == "__main__": 30 | # Pass the file_name as a parameter when calling the script 31 | file_name = "20230722-135205_Circuit-simple_States-sp10_Actions-continuous_Rewards-followline_center.npy" 32 | 
plot_rewards(file_name) 33 | -------------------------------------------------------------------------------- /rl_studio/wrappers/inference_rlstudio.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | 3 | from rl_studio.algorithms import InferencerFactory 4 | from pydantic import BaseModel 5 | 6 | from rl_studio.algorithms import InferencerFactory 7 | 8 | 9 | # TODO future iteration -> make it language agnostic. Right now it is imported and instantiated like a library. 10 | # In the future, it could be launched, binded to a port or a topic, and just answering to what it is listening 11 | class InferencerWrapper: 12 | def __init__(self, algorithm, inference_file, actions_file="", env=None): 13 | 14 | inference_params = { 15 | "algorithm": algorithm, 16 | "inference_file": inference_file, 17 | "actions_file": actions_file, 18 | "env": env, 19 | } 20 | 21 | # TODO make mandatory env to retrieve from there the actions (instead of saving/loading actions file) 22 | params = self.InferenceValidator(**inference_params) 23 | 24 | self.inferencer = InferencerFactory(params) 25 | 26 | class InferenceValidator(BaseModel): 27 | inference_file: str 28 | algorithm: str 29 | actions_file: str 30 | env: object 31 | 32 | def inference(self, state): 33 | return self.inferencer.inference(state) 34 | -------------------------------------------------------------------------------- /rl_studio/requirements.txt: -------------------------------------------------------------------------------- 1 | ale_py==0.7.4 2 | carla_birdeye_view==1.1.1 3 | cv_bridge==1.16.2 4 | cv_bridge==1.16.0 5 | gym==0.17.0 6 | gymnasium==0.28.1 7 | gymnasium==0.27.0 8 | keras==2.11.0 9 | markdownTable==6.0.0 10 | matplotlib==3.3.2 11 | matplotlib==3.1.2 12 | networkx==2.5.1 13 | numpy==1.22.0 14 | numpy==1.17.4 15 | opencv_python==4.2.0.32 16 | opencv_python==4.2.0.34 17 | opencv_python_headless==4.7.0.68 18 | optuna==3.1.0 19 | pandas==1.5.3 20 | Pillow==8.3.1 21 | Pillow==9.3.0 22 | Pillow==7.0.0 23 | Pillow==10.1.0 24 | py3rosmsgs==1.18.2 25 | py_markdown_table==0.3.3 26 | pydantic==1.9.1 27 | pydantic==1.10.2 28 | pygame==2.4.0 29 | pygame==2.1.2 30 | Pygments==2.14.0 31 | Pygments==2.2.0 32 | Pygments==2.12.0 33 | Pygments==2.3.1 34 | PyYAML==6.0 35 | PyYAML==5.4 36 | PyYAML==6.0.1 37 | reloading==1.1.2 38 | rosbag==1.16.0 39 | rosbag==1.15.14 40 | rosgraph==1.16.0 41 | rosgraph==1.15.14 42 | rosnode==1.16.0 43 | rosnode==1.15.14 44 | rospy==1.16.0 45 | scikit_learn==1.1.2 46 | scikit_learn==1.1.1 47 | scipy==1.6.1 48 | scipy==1.4.1 49 | sensor_msgs==1.13.1 50 | six==1.14.0 51 | statsmodels==0.13.2 52 | tabulate==0.9.0 53 | tensorboardX==2.6 54 | tensorboardX==2.6.2.2 55 | tensorflow==2.9.1 56 | tensorflow_gpu==2.11.0 57 | tf==1.13.2 58 | torch==1.11.0+cu113 59 | torch==1.11.0 60 | tqdm==4.64.0 61 | -------------------------------------------------------------------------------- /rl_studio/agents/utilities/camera_visualizer.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import cv2 4 | import rospy 5 | from cv_bridge import CvBridge, CvBridgeError 6 | from sensor_msgs.msg import Image 7 | 8 | 9 | class ImageConverter: 10 | def __init__(self): 11 | 12 | self.bridge = CvBridge() 13 | self.image_sub = rospy.Subscriber("/camera/rgb/image_raw", Image, self.callback) 14 | 15 | def callback(self, data): 16 | try: 17 | cv_image = self.bridge.imgmsg_to_cv2(data, "bgr8") 18 | except CvBridgeError as e: 19 | 
print(e) 20 | 21 | cv_image = cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY) 22 | cv_image = cv2.resize(cv_image, (32, 32)) 23 | # cv_image = skimage.exposure.rescale_intensity(cv_image,out_range=(0,255)) 24 | 25 | cv2.namedWindow("Image window", cv2.WINDOW_NORMAL) 26 | cv2.resizeWindow("Image window", 480, 480) 27 | cv2.imshow("Image window", cv_image) 28 | cv2.waitKey(3) 29 | 30 | 31 | def main(args): 32 | ic = ImageConverter() 33 | rospy.init_node("image_converter", anonymous=True) 34 | try: 35 | rospy.spin() 36 | except KeyboardInterrupt: 37 | print("Shutting down") 38 | cv2.destroyAllWindows() 39 | 40 | 41 | if __name__ == "__main__": 42 | main(sys.argv) 43 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/autoparking/models/__init__.py: -------------------------------------------------------------------------------- 1 | from rl_studio.envs.gazebo.autoparking.env_type import EnvironmentType 2 | from rl_studio.envs.gazebo.autoparking.exceptions import NoValidEnvironmentType 3 | 4 | 5 | class AutoparkingEnv: 6 | def __new__(cls, **config): 7 | cls.circuit = None 8 | cls.vel_pub = None 9 | cls.unpause = None 10 | cls.pause = None 11 | cls.reset_proxy = None 12 | cls.action_space = None 13 | cls.reward_range = None 14 | cls.model_coordinates = None 15 | cls.position = None 16 | 17 | training_type = config.get("training_type") 18 | 19 | # DDPG 20 | if training_type == EnvironmentType.ddpg_env.value: 21 | from rl_studio.envs.gazebo.autoparking.models.autoparking_env_ddpg import ( 22 | DDPGAutoparkingEnvGazebo, 23 | ) 24 | 25 | return DDPGAutoparkingEnvGazebo(**config) 26 | 27 | ## Qlearn 28 | elif training_type == EnvironmentType.qlearn_env.value: 29 | from rl_studio.envs.gazebo.autoparking.models.autoparking_env_qlearn import ( 30 | QlearnAutoparkingEnvGazebo, 31 | ) 32 | 33 | return QlearnAutoparkingEnvGazebo(**config) 34 | 35 | else: 36 | raise NoValidEnvironmentType(training_type) 37 | -------------------------------------------------------------------------------- /rl_studio/envs/carla/__init__.py: -------------------------------------------------------------------------------- 1 | from rl_studio.agents.tasks_type import TasksType 2 | from rl_studio.agents.frameworks_type import FrameworksType 3 | from rl_studio.algorithms.algorithms_type import AlgorithmsType 4 | from rl_studio.envs.gazebo.f1.exceptions import NoValidEnvironmentType 5 | 6 | 7 | class Carla: 8 | def __new__(cls, **environment): 9 | 10 | algorithm = environment["algorithm"] 11 | task = environment["task"] 12 | framework = environment["framework"] 13 | weather = environment["weather"] 14 | traffic_pedestrians = environment["traffic_pedestrians"] 15 | 16 | # ============================= 17 | # FollowLane - qlearn - weather: static - traffic and pedestrians: No - (No framework) 18 | # ============================= 19 | if ( 20 | task == TasksType.FOLLOWLANECARLA.value 21 | and algorithm == AlgorithmsType.QLEARN.value 22 | and weather != "dynamic" 23 | and traffic_pedestrians is False 24 | ): 25 | from rl_studio.envs.carla.followlane.followlane_qlearn import ( 26 | FollowLaneQlearnStaticWeatherNoTraffic, 27 | ) 28 | 29 | return FollowLaneQlearnStaticWeatherNoTraffic(**environment) 30 | 31 | else: 32 | raise NoValidEnvironmentType(task) 33 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/gazebo_utils.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import
rospy 4 | from gazebo_msgs.msg import ModelState 5 | from gazebo_msgs.srv import SetModelState 6 | 7 | 8 | def set_new_pose(circuit_positions_set): 9 | """ 10 | Receives the set of circuit positions (specified in the yml configuration file) 11 | and returns the index of the selected random position. 12 | 13 | (pos_number, pose_x, pose_y, pose_z, or_x, or_y, or_z, or_z) 14 | """ 15 | position = random.choice(list(enumerate(circuit_positions_set)))[0] 16 | print(position) 17 | 18 | state = ModelState() 19 | state.model_name = "f1_renault" 20 | state.pose.position.x = circuit_positions_set[position][1] 21 | state.pose.position.y = circuit_positions_set[position][2] 22 | state.pose.position.z = circuit_positions_set[position][3] 23 | state.pose.orientation.x = circuit_positions_set[position][4] 24 | state.pose.orientation.y = circuit_positions_set[position][5] 25 | state.pose.orientation.z = circuit_positions_set[position][6] 26 | state.pose.orientation.w = circuit_positions_set[position][7] 27 | 28 | rospy.wait_for_service("/gazebo/set_model_state") 29 | try: 30 | set_state = rospy.ServiceProxy("/gazebo/set_model_state", SetModelState) 31 | set_state(state) 32 | except rospy.ServiceException as e: 33 | print("Service call failed: {}".format(e)) 34 | return position 35 | -------------------------------------------------------------------------------- /rl_studio/agents/f1/settings.py: -------------------------------------------------------------------------------- 1 | ########################### 2 | # Global variables file 3 | ########################### 4 | from pydantic import BaseModel 5 | 6 | 7 | class QLearnConfig(BaseModel): 8 | actions: int = 3 9 | debug_level: int = 0 10 | telemetry: bool = False 11 | telemetry_mask: bool = True 12 | plotter_graphic: bool = False 13 | my_board: bool = True 14 | save_positions: bool = False 15 | save_model: bool = True 16 | load_model: bool = False 17 | output_dir = "./logs/qlearn_models/qlearn_camera_solved/" 18 | poi = 1 # The original pixel row is: 250, 300, 350, 400 and 450 but we work only with the half of the image 19 | actions_set = "simple" # test, simple, medium, hard 20 | max_distance = 0.5 21 | # === CAMERA === 22 | # Images size 23 | width = 640 24 | height = 480 25 | center_image = width / 2 26 | 27 | # Maximum distance from the line 28 | ranges = [300, 280, 250] # Line 1, 2 and 3 29 | reset_range = [-40, 40] 30 | last_center_line = 0 31 | if poi == 1: 32 | x_row = [60] 33 | elif poi == 2: 34 | x_row = [60, 110] 35 | elif poi == 3: 36 | x_row = [ 37 | 10, 38 | 60, 39 | 110, 40 | ] # The first and last element is not used. Just for metrics 41 | elif poi == 5: 42 | x_row = [250, 300, 350, 400, 450] 43 | 44 | 45 | qlearn = QLearnConfig() 46 | -------------------------------------------------------------------------------- /rl_studio/agents/utilities/averaged_table.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | This script creates a simple table from environemnt monitoring 4 | data obtained from '/tmp/gazebo_gym_experiments/', which has been 5 | created when calling a gazebo environment monitoring function in a test. 6 | 7 | Args: 8 | 9 | arg1 Creates a table using 'arg1' as average size delimiter. 
10 | 11 | Examples: 12 | 13 | python display_image.py 20 14 | 15 | """ 16 | import itertools 17 | import sys 18 | 19 | import gym 20 | 21 | 22 | def expand(lst, n): 23 | lst = [[i] * n for i in lst] 24 | lst = list(itertools.chain.from_iterable(lst)) 25 | return lst 26 | 27 | 28 | if __name__ == "__main__": 29 | 30 | outdir = "/tmp/gazebo_gym_experiments" 31 | data_key = "episode_rewards" 32 | mod1 = int(sys.argv[1]) 33 | 34 | results = gym.monitoring.monitor.load_results(outdir) 35 | data = results[data_key] 36 | 37 | avg_data = [] 38 | 39 | for i, val in enumerate(data): 40 | if i % mod1 == 0: 41 | if (i + mod1) < len(data): 42 | avg = sum(data[i : i + mod1]) / mod1 43 | avg_data.append(avg) 44 | 45 | print("Printing averaged graph with interval=" + str(mod1) + " ...") 46 | print("----------------") 47 | for i, val in enumerate(avg_data): 48 | print(str(i * mod1) + "-" + str(i * mod1 + mod1) + " | " + str(val)) 49 | print("----------------") 50 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/f1/models/utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | 4 | class F1GazeboUtils: 5 | def __init__(self): 6 | self.f1 = None 7 | 8 | @staticmethod 9 | def show_image_with_centrals( 10 | name, img, waitkey, centrals_in_pixels, centrals_normalized, x_row 11 | ): 12 | window_name = f"{name}" 13 | 14 | for index, value in enumerate(x_row): 15 | cv2.putText( 16 | img, 17 | str( 18 | f"{int(centrals_in_pixels[index])}" 19 | ), 20 | (int(centrals_in_pixels[index])+20, int(x_row[index])), 21 | cv2.FONT_HERSHEY_SIMPLEX, 22 | 0.3, 23 | (255, 255, 255), 24 | 1, 25 | cv2.LINE_AA, 26 | ) 27 | cv2.putText( 28 | img, 29 | str( 30 | f"[{centrals_normalized[index]}]" 31 | ), 32 | (320, int(x_row[index])), 33 | cv2.FONT_HERSHEY_SIMPLEX, 34 | 0.3, 35 | (255, 255, 255), 36 | 1, 37 | cv2.LINE_AA, 38 | ) 39 | cv2.imshow(window_name, img) 40 | cv2.waitKey(waitkey) 41 | 42 | def show_image(self, name, img, waitkey): 43 | window_name = f"{name}" 44 | cv2.imshow(window_name, img) 45 | cv2.waitKey(waitkey) 46 | -------------------------------------------------------------------------------- /rl_studio/agents/turtlebot/sarsa.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | 4 | class Sarsa: 5 | def __init__(self, actions, epsilon, alpha, gamma): 6 | self.q = {} 7 | 8 | self.epsilon = epsilon 9 | self.alpha = alpha 10 | self.gamma = gamma 11 | self.actions = actions 12 | 13 | def getQ(self, state, action): 14 | return self.q.get((state, action), 0.0) 15 | 16 | def learnQ(self, state, action, reward, value): 17 | oldv = self.q.get((state, action), None) 18 | if oldv is None: 19 | self.q[(state, action)] = reward 20 | else: 21 | self.q[(state, action)] = oldv + self.alpha * (value - oldv) 22 | 23 | def chooseAction(self, state): 24 | if random.random() < self.epsilon: 25 | action = random.choice(self.actions) 26 | else: 27 | q = [self.getQ(state, a) for a in self.actions] 28 | maxQ = max(q) 29 | count = q.count(maxQ) 30 | if count > 1: 31 | best = [i for i in range(len(self.actions)) if q[i] == maxQ] 32 | i = random.choice(best) 33 | else: 34 | i = q.index(maxQ) 35 | 36 | action = self.actions[i] 37 | return action 38 | 39 | def learn(self, state1, action1, reward, state2, action2): 40 | qnext = self.getQ(state2, action2) 41 | self.learnQ(state1, action1, reward, reward + self.gamma * qnext) 42 | 
-------------------------------------------------------------------------------- /rl_studio/agents/liveplot.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import matplotlib 3 | import matplotlib.pyplot as plt 4 | 5 | rewards_key = "episode_rewards" 6 | 7 | 8 | class LivePlot: 9 | def __init__(self, outdir, data_key=rewards_key, line_color="blue"): 10 | """ 11 | Liveplot renders a graph of either episode_rewards or episode_lengths 12 | Args: 13 | outdir (outdir): Monitor output file location used to populate the graph 14 | data_key (Optional[str]): The key in the json to graph (episode_rewards or episode_lengths). 15 | line_color (Optional[dict]): Color of the plot. 16 | """ 17 | self.outdir = outdir 18 | self.data_key = data_key 19 | self.line_color = line_color 20 | 21 | # styling options 22 | matplotlib.rcParams["toolbar"] = "None" 23 | plt.style.use("ggplot") 24 | plt.xlabel("Episodes") 25 | plt.ylabel(data_key) 26 | fig = plt.gcf().canvas.set_window_title("simulation_graph") 27 | 28 | def plot(self, env): 29 | if self.data_key is rewards_key: 30 | data = gym.wrappers.Monitor.get_episode_rewards(env) 31 | else: 32 | data = gym.wrappers.Monitor.get_episode_lengths(env) 33 | 34 | plt.plot(data, color=self.line_color) 35 | 36 | # pause so matplotlib will display 37 | # may want to figure out matplotlib animation or use a different library in the future 38 | plt.pause(0.000001) 39 | -------------------------------------------------------------------------------- /rl_studio/config/config_pendulum_ppo.yaml: -------------------------------------------------------------------------------- 1 | settings: 2 | output_dir: "./logs/" 3 | save_model: True 4 | save_positions: True 5 | telemetry: False 6 | logging_level: info 7 | mode: training 8 | agent: pendulum 9 | algorithm: ppo_continuous 10 | 11 | # TODO make this section optional 12 | actions: 13 | available_actions: 14 | simple: 15 | 16 | agent: 17 | # TODO To be removed 18 | camera_params: 19 | witdh: 640 20 | height: 480 21 | 22 | environments: 23 | env_name: Pendulum-v1 24 | environment_folder: pendulum 25 | # runs: 20000 26 | runs: 20000 27 | full_experimentation_runs: 0 28 | update_every: 200 29 | show_every: 1000 30 | objective_reward: -350 31 | # block_experience_batch: False 32 | block_experience_batch: False 33 | # random_start_level: 0.05 34 | random_start_level: 0 # Number between 0 and 1 that indicates the difficulty of the start position 35 | random_perturbations_level: 0.8 # Number between 0 and 1 that indicates the frequency of the random perturbations 36 | perturbations_intensity_std: 1 # Number between 0 and 1 that indicates the standard deviation of perturbations intensity 37 | initial_pole_angle: 0 38 | non_recoverable_angle: 0.3 39 | 40 | inference: 41 | inference_file: /home/ruben/Desktop/my-RL-Studio/rl_studio/logs/pendulum/ppo/checkpoints/20221231_0244_actor_avg_-803.8121022237663 42 | 43 | algorithm: 44 | gamma: 1 45 | epsilon: 0.15 46 | episodes_update: 5000 47 | -------------------------------------------------------------------------------- /rl_studio/agents/turtlebot/liveplot.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import gym 3 | import matplotlib 4 | import matplotlib.pyplot as plt 5 | 6 | rewards_key = "episode_rewards" 7 | 8 | 9 | class LivePlot(object): 10 | def __init__(self, outdir, data_key=rewards_key, line_color="blue"): 11 | """ 12 | Liveplot renders a graph of either 
episode_rewards or episode_lengths 13 | Args: 14 | outdir (outdir): Monitor output file location used to populate the graph 15 | data_key (Optional[str]): The key in the json to graph (episode_rewards or episode_lengths). 16 | line_color (Optional[dict]): Color of the plot. 17 | """ 18 | self.outdir = outdir 19 | self.data_key = data_key 20 | self.line_color = line_color 21 | 22 | # styling options 23 | matplotlib.rcParams["toolbar"] = "None" 24 | plt.style.use("ggplot") 25 | plt.xlabel("Episodes") 26 | plt.ylabel(data_key) 27 | fig = plt.gcf().canvas.set_window_title("simulation_graph") 28 | 29 | def plot(self, env): 30 | if self.data_key is rewards_key: 31 | data = gym.wrappers.Monitor.get_episode_rewards(env) 32 | else: 33 | data = gym.wrappers.Monitor.get_episode_lengths(env) 34 | 35 | plt.plot(data, color=self.line_color) 36 | 37 | # pause so matplotlib will display 38 | # may want to figure out matplotlib animation or use a different library in the future 39 | plt.pause(0.000001) 40 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/f1/models/followlane_qlearn.py: -------------------------------------------------------------------------------- 1 | ############################################# 2 | # - Task: Follow Lane 3 | # - Algorithm: Qlearn 4 | # - actions: discrete 5 | # - State: Simplified perception 6 | # 7 | ############################################ 8 | 9 | import math 10 | 11 | from cv_bridge import CvBridge 12 | import cv2 13 | from geometry_msgs.msg import Twist 14 | import numpy as np 15 | 16 | import rospy 17 | from sensor_msgs.msg import Image 18 | 19 | from rl_studio.agents.utils import ( 20 | print_messages, 21 | ) 22 | from rl_studio.envs.gazebo.f1.models.f1_env import F1Env 23 | from rl_studio.envs.gazebo.f1.models.settings import F1GazeboTFConfig 24 | 25 | 26 | class FollowLaneQlearnF1Gazebo(F1Env): 27 | def __init__(self, **config): 28 | 29 | ###### init F1env 30 | F1Env.__init__(self, **config) 31 | ###### init class variables 32 | F1GazeboTFConfig.__init__(self, **config) 33 | 34 | def reset(self): 35 | from rl_studio.envs.gazebo.f1.models.reset import ( 36 | Reset, 37 | ) 38 | 39 | if self.state_space == "image": 40 | return Reset.reset_f1_state_image(self) 41 | else: 42 | return Reset.reset_f1_state_sp(self) 43 | 44 | def step(self, action, step): 45 | from rl_studio.envs.gazebo.f1.models.step import ( 46 | StepFollowLane, 47 | ) 48 | 49 | return StepFollowLane.step_followlane_state_sp_actions_discretes( 50 | self, action, step 51 | ) 52 | -------------------------------------------------------------------------------- /rl_studio/config/config_cartpole_programmatic.yaml: -------------------------------------------------------------------------------- 1 | settings: 2 | output_dir: "./logs/" 3 | save_model: True 4 | save_positions: True 5 | telemetry: False 6 | logging_level: info 7 | mode: inference 8 | agent: cartpole 9 | algorithm: programmatic 10 | 11 | # TODO make this section optional 12 | actions: 13 | available_actions: 14 | simple: 15 | 16 | agent: 17 | cartpole: 18 | # TODO To be removed 19 | camera_params: 20 | witdh: 640 21 | height: 480 22 | 23 | environments: 24 | env_name: myCartpole-v0 25 | environment_folder: cartpole 26 | # runs: 20000 27 | runs: 1000 28 | full_experimentation_runs: 0 29 | update_every: 10 30 | show_every: 1000 31 | experiments: 29 32 | random_start_level: 0 # Number between 0 and 1 that indicates the difficulty of the start position 33 | random_perturbations_level: 0.1 # 
Number between 0 and 1 that indicates the frequency of the random perturbations 34 | random_perturbations_level_step: 0.1 35 | perturbations_intensity_std: 21 #Number between 0 and 1 that indicates the standard deviation of perturbations intensity 36 | perturbations_intensity_std_step: 1 37 | initial_pole_angle: 0 38 | initial_pole_angle_steps: 0.05 39 | non_recoverable_angle: 0.3 # not applicable when making experiments with init_pole_angle (always 0.2 over the initial) 40 | 41 | algorithm: 42 | # TODO make it complaining just if the relevant parameter for this algorithm is not found 43 | 44 | inference: 45 | # TODO make it complaining just if the relevant parameter for this algorithm is not found -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # gym 2 | 3 | The MIT License 4 | 5 | Copyright (c) 2016 OpenAI (https://openai.com) 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | THE SOFTWARE. 24 | 25 | # Mujoco models 26 | This work is derived from [MuJuCo models](http://www.mujoco.org/forum/index.php?resources/) used under the following license: 27 | ``` 28 | This file is part of MuJoCo. 29 | Copyright 2009-2015 Roboti LLC. 
30 | Mujoco :: Advanced physics simulation engine 31 | Source : www.roboti.us 32 | Version : 1.31 33 | Released : 23Apr16 34 | Author :: Vikash Kumar 35 | Contacts : kumar@roboti.us 36 | ``` 37 | -------------------------------------------------------------------------------- /rl_studio/config/config_cartpole_ppo.yaml: -------------------------------------------------------------------------------- 1 | settings: 2 | output_dir: "./logs/" 3 | save_model: True 4 | save_positions: True 5 | telemetry: False 6 | logging_level: info 7 | mode: inference 8 | agent: cartpole 9 | algorithm: ppo 10 | 11 | # TODO make this section optional 12 | actions: 13 | available_actions: 14 | simple: 15 | 16 | agent: 17 | cartpole: 18 | # TODO To be removed 19 | camera_params: 20 | witdh: 640 21 | height: 480 22 | 23 | environments: 24 | env_name: myCartpole-v0 25 | environment_folder: cartpole 26 | # runs: 20000 27 | runs: 100 28 | full_experimentation_runs: 0 29 | update_every: 1000 30 | show_every: 1000 31 | objective_reward: 500 32 | # block_experience_batch: False 33 | block_experience_batch: False 34 | # random_start_level: 0.05 35 | experiments: 29 36 | random_start_level: 0 # Number between 0 and 1 that indicates the difficulty of the start position 37 | random_perturbations_level: 0.1 # Number between 0 and 1 that indicates the frequency of the random perturbations 38 | random_perturbations_level_step: 0.1 39 | perturbations_intensity_std: 21 #Number between 0 and 1 that indicates the standard deviation of perturbations intensity 40 | perturbations_intensity_std_step: 1 41 | initial_pole_angle: 0 42 | initial_pole_angle_steps: 0.05 43 | non_recoverable_angle: 0.3 # not applicable when making experiments with init_pole_angle (always 0.2 over the initial) 44 | 45 | inference: 46 | inference_file: /home/ruben/Desktop/my-RL-Studio/rl_studio/logs/cartpole/ppo/checkpoints/20221110_0937_actor_avg_500.0.pkl 47 | 48 | algorithm: 49 | gamma: 1 50 | epsilon: 0.15 51 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/real_env_ros2.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | 3 | import gym 4 | import rclpy 5 | 6 | 7 | class RealEnvROS2(gym.Env): 8 | """Superclass for all Gazebo environments.""" 9 | 10 | metadata = {"render.models": ["human"]} 11 | 12 | def __init__(self): 13 | 14 | # Launch the simulation with the given launchfile name 15 | rclpy.init(args=None) 16 | self.node = rclpy.create_node("real_env_ros2") 17 | 18 | def step(self, action): 19 | 20 | # Implement this method in every subclass 21 | # Perform a step in gazebo. E.g. move the robot 22 | raise NotImplementedError 23 | 24 | def reset(self): 25 | 26 | # Implemented in subclass 27 | raise NotImplementedError 28 | 29 | def render(self, mode=None, close=False): 30 | pass 31 | 32 | def _render(self, mode=None, close=False): 33 | self._close() 34 | 35 | def _close(self): 36 | output1 = subprocess.check_call( 37 | ["cat", "/tmp/myroslaunch_" + self.port + ".pid"] 38 | ) 39 | output2 = subprocess.check_call( 40 | ["cat", "/home/erle/.ros/roscore-" + self.port + ".pid"] 41 | ) 42 | subprocess.Popen(["kill", "-INT", str(output1)]) 43 | subprocess.Popen(["kill", "-INT", str(output2)]) 44 | 45 | def close(self): 46 | pass 47 | 48 | def _configure(self): 49 | 50 | # TODO 51 | # From OpenAI API: Provides runtime configuration to the enviroment 52 | # Maybe set the Real Time Factor? 
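        # Illustrative sketch (an assumption, not part of the original file):
        # with rclpy, this kind of runtime configuration is usually exposed as
        # node parameters, e.g. (hypothetical parameter name):
        #   self.node.declare_parameter("real_time_factor", 1.0)
        #   rtf = self.node.get_parameter("real_time_factor").value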
53 | pass 54 | 55 | def _seed(self): 56 | 57 | # TODO 58 | # From OpenAI API: Sets the seed for this env's random number generator(s) 59 | pass 60 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/f1/models/followline_qlearn.py: -------------------------------------------------------------------------------- 1 | ############################################# 2 | # - Task: Follow Line 3 | # - Algorithm: Qlearn 4 | # - actions: discrete 5 | # - State: Simplified perception 6 | # 7 | ############################################ 8 | 9 | import math 10 | 11 | from cv_bridge import CvBridge 12 | import cv2 13 | from geometry_msgs.msg import Twist 14 | import numpy as np 15 | 16 | import rospy 17 | from sensor_msgs.msg import Image 18 | 19 | from rl_studio.agents.utils import ( 20 | print_messages, 21 | ) 22 | from rl_studio.envs.gazebo.f1.models.f1_env import F1Env 23 | from rl_studio.envs.gazebo.f1.models.settings import F1GazeboTFConfig 24 | 25 | 26 | class FollowLineQlearnF1Gazebo(F1Env): 27 | def __init__(self, **config): 28 | 29 | ###### init F1env 30 | F1Env.__init__(self, **config) 31 | ###### init class variables 32 | F1GazeboTFConfig.__init__(self, **config) 33 | 34 | def reset(self): 35 | from rl_studio.envs.gazebo.f1.models.reset import ( 36 | Reset, 37 | ) 38 | 39 | if self.state_space == "image": 40 | return Reset.reset_f1_state_image(self) 41 | else: 42 | return Reset.reset_f1_state_sp(self) 43 | 44 | def step(self, action, step): 45 | from rl_studio.envs.gazebo.f1.models.step import ( 46 | StepFollowLine, 47 | ) 48 | 49 | if self.state_space == "image": 50 | return StepFollowLine.step_followline_state_image_actions_discretes( 51 | self, action, step 52 | ) 53 | else: 54 | return StepFollowLine.step_followline_state_sp_actions_discretes( 55 | self, action, step 56 | ) 57 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/autoparking/models/autoparking_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import rospy 3 | from gazebo_msgs.srv import GetModelState 4 | from geometry_msgs.msg import Twist 5 | from std_srvs.srv import Empty 6 | from sensor_msgs.msg import Image 7 | 8 | from rl_studio.envs.gazebo import gazebo_envs 9 | 10 | 11 | class AutoparkingEnv(gazebo_envs.GazeboEnv): 12 | def __init__(self, **config): 13 | gazebo_envs.GazeboEnv.__init__(self, config) 14 | self.circuit_name = config.get("circuit_name") 15 | # self.circuit_positions_set = config.get("circuit_positions_set") 16 | self.alternate_pose = config.get("alternate_pose") 17 | self.model_state_name = config.get("model_state_name") 18 | self.position = None 19 | # self.start_pose = np.array(config.get("alternate_pose")) 20 | self.start_pose = np.array(config.get("gazebo_start_pose")) 21 | self.start_random_pose = config.get("gazebo_random_start_pose") 22 | self._seed() 23 | self.parking_spot_position_x = config.get("parking_spot_position_x") 24 | self.parking_spot_position_y = config.get("parking_spot_position_y") 25 | 26 | # self.cv_image_pub = rospy.Publisher('/F1ROS/cameraL/image_raw', Image, queue_size = 10) 27 | self.vel_pub = rospy.Publisher("/F1ROS/cmd_vel", Twist, queue_size=5) 28 | self.unpause = rospy.ServiceProxy("/gazebo/unpause_physics", Empty) 29 | self.pause = rospy.ServiceProxy("/gazebo/pause_physics", Empty) 30 | self.reset_proxy = rospy.ServiceProxy("/gazebo/reset_simulation", Empty) 31 | self.reward_range = (-np.inf, 
np.inf) 32 | self.model_coordinates = rospy.ServiceProxy( 33 | "/gazebo/get_model_state", GetModelState 34 | ) 35 | -------------------------------------------------------------------------------- /rl_studio/installation/formula1_setup.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #TODO now, if it is executed twice, the $GAZEBO_MODEL_PATH and $GAZEBO_RESOURCE_PATH environment variables will be added twice. 4 | #TODO Avoid this without removing different installations environment variables. 5 | 6 | if [ -z "$GAZEBO_MODEL_PATH" ]; then 7 | bash -c 'echo "export GAZEBO_MODEL_PATH=$GAZEBO_MODEL_PATH:"`pwd`/../CustomRobots/f1/models >> ~/.bashrc' 8 | else 9 | bash -c 'sed "s,GAZEBO_MODEL_PATH=[^;]*,'GAZEBO_MODEL_PATH=$GAZEBO_MODEL_PATH:`pwd`/../CustomRobots/f1/models'," -i ~/.bashrc' 10 | fi 11 | 12 | if [ -z "$GAZEBO_RESOURCE_PATH" ]; then 13 | bash -c 'echo "export GAZEBO_MODEL_PATH=$GAZEBO_MODEL_PATH:"`pwd`/../CustomRobots/f1/worlds >> ~/.bashrc' 14 | else 15 | bash -c 'sed "s,GAZEBO_RESOURCE_PATH=[^;]*,'GAZEBO_RESOURCE_PATH=$GAZEBO_RESOURCE_PATH:`pwd`/../CustomRobots/f1/worlds'," -i ~/.bashrc' 16 | fi 17 | 18 | # Add Formula 1 launch environment variable 19 | if [ -z "$GYM_GAZEBO_WORLD_CIRCUIT_F1" ]; then 20 | bash -c 'echo "export GYM_GAZEBO_WORLD_CIRCUIT_F1="`pwd`/../CustomRobots/f1/worlds/simple_circuit.world >> ~/.bashrc' 21 | else 22 | bash -c 'sed "s,GYM_GAZEBO_WORLD_CIRCUIT=[^;]*,'GYM_GAZEBO_WORLD_CIRCUIT=`pwd`/../CustomRobots/f1/worlds/simple_circuit.world'," -i ~/.bashrc' 23 | fi 24 | 25 | if [ -z "$GYM_GAZEBO_WORLD_NURBURGRING_F1" ]; then 26 | bash -c 'echo "export GYM_GAZEBO_WORLD_NURBURGRING_F1="`pwd`/../CustomRobots/f1/worlds/nurburgring_line.world >> ~/.bashrc' 27 | fi 28 | 29 | if [ -z "$GYM_GAZEBO_WORLD_MONTREAL_F1" ]; then 30 | bash -c 'echo "export GYM_GAZEBO_WORLD_MONTREAL_F1="`pwd`/../CustomRobots/f1/worlds/montreal_line.world >> ~/.bashrc' 31 | fi 32 | 33 | echo 'Formula 1 env variables loaded succesfully' 34 | 35 | # Reload bash 36 | exec bash 37 | -------------------------------------------------------------------------------- /rl_studio/config/config_cartpole_ppo_continuous.yaml: -------------------------------------------------------------------------------- 1 | settings: 2 | output_dir: "./logs/" 3 | save_model: True 4 | save_positions: True 5 | telemetry: False 6 | logging_level: info 7 | mode: inference 8 | agent: cartpole 9 | algorithm: ppo_continuous 10 | 11 | # TODO make this section optional 12 | actions: 13 | available_actions: 14 | simple: 15 | 16 | agent: 17 | cartpole: 18 | # TODO To be removed 19 | camera_params: 20 | witdh: 640 21 | height: 480 22 | 23 | environments: 24 | env_name: myCartpole-continuous-v0 25 | environment_folder: cartpole 26 | # runs: 20000 27 | runs: 100 28 | full_experimentation_runs: 0 29 | update_every: 100 30 | show_every: 1000 31 | objective_reward: 500 32 | # block_experience_batch: False 33 | block_experience_batch: False 34 | # random_start_level: 0.05 35 | experiments: 29 36 | random_start_level: 0 # Number between 0 and 1 that indicates the difficulty of the start position 37 | random_perturbations_level: 0.1 # Number between 0 and 1 that indicates the frequency of the random perturbations 38 | random_perturbations_level_step: 0.1 39 | perturbations_intensity_std: 21 #Number between 0 and 1 that indicates the standard deviation of perturbations intensity 40 | perturbations_intensity_std_step: 1 41 | initial_pole_angle: 0 42 | initial_pole_angle_steps: 0.05 43 
| non_recoverable_angle: 0.3 # not applicable when making experiments with init_pole_angle (always 0.2 over the initial) 44 | 45 | inference: 46 | inference_file: /home/ruben/Desktop/my-RL-Studio/rl_studio/logs/cartpole/ppo_continuous/checkpoints/20221231_1813_actor_avg_422.44 47 | 48 | algorithm: 49 | gamma: 1 50 | epsilon: 0.15 51 | episodes_update: 1000 -------------------------------------------------------------------------------- /rl_studio/wrappers/monitoring/tests/test_video_recorder.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import gym 4 | from gym.wrappers.monitoring.video_recorder import VideoRecorder 5 | 6 | 7 | class BrokenRecordableEnv(object): 8 | metadata = {"render.models": [None, "rgb_array"]} 9 | 10 | def render(self, mode=None): 11 | pass 12 | 13 | 14 | class UnrecordableEnv(object): 15 | metadata = {"render.models": [None]} 16 | 17 | def render(self, mode=None): 18 | pass 19 | 20 | 21 | def test_record_simple(): 22 | env = gym.make("CartPole-v1") 23 | rec = VideoRecorder(env) 24 | env.reset() 25 | rec.capture_frame() 26 | rec.close() 27 | assert not rec.empty 28 | assert not rec.broken 29 | assert os.path.exists(rec.path) 30 | f = open(rec.path) 31 | assert os.fstat(f.fileno()).st_size > 100 32 | 33 | 34 | def test_no_frames(): 35 | env = BrokenRecordableEnv() 36 | rec = VideoRecorder(env) 37 | rec.close() 38 | assert rec.empty 39 | assert rec.functional 40 | assert not os.path.exists(rec.path) 41 | 42 | 43 | def test_record_unrecordable_method(): 44 | env = UnrecordableEnv() 45 | rec = VideoRecorder(env) 46 | assert not rec.enabled 47 | rec.close() 48 | 49 | 50 | def test_record_breaking_render_method(): 51 | env = BrokenRecordableEnv() 52 | rec = VideoRecorder(env) 53 | rec.capture_frame() 54 | rec.close() 55 | assert rec.empty 56 | assert rec.broken 57 | assert not os.path.exists(rec.path) 58 | 59 | 60 | def test_text_envs(): 61 | env = gym.make("FrozenLake-v0") 62 | video = VideoRecorder(env) 63 | try: 64 | env.reset() 65 | video.capture_frame() 66 | video.close() 67 | finally: 68 | os.remove(video.path) 69 | -------------------------------------------------------------------------------- /rl_studio/config/config_cartpole_ddpg.yaml: -------------------------------------------------------------------------------- 1 | settings: 2 | output_dir: "./logs/" 3 | save_model: True 4 | save_positions: True 5 | telemetry: False 6 | logging_level: info 7 | mode: inference 8 | agent: cartpole 9 | algorithm: ddpg 10 | framework: Pytorch 11 | 12 | # TODO make this section optional 13 | actions: 14 | available_actions: 15 | simple: 16 | 17 | agent: 18 | cartpole: 19 | # TODO To be removed 20 | camera_params: 21 | witdh: 640 22 | height: 480 23 | 24 | environments: 25 | env_name: myCartpole-continuous-v0 26 | environment_folder: cartpole 27 | # runs: 20000 28 | runs: 100 29 | full_experimentation_runs: 0 30 | update_every: 100 31 | show_every: 10000 32 | objective_reward: 500 33 | # block_experience_batch: False 34 | block_experience_batch: False 35 | # random_start_level: 0.05 36 | experiments: 29 37 | random_start_level: 0 # Number between 0 and 1 that indicates the difficulty of the start position 38 | random_perturbations_level: 0.1 # Number between 0 and 1 that indicates the frequency of the random perturbations 39 | random_perturbations_level_step: 0.1 40 | perturbations_intensity_std: 21 #Number between 0 and 1 that indicates the standard deviation of perturbations intensity 41 | 
perturbations_intensity_std_step: 1 42 | initial_pole_angle: 0 43 | initial_pole_angle_steps: 0.05 44 | non_recoverable_angle: 0.3 # not applicable when making experiments with init_pole_angle (always 0.2 over the initial) 45 | 46 | inference: 47 | inference_file: /home/ruben/Desktop/my-RL-Studio/rl_studio/logs/cartpole/ddpg/checkpoints/20230107_0034_actor_avg_207.91.pkl 48 | 49 | algorithm: 50 | gamma: 0.99 51 | hidden_size: 128 52 | batch_size: 128 -------------------------------------------------------------------------------- /rl_studio/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.registration import register 2 | 3 | 4 | # F1 envs 5 | register( 6 | id="F1Env-v0", 7 | entry_point="rl_studio.envs.gazebo.f1.models:F1Env", 8 | # More arguments here 9 | ) 10 | 11 | # RobotMesh envs 12 | register( 13 | id="RobotMeshEnv-v0", 14 | entry_point="rl_studio.envs.gazebo.robot_mesh:RobotMeshEnv", 15 | # More arguments here 16 | ) 17 | 18 | register( 19 | id="myCartpole-v0", 20 | entry_point="rl_studio.envs.openai_gym.cartpole.cartpole_env:CartPoleEnv", 21 | max_episode_steps=500, 22 | kwargs={"random_start_level": 0.05}, 23 | ) 24 | 25 | register( 26 | id="myCartpole-v1", 27 | entry_point="rl_studio.envs.openai_gym.cartpole.cartpole_env_improved:CartPoleEnv", 28 | max_episode_steps=500, 29 | kwargs={"random_start_level": 0.05}, 30 | ) 31 | 32 | 33 | register( 34 | id="myCartpole-continuous-v0", 35 | entry_point="rl_studio.envs.openai_gym.cartpole.cartpole_env_continuous:CartPoleEnv", 36 | max_episode_steps=500, 37 | kwargs={"random_start_level": 0.05}, 38 | ) 39 | 40 | register( 41 | id="myCartpole-continuous-v1", 42 | entry_point="rl_studio.envs.openai_gym.cartpole.cartpole_env_continuous_improved:CartPoleEnv", 43 | max_episode_steps=500, 44 | kwargs={"random_start_level": 0.05}, 45 | ) 46 | 47 | # MountainCar envs 48 | register( 49 | id="MyMountainCarEnv-v0", 50 | entry_point="rl_studio.envs.gazebo.mountain_car:MountainCarEnv", 51 | # More arguments here 52 | ) 53 | 54 | # AutoParking envs 55 | register( 56 | id="AutoparkingEnv-v0", 57 | entry_point="rl_studio.envs.gazebo.autoparking.models:AutoparkingEnv", 58 | ) 59 | 60 | # Carla 61 | register( 62 | id="CarlaEnv-v0", 63 | entry_point="rl_studio.envs.carla:Carla", 64 | ) 65 | -------------------------------------------------------------------------------- /rl_studio/agents/f1/manual_pilot.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import datetime 4 | import pickle 5 | import time 6 | 7 | import gym 8 | 9 | from rl_studio.agents.f1 import settings 10 | from rl_studio.visual.ascii.text import MANUAL_PILOT 11 | 12 | total_episodes = 200000 13 | 14 | 15 | def save_times(checkpoints): 16 | file_name = "manual_pilot_checkpoints" 17 | file_dump = open("./logs/" + file_name + ".pkl", "wb") 18 | pickle.dump(checkpoints, file_dump) 19 | 20 | 21 | if __name__ == "__main__": 22 | 23 | print(MANUAL_PILOT) 24 | print(" - Start hour: {}".format(datetime.datetime.now())) 25 | 26 | environment = settings.envs_params["manual"] 27 | env = gym.make(environment["env"]) 28 | 29 | checkpoints = [] # "ID" - x, y - time 30 | time.sleep(5) 31 | 32 | previous = datetime.datetime.now() 33 | start_time = datetime.datetime.now() 34 | for episode in range(total_episodes): 35 | 36 | now = datetime.datetime.now() 37 | if now - datetime.timedelta(seconds=3) > previous: 38 | previous = datetime.datetime.now() 39 | x, y = env.get_position() 40 | 
checkpoints.append( 41 | [ 42 | len(checkpoints), 43 | (x, y), 44 | datetime.datetime.now().strftime("%M:%S.%f")[-4], 45 | ] 46 | ) 47 | 48 | if ( 49 | datetime.datetime.now() - datetime.timedelta(minutes=2, seconds=35) 50 | > start_time 51 | ): 52 | print("Finish. Saving parameters . . .") 53 | save_times(checkpoints) 54 | env.close() 55 | exit(0) 56 | 57 | env.execute() 58 | 59 | if episode % 500 == 0: 60 | print(episode) 61 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/f1/models/f1_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import rospy 3 | from gazebo_msgs.srv import GetModelState 4 | from geometry_msgs.msg import Twist 5 | from std_srvs.srv import Empty 6 | 7 | from rl_studio.envs.gazebo import gazebo_envs 8 | 9 | 10 | class F1Env(gazebo_envs.GazeboEnv): 11 | def __init__(self, **config): 12 | gazebo_envs.GazeboEnv.__init__(self, config) 13 | self.circuit_name = config.get("circuit_name") 14 | self.circuit_positions_set = config.get("circuit_positions_set") 15 | self.alternate_pose = config.get("alternate_pose") 16 | self.model_state_name = config.get("model_state_name") 17 | 18 | self.sleep = config.get("sleep") 19 | self.vel_pub = rospy.Publisher("/F1ROS/cmd_vel", Twist, queue_size=5) 20 | self.unpause = rospy.ServiceProxy("/gazebo/unpause_physics", Empty) 21 | self.pause = rospy.ServiceProxy("/gazebo/pause_physics", Empty) 22 | self.reset_proxy = rospy.ServiceProxy("/gazebo/reset_simulation", Empty) 23 | self.reward_range = (-np.inf, np.inf) 24 | self.model_coordinates = rospy.ServiceProxy( 25 | "/gazebo/get_model_state", GetModelState 26 | ) 27 | self.position = None 28 | # self.start_pose = np.array(config.get("start_pose")) 29 | self.start_pose = np.array(config.get("gazebo_start_pose")) 30 | self.start_random_pose = config.get("gazebo_random_start_pose") 31 | self.end = 0 32 | 33 | self._seed() 34 | 35 | def render(self, mode="human"): 36 | pass 37 | 38 | def step(self, action): 39 | 40 | raise NotImplementedError 41 | 42 | def reset(self): 43 | 44 | raise NotImplementedError 45 | 46 | def inference(self, action): 47 | 48 | raise NotImplementedError 49 | -------------------------------------------------------------------------------- /rl_studio/config/config_cartpole_dqn.yaml: -------------------------------------------------------------------------------- 1 | settings: 2 | output_dir: "./logs/" 3 | save_model: True 4 | save_positions: True 5 | telemetry: False 6 | logging_level: info 7 | mode: training 8 | agent: cartpole 9 | algorithm: dqn 10 | 11 | # TODO make this section optional 12 | actions: 13 | available_actions: 14 | simple: 15 | 16 | agent: 17 | cartpole: 18 | # TODO To be removed 19 | camera_params: 20 | witdh: 640 21 | height: 480 22 | 23 | environments: 24 | env_name: myCartpole-v0 25 | environment_folder: cartpole 26 | # runs: 20000 27 | runs: 20000 28 | full_experimentation_runs: 0 29 | update_every: 100 30 | show_every: 10000 31 | objective_reward: 500 32 | # block_experience_batch: False 33 | block_experience_batch: False 34 | # random_start_level: 0.05 35 | experiments: 1 36 | random_start_level: 0 # Number between 0 and 1 that indicates the difficulty of the start position 37 | random_perturbations_level: 0 # Number between 0 and 1 that indicates the frequency of the random perturbations 38 | random_perturbations_level_step: 0 39 | perturbations_intensity_std: 1 #Number between 0 and 1 that indicates the standard deviation of 
perturbations intensity 40 | perturbations_intensity_std_step: 1 41 | initial_pole_angle: 0 42 | initial_pole_angle_steps: 0.05 43 | non_recoverable_angle: 0.3 # not applicable when making experiments with init_pole_angle (always 0.2 over the initial) 44 | 45 | inference: 46 | inference_file: /home/ruben/Desktop/my-RL-Studio/rl_studio/checkpoints/cartpole/dqn/20221017_2118_epsilon_1_DQN_WEIGHTS_avg_475.825.pkl 47 | 48 | algorithm: 49 | # gamma: 0.95 50 | gamma: 0.95 51 | epsilon_discount: 0.9997 52 | # epsilon_discount: 0.9998 53 | # batch_size: 256 54 | batch_size: 256 55 | -------------------------------------------------------------------------------- /rl_studio/envs/carla/utils/colors.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ This is a helper module to pretty print in linux-based terminals.""" 4 | 5 | 6 | class Colors: 7 | """ 8 | Colors defined for improve the prints in each Stage 9 | """ 10 | DEBUG = '\033[1;36;1m' 11 | OKCYAN = '\033[96m' 12 | OKBLUE = '\033[94m' 13 | OKGREEN = '\033[92m' 14 | WARNING = '\033[93m' 15 | FAIL = '\033[91m' 16 | ENDC = '\033[0m' 17 | CEND = '\33[0m' 18 | CBOLD = '\33[1m' 19 | CITALIC = '\33[3m' 20 | CURL = '\33[4m' 21 | CBLINK = '\33[5m' 22 | CBLINK2 = '\33[6m' 23 | CSELECTED = '\33[7m' 24 | CBLACK = '\33[30m' 25 | CRED = '\33[31m' 26 | CGREEN = '\33[32m' 27 | CYELLOW = '\33[33m' 28 | CBLUE = '\33[34m' 29 | CVIOLET = '\33[35m' 30 | CBEIGE = '\33[36m' 31 | CWHITE = '\33[37m' 32 | CBLACKBG = '\33[40m' 33 | CREDBG = '\33[41m' 34 | CGREENBG = '\33[42m' 35 | CYELLOWBG = '\33[43m' 36 | CBLUEBG = '\33[44m' 37 | CVIOLETBG = '\33[45m' 38 | CBEIGEBG = '\33[46m' 39 | CWHITEBG = '\33[47m' 40 | CGREY = '\33[90m' 41 | CRED2 = '\33[91m' 42 | CGREEN2 = '\33[92m' 43 | CYELLOW2 = '\33[93m' 44 | CBLUE2 = '\33[94m' 45 | CVIOLET2 = '\33[95m' 46 | CBEIGE2 = '\33[96m' 47 | CWHITE2 = '\33[97m' 48 | CGREYBG = '\33[100m' 49 | CREDBG2 = '\33[101m' 50 | CGREENBG2 = '\33[102m' 51 | CYELLOWBG2 = '\33[103m' 52 | CBLUEBG2 = '\33[104m' 53 | CVIOLETBG2 = '\33[105m' 54 | CBEIGEBG2 = '\33[106m' 55 | CWHITEBG2 = '\33[107m' 56 | 57 | 58 | if __name__ == "__main__": 59 | 60 | import inspect 61 | 62 | c = Colors() 63 | variables = [i for i in dir(c) if not inspect.ismethod(i) and i not in ('__doc__', '__module__')] 64 | for v in variables: 65 | print("{}Sample:{}\t--> {}".format(getattr(c, v), c.ENDC, v)) 66 | -------------------------------------------------------------------------------- /rl_studio/config/config_mountain_car_qlearn.yaml: -------------------------------------------------------------------------------- 1 | settings: 2 | output_dir: "./logs/" 3 | save_model: True 4 | save_positions: True 5 | telemetry: False 6 | mode: inference 7 | agent: mountain_car 8 | algorithm: qlearn 9 | 10 | actions: 11 | 0: [ 0, 0, 0, 0] 12 | 1: [ 0, 0, -0.9, -0.9] 13 | 2: [ 0.9, 0.9, 0, 0] 14 | 15 | agent: 16 | mountain_car: 17 | camera_params: 18 | witdh: 640 19 | height: 480 20 | 21 | environments: 22 | env_name: MyMountainCarEnv-v0 23 | circuit_name: simple 24 | training_type: qlearn_camera 25 | launchfile: my_mountain_world.launch 26 | environment_folder: mountain_car 27 | robot_name: my_robot 28 | start_pose: 1, 2 29 | alternate_pose: True 30 | estimated_steps: 4000 31 | sensor: camera 32 | circuit_positions_set: # x, y, z, roll, pith, ???. 
yaw 33 | - [ 0, 53.462, -41.988, 0.004, 0, 0, 1.57, -1.57 ] 34 | - [ 1, 53.462, -8.734, 0.004, 0, 0, 1.57, -1.57 ] 35 | - [ 2, 39.712, -30.741, 0.004, 0, 0, 1.56, 1.56 ] 36 | - [ 3, -6.861, -36.481, 0.004, 0, 0.01, -0.858, 0.613 ] 37 | - [ 4, 20.043, 37.130, 0.003, 0, 0.103, -1.4383, -1.4383 ] 38 | goal: 6.5 39 | pos_x: -4 40 | pos_y: -2 41 | pos_z: 3 42 | max_steps: 5000 43 | 44 | # manual: 45 | # env_name: F1Env-v0 46 | # circuit_name: manual 47 | # training_type: qlearn_camera 48 | # launch: simple_circuit.launch 49 | # start_pose: 0 50 | # alternate_pose: False 51 | # estimated_steps: 4000 52 | # sensor: camera 53 | 54 | inference: 55 | inference_file: /home/ruben/Desktop/my-RL-Studio/rl_studio/checkpoints/mountain_car/1_20221226_1928_epsilon_0.01_QTABLE.pkl 56 | actions_file: /home/ruben/Desktop/my-RL-Studio/rl_studio/checkpoints/mountain_car/actions_set_20221226_1814 57 | 58 | algorithm: 59 | alpha: 0.9 60 | epsilon: 0.95 61 | gamma: 0.9 62 | epsilon_discount: 0.99995 63 | -------------------------------------------------------------------------------- /rl_studio/envs/carla/followlane/utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | 5 | class AutoCarlaUtils: 6 | def __init__(self): 7 | self.f1 = None 8 | 9 | @staticmethod 10 | def show_image_with_centrals( 11 | name, img, waitkey, centrals_in_pixels, centrals_normalized, x_row 12 | ): 13 | window_name = f"{name}" 14 | img = np.array(img) 15 | 16 | for index, _ in enumerate(x_row): 17 | cv2.putText( 18 | img, 19 | str(f"{int(centrals_in_pixels[index])}"), 20 | (int(centrals_in_pixels[index]) + 20, int(x_row[index])), 21 | cv2.FONT_HERSHEY_SIMPLEX, 22 | 0.3, 23 | (255, 255, 255), 24 | 1, 25 | cv2.LINE_AA, 26 | ) 27 | cv2.putText( 28 | img, 29 | str(f"[{centrals_normalized[index]}]"), 30 | (320, int(x_row[index])), 31 | cv2.FONT_HERSHEY_SIMPLEX, 32 | 0.3, 33 | (255, 255, 255), 34 | 1, 35 | cv2.LINE_AA, 36 | ) 37 | cv2.line( 38 | img, 39 | (0, int(x_row[index])), 40 | (int(img.shape[1]), int(x_row[index])), 41 | color=(10, 10, 10), 42 | thickness=1, 43 | ) 44 | cv2.line( 45 | img, 46 | (int(img.shape[1] // 2), 0), 47 | (int(img.shape[1] // 2), int(img.shape[0])), 48 | color=(30, 30, 30), 49 | thickness=1, 50 | ) 51 | 52 | cv2.imshow(window_name, img) 53 | cv2.waitKey(waitkey) 54 | 55 | @staticmethod 56 | def show_image(name, img, waitkey): 57 | window_name = f"{name}" 58 | cv2.imshow(window_name, img) 59 | cv2.waitKey(waitkey) 60 | -------------------------------------------------------------------------------- /rl_studio/wrappers/time_limit.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | from gym import Wrapper, logger 4 | 5 | 6 | class TimeLimit(Wrapper): 7 | def __init__(self, env, max_episode_seconds=None, max_episode_steps=None): 8 | super(TimeLimit, self).__init__(env) 9 | self._max_episode_seconds = max_episode_seconds 10 | self._max_episode_steps = max_episode_steps 11 | 12 | self._elapsed_steps = 0 13 | self._episode_started_at = None 14 | 15 | @property 16 | def _elapsed_seconds(self): 17 | return time.time() - self._episode_started_at 18 | 19 | def _past_limit(self): 20 | """Return true if we are past our limit""" 21 | if ( 22 | self._max_episode_steps is not None 23 | and self._max_episode_steps <= self._elapsed_steps 24 | ): 25 | logger.debug("Env has passed the step limit defined by TimeLimit.") 26 | return True 27 | 28 | if ( 29 | self._max_episode_seconds is not None 30 | and 
self._max_episode_seconds <= self._elapsed_seconds 31 | ): 32 | logger.debug("Env has passed the seconds limit defined by TimeLimit.") 33 | return True 34 | 35 | return False 36 | 37 | def step(self, action): 38 | assert ( 39 | self._episode_started_at is not None 40 | ), "Cannot call env.step() before calling reset()" 41 | observation, reward, done, info = self.env.step(action) 42 | self._elapsed_steps += 1 43 | 44 | if self._past_limit(): 45 | if self.metadata.get("semantics.autoreset"): 46 | _ = self.reset() # automatically reset the env 47 | done = True 48 | 49 | return observation, reward, done, info 50 | 51 | def reset(self): 52 | self._episode_started_at = time.time() 53 | self._elapsed_steps = 0 54 | return self.env.reset() 55 | -------------------------------------------------------------------------------- /rl_studio/algorithms/README.md: -------------------------------------------------------------------------------- 1 | # Algorithms 2 | 3 | ## Q-learning 4 | It is based in Watkins, C. J., & Dayan, P. (1992). Q-learning. Machine learning, 8(3), 279-292. 5 | As a tabular method to solve reinforcement learning tasks, it acts in a particular state getting an inmediate reward, saving all pairs (states, actions) -> rewards in a table. 6 | Q-learning has been designed to work with low dimensional states and discrete actions. It is one of the canonical RL algorithms with a high performance for low level dimensionality tasks. 7 | 8 | Our implementation of Q-Learning algorithm has two approaches: with table or dictionnary. You can choose any of them through config file. Dictionnary is more efficient in terms of memory size due to its dynamic implementation. Otherwise table option is closer to the Q-learning original approach, developed with numpy library. 9 | Both have been tested succesfully in different tasks. 10 | 11 | 12 | 13 | 14 | --- 15 | ## Deep Deterministic Gradient Policy (DDPG) 16 | 17 | The algorithm is based in LILLICRAP, Timothy P., et al. Continuous control with deep reinforcement learning. arXiv preprint arXiv:1509.02971, 2015. 18 | 19 | It allows us to work with **multidimensional states** such as raw input from a camera and with **continuous** or **discrete actions** to develop complex projects. Now, it is based on **Tensorflow**, although in the future we will be able to integrate other Deep Learning frameworks. 20 | 21 | 22 | --- 23 | 24 | ## Deep Q Networks (DQN) 25 | 26 | Based on [Human-level control through deep reinforcement learning whitepaper](https://www.nature.com/articles/nature14236?wm=book_wap_0005), it allows working with **multidimensional states** with Deep Neural Nets and **discrete actions**. Our solution is currently based on Tensorflow framework. 27 | 28 | 29 | --- 30 | ## How to config and launch 31 | If you want to config and training or inferencing, please go to [agents](../agents/README.md) section -------------------------------------------------------------------------------- /rl_studio/visual/ascii/images.py: -------------------------------------------------------------------------------- 1 | """ 2 | References: http://www.patorjk.com/software/taag/#p=display&f=Sub-Zero&t=Aliasrc%20 3 | """ 4 | 5 | JDEROBOT_LOGO = """ 6 | ``..--------..`` 7 | `.-:/::--.........--:/:-.` 8 | `-:/-.` `.-::-` 9 | .:/-` `-::-` 10 | -/:. `:/-` 11 | ./:. `:/- 12 | :/- `-:+ossyyyyyso+/-. ./:` 13 | `::` ./syo/:.`` `.-/+sy+:` `:/` 14 | `::` `:sy+- .:oy+. :/` 15 | :/` /ys: .+yo. `:: 16 | ./- .sy: .oy/ ./- 17 | :/ -ys. `.-://++//:-. 
/y+ :/ 18 | `/: .ys` ./+/-.`` ``.-/+:. /y+ -/. 19 | ./- oy. .++-` `-+/. oy- ./. 20 | ./-.yo :+: `/+- -y+ ./. 21 | `/:-y/ -+: .-::::-. /+. `ys -/. 22 | :/:y/`++ -//-.``.-//- `++ `ys :/ 23 | ./:yo`+: :/. ./: ++` -y+ ./- 24 | :/sy:+/./- -/. ++ oy. :: 25 | `//yy++//- -/. -+: /y/ :/` 26 | `:/yyo++/. .// -+/ /y+ `:/. 27 | `:/syo++//-.``.-//- `/+: .sy: .::` 28 | -/+syo+++/:::-. `-/+:` .oy+` `-/- 29 | `-/+syso++//:///+/:.` ./sy+. `-/:` 30 | `-//osyso+:-.` `..:/oys+- `.:/-` 31 | .-////++osyyyysso+/-` `.-:/-. 32 | `.-:/:--..``````...-:/:-.` 33 | `..---::::---..` 34 | """ 35 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/real_env.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | 4 | import gym 5 | import rospy 6 | 7 | 8 | class RealEnv(gym.Env): 9 | """Superclass for all Gazebo environments.""" 10 | 11 | metadata = {"render.models": ["human"]} 12 | 13 | def __init__(self): 14 | 15 | self.port = "11311" # str(random_number) #os.environ["ROS_PORT_SIM"] 16 | 17 | # #start roscore 18 | # subprocess.Popen(["roscore", "-p", self.port]) 19 | # time.sleep(1) 20 | print("Roscore launched!") 21 | 22 | # Launch the simulation with the given launchfile name 23 | rospy.init_node("gym", anonymous=True) 24 | 25 | def set_ros_master_uri(self): 26 | os.environ["ROS_MASTER_URI"] = self.ros_master_uri 27 | 28 | def step(self, action): 29 | 30 | # Implement this method in every subclass 31 | # Perform a step in gazebo. E.g. move the robot 32 | raise NotImplementedError 33 | 34 | def reset(self): 35 | 36 | # Implemented in subclass 37 | raise NotImplementedError 38 | 39 | def render(self, mode=None, close=False): 40 | pass 41 | 42 | def _render(self, mode=None, close=False): 43 | self._close() 44 | 45 | def _close(self): 46 | output1 = subprocess.check_call( 47 | ["cat", "/tmp/myroslaunch_" + self.port + ".pid"] 48 | ) 49 | output2 = subprocess.check_call( 50 | ["cat", "/home/erle/.ros/roscore-" + self.port + ".pid"] 51 | ) 52 | subprocess.Popen(["kill", "-INT", str(output1)]) 53 | subprocess.Popen(["kill", "-INT", str(output2)]) 54 | 55 | def close(self): 56 | pass 57 | 58 | def _configure(self): 59 | 60 | # TODO 61 | # From OpenAI API: Provides runtime configuration to the enviroment 62 | # Maybe set the Real Time Factor? 
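        # Illustrative sketch (an assumption, not part of the original file):
        # in ROS1, this kind of runtime configuration would normally be read
        # from the parameter server, e.g. (hypothetical parameter name):
        #   real_time_factor = rospy.get_param("/real_time_factor", 1.0)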
63 | pass 64 | 65 | def _seed(self): 66 | 67 | # TODO 68 | # From OpenAI API: Sets the seed for this env's random number generator(s) 69 | pass 70 | -------------------------------------------------------------------------------- /rl_studio/agents/turtlebot/qlearn.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | 4 | class QLearn: 5 | def __init__(self, actions, epsilon, alpha, gamma): 6 | self.q = {} 7 | self.epsilon = epsilon # exploration constant 8 | self.alpha = alpha # discount constant 9 | self.gamma = gamma # discount factor 10 | self.actions = actions 11 | 12 | def getQ(self, state, action): 13 | return self.q.get((state, action), 0.0) 14 | 15 | def learnQ(self, state, action, reward, value): 16 | """ 17 | Q-learning: 18 | Q(s, a) += alpha * (reward(s,a) + max(Q(s') - Q(s,a)) 19 | """ 20 | oldv = self.q.get((state, action), None) 21 | if oldv is None: 22 | self.q[(state, action)] = reward 23 | else: 24 | self.q[(state, action)] = oldv + self.alpha * (value - oldv) 25 | 26 | def chooseAction(self, state, return_q=False): 27 | q = [self.getQ(state, a) for a in self.actions] 28 | maxQ = max(q) 29 | 30 | if random.random() < self.epsilon: 31 | minQ = min(q) 32 | mag = max(abs(minQ), abs(maxQ)) 33 | # add random values to all the actions, recalculate maxQ 34 | q = [ 35 | q[i] + random.random() * mag - 0.5 * mag 36 | for i in range(len(self.actions)) 37 | ] 38 | maxQ = max(q) 39 | 40 | count = q.count(maxQ) 41 | # In case there're several state-action max values 42 | # we select a random one among them 43 | if count > 1: 44 | best = [i for i in range(len(self.actions)) if q[i] == maxQ] 45 | i = random.choice(best) 46 | else: 47 | i = q.index(maxQ) 48 | 49 | action = self.actions[i] 50 | if return_q: # if they want it, give it! 
51 | return action, q 52 | return action 53 | 54 | def learn(self, state1, action1, reward, state2): 55 | maxqnew = max([self.getQ(state2, a) for a in self.actions]) 56 | self.learnQ(state1, action1, reward, reward + self.gamma * maxqnew) 57 | -------------------------------------------------------------------------------- /rl_studio/agents/utilities/plot_ECDF_template.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import matplotlib as pltlib 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | from statsmodels.distributions.empirical_distribution import ECDF 6 | 7 | RUNS = 100 8 | max_episode_steps = 500 9 | 10 | def plot_ecdf(sample, color, label): 11 | # fit a cdf 12 | ecdf = ECDF(sample) 13 | # get cumulative probability for values 14 | print('P(x<20): %.3f' % ecdf(20)) 15 | print('P(x<40): %.3f' % ecdf(40)) 16 | print('P(x<60): %.3f' % ecdf(60)) 17 | # plot the cdf 18 | plt.plot(ecdf.x, ecdf.y, color=color, label=label) 19 | 20 | if __name__ == "__main__": 21 | 22 | pltlib.rcParams.update({'font.size': 15}) 23 | 24 | rewards_file = open( 25 | "/rl_studio/logs/cartpole/old_datasets/dqn_analysis/training_with_frequencies/2022-10-20 23:05:26.343167__rewards_rsl-0_rpl-0.2_pi-10.pkl", "rb") 26 | rewards = pickle.load(rewards_file) 27 | rewards = np.asarray(rewards) 28 | plot_ecdf(rewards, 'blue', 'trained with frequency = 0') 29 | 30 | rewards_file = open( 31 | "/rl_studio/logs/cartpole/old_datasets/dqn_analysis/training_with_frequencies/2022-10-20 23:00:41.230018__rewards_rsl-0_rpl-0.2_pi-10.pkl", "rb") 32 | rewards = pickle.load(rewards_file) 33 | rewards = np.asarray(rewards) 34 | plot_ecdf(rewards, 'green', 'trained with frequency = 0.1') 35 | 36 | rewards_file = open( 37 | "/rl_studio/logs/cartpole/old_datasets/dqn_analysis/training_with_frequencies/2022-10-20 23:00:04.352224__rewards_rsl-0_rpl-0.2_pi-10.pkl", "rb") 38 | rewards = pickle.load(rewards_file) 39 | rewards = np.asarray(rewards) 40 | plot_ecdf(rewards, 'orange', 'trained with frequency = 0.2') 41 | 42 | rewards_file = open( 43 | "/rl_studio/logs/cartpole/old_datasets/dqn_analysis/training_with_frequencies/2022-10-20 22:59:30.164014__rewards_rsl-0_rpl-0.2_pi-10.pkl", "rb") 44 | rewards = pickle.load(rewards_file) 45 | rewards = np.asarray(rewards) 46 | plot_ecdf(rewards, 'black', 'trained with frequency = 0.3') 47 | 48 | plt.legend() 49 | plt.ylabel("percentage of runs") 50 | plt.xlabel("steps") 51 | plt.show() 52 | 53 | -------------------------------------------------------------------------------- /rl_studio/agents/utilities/plot_reward_function.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | def normalize_range(num, a, b): 5 | return (num - a) / (b - a) 6 | 7 | def linear_function(cross_x, slope, x): 8 | return cross_x + (slope * x) 9 | 10 | def sigmoid_function(start, end, x, slope=10): 11 | slope = slope / (end - start) 12 | sigmoid = 1 / (1 + np.exp(-slope * (x - ((start + end) / 2)))) 13 | return sigmoid 14 | 15 | def reward_proximity(state): 16 | if abs(state) > 0.7: 17 | return 0 18 | else: 19 | # return 1 - abs(state) 20 | # return linear_function(1, -1.4, abs(state)) 21 | return pow(1 - abs(state), 4) 22 | # return 1 - sigmoid_function(0, 1, abs(state), 5) 23 | 24 | 25 | def rewards_followline_velocity_center(v, pos, range_v): 26 | p_reward = reward_proximity(pos) 27 | v_norm = normalize_range(v, range_v[0], range_v[1]) 28 | v_r = v_norm * 
pow(p_reward, 2) 29 | beta_pos = 0.7 30 | reward = (beta_pos * p_reward) + ((1 - beta_pos) * v_r) 31 | v_punish = reward * (1 - p_reward) * v_norm 32 | reward = reward - v_punish 33 | 34 | return reward 35 | 36 | range_v = [0, 10] 37 | 38 | # Define the ranges for v, w, and pos 39 | v_range = np.linspace(range_v[0], range_v[1], 50) # Adjust the range as needed 40 | pos_range = np.linspace(-1, 1, 50) # Adjust the range as needed 41 | 42 | # Create a grid of values for v, w, and pos 43 | V, POS = np.meshgrid(v_range, pos_range) 44 | 45 | # Calculate the rewards for each combination of v, w, and pos 46 | rewards = np.empty_like(V) 47 | for i in range(V.shape[0]): 48 | for j in range(V.shape[1]): 49 | rewards[i, j] = rewards_followline_velocity_center(V[i, j], POS[i, j], range_v) 50 | 51 | # Create a 3D plot 52 | fig = plt.figure(figsize=(10, 8)) 53 | ax = fig.add_subplot(111, projection='3d') 54 | 55 | # Plot the 3D surface 56 | surf = ax.plot_surface(POS, V, rewards, cmap='viridis') 57 | 58 | # Set labels for the axes 59 | ax.set_xlabel('Center Distance (pos)') 60 | ax.set_ylabel('Linear Velocity (v)') 61 | ax.set_zlabel('Reward') 62 | 63 | # Show the color bar 64 | fig.colorbar(surf, shrink=0.5, aspect=5) 65 | 66 | plt.show() 67 | -------------------------------------------------------------------------------- /rl_studio/config/config_cartpole_qlearn.yaml: -------------------------------------------------------------------------------- 1 | settings: 2 | output_dir: "./logs/" 3 | save_model: True 4 | save_positions: True 5 | telemetry: False 6 | logging_level: info 7 | mode: training 8 | agent: cartpole 9 | algorithm: qlearn 10 | 11 | # TODO make this section optional 12 | actions: 13 | available_actions: 14 | simple: 15 | 16 | agent: 17 | cartpole: 18 | # TODO To be removed 19 | camera_params: 20 | witdh: 640 21 | height: 480 22 | 23 | environments: 24 | env_name: myCartpole-v0 25 | environment_folder: cartpole 26 | # runs: 20000 27 | angle_bins: 100 28 | pos_bins: 100 29 | runs: 4000000 30 | full_experimentation_runs: 0 31 | update_every: 10000 32 | save_every: 10000000 33 | show_every: 10000 34 | objective_reward: 500 35 | # block_experience_batch: False 36 | block_experience_batch: False 37 | # random_start_level: 0.05 38 | experiments: 1 39 | random_start_level: 0 # Number between 0 and 1 that indicates the difficulty of the start position 40 | random_perturbations_level: 0 # Number between 0 and 1 that indicates the frequency of the random perturbations 41 | random_perturbations_level_step: 0 42 | perturbations_intensity_std: 0 #Number between 0 and 1 that indicates the standard deviation of perturbations intensity 43 | perturbations_intensity_std_step: 0 44 | initial_pole_angle: 0 45 | initial_pole_angle_steps: 0 46 | non_recoverable_angle: 0.3 # not applicable when making experiments with init_pole_angle (always 0.2 over the initial) 47 | 48 | # previously_trained_agent: /home/ruben/Desktop/my-RL-Studio/rl_studio/logs/cartpole/qlearning/checkpoints/20221116_2001_epsilon_0.01_QTABLE_avg_ 179.357.pkl 49 | reward_value: 1 50 | punish: 0 51 | reward_shaping: 0 52 | 53 | inference: 54 | inference_file: /home/ruben/Desktop/my-RL-Studio/rl_studio/logs/cartpole/qlearning/checkpoints/old/20221110_0931_epsilon_0.116_406.5153_QTABLE.pkl_avg_.pkl 55 | actions_file: /home/ruben/Desktop/my-RL-Studio/rl_studio/logs/cartpole/qlearning/checkpoints/actions_set_20221109_2108 56 | 57 | algorithm: 58 | alpha: 0.9 59 | epsilon: 0.99 60 | gamma: 0.9 61 | epsilon_discount: 0.99999997 62 | 
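  # Illustrative note (an assumption, not part of the original config): if the
  # discount above is applied N times, exploration decays as
  # epsilon * epsilon_discount**N; for example, 0.99 * 0.99999997**4000000 is
  # roughly 0.88, so epsilon stays high across the 4,000,000 configured runs.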
-------------------------------------------------------------------------------- /rl_studio/agents/robot_mesh/manual_pilot.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import sys as system 3 | import time 4 | 5 | import gym 6 | 7 | 8 | class ManualRobotMeshTrainer: 9 | def __init__(self, params): 10 | # TODO: Create a pydantic metaclass to simplify the way we extract the params 11 | # environment params 12 | self.environment_params = params.environment["params"] 13 | self.env_name = params.environment["params"]["env_name"] 14 | env_params = params.environment["params"] 15 | actions = params.environment["actions"] 16 | env_params["actions"] = actions 17 | self.env = gym.make(self.env_name, **env_params) 18 | # algorithm params 19 | self.alpha = params.algorithm["params"]["alpha"] 20 | self.epsilon = params.algorithm["params"]["epsilon"] 21 | self.gamma = params.algorithm["params"]["gamma"] 22 | 23 | def execute_action(self, env): 24 | for step in range(50000): 25 | 26 | input_order = input("provide action (0-up, 1-right, 2-down, 3-left): ") 27 | if ( 28 | input_order == "0" 29 | or input_order == "1" 30 | or input_order == "2" 31 | or input_order == "3" 32 | ): 33 | action = int(input_order) 34 | print("Selected Action!! " + str(action)) 35 | # Execute the action and get feedback 36 | next_state, reward, env.done, lap_completed = env.step(action) 37 | 38 | env._flush(force=True) 39 | if not env.done: 40 | state = next_state 41 | else: 42 | break 43 | elif input_order == "q": 44 | system.exit(1) 45 | else: 46 | print("wrong action! Try again") 47 | 48 | def main(self): 49 | 50 | print(f"\t- Start hour: {datetime.datetime.now()}\n") 51 | print(f"\t- Environment params:\n{self.environment_params}") 52 | outdir = "./logs/robot_mesh/" 53 | env = gym.wrappers.Monitor(self.env, outdir, force=True) 54 | total_episodes = 20000 55 | env.done = False 56 | 57 | for episode in range(total_episodes): 58 | print("resetting") 59 | time.sleep(5) 60 | env.reset() 61 | 62 | self.execute_action(env) 63 | 64 | env.close() 65 | -------------------------------------------------------------------------------- /rl_studio/agents/turtlebot/memory.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import numpy as np 4 | 5 | 6 | class Memory: 7 | """ 8 | This class provides an abstraction to store the [s, a, r, a'] elements of each iteration. 9 | Instead of using tuples (as other implementations do), the information is stored in lists 10 | that get returned as another list of dictionaries with each key corresponding to either 11 | "state", "action", "reward", "nextState" or "isFinal". 
12 | """ 13 | 14 | def __init__(self, size): 15 | self.size = size 16 | self.currentPosition = 0 17 | self.states = [] 18 | self.actions = [] 19 | self.rewards = [] 20 | self.newStates = [] 21 | self.finals = [] 22 | 23 | def getMiniBatch(self, size): 24 | indices = random.sample( 25 | np.arange(len(self.states)), min(size, len(self.states)) 26 | ) 27 | miniBatch = [] 28 | for index in indices: 29 | miniBatch.append( 30 | { 31 | "state": self.states[index], 32 | "action": self.actions[index], 33 | "reward": self.rewards[index], 34 | "newState": self.newStates[index], 35 | "isFinal": self.finals[index], 36 | } 37 | ) 38 | return miniBatch 39 | 40 | def getCurrentSize(self): 41 | return len(self.states) 42 | 43 | def getMemory(self, index): 44 | return { 45 | "state": self.states[index], 46 | "action": self.actions[index], 47 | "reward": self.rewards[index], 48 | "newState": self.newStates[index], 49 | "isFinal": self.finals[index], 50 | } 51 | 52 | def addMemory(self, state, action, reward, newState, isFinal): 53 | if self.currentPosition >= self.size - 1: 54 | self.currentPosition = 0 55 | if len(self.states) > self.size: 56 | self.states[self.currentPosition] = state 57 | self.actions[self.currentPosition] = action 58 | self.rewards[self.currentPosition] = reward 59 | self.newStates[self.currentPosition] = newState 60 | self.finals[self.currentPosition] = isFinal 61 | else: 62 | self.states.append(state) 63 | self.actions.append(action) 64 | self.rewards.append(reward) 65 | self.newStates.append(newState) 66 | self.finals.append(isFinal) 67 | 68 | self.currentPosition += 1 69 | -------------------------------------------------------------------------------- /rl_studio/algorithms/memory.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import numpy as np 4 | 5 | 6 | class Memory: 7 | """ 8 | This class provides an abstraction to store the [s, a, r, a'] elements of each iteration. 9 | Instead of using tuples (as other implementations do), the information is stored in lists 10 | that get returned as another list of dictionaries with each key corresponding to either 11 | "state", "action", "reward", "nextState" or "isFinal". 
12 | """ 13 | 14 | def __init__(self, size): 15 | self.size = size 16 | self.currentPosition = 0 17 | self.states = [] 18 | self.actions = [] 19 | self.rewards = [] 20 | self.newStates = [] 21 | self.finals = [] 22 | 23 | def getMiniBatch(self, size): 24 | indices = random.sample( 25 | list(np.arange(len(self.states))), min(size, len(self.states)) 26 | ) 27 | miniBatch = [] 28 | for index in indices: 29 | miniBatch.append( 30 | { 31 | "state": self.states[index], 32 | "action": self.actions[index], 33 | "reward": self.rewards[index], 34 | "newState": self.newStates[index], 35 | "isFinal": self.finals[index], 36 | } 37 | ) 38 | return miniBatch 39 | 40 | def getCurrentSize(self): 41 | return len(self.states) 42 | 43 | def getMemory(self, index): 44 | return { 45 | "state": self.states[index], 46 | "action": self.actions[index], 47 | "reward": self.rewards[index], 48 | "newState": self.newStates[index], 49 | "isFinal": self.finals[index], 50 | } 51 | 52 | def addMemory(self, state, action, reward, newState, isFinal): 53 | if self.currentPosition >= self.size - 1: 54 | self.currentPosition = 0 55 | if len(self.states) > self.size: 56 | self.states[self.currentPosition] = state 57 | self.actions[self.currentPosition] = action 58 | self.rewards[self.currentPosition] = reward 59 | self.newStates[self.currentPosition] = newState 60 | self.finals[self.currentPosition] = isFinal 61 | else: 62 | self.states.append(state) 63 | self.actions.append(action) 64 | self.rewards.append(reward) 65 | self.newStates.append(newState) 66 | self.finals.append(isFinal) 67 | 68 | self.currentPosition += 1 69 | -------------------------------------------------------------------------------- /rl_studio/envs/carla/followlane/reset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from rl_studio.agents.utils import ( 4 | print_messages, 5 | ) 6 | from rl_studio.envs.gazebo.f1.models.f1_env import F1Env 7 | 8 | 9 | class Reset(F1Env): 10 | """ 11 | Works for Follow Line and Follow Lane tasks 12 | """ 13 | 14 | def reset_f1_state_image(self): 15 | """ 16 | reset for 17 | - State: Image 18 | - tasks: FollowLane and FollowLine 19 | """ 20 | self._gazebo_reset() 21 | # === POSE === 22 | if self.alternate_pose: 23 | self._gazebo_set_random_pose_f1_follow_rigth_lane() 24 | else: 25 | self._gazebo_set_fix_pose_f1_follow_right_lane() 26 | 27 | self._gazebo_unpause() 28 | 29 | ##==== get image from sensor camera 30 | f1_image_camera, _ = self.f1gazeboimages.get_camera_info() 31 | self._gazebo_pause() 32 | 33 | ##==== calculating State 34 | # image as observation 35 | state = np.array( 36 | self.f1gazeboimages.image_preprocessing_black_white_32x32( 37 | f1_image_camera.data, self.height 38 | ) 39 | ) 40 | state_size = state.shape 41 | 42 | return state, state_size 43 | 44 | def reset_f1_state_sp(self): 45 | """ 46 | reset for 47 | - State: Simplified perception 48 | - tasks: FollowLane and FollowLine 49 | """ 50 | self._gazebo_reset() 51 | # === POSE === 52 | if self.alternate_pose: 53 | self._gazebo_set_random_pose_f1_follow_rigth_lane() 54 | else: 55 | self._gazebo_set_fix_pose_f1_follow_right_lane() 56 | 57 | self._gazebo_unpause() 58 | 59 | ##==== get image from sensor camera 60 | f1_image_camera, _ = self.f1gazeboimages.get_camera_info() 61 | self._gazebo_pause() 62 | 63 | ##==== calculating State 64 | # simplified perception as observation 65 | centrals_in_pixels, _ = self.simplifiedperception.calculate_centrals_lane( 66 | f1_image_camera.data, 67 | 
self.height, 68 | self.width, 69 | self.x_row, 70 | self.lower_limit, 71 | self.center_image, 72 | ) 73 | states = self.simplifiedperception.calculate_observation( 74 | centrals_in_pixels, self.center_image, self.pixel_region 75 | ) 76 | state = [states[0]] 77 | state_size = len(state) 78 | 79 | return state, state_size 80 | -------------------------------------------------------------------------------- /rl_studio/visual/ascii/text.py: -------------------------------------------------------------------------------- 1 | """ 2 | References: http://www.patorjk.com/software/taag/#p=display&f=Sub-Zero&t=Aliasrc%20 3 | """ 4 | 5 | LETS_GO = """ 6 | ______ __ 7 | / ____/___ / / 8 | / / __/ __ \/ / 9 | / /_/ / /_/ /_/ 10 | \____/\____(_) 11 | """ 12 | 13 | QLEARN_CAMERA = """ 14 | ___ _ ____ 15 | / _ \| | ___ __ _ _ __ _ __ / ___|__ _ _ __ ___ ___ _ __ __ _ 16 | | | | | |/ _ \/ _` | '__| '_ \ _____ | | / _` | '_ ` _ \ / _ \ '__/ _` | 17 | | |_| | | __/ (_| | | | | | | |_____| | |__| (_| | | | | | | __/ | | (_| | 18 | \__\_\_|\___|\__,_|_| |_| |_| \____\__,_|_| |_| |_|\___|_| \__,_| 19 | 20 | """ 21 | 22 | JDEROBOT = """ 23 | ___ _ ______ _ _ 24 | |_ | | | | ___ \ | | | | 25 | | | __| | ___| |_/ /___ | |__ ___ | |_ 26 | | |/ _` |/ _ \ // _ \| '_ \ / _ \| __| 27 | /\__/ / (_| | __/ |\ \ (_) | |_) | (_) | |_ 28 | \____/ \__,_|\___\_| \_\___/|_.__/ \___/ \__| 29 | 30 | """ 31 | 32 | END_OF_TRAIN = """ 33 | _____ _ _ _ _ _ 34 | |_ _| __ __ _(_)_ __ (_)_ __ __ _ ___ ___ _ __ ___ _ __ | | ___| |_ ___ __| | 35 | | || '__/ _` | | '_ \| | '_ \ / _` | / __/ _ \| '_ ` _ \| '_ \| |/ _ \ __/ _ \/ _` | 36 | | || | | (_| | | | | | | | | | (_| | | (_| (_) | | | | | | |_) | | __/ || __/ (_| | 37 | |_||_| \__,_|_|_| |_|_|_| |_|\__, | \___\___/|_| |_| |_| .__/|_|\___|\__\___|\__,_| 38 | |___/ |_| 39 | """ 40 | 41 | RACE_COMPLETED = """ 42 | ______ _ _ _ 43 | | ___ \ | | | | | | 44 | | |_/ /__ _ ___ ___ ___ ___ _ __ ___ _ __ | | ___| |_ ___ __| | 45 | | // _` |/ __/ _ \ / __/ _ \| '_ ` _ \| '_ \| |/ _ \ __/ _ \/ _` | 46 | | |\ \ (_| | (_| __/ | (_| (_) | | | | | | |_) | | __/ || __/ (_| | 47 | \_| \_\__,_|\___\___| \___\___/|_| |_| |_| .__/|_|\___|\__\___|\__,_| 48 | | | 49 | |_| 50 | """ 51 | 52 | MANUAL_PILOT = """ 53 | ___ ___ _ ______ _ _ _ 54 | | \/ | | | | ___ (_) | | | 55 | | . . 
| __ _ _ __ _ _ __ _| | | |_/ /_| | ___ | |_ 56 | | |\/| |/ _` | '_ \| | | |/ _` | | | __/| | |/ _ \| __| 57 | | | | | (_| | | | | |_| | (_| | | | | | | | (_) | |_ 58 | \_| |_/\__,_|_| |_|\__,_|\__,_|_| \_| |_|_|\___/ \__| 59 | 60 | """ 61 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/f1/models/settings.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | 3 | from rl_studio.envs.gazebo.f1.image_f1 import ImageF1, ListenerCamera 4 | from rl_studio.envs.gazebo.f1.models.images import F1GazeboImages 5 | from rl_studio.envs.gazebo.f1.models.utils import F1GazeboUtils 6 | from rl_studio.envs.gazebo.f1.models.rewards import F1GazeboRewards 7 | from rl_studio.envs.gazebo.f1.models.simplified_perception import ( 8 | F1GazeboSimplifiedPerception, 9 | ) 10 | 11 | 12 | class F1GazeboTFConfig(BaseModel): 13 | def __init__(self, **config): 14 | self.simplifiedperception = F1GazeboSimplifiedPerception() 15 | self.f1gazeborewards = F1GazeboRewards(**config) 16 | self.f1gazeboutils = F1GazeboUtils() 17 | self.f1gazeboimages = F1GazeboImages() 18 | 19 | self.image = ImageF1() 20 | #self.image = ListenerCamera("/F1ROS/cameraL/image_raw") 21 | self.image_raw_from_topic = None 22 | self.f1_image_camera = None 23 | self.sensor = config["sensor"] 24 | 25 | # Image 26 | self.image_resizing = config["image_resizing"] / 100 27 | self.new_image_size = config["new_image_size"] 28 | self.raw_image = config["raw_image"] 29 | self.height = int(config["height_image"] * self.image_resizing) 30 | self.width = int(config["width_image"] * self.image_resizing) 31 | self.center_image = int(config["center_image"] * self.image_resizing) 32 | self.num_regions = config["num_regions"] 33 | self.pixel_region = int(self.center_image / self.num_regions) * 2 34 | self.telemetry_mask = config["telemetry_mask"] 35 | self.poi = config["x_row"][0] 36 | self.image_center = None 37 | self.right_lane_center_image = config["center_image"] + ( 38 | config["center_image"] // 2 39 | ) 40 | self.lower_limit = config["lower_limit"] 41 | 42 | # States 43 | self.state_space = config["states"] 44 | if self.state_space == "spn": 45 | self.x_row = [i for i in range(1, int(self.height / 2) - 1)] 46 | else: 47 | self.x_row = config["x_row"] 48 | 49 | # Actions TODO incompatible with baselines way to go 50 | self.action_space = config["action_space"] 51 | self.actions = config["actions"] 52 | 53 | # Rewards 54 | self.reward_function = config["reward_function"] 55 | self.rewards = config["rewards"] 56 | self.min_reward = config["min_reward"] 57 | 58 | # Others 59 | self.telemetry = config["telemetry"] 60 | -------------------------------------------------------------------------------- /rl_studio/installation/setup_noetic.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source /opt/ros/noetic/setup.bash 4 | 5 | # --- CustomRobots --------------------------------------- 6 | DIR="../CustomRobots" 7 | if ! [[ -d "$DIR" ]] 8 | then 9 | echo "$DIR doesn't exists. Cloning CustomRobots repository." 10 | git clone --branch noetic-devel https://github.com/JdeRobot/CustomRobots.git $DIR 11 | else 12 | echo "CustomRobots is already downloaded. Pass." 13 | fi 14 | 15 | if [ -z "$ROS_DISTRO" ]; then 16 | echo "ROS not installed. 
Check the installation steps: https://github.com/erlerobot/gym#installing-the-gazebo-environment" 17 | fi 18 | 19 | # --- Gazebo -------------------------------------------- 20 | program="gazebo" 21 | condition=$(which $program 2>/dev/null | grep -v "not found" | wc -l) 22 | if [ $condition -eq 0 ] ; then 23 | echo "Gazebo is not installed. Check the installation steps: https://github.com/erlerobot/gym#installing-the-gazebo-environment" 24 | fi 25 | 26 | # --- Create catkin_ws ----------------------------------- 27 | ws="catkin_ws" 28 | if [ -d $ws ]; then 29 | echo "Error: catkin_ws directory already exists" 1>&2 30 | fi 31 | src=$ws"/src" 32 | mkdir -p $src 33 | cd $src 34 | catkin_init_workspace 35 | 36 | # Import and build dependencies 37 | cd ../catkin_ws/src/ 38 | vcs import < ../gazebo_ros_noetic.repos 39 | 40 | cd .. 41 | touch catkin_ws/src/ecl_navigation/ecl_mobile_robot/CATKIN_IGNORE 42 | catkin_make 43 | bash -c 'echo source `pwd`/devel/setup.bash >> ~/.bashrc' 44 | 45 | echo "## ROS workspace compiled ##" 46 | 47 | # --- Adding Environment variables ----------------- 48 | # --- BASH 49 | if [ -z "$GAZEBO_MODEL_PATH" ]; then 50 | bash -c 'echo "export GAZEBO_MODEL_PATH="`pwd`/../CustomRobots/f1/models >> ~/.bashrc' 51 | else 52 | printf "GAZEBO_MODEL_PATH env variable already exists in your .bashrc.\n" 53 | fi 54 | 55 | if [ -z "$GAZEBO_RESOURCE_PATH" ]; then 56 | bash -c 'echo "export GAZEBO_RESOURCE_PATH="`pwd`/../CustomRobots/f1/worlds >> ~/.bashrc' 57 | else 58 | printf "GAZEBO_RESOURCE_PATH env variable already exists in your .bashrc\n" 59 | fi 60 | 61 | # --- ZSH 62 | read -p "Do you use zsh? [Y/n]: " zsh 63 | if [ "$zsh" != "n" ] && [ "$zsh" != "N" ]; then 64 | zsh -c 'echo "export GAZEBO_MODEL_PATH="`pwd`/../CustomRobots/f1/models >> ~/.zshrc' 65 | zsh -c 'echo "export GAZEBO_RESOURCE_PATH="`pwd`/../CustomRobots/f1/worlds >> ~/.zshrc' 66 | zsh -c 'echo source `pwd`/devel/setup.zsh >> ~/.zshrc' 67 | printf "" 68 | printf "\nRestarting zsh terminal . . . \n" 69 | exec zsh 70 | else 71 | # --- Restarting Bash 72 | printf "" 73 | printf "\nRestarting the terminal . . .\n" 74 | exec bash 75 | fi 76 | 77 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/autoparking/image_f1.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | from rl_studio.agents.f1.settings import QLearnConfig 5 | 6 | 7 | class ImageF1: 8 | 9 | font = cv2.FONT_HERSHEY_COMPLEX 10 | 11 | def __init__(self): 12 | self.height = 3 # Image height [pixels] 13 | self.width = 3 # Image width [pixels] 14 | self.timeStamp = 0 # Time stamp [s] 15 | self.format = "" # Image format string (RGB8, BGR,...)
16 | self.data = np.zeros( 17 | (self.height, self.width, 3), np.uint8 18 | ) # The image data itself 19 | self.data.shape = self.height, self.width, 3 20 | self.config = QLearnConfig() 21 | 22 | def __str__(self): 23 | return ( 24 | f"Image:" 25 | f"\nHeight: {self.height}\nWidth: {self.width}\n" 26 | f"Format: {self.format}\nTimeStamp: {self.timeStamp}\nData: {self.data}" 27 | ) 28 | 29 | @staticmethod 30 | def image_msg_to_image(img, cv_image): 31 | 32 | img.width = img.width 33 | img.height = img.height 34 | img.format = "RGB8" 35 | img.timeStamp = img.header.stamp.secs + (img.header.stamp.nsecs * 1e-9) 36 | img.data = cv_image 37 | 38 | return img 39 | 40 | def show_telemetry(self, img, points, action, reward): 41 | count = 0 42 | for idx, point in enumerate(points): 43 | cv2.line( 44 | img, 45 | (320, self.config.x_row[idx]), 46 | (320, self.config.x_row[idx]), 47 | (255, 255, 0), 48 | thickness=5, 49 | ) 50 | # cv2.line(img, (center_image, x_row[idx]), (point, x_row[idx]), (255, 255, 255), thickness=2) 51 | cv2.putText( 52 | img, 53 | str("err{}: {}".format(idx + 1, self.config.center_image - point)), 54 | (18, 340 + count), 55 | self.font, 56 | 0.4, 57 | (255, 255, 255), 58 | 1, 59 | ) 60 | count += 20 61 | cv2.putText( 62 | img, 63 | str(f"action: {action}"), 64 | (18, 280), 65 | self.font, 66 | 0.4, 67 | (255, 255, 255), 68 | 1, 69 | ) 70 | cv2.putText( 71 | img, 72 | str(f"reward: {reward}"), 73 | (18, 320), 74 | self.font, 75 | 0.4, 76 | (255, 255, 255), 77 | 1, 78 | ) 79 | 80 | cv2.imshow("Image window", img[240:]) 81 | cv2.waitKey(3) 82 | -------------------------------------------------------------------------------- /rl_studio/agents/utilities/plot_multiple_graphs_template.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | import matplotlib as pltlib 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | 7 | RUNS = 20000 8 | max_episode_steps = 500 9 | 10 | if __name__ == "__main__": 11 | pltlib.rcParams.update({'font.size': 15}) 12 | 13 | fig, ax1 = plt.subplots() 14 | 15 | RUNS = 4000000 16 | 17 | rewards_file = open( 18 | "/home/ruben/Desktop/my-RL-Studio/rl_studio/logs/cartpole/qlearning/training/2023-01-20 18:41:16.873044__rewards_.pkl", "rb") 19 | rewards = pickle.load(rewards_file) 20 | rewards = np.asarray(rewards) 21 | ax1.set_xscale('log') 22 | ax1.plot(range(RUNS), rewards, color='purple', label='qlearning') 23 | 24 | RUNS = 20000 25 | 26 | rewards_file = open( 27 | "/home/ruben/Desktop/my-RL-Studio/rl_studio/logs/cartpole/dqn/training/2023-01-20 02:50:09.991537__rewards_rsl-0_rpl-0_pi-1.pkl", "rb") 28 | rewards = pickle.load(rewards_file) 29 | rewards = np.asarray(rewards) 30 | ax1.set_xscale('log') 31 | ax1.plot(range(RUNS), rewards, color='pink', label='dqn') 32 | 33 | RUNS = 10000 34 | 35 | rewards_file = open( 36 | "/home/ruben/Desktop/my-RL-Studio/rl_studio/logs/cartpole/ddpg/training/2023-01-14 03:15:07.136008__rewards_rsl-0_rpl-0.1_pi-1.pkl", "rb") 37 | rewards = pickle.load(rewards_file) 38 | rewards = np.asarray(rewards) 39 | ax1.set_xscale('log') 40 | ax1.plot(range(RUNS), rewards, color='brown', label='ddpg') 41 | 42 | RUNS = 1000 43 | 44 | rewards_file = open( 45 | "/home/ruben/Desktop/my-RL-Studio/rl_studio/logs/cartpole/ppo_continuous/training/2023-01-11 21:25:12.509340__rewards_rsl-0_rpl-0_pi-0.pkl", "rb") 46 | rewards = pickle.load(rewards_file) 47 | rewards = np.asarray(rewards) 48 | ax1.set_xscale('log') 49 | ax1.plot(range(RUNS), rewards, color='black', label='ppo_continuous') 50 | 51 |
RUNS = 1000 52 | 53 | rewards_file = open( 54 | "/home/ruben/Desktop/my-RL-Studio/rl_studio/logs/cartpole/ppo/training/2023-01-11 21:30:20.951490__rewards_rsl-0.2_rpl-0_pi-0.pkl", "rb") 55 | rewards = pickle.load(rewards_file) 56 | rewards = np.asarray(rewards) 57 | ax1.set_xscale('log') 58 | ax1.plot(range(RUNS), rewards, color='green', label='ppo') 59 | fig.canvas.manager.full_screen_toggle() 60 | 61 | plt.legend() 62 | plt.ylabel("steps") 63 | plt.xlabel("runs") 64 | plt.show() 65 | 66 | base_path = '/home/ruben/Desktop/2020-phd-ruben-lucas/docs/assets/images/results_images/cartpole/solidityExperiments/refinement/refinementOfRefinement/' 67 | ax1.figure.savefig(base_path + 'trainings.png') 68 | 69 | -------------------------------------------------------------------------------- /rl_studio/agents/mountain_car/manual_pilot.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import sys as system 3 | import time 4 | 5 | import gym 6 | 7 | from rl_studio.agents.mountain_car import utils 8 | from rl_studio.agents.f1 import settings as settings 9 | 10 | 11 | class ManualMountainCarTrainer: 12 | def __init__(self, params): 13 | # TODO: Create a pydantic metaclass to simplify the way we extract the params 14 | # environment params 15 | self.environment_params = params.environment["params"] 16 | self.env_name = params.environment["params"]["env_name"] 17 | env_params = params.environment["params"] 18 | actions = params.environment["actions"] 19 | env_params["actions"] = actions 20 | self.env = gym.make(self.env_name, **env_params) 21 | # algorithm params 22 | self.alpha = params.algorithm["params"]["alpha"] 23 | self.epsilon = params.algorithm["params"]["epsilon"] 24 | self.gamma = params.algorithm["params"]["gamma"] 25 | self.rewards_per_run = [0, 0] 26 | 27 | def execute_steps(self): 28 | highest_reward = 0 29 | cumulated_reward = 0 30 | for step in range(50000): 31 | 32 | inpt = input("provide action (0-none, 1-left, 2-right): ") 33 | if inpt == "1" or inpt == "2" or inpt == "0": 34 | action = int(inpt) 35 | print("Selected Action!! " + str(action)) 36 | # Execute the action and get feedback 37 | nextState, reward, self.env.done, lap_completed = self.env.step(action) 38 | cumulated_reward += reward 39 | 40 | if highest_reward < cumulated_reward: 41 | highest_reward = cumulated_reward 42 | print("cumulated_reward = " + str(cumulated_reward)) 43 | self.env._flush(force=True) 44 | if self.env.done: 45 | break 46 | elif inpt == "q": 47 | system.exit(1) 48 | else: 49 | nextState, reward, self.env.done, lap_completed = self.env.step(-1) 50 | print("wrong action!
Try again") 51 | break 52 | 53 | self.rewards_per_run.append(cumulated_reward) 54 | 55 | def main(self): 56 | 57 | print(settings.title) 58 | print(settings.description) 59 | print(f"\t- Start hour: {datetime.datetime.now()}") 60 | 61 | total_episodes = 20000 62 | self.env.done = False 63 | 64 | utils.get_stats_figure(self.rewards_per_run) 65 | 66 | for episode in range(total_episodes): 67 | print("resetting") 68 | time.sleep(5) 69 | self.env.reset() 70 | 71 | self.execute_steps() 72 | 73 | self.env.close() 74 | -------------------------------------------------------------------------------- /rl_studio/config/config_robot_mesh_qlearn.yaml: -------------------------------------------------------------------------------- 1 | settings: 2 | output_dir: "./logs/" 3 | save_model: True 4 | save_positions: True 5 | telemetry: False 6 | mode: inference 7 | agent: robot_mesh 8 | algorithm: qlearn 9 | 10 | actions: 11 | 0: [ 0, 0, -1, 0 ] 12 | 1: [ 0, 0, 0.7, 0.7] 13 | 2: [ 0, 0, 0, -1] 14 | 3: [0, 0, -0.7, 0.7] 15 | 16 | agent: 17 | camera_params: 18 | witdh: 640 19 | height: 480 20 | 21 | environments: 22 | # simple: 23 | # env_name: RobotMeshEnv-v0 24 | # circuit_name: simple 25 | # training_type: qlearn_camera 26 | # launchfile: my_simple_world.launch 27 | # environment_folder: robot_mesh 28 | # robot_name: my_robot 29 | # start_pose: 1, 2 30 | # alternate_pose: True 31 | # estimated_steps: 4000 32 | # sensor: camera 33 | # circuit_positions_set: # x, y, z, roll, pith, ???. yaw 34 | # - [ 0, 53.462, -41.988, 0.004, 0, 0, 1.57, -1.57 ] 35 | # - [ 1, 53.462, -8.734, 0.004, 0, 0, 1.57, -1.57 ] 36 | # - [ 2, 39.712, -30.741, 0.004, 0, 0, 1.56, 1.56 ] 37 | # - [ 3, -6.861, -36.481, 0.004, 0, 0.01, -0.858, 0.613 ] 38 | # - [ 4, 20.043, 37.130, 0.003, 0, 0.103, -1.4383, -1.4383 ] 39 | # actions_force: 18 40 | # boot_on_crash: False 41 | # goal_x: -6.5 42 | # goal_y: 14 43 | # pos_x: 6.4 44 | # pos_y: -5 45 | # pos_z: 0.3 46 | env_name: RobotMeshEnv-v0 47 | circuit_name: complex 48 | training_type: qlearn_camera 49 | launchfile: my_complex_world.launch 50 | environment_folder: robot_mesh 51 | robot_name: my_robot 52 | start_pose: 0 53 | alternate_pose: True 54 | estimated_steps: 4000 55 | sensor: camera 56 | circuit_positions_set: # x, y, z, roll, pith, ???. 
yaw 57 | - [ 0, 53.462, -41.988, 0.004, 0, 0, 1.57, -1.57] 58 | - [ 1, 53.462, -8.734, 0.004, 0, 0, 1.57, -1.57 ] 59 | - [ 2, 39.712, -30.741, 0.004, 0, 0, 1.56, 1.56 ] 60 | - [ 3, -6.861, -36.481, 0.004, 0, 0.01, -0.858, 0.613 ] 61 | - [ 4, 20.043, 37.130, 0.003, 0, 0.103, -1.4383, -1.4383 ] 62 | actions_force: 24 63 | boot_on_crash: False 64 | goal_x: -100 65 | goal_y: 14 66 | pos_x: 17 67 | pos_y: -13 68 | pos_z: 0.3 69 | # manual: 70 | # env_name: F1Env-v0 71 | # circuit_name: manual 72 | # training_type: qlearn_camera 73 | # launch: simple_circuit.launch 74 | # start_pose: 0 75 | # alternate_pose: False 76 | # estimated_steps: 4000 77 | # sensor: camera 78 | 79 | inference: 80 | inference_file: /home/ruben/Desktop/my-RL-Studio/rl_studio/checkpoints/robot_mesh/1_20221227_0306_epsilon_0.05_QTABLE.pkl 81 | actions_file: /home/ruben/Desktop/my-RL-Studio/rl_studio/checkpoints/robot_mesh/actions_set_20221227_0657 82 | 83 | algorithm: 84 | alpha: 0.2 85 | epsilon: 0.95 86 | gamma: 0.9 87 | -------------------------------------------------------------------------------- /rl_studio/algorithms/qlearn_multiple_states.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import pickle 3 | import random 4 | 5 | import numpy as np 6 | 7 | 8 | class QLearn: 9 | def __init__(self, actions, epsilon=0.99, alpha=0.8, gamma=0.9): 10 | self.q = {} 11 | self.epsilon = epsilon # exploration constant 12 | self.alpha = alpha # discount constant 13 | self.gamma = gamma # discount factor 14 | self.actions = actions 15 | 16 | def getQValues(self, state, action): 17 | state = state if isinstance(state, collections.abc.Sequence) else [state] 18 | return self.q.get(tuple(state) + (action,), 0.0) 19 | 20 | def selectAction(self, state, return_q=False): 21 | q = [self.getQValues(state, a) for a in self.actions] 22 | maxQ = max(q) 23 | 24 | if random.random() < self.epsilon: 25 | return random.choice(list(self.actions)) 26 | 27 | count = q.count(maxQ) 28 | # In case there're several state-action max values 29 | # we select a random one among them 30 | if count > 1: 31 | best = [i for i in range(len(self.actions)) if q[i] == maxQ] 32 | i = random.choice(best) 33 | else: 34 | i = q.index(maxQ) 35 | 36 | action = i 37 | if return_q: # if they want it, give it! 38 | return action, q 39 | return action 40 | 41 | def learn(self, state1, action1, reward, state2, done): 42 | maxqnew = max([self.getQValues(state2, a) for a in self.actions]) 43 | 44 | if done == True: 45 | self.q[(tuple(state1) + (action1,))] = reward 46 | q_update = self.alpha * (reward - self.getQValues(state1, action1)) 47 | else: 48 | q_update = self.alpha * ( 49 | reward + self.gamma * maxqnew - self.getQValues(state1, action1) 50 | ) 51 | self.q[(tuple(state1) + (action1,))] = ( 52 | self.getQValues(state1, action1) + q_update 53 | ) 54 | 55 | def reset(self): 56 | self.state = self.np_random.uniform(low=-0.05, high=0.05, size=(4,)) 57 | self.steps_beyond_done = None 58 | return np.array(self.state) 59 | 60 | def inference(self, state, return_q=False): 61 | return self.selectAction(state, return_q) 62 | 63 | def load_model(self, file_path, actions): 64 | 65 | qlearn_file = open(file_path, "rb") 66 | 67 | self.q = pickle.load(qlearn_file) 68 | # TODO it may be possible to infer the actions from the model. 
I don know enough to assume that for every algorithm 69 | self.actions = actions 70 | print(f"\n\nMODEL LOADED.") 71 | print(f" - Loading: {file_path}") 72 | print(f" - Model size: {len(self.q)}") 73 | -------------------------------------------------------------------------------- /rl_studio/agents/cartpole/requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==1.3.0 2 | #actionlib==1.13.2 3 | #angles==1.9.13 4 | appdirs==1.4.4 5 | cachetools==5.2.0 6 | #camera-calibration-parsers==1.12.0 7 | #catkin==0.8.10 8 | #catkin-pkg==0.4.23 9 | certifi==2021.5.30 10 | cfgv==3.3.0 11 | chardet==4.0.0 12 | cloudpickle==1.6.0 13 | #controller-manager==0.19.5 14 | #controller-manager-msgs==0.19.5 15 | #cv-bridge==1.16.0 16 | cycler==0.10.0 17 | decorator==4.4.2 18 | defusedxml==0.6.0 19 | #diagnostic-updater==1.11.0 20 | distlib==0.3.2 21 | distro==1.5.0 22 | docutils==0.17.1 23 | #dynamic-reconfigure==1.7.3 24 | environs==9.2.0 25 | filelock==3.0.12 26 | future==0.18.2 27 | #gazebo_plugins==2.9.2 28 | #gazebo_ros==2.9.2 29 | #gencpp==0.6.5 30 | #geneus==3.0.0 31 | #genlisp==0.4.18 32 | #genmsg==0.5.16 33 | #gennodejs==2.0.2 34 | #genpy==0.6.15 35 | #google-auth==2.15.0 36 | #google-auth-oauthlib==0.4.6 37 | #grpcio==1.51.1 38 | gym==0.25.0 39 | gym-notices==0.0.8 40 | gymnasium-notices==0.0.1 41 | identify==2.2.11 42 | idna==2.10 43 | imageio==2.9.0 44 | importlib-metadata==5.2.0 45 | #jax-jumpy==0.2.0 46 | #kiwisolver==1.3.1 47 | Markdown==3.4.1 48 | MarkupSafe==2.1.1 49 | marshmallow==3.12.2 50 | matplotlib==3.3.2 51 | #message-filters==1.15.14 52 | #netifaces==0.10.9 53 | #networkx==2.5.1 54 | #nodeenv==1.6.0 55 | numpy==1.18.5 56 | #oauthlib==3.2.2 57 | opencv-python==4.2.0.32 58 | pandas==1.4.4 59 | Pillow==8.3.1 60 | #pre-commit==2.13.0 61 | #protobuf==3.20.3 62 | py-markdown-table==0.3.3 63 | #pyasn1==0.4.8 64 | #pyasn1-modules==0.2.8 65 | pydantic==1.10.3 66 | pygame==2.1.0 67 | Pygments==2.14.00 68 | pyglet==1.5.0 69 | pyparsing==2.4.7 70 | python-dateutil==2.8.1 71 | python-dotenv==0.18.0 72 | pytz==2022.7 73 | #PyWavelets==1.1.1 74 | PyYAML==5.4.1 75 | requests==2.25.1 76 | requests-oauthlib==1.3.1 77 | #rosbag==1.15.14 78 | #rosclean==1.15.8 79 | #rosgraph==1.15.14 80 | #roslaunch==1.15.14 81 | #roslib==1.15.8 82 | #roslz4==1.15.14 83 | #rosmaster==1.15.14 84 | #rosmsg==1.15.14 85 | #rosnode==1.15.14 86 | #rosparam==1.15.14 87 | #rospkg==1.2.8 88 | #rospy==1.15.14 89 | #rosservice==1.15.14 90 | #rostest==1.15.14 91 | #rostopic==1.15.14 92 | #rosunit==1.15.8 93 | #roswtf==1.15.14 94 | rsa==4.9 95 | scikit-image==0.17.2 96 | scipy==1.6.1 97 | #sensor-msgs==1.13.1 98 | #Shimmy==0.2.0 99 | #six==1.14.0 100 | #smclib==1.8.6 101 | tensorboard==2.11.0 102 | tensorboard-data-server==0.6.1 103 | tensorboard-plugin-wit==1.8.1 104 | #tf==1.13.2 105 | #tf2-py==0.7.5 106 | #tf2-ros==0.7.5 107 | tifffile==2021.7.2 108 | toml==0.10.2 109 | #topic-tools==1.15.14 110 | #torch==1.12.1+cu113 111 | torch==1.12.1 112 | tqdm==4.64.0 113 | typing_extensions==4.4.0 114 | urllib3==1.26.6 115 | virtualenv==20.4.7 116 | Werkzeug==2.2.2 117 | zipp==3.11.0 -------------------------------------------------------------------------------- /rl_studio/envs/carla/utils/weather.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | def clamp(value, minimum=0.0, maximum=100.0): 4 | return max(minimum, min(value, maximum)) 5 | 6 | 7 | class Sun(object): 8 | def __init__(self, azimuth, altitude): 9 | self.azimuth = azimuth 10 | 
self.altitude = altitude 11 | self._t = 0.0 12 | 13 | def tick(self, delta_seconds): 14 | self._t += 0.008 * delta_seconds 15 | self._t %= 2.0 * math.pi 16 | self.azimuth += 0.25 * delta_seconds 17 | self.azimuth %= 360.0 18 | self.altitude = (70 * math.sin(self._t)) - 20 19 | 20 | def __str__(self): 21 | return 'Sun(alt: %.2f, azm: %.2f)' % (self.altitude, self.azimuth) 22 | 23 | 24 | class Storm(object): 25 | def __init__(self, precipitation): 26 | self._t = precipitation if precipitation > 0.0 else -50.0 27 | self._increasing = True 28 | self.clouds = 0.0 29 | self.rain = 0.0 30 | self.wetness = 0.0 31 | self.puddles = 0.0 32 | self.wind = 0.0 33 | self.fog = 0.0 34 | 35 | def tick(self, delta_seconds): 36 | delta = (1.3 if self._increasing else -1.3) * delta_seconds 37 | self._t = clamp(delta + self._t, -250.0, 100.0) 38 | self.clouds = clamp(self._t + 40.0, 0.0, 90.0) 39 | self.rain = clamp(self._t, 0.0, 80.0) 40 | delay = -10.0 if self._increasing else 90.0 41 | self.puddles = clamp(self._t + delay, 0.0, 85.0) 42 | self.wetness = clamp(self._t * 5, 0.0, 100.0) 43 | self.wind = 5.0 if self.clouds <= 20 else 90 if self.clouds >= 70 else 40 44 | self.fog = clamp(self._t - 10, 0.0, 30.0) 45 | if self._t == -250.0: 46 | self._increasing = True 47 | if self._t == 100.0: 48 | self._increasing = False 49 | 50 | def __str__(self): 51 | return 'Storm(clouds=%d%%, rain=%d%%, wind=%d%%)' % (self.clouds, self.rain, self.wind) 52 | 53 | 54 | class Weather(object): 55 | def __init__(self, weather): 56 | self.weather = weather 57 | self._sun = Sun(weather.sun_azimuth_angle, weather.sun_altitude_angle) 58 | self._storm = Storm(weather.precipitation) 59 | 60 | def tick(self, delta_seconds): 61 | self._sun.tick(delta_seconds) 62 | self._storm.tick(delta_seconds) 63 | self.weather.cloudiness = self._storm.clouds 64 | self.weather.precipitation = self._storm.rain 65 | self.weather.precipitation_deposits = self._storm.puddles 66 | self.weather.wind_intensity = self._storm.wind 67 | self.weather.fog_density = self._storm.fog 68 | self.weather.wetness = self._storm.wetness 69 | self.weather.sun_azimuth_angle = self._sun.azimuth 70 | self.weather.sun_altitude_angle = self._sun.altitude 71 | 72 | def __str__(self): 73 | return '%s %s' % (self._sun, self._storm) -------------------------------------------------------------------------------- /rl_studio/envs/carla/followlane/settings.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | 3 | from rl_studio.envs.carla.followlane.images import AutoCarlaImages 4 | from rl_studio.envs.carla.followlane.utils import AutoCarlaUtils 5 | from rl_studio.envs.carla.followlane.rewards import AutoCarlaRewards 6 | from rl_studio.envs.carla.followlane.simplified_perception import ( 7 | AutoCarlaSimplifiedPerception, 8 | ) 9 | 10 | 11 | class FollowLaneCarlaConfig(BaseModel): 12 | def __init__(self, **config): 13 | self.simplifiedperception = AutoCarlaSimplifiedPerception() 14 | self.autocarlarewards = AutoCarlaRewards() 15 | self.autocarlautils = AutoCarlaUtils() 16 | self.autocarlaimages = AutoCarlaImages() 17 | 18 | # self.image = ImageF1() 19 | # self.image = ListenerCamera("/F1ROS/cameraL/image_raw") 20 | self.image_raw_from_topic = None 21 | self.image_camera = None 22 | # self.sensor = config["sensor"] 23 | 24 | # Image 25 | self.image_resizing = config["image_resizing"] / 100 26 | self.new_image_size = config["new_image_size"] 27 | self.raw_image = config["raw_image"] 28 | self.height = 
int(config["height_image"] * self.image_resizing) 29 | self.width = int(config["width_image"] * self.image_resizing) 30 | self.center_image = int(config["center_image"] * self.image_resizing) 31 | self.num_regions = config["num_regions"] 32 | self.pixel_region = int(self.center_image / self.num_regions) * 2 33 | # self.telemetry_mask = config["telemetry_mask"] 34 | self.poi = config["x_row"][0] 35 | self.image_center = None 36 | self.right_lane_center_image = config["center_image"] + ( 37 | config["center_image"] // 2 38 | ) 39 | self.lower_limit = config["lower_limit"] 40 | 41 | # States 42 | self.state_space = config["states"] 43 | if self.state_space == "spn": 44 | self.x_row = [i for i in range(1, int(self.height / 2) - 1)] 45 | else: 46 | self.x_row = config["x_row"] 47 | 48 | # Actions 49 | self.action_space = config["action_space"] 50 | self.actions = config["actions"] 51 | 52 | # Rewards 53 | self.reward_function = config["reward_function"] 54 | self.rewards = config["rewards"] 55 | self.min_reward = config["min_reward"] 56 | 57 | # Pose 58 | self.alternate_pose = config["alternate_pose"] 59 | self.waypoints_meters = config["waypoints_meters"] 60 | self.waypoints_init = config["waypoints_init"] 61 | self.waypoints_target = config["waypoints_target"] 62 | self.waypoints_lane_id = config["waypoints_lane_id"] 63 | self.waypoints_road_id = config["waypoints_road_id"] 64 | 65 | # self.actor_list = [] 66 | # self.collision_hist = [] 67 | -------------------------------------------------------------------------------- /rl_studio/agents/mountain_car/utils.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | import matplotlib.pyplot as plt 4 | import pandas as pd 5 | 6 | 7 | # TODO Since these utils are algorithm specific, those should stay in the algorithm folder somehow tied to its algorithm class 8 | 9 | 10 | def update_line(axes, runs_rewards): 11 | plot_rewards_per_run(axes, runs_rewards) 12 | plt.draw() 13 | plt.pause(0.01) 14 | 15 | 16 | def get_stats_figure(runs_rewards): 17 | fig, axes = plt.subplots() 18 | fig.set_size_inches(12, 4) 19 | plot_rewards_per_run(axes, runs_rewards) 20 | plt.ion() 21 | plt.show() 22 | return fig, axes 23 | 24 | 25 | def plot_rewards_per_run(axes, runs_rewards): 26 | rewards_graph = pd.DataFrame(runs_rewards) 27 | ax = rewards_graph.plot(ax=axes, title="steps per run") 28 | ax.set_xlabel("runs") 29 | ax.set_ylabel("steps") 30 | ax.legend().set_visible(False) 31 | 32 | 33 | def save_model(qlearn, current_time, states, states_counter, states_rewards): 34 | # Tabular RL: Tabular Q-learning basically stores the policy (Q-values) of the agent into a matrix of shape 35 | # (S x A), where s are all states, a are all the possible actions. After the environment is solved, just save this 36 | # matrix as a csv file. I have a quick implementation of this on my GitHub under Reinforcement Learning. 
37 | 38 | # Q TABLE 39 | base_file_name = "_epsilon_{}".format(round(qlearn.epsilon, 3)) 40 | file_dump = open( 41 | f"./logs/mountain_car/1_{current_time}{base_file_name}_QTABLE.pkl", "wb" 42 | ) 43 | pickle.dump(qlearn.q, file_dump) 44 | # STATES COUNTER 45 | states_counter_file_name = base_file_name + "_STATES_COUNTER.pkl" 46 | file_dump = open( 47 | f"./logs/mountain_car/2_{current_time}{states_counter_file_name}", "wb" 48 | ) 49 | pickle.dump(states_counter, file_dump) 50 | # STATES CUMULATED REWARD 51 | states_cum_reward_file_name = base_file_name + "_STATES_CUM_REWARD.pkl" 52 | file_dump = open( 53 | f"./logs/mountain_car/3_{current_time}{states_cum_reward_file_name}", "wb" 54 | ) 55 | pickle.dump(states_rewards, file_dump) 56 | # STATES 57 | steps = base_file_name + "_STATES_STEPS.pkl" 58 | file_dump = open(f"./logs/mountain_car/4_{current_time}{steps}", "wb") 59 | pickle.dump(states, file_dump) 60 | 61 | 62 | def save_actions(actions, start_time): 63 | file_dump = open("./logs/mountain_car/actions_set_" + start_time, "wb") 64 | pickle.dump(actions, file_dump) 65 | 66 | 67 | def render(env, episode): 68 | render_skip = 0 69 | render_interval = 50 70 | render_episodes = 10 71 | 72 | if (episode % render_interval == 0) and (episode != 0) and (episode > render_skip): 73 | env.render() 74 | elif ( 75 | ((episode - render_episodes) % render_interval == 0) 76 | and (episode != 0) 77 | and (episode > render_skip) 78 | and (render_episodes < episode) 79 | ): 80 | env.render(close=True) 81 | -------------------------------------------------------------------------------- /rl_studio/agents/robot_mesh/utils.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | import matplotlib.pyplot as plt 4 | import pandas as pd 5 | 6 | 7 | # TODO Since these utils are algorithm specific, those should stay in the algorithm folder somehow tied to its algorithm class 8 | 9 | 10 | # TODO Since these utils are algorithm specific, those should stay in the algorithm folder somehow tied to its algorithm class 11 | 12 | 13 | def update_line(axes, runs_rewards): 14 | plot_rewards_per_run(axes, runs_rewards) 15 | plt.draw() 16 | plt.pause(0.01) 17 | 18 | 19 | def get_stats_figure(runs_rewards): 20 | fig, axes = plt.subplots() 21 | fig.set_size_inches(12, 4) 22 | plot_rewards_per_run(axes, runs_rewards) 23 | plt.ion() 24 | plt.show() 25 | return fig, axes 26 | 27 | 28 | def plot_rewards_per_run(axes, runs_rewards): 29 | rewards_graph = pd.DataFrame(runs_rewards) 30 | ax = rewards_graph.plot(ax=axes, title="steps per run") 31 | ax.set_xlabel("runs") 32 | ax.set_ylabel("steps") 33 | ax.legend().set_visible(False) 34 | 35 | 36 | def save_model(qlearn, current_time, states, states_counter, states_rewards): 37 | # Tabular RL: Tabular Q-learning basically stores the policy (Q-values) of the agent into a matrix of shape 38 | # (S x A), where s are all states, a are all the possible actions. After the environment is solved, just save this 39 | # matrix as a csv file. I have a quick implementation of this on my GitHub under Reinforcement Learning. 
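# Illustrative sketch (assumption: qlearn.q is a dict keyed by (state..., action) tuples, as in
# rl_studio/algorithms/qlearn_multiple_states.py; "qtable.csv" is a placeholder name). The CSV
# export mentioned in the comment above could look roughly like:
#   import csv
#   with open("qtable.csv", "w", newline="") as csv_file:
#       writer = csv.writer(csv_file)
#       for state_action, q_value in qlearn.q.items():
#           writer.writerow([*state_action, q_value])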
40 | 41 | # TODO The paths are not relative to the agents folder 42 | # Q TABLE 43 | base_file_name = "_epsilon_{}".format(round(qlearn.epsilon, 3)) 44 | file_dump = open( 45 | "./logs/robot_mesh/1_" + current_time + base_file_name + "_QTABLE.pkl", "wb" 46 | ) 47 | pickle.dump(qlearn.q, file_dump) 48 | # STATES COUNTER 49 | states_counter_file_name = base_file_name + "_STATES_COUNTER.pkl" 50 | file_dump = open( 51 | "./logs/robot_mesh/2_" + current_time + states_counter_file_name, "wb" 52 | ) 53 | pickle.dump(states_counter, file_dump) 54 | # STATES CUMULATED REWARD 55 | states_cum_reward_file_name = base_file_name + "_STATES_CUM_REWARD.pkl" 56 | file_dump = open( 57 | "./logs/robot_mesh/3_" + current_time + states_cum_reward_file_name, "wb" 58 | ) 59 | pickle.dump(states_rewards, file_dump) 60 | # STATES 61 | steps = base_file_name + "_STATES_STEPS.pkl" 62 | file_dump = open("./logs/robot_mesh/4_" + current_time + steps, "wb") 63 | pickle.dump(states, file_dump) 64 | 65 | 66 | def save_actions(actions, start_time): 67 | file_dump = open("./logs/robot_mesh/actions_set_" + start_time, "wb") 68 | pickle.dump(actions, file_dump) 69 | 70 | 71 | def render(env, episode): 72 | render_skip = 0 73 | render_interval = 50 74 | render_episodes = 10 75 | 76 | if (episode % render_interval == 0) and (episode != 0) and (episode > render_skip): 77 | env.render() 78 | elif ( 79 | ((episode - render_episodes) % render_interval == 0) 80 | and (episode != 0) 81 | and (episode > render_skip) 82 | and (render_episodes < episode) 83 | ): 84 | env.render(close=True) 85 | -------------------------------------------------------------------------------- /rl_studio/algorithms/__init__.py: -------------------------------------------------------------------------------- 1 | from rl_studio.algorithms.algorithms_type import AlgorithmsType 2 | from rl_studio.algorithms.exceptions import NoValidAlgorithmType 3 | import pickle 4 | 5 | 6 | class TrainerFactory: 7 | def __init__(self, **kwargs): 8 | self.algorithm = kwargs.get("algorithm") 9 | 10 | 11 | class InferencerFactory: 12 | def __new__(cls, config): 13 | 14 | algorithm = config.algorithm 15 | inference_file_name = config.inference_file 16 | 17 | if algorithm == AlgorithmsType.QLEARN.value: 18 | from rl_studio.algorithms.qlearn_multiple_states import QLearn 19 | 20 | actions_file_name = config.actions_file 21 | actions_file = open(actions_file_name, "rb") 22 | actions = pickle.load(actions_file) 23 | 24 | brain = QLearn(config, epsilon=0) 25 | brain.load_model(inference_file_name, actions) 26 | 27 | return brain 28 | 29 | if algorithm == AlgorithmsType.DEPRECATED_QLEARN.value: 30 | from rl_studio.algorithms.qlearn import QLearn 31 | 32 | actions_file_name = config.actions_file 33 | actions_file = open(actions_file_name, "rb") 34 | actions = pickle.load(actions_file) 35 | 36 | brain = QLearn(config, epsilon=0.05) 37 | brain.load_model(inference_file_name, actions) 38 | 39 | return brain 40 | 41 | elif algorithm == AlgorithmsType.DQN.value: 42 | from rl_studio.algorithms.dqn_torch import DQN_Agent 43 | 44 | input_dim = config.env.observation_space.shape[0] 45 | output_dim = config.env.action_space.n 46 | brain = DQN_Agent(layer_sizes=[input_dim, 64, output_dim]) 47 | brain.load_model(inference_file_name) 48 | 49 | return brain 50 | 51 | elif algorithm == AlgorithmsType.PPO.value: 52 | from rl_studio.algorithms.ppo import Actor 53 | from rl_studio.algorithms.ppo import Mish 54 | 55 | input_dim = config.env.observation_space.shape[0] 56 | output_dim = 
config.env.action_space.n 57 | brain = Actor(input_dim, output_dim, activation=Mish) 58 | brain.load_model(inference_file_name) 59 | 60 | return brain 61 | 62 | elif algorithm == AlgorithmsType.PPO_CONTINIUOUS.value: 63 | from rl_studio.algorithms.ppo_continuous import PPO 64 | 65 | input_dim = config.env.observation_space.shape[0] 66 | output_dim = config.env.action_space.shape[0] 67 | 68 | brain = PPO(input_dim, output_dim, None, None, None, None, None, 69 | True, None) 70 | brain.load(inference_file_name) 71 | 72 | return brain 73 | 74 | elif algorithm == AlgorithmsType.DDPG.value: 75 | from rl_studio.algorithms.ddpg_torch import Actor 76 | 77 | brain = Actor() 78 | brain.load_model(inference_file_name) 79 | 80 | return brain 81 | # elif algorithm == AlgorithmsType.DQN.value: 82 | # from rl_studio.algorithms.dqn import DeepQ 83 | # 84 | # return DeepQ(config) 85 | 86 | else: 87 | raise NoValidAlgorithmType(algorithm) 88 | -------------------------------------------------------------------------------- /rl_studio/agents/cartpole/README.md: -------------------------------------------------------------------------------- 1 | # CARTPOLE 2 | 3 | To learn about the environment provided by OpenAI, refer to the [following url](https://www.gymlibrary.dev/environments/classic_control/cart_pole/) 4 | 5 | ## ENVIRONMENT MODIFICATIONS 6 | 7 | In RL-Studio, the Cartpole environment has been tuned to allow fine-grained experiments regarding: 8 | - the solidity of the different algorithms as the difficulty of the problem increases 9 | - the viability of the algorithms regarding the inference cycle iteration time 10 | - the performance improvement of implementing continuous actions with respect to discrete actions 11 | 12 | The modifications are made in the environments cartpole_env_improved.py and cartpole_env_continuous_improved.py. 13 | 14 | Use the following parameters in the environments section of the config.yaml in order to: 15 | ### configure the desired environment, 16 | 17 | - To use the cartpole_env_continuous_improved.py 18 | - env_name: myCartpole-continuous-v1 19 | - environment_folder: cartpole 20 | - To use the cartpole_env_improved.py 21 | - env_name: myCartpole-v1 22 | - environment_folder: cartpole 23 | 24 | ### configure the difficulty of the environment, use the following parameters in the config.yaml: 25 | 26 | - random_start_level: Indicates the standard deviation in radians of the initial pole angle 27 | - random_perturbations_level: Number between 0 and 1 that indicates the frequency of the random perturbations. 28 | A perturbation of 0.1 indicates that a perturbation will be added every 10 steps. 29 | - perturbations_intensity_std: Number between 0 and 1 that indicates the standard deviation of perturbations intensity. 30 | A perturbations_intensity_std of 1 indicates that each perturbation intensity will have a standard deviation equal to 31 | the intensity of the action applied to the pole. 32 | - initial_pole_angle: Indicates a fixed initial pole angle in radians 33 | 34 | ### configure an automatic launching of several inference experiments with different problem configurations 35 | 36 | - settings: mode: inference 37 | - experiments: Number of experiments for each of the different perturbations configurations to run. 38 | A value of 20 indicates that 20 experiments of initial_pole, 20 of perturbation intensity and 20 of perturbation 39 | frequency will be run. 40 | - random_perturbations_level_step: step between one experiment and the following regarding the random_perturbation_level.
41 | A value of 0.1 means that the first experiment will be run with 0.1 random_perturbation_level, the second one with 0.2, the third one 42 | with 0.3... 43 | perturbations_intensity_std_step: step between one experiment and the following regarding the perturbations_intensity_std. 44 | A value of 0.1 means that the first experiment will be run with 0.1 perturbations_intensity_std, the second one with 0.2, the third one 45 | with 0.3... 46 | initial_pole_angle_steps: step between one experiment and the following regarding the initial_pole_angle. 47 | A value of 0.1 means that the first experiment will be run with 0.1 initial_pole_angle, the second one with 0.2, the third one 48 | with 0.3... 49 | 50 | Note that all experiments results and logs will be stored in the logs///inference folder. -------------------------------------------------------------------------------- /rl_studio/scripts/benchmark_runner: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Run all the tasks on a benchmark using a random agent. 4 | # 5 | # This script assumes you have set an OPENAI_GYM_API_KEY environment 6 | # variable. You can find your API key in the web interface: 7 | # https://gym.openai.com/settings/profile. 8 | # 9 | import argparse 10 | import logging 11 | import sys 12 | 13 | import gym 14 | # In modules, use `logger = logging.getLogger(__name__)` 15 | from gym import wrappers 16 | from gym.scoreboard.scoring import benchmark_score_from_local 17 | 18 | logger = logging.getLogger() 19 | 20 | 21 | def main(): 22 | parser = argparse.ArgumentParser(description=None) 23 | parser.add_argument( 24 | "-b", "--benchmark-id", help="id of benchmark to run e.g. Atari7Ram-v0" 25 | ) 26 | parser.add_argument( 27 | "-v", 28 | "--verbose", 29 | action="count", 30 | dest="verbosity", 31 | default=0, 32 | help="Set verbosity.", 33 | ) 34 | parser.add_argument( 35 | "-f", "--force", action="store_true", dest="force", default=False 36 | ) 37 | parser.add_argument( 38 | "-t", 39 | "--training-dir", 40 | default="/tmp/gym-results", 41 | help="What directory to upload.", 42 | ) 43 | args = parser.parse_args() 44 | 45 | if args.verbosity == 0: 46 | logger.setLevel(logging.INFO) 47 | elif args.verbosity >= 1: 48 | logger.setLevel(logging.DEBUG) 49 | 50 | benchmark_id = args.benchmark_id 51 | if benchmark_id is None: 52 | logger.info("Must supply a valid benchmark") 53 | return 1 54 | 55 | try: 56 | benchmark = gym.benchmark_spec(benchmark_id) 57 | except Exception: 58 | logger.info("Invalid benchmark") 59 | return 1 60 | 61 | # run benchmark tasks 62 | for task in benchmark.tasks: 63 | logger.info("Running on env: {}".format(task.env_id)) 64 | for trial in range(task.trials): 65 | env = gym.make(task.env_id) 66 | training_dir_name = "{}/{}-{}".format(args.training_dir, task.env_id, trial) 67 | env = wrappers.Monitor( 68 | env, training_dir_name, video_callable=False, force=args.force 69 | ) 70 | env.reset() 71 | for _ in range(task.max_timesteps): 72 | o, r, done, _ = env.step(env.action_space.sample()) 73 | if done: 74 | env.reset() 75 | env.close() 76 | 77 | logger.info( 78 | """Computing statistics for this benchmark run... 
79 | {{ 80 | score: {score}, 81 | num_envs_solved: {num_envs_solved}, 82 | summed_training_seconds: {summed_training_seconds}, 83 | start_to_finish_seconds: {start_to_finish_seconds}, 84 | }} 85 | 86 | """.rstrip().format( 87 | **benchmark_score_from_local(benchmark_id, args.training_dir) 88 | ) 89 | ) 90 | 91 | logger.info( 92 | """Done running, upload results using the following command: 93 | 94 | python -c "import gym; gym.upload('{}', benchmark_id='{}', algorithm_id='(unknown)')" 95 | 96 | """.rstrip().format( 97 | args.training_dir, benchmark_id 98 | ) 99 | ) 100 | 101 | return 0 102 | 103 | 104 | if __name__ == "__main__": 105 | sys.exit(main()) 106 | -------------------------------------------------------------------------------- /rl_studio/config/config_training_followline_qlearn_f1_gazebo.yaml: -------------------------------------------------------------------------------- 1 | settings: 2 | mode: training # training, retraining 3 | task: follow_line_gazebo # follow_line_gazebo 4 | algorithm: qlearn # qlearn 5 | simulator: gazebo # openai, carla, gazebo, sumo 6 | environment_set: gazebo_environments # gazebo_environments, carla_environments 7 | env: simple # simple, nurburgring, montreal, curves, simple_laser, manual, autoparking 8 | agent: f1 # f1, autoparkingRL, auto_carla, mountain_car, robot_mesh, cartpole, turtlebot 9 | actions: simple # simple, medium, hard, test, autoparking_simple 10 | states: sp1 # sp1 (simplified perception with 1 point), sp3 (simplified perception with 3 points), spn (simplified perception with n points) 11 | rewards: followline_center # 12 | framework: _ 13 | total_episodes: 10 14 | training_time: 10 15 | models_dir: "./checkpoints" 16 | logs_dir: "./logs" 17 | metrics_dir: "./metrics" 18 | 19 | ros: 20 | ros_master_uri: "11311" 21 | gazebo_master_uri: "11345" 22 | 23 | retraining: 24 | qlearn: 25 | retrain_qlearn_model_name: "20230105-174932_Circuit-simple_States-sp1_Actions-simple_Rewards-followline_center_epsilon-0.05_epoch-10_step-15001_reward-134200-qtable.npy" 26 | 27 | inference: 28 | qlearn: 29 | inference_qlearn_model_name: 30 | 31 | algorithm: 32 | qlearn: 33 | alpha: 0.2 34 | epsilon: 0.95 35 | epsilon_min: 0.05 36 | gamma: 0.9 37 | 38 | agents: 39 | f1: 40 | camera_params: 41 | width: 640 42 | height: 480 43 | center_image: 320 44 | raw_image: False 45 | image_resizing: 100 46 | new_image_size: 32 47 | num_regions: 16 48 | lower_limit: 220 49 | 50 | states: 51 | sp1: 52 | 0: [10] 53 | sp3: 54 | 0: [5, 15, 22] 55 | sp5: 56 | 0: [3, 5, 10, 15, 20] 57 | spn: 58 | 0: [10] 59 | 60 | actions: 61 | simple: 62 | 0: [3, 0] 63 | 1: [2, 1] 64 | 2: [2, -1] 65 | medium: 66 | 0: [3, 0] 67 | 1: [2, 1] 68 | 2: [2, -1] 69 | 3: [1, 1.5] 70 | 4: [1, -1.5] 71 | hard: 72 | 0: [3, 0] 73 | 1: [2, 1] 74 | 2: [2, -1] 75 | 3: [1.5, 1] 76 | 4: [1.5, -1] 77 | 5: [1, -1.5] 78 | 6: [1, -1.5] 79 | test: 80 | 0: [0, 0] 81 | 82 | rewards: 83 | followline_center: 84 | from_10: 10 85 | from_02: 2 86 | from_01: 1 87 | penal: -100 88 | min_reward: 5_000 89 | highest_reward: 100 90 | 91 | gazebo_environments: 92 | simple: 93 | env_name: F1Env-v0 94 | circuit_name: simple 95 | launchfile: simple_circuit.launch 96 | environment_folder: f1 97 | robot_name: f1_renault 98 | model_state_name: f1_renault # 99 | start_pose: 0 # 0, 1, 2, 3, 4 100 | alternate_pose: False 101 | estimated_steps: 15000 102 | sensor: camera 103 | save_episodes: 1 104 | save_every_step: 1000 105 | lap_completed: False 106 | save_model: True 107 | save_positions: True 108 | debug_level: DEBUG 109 | telemetry: 
False 110 | telemetry_mask: False 111 | plotter_graphic: False 112 | circuit_positions_set: 113 | 0: [53.462, -41.988, 0.004, 0, 0, 1.57, -1.57] 114 | 1: [53.462, -8.734, 0.004, 0, 0, 1.57, -1.57] #finish line 115 | 2: [39.712, -30.741, 0.004, 0, 0, 1.56, 1.56] 116 | 3: [-6.861, -36.481, 0.004, 0, 0.01, -0.858, 0.613] 117 | 4: [20.043, 37.130, 0.003, 0, 0.103, -1.4383, -1.4383] 118 | -------------------------------------------------------------------------------- /rl_studio/agents/f1/utils.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | import pickle 4 | 5 | import cv2 6 | import numpy as np 7 | import pandas as pd 8 | 9 | from rl_studio.agents.f1 import settings 10 | 11 | 12 | # TODO Since these utils are algorithm specific, those should stay in the algorithm folder somehow tied to its algorithm class 13 | 14 | 15 | def load_model(params, qlearn, file_name): 16 | 17 | qlearn_file = open("./logs/qlearn_models/" + file_name, "rb") 18 | model = pickle.load(qlearn_file) 19 | 20 | qlearn.q = model 21 | qlearn.ALPHA = params.algorithm["params"]["alpha"] 22 | qlearn.GAMMA = params.algorithm["params"]["gamma"] 23 | qlearn.epsilon = params.algorithm["params"]["epsilon"] 24 | 25 | # highest_reward = settings.algorithm_params["highest_reward"] 26 | 27 | print(f"\n\nMODEL LOADED. Number of (action, state): {len(model)}") 28 | print(f" - Loading: {file_name}") 29 | print(f" - Model size: {len(qlearn.q)}") 30 | print(f" - Action set: {settings.actions_set}") 31 | print(f" - Epsilon: {qlearn.epsilon}") 32 | print(f" - Start: {datetime.datetime.now()}") 33 | 34 | 35 | def save_model(qlearn, current_time, states, states_counter, states_rewards): 36 | # Tabular RL: Tabular Q-learning basically stores the policy (Q-values) of the agent into a matrix of shape 37 | # (S x A), where s are all states, a are all the possible actions. After the environment is solved, just save this 38 | # matrix as a csv file. I have a quick
39 | 40 | # Q TABLE 41 | base_file_name = "_act_set_{}_epsilon_{}".format( 42 | settings.qlearn.actions_set, round(qlearn.epsilon, 2) 43 | ) 44 | file_dump = open( 45 | f"./logs/qlearn_models/1_{current_time}{base_file_name}_QTABLE.pkl", "wb" 46 | ) 47 | pickle.dump(qlearn.q, file_dump) 48 | # STATES COUNTER 49 | states_counter_file_name = base_file_name + "_STATES_COUNTER.pkl" 50 | file_dump = open( 51 | f"./logs/qlearn_models/2_{current_time}{states_counter_file_name}", "wb" 52 | ) 53 | pickle.dump(states_counter, file_dump) 54 | # STATES CUMULATED REWARD 55 | states_cum_reward_file_name = base_file_name + "_STATES_CUM_REWARD.pkl" 56 | file_dump = open( 57 | f"./logs/qlearn_models/3_{current_time}{states_cum_reward_file_name}", "wb" 58 | ) 59 | pickle.dump(states_rewards, file_dump) 60 | # STATES 61 | steps = base_file_name + "_STATES_STEPS.pkl" 62 | file_dump = open(f"./logs/qlearn_models/4_{current_time}{steps}", "wb") 63 | pickle.dump(states, file_dump) 64 | 65 | 66 | def save_times(checkpoints): 67 | file_name = "actions_" 68 | file_dump = open( 69 | f"/logs/{file_name}{settings.qlearn.actions_set}_checkpoints.pkl", "wb" 70 | ) 71 | pickle.dump(checkpoints, file_dump) 72 | 73 | 74 | def save_actions(actions, current_time): 75 | file_dump = open("./logs/qlearn_models/actions_set_" + current_time, "wb") 76 | pickle.dump(actions, file_dump) 77 | 78 | 79 | def render(env, episode): 80 | render_skip = 0 81 | render_interval = 50 82 | render_episodes = 10 83 | 84 | if (episode % render_interval == 0) and (episode != 0) and (episode > render_skip): 85 | env.render() 86 | elif ( 87 | ((episode - render_episodes) % render_interval == 0) 88 | and (episode != 0) 89 | and (episode > render_skip) 90 | and (render_episodes < episode) 91 | ): 92 | env.render(close=True) 93 | -------------------------------------------------------------------------------- /rl_studio/config/config_inference_followline_qlearn_f1_gazebo.yaml: -------------------------------------------------------------------------------- 1 | settings: 2 | mode: inference # training, retraining, inference 3 | task: follow_line_gazebo # follow_line_gazebo, follow_lane_gazebo, autoparking_gazebo 4 | algorithm: qlearn # qlearn, dqn, ddpg, ppo 5 | simulator: gazebo # openai, carla, gazebo, sumo 6 | environment_set: gazebo_environments # gazebo_environments, carla_environments 7 | env: simple # simple, nurburgring, montreal, curves, simple_laser, manual, autoparking 8 | agent: f1 # f1, autoparkingRL, auto_carla, mountain_car, robot_mesh, cartpole, turtlebot 9 | actions: simple # simple, medium, hard, test 10 | states: sp1 #image, sp1 (simplified perception with 1 point), sp3 (simplified perception with 3 points), spn (simplified perception with n points) 11 | rewards: followline_center # rewards_followline_center 12 | framework: _ 13 | total_episodes: 5 14 | training_time: 6 15 | models_dir: "./checkpoints" 16 | logs_dir: "./logs" 17 | metrics_dir: "./metrics" 18 | 19 | ros: 20 | ros_master_uri: "11311" 21 | gazebo_master_uri: "11345" 22 | 23 | retraining: 24 | qlearn: 25 | retrain_qlearn_model_name: 26 | 27 | inference: 28 | qlearn: 29 | inference_qlearn_model_name: "20230105-174932_Circuit-simple_States-sp1_Actions-simple_Rewards-followline_center_epsilon-0.05_epoch-10_step-15001_reward-134200-qtable.npy" 30 | 31 | algorithm: 32 | qlearn: 33 | alpha: 0.2 34 | epsilon: 0.95 35 | epsilon_min: 0.05 36 | gamma: 0.9 37 | 38 | agents: 39 | f1: 40 | camera_params: 41 | width: 640 42 | height: 480 43 | center_image: 320 44 | raw_image: False 45 | 
image_resizing: 100 46 | new_image_size: 32 47 | num_regions: 16 48 | lower_limit: 220 49 | states: 50 | sp1: 51 | 0: [10] 52 | sp3: 53 | 0: [5, 15, 22] 54 | sp5: 55 | 0: [3, 5, 10, 15, 20] 56 | spn: 57 | 0: [10] 58 | 59 | actions: 60 | simple: 61 | 0: [3, 0] 62 | 1: [2, 1] 63 | 2: [2, -1] 64 | medium: 65 | 0: [3, 0] 66 | 1: [2, 1] 67 | 2: [2, -1] 68 | 3: [1, 1.5] 69 | 4: [1, -1.5] 70 | hard: 71 | 0: [3, 0] 72 | 1: [2, 1] 73 | 2: [2, -1] 74 | 3: [1.5, 1] 75 | 4: [1.5, -1] 76 | 5: [1, -1.5] 77 | 6: [1, -1.5] 78 | test: 79 | 0: [0, 0] 80 | 81 | rewards: 82 | followline_center: 83 | from_10: 10 84 | from_02: 2 85 | from_01: 1 86 | penal: -100 87 | min_reward: 5_000 88 | highest_reward: 100 89 | 90 | gazebo_environments: 91 | simple: 92 | env_name: F1Env-v0 93 | circuit_name: simple 94 | launchfile: simple_circuit.launch 95 | environment_folder: f1 96 | robot_name: f1_renault 97 | model_state_name: f1_renault # autoparking 98 | start_pose: 0 # 0, 1, 2, 3, 4 99 | alternate_pose: False 100 | estimated_steps: 100 101 | sensor: camera 102 | save_episodes: 5 103 | save_every_step: 10 104 | lap_completed: False 105 | save_model: True 106 | save_positions: True 107 | debug_level: DEBUG 108 | telemetry: False 109 | telemetry_mask: False 110 | plotter_graphic: False 111 | circuit_positions_set: 112 | 0: [53.462, -41.988, 0.004, 0, 0, 1.57, -1.57] 113 | 1: [53.462, -8.734, 0.004, 0, 0, 1.57, -1.57] 114 | 2: [39.712, -30.741, 0.004, 0, 0, 1.56, 1.56] 115 | 3: [-6.861, -36.481, 0.004, 0, 0.01, -0.858, 0.613] 116 | 4: [20.043, 37.130, 0.003, 0, 0.103, -1.4383, -1.4383] 117 | -------------------------------------------------------------------------------- /rl_studio/installation/gazebo_ros_noetic.repos: -------------------------------------------------------------------------------- 1 | repositories: 2 | # ar_track_alvar: 3 | # type: git 4 | # url: https://github.com/ros-perception/ar_track_alvar 5 | # version: melodic-devel 6 | catkin_simple: 7 | type: git 8 | url: https://github.com/catkin/catkin_simple.git 9 | version: master 10 | control_toolbox: 11 | type: git 12 | url: https://github.com/ros-controls/control_toolbox.git 13 | version: melodic-devel 14 | ecl_core: 15 | type: git 16 | url: https://github.com/stonier/ecl_core 17 | version: release/0.62-noetic 18 | ecl_lite: 19 | type: git 20 | url: https://github.com/stonier/ecl_lite 21 | version: release/0.61-noetic 22 | ecl_navigation: 23 | type: git 24 | url: https://github.com/stonier/ecl_navigation 25 | version: release/0.60-melodic 26 | ecl_tools: 27 | type: git 28 | url: https://github.com/stonier/ecl_tools 29 | version: release/0.61-noetic 30 | driver_base: 31 | type: git 32 | url: https://github.com/ros-drivers/driver_common.git 33 | version: indigo-devel 34 | gazebo_ros_pkgs: 35 | type: git 36 | url: https://github.com/ros-simulation/gazebo_ros_pkgs 37 | version: noetic-devel 38 | hector_gazebo: 39 | type: git 40 | url: https://github.com/tu-darmstadt-ros-pkg/hector_gazebo/ 41 | version: melodic-devel 42 | image_common: 43 | type: git 44 | url: https://github.com/ros-perception/image_common.git 45 | version: noetic-devel 46 | joystick_drivers: 47 | type: git 48 | url: https://github.com/ros-drivers/joystick_drivers.git 49 | version: master 50 | # kobuki: 51 | # type: git 52 | # url: https://github.com/yujinrobot/kobuki 53 | # version: melodic 54 | # kobuki_core: 55 | # type: git 56 | # url: https://github.com/yujinrobot/kobuki_core 57 | # version: melodic 58 | # kobuki_desktop: 59 | # type: git 60 | # url: 
https://github.com/yujinrobot/kobuki_desktop 61 | # version: melodic 62 | # kobuki_msgs: 63 | # type: git 64 | # url: https://github.com/yujinrobot/kobuki_msgs 65 | # version: release/0.7-melodic 66 | navigation: 67 | type: git 68 | url: https://github.com/ros-planning/navigation 69 | version: noetic-devel 70 | pcl_ros: 71 | type: git 72 | url: https://github.com/ros-perception/perception_pcl.git 73 | version: melodic-devel 74 | realtime_tools: 75 | type: git 76 | url: https://github.com/ros-controls/realtime_tools 77 | version: noetic-devel 78 | ros_control: 79 | type: git 80 | url: https://github.com/ros-controls/ros_control 81 | version: noetic-devel 82 | roslint: 83 | type: git 84 | url: https://github.com/ros/roslint 85 | version: master 86 | turtlebot: 87 | type: git 88 | url: https://github.com/turtlebot/turtlebot 89 | version: melodic 90 | turtlebot_create: 91 | type: git 92 | url: https://github.com/turtlebot/turtlebot_create 93 | version: indigo 94 | turtlebot_simulator: 95 | type: git 96 | url: https://github.com/turtlebot/turtlebot_simulator 97 | version: melodic 98 | xacro: 99 | type: git 100 | url: https://github.com/ros/xacro 101 | version: noetic-devel 102 | yocs_msgs: 103 | type: git 104 | url: https://github.com/yujinrobot/yocs_msgs 105 | version: release/0.7-melodic 106 | # yujin_ocs: 107 | # type: git 108 | # url: https://github.com/yujinrobot/yujin_ocs 109 | # version: release/0.8-melodic 110 | -------------------------------------------------------------------------------- /CODING.md: -------------------------------------------------------------------------------- 1 | # CODING STYLE 2 | 3 | If you are contributing to RL-Studio tool development, please follow the below coding style and structure of directories, files recommendations: 4 | 5 | ## SOME RULES 6 | 7 | - [PEP 8](https://peps.python.org/pep-0008/) style guide for Python Code 8 | - [Black](https://github.com/psf/black) format 9 | - [Pylint](https://pypi.org/project/pylint/) as static code analyser 10 | - Constants variable must be upper-case (e.g `TIME_THRESHOLD`, `NUMBER_OF_RUNS`) 11 | - Comment all non trivial functions to ease readability 12 | - All the internal imported packages must be imported from the root of the project (e.g `import rl_studio.agents` instead `import agents`) 13 | - Organize imports before pushing your code to the repo 14 | 15 | - When creating a project, please keep in mind: 16 | 17 | - in **/agents** directory, files names should be `mode_task_algorithm_simulator_framework.py`, i.e. `trainer_followline_ddpg_F1_gazebo_tf.py` or `inferencer_mountaincar_qlearn_openai_pytorch.py`. In case of not using framework leave it blank. 18 | - in **/envs/gazebo/f1/models** directory, files names should be `task_algorithm_framework.py`, i.e. `followline_ddpg_gazebo_tf.py` or `followlane_qlearn_pytorch.py`. In case of not using framework leave it blank. 19 | - As a general rule, **classes names** have to follow convention `ModeTaskAlgorithmAgentSimuladorFramework`, i.e. `TrainerFollowLaneDDPGF1GazeboPytorch` or `InferencerFollowLaneDQNF1GazeboTF` 20 | - in **/envs/gazebo** directory, classes names follow rule `TaskAlgorithmAgentSimulatorFramework`, i.e. `FollowlineDDPGF1GazeboTF`. 21 | 22 | # Directory architecture 23 | 24 | ## Config files 25 | 26 | - in **/config** directory add a configuration file with the following format `config_mode_task_algorithm_agent_simulator.yaml`, i.e. 
`config_training_followlane_qlearn_F1_carla.yaml` 27 | 28 | ## Models 29 | 30 | - Add trained brains to the **/checkpoints** folder. You can configure this in the config.yaml file. The app will automatically create a directory with the format `task_algorithm_agent_simulator_framework` where models are saved. 31 | - The model file should follow the format `timestamp_maxreward_epoch_ADDITIONALTEXT.h5` (h5 format), i.e. `09122002_max45678_epoch435_actor_conv2d32x64_critic_conv2d32x64_actionsCont_stateImg_rewardDiscrete.h5`, indicating the main features of the saved model so that the exact model can be found easily. 32 | 33 | ## Metrics 34 | 35 | - Statistics and metrics of the models should be saved in the **/metrics** folder. You can configure this in the config.yaml file. The app will automatically create a directory with the format `mode/task_algorithm_agent_simulator_framework/data` where data is saved. 36 | 37 | ## Graphics 38 | 39 | - Graphics of the models should be saved in the **/metrics** folder. You can configure this in the config.yaml file. The app will automatically create a directory with the format `mode/task_algorithm_agent_simulator_framework/graphics` where graphics are saved. 40 | 41 | ## Logs and TensorBoard files 42 | 43 | - TensorBoard and log files should be saved in the **/logs** folder. You can configure this in the config.yaml file. 44 | For TensorBoard, the app will automatically create a directory with the format `mode/task_algorithm_agent_simulator_framework/TensorBoard`. 45 | 46 | For logs, the app will automatically create a directory with the format `mode/task_algorithm_agent_simulator_framework/logs`. 47 | 48 | # TIPS 49 | 50 | - You can refer to the "mountain_car qlearning" project as an example of how to implement real-time monitoring 51 | - You can refer to the "cartpole dqn" project as an example of how to implement logging 52 | -------------------------------------------------------------------------------- /rl_studio/rl-studio-tuning-qlearn-followlane.py: -------------------------------------------------------------------------------- 1 | 2 | import optuna 3 | from rl_studio.agents import TrainerFactory, InferencerFactory 4 | 5 | 6 | def objective(trial): 7 | 8 | config_file = {'settings': {'mode': 'training', 'task': 'follow_lane_gazebo', 9 | 'algorithm': 'qlearn', 'simulator': 'gazebo', 'environment_set': 'gazebo_environments', 10 | 'env': 'simple', 'agent': 'f1', 'actions': 'simple', 'states': 'sp1', 11 | 'rewards': 'follow_right_lane_only_center', 'framework': '_', 12 | 'total_episodes': 5, 'training_time': 6, 'models_dir': './checkpoints', 'logs_dir': './logs', 13 | 'metrics_dir': './metrics'}, 'ros': {'ros_master_uri': '11311', 'gazebo_master_uri': '11345'}, 14 | 'retraining': {'qlearn': {'retrain_qlearn_model_name':None}}, 15 | 'inference': {'qlearn': {'inference_qlearn_model_name': None}}, 16 | 'algorithm': {'qlearn': {'alpha': 0.2, 'epsilon': 0.95, 'epsilon_min': 0.05, 'gamma': 0.9}}, 17 | 'agents': {'f1': {'camera_params': {'width': 640, 'height': 480, 'center_image': 320, 'raw_image': False, 'image_resizing': 100, 18 | 'new_image_size': 32, 'num_regions': 16, 'lower_limit': 220}}}, 'states': {'image': {0: [3]}, 'sp1': {0: [10]}, 'sp3': {0: [5, 15, 22]}, 19 | 'sp5': {0: [3, 5, 10, 15, 20]}, 'spn': {0: [10]}}, 'actions': {'simple': {0: [3, 0], 1: [2, 1], 2: [2, -1]}, 20 | 'medium': {0: [3, 0], 1: [2, 1], 2: [2, -1], 3: [1, 1.5], 4: [1, -1.5]}, 21 | 'hard': {0: [3, 0], 1: [2, 1], 2: [2, -1], 3: [1.5, 1], 4: [1.5, -1], 5: [1, -1.5], 6: [1, -1.5]}, 22 | 'test': {0: [0, 0]}}, 23 | 'rewards': 
{'follow_right_lane_only_center': {'from_10': 10, 'from_02': 2, 'from_01': 1, 'penal': -100, 'min_reward': 5000, 'highest_reward': 100}, 24 | 'follow_right_lane_center_v_step': {'from_10': 10, 'from_02': 2, 'from_01': 1, 'penal': -100, 'min_reward': 5000, 'highest_reward': 100}, 25 | 'follow_right_lane_center_v_w_linear': {'beta_0': 3, 'beta_1': -0.1, 'penal': 0, 'min_reward': 1000, 'highest_reward': 100}}, 26 | 'gazebo_environments': {'simple': {'env_name': 'F1Env-v0', 'circuit_name': 'simple', 'launchfile': 'simple_circuit.launch', 27 | 'environment_folder': 'f1', 'robot_name': 'f1_renault', 'model_state_name': 'f1_renault', 'start_pose': 0, 'alternate_pose': False, 28 | 'estimated_steps': 100, 'sensor': 'camera', 'save_episodes': 5, 'save_every_step': 10, 'lap_completed': False, 'save_model': True, 29 | 'save_positions': True, 'debug_level': 'DEBUG', 'telemetry': False, 'telemetry_mask': False, 'plotter_graphic': False, 30 | 'circuit_positions_set': {0: [52.8, -12.734, 0.004, 0, 0, 1.57, -1.57], 1: [52.97, -42.06, 0.004, 0, 0, 1.57, -1.57], 31 | 2: [40.2, -30.741, 0.004, 0, 0, 1.56, 1.56], 3: [0, 31.15, 0.004, 0, 0.01, 0, 0.31], 4: [19.25, 43.5, 0.004, 0, 0.0, 1.57, -1.69], 32 | 5: [52.8, -35.486, 0.004, 0, 0, 1.57, -1.57]}}}} 33 | 34 | config_file['algorithm']['qlearn']['alpha'] = trial.suggest_float('alpha', 0.1, 1, log=True) 35 | config_file['algorithm']['qlearn']['gamma'] = trial.suggest_float('gamma', 0.1, 1, log=True) 36 | 37 | print(f"config_file: {config_file}") 38 | 39 | if config_file["settings"]["mode"] == "inference": 40 | inferencer = InferencerFactory(config_file) 41 | inferencer.main() 42 | else: 43 | trainer = TrainerFactory(config_file) 44 | trainer.main() 45 | 46 | 47 | if __name__ == "__main__": 48 | 49 | study = optuna.create_study(direction='maximize') 50 | study.optimize(objective) 51 | trial = study.best_trial 52 | 53 | print(f"Accuracy = {trial.value}") 54 | print(f"best hypers = {trial.params}") -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to Reinforcement Learning Studio 2 | 3 | Thanks for your interest in contributing! 4 | 5 | This file contains a set of rules for contributing to this project and the 6 | rest of the projects developed by JdeRobot. 7 | If you have any doubts about how to contribute, contact one of the maintainers 8 | of the project. They will be pleased to tell you how you can contribute your 9 | knowledge to the project and organization! 10 | 11 | * [Code of conduct](#code-of-conduct) 12 | * [Prerequisites before contributing](#prerequisites-before-contributing) 13 | * [How can I contribute?](#how-can-i-contribute) 14 | 15 | 16 | 17 | ## Code of conduct 18 | Please report any unacceptable behavior to any of [the maintainers](#i-have-a-question). 19 | 20 | 21 | ## Prerequisites before contributing 22 | In order to contribute to JdeRobot projects, please read the project README.md/[webpage](https://github.com/JdeRobot/RL-Studio) carefully before 23 | you start contributing, to understand the purpose of the project and where you can contribute. 24 | 25 | 26 | ## How can I contribute? 27 | All JdeRobot projects follow the same contribution workflow. 28 | 29 | * **Find a problem or possible improvement for the project:** First of all, check that the feature/bug is not listed in the current [open issues](https://github.com/JdeRobot/RL-Studio/issues). 
30 | 31 | * **Create an issue:** [Create an issue](https://github.com/JdeRobot/RL-Studio/issues/new) in the project with the problem/improvement you will 32 | address. In this issue, explain carefully what you will be updating and how these changes will impact the project. 33 | Provide any complementary information to explain it (code samples, screenshots...). You should include information about: 34 | * Expected behavior 35 | * Actual behavior 36 | * Steps to reproduce 37 | * Environment 38 | * Possible cause 39 | * Possible solution 40 | 41 | The two following points differ depending on the permissions you have on the repo. 42 | * **[If you have write permission] Always work in a separate branch:** Create a new branch with a descriptive name (you can use the issue number as the branch name, e.g. "issue_xxx"). Create your commits in that branch, making the needed changes. Please use descriptive commit messages so everyone can easily understand the changes you made. 43 | 44 | * **[If you only have read permission] Fork the project:** Fork the project and work on that copy of the repo, making the desired changes. Please use descriptive commit messages so everyone can easily understand the changes you made. 45 | 46 | * **Open a pull request:** A pull request is required whenever a new change is to be added to the core of the project. After solving the issue, create a pull request with your branch. In this pull request, include all the commits made, write a good description of the changes, and reference the issue it solves to make things easier for the maintainers. Include any additional resources that would be helpful (references, screenshots...). Link the PR with the issue. 47 | 48 | * **Testing and merging pull requests** 49 | One of the maintainers will review your code. The reviewer may ask you to modify your pull request. 50 | Please respond to reviewers in a timely manner (within weeks, not months); otherwise your submission may be postponed or even rejected. 51 | 52 | * **[If you have write permission] Don't accept your own pull requests:** Wait for a project maintainer to accept the changes you made. They will probably comment on the pull request with some feedback and consider whether it can be merged into the master branch. Be proactive and kind! 
53 | 54 | -------------------------------------------------------------------------------- /rl_studio/config/config_inference_followline_dqn_f1_gazebo.yaml: -------------------------------------------------------------------------------- 1 | settings: 2 | mode: inference 3 | task: follow_line_gazebo # follow_line_gazebo, follow_lane_gazebo, autoparking_gazebo 4 | algorithm: dqn # qlearn, dqn, ddpg, ppo 5 | simulator: gazebo # openai, carla, gazebo, sumo 6 | environment_set: gazebo_environments # gazebo_environments, carla_environments 7 | env: simple # simple, nurburgring, montreal, curves, simple_laser, manual, autoparking 8 | agent: f1 # f1, autoparkingRL, auto_carla, mountain_car, robot_mesh, cartpole, turtlebot 9 | actions: simple # simple, medium, hard, test 10 | states: sp1 #image, sp1 (simplified perception with 1 point), sp3 (simplified perception with 3 points), spn (simplified perception with n points) 11 | rewards: followline_center # rewards_followline_center 12 | framework: TensorFlow # TensorFlow, Pytorch 13 | total_episodes: 5 14 | training_time: 6 15 | models_dir: "./checkpoints" 16 | logs_dir: "./logs" 17 | metrics_dir: "./metrics" 18 | 19 | ros: 20 | ros_master_uri: "11311" 21 | gazebo_master_uri: "11345" 22 | 23 | retraining: 24 | dqn: 25 | retrain_dqn_tf_model_name: 26 | 27 | inference: 28 | dqn: 29 | inference_dqn_tf_model_name: "DQN_sp_16x16_LAPCOMPLETED_Max990_Epoch3_inTime20230110-180135.model" 30 | 31 | algorithm: 32 | dqn: 33 | alpha: 0.8 34 | gamma: 0.9 35 | epsilon: 0.99 36 | epsilon_discount: 0.9986 37 | epsilon_min: 0.05 38 | model_name: DQN_sp_16x16 39 | replay_memory_size: 50_000 40 | min_replay_memory_size: 1000 41 | minibatch_size: 64 42 | update_target_every: 5 43 | memory_fraction: 0.20 44 | buffer_capacity: 100_000 45 | batch_size: 64 46 | 47 | agents: 48 | f1: 49 | camera_params: 50 | width: 640 51 | height: 480 52 | center_image: 320 53 | raw_image: False 54 | image_resizing: 100 55 | new_image_size: 32 56 | num_regions: 16 57 | lower_limit: 220 58 | 59 | states: 60 | image: 61 | 0: [3] 62 | sp1: 63 | 0: [10] 64 | sp3: 65 | 0: [5, 15, 22] 66 | sp5: 67 | 0: [3, 5, 10, 15, 20] 68 | spn: 69 | 0: [10] 70 | 71 | actions: 72 | simple: 73 | 0: [3, 0] 74 | 1: [2, 1] 75 | 2: [2, -1] 76 | medium: 77 | 0: [3, 0] 78 | 1: [2, 1] 79 | 2: [2, -1] 80 | 3: [1, 1.5] 81 | 4: [1, -1.5] 82 | hard: 83 | 0: [3, 0] 84 | 1: [2, 1] 85 | 2: [2, -1] 86 | 3: [1.5, 1] 87 | 4: [1.5, -1] 88 | 5: [1, -1.5] 89 | 6: [1, -1.5] 90 | test: 91 | 0: [0, 0] 92 | 93 | rewards: 94 | followline_center: 95 | from_10: 10 96 | from_02: 2 97 | from_01: 1 98 | penal: -100 99 | min_reward: 5_000 100 | highest_reward: 100 101 | 102 | gazebo_environments: 103 | simple: 104 | env_name: F1Env-v0 105 | circuit_name: simple 106 | launchfile: simple_circuit.launch 107 | environment_folder: f1 108 | robot_name: f1_renault 109 | model_state_name: f1_renault # f1_renault_multicamera_multilaser 110 | start_pose: 0 # 0, 1, 2, 3, 4 111 | alternate_pose: False 112 | estimated_steps: 50 113 | sensor: camera 114 | save_episodes: 5 115 | save_every_step: 10 116 | lap_completed: False 117 | save_model: True 118 | save_positions: True 119 | debug_level: DEBUG 120 | telemetry: False 121 | telemetry_mask: False 122 | plotter_graphic: False 123 | circuit_positions_set: 124 | 0: [53.462, -41.988, 0.004, 0, 0, 1.57, -1.57] 125 | 1: [53.462, -8.734, 0.004, 0, 0, 1.57, -1.57] 126 | 2: [39.712, -30.741, 0.004, 0, 0, 1.56, 1.56] 127 | 3: [-6.861, -36.481, 0.004, 0, 0.01, -0.858, 0.613] 128 | 4: [20.043, 37.130, 0.003, 0, 0.103, 
-1.4383, -1.4383] 129 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/f1/models/reset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from rl_studio.agents.utils import ( 4 | print_messages, 5 | ) 6 | from rl_studio.envs.gazebo.f1.models.f1_env import F1Env 7 | 8 | 9 | class Reset(F1Env): 10 | """ 11 | Works for Follow Line and Follow Lane tasks 12 | """ 13 | 14 | def reset_f1_state_image(self): 15 | """ 16 | reset for 17 | - State: Image 18 | - tasks: FollowLane and FollowLine 19 | """ 20 | self._gazebo_reset() 21 | # === POSE === 22 | if self.alternate_pose: 23 | self._gazebo_set_random_pose_f1_follow_rigth_lane() 24 | else: 25 | self._gazebo_set_fix_pose_f1_follow_right_lane() 26 | 27 | self._gazebo_unpause() 28 | 29 | ##==== get image from sensor camera 30 | f1_image_camera, _ = self.f1gazeboimages.get_camera_info() 31 | self._gazebo_pause() 32 | 33 | ##==== calculating State 34 | # image as observation 35 | state = np.array( 36 | self.f1gazeboimages.image_preprocessing_black_white_32x32( 37 | f1_image_camera.data, self.height 38 | ) 39 | ) 40 | state_size = state.shape 41 | 42 | return state, state_size 43 | 44 | def reset_f1_state_sp(self): 45 | """ 46 | reset for 47 | - State: Simplified perception 48 | - tasks: FollowLane and FollowLine 49 | """ 50 | self._gazebo_reset() 51 | # === POSE === 52 | if self.alternate_pose: 53 | self._gazebo_set_random_pose_f1_follow_rigth_lane() 54 | else: 55 | self._gazebo_set_fix_pose_f1_follow_right_lane() 56 | 57 | self._gazebo_unpause() 58 | 59 | ##==== get image from sensor camera 60 | f1_image_camera, _ = self.f1gazeboimages.get_camera_info() 61 | self._gazebo_pause() 62 | 63 | ##==== calculating State 64 | # simplified perception as observation 65 | points_in_red_line, centrals_normalized = self.simplifiedperception.processed_image( 66 | f1_image_camera.data, self.height, self.width, self.x_row, self.center_image 67 | ) 68 | self._gazebo_pause() 69 | states = centrals_normalized 70 | state_size = len(states) 71 | 72 | return states, state_size 73 | 74 | def reset_f1_state_sp_line(self): 75 | """ 76 | reset for 77 | - State: Simplified perception 78 | - tasks: FollowLane and FollowLine 79 | """ 80 | self._gazebo_reset() 81 | # === POSE === 82 | if self.alternate_pose: 83 | self._gazebo_set_random_pose_f1_follow_rigth_lane() 84 | else: 85 | self._gazebo_set_fix_pose_f1_follow_right_lane() 86 | 87 | ##==== get image from sensor camera 88 | f1_image_camera, _ = self.f1gazeboimages.get_camera_info() 89 | f1_image_camera, _ = self.f1gazeboimages.get_camera_info() 90 | 91 | ##==== get center 92 | points_in_red_line, _ = self.simplifiedperception.processed_image( 93 | f1_image_camera.data, self.height, self.width, self.x_row, self.center_image 94 | ) 95 | 96 | if self.state_space == "spn": 97 | self.point = points_in_red_line[self.poi] 98 | else: 99 | self.point = points_in_red_line[0] 100 | 101 | ##==== get State 102 | ##==== simplified perception as observation 103 | states = self.simplifiedperception.calculate_observation( 104 | points_in_red_line, self.center_image, self.pixel_region 105 | ) 106 | 107 | state_size = len(states) 108 | 109 | return states, state_size 110 | -------------------------------------------------------------------------------- /rl_studio/config/config_training_followlane_qlearn_carla.yaml: -------------------------------------------------------------------------------- 1 | settings: 2 | mode: training 
# training, retraining 3 | task: follow_lane_carla # follow_line_gazebo, follow_lane_gazebo 4 | algorithm: qlearn # qlearn 5 | simulator: carla # openai, carla, gazebo, sumo 6 | environment_set: carla_environments # gazebo_environments, carla_environments 7 | env: follow_lane # Town01, simple, nurburgring, montreal, curves, simple_laser, manual, autoparking 8 | agent: auto_carla # f1, autoparkingRL, auto_carla, mountain_car, robot_mesh, cartpole, turtlebot 9 | actions: carla_simple # simple, medium, hard, test, autoparking_simple 10 | states: sp4 # sp1 (simplified perception with 1 point), sp3 (simplified perception with 3 points), spn (simplified perception with n points) 11 | rewards: follow_right_lane_only_center # 12 | framework: _ 13 | total_episodes: 3 14 | training_time: 6 15 | models_dir: "./checkpoints" 16 | logs_dir: "./logs" 17 | metrics_dir: "./metrics" 18 | recorder_carla_dir: "./recorders" 19 | 20 | ros: 21 | ros_master_uri: "11311" 22 | gazebo_master_uri: "11345" 23 | 24 | carla: 25 | carla_server: localhost 26 | carla_client: 2000 27 | 28 | retraining: 29 | qlearn: 30 | retrain_qlearn_model_name: "20230123-161229_Circuit-simple_States-sp1_Actions-simple_Rewards-follow_right_lane_only_center_epsilon-0.399_epoch-291_step-15001_reward-136707-qtable.npy" 31 | 32 | inference: 33 | qlearn: 34 | inference_qlearn_model_name: 35 | 36 | algorithm: 37 | qlearn: 38 | alpha: 0.7 39 | epsilon: 0.95 40 | epsilon_min: 0.05 41 | gamma: 0.9 42 | 43 | agents: 44 | auto_carla: 45 | camera_params: 46 | width: 640 47 | height: 480 48 | center_image: 320 49 | raw_image: False 50 | image_resizing: 100 51 | new_image_size: 32 52 | num_regions: 16 53 | lower_limit: 220 54 | 55 | states: 56 | sp1: 57 | 0: [50] 58 | sp4: 59 | 0: [30, 60, 100, 120] 60 | sp5: 61 | 0: [3, 5, 10, 15, 20] 62 | spn: 63 | 0: [50, 120, 180] 64 | 65 | actions: 66 | carla_simple: 67 | 0: [1, -0.2] 68 | 1: [1, 0] 69 | 2: [1, 0.2] 70 | simple: 71 | 0: [2, -1] 72 | 1: [3, 0] 73 | 2: [2, 1] 74 | medium: 75 | 0: [3, 0] 76 | 1: [2, 1] 77 | 2: [2, -1] 78 | 3: [1, 1.5] 79 | 4: [1, -1.5] 80 | hard: 81 | 0: [3, 0] 82 | 1: [2, 1] 83 | 2: [2, -1] 84 | 3: [1.5, 1] 85 | 4: [1.5, -1] 86 | 5: [1, -1.5] 87 | 6: [1, -1.5] 88 | test: 89 | 0: [0, 0] 90 | 91 | rewards: 92 | follow_right_lane_only_center: 93 | from_10: 10 94 | from_02: 2 95 | from_01: 1 96 | penal: -100 97 | min_reward: 5_000 98 | highest_reward: 100 99 | follow_right_lane_center_v_step: 100 | from_10: 10 101 | from_02: 2 102 | from_01: 1 103 | penal: -100 104 | min_reward: 5_000 105 | highest_reward: 100 106 | 107 | carla_environments: 108 | follow_lane: 109 | env_name: CarlaEnv-v0 110 | town: Town01 #Town01, Town02, Town03, Town04, Town05, Town10HD 111 | # Town01_Opt, Town02_Opt, Town03_Opt, Town04_Opt, Town05_Opt, Town10HD_Opt 112 | car: model1 113 | #sync_mode: True 114 | weather: ClearNoon #dynamic, argparse 115 | traffic_pedestrians: False 116 | city_lights: False 117 | car_lights: False 118 | estimated_steps: 5 119 | save_episodes: 10 120 | save_every_step: 1000 121 | init_pose: 122 | goal_pose: 123 | filter: vehicle.* 124 | generation: "2" 125 | rolename: "hero" #name 126 | gamma: 2.2 #for camera 127 | sync: True #syncronous mode or async 128 | alternate_pose: False 129 | waypoints_meters: 5 #distance between waypoints in meters 130 | waypoints_init: 839 131 | waypoints_target: 959 #961 132 | waypoints_lane_id: -1 133 | waypoints_road_id: 8 134 | -------------------------------------------------------------------------------- /rl_studio/envs/carla/utils/synchronous_mode.py: 
-------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import sys 4 | 5 | try: 6 | sys.path.append( 7 | glob.glob( 8 | "../carla/dist/carla-*%d.%d-%s.egg" 9 | % ( 10 | sys.version_info.major, 11 | sys.version_info.minor, 12 | "win-amd64" if os.name == "nt" else "linux-x86_64", 13 | ) 14 | )[0] 15 | ) 16 | except IndexError: 17 | pass 18 | 19 | import carla 20 | 21 | import random 22 | 23 | try: 24 | import pygame 25 | except ImportError: 26 | raise RuntimeError("cannot import pygame, make sure pygame package is installed") 27 | 28 | try: 29 | import numpy as np 30 | except ImportError: 31 | raise RuntimeError("cannot import numpy, make sure numpy package is installed") 32 | 33 | try: 34 | import queue 35 | except ImportError: 36 | import Queue as queue 37 | 38 | 39 | class CarlaSyncMode(object): 40 | """ 41 | Context manager to synchronize output from different sensors. Synchronous 42 | mode is enabled as long as we are inside this context 43 | 44 | with CarlaSyncMode(world, sensors) as sync_mode: 45 | while True: 46 | data = sync_mode.tick(timeout=1.0) 47 | 48 | """ 49 | 50 | def __init__(self, world, *sensors, **kwargs): 51 | self.world = world 52 | self.sensors = sensors 53 | self.frame = None 54 | self.delta_seconds = 1.0 / kwargs.get("fps", 20) 55 | self._queues = [] 56 | self._settings = None 57 | 58 | def __enter__(self): 59 | self._settings = self.world.get_settings() 60 | self.frame = self.world.apply_settings( 61 | carla.WorldSettings( 62 | no_rendering_mode=False, 63 | synchronous_mode=True, 64 | fixed_delta_seconds=self.delta_seconds, 65 | ) 66 | ) 67 | 68 | def make_queue(register_event): 69 | q = queue.Queue() 70 | register_event(q.put) 71 | self._queues.append(q) 72 | 73 | make_queue(self.world.on_tick) 74 | for sensor in self.sensors: 75 | make_queue(sensor.listen) 76 | return self 77 | 78 | def tick(self, timeout): 79 | self.frame = self.world.tick() 80 | data = [self._retrieve_data(q, timeout) for q in self._queues] 81 | assert all(x.frame == self.frame for x in data) 82 | return data 83 | 84 | def __exit__(self, *args, **kwargs): 85 | self.world.apply_settings(self._settings) 86 | 87 | def _retrieve_data(self, sensor_queue, timeout): 88 | while True: 89 | data = sensor_queue.get(timeout=timeout) 90 | if data.frame == self.frame: 91 | return data 92 | 93 | 94 | def draw_image(surface, image, blend=False): 95 | array = np.frombuffer(image.raw_data, dtype=np.dtype("uint8")) 96 | array = np.reshape(array, (image.height, image.width, 4)) 97 | array = array[:, :, :3] 98 | array = array[:, :, ::-1] 99 | image_surface = pygame.surfarray.make_surface(array.swapaxes(0, 1)) 100 | if blend: 101 | image_surface.set_alpha(100) 102 | surface.blit(image_surface, (0, 0)) 103 | 104 | 105 | def get_font(): 106 | fonts = [x for x in pygame.font.get_fonts()] 107 | default_font = "ubuntumono" 108 | font = default_font if default_font in fonts else fonts[0] 109 | font = pygame.font.match_font(font) 110 | return pygame.font.Font(font, 14) 111 | 112 | 113 | def should_quit(): 114 | for event in pygame.event.get(): 115 | if event.type == pygame.QUIT: 116 | return True 117 | elif event.type == pygame.KEYUP: 118 | if event.key == pygame.K_ESCAPE: 119 | return True 120 | return False 121 | -------------------------------------------------------------------------------- /rl_studio/algorithms/ppo.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import gym 4 | from 
torch import nn 5 | from torch.nn import functional as F 6 | import matplotlib.pyplot as plt 7 | from torch.utils import tensorboard 8 | import pickle 9 | 10 | 11 | def mish(input): 12 | return input * torch.tanh(F.softplus(input)) 13 | 14 | 15 | class Mish(nn.Module): 16 | def __init__(self): super().__init__() 17 | 18 | def forward(self, input): return mish(input) 19 | 20 | 21 | # helper function to convert numpy arrays to tensors 22 | def t(x): 23 | return torch.from_numpy(x).float() 24 | 25 | def get_dist(x): 26 | return torch.distributions.Categorical(probs=x) 27 | 28 | # Actor module, categorical actions only 29 | class Actor(nn.Module): 30 | def __init__(self, state_dim, n_actions, activation=nn.Tanh): 31 | super().__init__() 32 | self.model = nn.Sequential( 33 | nn.Linear(state_dim, 64), 34 | activation(), 35 | nn.Linear(64, 32), 36 | activation(), 37 | nn.Linear(32, n_actions), 38 | nn.Softmax() 39 | ) 40 | self.adam_actor = torch.optim.Adam(self.parameters(), lr=3e-4) 41 | torch.manual_seed(1) 42 | 43 | def train(self, w, prev_prob_act, prob_act, advantage, global_steps, epsilon): 44 | actor_loss = self.policy_loss(prev_prob_act.detach(), prob_act, advantage.detach(), epsilon) 45 | w.add_scalar("loss/actor_loss", actor_loss, global_step=global_steps) 46 | self.adam_actor.zero_grad() 47 | actor_loss.backward() 48 | # clip_grad_norm_(adam_actor, max_grad_norm) 49 | # w.add_histogram("gradients/actor", 50 | # torch.cat([p.grad.view(-1) for p in self.parameters()]), global_step=global_steps) 51 | self.adam_actor.step() 52 | return actor_loss 53 | 54 | def clip_grad_norm_(module, max_grad_norm): 55 | nn.utils.clip_grad_norm_([p for g in module.param_groups for p in g["params"]], max_grad_norm) 56 | 57 | def policy_loss(self, old_log_prob, log_prob, advantage, eps): 58 | ratio = (log_prob - old_log_prob).exp() 59 | clipped = torch.clamp(ratio, 1 - eps, 1 + eps) * advantage 60 | 61 | m = torch.min(ratio * advantage, clipped) 62 | return -m 63 | 64 | def forward(self, X): 65 | return self.model(X) 66 | 67 | def load_model(self, actor_file_path): 68 | model = open(actor_file_path, "rb") 69 | 70 | self.model = pickle.load(model) 71 | 72 | print(f"\n\nMODEL LOADED.") 73 | 74 | def inference(self, state): 75 | probs = self(t(state)) 76 | dist = get_dist(probs) 77 | action = dist.sample() 78 | 79 | return action 80 | 81 | 82 | # Critic module 83 | class Critic(nn.Module): 84 | def __init__(self, state_dim, activation=nn.Tanh): 85 | super().__init__() 86 | self.model = nn.Sequential( 87 | nn.Linear(state_dim, 64), 88 | activation(), 89 | nn.Linear(64, 32), 90 | activation(), 91 | nn.Linear(32, 1) 92 | ) 93 | self.adam_critic = torch.optim.Adam(self.parameters(), lr=1e-3) 94 | torch.manual_seed(1) 95 | 96 | def train(self, w, advantage, global_steps): 97 | critic_loss = advantage.pow(2).mean() 98 | w.add_scalar("loss/critic_loss", critic_loss, global_step=global_steps) 99 | self.adam_critic.zero_grad() 100 | critic_loss.backward() 101 | # clip_grad_norm_(adam_critic, max_grad_norm) 102 | # w.add_histogram("gradients/critic", 103 | # torch.cat([p.data.view(-1) for p in self.parameters()]), global_step=global_steps) 104 | self.adam_critic.step() 105 | return critic_loss 106 | 107 | def forward(self, X): 108 | return self.model(X) 109 | -------------------------------------------------------------------------------- /rl_studio/agents/turtlebot/circuit2_turtlebot_lidar_qlearn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import 
time 3 | 4 | import gym 5 | import numpy 6 | 7 | import liveplot 8 | import qlearn 9 | 10 | 11 | def render(): 12 | render_skip = 0 # Skip first X episodes. 13 | render_interval = 50 # Show render Every Y episodes. 14 | render_episodes = 10 # Show Z episodes every rendering. 15 | 16 | if (x % render_interval == 0) and (x != 0) and (x > render_skip): 17 | env.render() 18 | elif ( 19 | ((x - render_episodes) % render_interval == 0) 20 | and (x != 0) 21 | and (x > render_skip) 22 | and (render_episodes < x) 23 | ): 24 | env.render(close=True) 25 | 26 | 27 | if __name__ == "__main__": 28 | 29 | env = gym.make("GazeboCircuit2TurtlebotLidar-v0") 30 | 31 | outdir = "/tmp/gazebo_gym_experiments" 32 | env = gym.wrappers.Monitor(env, outdir, force=True) 33 | plotter = liveplot.LivePlot(outdir) 34 | 35 | last_time_steps = numpy.ndarray(0) 36 | 37 | qlearn = qlearn.QLearn( 38 | actions=range(env.action_space.n), alpha=0.2, gamma=0.8, epsilon=0.9 39 | ) 40 | 41 | initial_epsilon = qlearn.epsilon 42 | 43 | epsilon_discount = 0.9986 44 | 45 | start_time = time.time() 46 | total_episodes = 10000 47 | highest_reward = 0 48 | 49 | for x in range(total_episodes): 50 | done = False 51 | 52 | cumulated_reward = 0 # Should going forward give more reward then L/R ? 53 | 54 | observation = env.reset() 55 | 56 | if qlearn.epsilon > 0.05: 57 | qlearn.epsilon *= epsilon_discount 58 | 59 | # render() #defined above, not env.render() 60 | 61 | state = "".join(map(str, observation)) 62 | 63 | for i in range(1500): 64 | 65 | # Pick an action based on the current state 66 | action = qlearn.chooseAction(state) 67 | 68 | # Execute the action and get feedback 69 | observation, reward, done, info = env.step(action) 70 | cumulated_reward += reward 71 | 72 | if highest_reward < cumulated_reward: 73 | highest_reward = cumulated_reward 74 | 75 | nextState = "".join(map(str, observation)) 76 | 77 | qlearn.learn(state, action, reward, nextState) 78 | 79 | env._flush(force=True) 80 | 81 | if not (done): 82 | state = nextState 83 | else: 84 | last_time_steps = numpy.append(last_time_steps, [int(i + 1)]) 85 | break 86 | 87 | if x % 100 == 0: 88 | plotter.plot(env) 89 | 90 | m, s = divmod(int(time.time() - start_time), 60) 91 | h, m = divmod(m, 60) 92 | print( 93 | "EP: " 94 | + str(x + 1) 95 | + " - [alpha: " 96 | + str(round(qlearn.alpha, 2)) 97 | + " - gamma: " 98 | + str(round(qlearn.gamma, 2)) 99 | + " - epsilon: " 100 | + str(round(qlearn.epsilon, 2)) 101 | + "] - Reward: " 102 | + str(cumulated_reward) 103 | + " Time: %d:%02d:%02d" % (h, m, s) 104 | ) 105 | 106 | # Github table content 107 | print( 108 | "\n|" 109 | + str(total_episodes) 110 | + "|" 111 | + str(qlearn.alpha) 112 | + "|" 113 | + str(qlearn.gamma) 114 | + "|" 115 | + str(initial_epsilon) 116 | + "*" 117 | + str(epsilon_discount) 118 | + "|" 119 | + str(highest_reward) 120 | + "| PICTURE |" 121 | ) 122 | 123 | l = last_time_steps.tolist() 124 | l.sort() 125 | 126 | # print("Parameters: a="+str) 127 | print("Overall score: {:0.2f}".format(last_time_steps.mean())) 128 | print( 129 | "Best 100 score: {:0.2f}".format( 130 | reduce(lambda x, y: x + y, l[-100:]) / len(l[-100:]) 131 | ) 132 | ) 133 | 134 | env.close() 135 | -------------------------------------------------------------------------------- /rl_studio/envs/carla/utils/plot_topology.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # This work is licensed under the terms of the MIT license. 4 | # For a copy, see . 
5 | 6 | import glob 7 | import os 8 | import sys 9 | 10 | try: 11 | sys.path.append( 12 | glob.glob( 13 | "../carla/dist/carla-*%d.%d-%s.egg" 14 | % ( 15 | sys.version_info.major, 16 | sys.version_info.minor, 17 | "win-amd64" if os.name == "nt" else "linux-x86_64", 18 | ) 19 | )[0] 20 | ) 21 | except IndexError: 22 | pass 23 | 24 | import carla 25 | 26 | import argparse 27 | 28 | 29 | def main(): 30 | argparser = argparse.ArgumentParser() 31 | argparser.add_argument( 32 | "--host", 33 | metavar="H", 34 | default="127.0.0.1", 35 | help="IP of the host server (default: 127.0.0.1)", 36 | ) 37 | argparser.add_argument( 38 | "-p", 39 | "--port", 40 | metavar="P", 41 | default=2000, 42 | type=int, 43 | help="TCP port to listen to (default: 2000)", 44 | ) 45 | argparser.add_argument( 46 | "-t", 47 | "--town", 48 | metavar="T", 49 | default="Town01", 50 | help="load Town to work with", 51 | ) 52 | args = argparser.parse_args() 53 | 54 | # Approximate distance between the waypoints 55 | WAYPOINT_DISTANCE = 5.0 # in meters 56 | 57 | try: 58 | client = carla.Client(args.host, args.port) 59 | client.set_timeout(2.0) 60 | 61 | # world = client.get_world() 62 | world = client.load_world(args.town) 63 | carla_map = world.get_map() 64 | 65 | import matplotlib.pyplot as plt 66 | 67 | # plt.subplot(211) 68 | # Invert the y axis since we follow UE4 coordinates 69 | plt.gca().invert_yaxis() 70 | plt.margins(x=0.7, y=0) 71 | 72 | # GET WAYPOINTS IN THE MAP ########################################## 73 | # Returns a list of waypoints positioned on the center of the lanes 74 | # all over the map with an approximate distance between them. 75 | topology = carla_map.get_topology() 76 | road_list = [] 77 | 78 | for wp_pair in topology: 79 | current_wp = wp_pair[0] 80 | # Check if there is a road with no previus road, this can happen 81 | # in opendrive. Then just continue. 82 | if current_wp is None: 83 | continue 84 | # First waypoint on the road that goes from wp_pair[0] to wp_pair[1]. 85 | current_road_id = current_wp.road_id 86 | wps_in_single_road = [current_wp] 87 | # While current_wp has the same road_id (has not arrived to next road). 88 | while current_wp.road_id == current_road_id: 89 | # Check for next waypoints in aprox distance. 90 | available_next_wps = current_wp.next(WAYPOINT_DISTANCE) 91 | # If there is next waypoint/s? 92 | if available_next_wps: 93 | # We must take the first ([0]) element because next(dist) can 94 | # return multiple waypoints in intersections. 95 | current_wp = available_next_wps[0] 96 | wps_in_single_road.append(current_wp) 97 | else: # If there is no more waypoints we can stop searching for more. 
98 | break 99 | road_list.append(wps_in_single_road) 100 | 101 | # Plot each road (on a different color by default) 102 | for road in road_list: 103 | plt.plot( 104 | [wp.transform.location.x for wp in road], 105 | [wp.transform.location.y for wp in road], 106 | ) 107 | 108 | plt.show() 109 | 110 | finally: 111 | pass 112 | 113 | 114 | if __name__ == "__main__": 115 | try: 116 | main() 117 | finally: 118 | print("Done.") 119 | -------------------------------------------------------------------------------- /rl_studio/config/config_training_followlane_qlearn_f1_gazebo.yaml: -------------------------------------------------------------------------------- 1 | settings: 2 | mode: retraining # training, retraining 3 | task: follow_lane_gazebo # follow_line_gazebo 4 | algorithm: qlearn # qlearn 5 | simulator: gazebo # openai, carla, gazebo, sumo 6 | environment_set: gazebo_environments # gazebo_environments, carla_environments 7 | env: simple # simple, nurburgring, montreal, curves, simple_laser, manual, autoparking 8 | agent: f1 # f1, autoparkingRL, auto_carla, mountain_car, robot_mesh, cartpole, turtlebot 9 | actions: simple # simple, medium, hard, test, autoparking_simple 10 | states: sp1 # sp1 (simplified perception with 1 point), sp3 (simplified perception with 3 points), spn (simplified perception with n points) 11 | rewards: follow_right_lane_only_center # 12 | framework: _ 13 | total_episodes: 1_000 14 | training_time: 6 15 | models_dir: "./checkpoints" 16 | logs_dir: "./logs" 17 | metrics_dir: "./metrics" 18 | 19 | ros: 20 | ros_master_uri: "11311" 21 | gazebo_master_uri: "11345" 22 | 23 | retraining: 24 | qlearn: 25 | retrain_qlearn_model_name: "20230123-161229_Circuit-simple_States-sp1_Actions-simple_Rewards-follow_right_lane_only_center_epsilon-0.399_epoch-291_step-15001_reward-136707-qtable.npy" 26 | 27 | inference: 28 | qlearn: 29 | inference_qlearn_model_name: 30 | 31 | algorithm: 32 | qlearn: 33 | alpha: 0.2 34 | epsilon: 0.95 35 | epsilon_min: 0.05 36 | gamma: 0.9 37 | 38 | agents: 39 | f1: 40 | camera_params: 41 | width: 640 42 | height: 480 43 | center_image: 320 44 | raw_image: False 45 | image_resizing: 100 46 | new_image_size: 32 47 | num_regions: 16 48 | lower_limit: 220 49 | 50 | states: 51 | sp1: 52 | 0: [50] 53 | sp3: 54 | 0: [5, 15, 22] 55 | sp5: 56 | 0: [3, 5, 10, 15, 20] 57 | spn: 58 | 0: [10] 59 | 60 | actions: 61 | simple: 62 | 0: [3, 0] 63 | 1: [2, 1] 64 | 2: [2, -1] 65 | medium: 66 | 0: [3, 0] 67 | 1: [2, 1] 68 | 2: [2, -1] 69 | 3: [1, 1.5] 70 | 4: [1, -1.5] 71 | hard: 72 | 0: [3, 0] 73 | 1: [2, 1] 74 | 2: [2, -1] 75 | 3: [1.5, 1] 76 | 4: [1.5, -1] 77 | 5: [1, -1.5] 78 | 6: [1, -1.5] 79 | test: 80 | 0: [0, 0] 81 | 82 | rewards: 83 | follow_right_lane_only_center: 84 | from_10: 10 85 | from_02: 2 86 | from_01: 1 87 | penal: -100 88 | min_reward: 5_000 89 | highest_reward: 100 90 | follow_right_lane_center_v_step: 91 | from_10: 10 92 | from_02: 2 93 | from_01: 1 94 | penal: -100 95 | min_reward: 5_000 96 | highest_reward: 100 97 | 98 | gazebo_environments: 99 | simple: 100 | env_name: F1Env-v0 101 | circuit_name: simple 102 | launchfile: simple_circuit_no_wall.launch #simple_circuit.launch 103 | environment_folder: f1 104 | robot_name: f1_renault 105 | model_state_name: f1_renault_multicamera_multilaser # f1_renault, 106 | start_pose: 0 # 0, 1, 2, 3, 4 107 | alternate_pose: False 108 | estimated_steps: 15_000 109 | sensor: camera 110 | save_episodes: 10 111 | save_every_step: 1000 112 | lap_completed: False 113 | save_model: True 114 | save_positions: True 115 | 
debug_level: DEBUG 116 | telemetry: False 117 | telemetry_mask: False 118 | plotter_graphic: False 119 | circuit_positions_set: 120 | 0: [52.462, -41.988, 0.004, 0, 0, 1.57, -1.57] 121 | #0: [52.800, -12.734, 0.004, 0, 0, 1.57, -1.57] # near to first curve 122 | #0: [52.460, -8.734, 0.004, 0, 0, 1.57, -1.57] # Finish line 123 | #0: [53.462, -41.988, 0.004, 0, 0, 1.57, -1.57] 124 | 1: [52.97, -42.06, 0.004, 0, 0, 1.57, -1.57] 125 | #1: [53.462, -8.734, 0.004, 0, 0, 1.57, -1.57] 126 | #2: [39.712, -30.741, 0.004, 0, 0, 1.56, 1.56] 127 | 2: [40.2, -30.741, 0.004, 0, 0, 1.56, 1.56] 128 | #3: [-6.861, -36.481, 0.004, 0, 0.01, -0.858, 0.613] 129 | 3: [0, 31.15, 0.004, 0, 0.01, 0, 0.31] 130 | #4: [20.043, 37.130, 0.003, 0, 0.103, -1.4383, -1.4383] 131 | 4: [19.25, 43.50, 0.004, 0, 0.0, 1.57, -1.69] 132 | 5: [52.800, -35.486, 0.004, 0, 0, 1.57, -1.57] # near to first curve 133 | -------------------------------------------------------------------------------- /rl_studio/config/config_training_followline_dqn_f1_gazebo.yaml: -------------------------------------------------------------------------------- 1 | settings: 2 | mode: retraining # training, retraining, inference 3 | task: follow_line_gazebo # follow_line_gazebo, follow_lane_gazebo, autoparking_gazebo 4 | algorithm: dqn # qlearn, dqn, ddpg, ppo 5 | simulator: gazebo # openai, carla, gazebo, sumo 6 | environment_set: gazebo_environments # gazebo_environments, carla_environments 7 | env: simple # simple, nurburgring, montreal, curves, simple_laser, manual, autoparking 8 | agent: f1 # f1, autoparkingRL, auto_carla, mountain_car, robot_mesh, cartpole, turtlebot 9 | actions: hard # simple, medium, hard, test 10 | states: sp10 #image, sp1 (simplified perception with 1 point), sp3 (simplified perception with 3 points), spn (simplified perception with n points) 11 | rewards: followline_center # followline_center 12 | framework: TensorFlow # TensorFlow, Pytorch 13 | recorder_carla_dir: 14 | total_episodes: 100000 15 | training_time: 90 16 | models_dir: "./checkpoints" 17 | logs_dir: "./logs" 18 | metrics_dir: "./metrics" 19 | debug_stats: false 20 | show_monitoring: true 21 | steps_to_decrease: 30000 22 | decrease_substraction: 0.01 23 | decrease_min: 0.03 24 | reward_params: 25 | function: linear 26 | punish_zig_zag_value: 0.5 27 | punish_ineffective_vel: 0 28 | beta_1: 0.8 29 | 30 | ros: 31 | ros_master_uri: "11311" 32 | gazebo_master_uri: "11345" 33 | 34 | retraining: 35 | dqn: 36 | retrain_dqn_tf_model_name: "DQN_sp_16x16_LAPCOMPLETED_Max3076_Epoch492_inTime20231218-045917.model" 37 | 38 | inference: 39 | dqn: 40 | inference_dqn_tf_model_name: 41 | 42 | algorithm: 43 | dqn: 44 | alpha: 0.9 45 | gamma: 0.9 46 | epsilon: 0.99 47 | epsilon_discount: 0.99 48 | epsilon_min: 0.01 49 | model_name: DQN_sp_16x16 50 | replay_memory_size: 50_000 51 | min_replay_memory_size: 64 52 | minibatch_size: 64 53 | update_target_every: 5 54 | memory_fraction: 0.20 55 | buffer_capacity: 100_000 56 | batch_size: 64 57 | 58 | agents: 59 | f1: 60 | camera_params: 61 | width: 640 62 | height: 480 63 | center_image: 320 64 | raw_image: False 65 | image_resizing: 100 66 | new_image_size: 32 67 | num_regions: 16 68 | lower_limit: 220 69 | 70 | states: 71 | image: 72 | 0: [3] 73 | sp1: 74 | 0: [10] 75 | sp3: 76 | 0: [5, 15, 22] 77 | sp5: 78 | 0: [3, 5, 10, 15, 20] 79 | sp10: 80 | 0: [3, 15, 30, 45, 50, 70, 100, 120, 150, 190] 81 | spn: 82 | 0: [10] 83 | 84 | actions: 85 | simple: 86 | 0: [3, 0] 87 | 1: [2, 1] 88 | 2: [2, -1] 89 | medium: 90 | 0: [3, 0] 91 | 1: [2, 1] 92 | 2: 
[2, -1] 93 | 3: [1, 1.5] 94 | 4: [1, -1.5] 95 | hard: 96 | 0: [7, 0] 97 | 1: [4, 1,5] 98 | 2: [4, -1,5] 99 | 3: [3, 1] 100 | 4: [3, -1] 101 | 5: [2, 1] 102 | 6: [2, -1] 103 | 7: [1, 0.5] 104 | 8: [1, -0.5] 105 | test: 106 | 0: [0, 0] 107 | 108 | rewards: 109 | followline_center: 110 | from_10: 10 111 | from_02: 2 112 | from_01: 1 113 | penal: -100 114 | min_reward: 5_000 115 | highest_reward: 100 116 | 117 | gazebo_environments: 118 | simple: 119 | env_name: F1Env-v0 120 | circuit_name: simple 121 | launchfile: simple_circuit.launch 122 | environment_folder: f1 123 | robot_name: f1_renault 124 | model_state_name: f1_renault # f1_renault_multicamera_multilaser 125 | start_pose: 0 # 0, 1, 2, 3, 4 126 | alternate_pose: True 127 | estimated_steps: 5000 128 | sensor: camera 129 | save_episodes: 500 130 | save_every_step: 5 131 | lap_completed: False 132 | save_model: True 133 | save_positions: True 134 | debug_level: DEBUG 135 | telemetry: False 136 | telemetry_mask: False 137 | plotter_graphic: False 138 | circuit_positions_set: 139 | 0: [53.462, -41.988, 0.004, 0, 0, 1.57, -1.57] 140 | 1: [53.462, -8.734, 0.004, 0, 0, 1.57, -1.57] #finish line 141 | 2: [39.712, -30.741, 0.004, 0, 0, 1.56, 1.56] 142 | 3: [-6.861, -36.481, 0.004, 0, 0.01, -0.858, 0.613] 143 | 4: [20.043, 37.130, 0.003, 0, 0.103, -1.4383, -1.4383] 144 | sleep: 0.05 145 | -------------------------------------------------------------------------------- /rl_studio/config/config_inference_followlane_qlearn_f1_gazebo.yaml: -------------------------------------------------------------------------------- 1 | settings: 2 | mode: inference # training, retraining, inference 3 | task: follow_lane_gazebo # follow_line_gazebo, follow_lane_gazebo, autoparking_gazebo 4 | algorithm: qlearn # qlearn, dqn, ddpg, ppo 5 | simulator: gazebo # openai, carla, gazebo, sumo 6 | environment_set: gazebo_environments # gazebo_environments, carla_environments 7 | env: simple # simple, nurburgring, montreal, curves, simple_laser, manual, autoparking 8 | agent: f1 # f1, autoparkingRL, auto_carla, mountain_car, robot_mesh, cartpole, turtlebot 9 | actions: simple # continuous, simple, medium, hard, test, autoparking_simple 10 | states: sp1 #image, sp1 (simplified perception with 1 point), sp3 (simplified perception with 3 points), spn (simplified perception with n points) 11 | rewards: follow_right_lane_only_center # rewards_followline_center 12 | framework: _ 13 | total_episodes: 10 14 | training_time: 6 15 | models_dir: "./checkpoints" 16 | logs_dir: "./logs" 17 | metrics_dir: "./metrics" 18 | 19 | ros: 20 | ros_master_uri: "11311" 21 | gazebo_master_uri: "11345" 22 | 23 | retraining: 24 | qlearn: 25 | retrain_qlearn_model_name: 26 | 27 | inference: 28 | qlearn: 29 | inference_qlearn_model_name: "20230109-154549_Circuit-simple_States-sp1_Actions-simple_Rewards-follow_right_lane_only_center_epsilon-0.76_epoch-2_step-101_reward-919-qtable.npy" 30 | 31 | algorithm: 32 | qlearn: 33 | alpha: 0.2 34 | epsilon: 0.95 35 | epsilon_min: 0.05 36 | gamma: 0.9 37 | 38 | agents: 39 | f1: 40 | camera_params: 41 | width: 640 42 | height: 480 43 | center_image: 320 44 | raw_image: False 45 | image_resizing: 100 46 | new_image_size: 32 47 | num_regions: 16 48 | lower_limit: 220 49 | 50 | states: 51 | sp1: 52 | 0: [10] 53 | sp3: 54 | 0: [5, 15, 22] 55 | sp5: 56 | 0: [3, 5, 10, 15, 20] 57 | spn: 58 | 0: [10] 59 | 60 | actions: 61 | simple: 62 | 0: [3, 0] 63 | 1: [2, 1] 64 | 2: [2, -1] 65 | medium: 66 | 0: [3, 0] 67 | 1: [2, 1] 68 | 2: [2, -1] 69 | 3: [1, 1.5] 70 | 4: [1, -1.5] 
71 | hard: 72 | 0: [3, 0] 73 | 1: [2, 1] 74 | 2: [2, -1] 75 | 3: [1.5, 1] 76 | 4: [1.5, -1] 77 | 5: [1, -1.5] 78 | 6: [1, -1.5] 79 | test: 80 | 0: [0, 0] 81 | 82 | rewards: 83 | follow_right_lane_only_center: 84 | from_10: 10 85 | from_02: 2 86 | from_01: 1 87 | penal: -100 88 | min_reward: 5_000 89 | highest_reward: 100 90 | follow_right_lane_center_v_step: 91 | from_10: 10 92 | from_02: 2 93 | from_01: 1 94 | penal: -100 95 | min_reward: 5_000 96 | highest_reward: 100 97 | 98 | gazebo_environments: 99 | simple: 100 | env_name: F1Env-v0 101 | circuit_name: simple 102 | launchfile: simple_circuit_no_wall.launch #simple_circuit.launch 103 | environment_folder: f1 104 | robot_name: f1_renault 105 | model_state_name: f1_renault_multicamera_multilaser # f1_renault, 106 | start_pose: 0 # 0, 1, 2, 3, 4 107 | alternate_pose: False 108 | estimated_steps: 100 109 | sensor: camera 110 | save_episodes: 1 111 | save_every_step: 10 112 | lap_completed: False 113 | save_model: True 114 | save_positions: True 115 | debug_level: DEBUG 116 | telemetry: False 117 | telemetry_mask: False 118 | plotter_graphic: False 119 | circuit_positions_set: 120 | 0: [52.462, -41.988, 0.004, 0, 0, 1.57, -1.57] 121 | #0: [52.800, -12.734, 0.004, 0, 0, 1.57, -1.57] # near to first curve 122 | #0: [52.800, -8.734, 0.004, 0, 0, 1.57, -1.57] # Finish line 123 | #0: [53.462, -41.988, 0.004, 0, 0, 1.57, -1.57] 124 | 1: [52.97, -42.06, 0.004, 0, 0, 1.57, -1.57] 125 | #1: [53.462, -8.734, 0.004, 0, 0, 1.57, -1.57] 126 | #2: [39.712, -30.741, 0.004, 0, 0, 1.56, 1.56] 127 | 2: [40.2, -30.741, 0.004, 0, 0, 1.56, 1.56] 128 | #3: [-6.861, -36.481, 0.004, 0, 0.01, -0.858, 0.613] 129 | 3: [0, 31.15, 0.004, 0, 0.01, 0, 0.31] 130 | #4: [20.043, 37.130, 0.003, 0, 0.103, -1.4383, -1.4383] 131 | 4: [19.25, 43.50, 0.004, 0, 0.0, 1.57, -1.69] 132 | 5: [52.800, -35.486, 0.004, 0, 0, 1.57, -1.57] # near to first curve 133 | -------------------------------------------------------------------------------- /rl_studio/config/config_inference_followline_ddpg_f1_gazebo.yaml: -------------------------------------------------------------------------------- 1 | settings: 2 | mode: inference # training, retraining, inference 3 | task: follow_line_gazebo # follow_line_gazebo, follow_lane_gazebo, autoparking_gazebo 4 | algorithm: ddpg # qlearn, dqn, ddpg, ppo 5 | simulator: gazebo # openai, carla, gazebo, sumo 6 | environment_set: gazebo_environments # gazebo_environments, carla_environments 7 | env: simple # simple, nurburgring, montreal, curves, simple_laser, manual, autoparking 8 | agent: f1 # f1, autoparkingRL, auto_carla, mountain_car, robot_mesh, cartpole, turtlebot 9 | actions: continuous # continuous, simple, medium, hard, test, autoparking_simple 10 | states: image #image, sp1 (simplified perception with 1 point), sp3 (simplified perception with 3 points), spn (simplified perception with n points) 11 | rewards: followline_center # rewards_followline_center 12 | framework: TensorFlow # TensorFlow, Pytorch 13 | total_episodes: 10 14 | training_time: 6 15 | models_dir: "./checkpoints" 16 | logs_dir: "./logs" 17 | metrics_dir: "./metrics" 18 | 19 | ros: 20 | ros_master_uri: "11311" 21 | gazebo_master_uri: "11345" 22 | 23 | retraining: 24 | ddpg: 25 | retrain_ddpg_tf_actor_model_name: 26 | retrain_ddpg_tf_critic_model_name: 27 | 28 | inference: 29 | ddpg: 30 | inference_ddpg_tf_actor_model_name: 
"20230111_DDPG_Actor_conv2d32x64_Critic_conv2d32x64_BATCH_ACTOR_Max--100_Epoch-5_State-image_Actions-continuous_Rewards-followline_center_inTime-20230111-183048.h5" 31 | inference_ddpg_tf_critic_model_name: "20230111_DDPG_Actor_conv2d32x64_Critic_conv2d32x64_BATCH_CRITIC_Max--100_Epoch-5_State-image_Actions-continuous_Rewards-followline_center_inTime-20230111-183048.h5" 32 | 33 | algorithm: 34 | ddpg: 35 | gamma: 0.9 36 | tau: 0.005 37 | std_dev: 0.2 38 | model_name: DDPG_Actor_conv2d32x64_Critic_conv2d32x64 39 | replay_memory_size: 50_000 40 | memory_fraction: 0.20 41 | critic_lr: 0.002 42 | actor_lr: 0.001 43 | buffer_capacity: 100_000 44 | batch_size: 64 45 | 46 | agents: 47 | f1: 48 | camera_params: 49 | width: 640 50 | height: 480 51 | center_image: 320 52 | raw_image: False 53 | image_resizing: 100 54 | new_image_size: 32 55 | num_regions: 16 56 | lower_limit: 220 57 | 58 | states: 59 | image: 60 | 0: [3] 61 | sp1: 62 | 0: [10] 63 | sp3: 64 | 0: [5, 15, 22] 65 | sp5: 66 | 0: [3, 5, 10, 15, 20] 67 | spn: 68 | 0: [10] 69 | 70 | actions: 71 | simple: 72 | 0: [3, 0] 73 | 1: [2, 1] 74 | 2: [2, -1] 75 | medium: 76 | 0: [3, 0] 77 | 1: [2, 1] 78 | 2: [2, -1] 79 | 3: [1, 1.5] 80 | 4: [1, -1.5] 81 | hard: 82 | 0: [3, 0] 83 | 1: [2, 1] 84 | 2: [2, -1] 85 | 3: [1.5, 1] 86 | 4: [1.5, -1] 87 | 5: [1, -1.5] 88 | 6: [1, -1.5] 89 | test: 90 | 0: [0, 0] 91 | continuous: 92 | v: [2, 30] 93 | w: [-3, 3] 94 | 95 | rewards: 96 | followline_center: 97 | from_10: 10 98 | from_02: 2 99 | from_01: 1 100 | penal: -100 101 | min_reward: 5_000 102 | highest_reward: 100 103 | followline_center_v_w_linear: # only for continuous actions 104 | beta_0: 3 105 | beta_1: -0.1 106 | penal: 0 107 | min_reward: 1_000 108 | highest_reward: 100 109 | 110 | gazebo_environments: 111 | simple: 112 | env_name: F1Env-v0 113 | circuit_name: simple 114 | launchfile: simple_circuit.launch 115 | environment_folder: f1 116 | robot_name: f1_renault 117 | model_state_name: f1_renault # f1_renault_multicamera_multilaser 118 | start_pose: 0 # 0, 1, 2, 3, 4 119 | alternate_pose: False 120 | estimated_steps: 100 121 | sensor: camera 122 | save_episodes: 5 123 | save_every_step: 10 124 | lap_completed: False 125 | save_model: True 126 | save_positions: True 127 | debug_level: DEBUG 128 | telemetry: False 129 | telemetry_mask: False 130 | plotter_graphic: False 131 | circuit_positions_set: 132 | 0: [53.462, -41.988, 0.004, 0, 0, 1.57, -1.57] 133 | 1: [53.462, -8.734, 0.004, 0, 0, 1.57, -1.57] 134 | 2: [39.712, -30.741, 0.004, 0, 0, 1.56, 1.56] 135 | 3: [-6.861, -36.481, 0.004, 0, 0.01, -0.858, 0.613] 136 | 4: [20.043, 37.130, 0.003, 0, 0.103, -1.4383, -1.4383] 137 | --------------------------------------------------------------------------------