├── rl_studio ├── envs │ ├── __init__.py │ ├── gazebo │ │ ├── __init__.py │ │ ├── autoparking │ │ │ ├── ._models │ │ │ ├── env_type.py │ │ │ ├── exceptions.py │ │ │ ├── models │ │ │ │ ├── __init__.py │ │ │ │ └── autoparking_env.py │ │ │ └── image_f1.py │ │ ├── f1 │ │ │ ├── README.md │ │ │ ├── exceptions.py │ │ │ ├── env_type.py │ │ │ └── models │ │ │ │ ├── utils.py │ │ │ │ ├── followlane_qlearn.py │ │ │ │ ├── followline_qlearn.py │ │ │ │ ├── f1_env.py │ │ │ │ ├── settings.py │ │ │ │ └── reset.py │ │ ├── mountain_car │ │ │ ├── env_type.py │ │ │ ├── exceptions.py │ │ │ └── __init__.py │ │ ├── robot_mesh │ │ │ ├── env_type.py │ │ │ ├── exceptions.py │ │ │ └── __init__.py │ │ ├── gazebo_utils.py │ │ ├── real_env_ros2.py │ │ └── real_env.py │ ├── openai_gym │ │ └── requirements.txt │ ├── carla │ │ ├── utils │ │ │ ├── constants.py │ │ │ ├── colors.py │ │ │ ├── weather.py │ │ │ ├── synchronous_mode.py │ │ │ └── plot_topology.py │ │ ├── __init__.py │ │ └── followlane │ │ │ ├── utils.py │ │ │ ├── reset.py │ │ │ └── settings.py │ └── envs_type.py ├── visual │ ├── __init__.py │ └── ascii │ │ ├── __init__.py │ │ ├── images.py │ │ └── text.py ├── agents │ ├── f1 │ │ ├── __init__.py │ │ ├── settings.py │ │ ├── manual_pilot.py │ │ └── utils.py │ ├── turtlebot │ │ ├── __init__.py │ │ ├── turtlebot_trainer.py │ │ ├── sarsa.py │ │ ├── liveplot.py │ │ ├── qlearn.py │ │ ├── memory.py │ │ └── circuit2_turtlebot_lidar_qlearn.py │ ├── utilities │ │ ├── __init__.py │ │ ├── render.py │ │ ├── push_git_repo.py │ │ ├── plot_npy_dataset.py │ │ ├── camera_visualizer.py │ │ ├── averaged_table.py │ │ ├── plot_ECDF_template.py │ │ ├── plot_reward_function.py │ │ └── plot_multiple_graphs_template.py │ ├── mountain_car │ │ ├── __init__.py │ │ ├── manual_pilot.py │ │ └── utils.py │ ├── robot_mesh │ │ ├── __init__.py │ │ ├── manual_pilot.py │ │ └── utils.py │ ├── frameworks_type.py │ ├── pendulum │ │ └── requirements.txt │ ├── tasks_type.py │ ├── agents_type.py │ ├── exceptions.py │ ├── trainer.py │ ├── ros_gazebo_pose.py │ ├── liveplot.py │ └── cartpole │ │ ├── requirements.txt │ │ └── README.md ├── scripts │ ├── __init__.py │ └── benchmark_runner ├── installation │ ├── __init__.py │ ├── formula1_laser_setup.bash │ ├── robot_mesh_setup.bash │ ├── mountain_car_setup.bash │ ├── formula1_setup.bash │ ├── setup_noetic.bash │ └── gazebo_ros_noetic.repos ├── wrappers │ ├── __init__.py │ ├── tests │ │ └── __init__.py │ ├── monitoring │ │ ├── __init__.py │ │ └── tests │ │ │ ├── __init__.py │ │ │ ├── helpers.py │ │ │ └── test_video_recorder.py │ ├── dict.py │ ├── README.md │ ├── inference_rlstudio.py │ └── time_limit.py ├── algorithms │ ├── models │ │ ├── __init__.py │ │ └── qlearn.py │ ├── requirements.txt │ ├── exceptions.py │ ├── algorithms_type.py │ ├── utils.py │ ├── README.md │ ├── memory.py │ ├── qlearn_multiple_states.py │ ├── __init__.py │ └── ppo.py ├── docs │ ├── carla_board.png │ ├── Town01_topology.png │ ├── Town01_waypoints.png │ ├── gazebo_screenshot.png │ ├── rlstudio-diagram.png │ └── carla_spectator_camera.png ├── birdview_v2_cache │ └── Carla │ │ └── Maps │ │ └── .gitignore ├── rl-studio.py ├── config │ ├── config_pendulum_ddpg.yaml │ ├── config_pendulum_ppo.yaml │ ├── config_cartpole_programmatic.yaml │ ├── config_cartpole_ppo.yaml │ ├── config_cartpole_ppo_continuous.yaml │ ├── config_cartpole_ddpg.yaml │ ├── config_cartpole_dqn.yaml │ ├── config_mountain_car_qlearn.yaml │ ├── config_cartpole_qlearn.yaml │ ├── config_robot_mesh_qlearn.yaml │ ├── config_training_followline_qlearn_f1_gazebo.yaml │ ├── 
config_inference_followline_qlearn_f1_gazebo.yaml │ ├── config_inference_followline_dqn_f1_gazebo.yaml │ ├── config_training_followlane_qlearn_carla.yaml │ ├── config_training_followlane_qlearn_f1_gazebo.yaml │ ├── config_training_followline_dqn_f1_gazebo.yaml │ ├── config_inference_followlane_qlearn_f1_gazebo.yaml │ └── config_inference_followline_ddpg_f1_gazebo.yaml ├── requirements.txt ├── __init__.py └── rl-studio-tuning-qlearn-followlane.py ├── scripts ├── stop.sh ├── load_env.sh └── camera_view.sh ├── .github └── dependanbot.yml ├── .disable-pre-commit-config.yaml ├── CHANGELOG.md ├── FAQ.md ├── LICENSE.md ├── CODING.md └── CONTRIBUTING.md /rl_studio/envs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rl_studio/visual/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rl_studio/agents/f1/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rl_studio/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rl_studio/agents/turtlebot/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rl_studio/agents/utilities/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rl_studio/installation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rl_studio/visual/ascii/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rl_studio/wrappers/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rl_studio/wrappers/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rl_studio/agents/mountain_car/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rl_studio/agents/robot_mesh/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rl_studio/algorithms/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rl_studio/wrappers/monitoring/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/rl_studio/wrappers/monitoring/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/__init__.py: -------------------------------------------------------------------------------- 1 | from rl_studio.envs.gazebo.real_env import RealEnv 2 | -------------------------------------------------------------------------------- /rl_studio/envs/openai_gym/requirements.txt: -------------------------------------------------------------------------------- 1 | gym==0.25.0 2 | numpy==1.17.4 3 | pygame==2.1.2 4 | -------------------------------------------------------------------------------- /rl_studio/docs/carla_board.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JdeRobot/RL-Studio/HEAD/rl_studio/docs/carla_board.png -------------------------------------------------------------------------------- /rl_studio/docs/Town01_topology.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JdeRobot/RL-Studio/HEAD/rl_studio/docs/Town01_topology.png -------------------------------------------------------------------------------- /rl_studio/agents/turtlebot/turtlebot_trainer.py: -------------------------------------------------------------------------------- 1 | class TurtlebotTrainer: 2 | def __init__(self, params): 3 | pass 4 | -------------------------------------------------------------------------------- /rl_studio/docs/Town01_waypoints.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JdeRobot/RL-Studio/HEAD/rl_studio/docs/Town01_waypoints.png -------------------------------------------------------------------------------- /rl_studio/docs/gazebo_screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JdeRobot/RL-Studio/HEAD/rl_studio/docs/gazebo_screenshot.png -------------------------------------------------------------------------------- /rl_studio/docs/rlstudio-diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JdeRobot/RL-Studio/HEAD/rl_studio/docs/rlstudio-diagram.png -------------------------------------------------------------------------------- /rl_studio/envs/carla/utils/constants.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | 4 | ROOT_PATH = str(Path(__file__).parent.parent) 5 | -------------------------------------------------------------------------------- /rl_studio/birdview_v2_cache/Carla/Maps/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore everything in this directory 2 | * 3 | # Except this file 4 | !.gitignore 5 | -------------------------------------------------------------------------------- /rl_studio/docs/carla_spectator_camera.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JdeRobot/RL-Studio/HEAD/rl_studio/docs/carla_spectator_camera.png -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/autoparking/._models: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/JdeRobot/RL-Studio/HEAD/rl_studio/envs/gazebo/autoparking/._models -------------------------------------------------------------------------------- /rl_studio/algorithms/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.17.4 2 | pydantic==1.9.1 3 | tensorflow==2.10.0 4 | tensorflow_gpu==2.9.1 5 | torch==1.12.1 6 | -------------------------------------------------------------------------------- /rl_studio/agents/frameworks_type.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class FrameworksType(Enum): 5 | TF = "TensorFlow" 6 | PYTORCH = "Pytorch" 7 | -------------------------------------------------------------------------------- /scripts/stop.sh: -------------------------------------------------------------------------------- 1 | killall -9 rosmaster 2 | killall -9 gzserver 3 | killall -9 gzclient 4 | killall -9 python 5 | killall -9 roscore 6 | killall -9 rosout 7 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/f1/README.md: -------------------------------------------------------------------------------- 1 | # F1 Environment 2 | 3 | ## Main class 4 | 5 | `env_f1.py` 6 | 7 | ## Useful programs 8 | 9 | `image_f1.py` 10 | -------------------------------------------------------------------------------- /rl_studio/envs/envs_type.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class EnvsType(Enum): 5 | GAZEBO = "gazebo" 6 | CARLA = "carla" 7 | OPENAI = "openai" 8 | -------------------------------------------------------------------------------- /scripts/load_env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | cd gym_gazebo/envs/installation 3 | rm -rf catkin_ws 4 | bash setup_noetic.bash 5 | bash formula1_laser_setup.bash 6 | cd ~/BehaviorStudio/ 7 | -------------------------------------------------------------------------------- /rl_studio/agents/pendulum/requirements.txt: -------------------------------------------------------------------------------- 1 | gym==0.26.2 2 | gymnasium==0.27.0 3 | markdownTable==6.0.0 4 | matplotlib==3.3.2 5 | numpy==1.17.4 6 | torch==1.12.1 7 | tqdm==4.64.0 8 | -------------------------------------------------------------------------------- /rl_studio/algorithms/models/qlearn.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | 3 | 4 | class QlearnValidator(BaseModel): 5 | alpha: float 6 | epsilon: float 7 | gamma: float 8 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/autoparking/env_type.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class EnvironmentType(Enum): 5 | qlearn_env = "qlearn" 6 | dqn_env = "dqn" 7 | manual_env = "manual" 8 | ddpg_env = "ddpg" 9 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/mountain_car/env_type.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class TrainingType(Enum): 5 | qlearn_env_camera = "qlearn_camera" 6 | qlearn_env_laser = "qlearn_laser" 7 | dqn_env = "dqn" 8 | manual_env = "manual" 9 | 
-------------------------------------------------------------------------------- /rl_studio/envs/gazebo/robot_mesh/env_type.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class TrainingType(Enum): 5 | qlearn_env_camera = "qlearn_camera" 6 | qlearn_env_laser = "qlearn_laser" 7 | dqn_env = "dqn" 8 | manual_env = "manual" 9 | -------------------------------------------------------------------------------- /rl_studio/wrappers/monitoring/tests/helpers.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import shutil 3 | import tempfile 4 | 5 | 6 | @contextlib.contextmanager 7 | def tempdir(): 8 | temp = tempfile.mkdtemp() 9 | yield temp 10 | shutil.rmtree(temp) 11 | -------------------------------------------------------------------------------- /rl_studio/agents/tasks_type.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class TasksType(Enum): 5 | FOLLOWLINEGAZEBO = "follow_line_gazebo" 6 | FOLLOWLANEGAZEBO = "follow_lane_gazebo" 7 | FOLLOWLANECARLA = "follow_lane_carla" 8 | AUTOPARKINGGAZEBO = "autoparking_gazebo" 9 | -------------------------------------------------------------------------------- /.github/dependanbot.yml: -------------------------------------------------------------------------------- 1 | # https://docs.github.com/en/code-security/supply-chain-security/configuration-options-for-dependency-updates#about-the-dependabotyml-file 2 | 3 | version: 2 4 | 5 | updates: 6 | - package-ecosystem: "pip" 7 | directory: "/" 8 | schedule: 9 | interval: "weekly" 10 | commit-message: 11 | prefix: ":arrow_up:" 12 | -------------------------------------------------------------------------------- /scripts/camera_view.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | TOPIC="" 4 | 5 | if [ "$1" == "f1" ]; then 6 | TOPIC="/F1ROS/cameraL/image_raw" 7 | elif [ "$1" == "turtlebot" ]; then 8 | TOPIC="/camera/rgb/image_raw" 9 | else 10 | printf "\n[ERROR] - No robot selected (use: 'f1' or 'turtlebot')\n\n" 11 | exit 1 12 | fi 13 | 14 | rosrun image_view image_view image:=$TOPIC 15 | -------------------------------------------------------------------------------- /rl_studio/agents/agents_type.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class AgentsType(Enum): 5 | F1 = "f1" 6 | F1GAZEBO = "f1" 7 | TURTLEBOT = "turtlebot" 8 | ROBOT_MESH = "robot_mesh" 9 | MOUNTAIN_CAR = "mountain_car" 10 | CARTPOLE = "cartpole" 11 | PENDULUM = "pendulum" 12 | AUTOPARKINGGAZEBO = "autoparkingRL" 13 | AUTOCARLA = "auto_carla" 14 | -------------------------------------------------------------------------------- /rl_studio/algorithms/exceptions.py: -------------------------------------------------------------------------------- 1 | class CreationError(Exception): 2 | ... 3 | 4 | 5 | class NoValidAlgorithmType(CreationError): 6 | def __init__(self, algorithm): 7 | self.algorithm_type = algorithm 8 | self.message = f"[ERROR] No valid training type ({algorithm}) in your config.yml file or is missing." 9 | super().__init__(self.message) 10 | -------------------------------------------------------------------------------- /rl_studio/agents/exceptions.py: -------------------------------------------------------------------------------- 1 | class CreationError(Exception): 2 | ... 
3 | 4 | 5 | class NoValidTrainingType(CreationError): 6 | def __init__(self, training_type): 7 | self.training_type = training_type 8 | self.message = f"[ERROR] No valid training type ({training_type}) in your config.yml file or is missing." 9 | super().__init__(self.message) 10 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/robot_mesh/exceptions.py: -------------------------------------------------------------------------------- 1 | class CreationError(Exception): 2 | ... 3 | 4 | 5 | class NoValidTrainingType(CreationError): 6 | def __init__(self, training_type): 7 | self.training_type = training_type 8 | self.message = f"[MESSAGE] No valid training type ({training_type}) in your settings.py file" 9 | super().__init__(self.message) 10 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/f1/exceptions.py: -------------------------------------------------------------------------------- 1 | class CreationError(Exception): 2 | ... 3 | 4 | 5 | class NoValidEnvironmentType(CreationError): 6 | def __init__(self, environment_type): 7 | self.training_type = environment_type 8 | self.message = f"[MESSAGE] No valid training type ({environment_type}) in your settings.py file" 9 | super().__init__(self.message) 10 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/mountain_car/exceptions.py: -------------------------------------------------------------------------------- 1 | class CreationError(Exception): 2 | ... 3 | 4 | 5 | class NoValidTrainingType(CreationError): 6 | def __init__(self, training_type): 7 | self.training_type = training_type 8 | self.message = f"[MESSAGE] No valid training type ({training_type}) in your settings.py file" 9 | super().__init__(self.message) 10 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/autoparking/exceptions.py: -------------------------------------------------------------------------------- 1 | class CreationError(Exception): 2 | ... 
3 | 4 | 5 | class NoValidEnvironmentType(CreationError): 6 | def __init__(self, environment_type): 7 | self.traning_type = environment_type 8 | self.message = f"[MESSAGE] No valid training type ({environment_type}) in your settings.py file" 9 | super().__init__(self.message) 10 | -------------------------------------------------------------------------------- /rl_studio/algorithms/algorithms_type.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class AlgorithmsType(Enum): 5 | PROGRAMMATIC = 'programmatic' 6 | QLEARN = "qlearn" 7 | DEPRECATED_QLEARN = "qlearn_deprecated" 8 | DQN = "dqn" 9 | DDPG = "ddpg" 10 | SAC = "sac" 11 | DDPG_TORCH = "ddpg_torch" 12 | PPO = "ppo" 13 | PPO_CONTINIUOUS = 'ppo_continuous' 14 | MANUAL = "manual" 15 | -------------------------------------------------------------------------------- /rl_studio/agents/trainer.py: -------------------------------------------------------------------------------- 1 | from abc import ABC 2 | 3 | from pydantic import BaseModel 4 | 5 | 6 | class TrainerValidator(BaseModel): 7 | settings: dict 8 | agent: dict 9 | environment: dict 10 | algorithm: dict 11 | 12 | 13 | class InferenceExecutorValidator(BaseModel): 14 | settings: dict 15 | agent: dict 16 | environment: dict 17 | algorithm: dict 18 | inference: dict 19 | # gazebo: dict 20 | 21 | 22 | class AgentTrainer(ABC): 23 | pass 24 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/f1/env_type.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class EnvironmentType(Enum): 5 | qlearn_env_camera_follow_line = "qlearn_camera_follow_line" 6 | qlearn_env_camera_follow_lane = "qlearn_camera_follow_lane" 7 | qlearn_env_laser_follow_line = "qlearn_laser_follow_line" 8 | dqn_env_follow_line = "dqn_follow_line" 9 | dqn_env_follow_lane = "dqn_follow_lane" 10 | manual_env = "manual" 11 | ddpg_env_follow_line = "ddpg_follow_line" 12 | ddpg_env_follow_lane = "ddpg_follow_lane" 13 | -------------------------------------------------------------------------------- /.disable-pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v2.3.0 4 | hooks: 5 | - id: check-yaml 6 | - id: end-of-file-fixer 7 | - id: trailing-whitespace 8 | - repo: https://github.com/psf/black 9 | rev: 20.8b1 10 | hooks: 11 | - id: black 12 | language_version: python3 13 | #- repo: https://gitlab.com/pycqa/flake8 14 | # rev: 3.9.0 15 | # hooks: 16 | # - id: flake8 17 | # additional_dependencies: [flake8-bugbear==21.4.3] 18 | - repo: https://github.com/asottile/blacken-docs 19 | rev: v1.10.0 20 | hooks: 21 | - id: blacken-docs 22 | additional_dependencies: [black==20.8b1] 23 | -------------------------------------------------------------------------------- /rl_studio/agents/utilities/render.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import argparse 3 | 4 | import gym 5 | 6 | parser = argparse.ArgumentParser( 7 | description="Renders a Gym environment for quick inspection." 8 | ) 9 | parser.add_argument( 10 | "env_id", 11 | type=str, 12 | help="the ID of the environment to be rendered (e.g. 
HalfCheetah-v1)", 13 | ) 14 | parser.add_argument("--step", type=int, default=1) 15 | args = parser.parse_args() 16 | 17 | env = gym.make(args.env_id) 18 | env.reset() 19 | 20 | step = 0 21 | while True: 22 | if args.step: 23 | env.step(env.action_space.sample()) 24 | env.render() 25 | if step % 10 == 0: 26 | env.reset() 27 | step += 1 28 | -------------------------------------------------------------------------------- /rl_studio/agents/utilities/push_git_repo.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | 3 | def git_add_commit_push(commit_message): 4 | # Change this to your repository directory 5 | repo_directory = "/home/ruben/Desktop/2020-phd-ruben-lucas" 6 | 7 | # Change to the repository directory 8 | subprocess.run(["git", "add", "."], cwd=repo_directory) 9 | 10 | # Commit changes with the given commit message 11 | subprocess.run(["git", "commit", "-m", commit_message], cwd=repo_directory) 12 | 13 | # Push changes to the remote repository 14 | subprocess.run(["git", "push"], cwd=repo_directory) 15 | 16 | if __name__ == "__main__": 17 | commit_message = "Your commit message here." 18 | git_add_commit_push(commit_message) -------------------------------------------------------------------------------- /rl_studio/installation/formula1_laser_setup.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ -z "$GAZEBO_MODEL_PATH" ]; then 4 | bash -c 'echo "export GAZEBO_MODEL_PATH=$GAZEBO_MODEL_PATH:"`pwd`/../assets/models >> ~/.bashrc' 5 | else 6 | bash -c 'sed "s,GAZEBO_MODEL_PATH=[^;]*,'GAZEBO_MODEL_PATH=`pwd`/../assets/models'," -i ~/.bashrc' 7 | fi 8 | 9 | bash -c 'echo "export GYM_GAZEBO_WORLD_CIRCUIT_F1="`pwd`/../assets/worlds/f1_1_simplecircuit.world >> ~/.bashrc' 10 | bash -c 'echo "export GYM_GAZEBO_WORLD_CIRCUIT_F1_LASER="`pwd`/../assets/worlds/f1_1_simplecircuit_laser.world >> ~/.bashrc' 11 | #export GYM_GAZEBO_WORLD_CIRCUIT_F1=`pwd`/../assets/worlds/f1_1_simplecircuit.world 12 | echo 'Formula 1 env variables loaded successfully' 13 | 14 | exec bash # reload bash 15 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | 3 | Here you can find all important changes in this project. RL-Studio adheres to [Semantic Versioning](http://semver.org/). 4 | ## [Unreleased] 5 | 6 | ## [1.2.0] - 2022-01-11 7 | ### Added: 8 | 9 | - Add new files in envs/gazebo/f1/models/: original files are split into different modular files such as images.py, reset.py, step.py, settings.py, simplified_perception.py ([PR #122](https://github.com/JdeRobot/RL-Studio/pull/122)) 10 | 11 | ### Changed: 12 | - entry file name: `rl-studio.py` 13 | - config file names: now they follow naming `config_mode_task_algorithm_agent_simulator` 14 | - files structure in envs/gazebo/f1/models/: files are named `task_algorithm.py` 15 | 16 | 17 | 18 | [1.2.0]: https://github.com/JdeRobot/RL-Studio/pull/122 19 | -------------------------------------------------------------------------------- /FAQ.md: -------------------------------------------------------------------------------- 1 | # DOUBTS GLOSSARY 2 | 3 | This document collects the team's doubts and the solutions we agreed on. 4 | 5 | ## MDP rule related 6 | 7 | - **DOUBT:** If we use the distance traveled since the last state as an input, does that go against the MDP principle? 
8 | " *Just the current state must be considered to perform an action since the previous state does not affect the current 9 | decision* " 10 | - **RESPONSE:** Once you are in a given state, it doesn't matter how you got there. The current state contains all the information required to make a prediction or a decision. This simplifies the decision algorithm a lot, because it does not need any memory. 11 | That said, your input could be any kind of information that gives you an idea of the environment state, so, as long as this input does not imply a state-space explosion, it should be fine. -------------------------------------------------------------------------------- /rl_studio/rl-studio.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import yaml 4 | 5 | from rl_studio.agents import TrainerFactory, InferencerFactory 6 | 7 | 8 | def main(): 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument( 11 | "-f", 12 | "--file", 13 | type=argparse.FileType("r"), 14 | required=True, 15 | default="config/config.yaml", 16 | help="In /config dir you will find .yaml example files", 17 | ) 18 | 19 | args = parser.parse_args() 20 | config_file = yaml.load(args.file, Loader=yaml.FullLoader) 21 | 22 | if config_file["settings"]["mode"] == "inference": 23 | inferencer = InferencerFactory(config_file) 24 | inferencer.main() 25 | else: 26 | trainer = TrainerFactory(config_file) 27 | trainer.main() 28 | 29 | 30 | if __name__ == "__main__": 31 | main() -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/mountain_car/__init__.py: -------------------------------------------------------------------------------- 1 | from .env_type import TrainingType 2 | from .exceptions import NoValidTrainingType 3 | 4 | 5 | class MountainCarEnv: 6 | def __new__(cls, **config): 7 | cls.circuit = None 8 | cls.vel_pub = None 9 | cls.unpause = None 10 | cls.pause = None 11 | cls.reset_proxy = None 12 | cls.action_space = None 13 | cls.reward_range = None 14 | cls.model_coordinates = None 15 | cls.position = None 16 | 17 | training_type = config["environments"]["training_type"] 18 | print(config.get("launchfile")) 19 | if training_type == TrainingType.qlearn_env_camera.value: 20 | from .mountain_car_env import MountainCarEnv 21 | 22 | return MountainCarEnv(**config) 23 | 24 | else: 25 | raise NoValidTrainingType(training_type) 26 | -------------------------------------------------------------------------------- /rl_studio/config/config_pendulum_ddpg.yaml: -------------------------------------------------------------------------------- 1 | settings: 2 | output_dir: "./logs/" 3 | save_model: True 4 | save_positions: True 5 | telemetry: False 6 | logging_level: info 7 | mode: inference 8 | agent: pendulum 9 | algorithm: ddpg_torch 10 | 11 | # TODO make this section optional 12 | actions: 13 | available_actions: 14 | simple: 15 | 16 | agent: 17 | # TODO To be removed 18 | camera_params: 19 | witdh: 640 20 | height: 480 21 | 22 | environments: 23 | env_name: Pendulum-v1 24 | environment_folder: pendulum 25 | # runs: 20000 26 | runs: 20000 27 | full_experimentation_runs: 0 28 | update_every: 20 29 | show_every: 50 30 | objective_reward: -430 31 | 32 | inference: 33 | inference_file: /home/ruben/Desktop/my-RL-Studio/rl_studio/checkpoints/pendulum/ddpg/20221231_0100_actor_avg_-392.54141588266396.pkl 34 | 35 | algorithm: 36 | gamma: 0.99 37 | hidden_size: 512 38 | batch_size: 128 
-------------------------------------------------------------------------------- /rl_studio/installation/robot_mesh_setup.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #TODO now, if it is executed twice, the $GAZEBO_MODEL_PATH and $GAZEBO_RESOURCE_PATH environment variables will be added twice. 4 | #TODO Avoid this without removing different installations environment variables. 5 | 6 | if [ -z "$GAZEBO_MODEL_PATH" ]; then 7 | bash -c 'echo "export GAZEBO_MODEL_PATH=$GAZEBO_MODEL_PATH:"`pwd`/../CustomRobots/robot_mesh/model >> ~/.bashrc' 8 | else 9 | bash -c 'sed "s,GAZEBO_MODEL_PATH=[^;]*,'GAZEBO_MODEL_PATH=$GAZEBO_MODEL_PATH:`pwd`/../CustomRobots/robot_mesh/model'," -i ~/.bashrc' 10 | fi 11 | 12 | if [ -z "$GAZEBO_RESOURCE_PATH" ]; then 13 | bash -c 'echo "export GAZEBO_RESOURCE_PATH=$GAZEBO_RESOURCE_PATH:"`pwd`/../CustomRobots/robot_mesh/world >> ~/.bashrc' 14 | else 15 | bash -c 'sed "s,GAZEBO_RESOURCE_PATH=[^;]*,'GAZEBO_RESOURCE_PATH=$GAZEBO_RESOURCE_PATH:`pwd`/../CustomRobots/robot_mesh/world'," -i ~/.bashrc' 16 | fi 17 | 18 | 19 | # Reload bash 20 | exec bash 21 | -------------------------------------------------------------------------------- /rl_studio/installation/mountain_car_setup.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #TODO now, if it is executed twice, the $GAZEBO_MODEL_PATH and $GAZEBO_RESOURCE_PATH environment variables will be added twice. 4 | #TODO Avoid this without removing different installations environment variables. 5 | 6 | if [ -z "$GAZEBO_MODEL_PATH" ]; then 7 | bash -c 'echo "export GAZEBO_MODEL_PATH=$GAZEBO_MODEL_PATH:"`pwd`/../CustomRobots/mountain_car/model >> ~/.bashrc' 8 | else 9 | bash -c 'sed "s,GAZEBO_MODEL_PATH=[^;]*,'GAZEBO_MODEL_PATH=$GAZEBO_MODEL_PATH:`pwd`/../CustomRobots/mountain_car/model'," -i ~/.bashrc' 10 | fi 11 | 12 | if [ -z "$GAZEBO_RESOURCE_PATH" ]; then 13 | bash -c 'echo "export GAZEBO_RESOURCE_PATH=$GAZEBO_RESOURCE_PATH:"`pwd`/../CustomRobots/mountain_car/world >> ~/.bashrc' 14 | else 15 | bash -c 'sed "s,GAZEBO_RESOURCE_PATH=[^;]*,'GAZEBO_RESOURCE_PATH=$GAZEBO_RESOURCE_PATH:`pwd`/../CustomRobots/mountain_car/world'," -i ~/.bashrc' 16 | fi 17 | 18 | 19 | # Reload bash 20 | exec bash 21 | -------------------------------------------------------------------------------- /rl_studio/agents/ros_gazebo_pose.py: -------------------------------------------------------------------------------- 1 | import rospy 2 | from gazebo_msgs.msg import ModelState 3 | from gazebo_msgs.srv import SetModelState 4 | 5 | 6 | def main(): 7 | rospy.init_node("set_pose") 8 | state_msg = ModelState() 9 | state_msg.model_name = "f1_renault" 10 | state_msg.pose.position.x = 0 11 | state_msg.pose.position.y = 0 12 | state_msg.pose.position.z = 0.004 13 | state_msg.pose.orientation.x = 0 14 | state_msg.pose.orientation.y = 0 15 | state_msg.pose.orientation.z = 0 16 | state_msg.pose.orientation.w = 0 17 | 18 | rospy.wait_for_service("/gazebo/set_model_state") 19 | try: 20 | set_state = rospy.ServiceProxy("/gazebo/set_model_state", SetModelState) 21 | resp = set_state(state_msg) 22 | 23 | except rospy.ServiceException as e: 24 | print(f"Service call failed: {e}") 25 | 26 | 27 | if __name__ == "__main__": 28 | try: 29 | main() 30 | except rospy.ROSInterruptException: 31 | pass 32 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/robot_mesh/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .env_type import TrainingType 2 | from .exceptions import NoValidTrainingType 3 | 4 | 5 | class RobotMeshEnv: 6 | def __new__(cls, **config): 7 | cls.circuit = None 8 | cls.vel_pub = None 9 | cls.unpause = None 10 | cls.pause = None 11 | cls.reset_proxy = None 12 | cls.action_space = None 13 | cls.reward_range = None 14 | cls.model_coordinates = None 15 | cls.position = None 16 | 17 | training_type = config["environments"].get("training_type") 18 | print(config["environments"].get("launchfile")) 19 | if ( 20 | training_type == TrainingType.qlearn_env_camera.value 21 | or training_type == TrainingType.manual_env.value 22 | ): 23 | from .robot_mesh_position_env import RobotMeshEnv 24 | 25 | return RobotMeshEnv(**config) 26 | 27 | else: 28 | raise NoValidTrainingType(training_type) 29 | -------------------------------------------------------------------------------- /rl_studio/wrappers/dict.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | 4 | 5 | __all__ = ["FlattenDictWrapper"] 6 | 7 | 8 | class FlattenDictWrapper(gym.ObservationWrapper): 9 | """Flattens selected keys of a Dict observation space into 10 | an array. 11 | """ 12 | 13 | def __init__(self, env, dict_keys): 14 | super(FlattenDictWrapper, self).__init__(env) 15 | self.dict_keys = dict_keys 16 | 17 | # Figure out observation_space dimension. 18 | size = 0 19 | for key in dict_keys: 20 | shape = self.env.observation_space.spaces[key].shape 21 | size += np.prod(shape) 22 | self.observation_space = gym.spaces.Box( 23 | -np.inf, np.inf, shape=(size,), dtype="float32" 24 | ) 25 | 26 | def observation(self, observation): 27 | assert isinstance(observation, dict) 28 | obs = [] 29 | for key in self.dict_keys: 30 | obs.append(observation[key].ravel()) 31 | return np.concatenate(obs) 32 | -------------------------------------------------------------------------------- /rl_studio/algorithms/utils.py: -------------------------------------------------------------------------------- 1 | ###################################### 2 | # 3 | # Common functions and classes for /algoritmhs: DDPG, Qlearn, DQN.. 
4 | # 5 | ###################################### 6 | import time 7 | 8 | 9 | def save_actorcritic_model( 10 | agent, global_params, algoritmhs_params, environment, cumulated_reward, episode, text 11 | ): 12 | 13 | timestamp = time.strftime('%Y%m%d-%H%M%S') 14 | agent.actor_model.save( 15 | f"{global_params.models_dir}/{timestamp}_{text}_" 16 | f"C-{environment['circuit_name']}_" 17 | f"S-{environment['states']}_" 18 | f"A-{environment['action_space']}_" 19 | f"MR-{int(cumulated_reward)}_" 20 | f"E-{episode}/ACTOR" 21 | ) 22 | agent.critic_model.save( 23 | f"{global_params.models_dir}/{timestamp}_{text}_" 24 | f"C-{environment['circuit_name']}_" 25 | f"S-{environment['states']}_" 26 | f"A-{environment['action_space']}_" 27 | f"MR-{int(cumulated_reward)}_" 28 | f"E-{episode}/CRITIC" 29 | ) 30 | 31 | 32 | -------------------------------------------------------------------------------- /rl_studio/wrappers/README.md: -------------------------------------------------------------------------------- 1 | # Wrappers 2 | 3 | Wrappers are used to transform an environment in a modular way: 4 | 5 | ``` 6 | env = gym.make('Pong-v0') 7 | env = MyWrapper(env) 8 | ``` 9 | 10 | Note that we may later restructure any of the files in this directory, 11 | but will keep the wrappers available at the wrappers' top-level 12 | folder. So for example, you should access `MyWrapper` as follows: 13 | 14 | ``` 15 | # Will be supported in future releases 16 | from gym_gazebo.wrappers import MyWrapper 17 | ``` 18 | 19 | ## Quick tips for writing your own wrapper 20 | 21 | - Don't forget to call super(class_name, self).__init__(env) if you override the wrapper's __init__ function 22 | - You can access the inner environment with `self.unwrapped` 23 | - You can access the previous layer using `self.env` 24 | - The variables `metadata`, `action_space`, `observation_space`, `reward_range`, and `spec` are copied to `self` from the previous layer 25 | - Create a wrapped function for at least one of the following: `__init__(self, env)`, `step`, `reset`, `_render`, `_close`, or `_seed` 26 | - Your layered function should take its input from the previous layer (`self.env`) and/or the inner layer (`self.unwrapped`) 27 | -------------------------------------------------------------------------------- /rl_studio/agents/utilities/plot_npy_dataset.py: -------------------------------------------------------------------------------- 1 | import matplotlib as pltlib 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | def load_rewards(file_path): 6 | rewards = np.load(file_path, allow_pickle=True).item() 7 | return rewards 8 | 9 | 10 | def plot_rewards(base_directory_name, file_name): 11 | pltlib.rcParams.update({'font.size': 15}) 12 | fig, ax1 = plt.subplots() 13 | 14 | file_path = base_directory_name + "/" + file_name 15 | rewards = load_rewards(file_path) 16 | ax1.set_xscale('log') 17 | ax1.plot(range(len(rewards["avg"])), rewards["avg"], color='purple', label='ddpg') 18 | 19 | # fig.canvas.manager.full_screen_toggle() 20 | 21 | plt.legend() 22 | plt.ylabel("cun_reward") 23 | plt.xlabel("episode") 24 | # plt.show() 25 | 26 | base_path = '/home/ruben/Desktop/2020-phd-ruben-lucas/docs/assets/images/results_images/f1-follow-line/gazebo/ddpg/sp10/' 27 | ax1.figure.savefig(base_path + 'trainings.png') 28 | 29 | if __name__ == "__main__": 30 | # Pass the file_name as a parameter when calling the script 31 | file_name = "20230722-135205_Circuit-simple_States-sp10_Actions-continuous_Rewards-followline_center.npy" 32 | 
plot_rewards(file_name) 33 | -------------------------------------------------------------------------------- /rl_studio/wrappers/inference_rlstudio.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | 3 | from rl_studio.algorithms import InferencerFactory 4 | from pydantic import BaseModel 5 | 6 | from rl_studio.algorithms import InferencerFactory 7 | 8 | 9 | # TODO future iteration -> make it language agnostic. Right now it is imported and instantiated like a library. 10 | # In the future, it could be launched, binded to a port or a topic, and just answering to what it is listening 11 | class InferencerWrapper: 12 | def __init__(self, algorithm, inference_file, actions_file="", env=None): 13 | 14 | inference_params = { 15 | "algorithm": algorithm, 16 | "inference_file": inference_file, 17 | "actions_file": actions_file, 18 | "env": env, 19 | } 20 | 21 | # TODO make mandatory env to retrieve from there the actions (instead of saving/loading actions file) 22 | params = self.InferenceValidator(**inference_params) 23 | 24 | self.inferencer = InferencerFactory(params) 25 | 26 | class InferenceValidator(BaseModel): 27 | inference_file: str 28 | algorithm: str 29 | actions_file: str 30 | env: object 31 | 32 | def inference(self, state): 33 | return self.inferencer.inference(state) 34 | -------------------------------------------------------------------------------- /rl_studio/requirements.txt: -------------------------------------------------------------------------------- 1 | ale_py==0.7.4 2 | carla_birdeye_view==1.1.1 3 | cv_bridge==1.16.2 4 | cv_bridge==1.16.0 5 | gym==0.17.0 6 | gymnasium==0.28.1 7 | gymnasium==0.27.0 8 | keras==2.11.0 9 | markdownTable==6.0.0 10 | matplotlib==3.3.2 11 | matplotlib==3.1.2 12 | networkx==2.5.1 13 | numpy==1.22.0 14 | numpy==1.17.4 15 | opencv_python==4.2.0.32 16 | opencv_python==4.2.0.34 17 | opencv_python_headless==4.7.0.68 18 | optuna==3.1.0 19 | pandas==1.5.3 20 | Pillow==8.3.1 21 | Pillow==9.3.0 22 | Pillow==7.0.0 23 | Pillow==10.1.0 24 | py3rosmsgs==1.18.2 25 | py_markdown_table==0.3.3 26 | pydantic==1.9.1 27 | pydantic==1.10.2 28 | pygame==2.4.0 29 | pygame==2.1.2 30 | Pygments==2.14.0 31 | Pygments==2.2.0 32 | Pygments==2.12.0 33 | Pygments==2.3.1 34 | PyYAML==6.0 35 | PyYAML==5.4 36 | PyYAML==6.0.1 37 | reloading==1.1.2 38 | rosbag==1.16.0 39 | rosbag==1.15.14 40 | rosgraph==1.16.0 41 | rosgraph==1.15.14 42 | rosnode==1.16.0 43 | rosnode==1.15.14 44 | rospy==1.16.0 45 | scikit_learn==1.1.2 46 | scikit_learn==1.1.1 47 | scipy==1.6.1 48 | scipy==1.4.1 49 | sensor_msgs==1.13.1 50 | six==1.14.0 51 | statsmodels==0.13.2 52 | tabulate==0.9.0 53 | tensorboardX==2.6 54 | tensorboardX==2.6.2.2 55 | tensorflow==2.9.1 56 | tensorflow_gpu==2.11.0 57 | tf==1.13.2 58 | torch==1.11.0+cu113 59 | torch==1.11.0 60 | tqdm==4.64.0 61 | -------------------------------------------------------------------------------- /rl_studio/agents/utilities/camera_visualizer.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import cv2 4 | import rospy 5 | from cv_bridge import CvBridge, CvBridgeError 6 | from sensor_msgs.msg import Image 7 | 8 | 9 | class ImageConverter: 10 | def __init__(self): 11 | 12 | self.bridge = CvBridge() 13 | self.image_sub = rospy.Subscriber("/camera/rgb/image_raw", Image, self.callback) 14 | 15 | def callback(self, data): 16 | try: 17 | cv_image = self.bridge.imgmsg_to_cv2(data, "bgr8") 18 | except CvBridgeError as e: 19 | 
print(e) 20 | 21 | cv_image = cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY) 22 | cv_image = cv2.resize(cv_image, (32, 32)) 23 | # cv_image = skimage.exposure.rescale_intensity(cv_image,out_range=(0,255)) 24 | 25 | cv2.namedWindow("Image window", cv2.WINDOW_NORMAL) 26 | cv2.resizeWindow("Image window", 480, 480) 27 | cv2.imshow("Image window", cv_image) 28 | cv2.waitKey(3) 29 | 30 | 31 | def main(args): 32 | ic = ImageConverter() 33 | rospy.init_node("image_converter", anonymous=True) 34 | try: 35 | rospy.spin() 36 | except KeyboardInterrupt: 37 | print("Shutting down") 38 | cv2.destroyAllWindows() 39 | 40 | 41 | if __name__ == "__main__": 42 | main(sys.argv) 43 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/autoparking/models/__init__.py: -------------------------------------------------------------------------------- 1 | from rl_studio.envs.gazebo.autoparking.env_type import EnvironmentType 2 | from rl_studio.envs.gazebo.autoparking.exceptions import NoValidEnvironmentType 3 | 4 | 5 | class AutoparkingEnv: 6 | def __new__(cls, **config): 7 | cls.circuit = None 8 | cls.vel_pub = None 9 | cls.unpause = None 10 | cls.pause = None 11 | cls.reset_proxy = None 12 | cls.action_space = None 13 | cls.reward_range = None 14 | cls.model_coordinates = None 15 | cls.position = None 16 | 17 | training_type = config.get("training_type") 18 | 19 | # DDPG 20 | if training_type == EnvironmentType.ddpg_env.value: 21 | from rl_studio.envs.gazebo.autoparking.models.autoparking_env_ddpg import ( 22 | DDPGAutoparkingEnvGazebo, 23 | ) 24 | 25 | return DDPGAutoparkingEnvGazebo(**config) 26 | 27 | ## Qlearn 28 | elif training_type == EnvironmentType.qlearn_env.value: 29 | from rl_studio.envs.gazebo.autoparking.models.autoparking_env_qlearn import ( 30 | QlearnAutoparkingEnvGazebo, 31 | ) 32 | 33 | return QlearnAutoparkingEnvGazebo(**config) 34 | 35 | else: 36 | raise NoValidEnvironmentType(training_type) 37 | -------------------------------------------------------------------------------- /rl_studio/envs/carla/__init__.py: -------------------------------------------------------------------------------- 1 | from rl_studio.agents.tasks_type import TasksType 2 | from rl_studio.agents.frameworks_type import FrameworksType 3 | from rl_studio.algorithms.algorithms_type import AlgorithmsType 4 | from rl_studio.envs.gazebo.f1.exceptions import NoValidEnvironmentType 5 | 6 | 7 | class Carla: 8 | def __new__(cls, **environment): 9 | 10 | algorithm = environment["algorithm"] 11 | task = environment["task"] 12 | framework = environment["framework"] 13 | weather = environment["weather"] 14 | traffic_pedestrians = environment["traffic_pedestrians"] 15 | 16 | # ============================= 17 | # FollowLane - qlearn - weather: static - traffic and pedestrians: No - (No framework) 18 | # ============================= 19 | if ( 20 | task == TasksType.FOLLOWLANECARLA.value 21 | and algorithm == AlgorithmsType.QLEARN.value 22 | and weather != "dynamic" 23 | and traffic_pedestrians is False 24 | ): 25 | from rl_studio.envs.carla.followlane.followlane_qlearn import ( 26 | FollowLaneQlearnStaticWeatherNoTraffic, 27 | ) 28 | 29 | return FollowLaneQlearnStaticWeatherNoTraffic(**environment) 30 | 31 | else: 32 | raise NoValidEnvironmentType(task) 33 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/gazebo_utils.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import
rospy 4 | from gazebo_msgs.msg import ModelState 5 | from gazebo_msgs.srv import SetModelState 6 | 7 | 8 | def set_new_pose(circuit_positions_set): 9 | """ 10 | Receives the set of circuit positions (specified in the yml configuration file) 11 | and returns the index of the selected random position. 12 | 13 | (pos_number, pose_x, pose_y, pose_z, or_x, or_y, or_z, or_z) 14 | """ 15 | position = random.choice(list(enumerate(circuit_positions_set)))[0] 16 | print(position) 17 | 18 | state = ModelState() 19 | state.model_name = "f1_renault" 20 | state.pose.position.x = circuit_positions_set[position][1] 21 | state.pose.position.y = circuit_positions_set[position][2] 22 | state.pose.position.z = circuit_positions_set[position][3] 23 | state.pose.orientation.x = circuit_positions_set[position][4] 24 | state.pose.orientation.y = circuit_positions_set[position][5] 25 | state.pose.orientation.z = circuit_positions_set[position][6] 26 | state.pose.orientation.w = circuit_positions_set[position][7] 27 | 28 | rospy.wait_for_service("/gazebo/set_model_state") 29 | try: 30 | set_state = rospy.ServiceProxy("/gazebo/set_model_state", SetModelState) 31 | set_state(state) 32 | except rospy.ServiceException as e: 33 | print("Service call failed: {}".format(e)) 34 | return position 35 | -------------------------------------------------------------------------------- /rl_studio/agents/f1/settings.py: -------------------------------------------------------------------------------- 1 | ########################### 2 | # Global variables file 3 | ########################### 4 | from pydantic import BaseModel 5 | 6 | 7 | class QLearnConfig(BaseModel): 8 | actions: int = 3 9 | debug_level: int = 0 10 | telemetry: bool = False 11 | telemetry_mask: bool = True 12 | plotter_graphic: bool = False 13 | my_board: bool = True 14 | save_positions: bool = False 15 | save_model: bool = True 16 | load_model: bool = False 17 | output_dir = "./logs/qlearn_models/qlearn_camera_solved/" 18 | poi = 1 # The original pixel row is: 250, 300, 350, 400 and 450 but we work only with the half of the image 19 | actions_set = "simple" # test, simple, medium, hard 20 | max_distance = 0.5 21 | # === CAMERA === 22 | # Images size 23 | width = 640 24 | height = 480 25 | center_image = width / 2 26 | 27 | # Maximum distance from the line 28 | ranges = [300, 280, 250] # Line 1, 2 and 3 29 | reset_range = [-40, 40] 30 | last_center_line = 0 31 | if poi == 1: 32 | x_row = [60] 33 | elif poi == 2: 34 | x_row = [60, 110] 35 | elif poi == 3: 36 | x_row = [ 37 | 10, 38 | 60, 39 | 110, 40 | ] # The first and last element is not used. Just for metrics 41 | elif poi == 5: 42 | x_row = [250, 300, 350, 400, 450] 43 | 44 | 45 | qlearn = QLearnConfig() 46 | -------------------------------------------------------------------------------- /rl_studio/agents/utilities/averaged_table.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | This script creates a simple table from environemnt monitoring 4 | data obtained from '/tmp/gazebo_gym_experiments/', which has been 5 | created when calling a gazebo environment monitoring function in a test. 6 | 7 | Args: 8 | 9 | arg1 Creates a table using 'arg1' as average size delimiter. 
10 | 11 | Examples: 12 | 13 | python display_image.py 20 14 | 15 | """ 16 | import itertools 17 | import sys 18 | 19 | import gym 20 | 21 | 22 | def expand(lst, n): 23 | lst = [[i] * n for i in lst] 24 | lst = list(itertools.chain.from_iterable(lst)) 25 | return lst 26 | 27 | 28 | if __name__ == "__main__": 29 | 30 | outdir = "/tmp/gazebo_gym_experiments" 31 | data_key = "episode_rewards" 32 | mod1 = int(sys.argv[1]) 33 | 34 | results = gym.monitoring.monitor.load_results(outdir) 35 | data = results[data_key] 36 | 37 | avg_data = [] 38 | 39 | for i, val in enumerate(data): 40 | if i % mod1 == 0: 41 | if (i + mod1) < len(data): 42 | avg = sum(data[i : i + mod1]) / mod1 43 | avg_data.append(avg) 44 | 45 | print("Printing averaged graph with interval=" + str(mod1) + " ...") 46 | print("----------------") 47 | for i, val in enumerate(avg_data): 48 | print(str(i * mod1) + "-" + str(i * mod1 + mod1) + " | " + str(val)) 49 | print("----------------") 50 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/f1/models/utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | 4 | class F1GazeboUtils: 5 | def __init__(self): 6 | self.f1 = None 7 | 8 | @staticmethod 9 | def show_image_with_centrals( 10 | name, img, waitkey, centrals_in_pixels, centrals_normalized, x_row 11 | ): 12 | window_name = f"{name}" 13 | 14 | for index, value in enumerate(x_row): 15 | cv2.putText( 16 | img, 17 | str( 18 | f"{int(centrals_in_pixels[index])}" 19 | ), 20 | (int(centrals_in_pixels[index])+20, int(x_row[index])), 21 | cv2.FONT_HERSHEY_SIMPLEX, 22 | 0.3, 23 | (255, 255, 255), 24 | 1, 25 | cv2.LINE_AA, 26 | ) 27 | cv2.putText( 28 | img, 29 | str( 30 | f"[{centrals_normalized[index]}]" 31 | ), 32 | (320, int(x_row[index])), 33 | cv2.FONT_HERSHEY_SIMPLEX, 34 | 0.3, 35 | (255, 255, 255), 36 | 1, 37 | cv2.LINE_AA, 38 | ) 39 | cv2.imshow(window_name, img) 40 | cv2.waitKey(waitkey) 41 | 42 | def show_image(self, name, img, waitkey): 43 | window_name = f"{name}" 44 | cv2.imshow(window_name, img) 45 | cv2.waitKey(waitkey) 46 | -------------------------------------------------------------------------------- /rl_studio/agents/turtlebot/sarsa.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | 4 | class Sarsa: 5 | def __init__(self, actions, epsilon, alpha, gamma): 6 | self.q = {} 7 | 8 | self.epsilon = epsilon 9 | self.alpha = alpha 10 | self.gamma = gamma 11 | self.actions = actions 12 | 13 | def getQ(self, state, action): 14 | return self.q.get((state, action), 0.0) 15 | 16 | def learnQ(self, state, action, reward, value): 17 | oldv = self.q.get((state, action), None) 18 | if oldv is None: 19 | self.q[(state, action)] = reward 20 | else: 21 | self.q[(state, action)] = oldv + self.alpha * (value - oldv) 22 | 23 | def chooseAction(self, state): 24 | if random.random() < self.epsilon: 25 | action = random.choice(self.actions) 26 | else: 27 | q = [self.getQ(state, a) for a in self.actions] 28 | maxQ = max(q) 29 | count = q.count(maxQ) 30 | if count > 1: 31 | best = [i for i in range(len(self.actions)) if q[i] == maxQ] 32 | i = random.choice(best) 33 | else: 34 | i = q.index(maxQ) 35 | 36 | action = self.actions[i] 37 | return action 38 | 39 | def learn(self, state1, action1, reward, state2, action2): 40 | qnext = self.getQ(state2, action2) 41 | self.learnQ(state1, action1, reward, reward + self.gamma * qnext) 42 | 
-------------------------------------------------------------------------------- /rl_studio/agents/liveplot.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import matplotlib 3 | import matplotlib.pyplot as plt 4 | 5 | rewards_key = "episode_rewards" 6 | 7 | 8 | class LivePlot: 9 | def __init__(self, outdir, data_key=rewards_key, line_color="blue"): 10 | """ 11 | Liveplot renders a graph of either episode_rewards or episode_lengths 12 | Args: 13 | outdir (outdir): Monitor output file location used to populate the graph 14 | data_key (Optional[str]): The key in the json to graph (episode_rewards or episode_lengths). 15 | line_color (Optional[dict]): Color of the plot. 16 | """ 17 | self.outdir = outdir 18 | self.data_key = data_key 19 | self.line_color = line_color 20 | 21 | # styling options 22 | matplotlib.rcParams["toolbar"] = "None" 23 | plt.style.use("ggplot") 24 | plt.xlabel("Episodes") 25 | plt.ylabel(data_key) 26 | fig = plt.gcf().canvas.set_window_title("simulation_graph") 27 | 28 | def plot(self, env): 29 | if self.data_key is rewards_key: 30 | data = gym.wrappers.Monitor.get_episode_rewards(env) 31 | else: 32 | data = gym.wrappers.Monitor.get_episode_lengths(env) 33 | 34 | plt.plot(data, color=self.line_color) 35 | 36 | # pause so matplotlib will display 37 | # may want to figure out matplotlib animation or use a different library in the future 38 | plt.pause(0.000001) 39 | -------------------------------------------------------------------------------- /rl_studio/config/config_pendulum_ppo.yaml: -------------------------------------------------------------------------------- 1 | settings: 2 | output_dir: "./logs/" 3 | save_model: True 4 | save_positions: True 5 | telemetry: False 6 | logging_level: info 7 | mode: training 8 | agent: pendulum 9 | algorithm: ppo_continuous 10 | 11 | # TODO make this section optional 12 | actions: 13 | available_actions: 14 | simple: 15 | 16 | agent: 17 | # TODO To be removed 18 | camera_params: 19 | witdh: 640 20 | height: 480 21 | 22 | environments: 23 | env_name: Pendulum-v1 24 | environment_folder: pendulum 25 | # runs: 20000 26 | runs: 20000 27 | full_experimentation_runs: 0 28 | update_every: 200 29 | show_every: 1000 30 | objective_reward: -350 31 | # block_experience_batch: False 32 | block_experience_batch: False 33 | # random_start_level: 0.05 34 | random_start_level: 0 # Number between 0 and 1 that indicates the difficulty of the start position 35 | random_perturbations_level: 0.8 # Number between 0 and 1 that indicates the frequency of the random perturbations 36 | perturbations_intensity_std: 1 # Number between 0 and 1 that indicates the standard deviation of perturbations intensity 37 | initial_pole_angle: 0 38 | non_recoverable_angle: 0.3 39 | 40 | inference: 41 | inference_file: /home/ruben/Desktop/my-RL-Studio/rl_studio/logs/pendulum/ppo/checkpoints/20221231_0244_actor_avg_-803.8121022237663 42 | 43 | algorithm: 44 | gamma: 1 45 | epsilon: 0.15 46 | episodes_update: 5000 47 | -------------------------------------------------------------------------------- /rl_studio/agents/turtlebot/liveplot.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import gym 3 | import matplotlib 4 | import matplotlib.pyplot as plt 5 | 6 | rewards_key = "episode_rewards" 7 | 8 | 9 | class LivePlot(object): 10 | def __init__(self, outdir, data_key=rewards_key, line_color="blue"): 11 | """ 12 | Liveplot renders a graph of either 
episode_rewards or episode_lengths 13 | Args: 14 | outdir (outdir): Monitor output file location used to populate the graph 15 | data_key (Optional[str]): The key in the json to graph (episode_rewards or episode_lengths). 16 | line_color (Optional[dict]): Color of the plot. 17 | """ 18 | self.outdir = outdir 19 | self.data_key = data_key 20 | self.line_color = line_color 21 | 22 | # styling options 23 | matplotlib.rcParams["toolbar"] = "None" 24 | plt.style.use("ggplot") 25 | plt.xlabel("Episodes") 26 | plt.ylabel(data_key) 27 | fig = plt.gcf().canvas.set_window_title("simulation_graph") 28 | 29 | def plot(self, env): 30 | if self.data_key is rewards_key: 31 | data = gym.wrappers.Monitor.get_episode_rewards(env) 32 | else: 33 | data = gym.wrappers.Monitor.get_episode_lengths(env) 34 | 35 | plt.plot(data, color=self.line_color) 36 | 37 | # pause so matplotlib will display 38 | # may want to figure out matplotlib animation or use a different library in the future 39 | plt.pause(0.000001) 40 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/f1/models/followlane_qlearn.py: -------------------------------------------------------------------------------- 1 | ############################################# 2 | # - Task: Follow Lane 3 | # - Algorithm: Qlearn 4 | # - actions: discrete 5 | # - State: Simplified perception 6 | # 7 | ############################################ 8 | 9 | import math 10 | 11 | from cv_bridge import CvBridge 12 | import cv2 13 | from geometry_msgs.msg import Twist 14 | import numpy as np 15 | 16 | import rospy 17 | from sensor_msgs.msg import Image 18 | 19 | from rl_studio.agents.utils import ( 20 | print_messages, 21 | ) 22 | from rl_studio.envs.gazebo.f1.models.f1_env import F1Env 23 | from rl_studio.envs.gazebo.f1.models.settings import F1GazeboTFConfig 24 | 25 | 26 | class FollowLaneQlearnF1Gazebo(F1Env): 27 | def __init__(self, **config): 28 | 29 | ###### init F1env 30 | F1Env.__init__(self, **config) 31 | ###### init class variables 32 | F1GazeboTFConfig.__init__(self, **config) 33 | 34 | def reset(self): 35 | from rl_studio.envs.gazebo.f1.models.reset import ( 36 | Reset, 37 | ) 38 | 39 | if self.state_space == "image": 40 | return Reset.reset_f1_state_image(self) 41 | else: 42 | return Reset.reset_f1_state_sp(self) 43 | 44 | def step(self, action, step): 45 | from rl_studio.envs.gazebo.f1.models.step import ( 46 | StepFollowLane, 47 | ) 48 | 49 | return StepFollowLane.step_followlane_state_sp_actions_discretes( 50 | self, action, step 51 | ) 52 | -------------------------------------------------------------------------------- /rl_studio/config/config_cartpole_programmatic.yaml: -------------------------------------------------------------------------------- 1 | settings: 2 | output_dir: "./logs/" 3 | save_model: True 4 | save_positions: True 5 | telemetry: False 6 | logging_level: info 7 | mode: inference 8 | agent: cartpole 9 | algorithm: programmatic 10 | 11 | # TODO make this section optional 12 | actions: 13 | available_actions: 14 | simple: 15 | 16 | agent: 17 | cartpole: 18 | # TODO To be removed 19 | camera_params: 20 | witdh: 640 21 | height: 480 22 | 23 | environments: 24 | env_name: myCartpole-v0 25 | environment_folder: cartpole 26 | # runs: 20000 27 | runs: 1000 28 | full_experimentation_runs: 0 29 | update_every: 10 30 | show_every: 1000 31 | experiments: 29 32 | random_start_level: 0 # Number between 0 and 1 that indicates the difficulty of the start position 33 | random_perturbations_level: 0.1 # 
Number between 0 and 1 that indicates the frequency of the random perturbations 34 | random_perturbations_level_step: 0.1 35 | perturbations_intensity_std: 21 #Number between 0 and 1 that indicates the standard deviation of perturbations intensity 36 | perturbations_intensity_std_step: 1 37 | initial_pole_angle: 0 38 | initial_pole_angle_steps: 0.05 39 | non_recoverable_angle: 0.3 # not applicable when making experiments with init_pole_angle (always 0.2 over the initial) 40 | 41 | algorithm: 42 | # TODO make it complaining just if the relevant parameter for this algorithm is not found 43 | 44 | inference: 45 | # TODO make it complaining just if the relevant parameter for this algorithm is not found -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # gym 2 | 3 | The MIT License 4 | 5 | Copyright (c) 2016 OpenAI (https://openai.com) 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | THE SOFTWARE. 24 | 25 | # Mujoco models 26 | This work is derived from [MuJuCo models](http://www.mujoco.org/forum/index.php?resources/) used under the following license: 27 | ``` 28 | This file is part of MuJoCo. 29 | Copyright 2009-2015 Roboti LLC. 
30 | Mujoco :: Advanced physics simulation engine 31 | Source : www.roboti.us 32 | Version : 1.31 33 | Released : 23Apr16 34 | Author :: Vikash Kumar 35 | Contacts : kumar@roboti.us 36 | ``` 37 | -------------------------------------------------------------------------------- /rl_studio/config/config_cartpole_ppo.yaml: -------------------------------------------------------------------------------- 1 | settings: 2 | output_dir: "./logs/" 3 | save_model: True 4 | save_positions: True 5 | telemetry: False 6 | logging_level: info 7 | mode: inference 8 | agent: cartpole 9 | algorithm: ppo 10 | 11 | # TODO make this section optional 12 | actions: 13 | available_actions: 14 | simple: 15 | 16 | agent: 17 | cartpole: 18 | # TODO To be removed 19 | camera_params: 20 | witdh: 640 21 | height: 480 22 | 23 | environments: 24 | env_name: myCartpole-v0 25 | environment_folder: cartpole 26 | # runs: 20000 27 | runs: 100 28 | full_experimentation_runs: 0 29 | update_every: 1000 30 | show_every: 1000 31 | objective_reward: 500 32 | # block_experience_batch: False 33 | block_experience_batch: False 34 | # random_start_level: 0.05 35 | experiments: 29 36 | random_start_level: 0 # Number between 0 and 1 that indicates the difficulty of the start position 37 | random_perturbations_level: 0.1 # Number between 0 and 1 that indicates the frequency of the random perturbations 38 | random_perturbations_level_step: 0.1 39 | perturbations_intensity_std: 21 #Number between 0 and 1 that indicates the standard deviation of perturbations intensity 40 | perturbations_intensity_std_step: 1 41 | initial_pole_angle: 0 42 | initial_pole_angle_steps: 0.05 43 | non_recoverable_angle: 0.3 # not applicable when making experiments with init_pole_angle (always 0.2 over the initial) 44 | 45 | inference: 46 | inference_file: /home/ruben/Desktop/my-RL-Studio/rl_studio/logs/cartpole/ppo/checkpoints/20221110_0937_actor_avg_500.0.pkl 47 | 48 | algorithm: 49 | gamma: 1 50 | epsilon: 0.15 51 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/real_env_ros2.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | 3 | import gym 4 | import rclpy 5 | 6 | 7 | class RealEnvROS2(gym.Env): 8 | """Superclass for all Gazebo environments.""" 9 | 10 | metadata = {"render.models": ["human"]} 11 | 12 | def __init__(self): 13 | 14 | # Launch the simulation with the given launchfile name 15 | rclpy.init(args=None) 16 | self.node = rclpy.create_node("real_env_ros2") 17 | 18 | def step(self, action): 19 | 20 | # Implement this method in every subclass 21 | # Perform a step in gazebo. E.g. move the robot 22 | raise NotImplementedError 23 | 24 | def reset(self): 25 | 26 | # Implemented in subclass 27 | raise NotImplementedError 28 | 29 | def render(self, mode=None, close=False): 30 | pass 31 | 32 | def _render(self, mode=None, close=False): 33 | self._close() 34 | 35 | def _close(self): 36 | output1 = subprocess.check_call( 37 | ["cat", "/tmp/myroslaunch_" + self.port + ".pid"] 38 | ) 39 | output2 = subprocess.check_call( 40 | ["cat", "/home/erle/.ros/roscore-" + self.port + ".pid"] 41 | ) 42 | subprocess.Popen(["kill", "-INT", str(output1)]) 43 | subprocess.Popen(["kill", "-INT", str(output2)]) 44 | 45 | def close(self): 46 | pass 47 | 48 | def _configure(self): 49 | 50 | # TODO 51 | # From OpenAI API: Provides runtime configuration to the enviroment 52 | # Maybe set the Real Time Factor? 
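        # Illustrative sketch (an assumption, not part of the original file):
        # with rclpy, this kind of runtime configuration is usually exposed as
        # node parameters, e.g. (hypothetical parameter name):
        #   self.node.declare_parameter("real_time_factor", 1.0)
        #   rtf = self.node.get_parameter("real_time_factor").value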
53 | pass 54 | 55 | def _seed(self): 56 | 57 | # TODO 58 | # From OpenAI API: Sets the seed for this env's random number generator(s) 59 | pass 60 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/f1/models/followline_qlearn.py: -------------------------------------------------------------------------------- 1 | ############################################# 2 | # - Task: Follow Line 3 | # - Algorithm: Qlearn 4 | # - actions: discrete 5 | # - State: Simplified perception 6 | # 7 | ############################################ 8 | 9 | import math 10 | 11 | from cv_bridge import CvBridge 12 | import cv2 13 | from geometry_msgs.msg import Twist 14 | import numpy as np 15 | 16 | import rospy 17 | from sensor_msgs.msg import Image 18 | 19 | from rl_studio.agents.utils import ( 20 | print_messages, 21 | ) 22 | from rl_studio.envs.gazebo.f1.models.f1_env import F1Env 23 | from rl_studio.envs.gazebo.f1.models.settings import F1GazeboTFConfig 24 | 25 | 26 | class FollowLineQlearnF1Gazebo(F1Env): 27 | def __init__(self, **config): 28 | 29 | ###### init F1env 30 | F1Env.__init__(self, **config) 31 | ###### init class variables 32 | F1GazeboTFConfig.__init__(self, **config) 33 | 34 | def reset(self): 35 | from rl_studio.envs.gazebo.f1.models.reset import ( 36 | Reset, 37 | ) 38 | 39 | if self.state_space == "image": 40 | return Reset.reset_f1_state_image(self) 41 | else: 42 | return Reset.reset_f1_state_sp(self) 43 | 44 | def step(self, action, step): 45 | from rl_studio.envs.gazebo.f1.models.step import ( 46 | StepFollowLine, 47 | ) 48 | 49 | if self.state_space == "image": 50 | return StepFollowLine.step_followline_state_image_actions_discretes( 51 | self, action, step 52 | ) 53 | else: 54 | return StepFollowLine.step_followline_state_sp_actions_discretes( 55 | self, action, step 56 | ) 57 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/autoparking/models/autoparking_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import rospy 3 | from gazebo_msgs.srv import GetModelState 4 | from geometry_msgs.msg import Twist 5 | from std_srvs.srv import Empty 6 | from sensor_msgs.msg import Image 7 | 8 | from rl_studio.envs.gazebo import gazebo_envs 9 | 10 | 11 | class AutoparkingEnv(gazebo_envs.GazeboEnv): 12 | def __init__(self, **config): 13 | gazebo_envs.GazeboEnv.__init__(self, config) 14 | self.circuit_name = config.get("circuit_name") 15 | # self.circuit_positions_set = config.get("circuit_positions_set") 16 | self.alternate_pose = config.get("alternate_pose") 17 | self.model_state_name = config.get("model_state_name") 18 | self.position = None 19 | # self.start_pose = np.array(config.get("alternate_pose")) 20 | self.start_pose = np.array(config.get("gazebo_start_pose")) 21 | self.start_random_pose = config.get("gazebo_random_start_pose") 22 | self._seed() 23 | self.parking_spot_position_x = config.get("parking_spot_position_x") 24 | self.parking_spot_position_y = config.get("parking_spot_position_y") 25 | 26 | # self.cv_image_pub = rospy.Publisher('/F1ROS/cameraL/image_raw', Image, queue_size = 10) 27 | self.vel_pub = rospy.Publisher("/F1ROS/cmd_vel", Twist, queue_size=5) 28 | self.unpause = rospy.ServiceProxy("/gazebo/unpause_physics", Empty) 29 | self.pause = rospy.ServiceProxy("/gazebo/pause_physics", Empty) 30 | self.reset_proxy = rospy.ServiceProxy("/gazebo/reset_simulation", Empty) 31 | self.reward_range = (-np.inf, 
np.inf) 32 | self.model_coordinates = rospy.ServiceProxy( 33 | "/gazebo/get_model_state", GetModelState 34 | ) 35 | -------------------------------------------------------------------------------- /rl_studio/installation/formula1_setup.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #TODO now, if it is executed twice, the $GAZEBO_MODEL_PATH and $GAZEBO_RESOURCE_PATH environment variables will be added twice. 4 | #TODO Avoid this without removing different installations environment variables. 5 | 6 | if [ -z "$GAZEBO_MODEL_PATH" ]; then 7 | bash -c 'echo "export GAZEBO_MODEL_PATH=$GAZEBO_MODEL_PATH:"`pwd`/../CustomRobots/f1/models >> ~/.bashrc' 8 | else 9 | bash -c 'sed "s,GAZEBO_MODEL_PATH=[^;]*,'GAZEBO_MODEL_PATH=$GAZEBO_MODEL_PATH:`pwd`/../CustomRobots/f1/models'," -i ~/.bashrc' 10 | fi 11 | 12 | if [ -z "$GAZEBO_RESOURCE_PATH" ]; then 13 | bash -c 'echo "export GAZEBO_MODEL_PATH=$GAZEBO_MODEL_PATH:"`pwd`/../CustomRobots/f1/worlds >> ~/.bashrc' 14 | else 15 | bash -c 'sed "s,GAZEBO_RESOURCE_PATH=[^;]*,'GAZEBO_RESOURCE_PATH=$GAZEBO_RESOURCE_PATH:`pwd`/../CustomRobots/f1/worlds'," -i ~/.bashrc' 16 | fi 17 | 18 | # Add Formula 1 launch environment variable 19 | if [ -z "$GYM_GAZEBO_WORLD_CIRCUIT_F1" ]; then 20 | bash -c 'echo "export GYM_GAZEBO_WORLD_CIRCUIT_F1="`pwd`/../CustomRobots/f1/worlds/simple_circuit.world >> ~/.bashrc' 21 | else 22 | bash -c 'sed "s,GYM_GAZEBO_WORLD_CIRCUIT=[^;]*,'GYM_GAZEBO_WORLD_CIRCUIT=`pwd`/../CustomRobots/f1/worlds/simple_circuit.world'," -i ~/.bashrc' 23 | fi 24 | 25 | if [ -z "$GYM_GAZEBO_WORLD_NURBURGRING_F1" ]; then 26 | bash -c 'echo "export GYM_GAZEBO_WORLD_NURBURGRING_F1="`pwd`/../CustomRobots/f1/worlds/nurburgring_line.world >> ~/.bashrc' 27 | fi 28 | 29 | if [ -z "$GYM_GAZEBO_WORLD_MONTREAL_F1" ]; then 30 | bash -c 'echo "export GYM_GAZEBO_WORLD_MONTREAL_F1="`pwd`/../CustomRobots/f1/worlds/montreal_line.world >> ~/.bashrc' 31 | fi 32 | 33 | echo 'Formula 1 env variables loaded succesfully' 34 | 35 | # Reload bash 36 | exec bash 37 | -------------------------------------------------------------------------------- /rl_studio/config/config_cartpole_ppo_continuous.yaml: -------------------------------------------------------------------------------- 1 | settings: 2 | output_dir: "./logs/" 3 | save_model: True 4 | save_positions: True 5 | telemetry: False 6 | logging_level: info 7 | mode: inference 8 | agent: cartpole 9 | algorithm: ppo_continuous 10 | 11 | # TODO make this section optional 12 | actions: 13 | available_actions: 14 | simple: 15 | 16 | agent: 17 | cartpole: 18 | # TODO To be removed 19 | camera_params: 20 | witdh: 640 21 | height: 480 22 | 23 | environments: 24 | env_name: myCartpole-continuous-v0 25 | environment_folder: cartpole 26 | # runs: 20000 27 | runs: 100 28 | full_experimentation_runs: 0 29 | update_every: 100 30 | show_every: 1000 31 | objective_reward: 500 32 | # block_experience_batch: False 33 | block_experience_batch: False 34 | # random_start_level: 0.05 35 | experiments: 29 36 | random_start_level: 0 # Number between 0 and 1 that indicates the difficulty of the start position 37 | random_perturbations_level: 0.1 # Number between 0 and 1 that indicates the frequency of the random perturbations 38 | random_perturbations_level_step: 0.1 39 | perturbations_intensity_std: 21 #Number between 0 and 1 that indicates the standard deviation of perturbations intensity 40 | perturbations_intensity_std_step: 1 41 | initial_pole_angle: 0 42 | initial_pole_angle_steps: 0.05 43 
| non_recoverable_angle: 0.3 # not applicable when making experiments with init_pole_angle (always 0.2 over the initial) 44 | 45 | inference: 46 | inference_file: /home/ruben/Desktop/my-RL-Studio/rl_studio/logs/cartpole/ppo_continuous/checkpoints/20221231_1813_actor_avg_422.44 47 | 48 | algorithm: 49 | gamma: 1 50 | epsilon: 0.15 51 | episodes_update: 1000 -------------------------------------------------------------------------------- /rl_studio/wrappers/monitoring/tests/test_video_recorder.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import gym 4 | from gym.wrappers.monitoring.video_recorder import VideoRecorder 5 | 6 | 7 | class BrokenRecordableEnv(object): 8 | metadata = {"render.models": [None, "rgb_array"]} 9 | 10 | def render(self, mode=None): 11 | pass 12 | 13 | 14 | class UnrecordableEnv(object): 15 | metadata = {"render.models": [None]} 16 | 17 | def render(self, mode=None): 18 | pass 19 | 20 | 21 | def test_record_simple(): 22 | env = gym.make("CartPole-v1") 23 | rec = VideoRecorder(env) 24 | env.reset() 25 | rec.capture_frame() 26 | rec.close() 27 | assert not rec.empty 28 | assert not rec.broken 29 | assert os.path.exists(rec.path) 30 | f = open(rec.path) 31 | assert os.fstat(f.fileno()).st_size > 100 32 | 33 | 34 | def test_no_frames(): 35 | env = BrokenRecordableEnv() 36 | rec = VideoRecorder(env) 37 | rec.close() 38 | assert rec.empty 39 | assert rec.functional 40 | assert not os.path.exists(rec.path) 41 | 42 | 43 | def test_record_unrecordable_method(): 44 | env = UnrecordableEnv() 45 | rec = VideoRecorder(env) 46 | assert not rec.enabled 47 | rec.close() 48 | 49 | 50 | def test_record_breaking_render_method(): 51 | env = BrokenRecordableEnv() 52 | rec = VideoRecorder(env) 53 | rec.capture_frame() 54 | rec.close() 55 | assert rec.empty 56 | assert rec.broken 57 | assert not os.path.exists(rec.path) 58 | 59 | 60 | def test_text_envs(): 61 | env = gym.make("FrozenLake-v0") 62 | video = VideoRecorder(env) 63 | try: 64 | env.reset() 65 | video.capture_frame() 66 | video.close() 67 | finally: 68 | os.remove(video.path) 69 | -------------------------------------------------------------------------------- /rl_studio/config/config_cartpole_ddpg.yaml: -------------------------------------------------------------------------------- 1 | settings: 2 | output_dir: "./logs/" 3 | save_model: True 4 | save_positions: True 5 | telemetry: False 6 | logging_level: info 7 | mode: inference 8 | agent: cartpole 9 | algorithm: ddpg 10 | framework: Pytorch 11 | 12 | # TODO make this section optional 13 | actions: 14 | available_actions: 15 | simple: 16 | 17 | agent: 18 | cartpole: 19 | # TODO To be removed 20 | camera_params: 21 | witdh: 640 22 | height: 480 23 | 24 | environments: 25 | env_name: myCartpole-continuous-v0 26 | environment_folder: cartpole 27 | # runs: 20000 28 | runs: 100 29 | full_experimentation_runs: 0 30 | update_every: 100 31 | show_every: 10000 32 | objective_reward: 500 33 | # block_experience_batch: False 34 | block_experience_batch: False 35 | # random_start_level: 0.05 36 | experiments: 29 37 | random_start_level: 0 # Number between 0 and 1 that indicates the difficulty of the start position 38 | random_perturbations_level: 0.1 # Number between 0 and 1 that indicates the frequency of the random perturbations 39 | random_perturbations_level_step: 0.1 40 | perturbations_intensity_std: 21 #Number between 0 and 1 that indicates the standard deviation of perturbations intensity 41 | 
perturbations_intensity_std_step: 1 42 | initial_pole_angle: 0 43 | initial_pole_angle_steps: 0.05 44 | non_recoverable_angle: 0.3 # not applicable when making experiments with init_pole_angle (always 0.2 over the initial) 45 | 46 | inference: 47 | inference_file: /home/ruben/Desktop/my-RL-Studio/rl_studio/logs/cartpole/ddpg/checkpoints/20230107_0034_actor_avg_207.91.pkl 48 | 49 | algorithm: 50 | gamma: 0.99 51 | hidden_size: 128 52 | batch_size: 128 -------------------------------------------------------------------------------- /rl_studio/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.registration import register 2 | 3 | 4 | # F1 envs 5 | register( 6 | id="F1Env-v0", 7 | entry_point="rl_studio.envs.gazebo.f1.models:F1Env", 8 | # More arguments here 9 | ) 10 | 11 | # RobotMesh envs 12 | register( 13 | id="RobotMeshEnv-v0", 14 | entry_point="rl_studio.envs.gazebo.robot_mesh:RobotMeshEnv", 15 | # More arguments here 16 | ) 17 | 18 | register( 19 | id="myCartpole-v0", 20 | entry_point="rl_studio.envs.openai_gym.cartpole.cartpole_env:CartPoleEnv", 21 | max_episode_steps=500, 22 | kwargs={"random_start_level": 0.05}, 23 | ) 24 | 25 | register( 26 | id="myCartpole-v1", 27 | entry_point="rl_studio.envs.openai_gym.cartpole.cartpole_env_improved:CartPoleEnv", 28 | max_episode_steps=500, 29 | kwargs={"random_start_level": 0.05}, 30 | ) 31 | 32 | 33 | register( 34 | id="myCartpole-continuous-v0", 35 | entry_point="rl_studio.envs.openai_gym.cartpole.cartpole_env_continuous:CartPoleEnv", 36 | max_episode_steps=500, 37 | kwargs={"random_start_level": 0.05}, 38 | ) 39 | 40 | register( 41 | id="myCartpole-continuous-v1", 42 | entry_point="rl_studio.envs.openai_gym.cartpole.cartpole_env_continuous_improved:CartPoleEnv", 43 | max_episode_steps=500, 44 | kwargs={"random_start_level": 0.05}, 45 | ) 46 | 47 | # MountainCar envs 48 | register( 49 | id="MyMountainCarEnv-v0", 50 | entry_point="rl_studio.envs.gazebo.mountain_car:MountainCarEnv", 51 | # More arguments here 52 | ) 53 | 54 | # AutoParking envs 55 | register( 56 | id="AutoparkingEnv-v0", 57 | entry_point="rl_studio.envs.gazebo.autoparking.models:AutoparkingEnv", 58 | ) 59 | 60 | # Carla 61 | register( 62 | id="CarlaEnv-v0", 63 | entry_point="rl_studio.envs.carla:Carla", 64 | ) 65 | -------------------------------------------------------------------------------- /rl_studio/agents/f1/manual_pilot.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import datetime 4 | import pickle 5 | import time 6 | 7 | import gym 8 | 9 | from rl_studio.agents.f1 import settings 10 | from rl_studio.visual.ascii.text import MANUAL_PILOT 11 | 12 | total_episodes = 200000 13 | 14 | 15 | def save_times(checkpoints): 16 | file_name = "manual_pilot_checkpoints" 17 | file_dump = open("./logs/" + file_name + ".pkl", "wb") 18 | pickle.dump(checkpoints, file_dump) 19 | 20 | 21 | if __name__ == "__main__": 22 | 23 | print(MANUAL_PILOT) 24 | print(" - Start hour: {}".format(datetime.datetime.now())) 25 | 26 | environment = settings.envs_params["manual"] 27 | env = gym.make(environment["env"]) 28 | 29 | checkpoints = [] # "ID" - x, y - time 30 | time.sleep(5) 31 | 32 | previous = datetime.datetime.now() 33 | start_time = datetime.datetime.now() 34 | for episode in range(total_episodes): 35 | 36 | now = datetime.datetime.now() 37 | if now - datetime.timedelta(seconds=3) > previous: 38 | previous = datetime.datetime.now() 39 | x, y = env.get_position() 40 | 
checkpoints.append( 41 | [ 42 | len(checkpoints), 43 | (x, y), 44 | datetime.datetime.now().strftime("%M:%S.%f")[-4], 45 | ] 46 | ) 47 | 48 | if ( 49 | datetime.datetime.now() - datetime.timedelta(minutes=2, seconds=35) 50 | > start_time 51 | ): 52 | print("Finish. Saving parameters . . .") 53 | save_times(checkpoints) 54 | env.close() 55 | exit(0) 56 | 57 | env.execute() 58 | 59 | if episode % 500 == 0: 60 | print(episode) 61 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/f1/models/f1_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import rospy 3 | from gazebo_msgs.srv import GetModelState 4 | from geometry_msgs.msg import Twist 5 | from std_srvs.srv import Empty 6 | 7 | from rl_studio.envs.gazebo import gazebo_envs 8 | 9 | 10 | class F1Env(gazebo_envs.GazeboEnv): 11 | def __init__(self, **config): 12 | gazebo_envs.GazeboEnv.__init__(self, config) 13 | self.circuit_name = config.get("circuit_name") 14 | self.circuit_positions_set = config.get("circuit_positions_set") 15 | self.alternate_pose = config.get("alternate_pose") 16 | self.model_state_name = config.get("model_state_name") 17 | 18 | self.sleep = config.get("sleep") 19 | self.vel_pub = rospy.Publisher("/F1ROS/cmd_vel", Twist, queue_size=5) 20 | self.unpause = rospy.ServiceProxy("/gazebo/unpause_physics", Empty) 21 | self.pause = rospy.ServiceProxy("/gazebo/pause_physics", Empty) 22 | self.reset_proxy = rospy.ServiceProxy("/gazebo/reset_simulation", Empty) 23 | self.reward_range = (-np.inf, np.inf) 24 | self.model_coordinates = rospy.ServiceProxy( 25 | "/gazebo/get_model_state", GetModelState 26 | ) 27 | self.position = None 28 | # self.start_pose = np.array(config.get("start_pose")) 29 | self.start_pose = np.array(config.get("gazebo_start_pose")) 30 | self.start_random_pose = config.get("gazebo_random_start_pose") 31 | self.end = 0 32 | 33 | self._seed() 34 | 35 | def render(self, mode="human"): 36 | pass 37 | 38 | def step(self, action): 39 | 40 | raise NotImplementedError 41 | 42 | def reset(self): 43 | 44 | raise NotImplementedError 45 | 46 | def inference(self, action): 47 | 48 | raise NotImplementedError 49 | -------------------------------------------------------------------------------- /rl_studio/config/config_cartpole_dqn.yaml: -------------------------------------------------------------------------------- 1 | settings: 2 | output_dir: "./logs/" 3 | save_model: True 4 | save_positions: True 5 | telemetry: False 6 | logging_level: info 7 | mode: training 8 | agent: cartpole 9 | algorithm: dqn 10 | 11 | # TODO make this section optional 12 | actions: 13 | available_actions: 14 | simple: 15 | 16 | agent: 17 | cartpole: 18 | # TODO To be removed 19 | camera_params: 20 | witdh: 640 21 | height: 480 22 | 23 | environments: 24 | env_name: myCartpole-v0 25 | environment_folder: cartpole 26 | # runs: 20000 27 | runs: 20000 28 | full_experimentation_runs: 0 29 | update_every: 100 30 | show_every: 10000 31 | objective_reward: 500 32 | # block_experience_batch: False 33 | block_experience_batch: False 34 | # random_start_level: 0.05 35 | experiments: 1 36 | random_start_level: 0 # Number between 0 and 1 that indicates the difficulty of the start position 37 | random_perturbations_level: 0 # Number between 0 and 1 that indicates the frequency of the random perturbations 38 | random_perturbations_level_step: 0 39 | perturbations_intensity_std: 1 #Number between 0 and 1 that indicates the standard deviation of 
perturbations intensity 40 | perturbations_intensity_std_step: 1 41 | initial_pole_angle: 0 42 | initial_pole_angle_steps: 0.05 43 | non_recoverable_angle: 0.3 # not applicable when making experiments with init_pole_angle (always 0.2 over the initial) 44 | 45 | inference: 46 | inference_file: /home/ruben/Desktop/my-RL-Studio/rl_studio/checkpoints/cartpole/dqn/20221017_2118_epsilon_1_DQN_WEIGHTS_avg_475.825.pkl 47 | 48 | algorithm: 49 | # gamma: 0.95 50 | gamma: 0.95 51 | epsilon_discount: 0.9997 52 | # epsilon_discount: 0.9998 53 | # batch_size: 256 54 | batch_size: 256 55 | -------------------------------------------------------------------------------- /rl_studio/envs/carla/utils/colors.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ This is a helper module to pretty print in linux-based terminals.""" 4 | 5 | 6 | class Colors: 7 | """ 8 | Colors defined for improve the prints in each Stage 9 | """ 10 | DEBUG = '\033[1;36;1m' 11 | OKCYAN = '\033[96m' 12 | OKBLUE = '\033[94m' 13 | OKGREEN = '\033[92m' 14 | WARNING = '\033[93m' 15 | FAIL = '\033[91m' 16 | ENDC = '\033[0m' 17 | CEND = '\33[0m' 18 | CBOLD = '\33[1m' 19 | CITALIC = '\33[3m' 20 | CURL = '\33[4m' 21 | CBLINK = '\33[5m' 22 | CBLINK2 = '\33[6m' 23 | CSELECTED = '\33[7m' 24 | CBLACK = '\33[30m' 25 | CRED = '\33[31m' 26 | CGREEN = '\33[32m' 27 | CYELLOW = '\33[33m' 28 | CBLUE = '\33[34m' 29 | CVIOLET = '\33[35m' 30 | CBEIGE = '\33[36m' 31 | CWHITE = '\33[37m' 32 | CBLACKBG = '\33[40m' 33 | CREDBG = '\33[41m' 34 | CGREENBG = '\33[42m' 35 | CYELLOWBG = '\33[43m' 36 | CBLUEBG = '\33[44m' 37 | CVIOLETBG = '\33[45m' 38 | CBEIGEBG = '\33[46m' 39 | CWHITEBG = '\33[47m' 40 | CGREY = '\33[90m' 41 | CRED2 = '\33[91m' 42 | CGREEN2 = '\33[92m' 43 | CYELLOW2 = '\33[93m' 44 | CBLUE2 = '\33[94m' 45 | CVIOLET2 = '\33[95m' 46 | CBEIGE2 = '\33[96m' 47 | CWHITE2 = '\33[97m' 48 | CGREYBG = '\33[100m' 49 | CREDBG2 = '\33[101m' 50 | CGREENBG2 = '\33[102m' 51 | CYELLOWBG2 = '\33[103m' 52 | CBLUEBG2 = '\33[104m' 53 | CVIOLETBG2 = '\33[105m' 54 | CBEIGEBG2 = '\33[106m' 55 | CWHITEBG2 = '\33[107m' 56 | 57 | 58 | if __name__ == "__main__": 59 | 60 | import inspect 61 | 62 | c = Colors() 63 | variables = [i for i in dir(c) if not inspect.ismethod(i) and i not in ('__doc__', '__module__')] 64 | for v in variables: 65 | print("{}Sample:{}\t--> {}".format(getattr(c, v), c.ENDC, v)) 66 | -------------------------------------------------------------------------------- /rl_studio/config/config_mountain_car_qlearn.yaml: -------------------------------------------------------------------------------- 1 | settings: 2 | output_dir: "./logs/" 3 | save_model: True 4 | save_positions: True 5 | telemetry: False 6 | mode: inference 7 | agent: mountain_car 8 | algorithm: qlearn 9 | 10 | actions: 11 | 0: [ 0, 0, 0, 0] 12 | 1: [ 0, 0, -0.9, -0.9] 13 | 2: [ 0.9, 0.9, 0, 0] 14 | 15 | agent: 16 | mountain_car: 17 | camera_params: 18 | witdh: 640 19 | height: 480 20 | 21 | environments: 22 | env_name: MyMountainCarEnv-v0 23 | circuit_name: simple 24 | training_type: qlearn_camera 25 | launchfile: my_mountain_world.launch 26 | environment_folder: mountain_car 27 | robot_name: my_robot 28 | start_pose: 1, 2 29 | alternate_pose: True 30 | estimated_steps: 4000 31 | sensor: camera 32 | circuit_positions_set: # x, y, z, roll, pith, ???. 
yaw 33 | - [ 0, 53.462, -41.988, 0.004, 0, 0, 1.57, -1.57 ] 34 | - [ 1, 53.462, -8.734, 0.004, 0, 0, 1.57, -1.57 ] 35 | - [ 2, 39.712, -30.741, 0.004, 0, 0, 1.56, 1.56 ] 36 | - [ 3, -6.861, -36.481, 0.004, 0, 0.01, -0.858, 0.613 ] 37 | - [ 4, 20.043, 37.130, 0.003, 0, 0.103, -1.4383, -1.4383 ] 38 | goal: 6.5 39 | pos_x: -4 40 | pos_y: -2 41 | pos_z: 3 42 | max_steps: 5000 43 | 44 | # manual: 45 | # env_name: F1Env-v0 46 | # circuit_name: manual 47 | # training_type: qlearn_camera 48 | # launch: simple_circuit.launch 49 | # start_pose: 0 50 | # alternate_pose: False 51 | # estimated_steps: 4000 52 | # sensor: camera 53 | 54 | inference: 55 | inference_file: /home/ruben/Desktop/my-RL-Studio/rl_studio/checkpoints/mountain_car/1_20221226_1928_epsilon_0.01_QTABLE.pkl 56 | actions_file: /home/ruben/Desktop/my-RL-Studio/rl_studio/checkpoints/mountain_car/actions_set_20221226_1814 57 | 58 | algorithm: 59 | alpha: 0.9 60 | epsilon: 0.95 61 | gamma: 0.9 62 | epsilon_discount: 0.99995 63 | -------------------------------------------------------------------------------- /rl_studio/envs/carla/followlane/utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | 5 | class AutoCarlaUtils: 6 | def __init__(self): 7 | self.f1 = None 8 | 9 | @staticmethod 10 | def show_image_with_centrals( 11 | name, img, waitkey, centrals_in_pixels, centrals_normalized, x_row 12 | ): 13 | window_name = f"{name}" 14 | img = np.array(img) 15 | 16 | for index, _ in enumerate(x_row): 17 | cv2.putText( 18 | img, 19 | str(f"{int(centrals_in_pixels[index])}"), 20 | (int(centrals_in_pixels[index]) + 20, int(x_row[index])), 21 | cv2.FONT_HERSHEY_SIMPLEX, 22 | 0.3, 23 | (255, 255, 255), 24 | 1, 25 | cv2.LINE_AA, 26 | ) 27 | cv2.putText( 28 | img, 29 | str(f"[{centrals_normalized[index]}]"), 30 | (320, int(x_row[index])), 31 | cv2.FONT_HERSHEY_SIMPLEX, 32 | 0.3, 33 | (255, 255, 255), 34 | 1, 35 | cv2.LINE_AA, 36 | ) 37 | cv2.line( 38 | img, 39 | (0, int(x_row[index])), 40 | (int(img.shape[1]), int(x_row[index])), 41 | color=(10, 10, 10), 42 | thickness=1, 43 | ) 44 | cv2.line( 45 | img, 46 | (int(img.shape[1] // 2), 0), 47 | (int(img.shape[1] // 2), int(img.shape[0])), 48 | color=(30, 30, 30), 49 | thickness=1, 50 | ) 51 | 52 | cv2.imshow(window_name, img) 53 | cv2.waitKey(waitkey) 54 | 55 | @staticmethod 56 | def show_image(name, img, waitkey): 57 | window_name = f"{name}" 58 | cv2.imshow(window_name, img) 59 | cv2.waitKey(waitkey) 60 | -------------------------------------------------------------------------------- /rl_studio/wrappers/time_limit.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | from gym import Wrapper, logger 4 | 5 | 6 | class TimeLimit(Wrapper): 7 | def __init__(self, env, max_episode_seconds=None, max_episode_steps=None): 8 | super(TimeLimit, self).__init__(env) 9 | self._max_episode_seconds = max_episode_seconds 10 | self._max_episode_steps = max_episode_steps 11 | 12 | self._elapsed_steps = 0 13 | self._episode_started_at = None 14 | 15 | @property 16 | def _elapsed_seconds(self): 17 | return time.time() - self._episode_started_at 18 | 19 | def _past_limit(self): 20 | """Return true if we are past our limit""" 21 | if ( 22 | self._max_episode_steps is not None 23 | and self._max_episode_steps <= self._elapsed_steps 24 | ): 25 | logger.debug("Env has passed the step limit defined by TimeLimit.") 26 | return True 27 | 28 | if ( 29 | self._max_episode_seconds is not None 30 | and 
self._max_episode_seconds <= self._elapsed_seconds 31 | ): 32 | logger.debug("Env has passed the seconds limit defined by TimeLimit.") 33 | return True 34 | 35 | return False 36 | 37 | def step(self, action): 38 | assert ( 39 | self._episode_started_at is not None 40 | ), "Cannot call env.step() before calling reset()" 41 | observation, reward, done, info = self.env.step(action) 42 | self._elapsed_steps += 1 43 | 44 | if self._past_limit(): 45 | if self.metadata.get("semantics.autoreset"): 46 | _ = self.reset() # automatically reset the env 47 | done = True 48 | 49 | return observation, reward, done, info 50 | 51 | def reset(self): 52 | self._episode_started_at = time.time() 53 | self._elapsed_steps = 0 54 | return self.env.reset() 55 | -------------------------------------------------------------------------------- /rl_studio/algorithms/README.md: -------------------------------------------------------------------------------- 1 | # Algorithms 2 | 3 | ## Q-learning 4 | It is based in Watkins, C. J., & Dayan, P. (1992). Q-learning. Machine learning, 8(3), 279-292. 5 | As a tabular method to solve reinforcement learning tasks, it acts in a particular state getting an inmediate reward, saving all pairs (states, actions) -> rewards in a table. 6 | Q-learning has been designed to work with low dimensional states and discrete actions. It is one of the canonical RL algorithms with a high performance for low level dimensionality tasks. 7 | 8 | Our implementation of Q-Learning algorithm has two approaches: with table or dictionnary. You can choose any of them through config file. Dictionnary is more efficient in terms of memory size due to its dynamic implementation. Otherwise table option is closer to the Q-learning original approach, developed with numpy library. 9 | Both have been tested succesfully in different tasks. 10 | 11 | 12 | 13 | 14 | --- 15 | ## Deep Deterministic Gradient Policy (DDPG) 16 | 17 | The algorithm is based in LILLICRAP, Timothy P., et al. Continuous control with deep reinforcement learning. arXiv preprint arXiv:1509.02971, 2015. 18 | 19 | It allows us to work with **multidimensional states** such as raw input from a camera and with **continuous** or **discrete actions** to develop complex projects. Now, it is based on **Tensorflow**, although in the future we will be able to integrate other Deep Learning frameworks. 20 | 21 | 22 | --- 23 | 24 | ## Deep Q Networks (DQN) 25 | 26 | Based on [Human-level control through deep reinforcement learning whitepaper](https://www.nature.com/articles/nature14236?wm=book_wap_0005), it allows working with **multidimensional states** with Deep Neural Nets and **discrete actions**. Our solution is currently based on Tensorflow framework. 27 | 28 | 29 | --- 30 | ## How to config and launch 31 | If you want to config and training or inferencing, please go to [agents](../agents/README.md) section -------------------------------------------------------------------------------- /rl_studio/visual/ascii/images.py: -------------------------------------------------------------------------------- 1 | """ 2 | References: http://www.patorjk.com/software/taag/#p=display&f=Sub-Zero&t=Aliasrc%20 3 | """ 4 | 5 | JDEROBOT_LOGO = """ 6 | ``..--------..`` 7 | `.-:/::--.........--:/:-.` 8 | `-:/-.` `.-::-` 9 | .:/-` `-::-` 10 | -/:. `:/-` 11 | ./:. `:/- 12 | :/- `-:+ossyyyyyso+/-. ./:` 13 | `::` ./syo/:.`` `.-/+sy+:` `:/` 14 | `::` `:sy+- .:oy+. :/` 15 | :/` /ys: .+yo. `:: 16 | ./- .sy: .oy/ ./- 17 | :/ -ys. `.-://++//:-. 
/y+ :/ 18 | `/: .ys` ./+/-.`` ``.-/+:. /y+ -/. 19 | ./- oy. .++-` `-+/. oy- ./. 20 | ./-.yo :+: `/+- -y+ ./. 21 | `/:-y/ -+: .-::::-. /+. `ys -/. 22 | :/:y/`++ -//-.``.-//- `++ `ys :/ 23 | ./:yo`+: :/. ./: ++` -y+ ./- 24 | :/sy:+/./- -/. ++ oy. :: 25 | `//yy++//- -/. -+: /y/ :/` 26 | `:/yyo++/. .// -+/ /y+ `:/. 27 | `:/syo++//-.``.-//- `/+: .sy: .::` 28 | -/+syo+++/:::-. `-/+:` .oy+` `-/- 29 | `-/+syso++//:///+/:.` ./sy+. `-/:` 30 | `-//osyso+:-.` `..:/oys+- `.:/-` 31 | .-////++osyyyysso+/-` `.-:/-. 32 | `.-:/:--..``````...-:/:-.` 33 | `..---::::---..` 34 | """ 35 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/real_env.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | 4 | import gym 5 | import rospy 6 | 7 | 8 | class RealEnv(gym.Env): 9 | """Superclass for all Gazebo environments.""" 10 | 11 | metadata = {"render.models": ["human"]} 12 | 13 | def __init__(self): 14 | 15 | self.port = "11311" # str(random_number) #os.environ["ROS_PORT_SIM"] 16 | 17 | # #start roscore 18 | # subprocess.Popen(["roscore", "-p", self.port]) 19 | # time.sleep(1) 20 | print("Roscore launched!") 21 | 22 | # Launch the simulation with the given launchfile name 23 | rospy.init_node("gym", anonymous=True) 24 | 25 | def set_ros_master_uri(self): 26 | os.environ["ROS_MASTER_URI"] = self.ros_master_uri 27 | 28 | def step(self, action): 29 | 30 | # Implement this method in every subclass 31 | # Perform a step in gazebo. E.g. move the robot 32 | raise NotImplementedError 33 | 34 | def reset(self): 35 | 36 | # Implemented in subclass 37 | raise NotImplementedError 38 | 39 | def render(self, mode=None, close=False): 40 | pass 41 | 42 | def _render(self, mode=None, close=False): 43 | self._close() 44 | 45 | def _close(self): 46 | output1 = subprocess.check_call( 47 | ["cat", "/tmp/myroslaunch_" + self.port + ".pid"] 48 | ) 49 | output2 = subprocess.check_call( 50 | ["cat", "/home/erle/.ros/roscore-" + self.port + ".pid"] 51 | ) 52 | subprocess.Popen(["kill", "-INT", str(output1)]) 53 | subprocess.Popen(["kill", "-INT", str(output2)]) 54 | 55 | def close(self): 56 | pass 57 | 58 | def _configure(self): 59 | 60 | # TODO 61 | # From OpenAI API: Provides runtime configuration to the enviroment 62 | # Maybe set the Real Time Factor? 
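        # Illustrative sketch (an assumption, not part of the original file):
        # in ROS1, this kind of runtime configuration would normally be read
        # from the parameter server, e.g. (hypothetical parameter name):
        #   real_time_factor = rospy.get_param("/real_time_factor", 1.0)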
63 | pass 64 | 65 | def _seed(self): 66 | 67 | # TODO 68 | # From OpenAI API: Sets the seed for this env's random number generator(s) 69 | pass 70 | -------------------------------------------------------------------------------- /rl_studio/agents/turtlebot/qlearn.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | 4 | class QLearn: 5 | def __init__(self, actions, epsilon, alpha, gamma): 6 | self.q = {} 7 | self.epsilon = epsilon # exploration constant 8 | self.alpha = alpha # discount constant 9 | self.gamma = gamma # discount factor 10 | self.actions = actions 11 | 12 | def getQ(self, state, action): 13 | return self.q.get((state, action), 0.0) 14 | 15 | def learnQ(self, state, action, reward, value): 16 | """ 17 | Q-learning: 18 | Q(s, a) += alpha * (reward(s,a) + max(Q(s') - Q(s,a)) 19 | """ 20 | oldv = self.q.get((state, action), None) 21 | if oldv is None: 22 | self.q[(state, action)] = reward 23 | else: 24 | self.q[(state, action)] = oldv + self.alpha * (value - oldv) 25 | 26 | def chooseAction(self, state, return_q=False): 27 | q = [self.getQ(state, a) for a in self.actions] 28 | maxQ = max(q) 29 | 30 | if random.random() < self.epsilon: 31 | minQ = min(q) 32 | mag = max(abs(minQ), abs(maxQ)) 33 | # add random values to all the actions, recalculate maxQ 34 | q = [ 35 | q[i] + random.random() * mag - 0.5 * mag 36 | for i in range(len(self.actions)) 37 | ] 38 | maxQ = max(q) 39 | 40 | count = q.count(maxQ) 41 | # In case there're several state-action max values 42 | # we select a random one among them 43 | if count > 1: 44 | best = [i for i in range(len(self.actions)) if q[i] == maxQ] 45 | i = random.choice(best) 46 | else: 47 | i = q.index(maxQ) 48 | 49 | action = self.actions[i] 50 | if return_q: # if they want it, give it! 
51 | return action, q 52 | return action 53 | 54 | def learn(self, state1, action1, reward, state2): 55 | maxqnew = max([self.getQ(state2, a) for a in self.actions]) 56 | self.learnQ(state1, action1, reward, reward + self.gamma * maxqnew) 57 | -------------------------------------------------------------------------------- /rl_studio/agents/utilities/plot_ECDF_template.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import matplotlib as pltlib 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | from statsmodels.distributions.empirical_distribution import ECDF 6 | 7 | RUNS = 100 8 | max_episode_steps = 500 9 | 10 | def plot_ecdf(sample, color, label): 11 | # fit a cdf 12 | ecdf = ECDF(sample) 13 | # get cumulative probability for values 14 | print('P(x<20): %.3f' % ecdf(20)) 15 | print('P(x<40): %.3f' % ecdf(40)) 16 | print('P(x<60): %.3f' % ecdf(60)) 17 | # plot the cdf 18 | plt.plot(ecdf.x, ecdf.y, color=color, label=label) 19 | 20 | if __name__ == "__main__": 21 | 22 | pltlib.rcParams.update({'font.size': 15}) 23 | 24 | rewards_file = open( 25 | "/rl_studio/logs/cartpole/old_datasets/dqn_analysis/training_with_frequencies/2022-10-20 23:05:26.343167__rewards_rsl-0_rpl-0.2_pi-10.pkl", "rb") 26 | rewards = pickle.load(rewards_file) 27 | rewards = np.asarray(rewards) 28 | plot_ecdf(rewards, 'blue', 'trained with frequency = 0') 29 | 30 | rewards_file = open( 31 | "/rl_studio/logs/cartpole/old_datasets/dqn_analysis/training_with_frequencies/2022-10-20 23:00:41.230018__rewards_rsl-0_rpl-0.2_pi-10.pkl", "rb") 32 | rewards = pickle.load(rewards_file) 33 | rewards = np.asarray(rewards) 34 | plot_ecdf(rewards, 'green', 'trained with frequency = 0.1') 35 | 36 | rewards_file = open( 37 | "/rl_studio/logs/cartpole/old_datasets/dqn_analysis/training_with_frequencies/2022-10-20 23:00:04.352224__rewards_rsl-0_rpl-0.2_pi-10.pkl", "rb") 38 | rewards = pickle.load(rewards_file) 39 | rewards = np.asarray(rewards) 40 | plot_ecdf(rewards, 'orange', 'trained with frequency = 0.2') 41 | 42 | rewards_file = open( 43 | "/rl_studio/logs/cartpole/old_datasets/dqn_analysis/training_with_frequencies/2022-10-20 22:59:30.164014__rewards_rsl-0_rpl-0.2_pi-10.pkl", "rb") 44 | rewards = pickle.load(rewards_file) 45 | rewards = np.asarray(rewards) 46 | plot_ecdf(rewards, 'black', 'trained with frequency = 0.3') 47 | 48 | plt.legend() 49 | plt.ylabel("percentage of runs") 50 | plt.xlabel("steps") 51 | plt.show() 52 | 53 | -------------------------------------------------------------------------------- /rl_studio/agents/utilities/plot_reward_function.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | def normalize_range(num, a, b): 5 | return (num - a) / (b - a) 6 | 7 | def linear_function(cross_x, slope, x): 8 | return cross_x + (slope * x) 9 | 10 | def sigmoid_function(start, end, x, slope=10): 11 | slope = slope / (end - start) 12 | sigmoid = 1 / (1 + np.exp(-slope * (x - ((start + end) / 2)))) 13 | return sigmoid 14 | 15 | def reward_proximity(state): 16 | if abs(state) > 0.7: 17 | return 0 18 | else: 19 | # return 1 - abs(state) 20 | # return linear_function(1, -1.4, abs(state)) 21 | return pow(1 - abs(state), 4) 22 | # return 1 - sigmoid_function(0, 1, abs(state), 5) 23 | 24 | 25 | def rewards_followline_velocity_center(v, pos, range_v): 26 | p_reward = reward_proximity(pos) 27 | v_norm = normalize_range(v, range_v[0], range_v[1]) 28 | v_r = v_norm * 
pow(p_reward, 2) 29 | beta_pos = 0.7 30 | reward = (beta_pos * p_reward) + ((1 - beta_pos) * v_r) 31 | v_punish = reward * (1 - p_reward) * v_norm 32 | reward = reward - v_punish 33 | 34 | return reward 35 | 36 | range_v = [0, 10] 37 | 38 | # Define the ranges for v, w, and pos 39 | v_range = np.linspace(range_v[0], range_v[1], 50) # Adjust the range as needed 40 | pos_range = np.linspace(-1, 1, 50) # Adjust the range as needed 41 | 42 | # Create a grid of values for v, w, and pos 43 | V, POS = np.meshgrid(v_range, pos_range) 44 | 45 | # Calculate the rewards for each combination of v, w, and pos 46 | rewards = np.empty_like(V) 47 | for i in range(V.shape[0]): 48 | for j in range(V.shape[1]): 49 | rewards[i, j] = rewards_followline_velocity_center(V[i, j], POS[i, j], range_v) 50 | 51 | # Create a 3D plot 52 | fig = plt.figure(figsize=(10, 8)) 53 | ax = fig.add_subplot(111, projection='3d') 54 | 55 | # Plot the 3D surface 56 | surf = ax.plot_surface(POS, V, rewards, cmap='viridis') 57 | 58 | # Set labels for the axes 59 | ax.set_xlabel('Center Distance (pos)') 60 | ax.set_ylabel('Linear Velocity (v)') 61 | ax.set_zlabel('Reward') 62 | 63 | # Show the color bar 64 | fig.colorbar(surf, shrink=0.5, aspect=5) 65 | 66 | plt.show() 67 | -------------------------------------------------------------------------------- /rl_studio/config/config_cartpole_qlearn.yaml: -------------------------------------------------------------------------------- 1 | settings: 2 | output_dir: "./logs/" 3 | save_model: True 4 | save_positions: True 5 | telemetry: False 6 | logging_level: info 7 | mode: training 8 | agent: cartpole 9 | algorithm: qlearn 10 | 11 | # TODO make this section optional 12 | actions: 13 | available_actions: 14 | simple: 15 | 16 | agent: 17 | cartpole: 18 | # TODO To be removed 19 | camera_params: 20 | witdh: 640 21 | height: 480 22 | 23 | environments: 24 | env_name: myCartpole-v0 25 | environment_folder: cartpole 26 | # runs: 20000 27 | angle_bins: 100 28 | pos_bins: 100 29 | runs: 4000000 30 | full_experimentation_runs: 0 31 | update_every: 10000 32 | save_every: 10000000 33 | show_every: 10000 34 | objective_reward: 500 35 | # block_experience_batch: False 36 | block_experience_batch: False 37 | # random_start_level: 0.05 38 | experiments: 1 39 | random_start_level: 0 # Number between 0 and 1 that indicates the difficulty of the start position 40 | random_perturbations_level: 0 # Number between 0 and 1 that indicates the frequency of the random perturbations 41 | random_perturbations_level_step: 0 42 | perturbations_intensity_std: 0 #Number between 0 and 1 that indicates the standard deviation of perturbations intensity 43 | perturbations_intensity_std_step: 0 44 | initial_pole_angle: 0 45 | initial_pole_angle_steps: 0 46 | non_recoverable_angle: 0.3 # not applicable when making experiments with init_pole_angle (always 0.2 over the initial) 47 | 48 | # previously_trained_agent: /home/ruben/Desktop/my-RL-Studio/rl_studio/logs/cartpole/qlearning/checkpoints/20221116_2001_epsilon_0.01_QTABLE_avg_ 179.357.pkl 49 | reward_value: 1 50 | punish: 0 51 | reward_shaping: 0 52 | 53 | inference: 54 | inference_file: /home/ruben/Desktop/my-RL-Studio/rl_studio/logs/cartpole/qlearning/checkpoints/old/20221110_0931_epsilon_0.116_406.5153_QTABLE.pkl_avg_.pkl 55 | actions_file: /home/ruben/Desktop/my-RL-Studio/rl_studio/logs/cartpole/qlearning/checkpoints/actions_set_20221109_2108 56 | 57 | algorithm: 58 | alpha: 0.9 59 | epsilon: 0.99 60 | gamma: 0.9 61 | epsilon_discount: 0.99999997 62 | 
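  # Illustrative note (an assumption, not part of the original config): if the
  # discount above is applied N times, exploration decays as
  # epsilon * epsilon_discount**N; for example, 0.99 * 0.99999997**4000000 is
  # roughly 0.88, so epsilon stays high across the 4,000,000 configured runs.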
-------------------------------------------------------------------------------- /rl_studio/agents/robot_mesh/manual_pilot.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import sys as system 3 | import time 4 | 5 | import gym 6 | 7 | 8 | class ManualRobotMeshTrainer: 9 | def __init__(self, params): 10 | # TODO: Create a pydantic metaclass to simplify the way we extract the params 11 | # environment params 12 | self.environment_params = params.environment["params"] 13 | self.env_name = params.environment["params"]["env_name"] 14 | env_params = params.environment["params"] 15 | actions = params.environment["actions"] 16 | env_params["actions"] = actions 17 | self.env = gym.make(self.env_name, **env_params) 18 | # algorithm params 19 | self.alpha = params.algorithm["params"]["alpha"] 20 | self.epsilon = params.algorithm["params"]["epsilon"] 21 | self.gamma = params.algorithm["params"]["gamma"] 22 | 23 | def execute_action(self, env): 24 | for step in range(50000): 25 | 26 | input_order = input("provide action (0-up, 1-right, 2-down, 3-left): ") 27 | if ( 28 | input_order == "0" 29 | or input_order == "1" 30 | or input_order == "2" 31 | or input_order == "3" 32 | ): 33 | action = int(input_order) 34 | print("Selected Action!! " + str(action)) 35 | # Execute the action and get feedback 36 | next_state, reward, env.done, lap_completed = env.step(action) 37 | 38 | env._flush(force=True) 39 | if not env.done: 40 | state = next_state 41 | else: 42 | break 43 | elif input_order == "q": 44 | system.exit(1) 45 | else: 46 | print("wrong action! Try again") 47 | 48 | def main(self): 49 | 50 | print(f"\t- Start hour: {datetime.datetime.now()}\n") 51 | print(f"\t- Environment params:\n{self.environment_params}") 52 | outdir = "./logs/robot_mesh/" 53 | env = gym.wrappers.Monitor(self.env, outdir, force=True) 54 | total_episodes = 20000 55 | env.done = False 56 | 57 | for episode in range(total_episodes): 58 | print("resetting") 59 | time.sleep(5) 60 | env.reset() 61 | 62 | self.execute_action(env) 63 | 64 | env.close() 65 | -------------------------------------------------------------------------------- /rl_studio/agents/turtlebot/memory.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import numpy as np 4 | 5 | 6 | class Memory: 7 | """ 8 | This class provides an abstraction to store the [s, a, r, a'] elements of each iteration. 9 | Instead of using tuples (as other implementations do), the information is stored in lists 10 | that get returned as another list of dictionaries with each key corresponding to either 11 | "state", "action", "reward", "nextState" or "isFinal". 
12 | """ 13 | 14 | def __init__(self, size): 15 | self.size = size 16 | self.currentPosition = 0 17 | self.states = [] 18 | self.actions = [] 19 | self.rewards = [] 20 | self.newStates = [] 21 | self.finals = [] 22 | 23 | def getMiniBatch(self, size): 24 | indices = random.sample( 25 | np.arange(len(self.states)), min(size, len(self.states)) 26 | ) 27 | miniBatch = [] 28 | for index in indices: 29 | miniBatch.append( 30 | { 31 | "state": self.states[index], 32 | "action": self.actions[index], 33 | "reward": self.rewards[index], 34 | "newState": self.newStates[index], 35 | "isFinal": self.finals[index], 36 | } 37 | ) 38 | return miniBatch 39 | 40 | def getCurrentSize(self): 41 | return len(self.states) 42 | 43 | def getMemory(self, index): 44 | return { 45 | "state": self.states[index], 46 | "action": self.actions[index], 47 | "reward": self.rewards[index], 48 | "newState": self.newStates[index], 49 | "isFinal": self.finals[index], 50 | } 51 | 52 | def addMemory(self, state, action, reward, newState, isFinal): 53 | if self.currentPosition >= self.size - 1: 54 | self.currentPosition = 0 55 | if len(self.states) > self.size: 56 | self.states[self.currentPosition] = state 57 | self.actions[self.currentPosition] = action 58 | self.rewards[self.currentPosition] = reward 59 | self.newStates[self.currentPosition] = newState 60 | self.finals[self.currentPosition] = isFinal 61 | else: 62 | self.states.append(state) 63 | self.actions.append(action) 64 | self.rewards.append(reward) 65 | self.newStates.append(newState) 66 | self.finals.append(isFinal) 67 | 68 | self.currentPosition += 1 69 | -------------------------------------------------------------------------------- /rl_studio/algorithms/memory.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import numpy as np 4 | 5 | 6 | class Memory: 7 | """ 8 | This class provides an abstraction to store the [s, a, r, a'] elements of each iteration. 9 | Instead of using tuples (as other implementations do), the information is stored in lists 10 | that get returned as another list of dictionaries with each key corresponding to either 11 | "state", "action", "reward", "nextState" or "isFinal". 
12 | """ 13 | 14 | def __init__(self, size): 15 | self.size = size 16 | self.currentPosition = 0 17 | self.states = [] 18 | self.actions = [] 19 | self.rewards = [] 20 | self.newStates = [] 21 | self.finals = [] 22 | 23 | def getMiniBatch(self, size): 24 | indices = random.sample( 25 | list(np.arange(len(self.states))), min(size, len(self.states)) 26 | ) 27 | miniBatch = [] 28 | for index in indices: 29 | miniBatch.append( 30 | { 31 | "state": self.states[index], 32 | "action": self.actions[index], 33 | "reward": self.rewards[index], 34 | "newState": self.newStates[index], 35 | "isFinal": self.finals[index], 36 | } 37 | ) 38 | return miniBatch 39 | 40 | def getCurrentSize(self): 41 | return len(self.states) 42 | 43 | def getMemory(self, index): 44 | return { 45 | "state": self.states[index], 46 | "action": self.actions[index], 47 | "reward": self.rewards[index], 48 | "newState": self.newStates[index], 49 | "isFinal": self.finals[index], 50 | } 51 | 52 | def addMemory(self, state, action, reward, newState, isFinal): 53 | if self.currentPosition >= self.size - 1: 54 | self.currentPosition = 0 55 | if len(self.states) > self.size: 56 | self.states[self.currentPosition] = state 57 | self.actions[self.currentPosition] = action 58 | self.rewards[self.currentPosition] = reward 59 | self.newStates[self.currentPosition] = newState 60 | self.finals[self.currentPosition] = isFinal 61 | else: 62 | self.states.append(state) 63 | self.actions.append(action) 64 | self.rewards.append(reward) 65 | self.newStates.append(newState) 66 | self.finals.append(isFinal) 67 | 68 | self.currentPosition += 1 69 | -------------------------------------------------------------------------------- /rl_studio/envs/carla/followlane/reset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from rl_studio.agents.utils import ( 4 | print_messages, 5 | ) 6 | from rl_studio.envs.gazebo.f1.models.f1_env import F1Env 7 | 8 | 9 | class Reset(F1Env): 10 | """ 11 | Works for Follow Line and Follow Lane tasks 12 | """ 13 | 14 | def reset_f1_state_image(self): 15 | """ 16 | reset for 17 | - State: Image 18 | - tasks: FollowLane and FollowLine 19 | """ 20 | self._gazebo_reset() 21 | # === POSE === 22 | if self.alternate_pose: 23 | self._gazebo_set_random_pose_f1_follow_rigth_lane() 24 | else: 25 | self._gazebo_set_fix_pose_f1_follow_right_lane() 26 | 27 | self._gazebo_unpause() 28 | 29 | ##==== get image from sensor camera 30 | f1_image_camera, _ = self.f1gazeboimages.get_camera_info() 31 | self._gazebo_pause() 32 | 33 | ##==== calculating State 34 | # image as observation 35 | state = np.array( 36 | self.f1gazeboimages.image_preprocessing_black_white_32x32( 37 | f1_image_camera.data, self.height 38 | ) 39 | ) 40 | state_size = state.shape 41 | 42 | return state, state_size 43 | 44 | def reset_f1_state_sp(self): 45 | """ 46 | reset for 47 | - State: Simplified perception 48 | - tasks: FollowLane and FollowLine 49 | """ 50 | self._gazebo_reset() 51 | # === POSE === 52 | if self.alternate_pose: 53 | self._gazebo_set_random_pose_f1_follow_rigth_lane() 54 | else: 55 | self._gazebo_set_fix_pose_f1_follow_right_lane() 56 | 57 | self._gazebo_unpause() 58 | 59 | ##==== get image from sensor camera 60 | f1_image_camera, _ = self.f1gazeboimages.get_camera_info() 61 | self._gazebo_pause() 62 | 63 | ##==== calculating State 64 | # simplified perception as observation 65 | centrals_in_pixels, _ = self.simplifiedperception.calculate_centrals_lane( 66 | f1_image_camera.data, 67 | 
self.height, 68 | self.width, 69 | self.x_row, 70 | self.lower_limit, 71 | self.center_image, 72 | ) 73 | states = self.simplifiedperception.calculate_observation( 74 | centrals_in_pixels, self.center_image, self.pixel_region 75 | ) 76 | state = [states[0]] 77 | state_size = len(state) 78 | 79 | return state, state_size 80 | -------------------------------------------------------------------------------- /rl_studio/visual/ascii/text.py: -------------------------------------------------------------------------------- 1 | """ 2 | References: http://www.patorjk.com/software/taag/#p=display&f=Sub-Zero&t=Aliasrc%20 3 | """ 4 | 5 | LETS_GO = """ 6 | ______ __ 7 | / ____/___ / / 8 | / / __/ __ \/ / 9 | / /_/ / /_/ /_/ 10 | \____/\____(_) 11 | """ 12 | 13 | QLEARN_CAMERA = """ 14 | ___ _ ____ 15 | / _ \| | ___ __ _ _ __ _ __ / ___|__ _ _ __ ___ ___ _ __ __ _ 16 | | | | | |/ _ \/ _` | '__| '_ \ _____ | | / _` | '_ ` _ \ / _ \ '__/ _` | 17 | | |_| | | __/ (_| | | | | | | |_____| | |__| (_| | | | | | | __/ | | (_| | 18 | \__\_\_|\___|\__,_|_| |_| |_| \____\__,_|_| |_| |_|\___|_| \__,_| 19 | 20 | """ 21 | 22 | JDEROBOT = """ 23 | ___ _ ______ _ _ 24 | |_ | | | | ___ \ | | | | 25 | | | __| | ___| |_/ /___ | |__ ___ | |_ 26 | | |/ _` |/ _ \ // _ \| '_ \ / _ \| __| 27 | /\__/ / (_| | __/ |\ \ (_) | |_) | (_) | |_ 28 | \____/ \__,_|\___\_| \_\___/|_.__/ \___/ \__| 29 | 30 | """ 31 | 32 | END_OF_TRAIN = """ 33 | _____ _ _ _ _ _ 34 | |_ _| __ __ _(_)_ __ (_)_ __ __ _ ___ ___ _ __ ___ _ __ | | ___| |_ ___ __| | 35 | | || '__/ _` | | '_ \| | '_ \ / _` | / __/ _ \| '_ ` _ \| '_ \| |/ _ \ __/ _ \/ _` | 36 | | || | | (_| | | | | | | | | | (_| | | (_| (_) | | | | | | |_) | | __/ || __/ (_| | 37 | |_||_| \__,_|_|_| |_|_|_| |_|\__, | \___\___/|_| |_| |_| .__/|_|\___|\__\___|\__,_| 38 | |___/ |_| 39 | """ 40 | 41 | RACE_COMPLETED = """ 42 | ______ _ _ _ 43 | | ___ \ | | | | | | 44 | | |_/ /__ _ ___ ___ ___ ___ _ __ ___ _ __ | | ___| |_ ___ __| | 45 | | // _` |/ __/ _ \ / __/ _ \| '_ ` _ \| '_ \| |/ _ \ __/ _ \/ _` | 46 | | |\ \ (_| | (_| __/ | (_| (_) | | | | | | |_) | | __/ || __/ (_| | 47 | \_| \_\__,_|\___\___| \___\___/|_| |_| |_| .__/|_|\___|\__\___|\__,_| 48 | | | 49 | |_| 50 | """ 51 | 52 | MANUAL_PILOT = """ 53 | ___ ___ _ ______ _ _ _ 54 | | \/ | | | | ___ (_) | | | 55 | | . . 
| __ _ _ __ _ _ __ _| | | |_/ /_| | ___ | |_ 56 | | |\/| |/ _` | '_ \| | | |/ _` | | | __/| | |/ _ \| __| 57 | | | | | (_| | | | | |_| | (_| | | | | | | | (_) | |_ 58 | \_| |_/\__,_|_| |_|\__,_|\__,_|_| \_| |_|_|\___/ \__| 59 | 60 | """ 61 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/f1/models/settings.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | 3 | from rl_studio.envs.gazebo.f1.image_f1 import ImageF1, ListenerCamera 4 | from rl_studio.envs.gazebo.f1.models.images import F1GazeboImages 5 | from rl_studio.envs.gazebo.f1.models.utils import F1GazeboUtils 6 | from rl_studio.envs.gazebo.f1.models.rewards import F1GazeboRewards 7 | from rl_studio.envs.gazebo.f1.models.simplified_perception import ( 8 | F1GazeboSimplifiedPerception, 9 | ) 10 | 11 | 12 | class F1GazeboTFConfig(BaseModel): 13 | def __init__(self, **config): 14 | self.simplifiedperception = F1GazeboSimplifiedPerception() 15 | self.f1gazeborewards = F1GazeboRewards(**config) 16 | self.f1gazeboutils = F1GazeboUtils() 17 | self.f1gazeboimages = F1GazeboImages() 18 | 19 | self.image = ImageF1() 20 | #self.image = ListenerCamera("/F1ROS/cameraL/image_raw") 21 | self.image_raw_from_topic = None 22 | self.f1_image_camera = None 23 | self.sensor = config["sensor"] 24 | 25 | # Image 26 | self.image_resizing = config["image_resizing"] / 100 27 | self.new_image_size = config["new_image_size"] 28 | self.raw_image = config["raw_image"] 29 | self.height = int(config["height_image"] * self.image_resizing) 30 | self.width = int(config["width_image"] * self.image_resizing) 31 | self.center_image = int(config["center_image"] * self.image_resizing) 32 | self.num_regions = config["num_regions"] 33 | self.pixel_region = int(self.center_image / self.num_regions) * 2 34 | self.telemetry_mask = config["telemetry_mask"] 35 | self.poi = config["x_row"][0] 36 | self.image_center = None 37 | self.right_lane_center_image = config["center_image"] + ( 38 | config["center_image"] // 2 39 | ) 40 | self.lower_limit = config["lower_limit"] 41 | 42 | # States 43 | self.state_space = config["states"] 44 | if self.state_space == "spn": 45 | self.x_row = [i for i in range(1, int(self.height / 2) - 1)] 46 | else: 47 | self.x_row = config["x_row"] 48 | 49 | # Actions TODO incompatible with baselines way to go 50 | self.action_space = config["action_space"] 51 | self.actions = config["actions"] 52 | 53 | # Rewards 54 | self.reward_function = config["reward_function"] 55 | self.rewards = config["rewards"] 56 | self.min_reward = config["min_reward"] 57 | 58 | # Others 59 | self.telemetry = config["telemetry"] 60 | -------------------------------------------------------------------------------- /rl_studio/installation/setup_noetic.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source /opt/ros/noetic/setup.bash 4 | 5 | # --- CustomRobots --------------------------------------- 6 | DIR="../CustomRobots" 7 | if ! [[ -d "$DIR" ]] 8 | then 9 | echo "$DIR doesn't exists. Cloning CustomRobots repository." 10 | git clone --branch noetic-devel https://github.com/JdeRobot/CustomRobots.git $DIR 11 | else 12 | echo "CustomRobots is already downloaded. Pass." 13 | fi 14 | 15 | if [ -z "$ROS_DISTRO" ]; then 16 | echo "ROS not installed. 
Check the installation steps: https://github.com/erlerobot/gym#installing-the-gazebo-environment" 17 | fi 18 | 19 | # --- Gazebo -------------------------------------------- 20 | program="gazebo" 21 | condition=$(which $program 2>/dev/null | grep -v "not found" | wc -l) 22 | if [ $condition -eq 0 ] ; then 23 | echo "Gazebo is not installed. Check the installation steps: https://github.com/erlerobot/gym#installing-the-gazebo-environment" 24 | fi 25 | 26 | # --- Create catkin_ws ----------------------------------- 27 | ws="catkin_ws" 28 | if [ -d $ws ]; then 29 | echo "Error: catkin_ws directory already exists" 1>&2 30 | fi 31 | src=$ws"/src" 32 | mkdir -p $src 33 | cd $src 34 | catkin_init_workspace 35 | 36 | # Import and build dependencies 37 | cd ../catkin_ws/src/ 38 | vcs import < ../gazebo_ros_noetic.repos 39 | 40 | cd .. 41 | touch catkin_ws/src/ecl_navigation/ecl_mobile_robot/CATKIN_IGNORE 42 | catkin_make 43 | bash -c 'echo source `pwd`/devel/setup.bash >> ~/.bashrc' 44 | 45 | echo "## ROS workspace compiled ##" 46 | 47 | # --- Adding Environment variables ----------------- 48 | # --- BASH 49 | if [ -z "$GAZEBO_MODEL_PATH" ]; then 50 | bash -c 'echo "export GAZEBO_MODEL_PATH="`pwd`/../CustomRobots/f1/models >> ~/.bashrc' 51 | else 52 | printf "GAZEBO_MODEL_PATH env variable already exists in your .bashrc.\n" 53 | fi 54 | 55 | if [ -z "$GAZEBO_RESOURCE_PATH" ]; then 56 | bash -c 'echo "export GAZEBO_RESOURCE_PATH="`pwd`/../CustomRobots/f1/worlds >> ~/.bashrc' 57 | else 58 | printf "GAZEBO_RESOURCE_PATH env variable already exists in your .bashrc\n" 59 | fi 60 | 61 | # --- ZSH 62 | read -p "Do you use zsh? [Y/n]: " zsh 63 | if [ "$zsh" != "n" ] && [ "$zsh" != "N" ]; then 64 | zsh -c 'echo "export GAZEBO_MODEL_PATH="`pwd`/../CustomRobots/f1/models >> ~/.zshrc' 65 | zsh -c 'echo "export GAZEBO_RESOURCE_PATH="`pwd`/../CustomRobots/f1/worlds >> ~/.zshrc' 66 | zsh -c 'echo source `pwd`/devel/setup.zsh >> ~/.zshrc' 67 | printf "" 68 | printf "\nRestarting zsh terminal . . . \n" 69 | exec zsh 70 | else 71 | # --- Restarting Bash 72 | printf "" 73 | printf "\nRestarting the terminal . . .\n" 74 | exec bash 75 | fi 76 | 77 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/autoparking/image_f1.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | from rl_studio.agents.f1.settings import QLearnConfig 5 | 6 | 7 | class ImageF1: 8 | 9 | font = cv2.FONT_HERSHEY_COMPLEX 10 | 11 | def __init__(self): 12 | self.height = 3 # Image height [pixels] 13 | self.width = 3 # Image width [pixels] 14 | self.timeStamp = 0 # Time stamp [s] 15 | self.format = "" # Image format string (RGB8, BGR,...)
16 | self.data = np.zeros( 17 | (self.height, self.width, 3), np.uint8 18 | ) # The image data itself 19 | self.data.shape = self.height, self.width, 3 20 | self.config = QLearnConfig() 21 | 22 | def __str__(self): 23 | return ( 24 | f"Image:" 25 | f"\nHeight: {self.height}\nWidth: {self.width}\n" 26 | f"Format: {self.format}\nTimeStamp: {self.timeStamp}\nData: {self.data}" 27 | ) 28 | 29 | @staticmethod 30 | def image_msg_to_image(img, cv_image): 31 | 32 | img.width = img.width 33 | img.height = img.height 34 | img.format = "RGB8" 35 | img.timeStamp = img.header.stamp.secs + (img.header.stamp.nsecs * 1e-9) 36 | img.data = cv_image 37 | 38 | return img 39 | 40 | def show_telemetry(self, img, points, action, reward): 41 | count = 0 42 | for idx, point in enumerate(points): 43 | cv2.line( 44 | img, 45 | (320, self.config.x_row[idx]), 46 | (320, self.config.x_row[idx]), 47 | (255, 255, 0), 48 | thickness=5, 49 | ) 50 | # cv2.line(img, (center_image, x_row[idx]), (point, x_row[idx]), (255, 255, 255), thickness=2) 51 | cv2.putText( 52 | img, 53 | str("err{}: {}".format(idx + 1, self.config.center_image - point)), 54 | (18, 340 + count), 55 | self.font, 56 | 0.4, 57 | (255, 255, 255), 58 | 1, 59 | ) 60 | count += 20 61 | cv2.putText( 62 | img, 63 | str(f"action: {action}"), 64 | (18, 280), 65 | self.font, 66 | 0.4, 67 | (255, 255, 255), 68 | 1, 69 | ) 70 | cv2.putText( 71 | img, 72 | str(f"reward: {reward}"), 73 | (18, 320), 74 | self.font, 75 | 0.4, 76 | (255, 255, 255), 77 | 1, 78 | ) 79 | 80 | cv2.imshow("Image window", img[240:]) 81 | cv2.waitKey(3) 82 | -------------------------------------------------------------------------------- /rl_studio/agents/utilities/plot_multiple_graphs_template.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | import matplotlib as pltlib 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | 7 | RUNS = 20000 8 | max_episode_steps = 500 9 | 10 | if __name__ == "__main__": 11 | pltlib.rcParams.update({'font.size': 15}) 12 | 13 | fig, ax1 = plt.subplots() 14 | 15 | RUNS = 4000000 16 | 17 | rewards_file = open( 18 | "/home/ruben/Desktop/my-RL-Studio/rl_studio/logs/cartpole/qlearning/training/2023-01-20 18:41:16.873044__rewards_.pkl", "rb") 19 | rewards = pickle.load(rewards_file) 20 | rewards = np.asarray(rewards) 21 | ax1.set_xscale('log') 22 | ax1.plot(range(RUNS), rewards, color='purple', label='qlearning') 23 | 24 | RUNS = 20000 25 | 26 | rewards_file = open( 27 | "/home/ruben/Desktop/my-RL-Studio/rl_studio/logs/cartpole/dqn/training/2023-01-20 02:50:09.991537__rewards_rsl-0_rpl-0_pi-1.pkl", "rb") 28 | rewards = pickle.load(rewards_file) 29 | rewards = np.asarray(rewards) 30 | ax1.set_xscale('log') 31 | ax1.plot(range(RUNS), rewards, color='pink', label='dqn') 32 | 33 | RUNS = 10000 34 | 35 | rewards_file = open( 36 | "/home/ruben/Desktop/my-RL-Studio/rl_studio/logs/cartpole/ddpg/training/2023-01-14 03:15:07.136008__rewards_rsl-0_rpl-0.1_pi-1.pkl", "rb") 37 | rewards = pickle.load(rewards_file) 38 | rewards = np.asarray(rewards) 39 | ax1.set_xscale('log') 40 | ax1.plot(range(RUNS), rewards, color='brown', label='ddpg') 41 | 42 | RUNS = 1000 43 | 44 | rewards_file = open( 45 | "/home/ruben/Desktop/my-RL-Studio/rl_studio/logs/cartpole/ppo_continuous/training/2023-01-11 21:25:12.509340__rewards_rsl-0_rpl-0_pi-0.pkl", "rb") 46 | rewards = pickle.load(rewards_file) 47 | rewards = np.asarray(rewards) 48 | ax1.set_xscale('log') 49 | ax1.plot(range(RUNS), rewards, color='black', label='ppo_continuous') 50 | 51 |
RUNS = 1000 52 | 53 | rewards_file = open( 54 | "/home/ruben/Desktop/my-RL-Studio/rl_studio/logs/cartpole/ppo/training/2023-01-11 21:30:20.951490__rewards_rsl-0.2_rpl-0_pi-0.pkl", "rb") 55 | rewards = pickle.load(rewards_file) 56 | rewards = np.asarray(rewards) 57 | ax1.set_xscale('log') 58 | ax1.plot(range(RUNS), rewards, color='green', label='ppo') 59 | fig.canvas.manager.full_screen_toggle() 60 | 61 | plt.legend() 62 | plt.ylabel("steps") 63 | plt.xlabel("runs") 64 | plt.show() 65 | 66 | base_path = '/home/ruben/Desktop/2020-phd-ruben-lucas/docs/assets/images/results_images/cartpole/solidityExperiments/refinement/refinementOfRefinement/' 67 | ax1.figure.savefig(base_path + 'trainings.png') 68 | 69 | -------------------------------------------------------------------------------- /rl_studio/agents/mountain_car/manual_pilot.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import sys as system 3 | import time 4 | 5 | import gym 6 | 7 | from rl_studio.agents.mountain_car import utils 8 | from rl_studio.agents.f1 import settings as settings 9 | 10 | 11 | class ManualMountainCarTrainer: 12 | def __init__(self, params): 13 | # TODO: Create a pydantic metaclass to simplify the way we extract the params 14 | # environment params 15 | self.environment_params = params.environment["params"] 16 | self.env_name = params.environment["params"]["env_name"] 17 | env_params = params.environment["params"] 18 | actions = params.environment["actions"] 19 | env_params["actions"] = actions 20 | self.env = gym.make(self.env_name, **env_params) 21 | # algorithm params 22 | self.alpha = params.algorithm["params"]["alpha"] 23 | self.epsilon = params.algorithm["params"]["epsilon"] 24 | self.gamma = params.algorithm["params"]["gamma"] 25 | self.rewards_per_run = [0, 0] 26 | 27 | def execute_steps(self): 28 | highest_reward = 0 29 | cumulated_reward = 0 30 | for step in range(50000): 31 | 32 | inpt = input("provide action (0-none, 1-left, 2-right): ") 33 | if inpt == "1" or inpt == "2" or inpt == "0": 34 | action = int(inpt) 35 | print("Selected Action!! " + str(action)) 36 | # Execute the action and get feedback 37 | nextState, reward, self.env.done, lap_completed = self.env.step(action) 38 | cumulated_reward += reward 39 | 40 | if highest_reward < cumulated_reward: 41 | highest_reward = cumulated_reward 42 | print("cumulated_reward = " + str(cumulated_reward)) 43 | self.env._flush(force=True) 44 | if self.env.done: 45 | break 46 | elif inpt == "q": 47 | system.exit(1) 48 | else: 49 | nextState, reward, self.env.done, lap_completed = self.env.step(-1) 50 | print("wrong action!
Try again") 51 | break 52 | 53 | self.rewards_per_run.append(cumulated_reward) 54 | 55 | def main(self): 56 | 57 | print(settings.title) 58 | print(settings.description) 59 | print(f"\t- Start hour: {datetime.datetime.now()}") 60 | 61 | total_episodes = 20000 62 | self.env.done = False 63 | 64 | utils.get_stats_figure(self.rewards_per_run) 65 | 66 | for episode in range(total_episodes): 67 | print("resetting") 68 | time.sleep(5) 69 | self.env.reset() 70 | 71 | self.execute_steps() 72 | 73 | self.env.close() 74 | -------------------------------------------------------------------------------- /rl_studio/config/config_robot_mesh_qlearn.yaml: -------------------------------------------------------------------------------- 1 | settings: 2 | output_dir: "./logs/" 3 | save_model: True 4 | save_positions: True 5 | telemetry: False 6 | mode: inference 7 | agent: robot_mesh 8 | algorithm: qlearn 9 | 10 | actions: 11 | 0: [ 0, 0, -1, 0 ] 12 | 1: [ 0, 0, 0.7, 0.7] 13 | 2: [ 0, 0, 0, -1] 14 | 3: [0, 0, -0.7, 0.7] 15 | 16 | agent: 17 | camera_params: 18 | witdh: 640 19 | height: 480 20 | 21 | environments: 22 | # simple: 23 | # env_name: RobotMeshEnv-v0 24 | # circuit_name: simple 25 | # training_type: qlearn_camera 26 | # launchfile: my_simple_world.launch 27 | # environment_folder: robot_mesh 28 | # robot_name: my_robot 29 | # start_pose: 1, 2 30 | # alternate_pose: True 31 | # estimated_steps: 4000 32 | # sensor: camera 33 | # circuit_positions_set: # x, y, z, roll, pith, ???. yaw 34 | # - [ 0, 53.462, -41.988, 0.004, 0, 0, 1.57, -1.57 ] 35 | # - [ 1, 53.462, -8.734, 0.004, 0, 0, 1.57, -1.57 ] 36 | # - [ 2, 39.712, -30.741, 0.004, 0, 0, 1.56, 1.56 ] 37 | # - [ 3, -6.861, -36.481, 0.004, 0, 0.01, -0.858, 0.613 ] 38 | # - [ 4, 20.043, 37.130, 0.003, 0, 0.103, -1.4383, -1.4383 ] 39 | # actions_force: 18 40 | # boot_on_crash: False 41 | # goal_x: -6.5 42 | # goal_y: 14 43 | # pos_x: 6.4 44 | # pos_y: -5 45 | # pos_z: 0.3 46 | env_name: RobotMeshEnv-v0 47 | circuit_name: complex 48 | training_type: qlearn_camera 49 | launchfile: my_complex_world.launch 50 | environment_folder: robot_mesh 51 | robot_name: my_robot 52 | start_pose: 0 53 | alternate_pose: True 54 | estimated_steps: 4000 55 | sensor: camera 56 | circuit_positions_set: # x, y, z, roll, pith, ???. 
yaw 57 | - [ 0, 53.462, -41.988, 0.004, 0, 0, 1.57, -1.57] 58 | - [ 1, 53.462, -8.734, 0.004, 0, 0, 1.57, -1.57 ] 59 | - [ 2, 39.712, -30.741, 0.004, 0, 0, 1.56, 1.56 ] 60 | - [ 3, -6.861, -36.481, 0.004, 0, 0.01, -0.858, 0.613 ] 61 | - [ 4, 20.043, 37.130, 0.003, 0, 0.103, -1.4383, -1.4383 ] 62 | actions_force: 24 63 | boot_on_crash: False 64 | goal_x: -100 65 | goal_y: 14 66 | pos_x: 17 67 | pos_y: -13 68 | pos_z: 0.3 69 | # manual: 70 | # env_name: F1Env-v0 71 | # circuit_name: manual 72 | # training_type: qlearn_camera 73 | # launch: simple_circuit.launch 74 | # start_pose: 0 75 | # alternate_pose: False 76 | # estimated_steps: 4000 77 | # sensor: camera 78 | 79 | inference: 80 | inference_file: /home/ruben/Desktop/my-RL-Studio/rl_studio/checkpoints/robot_mesh/1_20221227_0306_epsilon_0.05_QTABLE.pkl 81 | actions_file: /home/ruben/Desktop/my-RL-Studio/rl_studio/checkpoints/robot_mesh/actions_set_20221227_0657 82 | 83 | algorithm: 84 | alpha: 0.2 85 | epsilon: 0.95 86 | gamma: 0.9 87 | -------------------------------------------------------------------------------- /rl_studio/algorithms/qlearn_multiple_states.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import pickle 3 | import random 4 | 5 | import numpy as np 6 | 7 | 8 | class QLearn: 9 | def __init__(self, actions, epsilon=0.99, alpha=0.8, gamma=0.9): 10 | self.q = {} 11 | self.epsilon = epsilon # exploration constant 12 | self.alpha = alpha # discount constant 13 | self.gamma = gamma # discount factor 14 | self.actions = actions 15 | 16 | def getQValues(self, state, action): 17 | state = state if isinstance(state, collections.abc.Sequence) else [state] 18 | return self.q.get(tuple(state) + (action,), 0.0) 19 | 20 | def selectAction(self, state, return_q=False): 21 | q = [self.getQValues(state, a) for a in self.actions] 22 | maxQ = max(q) 23 | 24 | if random.random() < self.epsilon: 25 | return random.choice(list(self.actions)) 26 | 27 | count = q.count(maxQ) 28 | # In case there're several state-action max values 29 | # we select a random one among them 30 | if count > 1: 31 | best = [i for i in range(len(self.actions)) if q[i] == maxQ] 32 | i = random.choice(best) 33 | else: 34 | i = q.index(maxQ) 35 | 36 | action = i 37 | if return_q: # if they want it, give it! 38 | return action, q 39 | return action 40 | 41 | def learn(self, state1, action1, reward, state2, done): 42 | maxqnew = max([self.getQValues(state2, a) for a in self.actions]) 43 | 44 | if done == True: 45 | self.q[(tuple(state1) + (action1,))] = reward 46 | q_update = self.alpha * (reward - self.getQValues(state1, action1)) 47 | else: 48 | q_update = self.alpha * ( 49 | reward + self.gamma * maxqnew - self.getQValues(state1, action1) 50 | ) 51 | self.q[(tuple(state1) + (action1,))] = ( 52 | self.getQValues(state1, action1) + q_update 53 | ) 54 | 55 | def reset(self): 56 | self.state = self.np_random.uniform(low=-0.05, high=0.05, size=(4,)) 57 | self.steps_beyond_done = None 58 | return np.array(self.state) 59 | 60 | def inference(self, state, return_q=False): 61 | return self.selectAction(state, return_q) 62 | 63 | def load_model(self, file_path, actions): 64 | 65 | qlearn_file = open(file_path, "rb") 66 | 67 | self.q = pickle.load(qlearn_file) 68 | # TODO it may be possible to infer the actions from the model. 
I don know enough to assume that for every algorithm 69 | self.actions = actions 70 | print(f"\n\nMODEL LOADED.") 71 | print(f" - Loading: {file_path}") 72 | print(f" - Model size: {len(self.q)}") 73 | -------------------------------------------------------------------------------- /rl_studio/agents/cartpole/requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==1.3.0 2 | #actionlib==1.13.2 3 | #angles==1.9.13 4 | appdirs==1.4.4 5 | cachetools==5.2.0 6 | #camera-calibration-parsers==1.12.0 7 | #catkin==0.8.10 8 | #catkin-pkg==0.4.23 9 | certifi==2021.5.30 10 | cfgv==3.3.0 11 | chardet==4.0.0 12 | cloudpickle==1.6.0 13 | #controller-manager==0.19.5 14 | #controller-manager-msgs==0.19.5 15 | #cv-bridge==1.16.0 16 | cycler==0.10.0 17 | decorator==4.4.2 18 | defusedxml==0.6.0 19 | #diagnostic-updater==1.11.0 20 | distlib==0.3.2 21 | distro==1.5.0 22 | docutils==0.17.1 23 | #dynamic-reconfigure==1.7.3 24 | environs==9.2.0 25 | filelock==3.0.12 26 | future==0.18.2 27 | #gazebo_plugins==2.9.2 28 | #gazebo_ros==2.9.2 29 | #gencpp==0.6.5 30 | #geneus==3.0.0 31 | #genlisp==0.4.18 32 | #genmsg==0.5.16 33 | #gennodejs==2.0.2 34 | #genpy==0.6.15 35 | #google-auth==2.15.0 36 | #google-auth-oauthlib==0.4.6 37 | #grpcio==1.51.1 38 | gym==0.25.0 39 | gym-notices==0.0.8 40 | gymnasium-notices==0.0.1 41 | identify==2.2.11 42 | idna==2.10 43 | imageio==2.9.0 44 | importlib-metadata==5.2.0 45 | #jax-jumpy==0.2.0 46 | #kiwisolver==1.3.1 47 | Markdown==3.4.1 48 | MarkupSafe==2.1.1 49 | marshmallow==3.12.2 50 | matplotlib==3.3.2 51 | #message-filters==1.15.14 52 | #netifaces==0.10.9 53 | #networkx==2.5.1 54 | #nodeenv==1.6.0 55 | numpy==1.18.5 56 | #oauthlib==3.2.2 57 | opencv-python==4.2.0.32 58 | pandas==1.4.4 59 | Pillow==8.3.1 60 | #pre-commit==2.13.0 61 | #protobuf==3.20.3 62 | py-markdown-table==0.3.3 63 | #pyasn1==0.4.8 64 | #pyasn1-modules==0.2.8 65 | pydantic==1.10.3 66 | pygame==2.1.0 67 | Pygments==2.14.00 68 | pyglet==1.5.0 69 | pyparsing==2.4.7 70 | python-dateutil==2.8.1 71 | python-dotenv==0.18.0 72 | pytz==2022.7 73 | #PyWavelets==1.1.1 74 | PyYAML==5.4.1 75 | requests==2.25.1 76 | requests-oauthlib==1.3.1 77 | #rosbag==1.15.14 78 | #rosclean==1.15.8 79 | #rosgraph==1.15.14 80 | #roslaunch==1.15.14 81 | #roslib==1.15.8 82 | #roslz4==1.15.14 83 | #rosmaster==1.15.14 84 | #rosmsg==1.15.14 85 | #rosnode==1.15.14 86 | #rosparam==1.15.14 87 | #rospkg==1.2.8 88 | #rospy==1.15.14 89 | #rosservice==1.15.14 90 | #rostest==1.15.14 91 | #rostopic==1.15.14 92 | #rosunit==1.15.8 93 | #roswtf==1.15.14 94 | rsa==4.9 95 | scikit-image==0.17.2 96 | scipy==1.6.1 97 | #sensor-msgs==1.13.1 98 | #Shimmy==0.2.0 99 | #six==1.14.0 100 | #smclib==1.8.6 101 | tensorboard==2.11.0 102 | tensorboard-data-server==0.6.1 103 | tensorboard-plugin-wit==1.8.1 104 | #tf==1.13.2 105 | #tf2-py==0.7.5 106 | #tf2-ros==0.7.5 107 | tifffile==2021.7.2 108 | toml==0.10.2 109 | #topic-tools==1.15.14 110 | #torch==1.12.1+cu113 111 | torch==1.12.1 112 | tqdm==4.64.0 113 | typing_extensions==4.4.0 114 | urllib3==1.26.6 115 | virtualenv==20.4.7 116 | Werkzeug==2.2.2 117 | zipp==3.11.0 -------------------------------------------------------------------------------- /rl_studio/envs/carla/utils/weather.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | def clamp(value, minimum=0.0, maximum=100.0): 4 | return max(minimum, min(value, maximum)) 5 | 6 | 7 | class Sun(object): 8 | def __init__(self, azimuth, altitude): 9 | self.azimuth = azimuth 10 | 
self.altitude = altitude 11 | self._t = 0.0 12 | 13 | def tick(self, delta_seconds): 14 | self._t += 0.008 * delta_seconds 15 | self._t %= 2.0 * math.pi 16 | self.azimuth += 0.25 * delta_seconds 17 | self.azimuth %= 360.0 18 | self.altitude = (70 * math.sin(self._t)) - 20 19 | 20 | def __str__(self): 21 | return 'Sun(alt: %.2f, azm: %.2f)' % (self.altitude, self.azimuth) 22 | 23 | 24 | class Storm(object): 25 | def __init__(self, precipitation): 26 | self._t = precipitation if precipitation > 0.0 else -50.0 27 | self._increasing = True 28 | self.clouds = 0.0 29 | self.rain = 0.0 30 | self.wetness = 0.0 31 | self.puddles = 0.0 32 | self.wind = 0.0 33 | self.fog = 0.0 34 | 35 | def tick(self, delta_seconds): 36 | delta = (1.3 if self._increasing else -1.3) * delta_seconds 37 | self._t = clamp(delta + self._t, -250.0, 100.0) 38 | self.clouds = clamp(self._t + 40.0, 0.0, 90.0) 39 | self.rain = clamp(self._t, 0.0, 80.0) 40 | delay = -10.0 if self._increasing else 90.0 41 | self.puddles = clamp(self._t + delay, 0.0, 85.0) 42 | self.wetness = clamp(self._t * 5, 0.0, 100.0) 43 | self.wind = 5.0 if self.clouds <= 20 else 90 if self.clouds >= 70 else 40 44 | self.fog = clamp(self._t - 10, 0.0, 30.0) 45 | if self._t == -250.0: 46 | self._increasing = True 47 | if self._t == 100.0: 48 | self._increasing = False 49 | 50 | def __str__(self): 51 | return 'Storm(clouds=%d%%, rain=%d%%, wind=%d%%)' % (self.clouds, self.rain, self.wind) 52 | 53 | 54 | class Weather(object): 55 | def __init__(self, weather): 56 | self.weather = weather 57 | self._sun = Sun(weather.sun_azimuth_angle, weather.sun_altitude_angle) 58 | self._storm = Storm(weather.precipitation) 59 | 60 | def tick(self, delta_seconds): 61 | self._sun.tick(delta_seconds) 62 | self._storm.tick(delta_seconds) 63 | self.weather.cloudiness = self._storm.clouds 64 | self.weather.precipitation = self._storm.rain 65 | self.weather.precipitation_deposits = self._storm.puddles 66 | self.weather.wind_intensity = self._storm.wind 67 | self.weather.fog_density = self._storm.fog 68 | self.weather.wetness = self._storm.wetness 69 | self.weather.sun_azimuth_angle = self._sun.azimuth 70 | self.weather.sun_altitude_angle = self._sun.altitude 71 | 72 | def __str__(self): 73 | return '%s %s' % (self._sun, self._storm) -------------------------------------------------------------------------------- /rl_studio/envs/carla/followlane/settings.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | 3 | from rl_studio.envs.carla.followlane.images import AutoCarlaImages 4 | from rl_studio.envs.carla.followlane.utils import AutoCarlaUtils 5 | from rl_studio.envs.carla.followlane.rewards import AutoCarlaRewards 6 | from rl_studio.envs.carla.followlane.simplified_perception import ( 7 | AutoCarlaSimplifiedPerception, 8 | ) 9 | 10 | 11 | class FollowLaneCarlaConfig(BaseModel): 12 | def __init__(self, **config): 13 | self.simplifiedperception = AutoCarlaSimplifiedPerception() 14 | self.autocarlarewards = AutoCarlaRewards() 15 | self.autocarlautils = AutoCarlaUtils() 16 | self.autocarlaimages = AutoCarlaImages() 17 | 18 | # self.image = ImageF1() 19 | # self.image = ListenerCamera("/F1ROS/cameraL/image_raw") 20 | self.image_raw_from_topic = None 21 | self.image_camera = None 22 | # self.sensor = config["sensor"] 23 | 24 | # Image 25 | self.image_resizing = config["image_resizing"] / 100 26 | self.new_image_size = config["new_image_size"] 27 | self.raw_image = config["raw_image"] 28 | self.height = 
int(config["height_image"] * self.image_resizing) 29 | self.width = int(config["width_image"] * self.image_resizing) 30 | self.center_image = int(config["center_image"] * self.image_resizing) 31 | self.num_regions = config["num_regions"] 32 | self.pixel_region = int(self.center_image / self.num_regions) * 2 33 | # self.telemetry_mask = config["telemetry_mask"] 34 | self.poi = config["x_row"][0] 35 | self.image_center = None 36 | self.right_lane_center_image = config["center_image"] + ( 37 | config["center_image"] // 2 38 | ) 39 | self.lower_limit = config["lower_limit"] 40 | 41 | # States 42 | self.state_space = config["states"] 43 | if self.state_space == "spn": 44 | self.x_row = [i for i in range(1, int(self.height / 2) - 1)] 45 | else: 46 | self.x_row = config["x_row"] 47 | 48 | # Actions 49 | self.action_space = config["action_space"] 50 | self.actions = config["actions"] 51 | 52 | # Rewards 53 | self.reward_function = config["reward_function"] 54 | self.rewards = config["rewards"] 55 | self.min_reward = config["min_reward"] 56 | 57 | # Pose 58 | self.alternate_pose = config["alternate_pose"] 59 | self.waypoints_meters = config["waypoints_meters"] 60 | self.waypoints_init = config["waypoints_init"] 61 | self.waypoints_target = config["waypoints_target"] 62 | self.waypoints_lane_id = config["waypoints_lane_id"] 63 | self.waypoints_road_id = config["waypoints_road_id"] 64 | 65 | # self.actor_list = [] 66 | # self.collision_hist = [] 67 | -------------------------------------------------------------------------------- /rl_studio/agents/mountain_car/utils.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | import matplotlib.pyplot as plt 4 | import pandas as pd 5 | 6 | 7 | # TODO Since these utils are algorithm specific, those should stay in the algorithm folder somehow tied to its algorithm class 8 | 9 | 10 | def update_line(axes, runs_rewards): 11 | plot_rewards_per_run(axes, runs_rewards) 12 | plt.draw() 13 | plt.pause(0.01) 14 | 15 | 16 | def get_stats_figure(runs_rewards): 17 | fig, axes = plt.subplots() 18 | fig.set_size_inches(12, 4) 19 | plot_rewards_per_run(axes, runs_rewards) 20 | plt.ion() 21 | plt.show() 22 | return fig, axes 23 | 24 | 25 | def plot_rewards_per_run(axes, runs_rewards): 26 | rewards_graph = pd.DataFrame(runs_rewards) 27 | ax = rewards_graph.plot(ax=axes, title="steps per run") 28 | ax.set_xlabel("runs") 29 | ax.set_ylabel("steps") 30 | ax.legend().set_visible(False) 31 | 32 | 33 | def save_model(qlearn, current_time, states, states_counter, states_rewards): 34 | # Tabular RL: Tabular Q-learning basically stores the policy (Q-values) of the agent into a matrix of shape 35 | # (S x A), where s are all states, a are all the possible actions. After the environment is solved, just save this 36 | # matrix as a csv file. I have a quick implementation of this on my GitHub under Reinforcement Learning. 
37 | 38 | # Q TABLE 39 | base_file_name = "_epsilon_{}".format(round(qlearn.epsilon, 3)) 40 | file_dump = open( 41 | f"./logs/mountain_car/1_{current_time}{base_file_name}_QTABLE.pkl", "wb" 42 | ) 43 | pickle.dump(qlearn.q, file_dump) 44 | # STATES COUNTER 45 | states_counter_file_name = base_file_name + "_STATES_COUNTER.pkl" 46 | file_dump = open( 47 | f"./logs/mountain_car/2_{current_time}{states_counter_file_name}", "wb" 48 | ) 49 | pickle.dump(states_counter, file_dump) 50 | # STATES CUMULATED REWARD 51 | states_cum_reward_file_name = base_file_name + "_STATES_CUM_REWARD.pkl" 52 | file_dump = open( 53 | f"./logs/mountain_car/3_{current_time}{states_cum_reward_file_name}", "wb" 54 | ) 55 | pickle.dump(states_rewards, file_dump) 56 | # STATES 57 | steps = base_file_name + "_STATES_STEPS.pkl" 58 | file_dump = open(f"./logs/mountain_car/4_{current_time}{steps}", "wb") 59 | pickle.dump(states, file_dump) 60 | 61 | 62 | def save_actions(actions, start_time): 63 | file_dump = open("./logs/mountain_car/actions_set_" + start_time, "wb") 64 | pickle.dump(actions, file_dump) 65 | 66 | 67 | def render(env, episode): 68 | render_skip = 0 69 | render_interval = 50 70 | render_episodes = 10 71 | 72 | if (episode % render_interval == 0) and (episode != 0) and (episode > render_skip): 73 | env.render() 74 | elif ( 75 | ((episode - render_episodes) % render_interval == 0) 76 | and (episode != 0) 77 | and (episode > render_skip) 78 | and (render_episodes < episode) 79 | ): 80 | env.render(close=True) 81 | -------------------------------------------------------------------------------- /rl_studio/agents/robot_mesh/utils.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | import matplotlib.pyplot as plt 4 | import pandas as pd 5 | 6 | 7 | # TODO Since these utils are algorithm specific, those should stay in the algorithm folder somehow tied to its algorithm class 8 | 9 | 10 | # TODO Since these utils are algorithm specific, those should stay in the algorithm folder somehow tied to its algorithm class 11 | 12 | 13 | def update_line(axes, runs_rewards): 14 | plot_rewards_per_run(axes, runs_rewards) 15 | plt.draw() 16 | plt.pause(0.01) 17 | 18 | 19 | def get_stats_figure(runs_rewards): 20 | fig, axes = plt.subplots() 21 | fig.set_size_inches(12, 4) 22 | plot_rewards_per_run(axes, runs_rewards) 23 | plt.ion() 24 | plt.show() 25 | return fig, axes 26 | 27 | 28 | def plot_rewards_per_run(axes, runs_rewards): 29 | rewards_graph = pd.DataFrame(runs_rewards) 30 | ax = rewards_graph.plot(ax=axes, title="steps per run") 31 | ax.set_xlabel("runs") 32 | ax.set_ylabel("steps") 33 | ax.legend().set_visible(False) 34 | 35 | 36 | def save_model(qlearn, current_time, states, states_counter, states_rewards): 37 | # Tabular RL: Tabular Q-learning basically stores the policy (Q-values) of the agent into a matrix of shape 38 | # (S x A), where s are all states, a are all the possible actions. After the environment is solved, just save this 39 | # matrix as a csv file. I have a quick implementation of this on my GitHub under Reinforcement Learning. 
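# Illustrative sketch (assumption: qlearn.q is a dict keyed by (state..., action) tuples, as in
# rl_studio/algorithms/qlearn_multiple_states.py; "qtable.csv" is a placeholder name). The CSV
# export mentioned in the comment above could look roughly like:
#   import csv
#   with open("qtable.csv", "w", newline="") as csv_file:
#       writer = csv.writer(csv_file)
#       for state_action, q_value in qlearn.q.items():
#           writer.writerow([*state_action, q_value])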
40 | 41 | # TODO The paths are not relative to the agents folder 42 | # Q TABLE 43 | base_file_name = "_epsilon_{}".format(round(qlearn.epsilon, 3)) 44 | file_dump = open( 45 | "./logs/robot_mesh/1_" + current_time + base_file_name + "_QTABLE.pkl", "wb" 46 | ) 47 | pickle.dump(qlearn.q, file_dump) 48 | # STATES COUNTER 49 | states_counter_file_name = base_file_name + "_STATES_COUNTER.pkl" 50 | file_dump = open( 51 | "./logs/robot_mesh/2_" + current_time + states_counter_file_name, "wb" 52 | ) 53 | pickle.dump(states_counter, file_dump) 54 | # STATES CUMULATED REWARD 55 | states_cum_reward_file_name = base_file_name + "_STATES_CUM_REWARD.pkl" 56 | file_dump = open( 57 | "./logs/robot_mesh/3_" + current_time + states_cum_reward_file_name, "wb" 58 | ) 59 | pickle.dump(states_rewards, file_dump) 60 | # STATES 61 | steps = base_file_name + "_STATES_STEPS.pkl" 62 | file_dump = open("./logs/robot_mesh/4_" + current_time + steps, "wb") 63 | pickle.dump(states, file_dump) 64 | 65 | 66 | def save_actions(actions, start_time): 67 | file_dump = open("./logs/robot_mesh/actions_set_" + start_time, "wb") 68 | pickle.dump(actions, file_dump) 69 | 70 | 71 | def render(env, episode): 72 | render_skip = 0 73 | render_interval = 50 74 | render_episodes = 10 75 | 76 | if (episode % render_interval == 0) and (episode != 0) and (episode > render_skip): 77 | env.render() 78 | elif ( 79 | ((episode - render_episodes) % render_interval == 0) 80 | and (episode != 0) 81 | and (episode > render_skip) 82 | and (render_episodes < episode) 83 | ): 84 | env.render(close=True) 85 | -------------------------------------------------------------------------------- /rl_studio/algorithms/__init__.py: -------------------------------------------------------------------------------- 1 | from rl_studio.algorithms.algorithms_type import AlgorithmsType 2 | from rl_studio.algorithms.exceptions import NoValidAlgorithmType 3 | import pickle 4 | 5 | 6 | class TrainerFactory: 7 | def __init__(self, **kwargs): 8 | self.algorithm = kwargs.get("algorithm") 9 | 10 | 11 | class InferencerFactory: 12 | def __new__(cls, config): 13 | 14 | algorithm = config.algorithm 15 | inference_file_name = config.inference_file 16 | 17 | if algorithm == AlgorithmsType.QLEARN.value: 18 | from rl_studio.algorithms.qlearn_multiple_states import QLearn 19 | 20 | actions_file_name = config.actions_file 21 | actions_file = open(actions_file_name, "rb") 22 | actions = pickle.load(actions_file) 23 | 24 | brain = QLearn(config, epsilon=0) 25 | brain.load_model(inference_file_name, actions) 26 | 27 | return brain 28 | 29 | if algorithm == AlgorithmsType.DEPRECATED_QLEARN.value: 30 | from rl_studio.algorithms.qlearn import QLearn 31 | 32 | actions_file_name = config.actions_file 33 | actions_file = open(actions_file_name, "rb") 34 | actions = pickle.load(actions_file) 35 | 36 | brain = QLearn(config, epsilon=0.05) 37 | brain.load_model(inference_file_name, actions) 38 | 39 | return brain 40 | 41 | elif algorithm == AlgorithmsType.DQN.value: 42 | from rl_studio.algorithms.dqn_torch import DQN_Agent 43 | 44 | input_dim = config.env.observation_space.shape[0] 45 | output_dim = config.env.action_space.n 46 | brain = DQN_Agent(layer_sizes=[input_dim, 64, output_dim]) 47 | brain.load_model(inference_file_name) 48 | 49 | return brain 50 | 51 | elif algorithm == AlgorithmsType.PPO.value: 52 | from rl_studio.algorithms.ppo import Actor 53 | from rl_studio.algorithms.ppo import Mish 54 | 55 | input_dim = config.env.observation_space.shape[0] 56 | output_dim = 
config.env.action_space.n 57 | brain = Actor(input_dim, output_dim, activation=Mish) 58 | brain.load_model(inference_file_name) 59 | 60 | return brain 61 | 62 | elif algorithm == AlgorithmsType.PPO_CONTINIUOUS.value: 63 | from rl_studio.algorithms.ppo_continuous import PPO 64 | 65 | input_dim = config.env.observation_space.shape[0] 66 | output_dim = config.env.action_space.shape[0] 67 | 68 | brain = PPO(input_dim, output_dim, None, None, None, None, None, 69 | True, None) 70 | brain.load(inference_file_name) 71 | 72 | return brain 73 | 74 | elif algorithm == AlgorithmsType.DDPG.value: 75 | from rl_studio.algorithms.ddpg_torch import Actor 76 | 77 | brain = Actor() 78 | brain.load_model(inference_file_name) 79 | 80 | return brain 81 | # elif algorithm == AlgorithmsType.DQN.value: 82 | # from rl_studio.algorithms.dqn import DeepQ 83 | # 84 | # return DeepQ(config) 85 | 86 | else: 87 | raise NoValidAlgorithmType(algorithm) 88 | -------------------------------------------------------------------------------- /rl_studio/agents/cartpole/README.md: -------------------------------------------------------------------------------- 1 | # CARTPOLE 2 | 3 | To learn about the environment provided by OpenAI, refer to the [following url](https://www.gymlibrary.dev/environments/classic_control/cart_pole/) 4 | 5 | ## ENVIRONMENT MODIFICATIONS 6 | 7 | In RL-Studio, the Cartpole environment has been tuned to allow fine-grained experiments regarding: 8 | - the solidity of the different algorithms as the difficulty of the problem increases 9 | - the viability of the algorithms regarding the inference cycle iteration time 10 | - the performance improvement of implementing continuous actions with respect to discrete actions 11 | 12 | The modifications are made in the environments cartpole_env_improved.py and cartpole_env_continuous_improved.py. 13 | 14 | Use the following parameters in the environments section of the config.yaml in order to: 15 | ### configure the desired environment, 16 | 17 | - To use the cartpole_env_continuous_improved.py 18 | - env_name: myCartpole-continuous-v1 19 | - environment_folder: cartpole 20 | - To use the cartpole_env_improved.py 21 | - env_name: myCartpole-v1 22 | - environment_folder: cartpole 23 | 24 | ### configure the difficulty of the environment, use the following parameters in the config.yaml: 25 | 26 | - random_start_level: Indicates the standard deviation in radians of the initial pole angle 27 | - random_perturbations_level: Number between 0 and 1 that indicates the frequency of the random perturbations. 28 | A perturbation of 0.1 indicates that a perturbation will be added every 10 steps. 29 | - perturbations_intensity_std: Number between 0 and 1 that indicates the standard deviation of perturbations intensity. 30 | A perturbations_intensity_std of 1 indicates that each perturbation intensity will have a standard deviation equal to 31 | the intensity of the action applied to the pole. 32 | - initial_pole_angle: Indicates a fixed initial pole angle in radians 33 | 34 | ### configure an automatic launching of several inference experiments with different problem configurations 35 | 36 | - settings: mode: inference 37 | - experiments: Number of experiments for each of the different perturbations configurations to run. 38 | A value of 20 indicates that 20 experiments of initial_pole, 20 of perturbation intensity and 20 of perturbation 39 | frequency will be run. 40 | - random_perturbations_level_step: step between one experiment and the following regarding the random_perturbation_level.
41 | A value of 0.1 means that the first experiment will be run with 0.1 random_perturbation_level, the second one with 0.2, the third one 42 | with 0.3... 43 | perturbations_intensity_std_step: step between one experiment and the following regarding the perturbations_intensity_std. 44 | A value of 0.1 means that the first experiment will be run with 0.1 perturbations_intensity_std, the second one with 0.2, the third one 45 | with 0.3... 46 | initial_pole_angle_steps: step between one experiment and the following regarding the initial_pole_angle. 47 | A value of 0.1 means that the first experiment will be run with 0.1 initial_pole_angle, the second one with 0.2, the third one 48 | with 0.3... 49 | 50 | Note that all experiments results and logs will be stored in the logs///inference folder. -------------------------------------------------------------------------------- /rl_studio/scripts/benchmark_runner: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Run all the tasks on a benchmark using a random agent. 4 | # 5 | # This script assumes you have set an OPENAI_GYM_API_KEY environment 6 | # variable. You can find your API key in the web interface: 7 | # https://gym.openai.com/settings/profile. 8 | # 9 | import argparse 10 | import logging 11 | import sys 12 | 13 | import gym 14 | # In modules, use `logger = logging.getLogger(__name__)` 15 | from gym import wrappers 16 | from gym.scoreboard.scoring import benchmark_score_from_local 17 | 18 | logger = logging.getLogger() 19 | 20 | 21 | def main(): 22 | parser = argparse.ArgumentParser(description=None) 23 | parser.add_argument( 24 | "-b", "--benchmark-id", help="id of benchmark to run e.g. Atari7Ram-v0" 25 | ) 26 | parser.add_argument( 27 | "-v", 28 | "--verbose", 29 | action="count", 30 | dest="verbosity", 31 | default=0, 32 | help="Set verbosity.", 33 | ) 34 | parser.add_argument( 35 | "-f", "--force", action="store_true", dest="force", default=False 36 | ) 37 | parser.add_argument( 38 | "-t", 39 | "--training-dir", 40 | default="/tmp/gym-results", 41 | help="What directory to upload.", 42 | ) 43 | args = parser.parse_args() 44 | 45 | if args.verbosity == 0: 46 | logger.setLevel(logging.INFO) 47 | elif args.verbosity >= 1: 48 | logger.setLevel(logging.DEBUG) 49 | 50 | benchmark_id = args.benchmark_id 51 | if benchmark_id is None: 52 | logger.info("Must supply a valid benchmark") 53 | return 1 54 | 55 | try: 56 | benchmark = gym.benchmark_spec(benchmark_id) 57 | except Exception: 58 | logger.info("Invalid benchmark") 59 | return 1 60 | 61 | # run benchmark tasks 62 | for task in benchmark.tasks: 63 | logger.info("Running on env: {}".format(task.env_id)) 64 | for trial in range(task.trials): 65 | env = gym.make(task.env_id) 66 | training_dir_name = "{}/{}-{}".format(args.training_dir, task.env_id, trial) 67 | env = wrappers.Monitor( 68 | env, training_dir_name, video_callable=False, force=args.force 69 | ) 70 | env.reset() 71 | for _ in range(task.max_timesteps): 72 | o, r, done, _ = env.step(env.action_space.sample()) 73 | if done: 74 | env.reset() 75 | env.close() 76 | 77 | logger.info( 78 | """Computing statistics for this benchmark run... 
79 | {{ 80 | score: {score}, 81 | num_envs_solved: {num_envs_solved}, 82 | summed_training_seconds: {summed_training_seconds}, 83 | start_to_finish_seconds: {start_to_finish_seconds}, 84 | }} 85 | 86 | """.rstrip().format( 87 | **benchmark_score_from_local(benchmark_id, args.training_dir) 88 | ) 89 | ) 90 | 91 | logger.info( 92 | """Done running, upload results using the following command: 93 | 94 | python -c "import gym; gym.upload('{}', benchmark_id='{}', algorithm_id='(unknown)')" 95 | 96 | """.rstrip().format( 97 | args.training_dir, benchmark_id 98 | ) 99 | ) 100 | 101 | return 0 102 | 103 | 104 | if __name__ == "__main__": 105 | sys.exit(main()) 106 | -------------------------------------------------------------------------------- /rl_studio/config/config_training_followline_qlearn_f1_gazebo.yaml: -------------------------------------------------------------------------------- 1 | settings: 2 | mode: training # training, retraining 3 | task: follow_line_gazebo # follow_line_gazebo 4 | algorithm: qlearn # qlearn 5 | simulator: gazebo # openai, carla, gazebo, sumo 6 | environment_set: gazebo_environments # gazebo_environments, carla_environments 7 | env: simple # simple, nurburgring, montreal, curves, simple_laser, manual, autoparking 8 | agent: f1 # f1, autoparkingRL, auto_carla, mountain_car, robot_mesh, cartpole, turtlebot 9 | actions: simple # simple, medium, hard, test, autoparking_simple 10 | states: sp1 # sp1 (simplified perception with 1 point), sp3 (simplified perception with 3 points), spn (simplified perception with n points) 11 | rewards: followline_center # 12 | framework: _ 13 | total_episodes: 10 14 | training_time: 10 15 | models_dir: "./checkpoints" 16 | logs_dir: "./logs" 17 | metrics_dir: "./metrics" 18 | 19 | ros: 20 | ros_master_uri: "11311" 21 | gazebo_master_uri: "11345" 22 | 23 | retraining: 24 | qlearn: 25 | retrain_qlearn_model_name: "20230105-174932_Circuit-simple_States-sp1_Actions-simple_Rewards-followline_center_epsilon-0.05_epoch-10_step-15001_reward-134200-qtable.npy" 26 | 27 | inference: 28 | qlearn: 29 | inference_qlearn_model_name: 30 | 31 | algorithm: 32 | qlearn: 33 | alpha: 0.2 34 | epsilon: 0.95 35 | epsilon_min: 0.05 36 | gamma: 0.9 37 | 38 | agents: 39 | f1: 40 | camera_params: 41 | width: 640 42 | height: 480 43 | center_image: 320 44 | raw_image: False 45 | image_resizing: 100 46 | new_image_size: 32 47 | num_regions: 16 48 | lower_limit: 220 49 | 50 | states: 51 | sp1: 52 | 0: [10] 53 | sp3: 54 | 0: [5, 15, 22] 55 | sp5: 56 | 0: [3, 5, 10, 15, 20] 57 | spn: 58 | 0: [10] 59 | 60 | actions: 61 | simple: 62 | 0: [3, 0] 63 | 1: [2, 1] 64 | 2: [2, -1] 65 | medium: 66 | 0: [3, 0] 67 | 1: [2, 1] 68 | 2: [2, -1] 69 | 3: [1, 1.5] 70 | 4: [1, -1.5] 71 | hard: 72 | 0: [3, 0] 73 | 1: [2, 1] 74 | 2: [2, -1] 75 | 3: [1.5, 1] 76 | 4: [1.5, -1] 77 | 5: [1, -1.5] 78 | 6: [1, -1.5] 79 | test: 80 | 0: [0, 0] 81 | 82 | rewards: 83 | followline_center: 84 | from_10: 10 85 | from_02: 2 86 | from_01: 1 87 | penal: -100 88 | min_reward: 5_000 89 | highest_reward: 100 90 | 91 | gazebo_environments: 92 | simple: 93 | env_name: F1Env-v0 94 | circuit_name: simple 95 | launchfile: simple_circuit.launch 96 | environment_folder: f1 97 | robot_name: f1_renault 98 | model_state_name: f1_renault # 99 | start_pose: 0 # 0, 1, 2, 3, 4 100 | alternate_pose: False 101 | estimated_steps: 15000 102 | sensor: camera 103 | save_episodes: 1 104 | save_every_step: 1000 105 | lap_completed: False 106 | save_model: True 107 | save_positions: True 108 | debug_level: DEBUG 109 | telemetry: 
False 110 | telemetry_mask: False 111 | plotter_graphic: False 112 | circuit_positions_set: 113 | 0: [53.462, -41.988, 0.004, 0, 0, 1.57, -1.57] 114 | 1: [53.462, -8.734, 0.004, 0, 0, 1.57, -1.57] #finish line 115 | 2: [39.712, -30.741, 0.004, 0, 0, 1.56, 1.56] 116 | 3: [-6.861, -36.481, 0.004, 0, 0.01, -0.858, 0.613] 117 | 4: [20.043, 37.130, 0.003, 0, 0.103, -1.4383, -1.4383] 118 | -------------------------------------------------------------------------------- /rl_studio/agents/f1/utils.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | import pickle 4 | 5 | import cv2 6 | import numpy as np 7 | import pandas as pd 8 | 9 | from rl_studio.agents.f1 import settings 10 | 11 | 12 | # TODO Since these utils are algorithm specific, those should stay in the algorithm folder somehow tied to its algorithm class 13 | 14 | 15 | def load_model(params, qlearn, file_name): 16 | 17 | qlearn_file = open("./logs/qlearn_models/" + file_name, "rb") 18 | model = pickle.load(qlearn_file) 19 | 20 | qlearn.q = model 21 | qlearn.ALPHA = params.algorithm["params"]["alpha"] 22 | qlearn.GAMMA = params.algorithm["params"]["gamma"] 23 | qlearn.epsilon = params.algorithm["params"]["epsilon"] 24 | 25 | # highest_reward = settings.algorithm_params["highest_reward"] 26 | 27 | print(f"\n\nMODEL LOADED. Number of (action, state): {len(model)}") 28 | print(f" - Loading: {file_name}") 29 | print(f" - Model size: {len(qlearn.q)}") 30 | print(f" - Action set: {settings.actions_set}") 31 | print(f" - Epsilon: {qlearn.epsilon}") 32 | print(f" - Start: {datetime.datetime.now()}") 33 | 34 | 35 | def save_model(qlearn, current_time, states, states_counter, states_rewards): 36 | # Tabular RL: Tabular Q-learning basically stores the policy (Q-values) of the agent into a matrix of shape 37 | # (S x A), where s are all states, a are all the possible actions. After the environment is solved, just save this 38 | # matrix as a csv file. I have a quick
39 | 40 | # Q TABLE 41 | base_file_name = "_act_set_{}_epsilon_{}".format( 42 | settings.qlearn.actions_set, round(qlearn.epsilon, 2) 43 | ) 44 | file_dump = open( 45 | f"./logs/qlearn_models/1_{current_time}{base_file_name}_QTABLE.pkl", "wb" 46 | ) 47 | pickle.dump(qlearn.q, file_dump) 48 | # STATES COUNTER 49 | states_counter_file_name = base_file_name + "_STATES_COUNTER.pkl" 50 | file_dump = open( 51 | f"./logs/qlearn_models/2_{current_time}{states_counter_file_name}", "wb" 52 | ) 53 | pickle.dump(states_counter, file_dump) 54 | # STATES CUMULATED REWARD 55 | states_cum_reward_file_name = base_file_name + "_STATES_CUM_REWARD.pkl" 56 | file_dump = open( 57 | f"./logs/qlearn_models/3_{current_time}{states_cum_reward_file_name}", "wb" 58 | ) 59 | pickle.dump(states_rewards, file_dump) 60 | # STATES 61 | steps = base_file_name + "_STATES_STEPS.pkl" 62 | file_dump = open(f"./logs/qlearn_models/4_{current_time}{steps}", "wb") 63 | pickle.dump(states, file_dump) 64 | 65 | 66 | def save_times(checkpoints): 67 | file_name = "actions_" 68 | file_dump = open( 69 | f"/logs/{file_name}{settings.qlearn.actions_set}_checkpoints.pkl", "wb" 70 | ) 71 | pickle.dump(checkpoints, file_dump) 72 | 73 | 74 | def save_actions(actions, current_time): 75 | file_dump = open("./logs/qlearn_models/actions_set_" + current_time, "wb") 76 | pickle.dump(actions, file_dump) 77 | 78 | 79 | def render(env, episode): 80 | render_skip = 0 81 | render_interval = 50 82 | render_episodes = 10 83 | 84 | if (episode % render_interval == 0) and (episode != 0) and (episode > render_skip): 85 | env.render() 86 | elif ( 87 | ((episode - render_episodes) % render_interval == 0) 88 | and (episode != 0) 89 | and (episode > render_skip) 90 | and (render_episodes < episode) 91 | ): 92 | env.render(close=True) 93 | -------------------------------------------------------------------------------- /rl_studio/config/config_inference_followline_qlearn_f1_gazebo.yaml: -------------------------------------------------------------------------------- 1 | settings: 2 | mode: inference # training, retraining, inference 3 | task: follow_line_gazebo # follow_line_gazebo, follow_lane_gazebo, autoparking_gazebo 4 | algorithm: qlearn # qlearn, dqn, ddpg, ppo 5 | simulator: gazebo # openai, carla, gazebo, sumo 6 | environment_set: gazebo_environments # gazebo_environments, carla_environments 7 | env: simple # simple, nurburgring, montreal, curves, simple_laser, manual, autoparking 8 | agent: f1 # f1, autoparkingRL, auto_carla, mountain_car, robot_mesh, cartpole, turtlebot 9 | actions: simple # simple, medium, hard, test 10 | states: sp1 #image, sp1 (simplified perception with 1 point), sp3 (simplified perception with 3 points), spn (simplified perception with n points) 11 | rewards: followline_center # rewards_followline_center 12 | framework: _ 13 | total_episodes: 5 14 | training_time: 6 15 | models_dir: "./checkpoints" 16 | logs_dir: "./logs" 17 | metrics_dir: "./metrics" 18 | 19 | ros: 20 | ros_master_uri: "11311" 21 | gazebo_master_uri: "11345" 22 | 23 | retraining: 24 | qlearn: 25 | retrain_qlearn_model_name: 26 | 27 | inference: 28 | qlearn: 29 | inference_qlearn_model_name: "20230105-174932_Circuit-simple_States-sp1_Actions-simple_Rewards-followline_center_epsilon-0.05_epoch-10_step-15001_reward-134200-qtable.npy" 30 | 31 | algorithm: 32 | qlearn: 33 | alpha: 0.2 34 | epsilon: 0.95 35 | epsilon_min: 0.05 36 | gamma: 0.9 37 | 38 | agents: 39 | f1: 40 | camera_params: 41 | width: 640 42 | height: 480 43 | center_image: 320 44 | raw_image: False 45 | 
image_resizing: 100 46 | new_image_size: 32 47 | num_regions: 16 48 | lower_limit: 220 49 | states: 50 | sp1: 51 | 0: [10] 52 | sp3: 53 | 0: [5, 15, 22] 54 | sp5: 55 | 0: [3, 5, 10, 15, 20] 56 | spn: 57 | 0: [10] 58 | 59 | actions: 60 | simple: 61 | 0: [3, 0] 62 | 1: [2, 1] 63 | 2: [2, -1] 64 | medium: 65 | 0: [3, 0] 66 | 1: [2, 1] 67 | 2: [2, -1] 68 | 3: [1, 1.5] 69 | 4: [1, -1.5] 70 | hard: 71 | 0: [3, 0] 72 | 1: [2, 1] 73 | 2: [2, -1] 74 | 3: [1.5, 1] 75 | 4: [1.5, -1] 76 | 5: [1, -1.5] 77 | 6: [1, -1.5] 78 | test: 79 | 0: [0, 0] 80 | 81 | rewards: 82 | followline_center: 83 | from_10: 10 84 | from_02: 2 85 | from_01: 1 86 | penal: -100 87 | min_reward: 5_000 88 | highest_reward: 100 89 | 90 | gazebo_environments: 91 | simple: 92 | env_name: F1Env-v0 93 | circuit_name: simple 94 | launchfile: simple_circuit.launch 95 | environment_folder: f1 96 | robot_name: f1_renault 97 | model_state_name: f1_renault # autoparking 98 | start_pose: 0 # 0, 1, 2, 3, 4 99 | alternate_pose: False 100 | estimated_steps: 100 101 | sensor: camera 102 | save_episodes: 5 103 | save_every_step: 10 104 | lap_completed: False 105 | save_model: True 106 | save_positions: True 107 | debug_level: DEBUG 108 | telemetry: False 109 | telemetry_mask: False 110 | plotter_graphic: False 111 | circuit_positions_set: 112 | 0: [53.462, -41.988, 0.004, 0, 0, 1.57, -1.57] 113 | 1: [53.462, -8.734, 0.004, 0, 0, 1.57, -1.57] 114 | 2: [39.712, -30.741, 0.004, 0, 0, 1.56, 1.56] 115 | 3: [-6.861, -36.481, 0.004, 0, 0.01, -0.858, 0.613] 116 | 4: [20.043, 37.130, 0.003, 0, 0.103, -1.4383, -1.4383] 117 | -------------------------------------------------------------------------------- /rl_studio/installation/gazebo_ros_noetic.repos: -------------------------------------------------------------------------------- 1 | repositories: 2 | # ar_track_alvar: 3 | # type: git 4 | # url: https://github.com/ros-perception/ar_track_alvar 5 | # version: melodic-devel 6 | catkin_simple: 7 | type: git 8 | url: https://github.com/catkin/catkin_simple.git 9 | version: master 10 | control_toolbox: 11 | type: git 12 | url: https://github.com/ros-controls/control_toolbox.git 13 | version: melodic-devel 14 | ecl_core: 15 | type: git 16 | url: https://github.com/stonier/ecl_core 17 | version: release/0.62-noetic 18 | ecl_lite: 19 | type: git 20 | url: https://github.com/stonier/ecl_lite 21 | version: release/0.61-noetic 22 | ecl_navigation: 23 | type: git 24 | url: https://github.com/stonier/ecl_navigation 25 | version: release/0.60-melodic 26 | ecl_tools: 27 | type: git 28 | url: https://github.com/stonier/ecl_tools 29 | version: release/0.61-noetic 30 | driver_base: 31 | type: git 32 | url: https://github.com/ros-drivers/driver_common.git 33 | version: indigo-devel 34 | gazebo_ros_pkgs: 35 | type: git 36 | url: https://github.com/ros-simulation/gazebo_ros_pkgs 37 | version: noetic-devel 38 | hector_gazebo: 39 | type: git 40 | url: https://github.com/tu-darmstadt-ros-pkg/hector_gazebo/ 41 | version: melodic-devel 42 | image_common: 43 | type: git 44 | url: https://github.com/ros-perception/image_common.git 45 | version: noetic-devel 46 | joystick_drivers: 47 | type: git 48 | url: https://github.com/ros-drivers/joystick_drivers.git 49 | version: master 50 | # kobuki: 51 | # type: git 52 | # url: https://github.com/yujinrobot/kobuki 53 | # version: melodic 54 | # kobuki_core: 55 | # type: git 56 | # url: https://github.com/yujinrobot/kobuki_core 57 | # version: melodic 58 | # kobuki_desktop: 59 | # type: git 60 | # url: 
https://github.com/yujinrobot/kobuki_desktop 61 | # version: melodic 62 | # kobuki_msgs: 63 | # type: git 64 | # url: https://github.com/yujinrobot/kobuki_msgs 65 | # version: release/0.7-melodic 66 | navigation: 67 | type: git 68 | url: https://github.com/ros-planning/navigation 69 | version: noetic-devel 70 | pcl_ros: 71 | type: git 72 | url: https://github.com/ros-perception/perception_pcl.git 73 | version: melodic-devel 74 | realtime_tools: 75 | type: git 76 | url: https://github.com/ros-controls/realtime_tools 77 | version: noetic-devel 78 | ros_control: 79 | type: git 80 | url: https://github.com/ros-controls/ros_control 81 | version: noetic-devel 82 | roslint: 83 | type: git 84 | url: https://github.com/ros/roslint 85 | version: master 86 | turtlebot: 87 | type: git 88 | url: https://github.com/turtlebot/turtlebot 89 | version: melodic 90 | turtlebot_create: 91 | type: git 92 | url: https://github.com/turtlebot/turtlebot_create 93 | version: indigo 94 | turtlebot_simulator: 95 | type: git 96 | url: https://github.com/turtlebot/turtlebot_simulator 97 | version: melodic 98 | xacro: 99 | type: git 100 | url: https://github.com/ros/xacro 101 | version: noetic-devel 102 | yocs_msgs: 103 | type: git 104 | url: https://github.com/yujinrobot/yocs_msgs 105 | version: release/0.7-melodic 106 | # yujin_ocs: 107 | # type: git 108 | # url: https://github.com/yujinrobot/yujin_ocs 109 | # version: release/0.8-melodic 110 | -------------------------------------------------------------------------------- /CODING.md: -------------------------------------------------------------------------------- 1 | # CODING STYLE 2 | 3 | If you are contributing to RL-Studio tool development, please follow the below coding style and structure of directories, files recommendations: 4 | 5 | ## SOME RULES 6 | 7 | - [PEP 8](https://peps.python.org/pep-0008/) style guide for Python Code 8 | - [Black](https://github.com/psf/black) format 9 | - [Pylint](https://pypi.org/project/pylint/) as static code analyser 10 | - Constants variable must be upper-case (e.g `TIME_THRESHOLD`, `NUMBER_OF_RUNS`) 11 | - Comment all non trivial functions to ease readability 12 | - All the internal imported packages must be imported from the root of the project (e.g `import rl_studio.agents` instead `import agents`) 13 | - Organize imports before pushing your code to the repo 14 | 15 | - When creating a project, please keep in mind: 16 | 17 | - in **/agents** directory, files names should be `mode_task_algorithm_simulator_framework.py`, i.e. `trainer_followline_ddpg_F1_gazebo_tf.py` or `inferencer_mountaincar_qlearn_openai_pytorch.py`. In case of not using framework leave it blank. 18 | - in **/envs/gazebo/f1/models** directory, files names should be `task_algorithm_framework.py`, i.e. `followline_ddpg_gazebo_tf.py` or `followlane_qlearn_pytorch.py`. In case of not using framework leave it blank. 19 | - As a general rule, **classes names** have to follow convention `ModeTaskAlgorithmAgentSimuladorFramework`, i.e. `TrainerFollowLaneDDPGF1GazeboPytorch` or `InferencerFollowLaneDQNF1GazeboTF` 20 | - in **/envs/gazebo** directory, classes names follow rule `TaskAlgorithmAgentSimulatorFramework`, i.e. `FollowlineDDPGF1GazeboTF`. 21 | 22 | # Directory architecture 23 | 24 | ## Config files 25 | 26 | - in **/config** directory add a configuration file with the following format `config_mode_task_algorithm_agent_simulator.yaml`, i.e. 
`config_training_followlane_qlearn_F1_carla.yaml` 27 | 28 | ## Models 29 | 30 | - Add trained brains to the **/checkpoints** folder. You can configure this in the config.yaml file. The app will automatically create a directory with the format `task_algorithm_agent_simulator_framework` where models are saved. 31 | - The model file should follow the format `timestamp_maxreward_epoch_ADDITIONALTEXT.h5` (h5 format), i.e. `09122002_max45678_epoch435_actor_conv2d32x64_critic_conv2d32x64_actionsCont_stateImg_rewardDiscrete.h5`, indicating the main features of the saved model so that the exact model can be found easily. 32 | 33 | ## Metrics 34 | 35 | - Statistics and metrics of the models should be saved in the **/metrics** folder. You can configure this in the config.yaml file. The app will automatically create a directory with the format `mode/task_algorithm_agent_simulator_framework/data` where data is saved. 36 | 37 | ## Graphics 38 | 39 | - Graphics of the models should be saved in the **/metrics** folder. You can configure this in the config.yaml file. The app will automatically create a directory with the format `mode/task_algorithm_agent_simulator_framework/graphics` where graphics are saved. 40 | 41 | ## Logs and TensorBoard files 42 | 43 | - TensorBoard and log files should be saved in the **/logs** folder. You can configure this in the config.yaml file. 44 | For TensorBoard, the app will automatically create a directory with the format `mode/task_algorithm_agent_simulator_framework/TensorBoard`. 45 | 46 | For logs, the app will automatically create a directory with the format `mode/task_algorithm_agent_simulator_framework/logs`. 47 | 48 | # TIPS 49 | 50 | - You can refer to the "mountain_car qlearning" project as an example of how to implement real-time monitoring 51 | - You can refer to the "cartpole dqn" project as an example of how to implement logging 52 | -------------------------------------------------------------------------------- /rl_studio/rl-studio-tuning-qlearn-followlane.py: -------------------------------------------------------------------------------- 1 | 2 | import optuna 3 | from rl_studio.agents import TrainerFactory, InferencerFactory 4 | 5 | 6 | def objective(trial): 7 | 8 | config_file = {'settings': {'mode': 'training', 'task': 'follow_lane_gazebo', 9 | 'algorithm': 'qlearn', 'simulator': 'gazebo', 'environment_set': 'gazebo_environments', 10 | 'env': 'simple', 'agent': 'f1', 'actions': 'simple', 'states': 'sp1', 11 | 'rewards': 'follow_right_lane_only_center', 'framework': '_', 12 | 'total_episodes': 5, 'training_time': 6, 'models_dir': './checkpoints', 'logs_dir': './logs', 13 | 'metrics_dir': './metrics'}, 'ros': {'ros_master_uri': '11311', 'gazebo_master_uri': '11345'}, 14 | 'retraining': {'qlearn': {'retrain_qlearn_model_name':None}}, 15 | 'inference': {'qlearn': {'inference_qlearn_model_name': None}}, 16 | 'algorithm': {'qlearn': {'alpha': 0.2, 'epsilon': 0.95, 'epsilon_min': 0.05, 'gamma': 0.9}}, 17 | 'agents': {'f1': {'camera_params': {'width': 640, 'height': 480, 'center_image': 320, 'raw_image': False, 'image_resizing': 100, 18 | 'new_image_size': 32, 'num_regions': 16, 'lower_limit': 220}}}, 'states': {'image': {0: [3]}, 'sp1': {0: [10]}, 'sp3': {0: [5, 15, 22]}, 19 | 'sp5': {0: [3, 5, 10, 15, 20]}, 'spn': {0: [10]}}, 'actions': {'simple': {0: [3, 0], 1: [2, 1], 2: [2, -1]}, 20 | 'medium': {0: [3, 0], 1: [2, 1], 2: [2, -1], 3: [1, 1.5], 4: [1, -1.5]}, 21 | 'hard': {0: [3, 0], 1: [2, 1], 2: [2, -1], 3: [1.5, 1], 4: [1.5, -1], 5: [1, -1.5], 6: [1, -1.5]}, 22 | 'test': {0: [0, 0]}}, 23 | 'rewards': 
{'follow_right_lane_only_center': {'from_10': 10, 'from_02': 2, 'from_01': 1, 'penal': -100, 'min_reward': 5000, 'highest_reward': 100}, 24 | 'follow_right_lane_center_v_step': {'from_10': 10, 'from_02': 2, 'from_01': 1, 'penal': -100, 'min_reward': 5000, 'highest_reward': 100}, 25 | 'follow_right_lane_center_v_w_linear': {'beta_0': 3, 'beta_1': -0.1, 'penal': 0, 'min_reward': 1000, 'highest_reward': 100}}, 26 | 'gazebo_environments': {'simple': {'env_name': 'F1Env-v0', 'circuit_name': 'simple', 'launchfile': 'simple_circuit.launch', 27 | 'environment_folder': 'f1', 'robot_name': 'f1_renault', 'model_state_name': 'f1_renault', 'start_pose': 0, 'alternate_pose': False, 28 | 'estimated_steps': 100, 'sensor': 'camera', 'save_episodes': 5, 'save_every_step': 10, 'lap_completed': False, 'save_model': True, 29 | 'save_positions': True, 'debug_level': 'DEBUG', 'telemetry': False, 'telemetry_mask': False, 'plotter_graphic': False, 30 | 'circuit_positions_set': {0: [52.8, -12.734, 0.004, 0, 0, 1.57, -1.57], 1: [52.97, -42.06, 0.004, 0, 0, 1.57, -1.57], 31 | 2: [40.2, -30.741, 0.004, 0, 0, 1.56, 1.56], 3: [0, 31.15, 0.004, 0, 0.01, 0, 0.31], 4: [19.25, 43.5, 0.004, 0, 0.0, 1.57, -1.69], 32 | 5: [52.8, -35.486, 0.004, 0, 0, 1.57, -1.57]}}}} 33 | 34 | config_file['algorithm']['qlearn']['alpha'] = trial.suggest_float('alpha', 0.1, 1, log=True) 35 | config_file['algorithm']['qlearn']['gamma'] = trial.suggest_float('gamma', 0.1, 1, log=True) 36 | 37 | print(f"config_file: {config_file}") 38 | 39 | if config_file["settings"]["mode"] == "inference": 40 | inferencer = InferencerFactory(config_file) 41 | inferencer.main() 42 | else: 43 | trainer = TrainerFactory(config_file) 44 | trainer.main() 45 | 46 | 47 | if __name__ == "__main__": 48 | 49 | study = optuna.create_study(direction='maximize') 50 | study.optimize(objective) 51 | trial = study.best_trial 52 | 53 | print(f"Accuracy = {trial.value}") 54 | print(f"best hypers = {trial.params}") -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to Reinforcement Learning Studio 2 | 3 | Thanks for your interest in contributing! 4 | 5 | This file contains a set of rules for contributing to this project and the 6 | rest of the projects developed by JdeRobot. 7 | If you have any doubts about how to contribute, contact one of the maintainers 8 | of the project. They will be pleased to tell you how you can contribute your 9 | knowledge to the project and organization! 10 | 11 | * [Code of conduct](#code-of-conduct) 12 | * [Prerequisites before contributing](#prerequisites-before-contributing) 13 | * [How can I contribute?](#how-can-i-contribute) 14 | 15 | 16 | 17 | ## Code of conduct 18 | Please report any unacceptable behavior to any of [the maintainers](#i-have-a-question). 19 | 20 | 21 | ## Prerequisites before contributing 22 | In order to contribute to JdeRobot projects, please read the project README.md/[webpage](https://github.com/JdeRobot/RL-Studio) carefully before 23 | you start contributing, to understand the purpose of the project and where you can contribute. 24 | 25 | 26 | ## How can I contribute? 27 | All JdeRobot projects follow the same contribution workflow. 28 | 29 | * **Find a problem or possible improvement for the project:** First of all, check that the feature/bug is not listed in the current [open issues](https://github.com/JdeRobot/RL-Studio/issues). 
30 | 31 | * **Create an issue:** [Create an issue](https://github.com/JdeRobot/RL-Studio/issues/new) in the project with the problem/improvement you will 32 | address. In this issue, explain carefully what you will be updating and how these changes will impact the project. 33 | Provide any complementary information to explain it (code samples, screenshots...). You should include information about: 34 | * Expected behavior 35 | * Actual behavior 36 | * Steps to reproduce 37 | * Environment 38 | * Possible cause 39 | * Possible solution 40 | 41 | The two following points differ depending on the permissions you have on the repo. 42 | * **[If you have write permission] Always work in a separate branch:** Create a new branch with a descriptive name (you can use the issue number as the branch name, e.g. "issue_xxx"). Create your commits in that branch, making the needed changes. Please use descriptive commit messages so everyone can easily understand the changes you made. 43 | 44 | * **[If you only have read permission] Fork the project:** Fork the project and work on that copy of the repo, making the desired changes. Please use descriptive commit messages so everyone can easily understand the changes you made. 45 | 46 | * **Open a pull request:** A pull request is required whenever a new change is to be added to the core of the project. After solving the issue, create a pull request with your branch. In this pull request, include all the commits made, write a good description of the changes, and reference the issue it solves to make things easier for the maintainers. Include any additional resources that would be helpful (references, screenshots...). Link the PR with the issue. 47 | 48 | * **Testing and merging pull requests** 49 | One of the maintainers will review your code. The reviewer may ask you to modify your pull request. 50 | Please respond to reviewers in a timely manner (within weeks, not months); otherwise your submission may be postponed or even rejected. 51 | 52 | * **[If you have write permission] Don't accept your own pull requests:** Wait for a project maintainer to accept the changes you made. They will probably comment on the pull request with some feedback and consider whether it can be merged into the master branch. Be proactive and kind! 
53 | 54 | -------------------------------------------------------------------------------- /rl_studio/config/config_inference_followline_dqn_f1_gazebo.yaml: -------------------------------------------------------------------------------- 1 | settings: 2 | mode: inference 3 | task: follow_line_gazebo # follow_line_gazebo, follow_lane_gazebo, autoparking_gazebo 4 | algorithm: dqn # qlearn, dqn, ddpg, ppo 5 | simulator: gazebo # openai, carla, gazebo, sumo 6 | environment_set: gazebo_environments # gazebo_environments, carla_environments 7 | env: simple # simple, nurburgring, montreal, curves, simple_laser, manual, autoparking 8 | agent: f1 # f1, autoparkingRL, auto_carla, mountain_car, robot_mesh, cartpole, turtlebot 9 | actions: simple # simple, medium, hard, test 10 | states: sp1 #image, sp1 (simplified perception with 1 point), sp3 (simplified perception with 3 points), spn (simplified perception with n points) 11 | rewards: followline_center # rewards_followline_center 12 | framework: TensorFlow # TensorFlow, Pytorch 13 | total_episodes: 5 14 | training_time: 6 15 | models_dir: "./checkpoints" 16 | logs_dir: "./logs" 17 | metrics_dir: "./metrics" 18 | 19 | ros: 20 | ros_master_uri: "11311" 21 | gazebo_master_uri: "11345" 22 | 23 | retraining: 24 | dqn: 25 | retrain_dqn_tf_model_name: 26 | 27 | inference: 28 | dqn: 29 | inference_dqn_tf_model_name: "DQN_sp_16x16_LAPCOMPLETED_Max990_Epoch3_inTime20230110-180135.model" 30 | 31 | algorithm: 32 | dqn: 33 | alpha: 0.8 34 | gamma: 0.9 35 | epsilon: 0.99 36 | epsilon_discount: 0.9986 37 | epsilon_min: 0.05 38 | model_name: DQN_sp_16x16 39 | replay_memory_size: 50_000 40 | min_replay_memory_size: 1000 41 | minibatch_size: 64 42 | update_target_every: 5 43 | memory_fraction: 0.20 44 | buffer_capacity: 100_000 45 | batch_size: 64 46 | 47 | agents: 48 | f1: 49 | camera_params: 50 | width: 640 51 | height: 480 52 | center_image: 320 53 | raw_image: False 54 | image_resizing: 100 55 | new_image_size: 32 56 | num_regions: 16 57 | lower_limit: 220 58 | 59 | states: 60 | image: 61 | 0: [3] 62 | sp1: 63 | 0: [10] 64 | sp3: 65 | 0: [5, 15, 22] 66 | sp5: 67 | 0: [3, 5, 10, 15, 20] 68 | spn: 69 | 0: [10] 70 | 71 | actions: 72 | simple: 73 | 0: [3, 0] 74 | 1: [2, 1] 75 | 2: [2, -1] 76 | medium: 77 | 0: [3, 0] 78 | 1: [2, 1] 79 | 2: [2, -1] 80 | 3: [1, 1.5] 81 | 4: [1, -1.5] 82 | hard: 83 | 0: [3, 0] 84 | 1: [2, 1] 85 | 2: [2, -1] 86 | 3: [1.5, 1] 87 | 4: [1.5, -1] 88 | 5: [1, -1.5] 89 | 6: [1, -1.5] 90 | test: 91 | 0: [0, 0] 92 | 93 | rewards: 94 | followline_center: 95 | from_10: 10 96 | from_02: 2 97 | from_01: 1 98 | penal: -100 99 | min_reward: 5_000 100 | highest_reward: 100 101 | 102 | gazebo_environments: 103 | simple: 104 | env_name: F1Env-v0 105 | circuit_name: simple 106 | launchfile: simple_circuit.launch 107 | environment_folder: f1 108 | robot_name: f1_renault 109 | model_state_name: f1_renault # f1_renault_multicamera_multilaser 110 | start_pose: 0 # 0, 1, 2, 3, 4 111 | alternate_pose: False 112 | estimated_steps: 50 113 | sensor: camera 114 | save_episodes: 5 115 | save_every_step: 10 116 | lap_completed: False 117 | save_model: True 118 | save_positions: True 119 | debug_level: DEBUG 120 | telemetry: False 121 | telemetry_mask: False 122 | plotter_graphic: False 123 | circuit_positions_set: 124 | 0: [53.462, -41.988, 0.004, 0, 0, 1.57, -1.57] 125 | 1: [53.462, -8.734, 0.004, 0, 0, 1.57, -1.57] 126 | 2: [39.712, -30.741, 0.004, 0, 0, 1.56, 1.56] 127 | 3: [-6.861, -36.481, 0.004, 0, 0.01, -0.858, 0.613] 128 | 4: [20.043, 37.130, 0.003, 0, 0.103, 
-1.4383, -1.4383] 129 | -------------------------------------------------------------------------------- /rl_studio/envs/gazebo/f1/models/reset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from rl_studio.agents.utils import ( 4 | print_messages, 5 | ) 6 | from rl_studio.envs.gazebo.f1.models.f1_env import F1Env 7 | 8 | 9 | class Reset(F1Env): 10 | """ 11 | Works for Follow Line and Follow Lane tasks 12 | """ 13 | 14 | def reset_f1_state_image(self): 15 | """ 16 | reset for 17 | - State: Image 18 | - tasks: FollowLane and FollowLine 19 | """ 20 | self._gazebo_reset() 21 | # === POSE === 22 | if self.alternate_pose: 23 | self._gazebo_set_random_pose_f1_follow_rigth_lane() 24 | else: 25 | self._gazebo_set_fix_pose_f1_follow_right_lane() 26 | 27 | self._gazebo_unpause() 28 | 29 | ##==== get image from sensor camera 30 | f1_image_camera, _ = self.f1gazeboimages.get_camera_info() 31 | self._gazebo_pause() 32 | 33 | ##==== calculating State 34 | # image as observation 35 | state = np.array( 36 | self.f1gazeboimages.image_preprocessing_black_white_32x32( 37 | f1_image_camera.data, self.height 38 | ) 39 | ) 40 | state_size = state.shape 41 | 42 | return state, state_size 43 | 44 | def reset_f1_state_sp(self): 45 | """ 46 | reset for 47 | - State: Simplified perception 48 | - tasks: FollowLane and FollowLine 49 | """ 50 | self._gazebo_reset() 51 | # === POSE === 52 | if self.alternate_pose: 53 | self._gazebo_set_random_pose_f1_follow_rigth_lane() 54 | else: 55 | self._gazebo_set_fix_pose_f1_follow_right_lane() 56 | 57 | self._gazebo_unpause() 58 | 59 | ##==== get image from sensor camera 60 | f1_image_camera, _ = self.f1gazeboimages.get_camera_info() 61 | self._gazebo_pause() 62 | 63 | ##==== calculating State 64 | # simplified perception as observation 65 | points_in_red_line, centrals_normalized = self.simplifiedperception.processed_image( 66 | f1_image_camera.data, self.height, self.width, self.x_row, self.center_image 67 | ) 68 | self._gazebo_pause() 69 | states = centrals_normalized 70 | state_size = len(states) 71 | 72 | return states, state_size 73 | 74 | def reset_f1_state_sp_line(self): 75 | """ 76 | reset for 77 | - State: Simplified perception 78 | - tasks: FollowLane and FollowLine 79 | """ 80 | self._gazebo_reset() 81 | # === POSE === 82 | if self.alternate_pose: 83 | self._gazebo_set_random_pose_f1_follow_rigth_lane() 84 | else: 85 | self._gazebo_set_fix_pose_f1_follow_right_lane() 86 | 87 | ##==== get image from sensor camera 88 | f1_image_camera, _ = self.f1gazeboimages.get_camera_info() 89 | f1_image_camera, _ = self.f1gazeboimages.get_camera_info() 90 | 91 | ##==== get center 92 | points_in_red_line, _ = self.simplifiedperception.processed_image( 93 | f1_image_camera.data, self.height, self.width, self.x_row, self.center_image 94 | ) 95 | 96 | if self.state_space == "spn": 97 | self.point = points_in_red_line[self.poi] 98 | else: 99 | self.point = points_in_red_line[0] 100 | 101 | ##==== get State 102 | ##==== simplified perception as observation 103 | states = self.simplifiedperception.calculate_observation( 104 | points_in_red_line, self.center_image, self.pixel_region 105 | ) 106 | 107 | state_size = len(states) 108 | 109 | return states, state_size 110 | -------------------------------------------------------------------------------- /rl_studio/config/config_training_followlane_qlearn_carla.yaml: -------------------------------------------------------------------------------- 1 | settings: 2 | mode: training 
# training, retraining 3 | task: follow_lane_carla # follow_line_gazebo, follow_lane_gazebo 4 | algorithm: qlearn # qlearn 5 | simulator: carla # openai, carla, gazebo, sumo 6 | environment_set: carla_environments # gazebo_environments, carla_environments 7 | env: follow_lane # Town01, simple, nurburgring, montreal, curves, simple_laser, manual, autoparking 8 | agent: auto_carla # f1, autoparkingRL, auto_carla, mountain_car, robot_mesh, cartpole, turtlebot 9 | actions: carla_simple # simple, medium, hard, test, autoparking_simple 10 | states: sp4 # sp1 (simplified perception with 1 point), sp3 (simplified perception with 3 points), spn (simplified perception with n points) 11 | rewards: follow_right_lane_only_center # 12 | framework: _ 13 | total_episodes: 3 14 | training_time: 6 15 | models_dir: "./checkpoints" 16 | logs_dir: "./logs" 17 | metrics_dir: "./metrics" 18 | recorder_carla_dir: "./recorders" 19 | 20 | ros: 21 | ros_master_uri: "11311" 22 | gazebo_master_uri: "11345" 23 | 24 | carla: 25 | carla_server: localhost 26 | carla_client: 2000 27 | 28 | retraining: 29 | qlearn: 30 | retrain_qlearn_model_name: "20230123-161229_Circuit-simple_States-sp1_Actions-simple_Rewards-follow_right_lane_only_center_epsilon-0.399_epoch-291_step-15001_reward-136707-qtable.npy" 31 | 32 | inference: 33 | qlearn: 34 | inference_qlearn_model_name: 35 | 36 | algorithm: 37 | qlearn: 38 | alpha: 0.7 39 | epsilon: 0.95 40 | epsilon_min: 0.05 41 | gamma: 0.9 42 | 43 | agents: 44 | auto_carla: 45 | camera_params: 46 | width: 640 47 | height: 480 48 | center_image: 320 49 | raw_image: False 50 | image_resizing: 100 51 | new_image_size: 32 52 | num_regions: 16 53 | lower_limit: 220 54 | 55 | states: 56 | sp1: 57 | 0: [50] 58 | sp4: 59 | 0: [30, 60, 100, 120] 60 | sp5: 61 | 0: [3, 5, 10, 15, 20] 62 | spn: 63 | 0: [50, 120, 180] 64 | 65 | actions: 66 | carla_simple: 67 | 0: [1, -0.2] 68 | 1: [1, 0] 69 | 2: [1, 0.2] 70 | simple: 71 | 0: [2, -1] 72 | 1: [3, 0] 73 | 2: [2, 1] 74 | medium: 75 | 0: [3, 0] 76 | 1: [2, 1] 77 | 2: [2, -1] 78 | 3: [1, 1.5] 79 | 4: [1, -1.5] 80 | hard: 81 | 0: [3, 0] 82 | 1: [2, 1] 83 | 2: [2, -1] 84 | 3: [1.5, 1] 85 | 4: [1.5, -1] 86 | 5: [1, -1.5] 87 | 6: [1, -1.5] 88 | test: 89 | 0: [0, 0] 90 | 91 | rewards: 92 | follow_right_lane_only_center: 93 | from_10: 10 94 | from_02: 2 95 | from_01: 1 96 | penal: -100 97 | min_reward: 5_000 98 | highest_reward: 100 99 | follow_right_lane_center_v_step: 100 | from_10: 10 101 | from_02: 2 102 | from_01: 1 103 | penal: -100 104 | min_reward: 5_000 105 | highest_reward: 100 106 | 107 | carla_environments: 108 | follow_lane: 109 | env_name: CarlaEnv-v0 110 | town: Town01 #Town01, Town02, Town03, Town04, Town05, Town10HD 111 | # Town01_Opt, Town02_Opt, Town03_Opt, Town04_Opt, Town05_Opt, Town10HD_Opt 112 | car: model1 113 | #sync_mode: True 114 | weather: ClearNoon #dynamic, argparse 115 | traffic_pedestrians: False 116 | city_lights: False 117 | car_lights: False 118 | estimated_steps: 5 119 | save_episodes: 10 120 | save_every_step: 1000 121 | init_pose: 122 | goal_pose: 123 | filter: vehicle.* 124 | generation: "2" 125 | rolename: "hero" #name 126 | gamma: 2.2 #for camera 127 | sync: True #syncronous mode or async 128 | alternate_pose: False 129 | waypoints_meters: 5 #distance between waypoints in meters 130 | waypoints_init: 839 131 | waypoints_target: 959 #961 132 | waypoints_lane_id: -1 133 | waypoints_road_id: 8 134 | -------------------------------------------------------------------------------- /rl_studio/envs/carla/utils/synchronous_mode.py: 
-------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import sys 4 | 5 | try: 6 | sys.path.append( 7 | glob.glob( 8 | "../carla/dist/carla-*%d.%d-%s.egg" 9 | % ( 10 | sys.version_info.major, 11 | sys.version_info.minor, 12 | "win-amd64" if os.name == "nt" else "linux-x86_64", 13 | ) 14 | )[0] 15 | ) 16 | except IndexError: 17 | pass 18 | 19 | import carla 20 | 21 | import random 22 | 23 | try: 24 | import pygame 25 | except ImportError: 26 | raise RuntimeError("cannot import pygame, make sure pygame package is installed") 27 | 28 | try: 29 | import numpy as np 30 | except ImportError: 31 | raise RuntimeError("cannot import numpy, make sure numpy package is installed") 32 | 33 | try: 34 | import queue 35 | except ImportError: 36 | import Queue as queue 37 | 38 | 39 | class CarlaSyncMode(object): 40 | """ 41 | Context manager to synchronize output from different sensors. Synchronous 42 | mode is enabled as long as we are inside this context 43 | 44 | with CarlaSyncMode(world, sensors) as sync_mode: 45 | while True: 46 | data = sync_mode.tick(timeout=1.0) 47 | 48 | """ 49 | 50 | def __init__(self, world, *sensors, **kwargs): 51 | self.world = world 52 | self.sensors = sensors 53 | self.frame = None 54 | self.delta_seconds = 1.0 / kwargs.get("fps", 20) 55 | self._queues = [] 56 | self._settings = None 57 | 58 | def __enter__(self): 59 | self._settings = self.world.get_settings() 60 | self.frame = self.world.apply_settings( 61 | carla.WorldSettings( 62 | no_rendering_mode=False, 63 | synchronous_mode=True, 64 | fixed_delta_seconds=self.delta_seconds, 65 | ) 66 | ) 67 | 68 | def make_queue(register_event): 69 | q = queue.Queue() 70 | register_event(q.put) 71 | self._queues.append(q) 72 | 73 | make_queue(self.world.on_tick) 74 | for sensor in self.sensors: 75 | make_queue(sensor.listen) 76 | return self 77 | 78 | def tick(self, timeout): 79 | self.frame = self.world.tick() 80 | data = [self._retrieve_data(q, timeout) for q in self._queues] 81 | assert all(x.frame == self.frame for x in data) 82 | return data 83 | 84 | def __exit__(self, *args, **kwargs): 85 | self.world.apply_settings(self._settings) 86 | 87 | def _retrieve_data(self, sensor_queue, timeout): 88 | while True: 89 | data = sensor_queue.get(timeout=timeout) 90 | if data.frame == self.frame: 91 | return data 92 | 93 | 94 | def draw_image(surface, image, blend=False): 95 | array = np.frombuffer(image.raw_data, dtype=np.dtype("uint8")) 96 | array = np.reshape(array, (image.height, image.width, 4)) 97 | array = array[:, :, :3] 98 | array = array[:, :, ::-1] 99 | image_surface = pygame.surfarray.make_surface(array.swapaxes(0, 1)) 100 | if blend: 101 | image_surface.set_alpha(100) 102 | surface.blit(image_surface, (0, 0)) 103 | 104 | 105 | def get_font(): 106 | fonts = [x for x in pygame.font.get_fonts()] 107 | default_font = "ubuntumono" 108 | font = default_font if default_font in fonts else fonts[0] 109 | font = pygame.font.match_font(font) 110 | return pygame.font.Font(font, 14) 111 | 112 | 113 | def should_quit(): 114 | for event in pygame.event.get(): 115 | if event.type == pygame.QUIT: 116 | return True 117 | elif event.type == pygame.KEYUP: 118 | if event.key == pygame.K_ESCAPE: 119 | return True 120 | return False 121 | -------------------------------------------------------------------------------- /rl_studio/algorithms/ppo.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import gym 4 | from 
torch import nn 5 | from torch.nn import functional as F 6 | import matplotlib.pyplot as plt 7 | from torch.utils import tensorboard 8 | import pickle 9 | 10 | 11 | def mish(input): 12 | return input * torch.tanh(F.softplus(input)) 13 | 14 | 15 | class Mish(nn.Module): 16 | def __init__(self): super().__init__() 17 | 18 | def forward(self, input): return mish(input) 19 | 20 | 21 | # helper function to convert numpy arrays to tensors 22 | def t(x): 23 | return torch.from_numpy(x).float() 24 | 25 | def get_dist(x): 26 | return torch.distributions.Categorical(probs=x) 27 | 28 | # Actor module, categorical actions only 29 | class Actor(nn.Module): 30 | def __init__(self, state_dim, n_actions, activation=nn.Tanh): 31 | super().__init__() 32 | self.model = nn.Sequential( 33 | nn.Linear(state_dim, 64), 34 | activation(), 35 | nn.Linear(64, 32), 36 | activation(), 37 | nn.Linear(32, n_actions), 38 | nn.Softmax() 39 | ) 40 | self.adam_actor = torch.optim.Adam(self.parameters(), lr=3e-4) 41 | torch.manual_seed(1) 42 | 43 | def train(self, w, prev_prob_act, prob_act, advantage, global_steps, epsilon): 44 | actor_loss = self.policy_loss(prev_prob_act.detach(), prob_act, advantage.detach(), epsilon) 45 | w.add_scalar("loss/actor_loss", actor_loss, global_step=global_steps) 46 | self.adam_actor.zero_grad() 47 | actor_loss.backward() 48 | # clip_grad_norm_(adam_actor, max_grad_norm) 49 | # w.add_histogram("gradients/actor", 50 | # torch.cat([p.grad.view(-1) for p in self.parameters()]), global_step=global_steps) 51 | self.adam_actor.step() 52 | return actor_loss 53 | 54 | def clip_grad_norm_(module, max_grad_norm): 55 | nn.utils.clip_grad_norm_([p for g in module.param_groups for p in g["params"]], max_grad_norm) 56 | 57 | def policy_loss(self, old_log_prob, log_prob, advantage, eps): 58 | ratio = (log_prob - old_log_prob).exp() 59 | clipped = torch.clamp(ratio, 1 - eps, 1 + eps) * advantage 60 | 61 | m = torch.min(ratio * advantage, clipped) 62 | return -m 63 | 64 | def forward(self, X): 65 | return self.model(X) 66 | 67 | def load_model(self, actor_file_path): 68 | model = open(actor_file_path, "rb") 69 | 70 | self.model = pickle.load(model) 71 | 72 | print(f"\n\nMODEL LOADED.") 73 | 74 | def inference(self, state): 75 | probs = self(t(state)) 76 | dist = get_dist(probs) 77 | action = dist.sample() 78 | 79 | return action 80 | 81 | 82 | # Critic module 83 | class Critic(nn.Module): 84 | def __init__(self, state_dim, activation=nn.Tanh): 85 | super().__init__() 86 | self.model = nn.Sequential( 87 | nn.Linear(state_dim, 64), 88 | activation(), 89 | nn.Linear(64, 32), 90 | activation(), 91 | nn.Linear(32, 1) 92 | ) 93 | self.adam_critic = torch.optim.Adam(self.parameters(), lr=1e-3) 94 | torch.manual_seed(1) 95 | 96 | def train(self, w, advantage, global_steps): 97 | critic_loss = advantage.pow(2).mean() 98 | w.add_scalar("loss/critic_loss", critic_loss, global_step=global_steps) 99 | self.adam_critic.zero_grad() 100 | critic_loss.backward() 101 | # clip_grad_norm_(adam_critic, max_grad_norm) 102 | # w.add_histogram("gradients/critic", 103 | # torch.cat([p.data.view(-1) for p in self.parameters()]), global_step=global_steps) 104 | self.adam_critic.step() 105 | return critic_loss 106 | 107 | def forward(self, X): 108 | return self.model(X) 109 | -------------------------------------------------------------------------------- /rl_studio/agents/turtlebot/circuit2_turtlebot_lidar_qlearn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import 
time 3 | 4 | import gym 5 | import numpy 6 | 7 | import liveplot 8 | import qlearn 9 | 10 | 11 | def render(): 12 | render_skip = 0 # Skip first X episodes. 13 | render_interval = 50 # Show render Every Y episodes. 14 | render_episodes = 10 # Show Z episodes every rendering. 15 | 16 | if (x % render_interval == 0) and (x != 0) and (x > render_skip): 17 | env.render() 18 | elif ( 19 | ((x - render_episodes) % render_interval == 0) 20 | and (x != 0) 21 | and (x > render_skip) 22 | and (render_episodes < x) 23 | ): 24 | env.render(close=True) 25 | 26 | 27 | if __name__ == "__main__": 28 | 29 | env = gym.make("GazeboCircuit2TurtlebotLidar-v0") 30 | 31 | outdir = "/tmp/gazebo_gym_experiments" 32 | env = gym.wrappers.Monitor(env, outdir, force=True) 33 | plotter = liveplot.LivePlot(outdir) 34 | 35 | last_time_steps = numpy.ndarray(0) 36 | 37 | qlearn = qlearn.QLearn( 38 | actions=range(env.action_space.n), alpha=0.2, gamma=0.8, epsilon=0.9 39 | ) 40 | 41 | initial_epsilon = qlearn.epsilon 42 | 43 | epsilon_discount = 0.9986 44 | 45 | start_time = time.time() 46 | total_episodes = 10000 47 | highest_reward = 0 48 | 49 | for x in range(total_episodes): 50 | done = False 51 | 52 | cumulated_reward = 0 # Should going forward give more reward then L/R ? 53 | 54 | observation = env.reset() 55 | 56 | if qlearn.epsilon > 0.05: 57 | qlearn.epsilon *= epsilon_discount 58 | 59 | # render() #defined above, not env.render() 60 | 61 | state = "".join(map(str, observation)) 62 | 63 | for i in range(1500): 64 | 65 | # Pick an action based on the current state 66 | action = qlearn.chooseAction(state) 67 | 68 | # Execute the action and get feedback 69 | observation, reward, done, info = env.step(action) 70 | cumulated_reward += reward 71 | 72 | if highest_reward < cumulated_reward: 73 | highest_reward = cumulated_reward 74 | 75 | nextState = "".join(map(str, observation)) 76 | 77 | qlearn.learn(state, action, reward, nextState) 78 | 79 | env._flush(force=True) 80 | 81 | if not (done): 82 | state = nextState 83 | else: 84 | last_time_steps = numpy.append(last_time_steps, [int(i + 1)]) 85 | break 86 | 87 | if x % 100 == 0: 88 | plotter.plot(env) 89 | 90 | m, s = divmod(int(time.time() - start_time), 60) 91 | h, m = divmod(m, 60) 92 | print( 93 | "EP: " 94 | + str(x + 1) 95 | + " - [alpha: " 96 | + str(round(qlearn.alpha, 2)) 97 | + " - gamma: " 98 | + str(round(qlearn.gamma, 2)) 99 | + " - epsilon: " 100 | + str(round(qlearn.epsilon, 2)) 101 | + "] - Reward: " 102 | + str(cumulated_reward) 103 | + " Time: %d:%02d:%02d" % (h, m, s) 104 | ) 105 | 106 | # Github table content 107 | print( 108 | "\n|" 109 | + str(total_episodes) 110 | + "|" 111 | + str(qlearn.alpha) 112 | + "|" 113 | + str(qlearn.gamma) 114 | + "|" 115 | + str(initial_epsilon) 116 | + "*" 117 | + str(epsilon_discount) 118 | + "|" 119 | + str(highest_reward) 120 | + "| PICTURE |" 121 | ) 122 | 123 | l = last_time_steps.tolist() 124 | l.sort() 125 | 126 | # print("Parameters: a="+str) 127 | print("Overall score: {:0.2f}".format(last_time_steps.mean())) 128 | print( 129 | "Best 100 score: {:0.2f}".format( 130 | reduce(lambda x, y: x + y, l[-100:]) / len(l[-100:]) 131 | ) 132 | ) 133 | 134 | env.close() 135 | -------------------------------------------------------------------------------- /rl_studio/envs/carla/utils/plot_topology.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # This work is licensed under the terms of the MIT license. 4 | # For a copy, see . 
5 | 6 | import glob 7 | import os 8 | import sys 9 | 10 | try: 11 | sys.path.append( 12 | glob.glob( 13 | "../carla/dist/carla-*%d.%d-%s.egg" 14 | % ( 15 | sys.version_info.major, 16 | sys.version_info.minor, 17 | "win-amd64" if os.name == "nt" else "linux-x86_64", 18 | ) 19 | )[0] 20 | ) 21 | except IndexError: 22 | pass 23 | 24 | import carla 25 | 26 | import argparse 27 | 28 | 29 | def main(): 30 | argparser = argparse.ArgumentParser() 31 | argparser.add_argument( 32 | "--host", 33 | metavar="H", 34 | default="127.0.0.1", 35 | help="IP of the host server (default: 127.0.0.1)", 36 | ) 37 | argparser.add_argument( 38 | "-p", 39 | "--port", 40 | metavar="P", 41 | default=2000, 42 | type=int, 43 | help="TCP port to listen to (default: 2000)", 44 | ) 45 | argparser.add_argument( 46 | "-t", 47 | "--town", 48 | metavar="T", 49 | default="Town01", 50 | help="load Town to work with", 51 | ) 52 | args = argparser.parse_args() 53 | 54 | # Approximate distance between the waypoints 55 | WAYPOINT_DISTANCE = 5.0 # in meters 56 | 57 | try: 58 | client = carla.Client(args.host, args.port) 59 | client.set_timeout(2.0) 60 | 61 | # world = client.get_world() 62 | world = client.load_world(args.town) 63 | carla_map = world.get_map() 64 | 65 | import matplotlib.pyplot as plt 66 | 67 | # plt.subplot(211) 68 | # Invert the y axis since we follow UE4 coordinates 69 | plt.gca().invert_yaxis() 70 | plt.margins(x=0.7, y=0) 71 | 72 | # GET WAYPOINTS IN THE MAP ########################################## 73 | # Returns a list of waypoints positioned on the center of the lanes 74 | # all over the map with an approximate distance between them. 75 | topology = carla_map.get_topology() 76 | road_list = [] 77 | 78 | for wp_pair in topology: 79 | current_wp = wp_pair[0] 80 | # Check if there is a road with no previus road, this can happen 81 | # in opendrive. Then just continue. 82 | if current_wp is None: 83 | continue 84 | # First waypoint on the road that goes from wp_pair[0] to wp_pair[1]. 85 | current_road_id = current_wp.road_id 86 | wps_in_single_road = [current_wp] 87 | # While current_wp has the same road_id (has not arrived to next road). 88 | while current_wp.road_id == current_road_id: 89 | # Check for next waypoints in aprox distance. 90 | available_next_wps = current_wp.next(WAYPOINT_DISTANCE) 91 | # If there is next waypoint/s? 92 | if available_next_wps: 93 | # We must take the first ([0]) element because next(dist) can 94 | # return multiple waypoints in intersections. 95 | current_wp = available_next_wps[0] 96 | wps_in_single_road.append(current_wp) 97 | else: # If there is no more waypoints we can stop searching for more. 
98 | break 99 | road_list.append(wps_in_single_road) 100 | 101 | # Plot each road (on a different color by default) 102 | for road in road_list: 103 | plt.plot( 104 | [wp.transform.location.x for wp in road], 105 | [wp.transform.location.y for wp in road], 106 | ) 107 | 108 | plt.show() 109 | 110 | finally: 111 | pass 112 | 113 | 114 | if __name__ == "__main__": 115 | try: 116 | main() 117 | finally: 118 | print("Done.") 119 | -------------------------------------------------------------------------------- /rl_studio/config/config_training_followlane_qlearn_f1_gazebo.yaml: -------------------------------------------------------------------------------- 1 | settings: 2 | mode: retraining # training, retraining 3 | task: follow_lane_gazebo # follow_line_gazebo 4 | algorithm: qlearn # qlearn 5 | simulator: gazebo # openai, carla, gazebo, sumo 6 | environment_set: gazebo_environments # gazebo_environments, carla_environments 7 | env: simple # simple, nurburgring, montreal, curves, simple_laser, manual, autoparking 8 | agent: f1 # f1, autoparkingRL, auto_carla, mountain_car, robot_mesh, cartpole, turtlebot 9 | actions: simple # simple, medium, hard, test, autoparking_simple 10 | states: sp1 # sp1 (simplified perception with 1 point), sp3 (simplified perception with 3 points), spn (simplified perception with n points) 11 | rewards: follow_right_lane_only_center # 12 | framework: _ 13 | total_episodes: 1_000 14 | training_time: 6 15 | models_dir: "./checkpoints" 16 | logs_dir: "./logs" 17 | metrics_dir: "./metrics" 18 | 19 | ros: 20 | ros_master_uri: "11311" 21 | gazebo_master_uri: "11345" 22 | 23 | retraining: 24 | qlearn: 25 | retrain_qlearn_model_name: "20230123-161229_Circuit-simple_States-sp1_Actions-simple_Rewards-follow_right_lane_only_center_epsilon-0.399_epoch-291_step-15001_reward-136707-qtable.npy" 26 | 27 | inference: 28 | qlearn: 29 | inference_qlearn_model_name: 30 | 31 | algorithm: 32 | qlearn: 33 | alpha: 0.2 34 | epsilon: 0.95 35 | epsilon_min: 0.05 36 | gamma: 0.9 37 | 38 | agents: 39 | f1: 40 | camera_params: 41 | width: 640 42 | height: 480 43 | center_image: 320 44 | raw_image: False 45 | image_resizing: 100 46 | new_image_size: 32 47 | num_regions: 16 48 | lower_limit: 220 49 | 50 | states: 51 | sp1: 52 | 0: [50] 53 | sp3: 54 | 0: [5, 15, 22] 55 | sp5: 56 | 0: [3, 5, 10, 15, 20] 57 | spn: 58 | 0: [10] 59 | 60 | actions: 61 | simple: 62 | 0: [3, 0] 63 | 1: [2, 1] 64 | 2: [2, -1] 65 | medium: 66 | 0: [3, 0] 67 | 1: [2, 1] 68 | 2: [2, -1] 69 | 3: [1, 1.5] 70 | 4: [1, -1.5] 71 | hard: 72 | 0: [3, 0] 73 | 1: [2, 1] 74 | 2: [2, -1] 75 | 3: [1.5, 1] 76 | 4: [1.5, -1] 77 | 5: [1, -1.5] 78 | 6: [1, -1.5] 79 | test: 80 | 0: [0, 0] 81 | 82 | rewards: 83 | follow_right_lane_only_center: 84 | from_10: 10 85 | from_02: 2 86 | from_01: 1 87 | penal: -100 88 | min_reward: 5_000 89 | highest_reward: 100 90 | follow_right_lane_center_v_step: 91 | from_10: 10 92 | from_02: 2 93 | from_01: 1 94 | penal: -100 95 | min_reward: 5_000 96 | highest_reward: 100 97 | 98 | gazebo_environments: 99 | simple: 100 | env_name: F1Env-v0 101 | circuit_name: simple 102 | launchfile: simple_circuit_no_wall.launch #simple_circuit.launch 103 | environment_folder: f1 104 | robot_name: f1_renault 105 | model_state_name: f1_renault_multicamera_multilaser # f1_renault, 106 | start_pose: 0 # 0, 1, 2, 3, 4 107 | alternate_pose: False 108 | estimated_steps: 15_000 109 | sensor: camera 110 | save_episodes: 10 111 | save_every_step: 1000 112 | lap_completed: False 113 | save_model: True 114 | save_positions: True 115 | 
debug_level: DEBUG 116 | telemetry: False 117 | telemetry_mask: False 118 | plotter_graphic: False 119 | circuit_positions_set: 120 | 0: [52.462, -41.988, 0.004, 0, 0, 1.57, -1.57] 121 | #0: [52.800, -12.734, 0.004, 0, 0, 1.57, -1.57] # near to first curve 122 | #0: [52.460, -8.734, 0.004, 0, 0, 1.57, -1.57] # Finish line 123 | #0: [53.462, -41.988, 0.004, 0, 0, 1.57, -1.57] 124 | 1: [52.97, -42.06, 0.004, 0, 0, 1.57, -1.57] 125 | #1: [53.462, -8.734, 0.004, 0, 0, 1.57, -1.57] 126 | #2: [39.712, -30.741, 0.004, 0, 0, 1.56, 1.56] 127 | 2: [40.2, -30.741, 0.004, 0, 0, 1.56, 1.56] 128 | #3: [-6.861, -36.481, 0.004, 0, 0.01, -0.858, 0.613] 129 | 3: [0, 31.15, 0.004, 0, 0.01, 0, 0.31] 130 | #4: [20.043, 37.130, 0.003, 0, 0.103, -1.4383, -1.4383] 131 | 4: [19.25, 43.50, 0.004, 0, 0.0, 1.57, -1.69] 132 | 5: [52.800, -35.486, 0.004, 0, 0, 1.57, -1.57] # near to first curve 133 | -------------------------------------------------------------------------------- /rl_studio/config/config_training_followline_dqn_f1_gazebo.yaml: -------------------------------------------------------------------------------- 1 | settings: 2 | mode: retraining # training, retraining, inference 3 | task: follow_line_gazebo # follow_line_gazebo, follow_lane_gazebo, autoparking_gazebo 4 | algorithm: dqn # qlearn, dqn, ddpg, ppo 5 | simulator: gazebo # openai, carla, gazebo, sumo 6 | environment_set: gazebo_environments # gazebo_environments, carla_environments 7 | env: simple # simple, nurburgring, montreal, curves, simple_laser, manual, autoparking 8 | agent: f1 # f1, autoparkingRL, auto_carla, mountain_car, robot_mesh, cartpole, turtlebot 9 | actions: hard # simple, medium, hard, test 10 | states: sp10 #image, sp1 (simplified perception with 1 point), sp3 (simplified perception with 3 points), spn (simplified perception with n points) 11 | rewards: followline_center # followline_center 12 | framework: TensorFlow # TensorFlow, Pytorch 13 | recorder_carla_dir: 14 | total_episodes: 100000 15 | training_time: 90 16 | models_dir: "./checkpoints" 17 | logs_dir: "./logs" 18 | metrics_dir: "./metrics" 19 | debug_stats: false 20 | show_monitoring: true 21 | steps_to_decrease: 30000 22 | decrease_substraction: 0.01 23 | decrease_min: 0.03 24 | reward_params: 25 | function: linear 26 | punish_zig_zag_value: 0.5 27 | punish_ineffective_vel: 0 28 | beta_1: 0.8 29 | 30 | ros: 31 | ros_master_uri: "11311" 32 | gazebo_master_uri: "11345" 33 | 34 | retraining: 35 | dqn: 36 | retrain_dqn_tf_model_name: "DQN_sp_16x16_LAPCOMPLETED_Max3076_Epoch492_inTime20231218-045917.model" 37 | 38 | inference: 39 | dqn: 40 | inference_dqn_tf_model_name: 41 | 42 | algorithm: 43 | dqn: 44 | alpha: 0.9 45 | gamma: 0.9 46 | epsilon: 0.99 47 | epsilon_discount: 0.99 48 | epsilon_min: 0.01 49 | model_name: DQN_sp_16x16 50 | replay_memory_size: 50_000 51 | min_replay_memory_size: 64 52 | minibatch_size: 64 53 | update_target_every: 5 54 | memory_fraction: 0.20 55 | buffer_capacity: 100_000 56 | batch_size: 64 57 | 58 | agents: 59 | f1: 60 | camera_params: 61 | width: 640 62 | height: 480 63 | center_image: 320 64 | raw_image: False 65 | image_resizing: 100 66 | new_image_size: 32 67 | num_regions: 16 68 | lower_limit: 220 69 | 70 | states: 71 | image: 72 | 0: [3] 73 | sp1: 74 | 0: [10] 75 | sp3: 76 | 0: [5, 15, 22] 77 | sp5: 78 | 0: [3, 5, 10, 15, 20] 79 | sp10: 80 | 0: [3, 15, 30, 45, 50, 70, 100, 120, 150, 190] 81 | spn: 82 | 0: [10] 83 | 84 | actions: 85 | simple: 86 | 0: [3, 0] 87 | 1: [2, 1] 88 | 2: [2, -1] 89 | medium: 90 | 0: [3, 0] 91 | 1: [2, 1] 92 | 2: 
[2, -1] 93 | 3: [1, 1.5] 94 | 4: [1, -1.5] 95 | hard: 96 | 0: [7, 0] 97 | 1: [4, 1,5] 98 | 2: [4, -1,5] 99 | 3: [3, 1] 100 | 4: [3, -1] 101 | 5: [2, 1] 102 | 6: [2, -1] 103 | 7: [1, 0.5] 104 | 8: [1, -0.5] 105 | test: 106 | 0: [0, 0] 107 | 108 | rewards: 109 | followline_center: 110 | from_10: 10 111 | from_02: 2 112 | from_01: 1 113 | penal: -100 114 | min_reward: 5_000 115 | highest_reward: 100 116 | 117 | gazebo_environments: 118 | simple: 119 | env_name: F1Env-v0 120 | circuit_name: simple 121 | launchfile: simple_circuit.launch 122 | environment_folder: f1 123 | robot_name: f1_renault 124 | model_state_name: f1_renault # f1_renault_multicamera_multilaser 125 | start_pose: 0 # 0, 1, 2, 3, 4 126 | alternate_pose: True 127 | estimated_steps: 5000 128 | sensor: camera 129 | save_episodes: 500 130 | save_every_step: 5 131 | lap_completed: False 132 | save_model: True 133 | save_positions: True 134 | debug_level: DEBUG 135 | telemetry: False 136 | telemetry_mask: False 137 | plotter_graphic: False 138 | circuit_positions_set: 139 | 0: [53.462, -41.988, 0.004, 0, 0, 1.57, -1.57] 140 | 1: [53.462, -8.734, 0.004, 0, 0, 1.57, -1.57] #finish line 141 | 2: [39.712, -30.741, 0.004, 0, 0, 1.56, 1.56] 142 | 3: [-6.861, -36.481, 0.004, 0, 0.01, -0.858, 0.613] 143 | 4: [20.043, 37.130, 0.003, 0, 0.103, -1.4383, -1.4383] 144 | sleep: 0.05 145 | -------------------------------------------------------------------------------- /rl_studio/config/config_inference_followlane_qlearn_f1_gazebo.yaml: -------------------------------------------------------------------------------- 1 | settings: 2 | mode: inference # training, retraining, inference 3 | task: follow_lane_gazebo # follow_line_gazebo, follow_lane_gazebo, autoparking_gazebo 4 | algorithm: qlearn # qlearn, dqn, ddpg, ppo 5 | simulator: gazebo # openai, carla, gazebo, sumo 6 | environment_set: gazebo_environments # gazebo_environments, carla_environments 7 | env: simple # simple, nurburgring, montreal, curves, simple_laser, manual, autoparking 8 | agent: f1 # f1, autoparkingRL, auto_carla, mountain_car, robot_mesh, cartpole, turtlebot 9 | actions: simple # continuous, simple, medium, hard, test, autoparking_simple 10 | states: sp1 #image, sp1 (simplified perception with 1 point), sp3 (simplified perception with 3 points), spn (simplified perception with n points) 11 | rewards: follow_right_lane_only_center # rewards_followline_center 12 | framework: _ 13 | total_episodes: 10 14 | training_time: 6 15 | models_dir: "./checkpoints" 16 | logs_dir: "./logs" 17 | metrics_dir: "./metrics" 18 | 19 | ros: 20 | ros_master_uri: "11311" 21 | gazebo_master_uri: "11345" 22 | 23 | retraining: 24 | qlearn: 25 | retrain_qlearn_model_name: 26 | 27 | inference: 28 | qlearn: 29 | inference_qlearn_model_name: "20230109-154549_Circuit-simple_States-sp1_Actions-simple_Rewards-follow_right_lane_only_center_epsilon-0.76_epoch-2_step-101_reward-919-qtable.npy" 30 | 31 | algorithm: 32 | qlearn: 33 | alpha: 0.2 34 | epsilon: 0.95 35 | epsilon_min: 0.05 36 | gamma: 0.9 37 | 38 | agents: 39 | f1: 40 | camera_params: 41 | width: 640 42 | height: 480 43 | center_image: 320 44 | raw_image: False 45 | image_resizing: 100 46 | new_image_size: 32 47 | num_regions: 16 48 | lower_limit: 220 49 | 50 | states: 51 | sp1: 52 | 0: [10] 53 | sp3: 54 | 0: [5, 15, 22] 55 | sp5: 56 | 0: [3, 5, 10, 15, 20] 57 | spn: 58 | 0: [10] 59 | 60 | actions: 61 | simple: 62 | 0: [3, 0] 63 | 1: [2, 1] 64 | 2: [2, -1] 65 | medium: 66 | 0: [3, 0] 67 | 1: [2, 1] 68 | 2: [2, -1] 69 | 3: [1, 1.5] 70 | 4: [1, -1.5] 
71 | hard: 72 | 0: [3, 0] 73 | 1: [2, 1] 74 | 2: [2, -1] 75 | 3: [1.5, 1] 76 | 4: [1.5, -1] 77 | 5: [1, -1.5] 78 | 6: [1, -1.5] 79 | test: 80 | 0: [0, 0] 81 | 82 | rewards: 83 | follow_right_lane_only_center: 84 | from_10: 10 85 | from_02: 2 86 | from_01: 1 87 | penal: -100 88 | min_reward: 5_000 89 | highest_reward: 100 90 | follow_right_lane_center_v_step: 91 | from_10: 10 92 | from_02: 2 93 | from_01: 1 94 | penal: -100 95 | min_reward: 5_000 96 | highest_reward: 100 97 | 98 | gazebo_environments: 99 | simple: 100 | env_name: F1Env-v0 101 | circuit_name: simple 102 | launchfile: simple_circuit_no_wall.launch #simple_circuit.launch 103 | environment_folder: f1 104 | robot_name: f1_renault 105 | model_state_name: f1_renault_multicamera_multilaser # f1_renault, 106 | start_pose: 0 # 0, 1, 2, 3, 4 107 | alternate_pose: False 108 | estimated_steps: 100 109 | sensor: camera 110 | save_episodes: 1 111 | save_every_step: 10 112 | lap_completed: False 113 | save_model: True 114 | save_positions: True 115 | debug_level: DEBUG 116 | telemetry: False 117 | telemetry_mask: False 118 | plotter_graphic: False 119 | circuit_positions_set: 120 | 0: [52.462, -41.988, 0.004, 0, 0, 1.57, -1.57] 121 | #0: [52.800, -12.734, 0.004, 0, 0, 1.57, -1.57] # near to first curve 122 | #0: [52.800, -8.734, 0.004, 0, 0, 1.57, -1.57] # Finish line 123 | #0: [53.462, -41.988, 0.004, 0, 0, 1.57, -1.57] 124 | 1: [52.97, -42.06, 0.004, 0, 0, 1.57, -1.57] 125 | #1: [53.462, -8.734, 0.004, 0, 0, 1.57, -1.57] 126 | #2: [39.712, -30.741, 0.004, 0, 0, 1.56, 1.56] 127 | 2: [40.2, -30.741, 0.004, 0, 0, 1.56, 1.56] 128 | #3: [-6.861, -36.481, 0.004, 0, 0.01, -0.858, 0.613] 129 | 3: [0, 31.15, 0.004, 0, 0.01, 0, 0.31] 130 | #4: [20.043, 37.130, 0.003, 0, 0.103, -1.4383, -1.4383] 131 | 4: [19.25, 43.50, 0.004, 0, 0.0, 1.57, -1.69] 132 | 5: [52.800, -35.486, 0.004, 0, 0, 1.57, -1.57] # near to first curve 133 | -------------------------------------------------------------------------------- /rl_studio/config/config_inference_followline_ddpg_f1_gazebo.yaml: -------------------------------------------------------------------------------- 1 | settings: 2 | mode: inference # training, retraining, inference 3 | task: follow_line_gazebo # follow_line_gazebo, follow_lane_gazebo, autoparking_gazebo 4 | algorithm: ddpg # qlearn, dqn, ddpg, ppo 5 | simulator: gazebo # openai, carla, gazebo, sumo 6 | environment_set: gazebo_environments # gazebo_environments, carla_environments 7 | env: simple # simple, nurburgring, montreal, curves, simple_laser, manual, autoparking 8 | agent: f1 # f1, autoparkingRL, auto_carla, mountain_car, robot_mesh, cartpole, turtlebot 9 | actions: continuous # continuous, simple, medium, hard, test, autoparking_simple 10 | states: image #image, sp1 (simplified perception with 1 point), sp3 (simplified perception with 3 points), spn (simplified perception with n points) 11 | rewards: followline_center # rewards_followline_center 12 | framework: TensorFlow # TensorFlow, Pytorch 13 | total_episodes: 10 14 | training_time: 6 15 | models_dir: "./checkpoints" 16 | logs_dir: "./logs" 17 | metrics_dir: "./metrics" 18 | 19 | ros: 20 | ros_master_uri: "11311" 21 | gazebo_master_uri: "11345" 22 | 23 | retraining: 24 | ddpg: 25 | retrain_ddpg_tf_actor_model_name: 26 | retrain_ddpg_tf_critic_model_name: 27 | 28 | inference: 29 | ddpg: 30 | inference_ddpg_tf_actor_model_name: 
"20230111_DDPG_Actor_conv2d32x64_Critic_conv2d32x64_BATCH_ACTOR_Max--100_Epoch-5_State-image_Actions-continuous_Rewards-followline_center_inTime-20230111-183048.h5" 31 | inference_ddpg_tf_critic_model_name: "20230111_DDPG_Actor_conv2d32x64_Critic_conv2d32x64_BATCH_CRITIC_Max--100_Epoch-5_State-image_Actions-continuous_Rewards-followline_center_inTime-20230111-183048.h5" 32 | 33 | algorithm: 34 | ddpg: 35 | gamma: 0.9 36 | tau: 0.005 37 | std_dev: 0.2 38 | model_name: DDPG_Actor_conv2d32x64_Critic_conv2d32x64 39 | replay_memory_size: 50_000 40 | memory_fraction: 0.20 41 | critic_lr: 0.002 42 | actor_lr: 0.001 43 | buffer_capacity: 100_000 44 | batch_size: 64 45 | 46 | agents: 47 | f1: 48 | camera_params: 49 | width: 640 50 | height: 480 51 | center_image: 320 52 | raw_image: False 53 | image_resizing: 100 54 | new_image_size: 32 55 | num_regions: 16 56 | lower_limit: 220 57 | 58 | states: 59 | image: 60 | 0: [3] 61 | sp1: 62 | 0: [10] 63 | sp3: 64 | 0: [5, 15, 22] 65 | sp5: 66 | 0: [3, 5, 10, 15, 20] 67 | spn: 68 | 0: [10] 69 | 70 | actions: 71 | simple: 72 | 0: [3, 0] 73 | 1: [2, 1] 74 | 2: [2, -1] 75 | medium: 76 | 0: [3, 0] 77 | 1: [2, 1] 78 | 2: [2, -1] 79 | 3: [1, 1.5] 80 | 4: [1, -1.5] 81 | hard: 82 | 0: [3, 0] 83 | 1: [2, 1] 84 | 2: [2, -1] 85 | 3: [1.5, 1] 86 | 4: [1.5, -1] 87 | 5: [1, -1.5] 88 | 6: [1, -1.5] 89 | test: 90 | 0: [0, 0] 91 | continuous: 92 | v: [2, 30] 93 | w: [-3, 3] 94 | 95 | rewards: 96 | followline_center: 97 | from_10: 10 98 | from_02: 2 99 | from_01: 1 100 | penal: -100 101 | min_reward: 5_000 102 | highest_reward: 100 103 | followline_center_v_w_linear: # only for continuous actions 104 | beta_0: 3 105 | beta_1: -0.1 106 | penal: 0 107 | min_reward: 1_000 108 | highest_reward: 100 109 | 110 | gazebo_environments: 111 | simple: 112 | env_name: F1Env-v0 113 | circuit_name: simple 114 | launchfile: simple_circuit.launch 115 | environment_folder: f1 116 | robot_name: f1_renault 117 | model_state_name: f1_renault # f1_renault_multicamera_multilaser 118 | start_pose: 0 # 0, 1, 2, 3, 4 119 | alternate_pose: False 120 | estimated_steps: 100 121 | sensor: camera 122 | save_episodes: 5 123 | save_every_step: 10 124 | lap_completed: False 125 | save_model: True 126 | save_positions: True 127 | debug_level: DEBUG 128 | telemetry: False 129 | telemetry_mask: False 130 | plotter_graphic: False 131 | circuit_positions_set: 132 | 0: [53.462, -41.988, 0.004, 0, 0, 1.57, -1.57] 133 | 1: [53.462, -8.734, 0.004, 0, 0, 1.57, -1.57] 134 | 2: [39.712, -30.741, 0.004, 0, 0, 1.56, 1.56] 135 | 3: [-6.861, -36.481, 0.004, 0, 0.01, -0.858, 0.613] 136 | 4: [20.043, 37.130, 0.003, 0, 0.103, -1.4383, -1.4383] 137 | --------------------------------------------------------------------------------