├── robo_gym ├── envs │ ├── ur │ │ ├── __init__.py │ │ ├── ur_ee_pos.py │ │ ├── ur_base.py │ │ ├── ur_isaac_reach.py │ │ └── ur_avoidance_basic.py │ ├── base │ │ └── __init__.py │ ├── example │ │ ├── __init__.py │ │ └── example_env.py │ ├── mir100 │ │ └── __init__.py │ ├── manipulator │ │ ├── __init__.py │ │ ├── isaac_reach.py │ │ └── manipulator_base.py │ ├── __init__.py │ ├── panda │ │ ├── panda_base.py │ │ ├── panda_ee_pos.py │ │ └── panda_isaac_reach.py │ └── simulation_wrapper.py ├── utils │ ├── __init__.py │ ├── exceptions.py │ ├── managed_rs_client.py │ ├── panda_utils.py │ ├── mir100_utils.py │ ├── ur_utils.py │ ├── panda_parameters │ │ └── panda.yaml │ ├── table.py │ ├── ur_parameters │ │ ├── ur10.yaml │ │ ├── ur3.yaml │ │ ├── ur10e.yaml │ │ ├── ur16e.yaml │ │ ├── ur3e.yaml │ │ ├── ur5.yaml │ │ └── ur5e.yaml │ └── manipulator_model.py ├── wrappers │ ├── __init__.py │ └── env_wrappers │ │ └── ur_ee_positioning_training.py ├── version.py └── __init__.py ├── bin ├── docker_entrypoint └── run-rs-side-standard ├── MANIFEST.in ├── docs ├── media │ ├── eurobin.png │ ├── robo-gym-logo.png │ └── funded_by_the_eu.png ├── examples │ ├── random_agent_sim.py │ ├── random_agent_robot.py │ ├── stable-baselines │ │ ├── td3_script.py │ │ ├── td3_load.py │ │ └── basic_example.md │ ├── random_agent_sim_custom_flags.py │ └── rllib │ │ ├── policy_from_cp_ur.py │ │ ├── policy_from_cp_mir.py │ │ ├── README.md │ │ ├── training_inference_mir.py │ │ └── training_inference_ur.py ├── creating_environments.md ├── managing_multiple_python_vers.md ├── isaac_lab_compatibility.md ├── the_framework.md └── modular_environments.md ├── .gitignore ├── pytest.ini ├── Dockerfile ├── docker-compose-test-melodic.yml ├── tests └── robo-gym │ ├── envs │ ├── mir100 │ │ ├── test_mir100_base_env.py │ │ └── test_mir100_obstacle_avoidance.py │ └── ur │ │ ├── test_ur_avoidance_raad_fixed_spline.py │ │ ├── test_ur_base_env.py │ │ ├── test_ur_avoidance_raad_training.py │ │ └── test_ur_avoidance_basic.py │ └── utils │ ├── test_ur.py │ └── test_utils.py ├── LICENSE ├── setup.py └── .gitlab-ci.yml /robo_gym/envs/ur/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /robo_gym/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /robo_gym/envs/base/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /robo_gym/envs/example/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /robo_gym/envs/mir100/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /robo_gym/wrappers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /robo_gym/envs/manipulator/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /robo_gym/version.py: 
-------------------------------------------------------------------------------- 1 | VERSION = "2.1.0" 2 | -------------------------------------------------------------------------------- /bin/docker_entrypoint: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | exec "$@" 5 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include robo_gym/utils/ur_parameters/* 2 | include robo_gym/envs/ur/robot_trajectories/* -------------------------------------------------------------------------------- /docs/media/eurobin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jr-robotics/robo-gym/HEAD/docs/media/eurobin.png -------------------------------------------------------------------------------- /docs/media/robo-gym-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jr-robotics/robo-gym/HEAD/docs/media/robo-gym-logo.png -------------------------------------------------------------------------------- /docs/media/funded_by_the_eu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jr-robotics/robo-gym/HEAD/docs/media/funded_by_the_eu.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *__pycache__ 2 | *.egg-info 3 | /.vscode 4 | /.idea 5 | 6 | # Setuptools 7 | /build 8 | /dist 9 | /*.egg-info 10 | /Pipfile 11 | -------------------------------------------------------------------------------- /bin/run-rs-side-standard: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | docker run -it \ 4 | --net=host \ 5 | joanneumrobotics/robo-gym-robot-server-side:0.1.0-standard 6 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | markers = 3 | nightly: marks tests to be run in nightly pipeline (deselect with '-m "not nightly"') 4 | commit: marks tests to be run by default (select with '-m "commit"') -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | ARG PYTHON_VER 2 | FROM python:$PYTHON_VER-bullseye 3 | RUN apt-get -y update && apt-get install -y unzip libglu1-mesa-dev libgl1-mesa-dev libosmesa6-dev xvfb patchelf ffmpeg 4 | RUN pip install pytest pytest-rerunfailures 5 | ARG CACHEBUST=1 6 | COPY . /usr/local/robo-gym/ 7 | WORKDIR /usr/local/robo-gym/ 8 | RUN pip install . 
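# bin/docker_entrypoint simply exec's the command given to the container (see its listing above), so the default CMD below drops into a bash shell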
9 | ENTRYPOINT ["/usr/local/robo-gym/bin/docker_entrypoint"] 10 | CMD ["bash"] 11 | -------------------------------------------------------------------------------- /docs/examples/random_agent_sim.py: -------------------------------------------------------------------------------- 1 | import gymnasium as gym 2 | import robo_gym 3 | 4 | target_machine_ip = "127.0.0.1" # or other machine 'xxx.xxx.xxx.xxx' 5 | 6 | # initialize environment 7 | env = gym.make("NoObstacleNavigationMir100Sim-v0", ip=target_machine_ip, gui=True) 8 | 9 | num_episodes = 10 10 | 11 | for episode in range(num_episodes): 12 | done = False 13 | env.reset() 14 | while not done: 15 | # random step in the environment 16 | state, reward, terminated, truncated, info = env.step(env.action_space.sample()) 17 | done = terminated or truncated 18 | -------------------------------------------------------------------------------- /docker-compose-test-melodic.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | 3 | services: 4 | robot-servers: 5 | container_name: robot-servers 6 | image: robot-servers 7 | command: bash -c "start-server-manager && tail -f /dev/null" #This starts the server manager and hangs 8 | restart: always 9 | 10 | robo-gym: 11 | container_name: robo-gym 12 | # build: 13 | # context: . 14 | # dockerfile: Dockerfile 15 | image: robo-gym 16 | restart: always 17 | entrypoint: tail -f /dev/null 18 | depends_on: 19 | - robot-servers 20 | # stdin_open: true 21 | # tty: true 22 | -------------------------------------------------------------------------------- /docs/examples/random_agent_robot.py: -------------------------------------------------------------------------------- 1 | import gymnasium as gym 2 | import robo_gym 3 | 4 | # PLEASE DON'T USE RANDOM SAMPLES WITH A REAL ROBOT! THIS IS JUST A CODE EXAMPLE! 
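# On a real robot, compute actions with a trained model instead - see e.g. docs/examples/stable-baselines/td3_load.py for loading and running a saved policy.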
5 | 6 | robot_address = 'xxx.xxx.xxx.xxx' 7 | 8 | # initialize environment 9 | env = gym.make('NoObstacleNavigationMir100Rob-v0', rs_address=robot_address) 10 | 11 | num_episodes = 1 12 | 13 | for episode in range(num_episodes): 14 | done = False 15 | env.reset() 16 | while not done: 17 | # random step in the environment 18 | state, reward, terminated, truncated, info = env.step(env.action_space.sample()) 19 | done = terminated or truncated -------------------------------------------------------------------------------- /docs/examples/stable-baselines/td3_script.py: -------------------------------------------------------------------------------- 1 | import gymnasium as gym 2 | import robo_gym 3 | from stable_baselines3 import TD3 4 | from stable_baselines3.td3.policies import MlpPolicy 5 | 6 | # specify the ip of the machine running the robot-server 7 | target_machine_ip = "127.0.0.1" # or other xxx.xxx.xxx.xxx 8 | 9 | # initialize environment 10 | env = gym.make("NoObstacleNavigationMir100Sim-v0", ip=target_machine_ip, gui=True) 11 | 12 | # choose and run appropriate algorithm provided by stable-baselines 13 | model = TD3(MlpPolicy, env, verbose=1) 14 | model.learn(total_timesteps=2000) 15 | 16 | # save model 17 | model.save("td3_mir_basic") 18 | -------------------------------------------------------------------------------- /tests/robo-gym/envs/mir100/test_mir100_base_env.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import gymnasium as gym 4 | import robo_gym 5 | import pytest 6 | 7 | @pytest.fixture(scope='module') 8 | def env(request): 9 | ip = os.environ.get("ROBOGYM_SERVERS_HOST", 'robot-servers') 10 | env = gym.make('NoObstacleNavigationMir100Sim-v0', ip=ip) 11 | yield env 12 | env.kill_sim() 13 | 14 | @pytest.mark.commit 15 | def test_initialization(env): 16 | env.reset() 17 | done = False 18 | for _ in range(10): 19 | if not done: 20 | action = env.action_space.sample() 21 | observation, _, done, _, _ = env.step(action) 22 | 23 | assert env.observation_space.contains(observation) -------------------------------------------------------------------------------- /tests/robo-gym/envs/mir100/test_mir100_obstacle_avoidance.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import gymnasium as gym 4 | import robo_gym 5 | import pytest 6 | 7 | @pytest.fixture(scope='module') 8 | def env(request): 9 | ip = os.environ.get("ROBOGYM_SERVERS_HOST", 'robot-servers') 10 | env = gym.make('ObstacleAvoidanceMir100Sim-v0', ip=ip) 11 | yield env 12 | env.kill_sim() 13 | 14 | @pytest.mark.commit 15 | def test_initialization(env): 16 | env.reset() 17 | done = False 18 | for _ in range(10): 19 | if not done: 20 | action = env.action_space.sample() 21 | observation, _, done, _, _ = env.step(action) 22 | 23 | assert env.observation_space.contains(observation) -------------------------------------------------------------------------------- /docs/examples/random_agent_sim_custom_flags.py: -------------------------------------------------------------------------------- 1 | import gymnasium as gym 2 | import robo_gym 3 | from robo_gym.envs.simulation_wrapper import Simulation 4 | 5 | target_machine_ip = "127.0.0.1" # or other machine 'xxx.xxx.xxx.xxx' 6 | 7 | Simulation.verbose = True 8 | Simulation.del_try_async_kill = False 9 | 10 | # initialize environment 11 | env = gym.make("NoObstacleNavigationMir100Sim-v0", ip=target_machine_ip, gui=True) 12 | 13 | num_episodes = 10 14 | 15 | for 
episode in range(num_episodes): 16 | done = False 17 | env.reset() 18 | while not done: 19 | # random step in the environment 20 | state, reward, terminated, truncated, info = env.step(env.action_space.sample()) 21 | done = terminated or truncated 22 | 23 | env.close() 24 | -------------------------------------------------------------------------------- /robo_gym/utils/exceptions.py: -------------------------------------------------------------------------------- 1 | class InvalidStateError(Exception): 2 | def __init__(self, message = "The environment state received is not contained in the observation space."): 3 | self.message = message 4 | def __str__(self): 5 | return self.message 6 | 7 | class RobotServerError(Exception): 8 | def __init__(self, service): 9 | if service == "set_state": 10 | self.message = "set_state service call failed." 11 | elif service == "send_action": 12 | self.message = "send_action service call failed." 13 | elif service == "get_state": 14 | self.message = "get_state service call failed." 15 | else: 16 | self.message = "service call failed" 17 | def __str__(self): 18 | return self.message 19 | 20 | class InvalidActionError(Exception): 21 | def __init__(self, message = "The action is not contained in the action space."): 22 | self.message = message 23 | def __str__(self): 24 | return self.message -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 JOANNEUM RESEARCH Forschungsgesellschaft mbH 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | import sys, os 3 | 4 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "robo_gym")) 5 | from version import VERSION 6 | 7 | setuptools.setup( 8 | name="robo-gym", 9 | version=VERSION, 10 | description="robo-gym: an open source toolkit for Distributed Deep Reinforcement Learning on real and simulated robots.", 11 | url="https://github.com/jr-robotics/robo-gym", 12 | author="Matteo Lucchi, Friedemann Zindler, Bernhard Reiterer, Mara Vukadinovic, Christian Bauer, Thomas Gallien, Benjamim Breiling", 13 | author_email="bernhard.reiterer@joanneum.at", 14 | packages=setuptools.find_packages(), 15 | include_package_data=True, 16 | classifiers=[ 17 | "Programming Language :: Python :: 3", 18 | "Programming Language :: Python :: 3.8", 19 | "Programming Language :: Python :: 3.9", 20 | "Programming Language :: Python :: 3.10", 21 | "Programming Language :: Python :: 3.11", 22 | ], 23 | install_requires=[ 24 | "gymnasium", 25 | "robo-gym-server-modules", 26 | "numpy", 27 | "scipy", 28 | "pyyaml", 29 | ], 30 | python_requires=">=3.8", 31 | scripts=["bin/run-rs-side-standard"], 32 | ) 33 | -------------------------------------------------------------------------------- /robo_gym/utils/managed_rs_client.py: -------------------------------------------------------------------------------- 1 | import robo_gym_server_modules.robot_server.client as rs_client 2 | import robo_gym_server_modules.server_manager.client as sm_client 3 | 4 | 5 | class ManagedClient(rs_client.Client): 6 | 7 | def __init__( 8 | self, 9 | *, 10 | server_manager_host: str, 11 | server_manager_port: int = 50100, 12 | launch_cmd: str, 13 | gui: bool = False 14 | ): 15 | if not server_manager_host: 16 | raise Exception("ManagedClient: missing server manager host - cannot work!") 17 | print( 18 | "Command for new simulation robot server: " 19 | + launch_cmd 20 | + " gui:=" 21 | + ("true" if gui else "false") 22 | ) 23 | self.sm_client = sm_client.Client( 24 | server_manager_host, server_manager_port, server_manager_port + 1 25 | ) 26 | self.cmd = launch_cmd 27 | self.gui = gui 28 | self.rs_address = self.sm_client.start_new_server(cmd=self.cmd, gui=self.gui) 29 | super().__init__(self.rs_address) 30 | 31 | def kill(self): 32 | self.sm_client.kill_server(self.rs_address) 33 | # TODO might want to try async killing, too 34 | -------------------------------------------------------------------------------- /robo_gym/utils/panda_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import numpy as np 4 | from numpy.typing import NDArray 5 | import yaml 6 | import os 7 | import copy 8 | 9 | from robo_gym.utils.manipulator_model import * 10 | 11 | 12 | class Panda(ManipulatorModel): 13 | """Franka Emika Panda model class. 14 | 15 | Attributes: 16 | max_joint_positions (np.array): Maximum joint position values (rad)`. 17 | min_joint_positions (np.array): Minimum joint position values (rad)`. 18 | max_joint_velocities (np.array): Maximum joint velocity values (rad/s)`. 19 | min_joint_velocities (np.array): Minimum joint velocity values (rad/s)`. 20 | joint_names (list): Joint names (Standard Indexing)`. 
21 | 22 | Joint Names (ROS Indexing): 23 | [panda_joint1, panda_joint2, panda_joint3, panda_joint4, panda_joint5, 24 | panda_joint6, panda_joint7] 25 | 26 | NOTE: Where not specified, Standard Indexing is used. 27 | """ 28 | 29 | def __init__(self, model_key: str): 30 | 31 | assert model_key == "panda" 32 | 33 | self.model = model_key 34 | 35 | file_name = model_key + ".yaml" 36 | file_path = os.path.join( 37 | os.path.dirname(__file__), "panda_parameters", file_name 38 | ) 39 | 40 | super().__init__(model_key, file_path) 41 | -------------------------------------------------------------------------------- /tests/robo-gym/utils/test_ur.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from robo_gym.utils.ur_utils import UR 3 | import numpy as np 4 | 5 | 6 | class TestUrUtils: 7 | # TODO more assertions on test results 8 | 9 | def test_random_workspace_pose(self): 10 | ur = UR(model_key="ur10") 11 | 12 | pose = ur.get_random_workspace_pose_rpy() 13 | assert 6 == len(pose) 14 | 15 | pose = ur.get_random_workspace_pose_rpy(np_random=np.random.default_rng()) 16 | assert 6 == len(pose) 17 | 18 | def test_random_offset_joint_positions(self): 19 | ur = UR(model_key="ur10") 20 | joint_positions = np.array( 21 | [ 22 | 0.0, 23 | -2.5, 24 | 1.5, 25 | 0.0, 26 | -1.4, 27 | 0.0, 28 | ], 29 | dtype=np.float32, 30 | ) 31 | random_offset = np.array( 32 | [ 33 | 1.0, 34 | 1.0, 35 | 1.0, 36 | 1.0, 37 | 1.0, 38 | 1.0, 39 | ], 40 | dtype=np.float32, 41 | ) 42 | 43 | positions = ur.get_random_offset_joint_positions(joint_positions, random_offset) 44 | assert 6 == len(positions) 45 | 46 | positions = ur.get_random_offset_joint_positions( 47 | joint_positions, random_offset, np_random=np.random.default_rng() 48 | ) 49 | assert 6 == len(positions) 50 | -------------------------------------------------------------------------------- /docs/examples/stable-baselines/td3_load.py: -------------------------------------------------------------------------------- 1 | import gymnasium as gym 2 | import robo_gym 3 | from stable_baselines3 import TD3 4 | from stable_baselines3.td3.policies import MlpPolicy 5 | 6 | # specify the ip of the machine running the robot-server 7 | target_machine_ip = "127.0.0.1" # or other xxx.xxx.xxx.xxx 8 | 9 | # initialize environment 10 | env = gym.make("NoObstacleNavigationMir100Sim-v0", ip=target_machine_ip, gui=True) 11 | 12 | model = TD3.load("td3_mir_basic") 13 | 14 | timesteps = 1000 15 | 16 | passed_timesteps = 0 17 | passed_episodes = 0 18 | 19 | while True: 20 | print("Starting episode {} - reset... 
".format((passed_episodes + 1))) 21 | observation, info = env.reset() 22 | done = False 23 | episode_timesteps = 0 24 | print("Reset finished, entering execution.".format((passed_episodes + 1))) 25 | while not done: 26 | action, _ = model.predict(observation=observation) 27 | observation, reward, terminated, truncated, info = env.step(action) 28 | done = terminated or truncated 29 | passed_timesteps += 1 30 | episode_timesteps += 1 31 | if passed_timesteps >= timesteps: 32 | break 33 | if done: 34 | passed_episodes += 1 35 | print( 36 | "Episode {} terminated after {} timesteps with reward {}".format( 37 | passed_episodes, episode_timesteps, reward 38 | ) 39 | ) 40 | 41 | if passed_timesteps >= timesteps: 42 | break 43 | 44 | env.close() 45 | -------------------------------------------------------------------------------- /robo_gym/envs/__init__.py: -------------------------------------------------------------------------------- 1 | # Example 2 | from robo_gym.envs.example.example_env import ExampleEnvSim, ExampleEnvRob 3 | 4 | # MiR100 5 | from robo_gym.envs.mir100.mir100 import ( 6 | NoObstacleNavigationMir100Sim, 7 | NoObstacleNavigationMir100Rob, 8 | ) 9 | from robo_gym.envs.mir100.mir100 import ( 10 | ObstacleAvoidanceMir100Sim, 11 | ObstacleAvoidanceMir100Rob, 12 | ) 13 | 14 | # UR 15 | from robo_gym.envs.ur.ur_base_env import EmptyEnvironmentURSim, EmptyEnvironmentURRob 16 | from robo_gym.envs.ur.ur_ee_positioning import ( 17 | EndEffectorPositioningURSim, 18 | EndEffectorPositioningURRob, 19 | ) 20 | from robo_gym.envs.ur.ur_avoidance_basic import BasicAvoidanceURSim, BasicAvoidanceURRob 21 | from robo_gym.envs.ur.ur_avoidance_raad import ( 22 | AvoidanceRaad2022URSim, 23 | AvoidanceRaad2022URRob, 24 | ) 25 | from robo_gym.envs.ur.ur_avoidance_raad import ( 26 | AvoidanceRaad2022TestURSim, 27 | AvoidanceRaad2022TestURRob, 28 | ) 29 | 30 | from robo_gym.envs.ur.ur_base import EmptyEnvironment2URSim, EmptyEnvironment2URRob 31 | from robo_gym.envs.ur.ur_ee_pos import ( 32 | EndEffectorPositioning2URSim, 33 | EndEffectorPositioning2URRob, 34 | ) 35 | from robo_gym.envs.ur.ur_isaac_reach import IsaacReachURSim, IsaacReachURRob 36 | 37 | from robo_gym.envs.panda.panda_base import ( 38 | EmptyEnvironmentPandaSim, 39 | EmptyEnvironmentPandaRob, 40 | ) 41 | from robo_gym.envs.panda.panda_ee_pos import ( 42 | EndEffectorPositioningPandaSim, 43 | EndEffectorPositioningPandaRob, 44 | ) 45 | from robo_gym.envs.panda.panda_isaac_reach import IsaacReachPandaSim, IsaacReachPandaRob 46 | -------------------------------------------------------------------------------- /docs/examples/rllib/policy_from_cp_ur.py: -------------------------------------------------------------------------------- 1 | import gymnasium as gym 2 | from ray.rllib.policy.policy import Policy 3 | import robo_gym 4 | import ray 5 | import sys 6 | import numpy as np 7 | import torch 8 | import torch.nn 9 | from ray.rllib.core.rl_module import RLModule 10 | 11 | if __name__ == "__main__": 12 | 13 | policy_cp_path = sys.argv[1] 14 | 15 | policy = Policy.from_checkpoint(policy_cp_path) 16 | 17 | env = gym.make("EndEffectorPositioningURSim-v0", ip="127.0.0.1", gui=True, ur_model="ur5", rs_state_to_info=False) 18 | obs, _ = env.reset() 19 | 20 | num_episodes = 0 21 | episode_reward = 0.0 22 | 23 | count_overall = 0 24 | count_clipped = 0 25 | 26 | while num_episodes < 5: 27 | torch_obs_batch = torch.from_numpy(np.array([obs])) 28 | a = policy.compute_single_action(torch_obs_batch) 29 | a_np = np.array(a[0], dtype=np.float32) 30 | 31 | 
clipped_a_np = a_np.clip(-1, 1) 32 | 33 | if not np.array_equal(clipped_a_np, a_np): 34 | count_clipped += 1 35 | 36 | count_overall += 1 37 | 38 | # Send the computed action `a` to the env. 39 | obs, reward, done, truncated, _ = env.step(clipped_a_np) 40 | episode_reward += reward 41 | # Is the episode `done`? -> Reset. 42 | if done: 43 | print(f"Episode done: Total reward = {episode_reward}") 44 | obs, _ = env.reset() 45 | num_episodes += 1 46 | episode_reward = 0.0 47 | 48 | env.close() 49 | 50 | ray.shutdown() 51 | print(f"clipped {count_clipped} of {count_overall} actions") -------------------------------------------------------------------------------- /docs/creating_environments.md: -------------------------------------------------------------------------------- 1 | # Creating Environments 2 | 3 | One of the benefits of the modular architecture of robo-gym is that it is fairly simple to add new environments to it. 4 | 5 | All the environments are stored under the [envs](robo_gym/envs) directory, where they are organized in a separate folder for each robot. 6 | 7 | See also the newer documentation in [Modular Environments](modular_environments.md) to learn about the benefits of basing environments on `RoboGymEnv` rather than directly on `gym.env`. 8 | 9 | All the environments are based on the `gym.env` class; you can base your environment class directly on `gym.env` or start from one of the classes already included in robo-gym and extend them. 10 | 11 | Once you have created the class for your environment, it has to be registered in order to make it available. Following what has already been done for the existing environments, add the import of your classes to [robo_gym/envs/\_\_init\_\_.py](robo_gym/envs/__init__.py) and the registration of the environment to [robo_gym/\_\_init\_\_.py](robo_gym/__init__.py); a sketch of both steps is shown at the end of this page. 12 | 13 | ## Example 14 | 15 | We provide a base Environment together with its Robot Server with minimum functionality as a base for you to start with the implementation of your own Robot Server. 16 | 17 | [Robot Server Example](https://github.com/jr-robotics/robo-gym-robot-servers/tree/master/example_robot_server) 18 | 19 | [Environment Example](../robo_gym/envs/example/example_env.py) 20 | 21 | ## Integrating new robots and sensors 22 | 23 | Integrating new robots and sensors is possible but requires knowledge of ROS and Gazebo or alternatives.
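As a rough sketch of the two registration steps described above - `MyRobotEnvSim`, the module path, and the `register` arguments are hypothetical placeholders to adapt to your own environment:

```python
# in robo_gym/envs/__init__.py - expose the new environment class (hypothetical names)
from robo_gym.envs.my_robot.my_robot_env import MyRobotEnvSim

# in robo_gym/__init__.py - register it so that gym.make("MyRobotEnvSim-v0") can find it
from gymnasium.envs.registration import register

register(
    id="MyRobotEnvSim-v0",
    entry_point="robo_gym.envs:MyRobotEnvSim",
)
```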
24 | -------------------------------------------------------------------------------- /docs/examples/rllib/policy_from_cp_mir.py: -------------------------------------------------------------------------------- 1 | import gymnasium as gym 2 | from ray.rllib.policy.policy import Policy 3 | import robo_gym 4 | import ray 5 | import sys 6 | import numpy as np 7 | from robo_gym.envs.simulation_wrapper import Simulation 8 | import torch 9 | import torch.nn 10 | from ray.rllib.core.rl_module import RLModule 11 | 12 | if __name__ == "__main__": 13 | 14 | policy_cp_path = sys.argv[1] 15 | 16 | policy = Policy.from_checkpoint(policy_cp_path) 17 | 18 | env = gym.make("NoObstacleNavigationMir100Sim-v0", ip="127.0.0.1", gui=True) 19 | obs, _ = env.reset() 20 | 21 | num_episodes = 0 22 | episode_reward = 0.0 23 | 24 | count_overall = 0 25 | count_clipped = 0 26 | 27 | while num_episodes < 5: 28 | torch_obs_batch = torch.from_numpy(np.array([obs])) 29 | a = policy.compute_single_action(torch_obs_batch) 30 | a_np = np.array(a[0], dtype=np.float32) 31 | 32 | clipped_a_np = a_np.clip(-1, 1) 33 | 34 | if not np.array_equal(clipped_a_np, a_np): 35 | count_clipped += 1 36 | 37 | count_overall += 1 38 | 39 | # Send the computed action `a` to the env. 40 | obs, reward, done, truncated, _ = env.step(clipped_a_np) 41 | episode_reward += reward 42 | # Is the episode `done`? -> Reset. 43 | if done: 44 | print(f"Episode done: Total reward = {episode_reward}") 45 | obs, _ = env.reset() 46 | env.render() 47 | num_episodes += 1 48 | episode_reward = 0.0 49 | 50 | env.close() 51 | 52 | ray.shutdown() 53 | print(f"clipped {count_clipped} of {count_overall} actions") -------------------------------------------------------------------------------- /docs/managing_multiple_python_vers.md: -------------------------------------------------------------------------------- 1 | # Managing Multiple Python Versions 2 | 3 | 4 | To manage multiple Python versions on the same machine we suggest using [pyenv](https://github.com/pyenv/pyenv). 5 | 6 | There is a great article on pyenv [here](https://realpython.com/intro-to-pyenv/). 7 | 8 | We recommend using [pyenv-installer](https://github.com/pyenv/pyenv-installer) to install pyenv on your machine. 9 | 10 | Once pyenv is installed you can check the available Python versions with: 11 | 12 | ``` 13 | pyenv versions 14 | ``` 15 | 16 | If this is the first time you are using pyenv, the previous command should return: 17 | 18 | ``` 19 | * system (set by /home/username/.pyenv/version) 20 | ``` 21 | This shows that the only Python installation available is the system default Python. 22 | 23 | The Standard Installation of the Robot Server Side has to be performed on 24 | the system default Python, which should be Python 2.7. 25 | 26 | The Environment Side installation requires Python >= 3.5. Let's first install a suitable Python version (e.g. 3.6.10) with: 27 | 28 | ``` 29 | pyenv install 3.6.10 30 | ``` 31 | 32 | Now we can create a virtual environment in which to install and run robo-gym: 33 | 34 | ``` 35 | pyenv virtualenv 3.6.10 robo-gym 36 | ``` 37 | 38 | Here, 3.6.10 is the desired Python version and robo-gym is the name of the virtual 39 | environment. 40 | 41 | To activate the virtual environment use: 42 | 43 | ``` 44 | pyenv activate robo-gym 45 | ``` 46 | 47 | You should see in your shell something like: 48 | 49 | ``` 50 | (robo-gym) user@machine:$ 51 | ``` 52 | 53 | This means that you are now within the virtual environment.
54 | This is the virtual environment in which you should perform the Environment Side 55 | installation. When you run `pip install robo-gym` the robo-gym package will 56 | be installed in the virtual environment. 57 | -------------------------------------------------------------------------------- /docs/examples/stable-baselines/basic_example.md: -------------------------------------------------------------------------------- 1 | # Getting Started Using Stable-Baselines3 2 | 3 | ## Prerequisites 4 | ### Installation of robo-gym 5 | Follow the installation instructions: https://github.com/jr-robotics/robo-gym#installation 6 | 7 | ### Installation of stable-baselines3: 8 | Follow the installation instructions: https://github.com/DLR-RM/stable-baselines3 9 | 10 | ## 1. Start the robot-server-manager 11 | Start the robot-server-manager on the machine where the simulation is supposed to run by entering the start command in a shell: 12 | 13 | ```bash 14 | start-server-manager 15 | ``` 16 | 17 | 18 | ## 2. Example of simulation training using the MiR100 base environment 19 | Here is a quick example running TD3 in the MiR100 base environment. 20 | 21 | (_Python >=3.8_ / _robo-gym_ virtual environment) 22 | ```python 23 | import gymnasium as gym 24 | import robo_gym 25 | from stable_baselines3 import TD3 26 | from stable_baselines3.td3.policies import MlpPolicy 27 | 28 | # specify the ip of the machine running the robot-server 29 | target_machine_ip = '127.0.0.1' 30 | 31 | # initialize environment (to render the environment set gui=True) 32 | env = gym.make('NoObstacleNavigationMir100Sim-v0', ip=target_machine_ip, gui=False) 33 | 34 | # follow the instructions provided by stable-baselines 35 | model = TD3(MlpPolicy, env, verbose=1) 36 | model.learn(total_timesteps=15000) 37 | 38 | # saving and loading a model 39 | model.save('td3_mir_basic') 40 | del model 41 | model = TD3.load("td3_mir_basic") 42 | 43 | # reinitialize simulation environment with rendering enabled 44 | env.kill_sim() 45 | env = gym.make('NoObstacleNavigationMir100Sim-v0', ip=target_machine_ip, gui=True) 46 | 47 | # run the environment 10 times using the trained model 48 | num_episodes = 10 49 | for episode in range(num_episodes): 50 | obs, _ = env.reset() 51 | done = False 52 | while not done: 53 | action, _states = model.predict(obs) 54 | obs, rewards, terminated, truncated, info = env.step(action) 55 | done = terminated or truncated 56 | ``` -------------------------------------------------------------------------------- /robo_gym/utils/mir100_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import numpy as np 4 | from robo_gym.utils import utils 5 | 6 | class Mir100(): 7 | """Mobile Industrial Robots MiR100 utilities. 8 | 9 | Attributes: 10 | max_lin_vel (float): Maximum robot's linear velocity (m/s). 11 | min_lin_vel (float): Minimum robot's linear velocity (m/s). 12 | max_ang_vel (float): Maximum robot's angular velocity (rad/s). 13 | min_ang_vel (float): Minimum robot's angular velocity (rad/s).
14 | 15 | """ 16 | def __init__(self): 17 | 18 | self.max_lin_vel = 1.0 19 | self.min_lin_vel = -1.0 20 | 21 | self.max_ang_vel = 1.5 22 | self.min_ang_vel = -1.5 23 | 24 | def get_max_lin_vel(self): 25 | 26 | return self.max_lin_vel 27 | 28 | def get_min_lin_vel(self): 29 | 30 | return self.min_lin_vel 31 | 32 | def get_max_ang_vel(self): 33 | 34 | return self.max_ang_vel 35 | 36 | def get_min_ang_vel(self): 37 | 38 | return self.min_ang_vel 39 | 40 | def get_corners_positions(self,x,y,yaw): 41 | """Get robot's corners coordinates given the coordinates of its center. 42 | 43 | Args: 44 | x (float): x coordinate of robot's geometric center. 45 | y (float): y coordinate of robot's geometric center. 46 | yaw (float): yaw angle of robot's geometric center. 47 | 48 | The coordinates are given with respect to the map origin and cartesian system. 49 | 50 | Returns: 51 | list[list]: x and y coordinates of the 4 robot's corners. 52 | 53 | """ 54 | 55 | robot_x_dimension = 0.9 56 | robot_y_dimension = 0.58 57 | dx = robot_x_dimension/2 58 | dy = robot_y_dimension/2 59 | 60 | delta_corners = [[dx,-dy],[dx,dy],[-dx,dy],[-dx,-dy]] 61 | corners = [] 62 | 63 | for corner_xy in delta_corners: 64 | # Rotate point around origin 65 | r_xy = utils.rotate_point(corner_xy[0],corner_xy[1],yaw) 66 | # Translate back from origin to corner 67 | corners.append([sum(x) for x in zip(r_xy,[x,y])]) 68 | 69 | return corners 70 | -------------------------------------------------------------------------------- /robo_gym/envs/ur/ur_ee_pos.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from robo_gym.envs.base.robogym_env import * 4 | from robo_gym.envs.manipulator.ee_pos_base import ManipulatorEePosEnv 5 | from robo_gym.envs.ur.ur_base import URBaseEnv2 6 | 7 | 8 | class EndEffectorPositioning2UR(ManipulatorEePosEnv): 9 | def __init__(self, **kwargs): 10 | # not too nice - repeated in super init 11 | self._config = kwargs 12 | 13 | obs_nodes: list[ObservationNode] | None = kwargs.get( 14 | RoboGymEnv.KW_OBSERVATION_NODES 15 | ) 16 | if obs_nodes is None: 17 | # having this hardcoded index is suboptimal; 1 should be correct since 0 will be the manipulator observation node 18 | obs_nodes = [LastActionObservationNode(**self.get_obs_node_setup_kwargs(1))] 19 | kwargs[RoboGymEnv.KW_OBSERVATION_NODES] = obs_nodes 20 | 21 | URBaseEnv2.set_robot_defaults(kwargs) 22 | kwargs.setdefault(RoboGymEnv.KW_ACTION_RATE, 10.0) 23 | kwargs.setdefault( 24 | ManipulatorEePosEnv.KW_RANDOM_JOINT_OFFSET, [1.5, 0.25, 0.5, 1.0, 0.4, 3.14] 25 | ) 26 | 27 | super().__init__(**kwargs) 28 | 29 | def get_launch_cmd(self) -> str: 30 | # TODO make string composition more dynamic 31 | return f"roslaunch ur_robot_server ur_robot_server.launch \ 32 | rviz_gui:={self._config.get(self.KW_RVIZ_GUI_FLAG, True)} \ 33 | gazebo_gui:={self._config.get(self.KW_GAZEBO_GUI_FLAG, True)} \ 34 | world_name:=tabletop_sphere50_no_collision.world \ 35 | reference_frame:=base_link \ 36 | max_velocity_scale_factor:={self._config.get(self.KW_MAX_VELOCITY_SCALE_FACTOR, .1)} \ 37 | action_cycle_rate:={self.action_rate} \ 38 | objects_controller:=true \ 39 | rs_mode:=1object \ 40 | n_objects:=1.0 \ 41 | object_0_model_name:=sphere50_no_collision \ 42 | object_0_frame:=target \ 43 | ur_model:={self.ur_model_key}" 44 | 45 | @property 46 | def ur_model_key(self) -> str: 47 | return self._config.get(URBaseEnv2.KW_UR_MODEL_KEY) 48 | 49 | 50 | class 
EndEffectorPositioning2URSim(EndEffectorPositioning2UR): 51 | 52 | def __init__(self, **kwargs): 53 | kwargs[self.KW_REAL_ROBOT] = False 54 | super().__init__(**kwargs) 55 | 56 | 57 | class EndEffectorPositioning2URRob(EndEffectorPositioning2UR): 58 | 59 | def __init__(self, **kwargs): 60 | kwargs[self.KW_REAL_ROBOT] = True 61 | super().__init__(**kwargs) 62 | -------------------------------------------------------------------------------- /robo_gym/envs/panda/panda_base.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from robo_gym.envs.base.robogym_env import * 4 | from robo_gym.envs.manipulator.manipulator_base import ( 5 | ManipulatorBaseEnv, 6 | ManipulatorActionNode, 7 | ) 8 | from robo_gym.utils.panda_utils import Panda 9 | 10 | 11 | class PandaBaseEnv(ManipulatorBaseEnv): 12 | 13 | KW_PANDA_MODEL_KEY = "panda_model" 14 | 15 | def __init__(self, **kwargs): 16 | 17 | PandaBaseEnv.set_robot_defaults(kwargs) 18 | 19 | super().__init__(**kwargs) 20 | 21 | @staticmethod 22 | def set_robot_defaults(kwargs: dict[str, Any]): 23 | 24 | # prepare robot model set it in the kwargs 25 | robot_model: Panda | None = kwargs.get(RoboGymEnv.KW_ROBOT_MODEL_OBJECT) 26 | if robot_model is None: 27 | robot_model = Panda( 28 | model_key=kwargs.get(PandaBaseEnv.KW_PANDA_MODEL_KEY, "panda") 29 | ) 30 | kwargs[RoboGymEnv.KW_ROBOT_MODEL_OBJECT] = robot_model 31 | 32 | # default action rate 33 | kwargs.setdefault(RoboGymEnv.KW_ACTION_RATE, 30.0) 34 | 35 | # default max episode steps 36 | kwargs.setdefault(RewardNode.KW_MAX_EPISODE_STEPS, 300) 37 | 38 | # default fixed joints: last joint 39 | prefix_fix = ManipulatorActionNode.KW_PREFIX_FIX_JOINT 40 | last_joint_name = robot_model.joint_names[-1] 41 | kw_fix_last_joint = prefix_fix + last_joint_name 42 | kwargs.setdefault(kw_fix_last_joint, True) 43 | 44 | # default joint positions 45 | value = [-0.018, -0.76, 0.02, -2.342, 0.03, 1.541, 0.753] 46 | 47 | kwargs.setdefault(ManipulatorBaseEnv.KW_JOINT_POSITIONS, value) 48 | 49 | def get_launch_cmd(self) -> str: 50 | # TODO make string composition more dynamic 51 | return f"roslaunch panda_robot_server panda_robot_server.launch \ 52 | rviz_gui:={self._config.get(self.KW_RVIZ_GUI_FLAG, True)} \ 53 | gazebo_gui:={self._config.get(self.KW_GAZEBO_GUI_FLAG, True)} \ 54 | world_name:=empty_no_gravity.world \ 55 | reference_frame:=world \ 56 | max_velocity_scale_factor:={self._config.get(self.KW_MAX_VELOCITY_SCALE_FACTOR, .2)} \ 57 | action_cycle_rate:={self.action_rate} \ 58 | rs_mode:=only_robot \ 59 | action_mode:={self.action_mode}" 60 | 61 | 62 | class EmptyEnvironmentPandaSim(PandaBaseEnv): 63 | 64 | def __init__(self, **kwargs): 65 | kwargs[self.KW_REAL_ROBOT] = False 66 | super().__init__(**kwargs) 67 | 68 | 69 | class EmptyEnvironmentPandaRob(PandaBaseEnv): 70 | 71 | def __init__(self, **kwargs): 72 | kwargs[self.KW_REAL_ROBOT] = True 73 | super().__init__(**kwargs) 74 | -------------------------------------------------------------------------------- /robo_gym/utils/ur_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import numpy as np 4 | from numpy.typing import NDArray 5 | import yaml 6 | import os 7 | import copy 8 | 9 | from robo_gym.utils.manipulator_model import * 10 | 11 | 12 | class UR(ManipulatorModel): 13 | """Universal Robots model class. 14 | 15 | Attributes: 16 | max_joint_positions (np.array): Maximum joint position values (rad)`. 
17 | min_joint_positions (np.array): Minimum joint position values (rad)`. 18 | max_joint_velocities (np.array): Maximum joint velocity values (rad/s)`. 19 | min_joint_velocities (np.array): Minimum joint velocity values (rad/s)`. 20 | joint_names (list): Joint names (Standard Indexing)`. 21 | 22 | Joint Names (ROS Indexing): 23 | [elbow_joint, shoulder_lift_joint, shoulder_pan_joint, wrist_1_joint, wrist_2_joint, 24 | wrist_3_joint] 25 | 26 | NOTE: Where not specified, Standard Indexing is used. 27 | """ 28 | 29 | def __init__(self, model_key: str): 30 | 31 | assert model_key in ["ur3", "ur3e", "ur5", "ur5e", "ur10", "ur10e", "ur16e"] 32 | 33 | self.model = model_key 34 | 35 | file_name = model_key + ".yaml" 36 | file_path = os.path.join(os.path.dirname(__file__), "ur_parameters", file_name) 37 | 38 | super().__init__(model_key, file_path) 39 | 40 | def _swap_base_and_elbow(self, thetas: NDArray) -> NDArray: 41 | return np.array( 42 | [thetas[2], thetas[1], thetas[0], thetas[3], thetas[4], thetas[5]] 43 | ) 44 | 45 | def reorder_joints_from_rs(self, ros_thetas): 46 | """Transform joint angles list from ROS indexing to standard indexing. 47 | 48 | Rearrange a list containing the joints values from the joint indexes used 49 | in the ROS join_states messages to the standard joint indexing going from 50 | base to end effector. 51 | 52 | Args: 53 | ros_thetas (list): Joint angles with ROS indexing. 54 | 55 | Returns: 56 | np.array: Joint angles with standard indexing. 57 | 58 | """ 59 | 60 | return self._swap_base_and_elbow(ros_thetas) 61 | 62 | def reorder_joints_for_rs(self, thetas): 63 | """Transform joint angles list from standard indexing to ROS indexing. 64 | 65 | Rearrange a list containing the joints values from the standard joint indexing 66 | going from base to end effector to the indexing used in the ROS 67 | join_states messages. 68 | 69 | Args: 70 | thetas (list): Joint angles with standard indexing. 71 | 72 | Returns: 73 | np.array: Joint angles with ROS indexing. 
74 | 75 | """ 76 | 77 | return self._swap_base_and_elbow(thetas) 78 | -------------------------------------------------------------------------------- /robo_gym/utils/panda_parameters/panda.yaml: -------------------------------------------------------------------------------- 1 | # Sources: 2 | # 3 | # - Franka Emika Panda Documentation 4 | # https://wiki.happylab.at/images/5/5e/Panda_Robot_Manual_English.pdf 5 | joint_names: 6 | - panda_joint1 7 | - panda_joint2 8 | - panda_joint3 9 | - panda_joint4 10 | - panda_joint5 11 | - panda_joint6 12 | - panda_joint7 13 | remote_joint_names: 14 | - joint1 15 | - joint2 16 | - joint3 17 | - joint4 18 | - joint5 19 | - joint6 20 | - joint7 21 | workspace_area: 22 | # Values for the recommended workspace area 23 | r: 0.755 24 | min_r: 0.200 25 | 26 | # From https://frankaemika.github.io/docs/control_parameters.html 27 | # Joints limits 28 | joint_limits: 29 | panda_joint1: 30 | has_acceleration_limits: true 31 | has_effort_limits: true 32 | has_position_limits: true 33 | has_velocity_limits: true 34 | max_acceleration: 15.0 35 | max_effort: 87.0 36 | max_position: 2.8973 37 | max_velocity: 2.1750 38 | min_position: -2.8973 39 | panda_joint2: 40 | has_acceleration_limits: true 41 | has_effort_limits: true 42 | has_position_limits: true 43 | has_velocity_limits: true 44 | max_acceleration: 7.5 45 | max_effort: 87.0 46 | max_position: 1.7628 47 | max_velocity: 2.1750 48 | min_position: -1.7628 49 | panda_joint3: 50 | has_acceleration_limits: true 51 | has_effort_limits: true 52 | has_position_limits: true 53 | has_velocity_limits: true 54 | max_acceleration: 10.0 55 | max_effort: 87.0 56 | max_position: 2.8973 57 | max_velocity: 2.1750 58 | min_position: -2.8973 59 | panda_joint4: 60 | has_acceleration_limits: true 61 | has_effort_limits: true 62 | has_position_limits: true 63 | has_velocity_limits: true 64 | max_acceleration: 12.5 65 | max_effort: 87.0 66 | max_position: -0.0698 67 | max_velocity: 2.1750 68 | min_position: -3.0718 69 | panda_joint5: 70 | has_acceleration_limits: true 71 | has_effort_limits: true 72 | has_position_limits: true 73 | has_velocity_limits: true 74 | max_acceleration: 15.0 75 | max_effort: 15.0 76 | max_position: 2.8973 77 | max_velocity: 2.6100 78 | min_position: -2.8973 79 | panda_joint6: 80 | has_acceleration_limits: true 81 | has_effort_limits: true 82 | has_position_limits: true 83 | has_velocity_limits: true 84 | max_acceleration: 20.0 85 | max_effort: 12.0 86 | max_position: 3.7525 87 | max_velocity: 2.6100 88 | min_position: -0.0175 89 | panda_joint7: 90 | has_acceleration_limits: true 91 | has_effort_limits: true 92 | has_position_limits: true 93 | has_velocity_limits: true 94 | max_acceleration: 20.0 95 | max_effort: 12.0 96 | max_position: 2.8973 97 | max_velocity: 2.6100 98 | min_position: -2.8973 -------------------------------------------------------------------------------- /robo_gym/envs/panda/panda_ee_pos.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from robo_gym.envs.base.robogym_env import * 4 | from robo_gym.envs.manipulator.ee_pos_base import ManipulatorEePosEnv 5 | from robo_gym.envs.panda.panda_base import PandaBaseEnv 6 | 7 | 8 | class EndEffectorPositioningPanda(ManipulatorEePosEnv): 9 | def __init__(self, **kwargs): 10 | # not too nice - repeated in super init 11 | self._config = kwargs 12 | 13 | obs_nodes: list[ObservationNode] | None = kwargs.get( 14 | RoboGymEnv.KW_OBSERVATION_NODES 15 | ) 16 | if obs_nodes is 
None: 17 | # having this hardcoded index is suboptimal; 1 should be correct since 0 will be the manipulator observation node 18 | obs_nodes = [LastActionObservationNode(**self.get_obs_node_setup_kwargs(1))] 19 | kwargs[RoboGymEnv.KW_OBSERVATION_NODES] = obs_nodes 20 | 21 | PandaBaseEnv.set_robot_defaults(kwargs) 22 | 23 | # these are probably not too meaningful since first 6 joints are equal to UR 24 | value = [1.5, 0.25, 0.5, 1.0, 0.4, 3.14, 1.0] 25 | kwargs.setdefault(ManipulatorEePosEnv.KW_RANDOM_JOINT_OFFSET, value) 26 | 27 | kwargs.setdefault(ManipulatorEePosEnv.KW_EE_TARGET_VOLUME_BOUNDING_BOX, True) 28 | kwargs.setdefault(ManipulatorEePosEnv.KW_EE_POSITION_X_RANGE, [0.35, 0.65]) 29 | kwargs.setdefault(ManipulatorEePosEnv.KW_EE_POSITION_Y_RANGE, [-0.2, 0.2]) 30 | kwargs.setdefault(ManipulatorEePosEnv.KW_EE_POSITION_Z_RANGE, [0.15, 0.5]) 31 | 32 | super().__init__(**kwargs) 33 | 34 | def get_launch_cmd(self) -> str: 35 | # TODO make string composition more dynamic 36 | return f"roslaunch panda_robot_server panda_robot_server.launch \ 37 | rviz_gui:={self._config.get(self.KW_RVIZ_GUI_FLAG, True)} \ 38 | gazebo_gui:={self._config.get(self.KW_GAZEBO_GUI_FLAG, True)} \ 39 | world_name:=tabletop_sphere50_no_collision_no_gravity.world \ 40 | reference_frame:=world \ 41 | max_velocity_scale_factor:={self._config.get(self.KW_MAX_VELOCITY_SCALE_FACTOR, .2)} \ 42 | action_cycle_rate:={self.action_rate} \ 43 | objects_controller:=true \ 44 | rs_mode:=1object \ 45 | action_mode:={self.action_mode}\ 46 | n_objects:=1.0 \ 47 | object_0_model_name:=sphere50_no_collision \ 48 | object_0_frame:=target " 49 | 50 | 51 | class EndEffectorPositioningPandaSim(EndEffectorPositioningPanda): 52 | 53 | def __init__(self, **kwargs): 54 | kwargs[self.KW_REAL_ROBOT] = False 55 | super().__init__(**kwargs) 56 | 57 | 58 | class EndEffectorPositioningPandaRob(EndEffectorPositioningPanda): 59 | 60 | def __init__(self, **kwargs): 61 | kwargs[self.KW_REAL_ROBOT] = True 62 | super().__init__(**kwargs) 63 | -------------------------------------------------------------------------------- /robo_gym/envs/ur/ur_base.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from robo_gym.envs.base.robogym_env import * 4 | from robo_gym.envs.manipulator.manipulator_base import ( 5 | ManipulatorBaseEnv, 6 | ManipulatorActionNode, 7 | ) 8 | from robo_gym.utils.ur_utils import UR 9 | 10 | 11 | class URBaseEnv2(ManipulatorBaseEnv): 12 | 13 | KW_UR_MODEL_KEY = "ur_model" 14 | 15 | def __init__(self, **kwargs): 16 | 17 | URBaseEnv2.set_robot_defaults(kwargs) 18 | 19 | super().__init__(**kwargs) 20 | 21 | @staticmethod 22 | def set_robot_defaults(kwargs: dict[str, Any]): 23 | 24 | # prepare UR model depending on ur_model and set it in the kwargs 25 | robot_model: UR | None = kwargs.get(RoboGymEnv.KW_ROBOT_MODEL_OBJECT) 26 | if robot_model is None: 27 | robot_model = UR(model_key=kwargs.get(URBaseEnv2.KW_UR_MODEL_KEY)) 28 | kwargs[RoboGymEnv.KW_ROBOT_MODEL_OBJECT] = robot_model 29 | 30 | # default action rate 31 | kwargs.setdefault(RoboGymEnv.KW_ACTION_RATE, 20.0) 32 | 33 | # default max episode steps 34 | kwargs.setdefault(RewardNode.KW_MAX_EPISODE_STEPS, 300) 35 | 36 | # default fixed joints: last joint 37 | prefix_fix = ManipulatorActionNode.KW_PREFIX_FIX_JOINT 38 | last_joint_name = robot_model.joint_names[-1] 39 | kw_fix_last_joint = prefix_fix + last_joint_name 40 | kwargs.setdefault(kw_fix_last_joint, True) 41 | 42 | # default joint positions 43 | value = 
[ 44 | 0.0, 45 | -2.5, 46 | 1.5, 47 | 0.0, 48 | -1.4, 49 | 0.0, 50 | ] 51 | kwargs.setdefault(ManipulatorBaseEnv.KW_JOINT_POSITIONS, value) 52 | 53 | def get_launch_cmd(self) -> str: 54 | # TODO make string composition more dynamic 55 | return f"roslaunch ur_robot_server ur_robot_server.launch \ 56 | rviz_gui:={self._config.get(self.KW_RVIZ_GUI_FLAG, True)} \ 57 | gazebo_gui:={self._config.get(self.KW_GAZEBO_GUI_FLAG, True)} \ 58 | world_name:=empty.world \ 59 | reference_frame:=base_link \ 60 | max_velocity_scale_factor:={self._config.get(self.KW_MAX_VELOCITY_SCALE_FACTOR, .2)} \ 61 | action_cycle_rate:={self.action_rate} \ 62 | rs_mode:=only_robot \ 63 | ur_model:={self.ur_model_key}" 64 | 65 | @property 66 | def ur_model_key(self) -> str: 67 | return self._config.get(URBaseEnv2.KW_UR_MODEL_KEY) 68 | 69 | 70 | class EmptyEnvironment2URSim(URBaseEnv2): 71 | 72 | def __init__(self, **kwargs): 73 | kwargs[self.KW_REAL_ROBOT] = False 74 | super().__init__(**kwargs) 75 | 76 | 77 | class EmptyEnvironment2URRob(URBaseEnv2): 78 | 79 | def __init__(self, **kwargs): 80 | kwargs[self.KW_REAL_ROBOT] = True 81 | super().__init__(**kwargs) 82 | -------------------------------------------------------------------------------- /.gitlab-ci.yml: -------------------------------------------------------------------------------- 1 | stages: 2 | - build 3 | - deploy 4 | 5 | variables: 6 | PYTHON_VER: "3.8" 7 | ROS_DISTRO: "noetic" 8 | ROBO_GYM_IMAGE: $CI_REGISTRY_IMAGE:$PYTHON_VER-$CI_COMMIT_SHA 9 | ROBOT_SERVERS_IMAGE: "$CI_REGISTRY/robo-gym/robot-servers:$ROS_DISTRO-develop-latest" 10 | 11 | before_script: 12 | # docker login asks for the password to be passed through stdin for security 13 | # we use $CI_JOB_TOKEN here which is a special token provided by GitLab 14 | - echo -n $CI_JOB_TOKEN | docker login -u gitlab-ci-token --password-stdin $CI_REGISTRY 15 | 16 | # The pipeline needs at minimum one job to run 17 | dummy: 18 | stage: build 19 | variables: 20 | GIT_STRATEGY: none 21 | script: 22 | - ":" 23 | only: 24 | - /^docs.*$/ 25 | 26 | .build_robo-gym_image: &build_robo-gym_image_def 27 | image: docker:18.09 28 | services: 29 | - docker:18.09-dind 30 | tags: 31 | - docker-executor 32 | stage: build 33 | retry: 34 | max: 2 35 | script: 36 | # fetch the latest image (not failing if image is not found) 37 | - docker pull $CI_REGISTRY_IMAGE:$PYTHON_VER-latest || true 38 | - > 39 | docker build 40 | --pull 41 | --cache-from $CI_REGISTRY_IMAGE:$PYTHON_VER-latest 42 | --tag $ROBO_GYM_IMAGE 43 | --build-arg PYTHON_VER=$PYTHON_VER 44 | --build-arg CACHEBUST=$CI_COMMIT_SHA 45 | . 
46 | - docker push $ROBO_GYM_IMAGE 47 | - docker tag $ROBO_GYM_IMAGE $CI_REGISTRY_IMAGE:$PYTHON_VER-latest 48 | - docker push $CI_REGISTRY_IMAGE:$PYTHON_VER-latest 49 | except: 50 | - /^docs.*$/ 51 | 52 | build_robo-gym_image_py38: 53 | <<: *build_robo-gym_image_def 54 | variables: 55 | PYTHON_VER: "3.8" 56 | only: 57 | - schedules 58 | 59 | build_robo-gym_image_py39: 60 | <<: *build_robo-gym_image_def 61 | variables: 62 | PYTHON_VER: "3.9" 63 | only: 64 | - schedules 65 | 66 | deploy_docker_image: 67 | image: docker:18.09 68 | services: 69 | - docker:18.09-dind 70 | tags: 71 | - docker-executor 72 | stage: deploy 73 | variables: 74 | GIT_STRATEGY: none 75 | script: 76 | - echo -n $CI_JOB_TOKEN | docker login -u gitlab-ci-token --password-stdin $CI_REGISTRY 77 | - docker pull $ROBO_GYM_IMAGE 78 | - docker tag $ROBO_GYM_IMAGE $CI_REGISTRY_IMAGE/$CI_COMMIT_BRANCH:latest 79 | - docker push $CI_REGISTRY_IMAGE/$CI_COMMIT_BRANCH:latest 80 | only: 81 | - master 82 | - develop 83 | except: 84 | - /^docs.*$/ 85 | 86 | deploy_production: 87 | image: python:3.8-slim-buster 88 | before_script: 89 | - pip3 install twine 90 | - python3 setup.py sdist 91 | stage: deploy 92 | variables: 93 | TWINE_USERNAME: $PYPI_USERNAME 94 | TWINE_PASSWORD: $PYPI_PASSWORD 95 | script: 96 | - twine upload --repository-url https://upload.pypi.org/legacy/ dist/* 97 | only: 98 | - /^v.*$/ 99 | except: 100 | refs: 101 | - branches 102 | - triggers 103 | -------------------------------------------------------------------------------- /robo_gym/utils/table.py: -------------------------------------------------------------------------------- 1 | import csv 2 | 3 | 4 | class RowIterator(object): 5 | def __init__(self, table, starting_row_index=-1): 6 | self.table = table 7 | self.current_row_index = starting_row_index 8 | 9 | def __next__(self): 10 | self.current_row_index += 1 11 | if not self.has_current_row(): 12 | raise StopIteration 13 | result = self.get_current_row() 14 | return result 15 | 16 | def get_current_row(self): 17 | return self.table.get_row(self.current_row_index) 18 | 19 | def has_current_row(self): 20 | return self.table and self.current_row_index < self.table.count_rows() 21 | 22 | def has_next_row(self): 23 | return self.table and self.current_row_index + 1 < self.table.count_rows() 24 | 25 | def get_columns(self): 26 | return self.table.get_columns() 27 | 28 | 29 | class Table(object): 30 | 31 | def __init__(self, rows=None, columns=None): 32 | self.rows = rows if rows is not None else []  # list of dicts; avoid a shared mutable default that add_row would mutate across instances 33 | self.columns = columns if columns is not None else [] 34 | if not self.columns: 35 | if self.rows: 36 | self.columns = list(self.rows[0].keys())  # keys() must be called, not stored as a bound method 37 | 38 | def __iter__(self): 39 | return RowIterator(self) 40 | 41 | def get_row(self, row_index): 42 | return self.rows[row_index] 43 | 44 | def add_row(self, row): 45 | self.rows.append(row) 46 | if not self.columns: 47 | self.columns = row.keys() 48 | 49 | def count_rows(self): 50 | return len(self.rows) 51 | 52 | def get_columns(self): 53 | return self.columns 54 | 55 | def clear_rows(self): 56 | self.rows = [] 57 | 58 | 59 | def write_csv(table, file_path, delimiter=",", value_formatters=None): 60 | with open(file_path, "w", newline="") as file: 61 | writer = csv.DictWriter(file, table.columns, delimiter=delimiter) 62 | writer.writeheader() 63 | for row in table: 64 | processed_row = row 65 | for column in table.columns: 66 | value = row[column] 67 | if value_formatters is None: 68 | value = str(value) 69 | elif callable(value_formatters): 70 | value = value_formatters(value) 71 | elif column in value_formatters and
callable(value_formatters[column]): 72 | value = value_formatters[column](value) 73 | processed_row[column] = value 74 | writer.writerow(processed_row) 75 | 76 | 77 | def read_csv(file_path, delimiter=",", value_parsers=None): 78 | with open(file_path, "r") as file: 79 | reader = csv.DictReader(file, delimiter=delimiter) 80 | table = Table([], reader.fieldnames) 81 | for row in reader: 82 | processed_row = row 83 | for column in reader.fieldnames: 84 | value = row[column] 85 | if value_parsers is None: 86 | value = float(value) 87 | elif callable(value_parsers): 88 | value = value_parsers(value) 89 | elif column in value_parsers and callable(value_parsers[column]): 90 | value = value_parsers[column](value) 91 | processed_row[column] = value 92 | table.add_row(processed_row) 93 | return table 94 | -------------------------------------------------------------------------------- /robo_gym/envs/ur/ur_isaac_reach.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from robo_gym.envs.base.robogym_env import * 4 | from robo_gym.envs.manipulator.isaac_reach import * 5 | from robo_gym.envs.ur.ur_base import URBaseEnv2 6 | from robo_gym.utils.ur_utils import UR 7 | 8 | 9 | class IsaacReachUR(IsaacReachEnv): 10 | def __init__(self, **kwargs): 11 | # not too nice - repeated in super init 12 | self._config = kwargs 13 | 14 | kwargs.setdefault(URBaseEnv2.KW_UR_MODEL_KEY, "ur10") 15 | 16 | IsaacReachUR.set_robot_defaults(kwargs) 17 | 18 | obs_nodes: list[ObservationNode] | None = kwargs.get( 19 | RoboGymEnv.KW_OBSERVATION_NODES 20 | ) 21 | if obs_nodes is None: 22 | # having this hardcoded index is suboptimal; 1 should be correct since 0 will be the manipulator observation node 23 | obs_nodes = [LastActionObservationNode(**self.get_obs_node_setup_kwargs(1))] 24 | kwargs[RoboGymEnv.KW_OBSERVATION_NODES] = obs_nodes 25 | 26 | super().__init__(**kwargs) 27 | 28 | @staticmethod 29 | def set_robot_defaults(kwargs: dict[str, Any]): 30 | 31 | # prepare UR model depending on ur_model and set it in the kwargs 32 | ur_model: UR | None = kwargs.get(RoboGymEnv.KW_ROBOT_MODEL_OBJECT) 33 | if ur_model is None: 34 | ur_model = UR(model_key=kwargs.get(URBaseEnv2.KW_UR_MODEL_KEY)) 35 | kwargs[RoboGymEnv.KW_ROBOT_MODEL_OBJECT] = ur_model 36 | 37 | # default action rate 38 | kwargs.setdefault(RoboGymEnv.KW_ACTION_RATE, 30.0) 39 | 40 | # default max episode steps 41 | kwargs.setdefault(RewardNode.KW_MAX_EPISODE_STEPS, 600) 42 | 43 | kwargs.setdefault(ManipulatorEePosEnv.KW_CONTINUE_EXCEPT_COLLISION, True) 44 | # default joint positions 45 | kwargs.setdefault( 46 | ManipulatorBaseEnv.KW_JOINT_POSITIONS, [0, -1.7120, 1.7120, 0, 0, 0] 47 | ) 48 | 49 | kwargs.setdefault(ManipulatorEePosEnv.KW_EE_ROTATION_PITCH_RANGE, math.pi / 2) 50 | 51 | def get_launch_cmd(self) -> str: 52 | # TODO make string composition more dynamic 53 | # TODO duplicated from EndEffectorPositioning2UR 54 | return f"roslaunch ur_robot_server ur_robot_server.launch \ 55 | rviz_gui:={self._config.get(self.KW_RVIZ_GUI_FLAG, True)} \ 56 | gazebo_gui:={self._config.get(self.KW_GAZEBO_GUI_FLAG, True)} \ 57 | world_name:=isaactabletop_sphere50_no_collision.world \ 58 | reference_frame:=base_link \ 59 | ee_frame:=tool1 \ 60 | max_velocity_scale_factor:={self._config.get(self.KW_MAX_VELOCITY_SCALE_FACTOR, .2)} \ 61 | action_cycle_rate:={self.action_rate} \ 62 | objects_controller:=true \ 63 | rs_mode:=1object \ 64 | n_objects:=1.0 \ 65 | object_0_model_name:=sphere50_no_collision \ 66 | 
object_0_frame:=target \ 67 | z:=0.0 \ 68 | ur_model:={self.ur_model_key}" 69 | 70 | @property 71 | def ur_model_key(self) -> str: 72 | return self._config.get(URBaseEnv2.KW_UR_MODEL_KEY) 73 | 74 | 75 | class IsaacReachURSim(IsaacReachUR): 76 | 77 | def __init__(self, **kwargs): 78 | kwargs[self.KW_REAL_ROBOT] = False 79 | super().__init__(**kwargs) 80 | 81 | 82 | class IsaacReachURRob(IsaacReachUR): 83 | 84 | def __init__(self, **kwargs): 85 | kwargs[self.KW_REAL_ROBOT] = True 86 | super().__init__(**kwargs) 87 | -------------------------------------------------------------------------------- /docs/examples/rllib/README.md: -------------------------------------------------------------------------------- 1 | # Getting started using RLlib with robo-gym 2 | 3 | ## Prerequisites 4 | 5 | Tested on Ubuntu 20.04 using Python 3.8. 6 | 7 | ### Installation of robo-gym 8 | 9 | Install an up-to-date version of robo-gym-server-modules. It is recommended to check out version 0.3.0.0 or newer and install it from source using 10 | 11 | pip install -e . 12 | 13 | Install robo-gym from your local copy using 14 | 15 | pip install -e . 16 | 17 | ### Installation of RLlib 18 | 19 | Follow [instructions from RLlib](https://docs.ray.io/en/latest/rllib/index.html). 20 | 21 | It is recommended to also install gputil for use by RLlib: 22 | 23 | pip install gputil 24 | 25 | At runtime, you may encounter this error: 26 | 27 | (...) ray/_private/accelerators/nvidia_gpu.py", line 71, in get_current_node_accelerator_type 28 | device_name = device_name.decode("utf-8") 29 | UnicodeDecodeError: 'utf-8' codec can't decode byte 0xf8 in position 0: invalid start byte 30 | 31 | It is typically resolved by updating the NVIDIA GPU driver. If the error occurs in a WSL environment, this typically concerns the Windows driver. 32 | 33 | ## Using RLlib with robo-gym 34 | 35 | The scripts here are based on examples from RLlib. They are tested against Ray/RLlib v2.1.0. The API may differ if you use a different version. 36 | 37 | Note that setting the environment argument `rs_state_to_info` to `False` when initializing UR environments is required for being able to save checkpoints. 38 | 39 | ### Scripts for training followed by inference 40 | 41 | * [Training, saving and inference for MiR100](./training_inference_mir.py) 42 | * [Training, saving and inference for UR5](./training_inference_ur.py) 43 | 44 | Each training script does a few inference runs in its last phase, applying the trained model to a new instance of the respective environment. The training scripts support a number of arguments to customize the training and inference runs. Call a script with `-h` or `--help` to see the available arguments. 45 | 46 | In addition, the environment variable `RLLIB_NUM_GPUS` can be set to the desired value (default: 0) to set the number of GPUs that RLlib should use. 47 | 48 | The inference scripts for the policy-only approach may need to be adjusted if a non-default framework (i.e., not Torch) was used for the training. 49 | 50 | Note that the default environment registration from Gymnasium (establishing the link from environment names to the implementing Python classes) is not sufficient for using the environments in RLlib agents. This is why each of the training scripts uses RLlib's own registration mechanism and consequently has to import the respective environment class, as sketched below.
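For illustration, the registration pattern used by the training scripts boils down to the following sketch (taken from the UR training script; adjust the environment class and name for other environments):

    from ray.tune.registry import register_env
    from robo_gym.envs.ur.ur_ee_positioning import EndEffectorPositioningURSim

    def env_creator_ur(config):
        # RLlib passes its env_config dict; forward it to the environment constructor.
        return EndEffectorPositioningURSim(**config)

    register_env(name="EndEffectorPositioningURSim-v0", env_creator=env_creator_ur)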
51 | 52 | ### Scripts for inference from policy checkpoint 53 | 54 | The saved checkpoints from the training scripts can also be used in the standalone inference scripts. 55 | 56 | Pass the policy checkpoint path as an argument to the script. When using a policy from an algorithm checkpoint, the policy checkpoint folder is typically obtained by appending `policies/default_policy` to the algorithm checkpoint path. 57 | 58 | * [Inference from a saved policy checkpoint for MiR100](./policy_from_cp_mir.py) 59 | * [Inference from a saved policy checkpoint for UR5](./policy_from_cp_ur.py) 60 | 61 | Inference from saved algorithm checkpoints is possible as an alternative to the restoring of policy checkpoints shown here. The inference code can be derived from the inference section of the training scripts. However, configured algorithms in RLlib contain their environment; for robo-gym environments, this implies that a simulation server is started for each worker instance. -------------------------------------------------------------------------------- /robo_gym/envs/panda/panda_isaac_reach.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import numpy as np 4 | 5 | from robo_gym.envs.base.robogym_env import * 6 | from robo_gym.envs.manipulator.isaac_reach import * 7 | from robo_gym.envs.panda.panda_base import PandaBaseEnv 8 | from robo_gym.utils.panda_utils import Panda 9 | 10 | 11 | class IsaacReachPanda(IsaacReachEnv): 12 | def __init__(self, **kwargs): 13 | # not too nice - repeated in super init 14 | self._config = kwargs 15 | 16 | IsaacReachPanda.set_robot_defaults(kwargs) 17 | 18 | obs_nodes: list[ObservationNode] | None = kwargs.get( 19 | RoboGymEnv.KW_OBSERVATION_NODES 20 | ) 21 | if obs_nodes is None: 22 | # having this hardcoded index is suboptimal; 1 should be correct since 0 will be the manipulator observation node 23 | obs_nodes = [LastActionObservationNode(**self.get_obs_node_setup_kwargs(1))] 24 | kwargs[RoboGymEnv.KW_OBSERVATION_NODES] = obs_nodes 25 | 26 | super().__init__(**kwargs) 27 | 28 | @staticmethod 29 | def set_robot_defaults(kwargs: dict[str, Any]): 30 | 31 | # prepare the robot model and set it in the kwargs 32 | robot_model: Panda | None = kwargs.get(RoboGymEnv.KW_ROBOT_MODEL_OBJECT) 33 | if robot_model is None: 34 | robot_model = Panda( 35 | model_key=kwargs.get(PandaBaseEnv.KW_PANDA_MODEL_KEY, "panda") 36 | ) 37 | kwargs[RoboGymEnv.KW_ROBOT_MODEL_OBJECT] = robot_model 38 | 39 | # default action rate 40 | kwargs.setdefault(RoboGymEnv.KW_ACTION_RATE, 30.0) 41 | 42 | # default max episode steps 43 | kwargs.setdefault(RewardNode.KW_MAX_EPISODE_STEPS, 600) 44 | 45 | kwargs.setdefault(ManipulatorEePosEnv.KW_CONTINUE_EXCEPT_COLLISION, False) 46 | # default joint positions 47 | # panda_finger_joint.*: 0.04 48 | kwargs.setdefault( 49 | ManipulatorBaseEnv.KW_JOINT_POSITIONS, 50 | [0, -0.569, 0, -2.81, 0, 3.037, 0.741], 51 | ) 52 | 53 | kwargs.setdefault(ManipulatorEePosEnv.KW_EE_ROTATION_PITCH_RANGE, math.pi) 54 | 55 | # values for finger joints in observation 56 | kwargs.setdefault( 57 | IsaacReachEnv.KW_ISAAC_OBS_EXTRA_STATIC_JOINTS, 58 | # not sure about min (-1) and max (1) 59 | np.array([[0.04, -1, 1], [0.04, -1, 1]], dtype=np.float32), 60 | ) 61 | 62 | def get_launch_cmd(self) -> str: 63 | # TODO make string composition more dynamic 64 | return f"roslaunch panda_robot_server panda_robot_server.launch \ 65 | rviz_gui:={self._config.get(self.KW_RVIZ_GUI_FLAG, True)} \ 66 | gazebo_gui:={self._config.get(self.KW_GAZEBO_GUI_FLAG,
True)} \ 67 | world_name:=isaactabletop_sphere50_no_collision.world \ 68 | reference_frame:=world \ 69 | max_velocity_scale_factor:={self._config.get(self.KW_MAX_VELOCITY_SCALE_FACTOR, .2)} \ 70 | action_cycle_rate:={self.action_rate} \ 71 | objects_controller:=true \ 72 | rs_mode:=1object \ 73 | action_mode:={self.action_mode} \ 74 | n_objects:=1.0 \ 75 | object_0_model_name:=sphere50_no_collision \ 76 | object_0_frame:=target \ 77 | z:=0.0 " 78 | 79 | 80 | class IsaacReachPandaSim(IsaacReachPanda): 81 | 82 | def __init__(self, **kwargs): 83 | kwargs[self.KW_REAL_ROBOT] = False 84 | super().__init__(**kwargs) 85 | 86 | 87 | class IsaacReachPandaRob(IsaacReachPanda): 88 | 89 | def __init__(self, **kwargs): 90 | kwargs[self.KW_REAL_ROBOT] = True 91 | super().__init__(**kwargs) 92 | -------------------------------------------------------------------------------- /robo_gym/utils/ur_parameters/ur10.yaml: -------------------------------------------------------------------------------- 1 | # Sources: 2 | # 3 | # - UR10 User Manual, Universal Robots, UR10/CB3 4 | # https://s3-eu-west-1.amazonaws.com/ur-support-site/77490/99203_UR10_User_Manual_en_Global.pdf 5 | joint_names: 6 | - shoulder_pan 7 | - shoulder_lift 8 | - elbow_joint 9 | - wrist_1 10 | - wrist_2 11 | - wrist_3 12 | remote_joint_names: 13 | - base_joint 14 | - shoulder_joint 15 | - elbow_joint 16 | - wrist_1_joint 17 | - wrist_2_joint 18 | - wrist_3_joint 19 | workspace_area: 20 | # Values for the recommended workspace area 21 | r: 1.300 22 | min_r: 0.300 23 | 24 | # From https://github.com/ros-industrial/universal_robot 25 | # Joints limits 26 | # 27 | # Sources: 28 | # 29 | # - UR10 User Manual, Universal Robots, UR10/CB3, Version 3.13 30 | # https://s3-eu-west-1.amazonaws.com/ur-support-site/69442/99203_UR10_User_Manual_en_Global.pdf 31 | # - Support > Articles > UR articles > Max. joint torques 32 | # https://www.universal-robots.com/articles/ur-articles/max-joint-torques 33 | # retrieved: 2020-06-16, last modified: 2020-06-09 34 | joint_limits: 35 | shoulder_pan: 36 | # acceleration limits are not publicly available 37 | has_acceleration_limits: false 38 | has_effort_limits: true 39 | has_position_limits: true 40 | has_velocity_limits: true 41 | max_effort: 330.0 42 | max_position: 6.28318530718 43 | max_velocity: 2.0943951 44 | min_position: -6.28318530718 45 | shoulder_lift: 46 | # acceleration limits are not publicly available 47 | has_acceleration_limits: false 48 | has_effort_limits: true 49 | has_position_limits: true 50 | has_velocity_limits: true 51 | max_effort: 330.0 52 | max_position: 6.28318530718 53 | max_velocity: 2.0943951 54 | min_position: -6.28318530718 55 | elbow_joint: 56 | # acceleration limits are not publicly available 57 | has_acceleration_limits: false 58 | has_effort_limits: true 59 | has_position_limits: true 60 | has_velocity_limits: true 61 | max_effort: 150.0 62 | # we artificially limit this joint to half its actual joint position limit 63 | # to avoid (MoveIt/OMPL) planning problems, as due to the physical 64 | # construction of the robot, it's impossible to rotate the 'elbow_joint' 65 | # over more than approx +- 1 pi (the shoulder lift joint gets in the way). 66 | # 67 | # This leads to planning problems as the search space will be divided into 68 | # two sections, with no connections from one to the other. 69 | # 70 | # Refer to https://github.com/ros-industrial/universal_robot/issues/265 for 71 | # more information.
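# For reference: position limits are expressed in radians and velocity limits
# in rad/s; 6.28318530718 is approx. 2*pi, 3.14159265359 is approx. pi, and a
# max_velocity of 2.0943951 rad/s corresponds to 120 deg/s.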
72 | max_position: 3.14159265359 73 | max_velocity: 3.14159265359 74 | min_position: -3.14159265359 75 | wrist_1: 76 | # acceleration limits are not publicly available 77 | has_acceleration_limits: false 78 | has_effort_limits: true 79 | has_position_limits: true 80 | has_velocity_limits: true 81 | max_effort: 56.0 82 | max_position: 6.28318530718 83 | max_velocity: 3.14159265359 84 | min_position: -6.28318530718 85 | wrist_2: 86 | # acceleration limits are not publicly available 87 | has_acceleration_limits: false 88 | has_effort_limits: true 89 | has_position_limits: true 90 | has_velocity_limits: true 91 | max_effort: 56.0 92 | max_position: 6.28318530718 93 | max_velocity: 3.14159265359 94 | min_position: -6.28318530718 95 | wrist_3: 96 | # acceleration limits are not publicly available 97 | has_acceleration_limits: false 98 | has_effort_limits: true 99 | has_position_limits: true 100 | has_velocity_limits: true 101 | max_effort: 56.0 102 | max_position: 6.28318530718 103 | max_velocity: 3.14159265359 104 | min_position: -6.28318530718 -------------------------------------------------------------------------------- /robo_gym/utils/ur_parameters/ur3.yaml: -------------------------------------------------------------------------------- 1 | # Sources: 2 | # 3 | # - UR3 User Manual, Universal Robots, UR3/CB3 4 | # https://s3-eu-west-1.amazonaws.com/ur-support-site/77348/99241_UR3_User_Manual_en_Global.pdf 5 | joint_names: 6 | - shoulder_pan 7 | - shoulder_lift 8 | - elbow_joint 9 | - wrist_1 10 | - wrist_2 11 | - wrist_3 12 | remote_joint_names: 13 | - base_joint 14 | - shoulder_joint 15 | - elbow_joint 16 | - wrist_1_joint 17 | - wrist_2_joint 18 | - wrist_3_joint 19 | workspace_area: 20 | # Values for the recommended workspace area 21 | r: 0.450 22 | min_r: 0.200 23 | 24 | # From https://github.com/ros-industrial/universal_robot 25 | # Joints limits 26 | # 27 | # Sources: 28 | # 29 | # - UR3 User Manual, Universal Robots, UR3/CB3, Version 3.13 30 | # https://s3-eu-west-1.amazonaws.com/ur-support-site/69300/99241_UR3_User_Manual_en_Global.pdf 31 | # - Support > Articles > UR articles > Max. joint torques 32 | # https://www.universal-robots.com/articles/ur-articles/max-joint-torques 33 | # retrieved: 2020-06-16, last modified: 2020-06-09 34 | joint_limits: 35 | shoulder_pan: 36 | # acceleration limits are not publicly available 37 | has_acceleration_limits: false 38 | has_effort_limits: true 39 | has_position_limits: true 40 | has_velocity_limits: true 41 | max_effort: 56.0 42 | max_position: 6.28318530718 43 | max_velocity: 3.14159265359 44 | min_position: -6.28318530718 45 | shoulder_lift: 46 | # acceleration limits are not publicly available 47 | has_acceleration_limits: false 48 | has_effort_limits: true 49 | has_position_limits: true 50 | has_velocity_limits: true 51 | max_effort: 56.0 52 | max_position: 6.28318530718 53 | max_velocity: 3.14159265359 54 | min_position: -6.28318530718 55 | elbow_joint: 56 | # acceleration limits are not publicly available 57 | has_acceleration_limits: false 58 | has_effort_limits: true 59 | has_position_limits: true 60 | has_velocity_limits: true 61 | max_effort: 28.0 62 | # we artificially limit this joint to half its actual joint position limit 63 | # to avoid (MoveIt/OMPL) planning problems, as due to the physical 64 | # construction of the robot, it's impossible to rotate the 'elbow_joint' 65 | # over more than approx +- 1 pi (the shoulder lift joint gets in the way). 
66 | # 67 | # This leads to planning problems as the search space will be divided into 68 | # two sections, with no connections from one to the other. 69 | # 70 | # Refer to https://github.com/ros-industrial/universal_robot/issues/265 for 71 | # more information. 72 | max_position: 3.14159265359 73 | max_velocity: 3.14159265359 74 | min_position: -3.14159265359 75 | wrist_1: 76 | # acceleration limits are not publicly available 77 | has_acceleration_limits: false 78 | has_effort_limits: true 79 | has_position_limits: true 80 | has_velocity_limits: true 81 | max_effort: 12.0 82 | max_position: 6.28318530718 83 | max_velocity: 6.28318530718 84 | min_position: -6.28318530718 85 | wrist_2: 86 | # acceleration limits are not publicly available 87 | has_acceleration_limits: false 88 | has_effort_limits: true 89 | has_position_limits: true 90 | has_velocity_limits: true 91 | max_effort: 12.0 92 | max_position: 6.28318530718 93 | max_velocity: 6.28318530718 94 | min_position: -6.28318530718 95 | wrist_3: 96 | # acceleration limits are not publicly available 97 | has_acceleration_limits: false 98 | has_effort_limits: true 99 | has_position_limits: true 100 | has_velocity_limits: true 101 | max_effort: 12.0 102 | max_position: 6.28318530718 103 | max_velocity: 6.28318530718 104 | min_position: -6.28318530718 -------------------------------------------------------------------------------- /robo_gym/utils/ur_parameters/ur10e.yaml: -------------------------------------------------------------------------------- 1 | # Sources: 2 | # 3 | # - Universal Robots e-Series, User Manual, UR10e 4 | # https://s3-eu-west-1.amazonaws.com/ur-support-site/77195/99405_UR10e_User_Manual_en_Global.pdf 5 | joint_names: 6 | - shoulder_pan 7 | - shoulder_lift 8 | - elbow_joint 9 | - wrist_1 10 | - wrist_2 11 | - wrist_3 12 | remote_joint_names: 13 | - base_joint 14 | - shoulder_joint 15 | - elbow_joint 16 | - wrist_1_joint 17 | - wrist_2_joint 18 | - wrist_3_joint 19 | workspace_area: 20 | # Values for the recommended workspace area 21 | r: 1.300 22 | min_r: 0.300 23 | 24 | # From https://github.com/ros-industrial/universal_robot 25 | # Joints limits 26 | # 27 | # Sources: 28 | # 29 | # - Universal Robots e-Series, User Manual, UR10e, Version 5.8 30 | # https://s3-eu-west-1.amazonaws.com/ur-support-site/69139/99405_UR10e_User_Manual_en_Global.pdf 31 | # - Support > Articles > UR articles > Max. 
joint torques 32 | # https://www.universal-robots.com/articles/ur-articles/max-joint-torques 33 | # retrieved: 2020-06-16, last modified: 2020-06-09 34 | joint_limits: 35 | shoulder_pan: 36 | # acceleration limits are not publicly available 37 | has_acceleration_limits: false 38 | has_effort_limits: true 39 | has_position_limits: true 40 | has_velocity_limits: true 41 | max_effort: 330.0 42 | max_position: 6.28318530718 43 | max_velocity: 2.0943951 44 | min_position: -6.28318530718 45 | shoulder_lift: 46 | # acceleration limits are not publicly available 47 | has_acceleration_limits: false 48 | has_effort_limits: true 49 | has_position_limits: true 50 | has_velocity_limits: true 51 | max_effort: 330.0 52 | max_position: 6.28318530718 53 | max_velocity: 2.0943951 54 | min_position: -6.28318530718 55 | elbow_joint: 56 | # acceleration limits are not publicly available 57 | has_acceleration_limits: false 58 | has_effort_limits: true 59 | has_position_limits: true 60 | has_velocity_limits: true 61 | max_effort: 150.0 62 | # we artificially limit this joint to half its actual joint position limit 63 | # to avoid (MoveIt/OMPL) planning problems, as due to the physical 64 | # construction of the robot, it's impossible to rotate the 'elbow_joint' 65 | # over more than approx +- 1 pi (the shoulder lift joint gets in the way). 66 | # 67 | # This leads to planning problems as the search space will be divided into 68 | # two sections, with no connections from one to the other. 69 | # 70 | # Refer to https://github.com/ros-industrial/universal_robot/issues/265 for 71 | # more information. 72 | max_position: 3.14159265359 73 | max_velocity: 3.14159265359 74 | min_position: -3.14159265359 75 | wrist_1: 76 | # acceleration limits are not publicly available 77 | has_acceleration_limits: false 78 | has_effort_limits: true 79 | has_position_limits: true 80 | has_velocity_limits: true 81 | max_effort: 56.0 82 | max_position: 6.28318530718 83 | max_velocity: 3.14159265359 84 | min_position: -6.28318530718 85 | wrist_2: 86 | # acceleration limits are not publicly available 87 | has_acceleration_limits: false 88 | has_effort_limits: true 89 | has_position_limits: true 90 | has_velocity_limits: true 91 | max_effort: 56.0 92 | max_position: 6.28318530718 93 | max_velocity: 3.14159265359 94 | min_position: -6.28318530718 95 | wrist_3: 96 | # acceleration limits are not publicly available 97 | has_acceleration_limits: false 98 | has_effort_limits: true 99 | has_position_limits: true 100 | has_velocity_limits: true 101 | max_effort: 56.0 102 | max_position: 6.28318530718 103 | max_velocity: 3.14159265359 104 | min_position: -6.28318530718 105 | -------------------------------------------------------------------------------- /robo_gym/utils/ur_parameters/ur16e.yaml: -------------------------------------------------------------------------------- 1 | # Sources: 2 | # 3 | # - Universal Robots e-Series, User Manual, UR16e 4 | # https://s3-eu-west-1.amazonaws.com/ur-support-site/77243/99473_UR16e_User_Manual_en_Global.pdf 5 | joint_names: 6 | - shoulder_pan 7 | - shoulder_lift 8 | - elbow_joint 9 | - wrist_1 10 | - wrist_2 11 | - wrist_3 12 | remote_joint_names: 13 | - base_joint 14 | - shoulder_joint 15 | - elbow_joint 16 | - wrist_1_joint 17 | - wrist_2_joint 18 | - wrist_3_joint 19 | workspace_area: 20 | # Values for the recommended workspace area 21 | r: 0.800 22 | min_r: 0.300 23 | 24 | # From https://github.com/ros-industrial/universal_robot 25 | # Joints limits 26 | # 27 | # Sources: 28 | # 29 | # - Universal 
Robots e-Series, User Manual, UR16e, Version 5.8 30 | # https://s3-eu-west-1.amazonaws.com/ur-support-site/69187/99473_UR16e_User_Manual_en_Global.pdf 31 | # - Support > Articles > UR articles > Max. joint torques 32 | # https://www.universal-robots.com/articles/ur-articles/max-joint-torques 33 | # retrieved: 2020-06-16, last modified: 2020-06-09 34 | joint_limits: 35 | shoulder_pan: 36 | # acceleration limits are not publicly available 37 | has_acceleration_limits: false 38 | has_effort_limits: true 39 | has_position_limits: true 40 | has_velocity_limits: true 41 | max_effort: 330.0 42 | max_position: 6.28318530718 43 | max_velocity: 2.0943951 44 | min_position: -6.28318530718 45 | shoulder_lift: 46 | # acceleration limits are not publicly available 47 | has_acceleration_limits: false 48 | has_effort_limits: true 49 | has_position_limits: true 50 | has_velocity_limits: true 51 | max_effort: 330.0 52 | max_position: 6.28318530718 53 | max_velocity: 2.0943951 54 | min_position: -6.28318530718 55 | elbow_joint: 56 | # acceleration limits are not publicly available 57 | has_acceleration_limits: false 58 | has_effort_limits: true 59 | has_position_limits: true 60 | has_velocity_limits: true 61 | max_effort: 150.0 62 | # we artificially limit this joint to half its actual joint position limit 63 | # to avoid (MoveIt/OMPL) planning problems, as due to the physical 64 | # construction of the robot, it's impossible to rotate the 'elbow_joint' 65 | # over more than approx +- 1 pi (the shoulder lift joint gets in the way). 66 | # 67 | # This leads to planning problems as the search space will be divided into 68 | # two sections, with no connections from one to the other. 69 | # 70 | # Refer to https://github.com/ros-industrial/universal_robot/issues/265 for 71 | # more information. 
72 | max_position: 3.14159265359 73 | max_velocity: 3.14159265359 74 | min_position: -3.14159265359 75 | wrist_1: 76 | # acceleration limits are not publicly available 77 | has_acceleration_limits: false 78 | has_effort_limits: true 79 | has_position_limits: true 80 | has_velocity_limits: true 81 | max_effort: 56.0 82 | max_position: 6.28318530718 83 | max_velocity: 3.14159265359 84 | min_position: -6.28318530718 85 | wrist_2: 86 | # acceleration limits are not publicly available 87 | has_acceleration_limits: false 88 | has_effort_limits: true 89 | has_position_limits: true 90 | has_velocity_limits: true 91 | max_effort: 56.0 92 | max_position: 6.28318530718 93 | max_velocity: 3.14159265359 94 | min_position: -6.28318530718 95 | wrist_3: 96 | # acceleration limits are not publicly available 97 | has_acceleration_limits: false 98 | has_effort_limits: true 99 | has_position_limits: true 100 | has_velocity_limits: true 101 | max_effort: 56.0 102 | max_position: 6.28318530718 103 | max_velocity: 3.14159265359 104 | min_position: -6.28318530718 105 | -------------------------------------------------------------------------------- /robo_gym/utils/ur_parameters/ur3e.yaml: -------------------------------------------------------------------------------- 1 | # Sources: 2 | # 3 | # - Universal Robots e-Series, User Manual, UR3e 4 | # https://s3-eu-west-1.amazonaws.com/ur-support-site/77099/99403_UR3e_User_Manual_en_Global.pdf 5 | joint_names: 6 | - shoulder_pan 7 | - shoulder_lift 8 | - elbow_joint 9 | - wrist_1 10 | - wrist_2 11 | - wrist_3 12 | remote_joint_names: 13 | - base_joint 14 | - shoulder_joint 15 | - elbow_joint 16 | - wrist_1_joint 17 | - wrist_2_joint 18 | - wrist_3_joint 19 | workspace_area: 20 | # Values for the recommended workspace area 21 | r: 0.450 22 | min_r: 0.200 23 | 24 | # From https://github.com/ros-industrial/universal_robot 25 | # Joints limits 26 | # 27 | # Sources: 28 | # 29 | # - Universal Robots e-Series, User Manual, UR3e, Version 5.8 30 | # https://s3-eu-west-1.amazonaws.com/ur-support-site/69043/99403_UR3e_User_Manual_en_Global.pdf 31 | # - Support > Articles > UR articles > Max. joint torques 32 | # https://www.universal-robots.com/articles/ur-articles/max-joint-torques 33 | # retrieved: 2020-06-16, last modified: 2020-06-09 34 | joint_limits: 35 | shoulder_pan: 36 | # acceleration limits are not publicly available 37 | has_acceleration_limits: false 38 | has_effort_limits: true 39 | has_position_limits: true 40 | has_velocity_limits: true 41 | max_effort: 56.0 42 | max_position: 6.28318530718 43 | max_velocity: 3.14159265359 44 | min_position: -6.28318530718 45 | shoulder_lift: 46 | # acceleration limits are not publicly available 47 | has_acceleration_limits: false 48 | has_effort_limits: true 49 | has_position_limits: true 50 | has_velocity_limits: true 51 | max_effort: 56.0 52 | max_position: 6.28318530718 53 | max_velocity: 3.14159265359 54 | min_position: -6.28318530718 55 | elbow_joint: 56 | # acceleration limits are not publicly available 57 | has_acceleration_limits: false 58 | has_effort_limits: true 59 | has_position_limits: true 60 | has_velocity_limits: true 61 | max_effort: 28.0 62 | # we artificially limit this joint to half its actual joint position limit 63 | # to avoid (MoveIt/OMPL) planning problems, as due to the physical 64 | # construction of the robot, it's impossible to rotate the 'elbow_joint' 65 | # over more than approx +- 1 pi (the shoulder lift joint gets in the way). 
66 | # 67 | # This leads to planning problems as the search space will be divided into 68 | # two sections, with no connections from one to the other. 69 | # 70 | # Refer to https://github.com/ros-industrial/universal_robot/issues/265 for 71 | # more information. 72 | max_position: 3.14159265359 73 | max_velocity: 3.14159265359 74 | min_position: -3.14159265359 75 | wrist_1: 76 | # acceleration limits are not publicly available 77 | has_acceleration_limits: false 78 | has_effort_limits: true 79 | has_position_limits: true 80 | has_velocity_limits: true 81 | max_effort: 12.0 82 | max_position: 6.28318530718 83 | max_velocity: 6.28318530718 84 | min_position: -6.28318530718 85 | wrist_2: 86 | # acceleration limits are not publicly available 87 | has_acceleration_limits: false 88 | has_effort_limits: true 89 | has_position_limits: true 90 | has_velocity_limits: true 91 | max_effort: 12.0 92 | max_position: 6.28318530718 93 | max_velocity: 6.28318530718 94 | min_position: -6.28318530718 95 | wrist_3: 96 | # acceleration limits are not publicly available 97 | has_acceleration_limits: false 98 | has_effort_limits: true 99 | has_position_limits: true 100 | has_velocity_limits: true 101 | max_effort: 12.0 102 | max_position: 6.28318530718 103 | max_velocity: 6.28318530718 104 | min_position: -6.28318530718 105 | -------------------------------------------------------------------------------- /robo_gym/utils/ur_parameters/ur5.yaml: -------------------------------------------------------------------------------- 1 | # Sources: 2 | # 3 | # - UR5 User Manual, Universal Robots, UR5/CB3 4 | # https://s3-eu-west-1.amazonaws.com/ur-support-site/77419/99202_UR5_User_Manual_en_Global.pdf 5 | joint_names: 6 | - shoulder_pan 7 | - shoulder_lift 8 | - elbow_joint 9 | - wrist_1 10 | - wrist_2 11 | - wrist_3 12 | remote_joint_names: 13 | - base_joint 14 | - shoulder_joint 15 | - elbow_joint 16 | - wrist_1_joint 17 | - wrist_2_joint 18 | - wrist_3_joint 19 | workspace_area: 20 | # Values for the recommended workspace area 21 | r: 0.750 22 | min_r: 0.200 23 | 24 | # From https://github.com/ros-industrial/universal_robot 25 | # Joints limits 26 | # 27 | # Sources: 28 | # 29 | # - UR5 User Manual, Universal Robots, UR5/CB3, Version 3.13 30 | # https://s3-eu-west-1.amazonaws.com/ur-support-site/69371/99202_UR5_User_Manual_en_Global.pdf 31 | # - Support > Articles > UR articles > Max. 
joint torques 32 | # https://www.universal-robots.com/articles/ur-articles/max-joint-torques 33 | # retrieved: 2020-06-16, last modified: 2020-06-09 34 | joint_limits: 35 | shoulder_pan: 36 | # acceleration limits are not publicly available 37 | has_acceleration_limits: false 38 | has_effort_limits: true 39 | has_position_limits: true 40 | has_velocity_limits: true 41 | max_effort: 150.0 42 | max_position: 6.28318530718 43 | max_velocity: 3.14159265359 44 | min_position: -6.28318530718 45 | shoulder_lift: 46 | # acceleration limits are not publicly available 47 | has_acceleration_limits: false 48 | has_effort_limits: true 49 | has_position_limits: true 50 | has_velocity_limits: true 51 | max_effort: 150.0 52 | max_position: 6.28318530718 53 | max_velocity: 3.14159265359 54 | min_position: -6.28318530718 55 | elbow_joint: 56 | # acceleration limits are not publicly available 57 | has_acceleration_limits: false 58 | has_effort_limits: true 59 | has_position_limits: true 60 | has_velocity_limits: true 61 | max_effort: 150.0 62 | # we artificially limit this joint to half its actual joint position limit 63 | # to avoid (MoveIt/OMPL) planning problems, as due to the physical 64 | # construction of the robot, it's impossible to rotate the 'elbow_joint' 65 | # over more than approx +- 1 pi (the shoulder lift joint gets in the way). 66 | # 67 | # This leads to planning problems as the search space will be divided into 68 | # two sections, with no connections from one to the other. 69 | # 70 | # Refer to https://github.com/ros-industrial/universal_robot/issues/265 for 71 | # more information. 72 | max_position: 3.14159265359 73 | max_velocity: 3.14159265359 74 | min_position: -3.14159265359 75 | wrist_1: 76 | # acceleration limits are not publicly available 77 | has_acceleration_limits: false 78 | has_effort_limits: true 79 | has_position_limits: true 80 | has_velocity_limits: true 81 | max_effort: 28.0 82 | max_position: 6.28318530718 83 | max_velocity: 3.14159265359 84 | min_position: -6.28318530718 85 | wrist_2: 86 | # acceleration limits are not publicly available 87 | has_acceleration_limits: false 88 | has_effort_limits: true 89 | has_position_limits: true 90 | has_velocity_limits: true 91 | max_effort: 28.0 92 | max_position: 6.28318530718 93 | max_velocity: 3.14159265359 94 | min_position: -6.28318530718 95 | wrist_3: 96 | # acceleration limits are not publicly available 97 | has_acceleration_limits: false 98 | has_effort_limits: true 99 | has_position_limits: true 100 | has_velocity_limits: true 101 | max_effort: 28.0 102 | max_position: 6.28318530718 103 | max_velocity: 3.14159265359 104 | min_position: -6.28318530718 105 | 106 | -------------------------------------------------------------------------------- /robo_gym/utils/ur_parameters/ur5e.yaml: -------------------------------------------------------------------------------- 1 | # Sources: 2 | # 3 | # - Universal Robots e-Series, User Manual, UR5e 4 | # https://s3-eu-west-1.amazonaws.com/ur-support-site/77147/99404_UR5e_User_Manual_en_Global.pdf 5 | joint_names: 6 | - shoulder_pan 7 | - shoulder_lift 8 | - elbow_joint 9 | - wrist_1 10 | - wrist_2 11 | - wrist_3 12 | remote_joint_names: 13 | - base_joint 14 | - shoulder_joint 15 | - elbow_joint 16 | - wrist_1_joint 17 | - wrist_2_joint 18 | - wrist_3_joint 19 | workspace_area: 20 | # Values for the recommended workspace area 21 | r: 0.750 22 | min_r: 0.200 23 | 24 | # From https://github.com/ros-industrial/universal_robot 25 | # Joints limits 26 | # 27 | # Sources: 28 | # 29 | # - 
Universal Robots e-Series, User Manual, UR5e, Version 5.8 30 | # https://s3-eu-west-1.amazonaws.com/ur-support-site/69091/99404_UR5e_User_Manual_en_Global.pdf 31 | # - Support > Articles > UR articles > Max. joint torques 32 | # https://www.universal-robots.com/articles/ur-articles/max-joint-torques 33 | # retrieved: 2020-06-16, last modified: 2020-06-09 34 | joint_limits: 35 | shoulder_pan: 36 | # acceleration limits are not publicly available 37 | has_acceleration_limits: false 38 | has_effort_limits: true 39 | has_position_limits: true 40 | has_velocity_limits: true 41 | max_effort: 150.0 42 | max_position: 6.28318530718 43 | max_velocity: 3.14159265359 44 | min_position: -6.28318530718 45 | shoulder_lift: 46 | # acceleration limits are not publicly available 47 | has_acceleration_limits: false 48 | has_effort_limits: true 49 | has_position_limits: true 50 | has_velocity_limits: true 51 | max_effort: 150.0 52 | max_position: 6.28318530718 53 | max_velocity: 3.14159265359 54 | min_position: -6.28318530718 55 | elbow_joint: 56 | # acceleration limits are not publicly available 57 | has_acceleration_limits: false 58 | has_effort_limits: true 59 | has_position_limits: true 60 | has_velocity_limits: true 61 | max_effort: 150.0 62 | # we artificially limit this joint to half its actual joint position limit 63 | # to avoid (MoveIt/OMPL) planning problems, as due to the physical 64 | # construction of the robot, it's impossible to rotate the 'elbow_joint' 65 | # over more than approx +- 1 pi (the shoulder lift joint gets in the way). 66 | # 67 | # This leads to planning problems as the search space will be divided into 68 | # two sections, with no connections from one to the other. 69 | # 70 | # Refer to https://github.com/ros-industrial/universal_robot/issues/265 for 71 | # more information. 
72 | max_position: 3.14159265359 73 | max_velocity: 3.14159265359 74 | min_position: -3.14159265359 75 | wrist_1: 76 | # acceleration limits are not publicly available 77 | has_acceleration_limits: false 78 | has_effort_limits: true 79 | has_position_limits: true 80 | has_velocity_limits: true 81 | max_effort: 28.0 82 | max_position: 6.28318530718 83 | max_velocity: 3.14159265359 84 | min_position: -6.28318530718 85 | wrist_2: 86 | # acceleration limits are not publicly available 87 | has_acceleration_limits: false 88 | has_effort_limits: true 89 | has_position_limits: true 90 | has_velocity_limits: true 91 | max_effort: 28.0 92 | max_position: 6.28318530718 93 | max_velocity: 3.14159265359 94 | min_position: -6.28318530718 95 | wrist_3: 96 | # acceleration limits are not publicly available 97 | has_acceleration_limits: false 98 | has_effort_limits: true 99 | has_position_limits: true 100 | has_velocity_limits: true 101 | max_effort: 28.0 102 | max_position: 6.28318530718 103 | max_velocity: 3.14159265359 104 | min_position: -6.28318530718 105 | -------------------------------------------------------------------------------- /robo_gym/__init__.py: -------------------------------------------------------------------------------- 1 | from gymnasium.envs.registration import register 2 | 3 | # naming convention: EnvnameRobotSim 4 | 5 | # Example Environments 6 | register( 7 | id="ExampleEnvSim-v0", 8 | entry_point="robo_gym.envs:ExampleEnvSim", 9 | ) 10 | 11 | register( 12 | id="ExampleEnvRob-v0", 13 | entry_point="robo_gym.envs:ExampleEnvRob", 14 | ) 15 | 16 | # MiR100 Environments 17 | register( 18 | id="NoObstacleNavigationMir100Sim-v0", 19 | entry_point="robo_gym.envs:NoObstacleNavigationMir100Sim", 20 | ) 21 | 22 | register( 23 | id="NoObstacleNavigationMir100Rob-v0", 24 | entry_point="robo_gym.envs:NoObstacleNavigationMir100Rob", 25 | ) 26 | 27 | register( 28 | id="ObstacleAvoidanceMir100Sim-v0", 29 | entry_point="robo_gym.envs:ObstacleAvoidanceMir100Sim", 30 | ) 31 | 32 | register( 33 | id="ObstacleAvoidanceMir100Rob-v0", 34 | entry_point="robo_gym.envs:ObstacleAvoidanceMir100Rob", 35 | ) 36 | 37 | # UR Environments 38 | register( 39 | id="EmptyEnvironmentURSim-v0", 40 | entry_point="robo_gym.envs:EmptyEnvironmentURSim", 41 | ) 42 | 43 | register( 44 | id="EmptyEnvironmentURRob-v0", 45 | entry_point="robo_gym.envs:EmptyEnvironmentURRob", 46 | ) 47 | 48 | register( 49 | id="EndEffectorPositioningURSim-v0", 50 | entry_point="robo_gym.envs:EndEffectorPositioningURSim", 51 | ) 52 | 53 | register( 54 | id="EndEffectorPositioningURRob-v0", 55 | entry_point="robo_gym.envs:EndEffectorPositioningURRob", 56 | ) 57 | 58 | register( 59 | id="BasicAvoidanceURSim-v0", 60 | entry_point="robo_gym.envs:BasicAvoidanceURSim", 61 | ) 62 | 63 | register( 64 | id="BasicAvoidanceURRob-v0", 65 | entry_point="robo_gym.envs:BasicAvoidanceURRob", 66 | ) 67 | 68 | register( 69 | id="AvoidanceRaad2022URSim-v0", 70 | entry_point="robo_gym.envs:AvoidanceRaad2022URSim", 71 | ) 72 | 73 | register( 74 | id="AvoidanceRaad2022URRob-v0", 75 | entry_point="robo_gym.envs:AvoidanceRaad2022URRob", 76 | ) 77 | 78 | register( 79 | id="AvoidanceRaad2022TestURSim-v0", 80 | entry_point="robo_gym.envs:AvoidanceRaad2022TestURSim", 81 | ) 82 | 83 | register( 84 | id="AvoidanceRaad2022TestURRob-v0", 85 | entry_point="robo_gym.envs:AvoidanceRaad2022TestURRob", 86 | ) 87 | 88 | # TODO register the following as v1 or v2 of the corresponding v0 ones above instead? 
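# Usage sketch (illustrative, not part of the registration code): once
# registered, any of these environments is created via Gymnasium's factory, e.g.
#   import gymnasium as gym
#   env = gym.make("EndEffectorPositioning2URSim-v0", ip="127.0.0.1")
# where `ip` points to the server manager host, as in the examples under
# docs/examples.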
89 | register( 90 | id="EmptyEnvironment2URSim-v0", 91 | entry_point="robo_gym.envs:EmptyEnvironment2URSim", 92 | ) 93 | 94 | register( 95 | id="EmptyEnvironment2URRob-v0", 96 | entry_point="robo_gym.envs:EmptyEnvironment2URRob", 97 | ) 98 | 99 | register( 100 | id="EndEffectorPositioning2URSim-v0", 101 | entry_point="robo_gym.envs:EndEffectorPositioning2URSim", 102 | ) 103 | 104 | register( 105 | id="EndEffectorPositioning2URRob-v0", 106 | entry_point="robo_gym.envs:EndEffectorPositioning2URRob", 107 | ) 108 | 109 | register( 110 | id="IsaacReachURSim-v0", 111 | entry_point="robo_gym.envs:IsaacReachURSim", 112 | ) 113 | 114 | register( 115 | id="IsaacReachURRob-v0", 116 | entry_point="robo_gym.envs:IsaacReachURRob", 117 | ) 118 | 119 | # Panda Environments 120 | register( 121 | id="EmptyEnvironmentPandaSim-v0", 122 | entry_point="robo_gym.envs:EmptyEnvironmentPandaSim", 123 | ) 124 | 125 | register( 126 | id="EmptyEnvironmentPandaRob-v0", 127 | entry_point="robo_gym.envs:EmptyEnvironmentPandaRob", 128 | ) 129 | 130 | 131 | register( 132 | id="EndEffectorPositioningPandaSim-v0", 133 | entry_point="robo_gym.envs:EndEffectorPositioningPandaSim", 134 | ) 135 | 136 | register( 137 | id="EndEffectorPositioningPandaRob-v0", 138 | entry_point="robo_gym.envs:EndEffectorPositioningPandaRob", 139 | ) 140 | register( 141 | id="IsaacReachPandaSim-v0", 142 | entry_point="robo_gym.envs:IsaacReachPandaSim", 143 | ) 144 | 145 | register( 146 | id="IsaacReachPandaRob-v0", 147 | entry_point="robo_gym.envs:IsaacReachPandaRob", 148 | ) 149 | -------------------------------------------------------------------------------- /docs/examples/rllib/training_inference_mir.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import gymnasium as gym 3 | import os 4 | 5 | import ray 6 | from ray import air, tune 7 | from ray.rllib.algorithms.algorithm import Algorithm 8 | from ray.tune.registry import get_trainable_cls, register_env 9 | 10 | import robo_gym 11 | from robo_gym.envs.mir100.mir100 import NoObstacleNavigationMir100Sim 12 | 13 | def env_creator_mir(config): 14 | return NoObstacleNavigationMir100Sim(**config) 15 | 16 | 17 | register_env( 18 | name="NoObstacleNavigationMir100Sim-v0", 19 | env_creator=env_creator_mir, 20 | ) 21 | 22 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 23 | parser.add_argument( 24 | "--run", type=str, default="PPO", help="The RLlib-registered algorithm to use." 
25 | ) 26 | parser.add_argument("--num-cpus", type=int, default=0) 27 | parser.add_argument( 28 | "--framework", 29 | choices=["tf", "tf2", "torch"], 30 | default="torch", 31 | help="The DL framework specifier.", 32 | ) 33 | parser.add_argument( 34 | "--stop-iters", 35 | type=int, 36 | default=200, 37 | help="Number of iterations to train before we do inference.", 38 | ) 39 | parser.add_argument( 40 | "--stop-timesteps", 41 | type=int, 42 | default=100000, 43 | help="Number of timesteps to train before we do inference.", 44 | ) 45 | parser.add_argument( 46 | "--stop-reward", 47 | type=float, 48 | default=150.0, 49 | help="Reward at which we stop training before we do inference.", 50 | ) 51 | parser.add_argument( 52 | "--explore-during-inference", 53 | action="store_true", 54 | help="Whether the trained policy should use exploration during action " 55 | "inference.", 56 | ) 57 | parser.add_argument( 58 | "--num-episodes-during-inference", 59 | type=int, 60 | default=10, 61 | help="Number of episodes to do inference over after training.", 62 | ) 63 | 64 | if __name__ == "__main__": 65 | args = parser.parse_args() 66 | 67 | ray.init(num_cpus=args.num_cpus or None) 68 | 69 | config = ( 70 | get_trainable_cls(args.run) 71 | .get_default_config() 72 | .environment("NoObstacleNavigationMir100Sim-v0", env_config={"ip": "127.0.0.1", "gui": False},) 73 | # Run with tracing enabled for tf2? 74 | .framework(args.framework) 75 | # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0. 76 | .resources(num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", "0"))) 77 | ) 78 | 79 | stop = { 80 | "training_iteration": args.stop_iters, 81 | "timesteps_total": args.stop_timesteps, 82 | "episode_reward_mean": args.stop_reward, 83 | } 84 | 85 | print("Training policy until desired reward/timesteps/iterations. ...") 86 | tuner = tune.Tuner( 87 | args.run, 88 | param_space=config.to_dict(), 89 | run_config=air.RunConfig( 90 | stop=stop, 91 | verbose=2, 92 | checkpoint_config=air.CheckpointConfig( 93 | checkpoint_frequency=1, checkpoint_at_end=True 94 | ), 95 | ), 96 | ) 97 | results = tuner.fit() 98 | 99 | print("Training completed. Restoring new Algorithm for action inference.") 100 | # Get the last checkpoint from the above training run. 101 | checkpoint = results.get_best_result().checkpoint 102 | # Create new Algorithm and restore its state from the last checkpoint. 103 | algo = Algorithm.from_checkpoint(checkpoint) 104 | 105 | # Create the env to do inference in. 106 | env = gym.make("NoObstacleNavigationMir100Sim-v0", ip="127.0.0.1", gui=True) 107 | obs, info = env.reset() 108 | env.render() 109 | 110 | num_episodes = 0 111 | episode_reward = 0.0 112 | 113 | while num_episodes < args.num_episodes_during_inference: 114 | # Compute an action (`a`). 115 | a = algo.compute_single_action( 116 | observation=obs, 117 | explore=args.explore_during_inference, 118 | policy_id="default_policy", # <- default value 119 | ) 120 | # Send the computed action `a` to the env. 121 | obs, reward, done, truncated, _ = env.step(a) 122 | env.render() 123 | episode_reward += reward 124 | # Is the episode `done`? -> Reset. 
125 | if done or truncated: 126 | print(f"Episode done: Total reward = {episode_reward}") 127 | obs, info = env.reset() 128 | env.render() 129 | num_episodes += 1 130 | episode_reward = 0.0 131 | 132 | algo.stop() 133 | env.close() 134 | ray.shutdown() 135 | -------------------------------------------------------------------------------- /docs/examples/rllib/training_inference_ur.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import gymnasium as gym 3 | import os 4 | 5 | import ray 6 | from ray import air, tune 7 | from ray.rllib.algorithms.algorithm import Algorithm 8 | from ray.tune.registry import get_trainable_cls, register_env 9 | 10 | import robo_gym 11 | from robo_gym.envs.ur.ur_ee_positioning import EndEffectorPositioningURSim 12 | 13 | def env_creator_ur(config): 14 | return EndEffectorPositioningURSim(**config) 15 | 16 | 17 | register_env( 18 | name="EndEffectorPositioningURSim-v0", 19 | env_creator=env_creator_ur, 20 | ) 21 | 22 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 23 | parser.add_argument( 24 | "--run", type=str, default="PPO", help="The RLlib-registered algorithm to use." 25 | ) 26 | parser.add_argument("--num-cpus", type=int, default=0) 27 | parser.add_argument( 28 | "--framework", 29 | choices=["tf", "tf2", "torch"], 30 | default="torch", 31 | help="The DL framework specifier.", 32 | ) 33 | parser.add_argument( 34 | "--stop-iters", 35 | type=int, 36 | default=200, 37 | help="Number of iterations to train before we do inference.", 38 | ) 39 | parser.add_argument( 40 | "--stop-timesteps", 41 | type=int, 42 | default=100000, 43 | help="Number of timesteps to train before we do inference.", 44 | ) 45 | parser.add_argument( 46 | "--stop-reward", 47 | type=float, 48 | default=150.0, 49 | help="Reward at which we stop training before we do inference.", 50 | ) 51 | parser.add_argument( 52 | "--explore-during-inference", 53 | action="store_true", 54 | help="Whether the trained policy should use exploration during action " 55 | "inference.", 56 | ) 57 | parser.add_argument( 58 | "--num-episodes-during-inference", 59 | type=int, 60 | default=10, 61 | help="Number of episodes to do inference over after training.", 62 | ) 63 | 64 | if __name__ == "__main__": 65 | args = parser.parse_args() 66 | 67 | ray.init(num_cpus=args.num_cpus or None) 68 | 69 | config = ( 70 | get_trainable_cls(args.run) 71 | .get_default_config() 72 | .environment("EndEffectorPositioningURSim-v0", env_config={"ip": "127.0.0.1", "gui": False, "ur_model": "ur5", "rs_state_to_info": False},) 73 | # Run with tracing enabled for tf2? 74 | .framework(args.framework) 75 | # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0. 76 | .resources(num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", "0"))) 77 | ) 78 | 79 | stop = { 80 | "training_iteration": args.stop_iters, 81 | "timesteps_total": args.stop_timesteps, 82 | "episode_reward_mean": args.stop_reward, 83 | } 84 | 85 | print("Training policy until desired reward/timesteps/iterations. ...") 86 | tuner = tune.Tuner( 87 | args.run, 88 | param_space=config.to_dict(), 89 | run_config=air.RunConfig( 90 | stop=stop, 91 | verbose=2, 92 | checkpoint_config=air.CheckpointConfig( 93 | checkpoint_frequency=1, checkpoint_at_end=True 94 | ), 95 | ), 96 | ) 97 | results = tuner.fit() 98 | 99 | print("Training completed. Restoring new Algorithm for action inference.") 100 | # Get the last checkpoint from the above training run.
101 | checkpoint = results.get_best_result().checkpoint 102 | # Create new Algorithm and restore its state from the last checkpoint. 103 | algo = Algorithm.from_checkpoint(checkpoint) 104 | 105 | # Create the env to do inference in. 106 | env = gym.make("EndEffectorPositioningURSim-v0", ip="127.0.0.1", gui=True, ur_model="ur5", rs_state_to_info=False) 107 | obs, info = env.reset() 108 | env.render() 109 | 110 | num_episodes = 0 111 | episode_reward = 0.0 112 | 113 | while num_episodes < args.num_episodes_during_inference: 114 | # Compute an action (`a`). 115 | a = algo.compute_single_action( 116 | observation=obs, 117 | explore=args.explore_during_inference, 118 | policy_id="default_policy", # <- default value 119 | ) 120 | # Send the computed action `a` to the env. 121 | obs, reward, done, truncated, _ = env.step(a) 122 | env.render() 123 | episode_reward += reward 124 | # Is the episode `done` or truncated? -> Reset. 125 | if done or truncated: 126 | print(f"Episode done: Total reward = {episode_reward}") 127 | obs, info = env.reset() 128 | env.render() 129 | num_episodes += 1 130 | episode_reward = 0.0 131 | 132 | algo.stop() 133 | env.close() 134 | ray.shutdown() 135 | -------------------------------------------------------------------------------- /docs/isaac_lab_compatibility.md: -------------------------------------------------------------------------------- 1 | # Isaac Lab Compatibility 2 | 3 | The environments of robo-gym include a few that are intended to provide compatibility with agents trained using [Isaac Lab](https://developer.nvidia.com/isaac/lab). The main purpose of this compatibility is to offer robo-gym as a way of transferring the trained policies from Isaac Lab to real robots, with robo-gym simulations as an intermediate step. 4 | 5 | An overview of the robo-gym environments implemented for this feature is included in [Modular Environments](modular_environments.md). 6 | 7 | ## Setup 8 | 9 | ### robo-gym 10 | 11 | #### Agent side 12 | 13 | Install robo-gym version >=2.1.0 and its prerequisites, including robo-gym-server-modules version 0.3.0.1. 14 | 15 | In addition, install Torch: 16 | 17 | `pip install torch==2.5.1 torchvision==0.20.1 --index-url https://download.pytorch.org/whl/cu121` 18 | 19 | #### Server side 20 | 21 | Install robo-gym-robot-servers >=2.1.1 and its prerequisites, including robo-gym-server-modules version 0.3.0.1 and the prerequisites for the robots you want to use, in your ROS Noetic Python environment. 22 | 23 | ### Isaac Lab 24 | 25 | Isaac Lab is not required to run our compatibility environments. 26 | 27 | If you already have a policy that you want to use, you don't need to run Isaac Lab. 28 | 29 | It is recommended to use separate Python environments for Isaac Lab and robo-gym (both sides), as their dependencies may be incompatible. 30 | 31 | Install Isaac Lab following their [instructions](https://isaac-sim.github.io/IsaacLab/main/source/setup/installation/index.html#local-installation). 32 | 33 | Tested with policies from this setup: 34 | * Isaac Sim: 4.5.0, installed via pip 35 | * Isaac Lab: 2.1.0/commit 5af5f388 (main as of 29 April 2025) 36 | * Python: 3.10.11 37 | * OS: Windows 10 Enterprise Build 19045 38 | 39 | ## Training 40 | 41 | Use the rsl_rl train script to perform training on either the Franka Reach or the UR10 Reach task. The commands are executed from the Isaac Lab folder with the corresponding Python environment active. 42 | 43 | See also the [overview of environments in Isaac Lab](https://isaac-sim.github.io/IsaacLab/main/source/overview/environments.html).
44 | 45 | If not running on Windows, replace `isaaclab.bat` with the corresponding script file name for your system. 46 | 47 | ### Franka Emika Panda 48 | 49 | Default training: 50 | 51 | `isaaclab.bat -p scripts/reinforcement_learning/rsl_rl/train.py --task=Isaac-Reach-Franka-v0 --headless` 52 | 53 | Play the environment with the trained policy from the last run (this needs to be run at least once to obtain the exported policy file): 54 | 55 | `isaaclab.bat -p scripts/reinforcement_learning/rsl_rl/play.py --task=Isaac-Reach-Franka-v0` 56 | 57 | ### UR10 58 | 59 | Default training: 60 | 61 | `isaaclab.bat -p scripts/reinforcement_learning/rsl_rl/train.py --task=Isaac-Reach-UR10-v0 --headless` 62 | 63 | Play the environment with the trained policy from the last run (this needs to be run at least once to obtain the exported policy file): 64 | 65 | `isaaclab.bat -p scripts/reinforcement_learning/rsl_rl/play.py --task=Isaac-Reach-UR10-v0` 66 | 67 | ## Running 68 | 69 | ### Server side 70 | 71 | #### Simulation 72 | 73 | In a command line where your robo-gym server-side ROS workspace is ready: 74 | 75 | `start-server-manager && attach-to-server-manager` 76 | 77 | #### Real robot 78 | 79 | The procedure is the same as for the corresponding robot's other environments. Due to limited progress in our own tests with real robots, we do not provide instructions for this case at this time. 80 | 81 | ### Agent side 82 | 83 | We use our [env test script](robo_envtest.py) for convenient testing and to show that the Isaac Lab Compatibility environments are used in the same way as our other environments. 84 | 85 | Arguments that need adaptation for your setup: 86 | * `-i`: server manager host 87 | * `-p`: path to the exported policy 88 | 89 | The commands are executed from this folder with the robo-gym Python environment active. 90 | 91 | #### UR10 92 | 93 | Command to run the [env test script](robo_envtest.py): 94 | 95 | `python robo-envtest.py -i localhost -g 1 -gz 1 -rv 0 -e IsaacReachURSim-v0 -u ur10 -et 500 -t 5000 -p D:\\PATH\\TO\\IsaacLab\\logs\\rsl_rl\\reach_ur10\\2025-04-30_09-36-28\\exported\\policy.pt` 96 | 97 | Your console output should give you information about the progress while a few episodes are executed. During each episode, you should see the UR10 in Gazebo reach the target to a degree comparable to what you see when playing in Isaac Lab. Note that the orientation of the target pose cannot be seen in Gazebo. The console output of the test script should give you a success indication at the end of each episode, with the distance in translation ("dist_coord") in meters and the distance in rotation ("dist_rot") in radians. With the default configuration of the environment, the target is located within a certain cuboid and rotated randomly around the vertical axis. 98 | 99 | The environment identifier usable in the `gym.make` command is `IsaacReachURSim-v0`. 100 | 101 | #### Panda 102 | 103 | Command to run the [env test script](robo_envtest.py): 104 | 105 | `python robo-envtest.py -i localhost -g 1 -gz 1 -rv 0 -e IsaacReachPandaSim-v0 -et 500 -t 5000 -p D:\\PATH\\TO\\IsaacLab\\logs\\rsl_rl\\franka_reach\\2025-05-16_17-53-15\\exported\\policy.pt` 106 | 107 | Note that, while results similar to those for the UR10 should be achievable for the Panda (though significantly longer training may be required), we have not yet achieved satisfactory performance when controlling the Panda robot via this approach.
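For reference, the core of what the test script does on the agent side can be sketched as follows (a simplified sketch, assuming the policy was exported as TorchScript by the rsl_rl play script; the `localhost` server manager host and the `policy.pt` path are placeholders for your setup):

    import gymnasium as gym
    import torch

    import robo_gym  # registers the robo-gym environments

    env = gym.make("IsaacReachPandaSim-v0", ip="localhost")
    policy = torch.jit.load("policy.pt")  # exported policy file

    obs, info = env.reset()
    done = truncated = False
    while not (done or truncated):
        with torch.no_grad():
            # The exported policy maps an observation batch to an action batch.
            action = policy(torch.as_tensor(obs, dtype=torch.float32).unsqueeze(0))
        obs, reward, done, truncated, info = env.step(action.squeeze(0).numpy())
    env.close()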
108 | 109 | The environment identifier usable in the `gym.make` command is `IsaacReachPandaSim-v0`. 110 | -------------------------------------------------------------------------------- /docs/the_framework.md: -------------------------------------------------------------------------------- 1 | # The robo-gym framework 2 | 3 | The following information is extracted from: *M. Lucchi, F. Zindler, S. Mühlbacher-Karrer, H. Pichler, "robo-gym - An Open Source Toolkit for Distributed Deep Reinforcement Learning on Real and Simulated Robots"* Submitted to IROS 2020. Under review. 4 | 5 | ![alt text](https://user-images.githubusercontent.com/36470989/79330117-4498dc80-7f19-11ea-9de4-bed4f6390f3a.jpg "The robo-gym framework") 6 | 7 | ## The Components 8 | 9 | ### Real or Simulated Robot 10 | 11 | This component includes the robot itself, the sensors, and the scene surrounding the robot. 12 | 13 | The interface to the robots and the sensors is implemented in ROS for both real and simulated hardware. The interface of the simulated robots generally corresponds to that of the real robots, augmented with features that would be impractical or too expensive to match in the real world. An example is virtual collision sensors that detect any collision of a robot link. 14 | The simulated and the real robot must use the same controllers. 15 | 16 | The simulated scenes are generated in Gazebo and are described using the SDFormat (SDF), an XML format. These can be created and manipulated in multiple ways: online, via API or GUI, and offline, by editing the SDF files. 17 | 18 | ### Command Handler 19 | 20 | Within the Markov Decision Process (MDP) framework, it is assumed that interactions between agent and environment take place at each of a series of discrete time steps. 21 | In a real-world system, however, time passes in a continuous manner. 22 | It is therefore necessary to make some adjustments to the real-world system so that its behavior comes closer to that defined by the MDP framework. 23 | The Command Handler (CH) implements these aspects. 24 | 25 | The CH uses a queue with capacity for one command message. 26 | When the component receives a command message, it tries to fill the queue with it. 27 | New elements are ignored until the element in the queue is consumed. 28 | The CH continuously publishes command messages to the robot at the frequency required by its controller. 29 | If, at the moment of publishing, the queue is full, the CH retrieves the command, publishes it to the robot the selected number of times, and then empties the queue. 30 | Otherwise, the handler publishes the command corresponding to an interruption of the movement execution: either zero velocities for mobile robots or preemption of the trajectory execution for robot arms. 31 | 32 | The framework's CH supports the standard *diff_drive_controller* and *joint_trajectory_controller* from ROS controllers. 33 | This covers the most common types of robots; nevertheless, this component can be easily implemented for any other ROS controller. A schematic code sketch of the queue mechanism is included further below. 34 | 35 | ### Robot Server 36 | 37 | It exposes a gRPC server that allows interaction with the robot through the integrated ROS bridge. 38 | 39 | The first function of the server is to store updated information regarding the state of the robot, which can be queried at any time via a gRPC service call. 40 | The robot and the sensors constantly publish information via ROS.
41 | The ROS Bridge collects the information from the different sources and stores it in a buffer as an array of values. 42 | The robot and the sensors update their information at different frequencies. 43 | The buffer is managed with a threading mechanism to ensure that the data delivered to the client is consistent and contains the latest information. 44 | 45 | The second function is to set the robot and the scene to a desired state. For instance, the user might want to set the joint positions of a robotic arm to specific values when resetting the environment. 46 | 47 | Lastly, it provides a service to publish commands to the CH. 48 | 49 | ### Environment 50 | 51 | This is the top element of the framework, which provides the standard OpenAI Gym interface to the agent. 52 | 53 | The main function of the Environment component is to define the composition of the state, the initial conditions of an episode and the reward function. 54 | In addition, it includes a gRPC stub which connects to the Robot Server to send actions, and to set or get the state of the robot and the scene. 55 | 56 | Following the framework provided by the Gym, environments are organized in classes, each derived from a common base class. In addition, robo-gym extends this setup with different wrappers for real and simulated robots. These wrappers differ in the constructor that is called. 57 | In the case of the simulated robot environment, the argument for the IP address refers to the Server Manager, whereas in the case of the real robot environment it refers to the IP address of the Robot Server. 58 | The Server Manager for simulated robots provides the address of the Robot Server to which the Environment gRPC stub is then connected. 59 | On the other hand, in the case of the real robot environment, extra attention to the network configuration is needed to guarantee communication with the hardware. Furthermore, environment failures and any emergency stops must be inspected by a human operator. 60 | As a consequence, the Server Manager is currently not employed when using real robots and the Environment gRPC stub is connected directly to the Robot Server, which is started manually. 61 | 62 | ### Server Manager 63 | 64 | It is the orchestrator of the Robot Servers; it exposes gRPC services to spawn, kill, and check Robot Servers. 65 | When used with simulated robots it handles the robot simulations as well. 66 | 67 | Each cluster of Robot Server, CH and real or simulated robot runs on an isolated ROS network. 68 | To achieve this, the Server Manager launches each cluster in an isolated shell environment handled with the help of tmux.
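As a schematic illustration of the Command Handler's queue mechanism described above, consider the following sketch (simplified and illustrative only, not the actual robo-gym implementation):

    import queue

    class CommandHandlerSketch:
        """Simplified sketch of the one-slot command queue described above."""

        def __init__(self, publish, stop_command, repetitions=1):
            self._queue = queue.Queue(maxsize=1)  # capacity for one command message
            self._publish = publish               # publishes one message to the robot
            self._stop_command = stop_command     # zero velocities / trajectory preemption
            self._repetitions = repetitions

        def receive(self, command):
            # New commands are ignored while the queued one is not yet consumed.
            try:
                self._queue.put_nowait(command)
            except queue.Full:
                pass

        def tick(self):
            # Called continuously at the frequency required by the robot controller.
            try:
                command = self._queue.get_nowait()  # consume, i.e. empty the queue
            except queue.Empty:
                self._publish(self._stop_command)
                return
            for _ in range(self._repetitions):
                self._publish(command)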
69 | 70 | This component implements error handling features to automatically restart the Robot Server and the robot simulation in case of: 71 | - an error in the connection to the Robot Server 72 | - an exceeded deadline when calling a Robot Server service 73 | - a non-responding simulation 74 | - data received from simulation out of defined bounds 75 | - a manual request of simulation restart 76 | -------------------------------------------------------------------------------- /tests/robo-gym/envs/ur/test_ur_avoidance_raad_fixed_spline.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import gymnasium as gym 4 | import robo_gym 5 | from robo_gym.utils import ur_utils 6 | import math 7 | import numpy as np 8 | import pathlib 9 | import json 10 | 11 | import pytest 12 | 13 | 14 | robo_gym_path = ( 15 | pathlib.Path(__file__) 16 | .parent.parent.parent.parent.parent.absolute() 17 | .joinpath("robo_gym") 18 | ) 19 | 20 | ur_models = [ 21 | pytest.param("ur3", marks=pytest.mark.skip(reason="not implemented yet")), 22 | pytest.param("ur3e", marks=pytest.mark.skip(reason="not implemented yet")), 23 | pytest.param("ur5", marks=pytest.mark.commit), 24 | pytest.param("ur5e", marks=pytest.mark.skip(reason="not implemented yet")), 25 | pytest.param("ur10", marks=pytest.mark.skip(reason="not implemented yet")), 26 | pytest.param("ur10e", marks=pytest.mark.skip(reason="not implemented yet")), 27 | pytest.param("ur16e", marks=pytest.mark.skip(reason="not implemented yet")), 28 | ] 29 | 30 | 31 | @pytest.fixture(scope="module", params=ur_models) 32 | def env(request): 33 | ip = os.environ.get("ROBOGYM_SERVERS_HOST", "robot-servers") 34 | env = gym.make("AvoidanceRaad2022TestURSim-v0", ip=ip, ur_model=request.param) 35 | env.request_param = request.param 36 | yield env 37 | env.kill_sim() 38 | 39 | 40 | @pytest.mark.commit 41 | def test_initialization(env): 42 | assert env.ur.model_key == env.request_param 43 | env.reset() 44 | done = False 45 | env.step([0, 0, 0, 0, 0]) 46 | for _ in range(10): 47 | if not done: 48 | action = env.action_space.sample() 49 | observation, _, done, _, _ = env.step(action) 50 | 51 | assert env.observation_space.contains(observation) 52 | 53 | 54 | @pytest.mark.skip(reason="This fails only in CI") 55 | @pytest.mark.flaky(reruns=3) 56 | def test_object_collision(env): 57 | params = { 58 | "ur5": {"object_coords": [-0.2, -0.1, 0.5], "n_steps": 250}, 59 | } 60 | 61 | env.reset( 62 | options={"fixed_object_position": params[env.ur.model_key]["object_coords"]} 63 | ) 64 | done = False 65 | i = 0 66 | while (not done) or i <= params[env.ur.model_key]["n_steps"]: 67 | _, _, done, _, info = env.step(np.zeros(5)) 68 | i += 1 69 | assert info["final_status"] == "collision" 70 | 71 | 72 | @pytest.mark.commit 73 | def test_robot_trajectory(env): 74 | params = { 75 | "ur5": { 76 | "traj_relative_path": "envs/ur/robot_trajectories/trajectory_raad_2022.json" 77 | } 78 | } 79 | 80 | env.reset() 81 | # load trajectory 82 | traj_path = robo_gym_path.joinpath(params[env.ur.model_key]["traj_relative_path"]) 83 | with open(traj_path) as json_file: 84 | trajectory = json.load(json_file)["trajectory"] 85 | 86 | action = np.zeros(5) 87 | for i in range(len(trajectory[0])): 88 | traj_joint_positions = trajectory[0][i] 89 | state, _, _, _, _ = env.step(action) 90 | ur_j_pos_norm = state[3:9] 91 | delta_joints = state[9:15] 92 | desired_joints = state[18:24] 93 | # check if robot follows the trajectory in all steps of trajectory segment 0 94 | assert
np.isclose( 95 | env.ur.normalize_joint_values(traj_joint_positions), ur_j_pos_norm, atol=0.1 96 | ).all() 97 | # check if calculation of delta joints is correct 98 | assert np.isclose( 99 | (ur_j_pos_norm - desired_joints), delta_joints, atol=0.01 100 | ).all() 101 | # check that the current trajectory point is a target point 102 | assert state[24] == 1.0 103 | # check that state machine has transitioned to segment 1 of trajectory 104 | traj_joint_positions = trajectory[1][0] 105 | state, _, _, _, _ = env.step(action) 106 | assert np.isclose( 107 | env.ur.normalize_joint_values(traj_joint_positions), state[3:9], atol=0.1 108 | ).all() 109 | 110 | 111 | test_ur_fixed_joints = [ 112 | ( 113 | "AvoidanceRaad2022TestURSim-v0", 114 | False, 115 | False, 116 | False, 117 | False, 118 | False, 119 | True, 120 | "ur5", 121 | ), # fixed wrist_3 122 | ( 123 | "AvoidanceRaad2022TestURSim-v0", 124 | True, 125 | False, 126 | True, 127 | False, 128 | False, 129 | False, 130 | "ur5", 131 | ), # fixed Base and Elbow 132 | ] 133 | 134 | 135 | @pytest.mark.nightly 136 | @pytest.mark.parametrize( 137 | "env_name, fix_base, fix_shoulder, fix_elbow, fix_wrist_1, fix_wrist_2, fix_wrist_3, ur_model", 138 | test_ur_fixed_joints, 139 | ) 140 | @pytest.mark.flaky(reruns=3) 141 | def test_fixed_joints( 142 | env_name, 143 | fix_base, 144 | fix_shoulder, 145 | fix_elbow, 146 | fix_wrist_1, 147 | fix_wrist_2, 148 | fix_wrist_3, 149 | ur_model, 150 | ): 151 | env = gym.make( 152 | env_name, 153 | ip="robot-servers", 154 | fix_base=fix_base, 155 | fix_shoulder=fix_shoulder, 156 | fix_elbow=fix_elbow, 157 | fix_wrist_1=fix_wrist_1, 158 | fix_wrist_2=fix_wrist_2, 159 | fix_wrist_3=fix_wrist_3, 160 | ur_model=ur_model, 161 | ) 162 | state = env.reset() 163 | initial_joint_positions = state[3:9] 164 | # Take 20 actions 165 | action = env.action_space.sample() 166 | for _ in range(20): 167 | state, _, _, _, _ = env.step(action) 168 | joint_positions = state[3:9] 169 | 170 | if fix_base: 171 | assert math.isclose( 172 | initial_joint_positions[0], joint_positions[0], abs_tol=0.05 173 | ) 174 | if fix_shoulder: 175 | assert math.isclose( 176 | initial_joint_positions[1], joint_positions[1], abs_tol=0.05 177 | ) 178 | if fix_elbow: 179 | assert math.isclose( 180 | initial_joint_positions[2], joint_positions[2], abs_tol=0.05 181 | ) 182 | if fix_wrist_1: 183 | assert math.isclose( 184 | initial_joint_positions[3], joint_positions[3], abs_tol=0.05 185 | ) 186 | if fix_wrist_2: 187 | assert math.isclose( 188 | initial_joint_positions[4], joint_positions[4], abs_tol=0.05 189 | ) 190 | if fix_wrist_3: 191 | assert math.isclose( 192 | initial_joint_positions[5], joint_positions[5], abs_tol=0.05 193 | ) 194 | 195 | env.kill_sim() 196 | -------------------------------------------------------------------------------- /tests/robo-gym/utils/test_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import math 3 | 4 | import pytest 5 | from robo_gym.utils import utils 6 | import numpy as np 7 | 8 | 9 | ### normalize_angle_rad ### 10 | test_normalize_angle_rad = [ 11 | (7.123, 0.8398), 12 | (0.8398, 0.8398), 13 | (-2.47, -2.47), 14 | (-47.47, 2.795), 15 | ] 16 | 17 | 18 | @pytest.mark.parametrize("a, expected_a", test_normalize_angle_rad) 19 | def test_normalize_angle_rad(a, expected_a): 20 | normalized_a = utils.normalize_angle_rad(a) 21 | assert abs(expected_a - normalized_a) < 0.01 22 | 23 | 24 | ### point_inside_circle ### 25 | test_inside_circle = [ 26 | (5.47, 3.51, 
6, 8, 5.23, True), 27 | (5.11, 2.23, 6, 8, 5.23, False), 28 | (5.69, 2.78, 6, 8, 5.23, True), 29 | ] 30 | 31 | 32 | @pytest.mark.parametrize( 33 | "x, y, center_x, center_y, radius, expected_result", test_inside_circle 34 | ) 35 | def test_point_inside_circle(x, y, center_x, center_y, radius, expected_result): 36 | is_inside = utils.point_inside_circle(x, y, center_x, center_y, radius) 37 | assert is_inside == expected_result 38 | 39 | 40 | ### rotate_point ### 41 | test_rotate_points = [ 42 | (3.13, 5.83, 0.785398, -1.91, 6.34), 43 | (4, 3, -2.14675498, 0.34, -4.99), 44 | (1, 3, -0.0, 1, 3), 45 | ] 46 | 47 | 48 | @pytest.mark.parametrize("x, y, theta, expected_x, expected_y", test_rotate_points) 49 | def test_rotate_point(x, y, theta, expected_x, expected_y): 50 | new_x, new_y = utils.rotate_point(x, y, theta) 51 | 52 | print(new_x, new_y) 53 | 54 | assert abs(new_x - expected_x) < 0.01 55 | assert abs(new_y - expected_y) < 0.01 56 | 57 | 58 | ### cartesian_to_polar_2d ### 59 | def test_cartesian_to_polar_2d_default_origin(): 60 | target_x = 3 61 | target_y = 4 62 | 63 | polar_r, polar_theta = utils.cartesian_to_polar_2d( 64 | x_target=target_x, y_target=target_y 65 | ) 66 | 67 | assert abs(polar_r - 5) < 0.01 68 | assert abs(polar_theta - 0.927) < 0.01 69 | 70 | 71 | def test_cartesian_to_polar_2d_set_origin(): 72 | target_x = 3 73 | target_y = 4 74 | origin_x = 1 75 | origin_y = 1 76 | 77 | polar_r, polar_theta = utils.cartesian_to_polar_2d( 78 | x_target=target_x, y_target=target_y, x_origin=origin_x, y_origin=origin_y 79 | ) 80 | 81 | assert abs(polar_r - 3.61) < 0.01 82 | assert abs(polar_theta - 0.9827949) < 0.01 83 | 84 | 85 | test_equal_points = [ 86 | ([0, 0], [0, 0], 0, 0), 87 | ([1, 1], [1, 1], 0, 0), 88 | ([3.141, 3.141], [3.141, 3.141], 0, 0), 89 | ] 90 | 91 | 92 | @pytest.mark.parametrize("p1, p2, expected_r, expected_theta", test_equal_points) 93 | def test_cartesian_to_polar_2d_equal_points(p1, p2, expected_r, expected_theta): 94 | 95 | polar_r, polar_theta = utils.cartesian_to_polar_2d( 96 | x_target=p1[0], y_target=p1[1], x_origin=p2[0], y_origin=p2[1] 97 | ) 98 | 99 | assert abs(polar_r - expected_r) < 0.001 100 | assert abs(polar_theta - expected_theta) < 0.001 101 | 102 | 103 | ### cartesian_to_polar_3d ### 104 | def test_cartesian_to_polar_3d(): 105 | cartesian_coordinates = [9, 4, 5] 106 | 107 | r, theta, phi = utils.cartesian_to_polar_3d( 108 | cartesian_coordinates=cartesian_coordinates 109 | ) 110 | 111 | assert abs(r - 11.045361017187) < 0.01 112 | assert abs(phi - 0.41822432957) < 0.01 113 | assert abs(theta - 1.1010291108682) < 0.01 114 | 115 | 116 | ### downsample_list_to_len ### 117 | test_downsample = [ 118 | 2, 119 | 4, 120 | 5, 121 | 6, 122 | 7, 123 | 8, 124 | 9, 125 | 10, 126 | 13, 127 | 15, 128 | 24, 129 | 51, 130 | 100, 131 | 101, 132 | 200, 133 | 201, 134 | 349, 135 | 501, 136 | ] 137 | 138 | 139 | @pytest.mark.parametrize("target_length", test_downsample) 140 | def test_downsample_list_to_len(target_length): 141 | input_length = 1000 142 | sample_list = [i for i in range(input_length)] 143 | 144 | downsampled_list = utils.downsample_list_to_len( 145 | data=sample_list, output_len=target_length 146 | ) 147 | 148 | assert len(downsampled_list) == target_length 149 | 150 | 151 | ### change_reference_frame ### 152 | def test_translation_change_reference_frame(): 153 | 154 | point = [5, 3, 2] 155 | translation = [-11, 6, -1] 156 | quaternion = [0, 0, 0, 1] 157 | 158 | assert ( 159 | utils.change_reference_frame(point, translation, quaternion) == [-6, 9, 
1] 160 | ).all() 161 | 162 | 163 | def test_rotation_change_reference_frame(): 164 | 165 | point = [-0.250, 0.256, 1.118] 166 | translation = [0.0, 0.0, -0.227] 167 | quaternion = [0.0, 0.0, 1.0, 0.0] 168 | 169 | assert np.allclose( 170 | a=utils.change_reference_frame(point, translation, quaternion), 171 | b=[0.250, -0.256, 0.890], 172 | atol=0.001, 173 | ) 174 | 175 | 176 | def test_quat_from_rpy(): 177 | roll = 0.25 178 | pitch = 0.5 179 | yaw = 0.75 180 | 181 | quat_scipy = utils.quat_from_euler_xyz(roll, pitch, yaw) 182 | quat_isaac = utils.quat_from_euler_xyz_isaac(roll, pitch, yaw) 183 | assert np.allclose(a=quat_scipy, b=quat_isaac, atol=0.000001) 184 | 185 | 186 | def test_quat_from_rpy_isaac_panda(): 187 | roll = 0.0 188 | pitch = math.pi 189 | for yaw in [-1.2807, -1.5201, 0.419, -0.6453, 2.2513]: 190 | 191 | quat_scipy = utils.quat_from_euler_xyz(roll, pitch, yaw) 192 | quat_isaac = utils.quat_from_euler_xyz_isaac(roll, pitch, yaw) 193 | assert np.allclose(a=quat_scipy, b=quat_isaac, atol=0.0001) 194 | 195 | 196 | def test_rot_diff(): 197 | r1 = utils.quat_from_euler_xyz(0.0, 0.0, 0.0) 198 | r2 = utils.quat_from_euler_xyz(0.0, 0.2, 0.0) 199 | r3 = utils.quat_from_euler_xyz(0.0, 0.5, 0.0) 200 | 201 | diff_1_2 = utils.rotation_error_magnitude(r1, r2) 202 | diff_1_3 = utils.rotation_error_magnitude(r1, r3) 203 | diff_2_3 = utils.rotation_error_magnitude(r2, r3) 204 | 205 | diff_3_3 = utils.rotation_error_magnitude(r3, r3) 206 | 207 | assert math.isclose(diff_3_3, 0, abs_tol=0.000001) 208 | 209 | assert diff_1_2 < diff_1_3 210 | assert diff_1_2 < diff_2_3 211 | assert diff_2_3 < diff_1_3 212 | 213 | 214 | def test_rot_inv(): 215 | q1 = np.array([0.693359, -0.138306, 0.693359, 0.138306]) 216 | q1_inv = utils.quat_inv(q1) 217 | q_mult = utils.quat_mul(q1, q1_inv) 218 | assert math.isclose(q_mult[0], 0, abs_tol=0.000001) 219 | assert math.isclose(q_mult[1], 0, abs_tol=0.000001) 220 | assert math.isclose(q_mult[2], 0, abs_tol=0.000001) 221 | assert math.isclose(q_mult[3], 1, abs_tol=0.000001) 222 | -------------------------------------------------------------------------------- /robo_gym/wrappers/env_wrappers/ur_ee_positioning_training.py: -------------------------------------------------------------------------------- 1 | import gymnasium as gym 2 | import numpy as np 3 | from typing import Tuple 4 | 5 | 6 | class EndEffectorPositioningURTrainingCurriculum(gym.Wrapper): 7 | def __init__(self, env, print_reward=False): 8 | super().__init__(env) 9 | self.env = env 10 | 11 | # use the episode counter as the metric for leveling up the curriculum 12 | self.episode_counter = 0 13 | 14 | self.reward_composition = {} 15 | self.print_reward = print_reward 16 | 17 | 18 | def reset(self, **kwargs): 19 | if self.episode_counter % 5 == 0:  # every 5th episode starts from a randomized configuration 20 | state = self.env.reset(randomize_start=True) 21 | else: 22 | state = self.env.reset(continue_on_success=True) 23 | 24 | self.reward_composition = { 'goal_reached_weight': 0, 25 | 'collision_weight': 0, 26 | 'distance_weight': 0, 27 | 'smoothness_weight': 0, 28 | 'action_magnitude_weight': 0, 29 | 'velocity_magnitude_weight': 0} 30 | 31 | return state 32 | 33 | def step(self, action): 34 | self.previous_action = self.env.previous_action 35 | next_state, _, _, _, _ = self.env.step(action)  # the wrapped gymnasium env returns a 5-tuple 36 | 37 | action = self.env.add_fixed_joints(action) 38 | reward, done, info = self.reward(rs_state=self.env.rs_state, action=action) 39 | 40 | if done: 41 | self.episode_counter += 1 42 | 43 | if done and self.print_reward: 44 | print(f'Episode counter: {self.episode_counter} Current level: {self.get_level()}')
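            # reward_composition holds the per-term sums accumulated by reward() over the episode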
45 | print(self.reward_composition) 46 | 47 | return next_state, reward, done, False, info  # gymnasium API: (obs, reward, terminated, truncated, info) 48 | 49 | def get_level(self): 50 | level_thresholds = [75, 250, 500, 1000, 1500, 2500] 51 | 52 | for i in range(len(level_thresholds)): 53 | if self.episode_counter < level_thresholds[i]: 54 | return i+1 55 | 56 | return len(level_thresholds) + 1 57 | 58 | def get_weights(self, level): 59 | # weights 60 | 61 | # reward for reaching the goal position 62 | g_w = 2 63 | # penalty for collision (ground, table or self) 64 | c_w = -1 65 | # punishment according to the distance to the goal 66 | d_w = -0.005 67 | # punishment for the delta between two consecutive actions 68 | s_w = -0.0002 69 | # punishment for action magnitude 70 | a_w = -0.0001 71 | # punishment for joint velocity magnitude 72 | v_w = -0.0002 73 | 74 | if level == 1: 75 | s_w = s_w * 0 76 | a_w = a_w * 0 77 | v_w = v_w * 0 78 | self.min_distance = 0.15 79 | if level == 2: 80 | d_w = d_w * 0 81 | self.min_distance = 0.15 82 | if level == 3: 83 | d_w = d_w * 0 84 | s_w = s_w * 5 85 | a_w = a_w * 5 86 | v_w = v_w * 5 87 | self.min_distance = 0.15 88 | if level == 4: 89 | d_w = d_w * 0 90 | s_w = s_w * 10 91 | a_w = a_w * 10 92 | v_w = v_w * 10 93 | self.min_distance = 0.1 94 | if level == 5: 95 | d_w = d_w * 0 96 | s_w = s_w * 15 97 | a_w = a_w * 15 98 | v_w = v_w * 15 99 | self.min_distance = 0.05 100 | if level == 6: 101 | d_w = d_w * 0 102 | s_w = s_w * 20 103 | a_w = a_w * 20 104 | v_w = v_w * 20 105 | self.min_distance = 0.05 106 | if level == 7: 107 | d_w = d_w * 0 108 | s_w = s_w * 25 109 | a_w = a_w * 25 110 | v_w = v_w * 25 111 | self.min_distance = 0.01 112 | 113 | return g_w, c_w, d_w, s_w, a_w, v_w 114 | 115 | 116 | def reward(self, rs_state, action) -> Tuple[float, bool, dict]: 117 | env_state = self.env._robot_server_state_to_env_state(rs_state) 118 | reward = 0 119 | done = False 120 | info = {} 121 | 122 | level = self.get_level() 123 | g_w, c_w, d_w, s_w, a_w, v_w = self.get_weights(level) 124 | 125 | 126 | # Calculate distance to the target 127 | target_coord = np.array([rs_state['object_0_to_ref_translation_x'], rs_state['object_0_to_ref_translation_y'], rs_state['object_0_to_ref_translation_z']]) 128 | ee_coord = np.array([rs_state['ee_to_ref_translation_x'], rs_state['ee_to_ref_translation_y'], rs_state['ee_to_ref_translation_z']]) 129 | euclidean_dist_3d = np.linalg.norm(target_coord - ee_coord) 130 | 131 | joint_velocities = np.array([rs_state['base_joint_velocity'], rs_state['shoulder_joint_velocity'], rs_state['elbow_joint_velocity'], rs_state['wrist_1_joint_velocity'], rs_state['wrist_2_joint_velocity'], rs_state['wrist_3_joint_velocity']]) 132 | 133 | previous_action = self.previous_action 134 | 135 | 136 | # distance weight 137 | x = d_w * euclidean_dist_3d 138 | reward += x 139 | self.reward_composition['distance_weight'] += x 140 | # smoothness 141 | x = s_w * np.linalg.norm(action - previous_action)**2 142 | reward += x 143 | self.reward_composition['smoothness_weight'] += x 144 | # action magnitude 145 | x = a_w * np.linalg.norm(action)**2 146 | reward += x 147 | self.reward_composition['action_magnitude_weight'] += x 148 | # velocity magnitude 149 | x = v_w * np.linalg.norm(joint_velocities)**2 150 | reward += x 151 | self.reward_composition['velocity_magnitude_weight'] += x 152 | 153 | 154 | if euclidean_dist_3d <= self.min_distance: 155 | # goal reached: the terminal reward replaces the accumulated shaping terms 156 | x = g_w * 1 157 | reward = x 158 | self.reward_composition['goal_reached_weight'] += x 159 | 160 | done = True 161 | info['final_status'] = 'success' 162 | info['target_coord']
= target_coord 163 | 164 | if rs_state['in_collision']: 165 | # punishment for collision 166 | x = c_w * 1 167 | reward = x 168 | self.reward_composition['collision_weight'] += x 169 | 170 | done = True 171 | info['final_status'] = 'collision' 172 | info['target_coord'] = target_coord 173 | 174 | if self.elapsed_steps >= self.max_episode_steps: 175 | done = True 176 | info['final_status'] = 'max_steps_exceeded' 177 | info['target_coord'] = target_coord 178 | 179 | return reward, done, info 180 | -------------------------------------------------------------------------------- /tests/robo-gym/envs/ur/test_ur_base_env.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import gymnasium as gym 4 | import robo_gym 5 | import math 6 | import numpy as np 7 | import pytest 8 | 9 | ur_models = [ 10 | pytest.param("ur3", marks=pytest.mark.nightly), 11 | pytest.param("ur3e", marks=pytest.mark.nightly), 12 | pytest.param("ur5", marks=pytest.mark.commit), 13 | pytest.param("ur5e", marks=pytest.mark.nightly), 14 | pytest.param("ur10", marks=pytest.mark.nightly), 15 | pytest.param("ur10e", marks=pytest.mark.nightly), 16 | pytest.param("ur16e", marks=pytest.mark.nightly), 17 | ] 18 | 19 | 20 | @pytest.fixture(scope="module", params=ur_models) 21 | def env(request): 22 | ip = os.environ.get("ROBOGYM_SERVERS_HOST", "robot-servers") 23 | env = gym.make( 24 | "EmptyEnvironmentURSim-v0", ip=ip, ur_model=request.param, fix_wrist_3=True 25 | ) 26 | env.request_param = request.param 27 | yield env 28 | env.kill_sim() 29 | 30 | 31 | @pytest.mark.commit 32 | def test_initialization(env): 33 | assert env.ur.model_key == env.request_param 34 | env.reset() 35 | done = False 36 | env.step([0, 0, 0, 0, 0]) 37 | for _ in range(10): 38 | if not done: 39 | action = env.action_space.sample() 40 | observation, _, done, _, _ = env.step(action) 41 | 42 | assert env.observation_space.contains(observation) 43 | 44 | 45 | @pytest.mark.commit 46 | @pytest.mark.flaky(reruns=3) 47 | def test_self_collision(env): 48 | collision_joint_config = { 49 | "ur3": [0.0, 0.0, -3.14, -1.77, 1.0], 50 | "ur3e": [0.0, -1.88, 2.8, -0.75, -1.88], 51 | "ur5": [0.0, -1.26, -3.14, 0.0, 0.0], 52 | "ur5e": [0.0, -0.50, -3.14, 3.14, 0.0], 53 | "ur10": [0.0, -1.5, 3.14, 0.0, 0.0], 54 | "ur10e": [0.0, -0.15, -2.83, -2.51, 1.63], 55 | "ur16e": [0.0, -1.15, 2.9, -0.19, 0.42], 56 | } 57 | env.reset() 58 | action = env.ur.normalize_joint_values(collision_joint_config[env.ur.model_key]) 59 | done = False 60 | while not done: 61 | _, _, done, _, info = env.step(action) 62 | assert info["final_status"] == "collision" 63 | 64 | 65 | @pytest.mark.commit 66 | @pytest.mark.flaky(reruns=3) 67 | def test_collision_with_ground(env): 68 | collision_joint_config = { 69 | "ur3": [0.0, 2.64, -1.95, -2.98, 0.41], 70 | "ur3e": [1.13, 1.88, -2.19, -3.43, 2.43], 71 | "ur5": [0.0, 1.0, 1.8, 0.0, 0.0], 72 | "ur5e": [0.0, 3.52, -2.58, 0.0, 0.0], 73 | "ur10": [0.0, 1.0, 1.15, 0.0, 0.0], 74 | "ur10e": [-2.14, -0.13, 0.63, -1.13, 1.63], 75 | "ur16e": [0.0, -0.15, 1.32, 0.0, 1.63], 76 | } 77 | env.reset() 78 | action = env.ur.normalize_joint_values(collision_joint_config[env.ur.model_key]) 79 | done = False 80 | while not done: 81 | _, _, done, _, info = env.step(action) 82 | assert info["final_status"] == "collision" 83 | 84 | 85 | @pytest.mark.commit 86 | def test_reset_joint_positions(env): 87 | joint_positions = [0.2, -2.5, 1.1, -2.0, -1.2, 1.2] 88 | 89 | state = env.reset(options={"joint_positions": joint_positions}) 90 | 
assert np.isclose( 91 | env.ur.normalize_joint_values(joint_positions), state[0:6], atol=0.1 92 | ).all() 93 | 94 | 95 | test_ur_fixed_joints = [ 96 | ( 97 | "EmptyEnvironmentURSim-v0", 98 | True, 99 | False, 100 | False, 101 | False, 102 | False, 103 | False, 104 | "ur3", 105 | ), # fixed shoulder_pan 106 | ( 107 | "EmptyEnvironmentURSim-v0", 108 | False, 109 | True, 110 | False, 111 | False, 112 | False, 113 | False, 114 | "ur3e", 115 | ), # fixed shoulder_lift 116 | ( 117 | "EmptyEnvironmentURSim-v0", 118 | False, 119 | False, 120 | False, 121 | False, 122 | False, 123 | True, 124 | "ur5", 125 | ), # fixed wrist_3 126 | ( 127 | "EmptyEnvironmentURSim-v0", 128 | True, 129 | False, 130 | True, 131 | False, 132 | False, 133 | False, 134 | "ur5e", 135 | ), # fixed Base and Elbow 136 | ( 137 | "EmptyEnvironmentURSim-v0", 138 | False, 139 | False, 140 | True, 141 | False, 142 | False, 143 | False, 144 | "ur10", 145 | ), # fixed elbow 146 | ( 147 | "EmptyEnvironmentURSim-v0", 148 | False, 149 | False, 150 | False, 151 | True, 152 | False, 153 | False, 154 | "ur10e", 155 | ), # fixed wrist_1 156 | ( 157 | "EmptyEnvironmentURSim-v0", 158 | False, 159 | False, 160 | False, 161 | False, 162 | True, 163 | False, 164 | "ur16e", 165 | ), # fixed wrist_2 166 | ] 167 | 168 | 169 | @pytest.mark.nightly 170 | @pytest.mark.parametrize( 171 | "env_name, fix_base, fix_shoulder, fix_elbow, fix_wrist_1, fix_wrist_2, fix_wrist_3, ur_model", 172 | test_ur_fixed_joints, 173 | ) 174 | @pytest.mark.flaky(reruns=3) 175 | def test_fixed_joints( 176 | env_name, 177 | fix_base, 178 | fix_shoulder, 179 | fix_elbow, 180 | fix_wrist_1, 181 | fix_wrist_2, 182 | fix_wrist_3, 183 | ur_model, 184 | ): 185 | env = gym.make( 186 | env_name, 187 | ip="robot-servers", 188 | fix_base=fix_base, 189 | fix_shoulder=fix_shoulder, 190 | fix_elbow=fix_elbow, 191 | fix_wrist_1=fix_wrist_1, 192 | fix_wrist_2=fix_wrist_2, 193 | fix_wrist_3=fix_wrist_3, 194 | ur_model=ur_model, 195 | ) 196 | state = env.reset() 197 | initial_joint_positions = state[0:6] 198 | # Take 20 actions 199 | action = env.action_space.sample() 200 | for _ in range(20): 201 | state, _, _, _, _ = env.step(action) 202 | joint_positions = state[0:6] 203 | 204 | if fix_base: 205 | assert math.isclose( 206 | initial_joint_positions[0], joint_positions[0], abs_tol=0.05 207 | ) 208 | if fix_shoulder: 209 | assert math.isclose( 210 | initial_joint_positions[1], joint_positions[1], abs_tol=0.05 211 | ) 212 | if fix_elbow: 213 | assert math.isclose( 214 | initial_joint_positions[2], joint_positions[2], abs_tol=0.05 215 | ) 216 | if fix_wrist_1: 217 | assert math.isclose( 218 | initial_joint_positions[3], joint_positions[3], abs_tol=0.05 219 | ) 220 | if fix_wrist_2: 221 | assert math.isclose( 222 | initial_joint_positions[4], joint_positions[4], abs_tol=0.05 223 | ) 224 | if fix_wrist_3: 225 | assert math.isclose( 226 | initial_joint_positions[5], joint_positions[5], abs_tol=0.05 227 | ) 228 | 229 | env.kill_sim() 230 | -------------------------------------------------------------------------------- /robo_gym/envs/simulation_wrapper.py: -------------------------------------------------------------------------------- 1 | import robo_gym_server_modules.server_manager.client as sm_client 2 | import robo_gym_server_modules.robot_server.client as rs_client 3 | import traceback 4 | 5 | class Simulation: 6 | """Simulation Wrapper Class - can be used to add simulation capability to a robo-gym environment. 
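    Intended to be combined with a concrete environment class via multiple inheritance (see e.g. ExampleEnvSim): the wrapper starts a simulated Robot Server through the Server Manager and stores its address in robot_server_ip for the environment to connect to.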
7 | 8 | Attributes: 9 | ----------- 10 | verbose: bool 11 | A class variable that controls whether certain methods of this class log in a verbose manner, with the purpose of tracking the management of connections to simulation robot servers. 12 | del_try_async_kill: bool 13 | A class variable that controls whether the __del__ method should try to kill a remaining associated simulation in an asynchronous fashion. 14 | _instances_count: int 15 | A class variable that counts the instances constructed so far, used for assigning a name for verbose output. 16 | """ 17 | _instances_count = 0 18 | verbose = False 19 | del_try_async_kill = True 20 | 21 | def __init__(self, cmd, ip=None, lower_bound_port=None, upper_bound_port=None, gui=False, **kwargs): 22 | """ Initializes the simulation-specific aspects of a robo-gym env. 23 | 24 | Args: 25 | cmd (str): roslaunch command to execute to start the simulated Robot Server. 26 | ip (str): IP address of the machine hosting the Server Manager. Defaults to None. 27 | lower_bound_port (str): Lower bound of Server Manager port range. Defaults to None. 28 | upper_bound_port (str): Upper bound of Server Manager port range. Defaults to None. 29 | gui (bool): If True the simulation is started with GUI. Defaults to False. 30 | """ 31 | self.robot_server_ip = None 32 | self.cmd = cmd 33 | self.gui = gui 34 | self.connections_count = 0 35 | self.expect_sim_running = False 36 | self.verbose = Simulation.verbose 37 | self.instance_id = str(chr(ord('A') + Simulation._instances_count)) 38 | Simulation._instances_count += 1 39 | if ip: 40 | if lower_bound_port and upper_bound_port: 41 | self.sm_client = sm_client.Client(ip,lower_bound_port,upper_bound_port) 42 | 43 | else: 44 | self.sm_client = sm_client.Client(ip) 45 | self._start_sim() 46 | 47 | def get_instance_string(self): 48 | if self.robot_server_ip: 49 | return self.instance_id + "/" + self.robot_server_ip 50 | return self.instance_id 51 | 52 | def print(self, msg): 53 | if self.verbose: 54 | print("Simulation " + self.get_instance_string() + ": " + str(msg)) 55 | 56 | def _start_sim(self): 57 | """Start a new simulated Robot Server. 58 | """ 59 | self.print("_start_sim: starting, stack trace:") 60 | if self.verbose: 61 | traceback.print_stack() 62 | self.robot_server_ip = self.sm_client.start_new_server(cmd = self.cmd, gui = self.gui) 63 | self.connections_count += 1 64 | self.expect_sim_running = True 65 | self.print("_start_sim: end") 66 | 67 | 68 | def kill_sim(self): 69 | """Kill the simulated Robot Server. 70 | """ 71 | if not self.connections_count: 72 | return 73 | if not self.expect_sim_running: 74 | return 75 | assert self.sm_client.kill_server(self.robot_server_ip) 76 | self.expect_sim_running = False 77 | 78 | def restart_sim(self): 79 | """Restart the simulated Robot Server.
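        Kills the simulation that is currently expected to be running (if any), starts a new one through the Server Manager and reconnects the Robot Server client to the new server address.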
80 | """ 81 | 82 | if self.expect_sim_running: 83 | self.kill_sim() 84 | self._start_sim() 85 | 86 | # usually initialized in each environment's constructor 87 | self.client = rs_client.Client(self.robot_server_ip) 88 | 89 | def close(self): 90 | self.print("close start") 91 | try: 92 | if not self.expect_sim_running: 93 | self.print("close returning early: I don't expect my simulation to be running!") 94 | return 95 | else: 96 | self.print("close: I expect my simulation to be running.") 97 | if not self.connections_count: 98 | # should be unreachable 99 | self.print("close returning early: I have never been connected!") 100 | return 101 | else: 102 | self.print("close: I have been connected " + str(self.connections_count) + " time(s)") 103 | if True: # hasattr(self, "sm_client") and self.sm_client is not None: 104 | self.print("close calling self.kill_sim") 105 | self.kill_sim() 106 | self.print("close finished self.kill_sim") 107 | finally: 108 | self.print("close end") 109 | 110 | def __del__(self): 111 | # If we reach this point and the simulation is still running, we want to kill it. 112 | # But in some situations normal kill_sim won't work here (because of the state we're in). 113 | # We can try to kill outside of the context of the client object if server modules are upgraded to provide the kill_server_async. 114 | # The flag Simulation.del_try_async_kill can be set to False to skip trying. 115 | self.print("__del__ start") 116 | try: 117 | try: 118 | self.print("__del__ trying self.kill_sim") 119 | self.kill_sim() 120 | self.print("__del__ self.kill_sim returned") 121 | except Exception as e: 122 | self.print("__del__ self.kill_sim threw " + str(e)) 123 | if hasattr(self, "expect_sim_running"): 124 | if not self.expect_sim_running: 125 | self.print("__del__ returning early: I don't expect my simulation to be running!") 126 | return 127 | else: 128 | self.print("__del__: I expect my simulation to be running.") 129 | else: 130 | self.print("__del__: I do not know if I expect my simulation to be running!") 131 | if hasattr(self, "connections_count"): 132 | if not self.connections_count: 133 | self.print("__del__ returning early: I have never been connected!") 134 | return 135 | else: 136 | self.print("__del__: I have been connected " + str(self.connections_count) + " times") 137 | else: 138 | self.print("__del__: I do not know my connections count!") 139 | if Simulation.del_try_async_kill and hasattr(self, "sm_client") and hasattr(sm_client.Client, 'kill_server_async') and callable(getattr(sm_client.Client, 'kill_server_async') and self.sm_client is not None): 140 | self.print("__del__ calling self.sm_client.kill_server_async") 141 | self.sm_client.kill_server_async(self.robot_server_ip) 142 | self.print("__del__ finished self.sm_client.kill_server_async") 143 | finally: 144 | self.print("__del__ end") -------------------------------------------------------------------------------- /tests/robo-gym/envs/ur/test_ur_avoidance_raad_training.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import gymnasium as gym 4 | import robo_gym 5 | import math 6 | import numpy as np 7 | import pathlib 8 | import json 9 | import pytest 10 | 11 | 12 | robo_gym_path = ( 13 | pathlib.Path(__file__) 14 | .parent.parent.parent.parent.parent.absolute() 15 | .joinpath("robo_gym") 16 | ) 17 | 18 | ur_models = [ 19 | pytest.param("ur3", marks=pytest.mark.skip(reason="not implemented yet")), 20 | pytest.param("ur3e", marks=pytest.mark.skip(reason="not 
implemented yet")), 21 | pytest.param("ur5", marks=pytest.mark.commit), 22 | pytest.param("ur5e", marks=pytest.mark.skip(reason="not implemented yet")), 23 | pytest.param("ur10", marks=pytest.mark.skip(reason="not implemented yet")), 24 | pytest.param("ur10e", marks=pytest.mark.skip(reason="not implemented yet")), 25 | pytest.param("ur16e", marks=pytest.mark.skip(reason="not implemented yet")), 26 | ] 27 | 28 | 29 | @pytest.fixture(scope="module", params=ur_models) 30 | def env(request): 31 | ip = os.environ.get("ROBOGYM_SERVERS_HOST", "robot-servers") 32 | env = gym.make("AvoidanceRaad2022URSim-v0", ip=ip, ur_model=request.param) 33 | env.request_param = request.param 34 | yield env 35 | env.kill_sim() 36 | 37 | 38 | @pytest.mark.commit 39 | def test_initialization(env): 40 | assert env.ur.model_key == env.request_param 41 | env.reset() 42 | done = False 43 | env.step([0, 0, 0, 0, 0]) 44 | for _ in range(10): 45 | if not done: 46 | action = env.action_space.sample() 47 | observation, _, done, _, _ = env.step(action) 48 | 49 | assert env.observation_space.contains(observation) 50 | 51 | 52 | @pytest.mark.skip(reason="This fails only in CI") 53 | @pytest.mark.flaky(reruns=3) 54 | def test_object_collision(env): 55 | params = { 56 | "ur5": {"object_coords": [-0.2, -0.1, 0.5], "n_steps": 250}, 57 | } 58 | 59 | env.reset( 60 | options={"fixed_object_position": params[env.ur.model_key]["object_coords"]} 61 | ) 62 | done = False 63 | i = 0 64 | while (not done) or i <= params[env.ur.model_key]["n_steps"]: 65 | _, _, done, _, info = env.step(np.zeros(5)) 66 | i += 1 67 | assert info["final_status"] == "collision" 68 | 69 | 70 | @pytest.mark.commit 71 | def test_object_coordinates(env): 72 | params = { 73 | "ur5": { 74 | "object_coords": [0.3, 0.1, 1.0, 0.0, 0.0, 0.0], 75 | "polar_coords_ee": {"r": 0.980, "theta": 2.488, "phi": 1.246}, 76 | "polar_coords_forearm": {"r": 0.607, "theta": 1.023, "phi": -0.825}, 77 | } 78 | } 79 | 80 | state = env.reset( 81 | options={"fixed_object_position": params[env.ur.model_key]["object_coords"]} 82 | ) 83 | assert np.isclose( 84 | [ 85 | params[env.ur.model_key]["polar_coords_ee"]["r"], 86 | params[env.ur.model_key]["polar_coords_ee"]["theta"], 87 | params[env.ur.model_key]["polar_coords_ee"]["phi"], 88 | ], 89 | state[0:3], 90 | atol=0.1, 91 | ).all() 92 | assert np.isclose( 93 | [ 94 | params[env.ur.model_key]["polar_coords_forearm"]["r"], 95 | params[env.ur.model_key]["polar_coords_forearm"]["theta"], 96 | params[env.ur.model_key]["polar_coords_forearm"]["phi"], 97 | ], 98 | state[15:18], 99 | atol=0.1, 100 | ).all() 101 | 102 | 103 | @pytest.mark.commit 104 | def test_robot_trajectory(env): 105 | params = { 106 | "ur5": { 107 | "traj_relative_path": "envs/ur/robot_trajectories/trajectory_raad_2022.json" 108 | } 109 | } 110 | 111 | env.reset() 112 | # load trajectory 113 | traj_path = robo_gym_path.joinpath(params[env.ur.model_key]["traj_relative_path"]) 114 | with open(traj_path) as json_file: 115 | trajectory = json.load(json_file)["trajectory"] 116 | 117 | action = np.zeros(5) 118 | for i in range(len(trajectory[0])): 119 | traj_joint_positions = trajectory[0][i] 120 | state, _, _, _, _ = env.step(action) 121 | ur_j_pos_norm = state[3:9] 122 | delta_joints = state[9:15] 123 | desired_joints = state[18:24] 124 | # check if robot follows the trajectory in all steps of trajectory segment 0 125 | assert np.isclose( 126 | env.ur.normalize_joint_values(traj_joint_positions), ur_j_pos_norm, atol=0.1 127 | ).all() 128 | # check if calculation of delta joints is 
correct 129 | assert np.isclose( 130 | (ur_j_pos_norm - desired_joints), delta_joints, atol=0.01 131 | ).all() 132 | # check that the current trajectory point is a target point 133 | assert state[24] == 1.0 134 | # check that state machine has transitioned to segment 1 of trajectory 135 | traj_joint_positions = trajectory[1][0] 136 | state, _, _, _, _ = env.step(action) 137 | assert np.isclose( 138 | env.ur.normalize_joint_values(traj_joint_positions), state[3:9], atol=0.1 139 | ).all() 140 | 141 | 142 | test_ur_fixed_joints = [ 143 | ( 144 | "AvoidanceRaad2022URSim-v0", 145 | False, 146 | False, 147 | False, 148 | False, 149 | False, 150 | True, 151 | "ur5", 152 | ), # fixed wrist_3 153 | ( 154 | "AvoidanceRaad2022URSim-v0", 155 | True, 156 | False, 157 | True, 158 | False, 159 | False, 160 | False, 161 | "ur5", 162 | ), # fixed Base and Elbow 163 | ] 164 | 165 | 166 | @pytest.mark.nightly 167 | @pytest.mark.parametrize( 168 | "env_name, fix_base, fix_shoulder, fix_elbow, fix_wrist_1, fix_wrist_2, fix_wrist_3, ur_model", 169 | test_ur_fixed_joints, 170 | ) 171 | @pytest.mark.flaky(reruns=3) 172 | def test_fixed_joints( 173 | env_name, 174 | fix_base, 175 | fix_shoulder, 176 | fix_elbow, 177 | fix_wrist_1, 178 | fix_wrist_2, 179 | fix_wrist_3, 180 | ur_model, 181 | ): 182 | env = gym.make( 183 | env_name, 184 | ip="robot-servers", 185 | fix_base=fix_base, 186 | fix_shoulder=fix_shoulder, 187 | fix_elbow=fix_elbow, 188 | fix_wrist_1=fix_wrist_1, 189 | fix_wrist_2=fix_wrist_2, 190 | fix_wrist_3=fix_wrist_3, 191 | ur_model=ur_model, 192 | ) 193 | state = env.reset() 194 | initial_joint_positions = state[3:9] 195 | # Take 20 actions 196 | action = env.action_space.sample() 197 | for _ in range(20): 198 | state, _, _, _, _ = env.step(action) 199 | joint_positions = state[3:9] 200 | 201 | if fix_base: 202 | assert math.isclose( 203 | initial_joint_positions[0], joint_positions[0], abs_tol=0.05 204 | ) 205 | if fix_shoulder: 206 | assert math.isclose( 207 | initial_joint_positions[1], joint_positions[1], abs_tol=0.05 208 | ) 209 | if fix_elbow: 210 | assert math.isclose( 211 | initial_joint_positions[2], joint_positions[2], abs_tol=0.05 212 | ) 213 | if fix_wrist_1: 214 | assert math.isclose( 215 | initial_joint_positions[3], joint_positions[3], abs_tol=0.05 216 | ) 217 | if fix_wrist_2: 218 | assert math.isclose( 219 | initial_joint_positions[4], joint_positions[4], abs_tol=0.05 220 | ) 221 | if fix_wrist_3: 222 | assert math.isclose( 223 | initial_joint_positions[5], joint_positions[5], abs_tol=0.05 224 | ) 225 | 226 | env.kill_sim() 227 | -------------------------------------------------------------------------------- /robo_gym/envs/manipulator/isaac_reach.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from robo_gym.envs.base.robogym_env import * 4 | from robo_gym.envs.manipulator.ee_pos_base import * 5 | from robo_gym.envs.manipulator.manipulator_base import * 6 | 7 | 8 | class IsaacReachEnv(ManipulatorEePosEnv): 9 | """ 10 | Base class for compatibility environments for agents trained with IsaacLab reach tasks. 11 | Environment actions and observations use the spaces from the corresponding IsaacLab environments. 12 | On the back-end, a UR robot server as for our End Effector Positioning environments is used. 13 | The environment nodes are responsible for achieving compatibility between that robot server and the spaces as used in IsaacLab. 
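    Concretely (see the observation and action nodes below): observations contain the joint positions relative to the default joint positions, the joint velocities, and the target pose command as position plus wxyz quaternion, while actions are interpreted as offsets from the default joint positions scaled by the configurable isaac_scale factor (0.5 by default).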
14 | """ 15 | 16 | KW_ISAAC_SCALE = "isaac_scale" 17 | 18 | # Add an NDArray to add static joints in the env observation that we don't get in the robot server state but we want in the env observation. 19 | # 2-dimensional: for each joint: 3 floats: initial joint positions, min, max position. Velocity is always 0. 20 | KW_ISAAC_OBS_EXTRA_STATIC_JOINTS = "isaac_extra_static_joints" 21 | 22 | def __init__(self, **kwargs): 23 | # not too nice - repeated in super init 24 | self._config = kwargs 25 | 26 | kwargs.setdefault(ManipulatorEePosEnv.KW_EE_ROTATION_MATTERS, True) 27 | kwargs.setdefault(ManipulatorEePosEnv.KW_EE_ROTATION_THRESHOLD, 0.05) 28 | kwargs.setdefault(ManipulatorEePosEnv.KW_EE_DISTANCE_THRESHOLD, 0.02) 29 | 30 | kwargs.setdefault(ManipulatorEePosEnv.KW_EE_TARGET_VOLUME_BOUNDING_BOX, True) 31 | kwargs.setdefault(ManipulatorEePosEnv.KW_EE_POSITION_X_RANGE, [0.35, 0.65]) 32 | kwargs.setdefault(ManipulatorEePosEnv.KW_EE_POSITION_Y_RANGE, [-0.2, 0.2]) 33 | kwargs.setdefault(ManipulatorEePosEnv.KW_EE_POSITION_Z_RANGE, [0.15, 0.5]) 34 | kwargs.setdefault(ManipulatorEePosEnv.KW_EE_ROTATION_ROLL_RANGE, 0) 35 | 36 | # as in IsaacLab reach envs, pi only to the first two fractional digits 37 | kwargs.setdefault(ManipulatorEePosEnv.KW_EE_ROTATION_YAW_RANGE, [-3.14, 3.14]) 38 | 39 | kwargs.setdefault(IsaacReachEnv.KW_ISAAC_SCALE, 0.5) 40 | 41 | # env nodes 42 | action_node: ActionNode | None = kwargs.get(RoboGymEnv.KW_ACTION_NODE) 43 | if not action_node: 44 | action_node = IsaacReachActionNode(**self.get_action_node_setup_kwargs()) 45 | kwargs[RoboGymEnv.KW_ACTION_NODE] = action_node 46 | 47 | super().__init__(**kwargs) 48 | # robot model joint positions were set from the joint_positions kw by super().__init__ 49 | self.default_joint_positions = self._robot_model.joint_positions 50 | 51 | def create_main_observation_node(self, node_index: int = 0, **kwargs): 52 | return IsaacReachObservationNode(**self.get_obs_node_setup_kwargs(node_index)) 53 | 54 | 55 | class IsaacReachObservationNode(ManipulatorEePosObservationNode): 56 | 57 | def __init__(self, **kwargs): 58 | super().__init__(**kwargs) 59 | self.default_joint_positions = np.array( 60 | self._config.get(ManipulatorBaseEnv.KW_JOINT_POSITIONS) 61 | ) 62 | self.obs_extra_static_joints = self._config.get( 63 | IsaacReachEnv.KW_ISAAC_OBS_EXTRA_STATIC_JOINTS 64 | ) 65 | 66 | def get_observation_space_part(self) -> gym.spaces.Box: 67 | num_joints = len(self._robot_model.joint_names) 68 | 69 | # joint positions 70 | tolerance_abs = self._robot_model.denormalize_joint_ranges( 71 | np.array([self.joint_position_tolerance_normalized] * num_joints) 72 | ) 73 | max_joint_positions = ( 74 | self._robot_model.get_max_joint_positions() 75 | + tolerance_abs 76 | - self.default_joint_positions 77 | ) 78 | min_joint_positions = ( 79 | self._robot_model.get_min_joint_positions() 80 | - tolerance_abs 81 | - self.default_joint_positions 82 | ) 83 | 84 | # velocities 85 | max_joint_velocities = np.array([np.inf] * num_joints) 86 | min_joint_velocities = -np.array([np.inf] * num_joints) 87 | 88 | if self.obs_extra_static_joints is not None: 89 | for joint_values in self.obs_extra_static_joints: 90 | min_joint_positions = np.concatenate( 91 | (min_joint_positions, [joint_values[1]]), dtype=np.float32 92 | ) 93 | max_joint_positions = np.concatenate( 94 | (max_joint_positions, [joint_values[2]]), dtype=np.float32 95 | ) 96 | min_joint_velocities = np.concatenate( 97 | (min_joint_velocities, [-np.inf]), dtype=np.float32 98 | ) 99 | max_joint_velocities = 
np.concatenate( 100 | (max_joint_velocities, [np.inf]), dtype=np.float32 101 | ) 102 | 103 | # ee target 104 | ee_target_max = np.concatenate((np.array([np.inf] * 3), np.array([1.0] * 4))) 105 | ee_target_min = -1 * ee_target_max 106 | 107 | obs_space_max = np.concatenate( 108 | (max_joint_positions, max_joint_velocities, ee_target_max) 109 | ).astype(np.float32) 110 | obs_space_min = np.concatenate( 111 | (min_joint_positions, min_joint_velocities, ee_target_min) 112 | ).astype(np.float32) 113 | obs_space = gym.spaces.Box( 114 | high=obs_space_max, low=obs_space_min, dtype=np.float32 115 | ) 116 | return obs_space 117 | 118 | def rs_state_to_observation_part( 119 | self, rs_state_array: NDArray, rs_state_dict: dict[str, float], **kwargs 120 | ) -> NDArray: 121 | # previous action is added by a separate LastActionObservationNode 122 | joint_positions = ( 123 | self.extract_joint_positions_from_rs_state_dict(rs_state_dict) 124 | - self.default_joint_positions 125 | ) 126 | joint_velocities = self.extract_joint_velocities_from_rs_state_dict( 127 | rs_state_dict 128 | ) 129 | if self.obs_extra_static_joints is not None: 130 | for joint_values in self.obs_extra_static_joints: 131 | # position 0 - it stays in initial pose 132 | joint_positions = np.concatenate( 133 | (joint_positions, [0]), dtype=np.float32 134 | ) 135 | # velocity 0 - it does not move 136 | joint_velocities = np.concatenate( 137 | (joint_velocities, [0]), dtype=np.float32 138 | ) 139 | 140 | command = utils.pose_quat_wxyz_from_xyzw(np.array(self.get_target_pose())) 141 | obs = np.concatenate( 142 | (joint_positions, joint_velocities, command), dtype=np.float32 143 | ) 144 | 145 | return obs 146 | 147 | 148 | class IsaacReachActionNode(ManipulatorActionNode): 149 | 150 | def __init__(self, **kwargs): 151 | self.scale = kwargs.get(IsaacReachEnv.KW_ISAAC_SCALE, 0.5) 152 | super().__init__(**kwargs) 153 | self.default_joint_positions = np.array( 154 | self._config.get(ManipulatorBaseEnv.KW_JOINT_POSITIONS) 155 | ) 156 | 157 | def env_action_to_rs_action(self, env_action: NDArray, **kwargs) -> NDArray: 158 | rs_action = self.default_joint_positions + env_action * self.scale 159 | rs_action = self.robot_model.reorder_joints_for_rs(rs_action).astype(np.float32) 160 | return rs_action 161 | 162 | def get_action_space(self) -> gym.spaces.Box: 163 | max_joint_positions = self._robot_model.get_max_joint_positions() 164 | min_joint_positions = self._robot_model.get_min_joint_positions() 165 | 166 | action_max = ( 167 | (max_joint_positions - self.default_joint_positions) / self.scale 168 | ).astype(np.float32) 169 | action_min = ( 170 | (min_joint_positions - self.default_joint_positions) / self.scale 171 | ).astype(np.float32) 172 | 173 | action_space = gym.spaces.Box(high=action_max, low=action_min, dtype=np.float32) 174 | return action_space 175 | -------------------------------------------------------------------------------- /robo_gym/envs/example/example_env.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from __future__ import annotations 3 | import numpy as np 4 | import gymnasium as gym 5 | from typing import Tuple, Any 6 | from robo_gym.utils.exceptions import InvalidStateError, RobotServerError, InvalidActionError 7 | import robo_gym_server_modules.robot_server.client as rs_client 8 | from robo_gym_server_modules.robot_server.grpc_msgs.python import robot_server_pb2 9 | from robo_gym.envs.simulation_wrapper import Simulation 10 | 11 | 12 | class 
ExampleEnv(gym.Env): 13 | """Example environment. 14 | 15 | Args: 16 | rs_address (str): Robot Server address. Formatted as 'ip:port'. Defaults to None. 17 | 18 | Attributes: 19 | 20 | client (:obj:str): Robot Server client. 21 | real_robot (bool): True if the environment is controlling a real robot. 22 | 23 | """ 24 | real_robot = False 25 | max_episode_steps = 300 26 | 27 | def __init__(self, rs_address=None, rs_state_to_info=True, **kwargs): 28 | self.elapsed_steps = 0 29 | 30 | self.rs_state_to_info = rs_state_to_info 31 | 32 | self.observation_space = self._get_observation_space() 33 | self.action_space = self._get_action_space() 34 | 35 | self.rs_state = None 36 | 37 | # Connect to Robot Server 38 | if rs_address: 39 | self.client = rs_client.Client(rs_address) 40 | else: 41 | print("WARNING: No IP and Port passed. Simulation will not be started") 42 | print("WARNING: Use this only to get environment shape") 43 | 44 | def _set_initial_robot_server_state(self, rs_state) -> robot_server_pb2.State: 45 | string_params = {} 46 | float_params = {} 47 | state = {} 48 | 49 | state_msg = robot_server_pb2.State(state=state, float_params=float_params, 50 | string_params=string_params, state_dict=rs_state) 51 | return state_msg 52 | 53 | def reset(self, *, seed: int | None = None, options: dict[str, Any] | None = None) -> tuple[np.ndarray, dict[str, Any]]: 54 | """Environment reset. 55 | 56 | options: 57 | position (list[2] or np.array[2]): [x,y] initial robot position. 58 | 59 | Returns: 60 | np.array: Environment state. 61 | dict: info 62 | 63 | """ 64 | super().reset(seed=seed) 65 | if options is None: 66 | options = {} 67 | position = options["position"] if "position" in options else None 68 | 69 | # Set Robot starting position 70 | if position: 71 | assert len(position) == 2 72 | else: 73 | position = [0, 0] 74 | 75 | self.elapsed_steps = 0 76 | 77 | # Initialize environment state 78 | state_len = self.observation_space.shape[0] 79 | state = np.zeros(state_len) 80 | rs_state = dict.fromkeys(self.get_robot_server_composition(), 0.0) 81 | 82 | # Fill rs_state 83 | rs_state['pos_x'] = position[0] 84 | rs_state['pos_y'] = position[1] 85 | 86 | # Set initial state of the Robot Server 87 | state_msg = self._set_initial_robot_server_state(rs_state) 88 | 89 | if not self.client.set_state_msg(state_msg): 90 | raise RobotServerError("set_state") 91 | 92 | # Get Robot Server state 93 | rs_state = self.client.get_state_msg().state_dict 94 | 95 | # Check if the length and keys of the Robot Server state received is correct 96 | self._check_rs_state_keys(rs_state) 97 | 98 | # Convert the initial state from Robot Server format to environment format 99 | state = self._robot_server_state_to_env_state(rs_state) 100 | 101 | # Check if the environment state is contained in the observation space 102 | if not self.observation_space.contains(state): 103 | raise InvalidStateError() 104 | 105 | self.rs_state = rs_state 106 | 107 | return state, {} 108 | 109 | def reward(self, rs_state, action) -> Tuple[float, bool, dict]: 110 | done = False 111 | info = {} 112 | 113 | if self.elapsed_steps >= self.max_episode_steps: 114 | done = True 115 | info['final_status'] = 'max_steps_exceeded' 116 | 117 | return 0, done, info 118 | 119 | def step(self, action) -> Tuple[np.array, float, bool, bool, dict]: 120 | if type(action) == list: action = np.array(action) 121 | 122 | self.elapsed_steps += 1 123 | 124 | # Check if the action is contained in the action space 125 | if not self.action_space.contains(action): 126 | raise 
InvalidActionError() 127 | 128 | # Send action to Robot Server and get state 129 | rs_state = self.client.send_action_get_state(action.tolist()).state_dict 130 | self._check_rs_state_keys(rs_state) 131 | 132 | # Convert the state from Robot Server format to environment format 133 | state = self._robot_server_state_to_env_state(rs_state) 134 | 135 | # Check if the environment state is contained in the observation space 136 | if not self.observation_space.contains(state): 137 | raise InvalidStateError() 138 | 139 | self.rs_state = rs_state 140 | 141 | # Assign reward 142 | reward = 0 143 | done = False 144 | reward, done, info = self.reward(rs_state=rs_state, action=action) 145 | if self.rs_state_to_info: info['rs_state'] = self.rs_state 146 | 147 | return state, reward, done, False, info 148 | 149 | def get_rs_state(self): 150 | return self.rs_state 151 | 152 | def render(self): 153 | pass 154 | 155 | def get_robot_server_composition(self) -> list: 156 | rs_state_keys = [ 157 | 'pos_x', 158 | 'pos_y', 159 | 'lin_vel', 160 | 'ang_vel', 161 | ] 162 | return rs_state_keys 163 | 164 | def _get_robot_server_state_len(self) -> int: 165 | """Get length of the Robot Server state. 166 | 167 | Describes the composition of the Robot Server state and returns 168 | its length. 169 | """ 170 | return len(self.get_robot_server_composition()) 171 | 172 | def _check_rs_state_keys(self, rs_state) -> None: 173 | keys = self.get_robot_server_composition() 174 | if len(keys) != len(rs_state.keys()): 175 | raise InvalidStateError("Robot Server state keys do not match. Different lengths.") 176 | 177 | for key in keys: 178 | if key not in rs_state.keys(): 179 | raise InvalidStateError("Robot Server state keys do not match") 180 | 181 | def _robot_server_state_to_env_state(self, rs_state) -> np.ndarray: 182 | """Transform state from Robot Server to environment format. 183 | 184 | Args: 185 | rs_state (dict): State in Robot Server format. 186 | 187 | Returns: 188 | numpy.array: State in environment format. 189 | 190 | """ 191 | 192 | pos_x = rs_state['pos_x'] 193 | pos_y = rs_state['pos_y'] 194 | lin_vel = rs_state['lin_vel'] 195 | ang_vel = rs_state['ang_vel'] 196 | 197 | # Compose environment state 198 | state = np.array([pos_x, pos_y, lin_vel, ang_vel]) 199 | 200 | return state 201 | 202 | def _get_observation_space(self) -> gym.spaces.Box: 203 | """Get environment observation space. 204 | 205 | Returns: 206 | gym.spaces: Gym observation space object. 207 | 208 | """ 209 | 210 | # Definition of environment observation_space 211 | max_obs = np.array([np.inf] * 4) 212 | min_obs = -np.array([np.inf] * 4) 213 | 214 | return gym.spaces.Box(low=min_obs, high=max_obs, dtype=np.float32) 215 | 216 | def _get_action_space(self) -> gym.spaces.Box: 217 | """Get environment action space. 218 | 219 | Returns: 220 | gym.spaces: Gym action space object.
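        The action space is a fixed two-dimensional box normalized to [-1.0, 1.0].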
221 | 222 | """ 223 | 224 | return gym.spaces.Box(low=np.full(2, -1.0), high=np.full(2, 1.0), dtype=np.float32) 225 | 226 | 227 | class ExampleEnvSim(ExampleEnv, Simulation): 228 | cmd = "roslaunch example_robot_server robot_server.launch" 229 | 230 | def __init__(self, ip=None, lower_bound_port=None, upper_bound_port=None, gui=False, **kwargs): 231 | Simulation.__init__(self, self.cmd, ip, lower_bound_port, upper_bound_port, gui, **kwargs) 232 | ExampleEnv.__init__(self, rs_address=self.robot_server_ip, **kwargs) 233 | 234 | 235 | class ExampleEnvRob(ExampleEnv): 236 | real_robot = True 237 | 238 | # roslaunch example_robot_server robot_server.launch gui:=true real_robot:=true 239 | -------------------------------------------------------------------------------- /tests/robo-gym/envs/ur/test_ur_avoidance_basic.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import gymnasium as gym 4 | import robo_gym 5 | import math 6 | import numpy as np 7 | import pytest 8 | 9 | ur_models = [ 10 | pytest.param("ur3", marks=pytest.mark.nightly), 11 | pytest.param("ur3e", marks=pytest.mark.nightly), 12 | pytest.param("ur5", marks=pytest.mark.commit), 13 | pytest.param("ur5e", marks=pytest.mark.nightly), 14 | pytest.param("ur10", marks=pytest.mark.nightly), 15 | pytest.param("ur10e", marks=pytest.mark.nightly), 16 | pytest.param("ur16e", marks=pytest.mark.nightly), 17 | ] 18 | 19 | 20 | @pytest.fixture(scope="module", params=ur_models) 21 | def env(request): 22 | ip = os.environ.get("ROBOGYM_SERVERS_HOST", "robot-servers") 23 | env = gym.make("BasicAvoidanceURSim-v0", ip=ip, ur_model=request.param) 24 | env.request_param = request.param 25 | yield env 26 | env.kill_sim() 27 | 28 | 29 | @pytest.mark.commit 30 | def test_initialization(env): 31 | assert env.ur.model_key == env.request_param 32 | env.reset() 33 | done = False 34 | env.step([0, 0, 0, 0, 0]) 35 | for _ in range(10): 36 | if not done: 37 | action = env.action_space.sample() 38 | observation, _, done, _, _ = env.step(action) 39 | 40 | assert env.observation_space.contains(observation) 41 | 42 | 43 | @pytest.mark.skip(reason="This fails only in CI") 44 | @pytest.mark.flaky(reruns=3) 45 | def test_object_collision(env): 46 | params = { 47 | "ur5": { 48 | "joint_positions": [0.0, -1.57, 1.57, -1.57, 0.0, 0.0], 49 | "object_coords": [0.2, -0.1, 0.52], 50 | "action": [-1, 0, 0, 0, 0], 51 | "n_steps": 30, 52 | }, 53 | } 54 | 55 | env.reset( 56 | options={ 57 | "desired_joint_positions": params[env.ur.model_key]["joint_positions"], 58 | "fixed_object_position": params[env.ur.model_key]["object_coords"], 59 | } 60 | ) 61 | done = False 62 | i = 0 63 | while (not done) or i <= params[env.ur.model_key]["n_steps"]: 64 | _, _, done, _, info = env.step(params[env.ur.model_key]["action"]) 65 | i += 1 66 | assert info["final_status"] == "collision" 67 | 68 | 69 | @pytest.mark.nightly 70 | def test_reset_joint_positions(env): 71 | joint_positions = [0.5, -2.7, 1.3, -1.7, -1.9, 1.6] 72 | 73 | state = env.reset(options={"joint_positions": joint_positions}) 74 | assert np.isclose( 75 | env.ur.normalize_joint_values(joint_positions), state[3:9], atol=0.1 76 | ).all() 77 | 78 | 79 | @pytest.mark.commit 80 | def test_object_coordinates(env): 81 | 82 | params = { 83 | # ? robot up-right, target_coord_in_ee_frame 0.0, -0.3, 0.2, coordinates of target calculated using official dimensions from DH parameters. 84 | # ? first value is d4+d6 85 | # ? 
second value is: d1+a2+a3+d5 86 | "ur3": { 87 | "joint_positions": [0.0, -1.57, 0.0, -1.57, 0.0, 0.0], 88 | "object_coords": [0.0, (0.194 + 0.2), (0.692 + 0.3), 0.0, 0.0, 0.0], 89 | "polar_coords": {"r": 0.360, "theta": 0.983, "phi": -1.571}, 90 | }, 91 | "ur3e": { 92 | "joint_positions": [0.0, -1.57, 0.0, -1.57, 0.0, 0.0], 93 | "object_coords": [0.0, (0.223 + 0.2), (0.694 + 0.3), 0.0, 0.0, 0.0], 94 | "polar_coords": {"r": 0.360, "theta": 0.983, "phi": -1.571}, 95 | }, 96 | "ur5": { 97 | "joint_positions": [0.0, -1.57, 0.0, -1.57, 0.0, 0.0], 98 | "object_coords": [0.0, (0.191 + 0.2), (1.001 + 0.3), 0.0, 0.0, 0.0], 99 | "polar_coords": {"r": 0.360, "theta": 0.983, "phi": -1.571}, 100 | }, 101 | "ur5e": { 102 | "joint_positions": [0.0, -1.57, 0.0, -1.57, 0.0, 0.0], 103 | "object_coords": [0.0, (0.233 + 0.2), (1.079 + 0.3), 0.0, 0.0, 0.0], 104 | "polar_coords": {"r": 0.360, "theta": 0.983, "phi": -1.571}, 105 | }, 106 | "ur10": { 107 | "joint_positions": [0.0, -1.57, 0.0, -1.57, 0.0, 0.0], 108 | "object_coords": [0.0, (0.256 + 0.2), (1.428 + 0.3), 0.0, 0.0, 0.0], 109 | "polar_coords": {"r": 0.360, "theta": 0.983, "phi": -1.571}, 110 | }, 111 | "ur10e": { 112 | "joint_positions": [0.0, -1.57, 0.0, -1.57, 0.0, 0.0], 113 | "object_coords": [0.0, (0.291 + 0.2), (1.485 + 0.3), 0.0, 0.0, 0.0], 114 | "polar_coords": {"r": 0.360, "theta": 0.983, "phi": -1.571}, 115 | }, 116 | "ur16e": { 117 | "joint_positions": [0.0, -1.57, 0.0, -1.57, 0.0, 0.0], 118 | "object_coords": [0.0, (0.291 + 0.2), (1.139 + 0.3), 0.0, 0.0, 0.0], 119 | "polar_coords": {"r": 0.360, "theta": 0.983, "phi": -1.571}, 120 | }, 121 | } 122 | 123 | state = env.reset( 124 | options={ 125 | "joint_positions": params[env.ur.model_key]["joint_positions"], 126 | "fixed_object_position": params[env.ur.model_key]["object_coords"], 127 | } 128 | ) 129 | assert np.isclose( 130 | [params[env.ur.model_key]["polar_coords"]["r"]], state[0], atol=0.05 131 | ).all() 132 | assert np.isclose( 133 | [ 134 | params[env.ur.model_key]["polar_coords"]["theta"], 135 | params[env.ur.model_key]["polar_coords"]["phi"], 136 | ], 137 | state[1:3], 138 | atol=0.2, 139 | ).all() 140 | 141 | 142 | test_ur_fixed_joints = [ 143 | ( 144 | "BasicAvoidanceURSim-v0", 145 | True, 146 | False, 147 | False, 148 | False, 149 | False, 150 | False, 151 | "ur3", 152 | ), # fixed shoulder_pan 153 | ( 154 | "BasicAvoidanceURSim-v0", 155 | False, 156 | True, 157 | False, 158 | False, 159 | False, 160 | False, 161 | "ur3e", 162 | ), # fixed shoulder_lift 163 | ( 164 | "BasicAvoidanceURSim-v0", 165 | False, 166 | False, 167 | False, 168 | False, 169 | False, 170 | True, 171 | "ur5", 172 | ), # fixed wrist_3 173 | ( 174 | "BasicAvoidanceURSim-v0", 175 | True, 176 | False, 177 | True, 178 | False, 179 | False, 180 | False, 181 | "ur5e", 182 | ), # fixed Base and Elbow 183 | ( 184 | "BasicAvoidanceURSim-v0", 185 | False, 186 | False, 187 | True, 188 | False, 189 | False, 190 | False, 191 | "ur10", 192 | ), # fixed elbow 193 | ( 194 | "BasicAvoidanceURSim-v0", 195 | False, 196 | False, 197 | False, 198 | True, 199 | False, 200 | False, 201 | "ur10e", 202 | ), # fixed wrist_1 203 | ( 204 | "BasicAvoidanceURSim-v0", 205 | False, 206 | False, 207 | False, 208 | False, 209 | True, 210 | False, 211 | "ur16e", 212 | ), # fixed wrist_2 213 | ] 214 | 215 | 216 | @pytest.mark.nightly 217 | @pytest.mark.parametrize( 218 | "env_name, fix_base, fix_shoulder, fix_elbow, fix_wrist_1, fix_wrist_2, fix_wrist_3, ur_model", 219 | test_ur_fixed_joints, 220 | ) 221 | @pytest.mark.flaky(reruns=3) 222 | def 
223 |     env_name,
224 |     fix_base,
225 |     fix_shoulder,
226 |     fix_elbow,
227 |     fix_wrist_1,
228 |     fix_wrist_2,
229 |     fix_wrist_3,
230 |     ur_model,
231 | ):
232 |     env = gym.make(
233 |         env_name,
234 |         ip="robot-servers",
235 |         fix_base=fix_base,
236 |         fix_shoulder=fix_shoulder,
237 |         fix_elbow=fix_elbow,
238 |         fix_wrist_1=fix_wrist_1,
239 |         fix_wrist_2=fix_wrist_2,
240 |         fix_wrist_3=fix_wrist_3,
241 |         ur_model=ur_model,
242 |     )
243 |     state, _ = env.reset()
244 |     initial_joint_positions = state[3:9]
245 |     # Take 20 actions
246 |     action = env.action_space.sample()
247 |     for _ in range(20):
248 |         state, _, _, _, _ = env.step(action)
249 |     joint_positions = state[3:9]
250 | 
251 |     if fix_base:
252 |         assert math.isclose(
253 |             initial_joint_positions[0], joint_positions[0], abs_tol=0.05
254 |         )
255 |     if fix_shoulder:
256 |         assert math.isclose(
257 |             initial_joint_positions[1], joint_positions[1], abs_tol=0.05
258 |         )
259 |     if fix_elbow:
260 |         assert math.isclose(
261 |             initial_joint_positions[2], joint_positions[2], abs_tol=0.05
262 |         )
263 |     if fix_wrist_1:
264 |         assert math.isclose(
265 |             initial_joint_positions[3], joint_positions[3], abs_tol=0.05
266 |         )
267 |     if fix_wrist_2:
268 |         assert math.isclose(
269 |             initial_joint_positions[4], joint_positions[4], abs_tol=0.05
270 |         )
271 |     if fix_wrist_3:
272 |         assert math.isclose(
273 |             initial_joint_positions[5], joint_positions[5], abs_tol=0.05
274 |         )
275 | 
276 |     env.kill_sim()
277 | 
--------------------------------------------------------------------------------
/robo_gym/envs/ur/ur_avoidance_basic.py:
--------------------------------------------------------------------------------
1 | """
2 | Environment for basic obstacle avoidance while controlling a UR robot arm.
3 | 
4 | In this environment the obstacle moves only up and down along a vertical line in front of the robot.
5 | The goal is for the robot to keep at least a predefined minimum distance to the moving obstacle.
6 | When feasible the robot should return to its original configuration,
7 | otherwise it should wait for the obstacle to move away before proceeding.
8 | """
9 | 
10 | from __future__ import annotations
11 | import numpy as np
12 | from typing import Tuple, Any
13 | from robo_gym_server_modules.robot_server.grpc_msgs.python import robot_server_pb2
14 | from robo_gym.envs.simulation_wrapper import Simulation
15 | from robo_gym.envs.ur.ur_base_avoidance_env import URBaseAvoidanceEnv
16 | 
17 | # base, shoulder, elbow, wrist_1, wrist_2, wrist_3
18 | JOINT_POSITIONS = [-1.57, -1.31, -1.31, -2.18, 1.57, 0.0]
19 | DEBUG = True
20 | MINIMUM_DISTANCE = 0.3  # the distance [m] the robot should keep to the obstacle
21 | 
22 | 
23 | class BasicAvoidanceUR(URBaseAvoidanceEnv):
24 |     """Universal Robots UR basic obstacle avoidance environment.
25 | 
26 |     Args:
27 |         rs_address (str): Robot Server address. Formatted as 'ip:port'. Defaults to None.
28 |         fix_base (bool): Whether the base joint stays fixed or is movable. Defaults to False.
29 |         fix_shoulder (bool): Whether the shoulder joint stays fixed or is movable. Defaults to False.
30 |         fix_elbow (bool): Whether the elbow joint stays fixed or is movable. Defaults to False.
31 |         fix_wrist_1 (bool): Whether the wrist 1 joint stays fixed or is movable. Defaults to False.
32 |         fix_wrist_2 (bool): Whether the wrist 2 joint stays fixed or is movable. Defaults to False.
33 |         fix_wrist_3 (bool): Whether the wrist 3 joint stays fixed or is movable. Defaults to True.
34 |         ur_model (str): determines which UR model is used in the environment. Defaults to 'ur5'.
35 |         include_polar_to_elbow (bool): determines whether the polar coordinates to the elbow joint are included in the state. Defaults to False.
36 | 
37 |     Attributes:
38 |         ur (:obj:): Robot utilities object.
39 |         client (:obj:str): Robot Server client.
40 |         real_robot (bool): True if the environment is controlling a real robot.
41 | 
42 |     """
43 | 
44 |     max_episode_steps = 1000
45 | 
46 |     def _set_initial_robot_server_state(
47 |         self, rs_state, fixed_object_position=None
48 |     ) -> robot_server_pb2.State:
49 |         if fixed_object_position:
50 |             state_msg = super()._set_initial_robot_server_state(
51 |                 rs_state=rs_state, fixed_object_position=fixed_object_position
52 |             )
53 |             return state_msg
54 | 
55 |         z_amplitude = np.random.default_rng().uniform(low=0.09, high=0.35)
56 |         z_frequency = 0.125
57 |         z_offset = np.random.default_rng().uniform(low=0.2, high=0.6)
58 | 
59 |         string_params = {"object_0_function": "triangle_wave"}
60 |         float_params = {
61 |             "object_0_x": 0.12,
62 |             "object_0_y": 0.34,
63 |             "object_0_z_amplitude": z_amplitude,
64 |             "object_0_z_frequency": z_frequency,
65 |             "object_0_z_offset": z_offset,
66 |         }
67 |         state = {}
68 | 
69 |         state_msg = robot_server_pb2.State(
70 |             state=state,
71 |             float_params=float_params,
72 |             string_params=string_params,
73 |             state_dict=rs_state,
74 |         )
75 |         return state_msg
76 | 
77 |     def reset(
78 |         self, *, seed: int | None = None, options: dict[str, Any] | None = None
79 |     ) -> tuple[np.ndarray, dict[str, Any]]:
80 |         """Environment reset.
81 | 
82 |         options:
83 |             joint_positions (list[6] or np.array[6]): robot joint positions in radians.
84 |             fixed_object_position (list[3]): x,y,z fixed position of object
85 | 
86 |         Returns:
87 |             np.array: Environment state.
88 |             dict: info
89 |         """
90 |         self.prev_action = np.zeros(6)
91 | 
92 |         initial_state, info = super().reset(seed=seed, options=options)
93 | 
94 |         return initial_state, info
95 | 
96 |     def reward(self, rs_state, action) -> Tuple[float, bool, dict]:
97 |         env_state = self._robot_server_state_to_env_state(rs_state)
98 | 
99 |         reward = 0
100 |         done = False
101 |         info = {}
102 | 
103 |         # Reward weights
104 |         close_distance_weight = -2
105 |         delta_joint_weight = 1
106 |         action_usage_weight = 1
107 |         rapid_action_weight = -0.2
108 | 
109 |         # Difference in joint position: current vs. starting position
110 |         delta_joint_pos = env_state[9:15]
111 | 
112 |         # Calculate distance to the obstacle
113 |         obstacle_coord = np.array(
114 |             [
115 |                 rs_state["object_0_to_ref_translation_x"],
116 |                 rs_state["object_0_to_ref_translation_y"],
117 |                 rs_state["object_0_to_ref_translation_z"],
118 |             ]
119 |         )
120 |         ee_coord = np.array(
121 |             [
122 |                 rs_state["ee_to_ref_translation_x"],
123 |                 rs_state["ee_to_ref_translation_y"],
124 |                 rs_state["ee_to_ref_translation_z"],
125 |             ]
126 |         )
127 |         forearm_coord = np.array(
128 |             [
129 |                 rs_state["forearm_to_ref_translation_x"],
130 |                 rs_state["forearm_to_ref_translation_y"],
131 |                 rs_state["forearm_to_ref_translation_z"],
132 |             ]
133 |         )
134 |         distance_to_ee = np.linalg.norm(obstacle_coord - ee_coord)
135 |         distance_to_forearm = np.linalg.norm(obstacle_coord - forearm_coord)
136 |         distance_to_target = np.min([distance_to_ee, distance_to_forearm])
137 | 
138 |         # Reward staying close to the predefined joint position
139 |         if abs(env_state[-6:]).sum() < 0.1 * action.size:
140 |             reward += (
141 |                 delta_joint_weight
142 |                 * (1 - (abs(delta_joint_pos).sum() / (0.1 * action.size)))
143 |                 * (1 / 1000)
144 |             )
145 | 
146 |         # Reward for not acting
147 |         if abs(action).sum() <= action.size:
148 |             reward += (
149 |                 action_usage_weight
150 |                 * (1 - (np.square(action).sum() / action.size))
151 |                 * (1 / 1000)
152 |             )
153 | 
154 |         # Negative reward if actions change too rapidly between steps
155 |         for i in range(len(action)):
156 |             if abs(action[i] - self.prev_action[i]) > 0.5:
157 |                 reward += rapid_action_weight * (1 / 1000)
158 | 
159 |         # Negative reward if the obstacle is closer than the predefined minimum distance
160 |         if distance_to_target < MINIMUM_DISTANCE:
161 |             reward += close_distance_weight * (1 / self.max_episode_steps)
162 | 
163 |         # Check if there is a collision
164 |         collision = rs_state["in_collision"] == 1
165 |         if collision:
166 |             done = True
167 |             info["final_status"] = "collision"
168 |             info["target_coord"] = obstacle_coord
169 |             self.last_position_on_success = []
170 | 
171 |         elif self.elapsed_steps >= self.max_episode_steps:
172 |             done = True
173 |             info["final_status"] = "success"
174 |             info["target_coord"] = obstacle_coord
175 |             self.last_position_on_success = []
176 | 
177 |         return reward, done, info
178 | 
179 |     def step(self, action) -> Tuple[np.ndarray, float, bool, bool, dict]:
180 |         if isinstance(action, list):
181 |             action = np.array(action)
182 | 
183 |         action = action.astype(np.float32)
184 | 
185 |         state, reward, done, truncated, info = super().step(action)
186 | 
187 |         self.prev_action = self.add_fixed_joints(action)
188 | 
189 |         return state, reward, done, truncated, info
190 | 
191 | 
192 | class BasicAvoidanceURSim(BasicAvoidanceUR, Simulation):
193 |     cmd = "roslaunch ur_robot_server ur_robot_server.launch \
194 |         world_name:=tabletop_sphere50.world \
195 |         reference_frame:=base_link \
196 |         max_velocity_scale_factor:=0.2 \
197 |         action_cycle_rate:=20 \
198 |         rviz_gui:=false \
199 |         gazebo_gui:=true \
200 |         objects_controller:=true \
201 |         rs_mode:=1moving2points \
202 |         n_objects:=1.0 \
203 |         object_0_model_name:=sphere50 \
204 |         object_0_frame:=target"
205 | 
206 |     def __init__(
207 |         self,
208 |         ip=None,
209 |         lower_bound_port=None,
210 |         upper_bound_port=None,
211 |         gui=False,
212 |         ur_model="ur5",
213 |         **kwargs,
214 |     ):
215 |         self.cmd = self.cmd + " " + "ur_model:=" + ur_model
216 |         Simulation.__init__(
217 |             self, self.cmd, ip, lower_bound_port, upper_bound_port, gui, **kwargs
218 |         )
219 |         BasicAvoidanceUR.__init__(
220 |             self, rs_address=self.robot_server_ip, ur_model=ur_model, **kwargs
221 |         )
222 | 
223 | 
224 | class BasicAvoidanceURRob(BasicAvoidanceUR):
225 |     real_robot = True
226 | 
227 | 
228 | # roslaunch ur_robot_server ur_robot_server.launch ur_model:=ur5 real_robot:=true rviz_gui:=true gui:=true reference_frame:=base max_velocity_scale_factor:=0.2 action_cycle_rate:=20 rs_mode:=moving
229 | 
--------------------------------------------------------------------------------
/robo_gym/utils/manipulator_model.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | from __future__ import annotations
3 | 
4 | import numpy as np
5 | from numpy.typing import NDArray
6 | import yaml
7 | import copy
8 | from robo_gym.utils import utils
9 | 
10 | 
11 | class ManipulatorModel:
12 |     """Manipulator robots model class.
13 | 
14 |     Attributes:
15 |         max_joint_positions (np.array): Maximum joint position values (rad).
16 |         min_joint_positions (np.array): Minimum joint position values (rad).
17 |         max_joint_velocities (np.array): Maximum joint velocity values (rad/s).
18 |         min_joint_velocities (np.array): Minimum joint velocity values (rad/s).
19 |         joint_names (list): Joint names (Standard Indexing).
20 | 
21 |     Joint Names (ROS Indexing):
22 |     [elbow_joint, shoulder_lift_joint, shoulder_pan_joint, wrist_1_joint, wrist_2_joint,
23 |     wrist_3_joint]
24 | 
25 |     NOTE: Where not specified, Standard Indexing is used.
26 |     """
27 | 
28 |     def __init__(
29 |         self,
30 |         model_key: str,
31 |         file_path: str,
32 |         norm_min: float = -1.0,
33 |         norm_max: float = 1.0,
34 |         override_joint_names: list[str] | None = None,
35 |     ):
36 | 
37 |         self.model_key = model_key
38 |         self._norm_min = norm_min
39 |         self._norm_max = norm_max
40 | 
41 |         # Load robot parameters
42 |         with open(file_path, "r") as stream:
43 |             try:
44 |                 p = yaml.safe_load(stream)
45 |             except yaml.YAMLError as exc:
46 |                 print(exc)
47 |                 raise
48 |         if override_joint_names:
49 |             self.joint_names = override_joint_names
50 |         else:
51 |             self.joint_names = p.get("joint_names")
52 |         if "remote_joint_names" in p:
53 |             self.remote_joint_names = p["remote_joint_names"]
54 |         else:
55 |             self.remote_joint_names = self.joint_names
56 | 
57 |         # Initialize joint limits attributes
58 |         self.max_joint_positions = np.zeros(len(self.joint_names))
59 |         self.min_joint_positions = np.zeros(len(self.joint_names))
60 |         self.max_joint_velocities = np.zeros(len(self.joint_names))
61 |         self.min_joint_velocities = np.zeros(len(self.joint_names))
62 | 
63 |         self.joint_positions = np.zeros(len(self.joint_names))
64 | 
65 |         for idx, joint in enumerate(self.joint_names):
66 |             self.max_joint_positions[idx] = p["joint_limits"][joint]["max_position"]
67 |             self.min_joint_positions[idx] = p["joint_limits"][joint]["min_position"]
68 |             self.max_joint_velocities[idx] = p["joint_limits"][joint]["max_velocity"]
69 |             self.min_joint_velocities[idx] = -p["joint_limits"][joint]["max_velocity"]
70 | 
71 |         # Workspace parameters
72 |         self.ws_r = p["workspace_area"]["r"]
73 |         self.ws_min_r = p["workspace_area"]["min_r"]
74 | 
75 |     def get_max_joint_positions(self) -> NDArray:
76 | 
77 |         return self.max_joint_positions
78 | 
79 |     def get_min_joint_positions(self) -> NDArray:
80 | 
81 |         return self.min_joint_positions
82 | 
83 |     def get_max_joint_velocities(self) -> NDArray:
84 | 
85 |         return self.max_joint_velocities
86 | 
87 |     def get_min_joint_velocities(self) -> NDArray:
88 | 
89 |         return self.min_joint_velocities
90 | 
91 |     def normalize_joint_value(self, source_value: float, joint_index: int) -> float:
92 |         dest_value = float(
93 |             np.interp(
94 |                 source_value,
95 |                 [
96 |                     self.min_joint_positions[joint_index],
97 |                     self.max_joint_positions[joint_index],
98 |                 ],
99 |                 [self._norm_min, self._norm_max],
100 |             )
101 |         )
102 |         return dest_value
103 | 
104 |     def normalize_joint_values(self, joints: NDArray) -> NDArray:
105 |         """Normalize joint position values.
106 | 
107 |         Args:
108 |             joints (np.array): Joint position values
109 | 
110 |         Returns:
111 |             norm_joints (np.array): Joint position values normalized between [-1, 1]
112 |         """
113 | 
114 |         result = copy.deepcopy(joints)
115 |         for joint_index in range(len(joints)):
116 |             source_value = float(joints[joint_index])
117 |             dest_value = self.normalize_joint_value(source_value, joint_index)
118 |             result[joint_index] = dest_value
119 |         return result
120 | 
121 |     def denormalize_joint_value(self, source_value: float, joint_index: int) -> float:
122 |         dest_value = float(
123 |             np.interp(
124 |                 source_value,
125 |                 [self._norm_min, self._norm_max],
126 |                 [
127 |                     self.min_joint_positions[joint_index],
128 |                     self.max_joint_positions[joint_index],
129 |                 ],
130 |             )
131 |         )
132 |         return dest_value
133 | 
134 |     def denormalize_joint_values(self, joints: NDArray) -> NDArray:
135 |         """Map normalized joint values to joint positions in the min/max range.
136 | 
137 |         Args:
138 |             joints (np.array): Normalized joint position values
139 | 
140 |         Returns:
141 |             joints (np.array): Joint position values mapped back into the joint limits range (rad)
142 |         """
143 | 
144 |         result = copy.deepcopy(joints)
145 |         for joint_index in range(len(joints)):
146 |             source_value = float(joints[joint_index])
147 |             dest_value = self.denormalize_joint_value(source_value, joint_index)
148 |             result[joint_index] = dest_value
149 |         return result
150 | 
151 |     def denormalize_joint_range(self, source_range: float, joint_index: int) -> float:
152 |         dest_value = float(
153 |             np.interp(
154 |                 source_range,
155 |                 [0, 1],
156 |                 [
157 |                     0,
158 |                     abs(
159 |                         self.max_joint_positions[joint_index]
160 |                         - self.min_joint_positions[joint_index]
161 |                     ),
162 |                 ],
163 |             )
164 |         )
165 |         return dest_value
166 | 
167 |     def denormalize_joint_ranges(self, joints: NDArray) -> NDArray:
168 |         result = np.zeros_like(joints)
169 |         for joint_index in range(len(joints)):
170 |             source_value = float(joints[joint_index])
171 |             dest_value = self.denormalize_joint_range(source_value, joint_index)
172 |             result[joint_index] = dest_value
173 |         return result
174 | 
175 |     def get_random_workspace_pose_rpy(
176 |         self,
177 |         np_random: np.random.Generator | None = None,
178 |         roll_range: NDArray | None = None,
179 |         pitch_range: NDArray | None = None,
180 |         yaw_range: NDArray | None = None,
181 |     ) -> NDArray:
182 |         """Get the pose of a random point in the robot workspace.
183 | 
184 |         Returns:
185 |             np.array: [x,y,z,roll,pitch,yaw] pose.
186 | 
187 |         """
188 | 
189 |         if np_random is None:
190 |             np_random = np.random.default_rng()
191 | 
192 |         pose = np.zeros(6)
193 | 
194 |         singularity_area = True
195 | 
196 |         # check if the generated x,y,z lie in the singularity area
197 |         while singularity_area:
198 |             # Generate a random sample uniformly distributed in a hemisphere
199 |             # via inverse transform sampling
200 | 
201 |             phi = np_random.uniform(low=0.0, high=2 * np.pi)
202 |             costheta = np_random.uniform(low=0.0, high=1.0)  # [-1.0,1.0] for a sphere
203 |             u = np_random.uniform(low=0.0, high=1.0)
204 | 
205 |             theta = np.arccos(costheta)
206 |             r = self.ws_r * np.cbrt(u)
207 | 
208 |             x = r * np.sin(theta) * np.cos(phi)
209 |             y = r * np.sin(theta) * np.sin(phi)
210 |             z = r * np.cos(theta)
211 | 
212 |             # TODO this is UR specific
213 |             if (x**2 + y**2) > self.ws_min_r**2:
214 |                 singularity_area = False
215 | 
216 |         roll = utils.get_uniform_from_range(np_random, roll_range, 0.0)
217 |         pitch = utils.get_uniform_from_range(np_random, pitch_range, 0.0)
218 |         yaw = utils.get_uniform_from_range(np_random, yaw_range, 0.0)
219 | 
220 |         pose[0:6] = [x, y, z, roll, pitch, yaw]
221 | 
222 |         return pose
223 | 
224 |     def get_random_workspace_pose_quat(
225 |         self,
226 |         np_random: np.random.Generator | None = None,
227 |         roll_range: NDArray | None = None,
228 |         pitch_range: NDArray | None = None,
229 |         yaw_range: NDArray | None = None,
230 |         seq="xyz",
231 |         quat_unique: bool = False,
232 |     ) -> NDArray:
233 |         pose_rpy = self.get_random_workspace_pose_rpy(
234 |             np_random, roll_range, pitch_range, yaw_range
235 |         )
236 |         pose_quat = utils.quat_from_euler(
237 |             pose_rpy[3], pose_rpy[4], pose_rpy[5], seq, quat_unique
238 |         )
239 |         return pose_quat
240 | 
241 |     def get_random_offset_joint_positions(
242 |         self,
243 |         joint_positions: NDArray,
244 |         random_offset: NDArray,
245 |         np_random: np.random.Generator | None = None,
246 |     ):
247 | 
248 |         if np_random is None:
249 |             np_random = np.random.default_rng()
250 | 
251 |         joint_positions_low = joint_positions - random_offset
252 |         joint_positions_high = joint_positions + random_offset
253 | 
254 |         joint_positions = np_random.uniform(
255 |             low=joint_positions_low, high=joint_positions_high
256 |         )
257 |         return joint_positions
258 | 
259 |     def reorder_joints_from_rs(self, ros_thetas: NDArray) -> NDArray:
260 |         return ros_thetas
261 | 
262 |     def reorder_joints_for_rs(self, thetas: NDArray) -> NDArray:
263 |         return thetas
264 | 
265 |     @property
266 |     def joint_count(self) -> int:
267 |         return len(self.joint_names)
268 | 
--------------------------------------------------------------------------------
/docs/modular_environments.md:
--------------------------------------------------------------------------------
1 | # Modular environments
2 | 
3 | We have started the creation of a new hierarchy of environment classes to improve the modularity and extensibility of our environments.
4 | 
5 | The base environment class is called RoboGymEnv.
6 | It lays out the common ground for robo-gym environments, such as the initialization of the client for the robot server and the functionality of the step and reset methods.
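For orientation, here is a minimal interaction sketch. The environment id and server address are placeholders (the actual ids follow the "Sim"/"Rob" naming convention described further below), and a server manager or compatible robot server must be reachable:

```python
import gymnasium as gym

# Importing robo_gym registers the environments with gymnasium.
import robo_gym

# Placeholder id and address: substitute a registered modular environment id
# and the address of your server manager or robot server.
env = gym.make("EndEffectorPositioning2URSim-v0", ip="robot-servers")

observation, info = env.reset()
for _ in range(100):
    action = env.action_space.sample()
    observation, reward, terminated, truncated, info = env.step(action)
    if terminated or truncated:
        observation, info = env.reset()
env.close()
```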
7 | In terms of structure, the RoboGymEnv has components that we call nodes; they take over elements of the environment's main functionality and can be specialized for different subclasses or configurations of environments:
8 | 
9 | * ActionNodes deal with the action from the agent and map it to the form that is understood by the robot server;
10 | * ObservationNodes build parts of the observation from the state reported by the robot server; and
11 | * RewardNodes provide the reward calculation and related data to be returned by a step.
12 | 
13 | In general, an environment has one ActionNode, one RewardNode, and any number of ObservationNodes (the reasoning for this multiplicity: for any given task, adding extra observations, such as additional sensors, should be a relatively small implementation change).
14 | As with the environment itself, the nodes' initialization is largely driven by processing arguments.
15 | By default, the arguments that the nodes receive are equal to those of the environment, but the environment implementation is free to prepare each node's arguments as required.
16 | 
17 | The functionality of the nodes is integrated into the environment's methods as needed, in a way that minimizes duplicated code as much as possible.
18 | For example, the node types for actions and observations provide the respective methods for mapping messages (in opposite directions) that are used in the base environment's step method, so a specialized environment should not need to (but might) override the base step method.
19 | Also, the action space and observation space are calculated by the respective nodes. Nodes of all three types can contribute to the state data that is sent to the robot server in the course of a reset. This is useful in scenarios such as delegating the management of a target pose to the reward node, which can still provide the target data for the robot server (a reward-node sketch follows at the end of this section).
20 | 
21 | This new environment structure is used in a few concrete environments so far, some of which are designed to be direct replacements of our original environments.
22 | The examples provided so far highlight a key benefit of the class hierarchy: creating a variation of an environment should be possible with small additions, reusing as much existing code as possible without duplicating it. An example would be performing the same task with a different robot. We are already exploiting this potential with new environment implementations for the UR robots and corresponding environments for the same tasks with the Panda robot.
23 | The environment classes that add specifics for a robot type are relatively short and simple, limited to configuring functionality provided by their base classes.
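To make the node composition described above concrete, the following sketch derives a custom reward node from the ManipulatorRewardNode defined in robo_gym/envs/manipulator/manipulator_base.py; the per-step penalty and the wiring comment are illustrative, not part of the library:

```python
from __future__ import annotations

from typing import Tuple

from numpy.typing import NDArray

from robo_gym.envs.base.robogym_env import RoboGymEnv
from robo_gym.envs.manipulator.manipulator_base import ManipulatorRewardNode


class StepPenaltyRewardNode(ManipulatorRewardNode):
    """Illustrative node: keeps the base collision/success logic but adds a
    small constant penalty per step to encourage short episodes."""

    def get_reward(
        self,
        rs_state_array: NDArray,
        rs_state_dict: dict[str, float],
        env_action: NDArray,
        **kwargs,
    ) -> Tuple[float, bool, dict]:
        # Reuse the base node's collision/success handling ...
        reward, done, info = super().get_reward(
            rs_state_array, rs_state_dict, env_action, **kwargs
        )
        # ... and add the extra per-step penalty on top.
        return reward - 0.001, done, info


# An environment picks such a node up from its keyword arguments, e.g. by
# passing it under RoboGymEnv.KW_REWARD_NODE, mirroring how ManipulatorBaseEnv
# installs its default ManipulatorRewardNode when none is provided.
```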
24 | 
25 | # Modular environment classes
26 | 
27 | This diagram sketches the relations between the environment classes provided so far in the hierarchy:
28 | 
29 | ```mermaid
30 | classDiagram
31 | 
32 | class RoboGymEnv
33 | RoboGymEnv: ActionNode actionNode
34 | RoboGymEnv: ObservationNode[] observationNodes
35 | RoboGymEnv: RewardNode rewardNode
36 | RoboGymEnv: Client client
37 | 
38 | RoboGymEnv <|-- ManipulatorBaseEnv
39 | 
40 | ManipulatorBaseEnv <|-- ManipulatorEePosEnv
41 | ManipulatorBaseEnv: ManipulatorActionNode actionNode
42 | ManipulatorBaseEnv: ManipulatorObservationNode[] observationNodes
43 | ManipulatorBaseEnv: ManipulatorRewardNode rewardNode
44 | 
45 | ManipulatorEePosEnv <|-- IsaacReachEnv
46 | 
47 | ManipulatorBaseEnv <|-- URBaseEnv2
48 | URBaseEnv2 <|-- EmptyEnvironment2UR
49 | 
50 | ManipulatorEePosEnv <|-- EndEffectorPositioning2UR
51 | ManipulatorEePosEnv: ManipulatorEePosObservationNode[] observationNodes
52 | ManipulatorEePosEnv: ManipulatorEePosRewardNode rewardNode
53 | 
54 | IsaacReachEnv <|-- IsaacReachUR
55 | IsaacReachEnv: IsaacReachActionNode actionNode
56 | IsaacReachEnv: IsaacReachObservationNode[] observationNodes
57 | 
58 | ManipulatorBaseEnv <|-- PandaBaseEnv
59 | PandaBaseEnv <|-- EmptyEnvironmentPanda
60 | 
61 | ManipulatorEePosEnv <|-- EndEffectorPositioningPanda
62 | 
63 | IsaacReachEnv <|-- IsaacReachPanda
64 | ```
65 | Note that the field indications are neither complete nor technically correct, as they are written like redefinitions with varying types. In fact, the fields are declared solely in the RoboGymEnv class and are only filled with instances of the corresponding subtypes by default. The notation in the diagram serves to illustrate how subclasses of the nodes can be used as needed for a certain environment type. Where possible, a less specialized node implementation may suffice. E.g., there is no special action node for the ManipulatorEePosEnv environment, but there are specific implementations of ObservationNode and RewardNode for it.
66 | 
67 | Our pre-existing environments follow the convention of providing separate subclasses for simulated and real robots for each environment type, with class names and environment ids (for gym.make) obtained by adding the suffix "Sim" or "Rob". Due to changed logic in setting up the client for the robot server, this distinction is not technically necessary for the new hierarchy (setting a flag would be enough), but to have matching names we also provide this specialization. Each leaf class in the diagram has two corresponding subclasses (not depicted) with the appended "Sim" and "Rob".
68 | 
69 | ## RoboGymEnv
70 | 
71 | As indicated above, the RoboGymEnv class is the base class in our hierarchy.
72 | 
73 | Note that, in contrast to our previous environments, it can work with pre-existing simulation robot servers (rather than only obtaining simulation robot servers from the server manager), which can save a lot of time during testing. However, as for a real robot server, it is necessary to make sure that the server is compatible with the environment. The roslaunch command to start the matching robot server for a given environment configuration can be read from the console output when starting the environment with a server manager address. Subsequently, the environment can be used with the address of the manually started server.
74 | 
75 | ## ManipulatorBaseEnv
76 | 
77 | This environment class adds logic to control and observe a manipulator robot.
It matches the previous empty UR environment on a logical level; however, it is independent of the robot type. There are subclasses for UR robots and for the Franka Emika Panda. The reward calculation is limited to surviving the maximum duration of the episode without a collision.
78 | 
79 | ## ManipulatorEePosEnv
80 | 
81 | The end-effector-positioning environment also corresponds to the similarly named UR environment in our pre-existing set of classes, again with a robot-type-agnostic base class and subclasses for UR and Panda. As a new feature, the rotation of the end effector can optionally be considered in the reward calculation. Consequently, the tradition-based class name is accurate only when this feature is disabled. There are richer options for determining the robot's starting pose (and whether the robot should return to it in reset) and for obtaining the target pose.
82 | 
83 | ## IsaacReachEnv
84 | 
85 | This environment type and its subclasses are specializations of the ManipulatorEePosEnv class, intended to allow running agents trained with Isaac Lab in a robo-gym context. The special implementations of ActionNode and ObservationNode provide the corresponding mappings to the spaces as they are used by the supported agents in Isaac Lab. See also [Isaac Lab Compatibility](isaac_lab_compatibility.md).
86 | 
87 | In order to avoid a general dependency on Torch for robo-gym, the short code to load and interpret policies trained with Isaac Lab is included in the [robo_envtest](./robo_envtest.py) example script. It comes into effect when passing the path of an exported trained policy from Isaac Lab. Specifically, the code supports policies trained with the rsl_rl train script that comes with Isaac Lab, tested with V4.5.0. Note that the export folder with this policy file is created upon running the rsl_rl play script and not during training itself.
88 | 
89 | # Open Issues (not comprehensive)
90 | 
91 | In general, we intend to migrate all of our old robo-gym environment implementations to the modular structure. Parts of this effort that lie outside of the initial vertical slice implementation, which ranges from the base environment down to the Isaac Reach task for UR and Panda, remain to be done. While the process is ongoing, legacy implementations persist (causing potentially confusing situations with environment names and mixed directories).
92 | 
93 | In some parts, the side goal of eliminating hardcoded values or other deficiencies is not yet reached. In particular, the construction of the roslaunch command in the robot-specific environment classes leaves a few things to be desired.
94 | 
95 | ## MiR
96 | 
97 | The MiR environments have not yet been rebuilt with the new structure. The plan is to have a base environment class for mobile robots as a sibling of the ManipulatorBaseEnv, with corresponding task-specific environment subclasses below it and MiR-specific subclasses of those, next to other mobile robot types of interest.
98 | 
99 | ## UR
100 | 
101 | The avoidance classes remain to be reconstructed in the modular fashion. Of course, the implementation of avoidance-specific functionality would be robot-agnostic first and thus also compatible with other manipulator types.
102 | 
103 | ## Panda
104 | 
105 | As of now, the Panda environments do not perform well. Attempts to use them with a real robot are strongly discouraged at this point. An overhaul of the server-side implementation may be necessary.
Alternative approaches for action modes (in which actions do not correspond to absolute joint positions) have been tried, as have many options in action execution and state collection; preparations for these can be found on the server side and in unmerged branches, but none has led to satisfying results yet.
106 | 
107 | ## Isaac Lab
108 | 
109 | There are other (example or custom) tasks that we would like to train for in Isaac Lab and provide execution for in robo-gym. It may be desirable to extract fields from configuration files/recordings from Isaac Lab; however, this may be outside of the scope of the environment classes themselves.
110 | 
111 | For the IsaacReachEnv, a shortcoming is that the server-side visualizations of the target do not correctly reflect the rotation of the target.
112 | 
--------------------------------------------------------------------------------
/robo_gym/envs/manipulator/manipulator_base.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 | 
3 | from typing import Tuple
4 | 
5 | import gymnasium as gym
6 | import numpy as np
7 | from numpy.typing import NDArray
8 | 
9 | from robo_gym.envs.base.robogym_env import (
10 |     RoboGymEnv,
11 |     ActionNode,
12 |     ObservationNode,
13 |     RewardNode,
14 | )
15 | from robo_gym.utils.manipulator_model import ManipulatorModel
16 | 
17 | 
18 | class ManipulatorBaseEnv(RoboGymEnv):
19 | 
20 |     KW_RS_JOINT_NAMES = "rs_joint_names"
21 |     KW_JOINT_POSITIONS = "joint_position"
22 |     RS_STATE_KEY_SUFFIX_JOINT_POSITION = "_position"
23 |     RS_STATE_KEY_SUFFIX_JOINT_VELOCITY = "_velocity"
24 | 
25 |     KW_ACTION_MODE = "action_mode"
26 |     ACTION_MODE_ABS_POS = "abs_pos"
27 |     ACTION_MODE_DELTA_POS = "delta_pos"
28 |     ACTION_MODE_ABS_VEL = "abs_vel"
29 |     ACTION_MODE_DELTA_VEL = "delta_vel"
30 | 
31 |     KW_MAX_VELOCITY_SCALE_FACTOR = "max_velocity_scale_factor"
32 | 
33 |     def __init__(self, **kwargs):
34 |         # not too nice - repeated in super init
35 |         self._config = kwargs
36 | 
37 |         self._robot_model: ManipulatorModel | None = kwargs.get(
38 |             RoboGymEnv.KW_ROBOT_MODEL_OBJECT
39 |         )
40 |         if self.KW_JOINT_POSITIONS in kwargs:
41 |             # TODO check values
42 |             self._robot_model.joint_positions = kwargs[self.KW_JOINT_POSITIONS]
43 | 
44 |         self.action_mode = self._config.setdefault(
45 |             self.KW_ACTION_MODE, self.ACTION_MODE_ABS_POS
46 |         )
47 |         # we only support this one action mode for now
48 |         assert self.action_mode == self.ACTION_MODE_ABS_POS
49 | 
50 |         # env nodes
51 |         action_node: ActionNode | None = kwargs.get(RoboGymEnv.KW_ACTION_NODE)
52 |         if not action_node:
53 |             action_node = ManipulatorActionNode(**self.get_action_node_setup_kwargs())
54 |             kwargs[RoboGymEnv.KW_ACTION_NODE] = action_node
55 | 
56 |         RoboGymEnv.assure_instance_of_type_in_list(
57 |             kwargs,
58 |             RoboGymEnv.KW_OBSERVATION_NODES,
59 |             ManipulatorObservationNode,
60 |             True,
61 |             self.create_main_observation_node,
62 |             {},
63 |         )
64 | 
65 |         reward_node: RewardNode | None = kwargs.get(RoboGymEnv.KW_REWARD_NODE)
66 |         if not reward_node:
67 |             reward_node = ManipulatorRewardNode(**self.get_reward_node_setup_kwargs())
68 |             kwargs[RoboGymEnv.KW_REWARD_NODE] = reward_node
69 | 
70 |         super().__init__(**kwargs)
71 | 
72 |     def create_main_observation_node(self, node_index: int = 0, **kwargs):
73 |         return ManipulatorObservationNode(**self.get_obs_node_setup_kwargs(node_index))
74 | 
75 | 
76 | class ManipulatorActionNode(ActionNode):
77 |     # TODO
78 |     # consider other action modes - current impl corresponds to ABS_POS only
79 | 
80 | 
KW_PREFIX_FIX_JOINT = "fix_" 81 | 82 | def __init__(self, **kwargs): 83 | super().__init__(**kwargs) 84 | self._robot_model: ManipulatorModel | None = kwargs.get( 85 | RoboGymEnv.KW_ROBOT_MODEL_OBJECT 86 | ) 87 | assert self._robot_model is not None 88 | self._joint_names: list[str] = self._robot_model.joint_names 89 | self._fixed_joint_names: list[str] = [] 90 | self._controlled_joint_names: list[str] = [] 91 | for joint_name in self._joint_names: 92 | fix_arg_name = self.KW_PREFIX_FIX_JOINT + joint_name 93 | if kwargs.get(fix_arg_name, False): 94 | self._fixed_joint_names.append(joint_name) 95 | else: 96 | self._controlled_joint_names.append(joint_name) 97 | 98 | def get_action_space(self) -> gym.spaces.Box: 99 | length = len(self._controlled_joint_names) 100 | return gym.spaces.Box( 101 | low=np.full(length, -1.0, dtype=np.float32), 102 | high=np.full(length, 1.0, dtype=np.float32), 103 | dtype=np.float32, 104 | ) 105 | 106 | def env_action_to_rs_action(self, env_action: NDArray, **kwargs) -> NDArray: 107 | # optimization potential, but more concise than it was: 108 | # start with default positions and overwrite non-fixed joints with values from env action 109 | normalized_full_action = self._robot_model.normalize_joint_values( 110 | np.array(self._robot_model.joint_positions, dtype=np.float32) 111 | ) 112 | source_index = 0 113 | for joint_index in range(len(self._joint_names)): 114 | joint_name = self._joint_names[joint_index] 115 | if joint_name in self._controlled_joint_names: 116 | normalized_full_action[joint_index] = env_action[source_index] 117 | source_index += 1 118 | 119 | denormalized_full_action = self._robot_model.denormalize_joint_values( 120 | normalized_full_action 121 | ) 122 | result = self._robot_model.reorder_joints_for_rs( 123 | denormalized_full_action 124 | ).astype(np.float32) 125 | return result 126 | 127 | def get_reset_state_part_state_dict(self) -> dict[str, float]: 128 | result: dict[str, float] = {} 129 | for joint_index in range(len(self._joint_names)): 130 | joint_name = self._robot_model.remote_joint_names[joint_index] 131 | state_key = ( 132 | joint_name + ManipulatorBaseEnv.RS_STATE_KEY_SUFFIX_JOINT_POSITION 133 | ) 134 | result[state_key] = self._robot_model.joint_positions[joint_index] 135 | return result 136 | 137 | @property 138 | def robot_model(self): 139 | return self._robot_model 140 | 141 | 142 | class ManipulatorObservationNode(ObservationNode): 143 | KW_JOINT_POSITION_TOLERANCE_NORMALIZED = "joint_position_tolerance_normalized" 144 | 145 | def __init__(self, **kwargs): 146 | super().__init__(**kwargs) 147 | self._robot_model: ManipulatorModel | None = kwargs.get( 148 | RoboGymEnv.KW_ROBOT_MODEL_OBJECT 149 | ) 150 | assert self._robot_model is not None 151 | self._joint_names: list[str] = ( 152 | self._robot_model.joint_names 153 | ) # kwargs.get("joint_names") 154 | 155 | def get_observation_space_part(self) -> gym.spaces.Box: 156 | # Joint position range tolerance 157 | num_joints = len(self._robot_model.joint_names) 158 | pos_tolerance = np.full(num_joints, self.joint_position_tolerance_normalized) 159 | 160 | # Joint positions range used to determine if there is an error in the sensor readings 161 | max_joint_positions = np.add(np.full(num_joints, 1.0), pos_tolerance) 162 | min_joint_positions = np.subtract(np.full(num_joints, -1.0), pos_tolerance) 163 | # Joint velocities range 164 | max_joint_velocities = np.array([np.inf] * num_joints) 165 | min_joint_velocities = -np.array([np.inf] * num_joints) 166 | # Definition of environment 
observation_space 167 | max_obs = np.concatenate( 168 | (max_joint_positions, max_joint_velocities), dtype=np.float32 169 | ) 170 | min_obs = np.concatenate( 171 | (min_joint_positions, min_joint_velocities), dtype=np.float32 172 | ) 173 | 174 | return gym.spaces.Box(low=min_obs, high=max_obs, dtype=np.float32) 175 | 176 | def rs_state_to_observation_part( 177 | self, rs_state_array: NDArray, rs_state_dict: dict[str, float], **kwargs 178 | ) -> NDArray: 179 | # from old impl, but why are only the joint positions normalized? 180 | joint_positions = self.extract_normalized_joint_positions_from_rs_state_dict( 181 | rs_state_dict 182 | ) 183 | 184 | # Joint Velocities 185 | joint_velocities = self.extract_joint_velocities_from_rs_state_dict( 186 | rs_state_dict 187 | ) 188 | 189 | # Compose environment state 190 | state = np.concatenate((joint_positions, joint_velocities)) 191 | 192 | return state.astype(np.float32) 193 | 194 | def extract_joint_velocities_from_rs_state_dict(self, rs_state_dict): 195 | joint_velocities = [] 196 | joint_velocities_keys = [ 197 | joint_name + ManipulatorBaseEnv.RS_STATE_KEY_SUFFIX_JOINT_VELOCITY 198 | for joint_name in self._robot_model.remote_joint_names 199 | ] 200 | for velocity in joint_velocities_keys: 201 | joint_velocities.append(rs_state_dict[velocity]) 202 | joint_velocities = np.array(joint_velocities) 203 | return joint_velocities 204 | 205 | def extract_normalized_joint_positions_from_rs_state_dict(self, rs_state_dict): 206 | joint_positions = self.extract_joint_positions_from_rs_state_dict(rs_state_dict) 207 | # Normalize joint position values 208 | joint_positions = self._robot_model.normalize_joint_values( 209 | joints=joint_positions 210 | ) 211 | return joint_positions 212 | 213 | def extract_joint_positions_from_rs_state_dict(self, rs_state_dict): 214 | joint_positions = [] 215 | joint_positions_keys = [ 216 | joint_name + ManipulatorBaseEnv.RS_STATE_KEY_SUFFIX_JOINT_POSITION 217 | for joint_name in self._robot_model.remote_joint_names 218 | ] 219 | for position in joint_positions_keys: 220 | joint_positions.append(rs_state_dict[position]) 221 | joint_positions = np.array(joint_positions) 222 | return joint_positions 223 | 224 | @property 225 | def joint_position_tolerance_normalized(self) -> float: 226 | return self._config.get( 227 | ManipulatorObservationNode.KW_JOINT_POSITION_TOLERANCE_NORMALIZED, 0.1 228 | ) 229 | 230 | 231 | class ManipulatorRewardNode(RewardNode): 232 | 233 | def __init__(self, **kwargs): 234 | super().__init__(**kwargs) 235 | 236 | def get_reward( 237 | self, 238 | rs_state_array: NDArray, 239 | rs_state_dict: dict[str, float], 240 | env_action: NDArray, 241 | **kwargs, 242 | ) -> Tuple[float, bool, dict]: 243 | 244 | info = self.env.get_default_info() 245 | done = False 246 | 247 | # Check if robot is in collision 248 | collision = rs_state_dict["in_collision"] == 1 249 | if collision: 250 | done = True 251 | info[RoboGymEnv.INFO_KW_FINAL_STATUS] = RoboGymEnv.FINAL_STATUS_COLLISION 252 | 253 | elif ( 254 | self.max_episode_steps is not None 255 | and self.env.elapsed_steps >= self.max_episode_steps 256 | ): 257 | done = True 258 | info[RoboGymEnv.INFO_KW_FINAL_STATUS] = RoboGymEnv.FINAL_STATUS_SUCCESS 259 | 260 | return 0.0, done, info 261 | --------------------------------------------------------------------------------
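The action and observation nodes above lean on the normalization helpers of ManipulatorModel. A minimal round-trip sketch, assuming the package is installed and the interpreter is started from the repository root so that the ur5 parameter file is found:

```python
import numpy as np

from robo_gym.utils.manipulator_model import ManipulatorModel

# Load the UR5 joint limits shipped with the package.
model = ManipulatorModel(
    model_key="ur5",
    file_path="robo_gym/utils/ur_parameters/ur5.yaml",
)

joints = np.array([-1.57, -1.31, -1.31, -2.18, 1.57, 0.0])
normalized = model.normalize_joint_values(joints)      # mapped into [-1, 1]
restored = model.denormalize_joint_values(normalized)  # back to radians

# np.interp is monotonic, so the mapping round-trips within float precision.
assert np.allclose(joints, restored, atol=1e-6)
```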