├── robo_gym ├── envs │ ├── ur │ │ ├── __init__.py │ │ ├── ur_ee_pos.py │ │ ├── ur_base.py │ │ ├── ur_isaac_reach.py │ │ └── ur_avoidance_basic.py │ ├── base │ │ └── __init__.py │ ├── example │ │ ├── __init__.py │ │ └── example_env.py │ ├── mir100 │ │ └── __init__.py │ ├── manipulator │ │ ├── __init__.py │ │ ├── isaac_reach.py │ │ └── manipulator_base.py │ ├── __init__.py │ ├── panda │ │ ├── panda_base.py │ │ ├── panda_ee_pos.py │ │ └── panda_isaac_reach.py │ └── simulation_wrapper.py ├── utils │ ├── __init__.py │ ├── exceptions.py │ ├── managed_rs_client.py │ ├── panda_utils.py │ ├── mir100_utils.py │ ├── ur_utils.py │ ├── panda_parameters │ │ └── panda.yaml │ ├── table.py │ ├── ur_parameters │ │ ├── ur10.yaml │ │ ├── ur3.yaml │ │ ├── ur10e.yaml │ │ ├── ur16e.yaml │ │ ├── ur3e.yaml │ │ ├── ur5.yaml │ │ └── ur5e.yaml │ └── manipulator_model.py ├── wrappers │ ├── __init__.py │ └── env_wrappers │ │ └── ur_ee_positioning_training.py ├── version.py └── __init__.py ├── bin ├── docker_entrypoint └── run-rs-side-standard ├── MANIFEST.in ├── docs ├── media │ ├── eurobin.png │ ├── robo-gym-logo.png │ └── funded_by_the_eu.png ├── examples │ ├── random_agent_sim.py │ ├── random_agent_robot.py │ ├── stable-baselines │ │ ├── td3_script.py │ │ ├── td3_load.py │ │ └── basic_example.md │ ├── random_agent_sim_custom_flags.py │ └── rllib │ │ ├── policy_from_cp_ur.py │ │ ├── policy_from_cp_mir.py │ │ ├── README.md │ │ ├── training_inference_mir.py │ │ └── training_inference_ur.py ├── creating_environments.md ├── managing_multiple_python_vers.md ├── isaac_lab_compatibility.md ├── the_framework.md └── modular_environments.md ├── .gitignore ├── pytest.ini ├── Dockerfile ├── docker-compose-test-melodic.yml ├── tests └── robo-gym │ ├── envs │ ├── mir100 │ │ ├── test_mir100_base_env.py │ │ └── test_mir100_obstacle_avoidance.py │ └── ur │ │ ├── test_ur_avoidance_raad_fixed_spline.py │ │ ├── test_ur_base_env.py │ │ ├── test_ur_avoidance_raad_training.py │ │ └── test_ur_avoidance_basic.py │ └── utils │ ├── test_ur.py │ └── test_utils.py ├── LICENSE ├── setup.py └── .gitlab-ci.yml /robo_gym/envs/ur/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /robo_gym/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /robo_gym/envs/base/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /robo_gym/envs/example/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /robo_gym/envs/mir100/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /robo_gym/wrappers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /robo_gym/envs/manipulator/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /robo_gym/version.py: 
-------------------------------------------------------------------------------- 1 | VERSION = "2.1.0" 2 | -------------------------------------------------------------------------------- /bin/docker_entrypoint: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | exec "$@" 5 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include robo_gym/utils/ur_parameters/* 2 | include robo_gym/envs/ur/robot_trajectories/* -------------------------------------------------------------------------------- /docs/media/eurobin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jr-robotics/robo-gym/HEAD/docs/media/eurobin.png -------------------------------------------------------------------------------- /docs/media/robo-gym-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jr-robotics/robo-gym/HEAD/docs/media/robo-gym-logo.png -------------------------------------------------------------------------------- /docs/media/funded_by_the_eu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jr-robotics/robo-gym/HEAD/docs/media/funded_by_the_eu.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *__pycache__ 2 | *.egg-info 3 | /.vscode 4 | /.idea 5 | 6 | # Setuptools 7 | /build 8 | /dist 9 | /*.egg-info 10 | /Pipfile 11 | -------------------------------------------------------------------------------- /bin/run-rs-side-standard: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | docker run -it \ 4 | --net=host \ 5 | joanneumrobotics/robo-gym-robot-server-side:0.1.0-standard 6 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | markers = 3 | nightly: marks tests to be run in nightly pipeline (deselect with '-m "not nightly"') 4 | commit: marks tests to be run by default (select with '-m "commit"') -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | ARG PYTHON_VER 2 | FROM python:$PYTHON_VER-bullseye 3 | RUN apt-get -y update && apt-get install -y unzip libglu1-mesa-dev libgl1-mesa-dev libosmesa6-dev xvfb patchelf ffmpeg 4 | RUN pip install pytest pytest-rerunfailures 5 | ARG CACHEBUST=1 6 | COPY . /usr/local/robo-gym/ 7 | WORKDIR /usr/local/robo-gym/ 8 | RUN pip install . 
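# bin/docker_entrypoint simply exec's the command given to the container (see its listing above), so the default CMD below drops into a bash shell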
9 | ENTRYPOINT ["/usr/local/robo-gym/bin/docker_entrypoint"] 10 | CMD ["bash"] 11 | -------------------------------------------------------------------------------- /docs/examples/random_agent_sim.py: -------------------------------------------------------------------------------- 1 | import gymnasium as gym 2 | import robo_gym 3 | 4 | target_machine_ip = "127.0.0.1" # or other machine 'xxx.xxx.xxx.xxx' 5 | 6 | # initialize environment 7 | env = gym.make("NoObstacleNavigationMir100Sim-v0", ip=target_machine_ip, gui=True) 8 | 9 | num_episodes = 10 10 | 11 | for episode in range(num_episodes): 12 | done = False 13 | env.reset() 14 | while not done: 15 | # random step in the environment 16 | state, reward, terminated, truncated, info = env.step(env.action_space.sample()) 17 | done = terminated or truncated 18 | -------------------------------------------------------------------------------- /docker-compose-test-melodic.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | 3 | services: 4 | robot-servers: 5 | container_name: robot-servers 6 | image: robot-servers 7 | command: bash -c "start-server-manager && tail -f /dev/null" #This starts the server manager and hangs 8 | restart: always 9 | 10 | robo-gym: 11 | container_name: robo-gym 12 | # build: 13 | # context: . 14 | # dockerfile: Dockerfile 15 | image: robo-gym 16 | restart: always 17 | entrypoint: tail -f /dev/null 18 | depends_on: 19 | - robot-servers 20 | # stdin_open: true 21 | # tty: true 22 | -------------------------------------------------------------------------------- /docs/examples/random_agent_robot.py: -------------------------------------------------------------------------------- 1 | import gymnasium as gym 2 | import robo_gym 3 | 4 | # PLEASE DON'T USE RANDOM SAMPLES WITH A REAL ROBOT! THIS IS JUST A CODE EXAMPLE! 
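# On a real robot, compute actions with a trained model instead - see e.g. docs/examples/stable-baselines/td3_load.py for loading and running a saved policy.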
5 | 6 | robot_address = 'xxx.xxx.xxx.xxx' 7 | 8 | # initialize environment 9 | env = gym.make('NoObstacleNavigationMir100Rob-v0', rs_address=robot_address) 10 | 11 | num_episodes = 1 12 | 13 | for episode in range(num_episodes): 14 | done = False 15 | env.reset() 16 | while not done: 17 | # random step in the environment 18 | state, reward, terminated, truncated, info = env.step(env.action_space.sample()) 19 | done = terminated or truncated -------------------------------------------------------------------------------- /docs/examples/stable-baselines/td3_script.py: -------------------------------------------------------------------------------- 1 | import gymnasium as gym 2 | import robo_gym 3 | from stable_baselines3 import TD3 4 | from stable_baselines3.td3.policies import MlpPolicy 5 | 6 | # specify the ip of the machine running the robot-server 7 | target_machine_ip = "127.0.0.1" # or other xxx.xxx.xxx.xxx 8 | 9 | # initialize environment 10 | env = gym.make("NoObstacleNavigationMir100Sim-v0", ip=target_machine_ip, gui=True) 11 | 12 | # choose and run appropriate algorithm provided by stable-baselines 13 | model = TD3(MlpPolicy, env, verbose=1) 14 | model.learn(total_timesteps=2000) 15 | 16 | # save model 17 | model.save("td3_mir_basic") 18 | -------------------------------------------------------------------------------- /tests/robo-gym/envs/mir100/test_mir100_base_env.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import gymnasium as gym 4 | import robo_gym 5 | import pytest 6 | 7 | @pytest.fixture(scope='module') 8 | def env(request): 9 | ip = os.environ.get("ROBOGYM_SERVERS_HOST", 'robot-servers') 10 | env = gym.make('NoObstacleNavigationMir100Sim-v0', ip=ip) 11 | yield env 12 | env.kill_sim() 13 | 14 | @pytest.mark.commit 15 | def test_initialization(env): 16 | env.reset() 17 | done = False 18 | for _ in range(10): 19 | if not done: 20 | action = env.action_space.sample() 21 | observation, _, done, _, _ = env.step(action) 22 | 23 | assert env.observation_space.contains(observation) -------------------------------------------------------------------------------- /tests/robo-gym/envs/mir100/test_mir100_obstacle_avoidance.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import gymnasium as gym 4 | import robo_gym 5 | import pytest 6 | 7 | @pytest.fixture(scope='module') 8 | def env(request): 9 | ip = os.environ.get("ROBOGYM_SERVERS_HOST", 'robot-servers') 10 | env = gym.make('ObstacleAvoidanceMir100Sim-v0', ip=ip) 11 | yield env 12 | env.kill_sim() 13 | 14 | @pytest.mark.commit 15 | def test_initialization(env): 16 | env.reset() 17 | done = False 18 | for _ in range(10): 19 | if not done: 20 | action = env.action_space.sample() 21 | observation, _, done, _, _ = env.step(action) 22 | 23 | assert env.observation_space.contains(observation) -------------------------------------------------------------------------------- /docs/examples/random_agent_sim_custom_flags.py: -------------------------------------------------------------------------------- 1 | import gymnasium as gym 2 | import robo_gym 3 | from robo_gym.envs.simulation_wrapper import Simulation 4 | 5 | target_machine_ip = "127.0.0.1" # or other machine 'xxx.xxx.xxx.xxx' 6 | 7 | Simulation.verbose = True 8 | Simulation.del_try_async_kill = False 9 | 10 | # initialize environment 11 | env = gym.make("NoObstacleNavigationMir100Sim-v0", ip=target_machine_ip, gui=True) 12 | 13 | num_episodes = 10 14 | 15 | for 
episode in range(num_episodes): 16 | done = False 17 | env.reset() 18 | while not done: 19 | # random step in the environment 20 | state, reward, terminated, truncated, info = env.step(env.action_space.sample()) 21 | done = terminated or truncated 22 | 23 | env.close() 24 | -------------------------------------------------------------------------------- /robo_gym/utils/exceptions.py: -------------------------------------------------------------------------------- 1 | class InvalidStateError(Exception): 2 | def __init__(self, message = "The environment state received is not contained in the observation space."): 3 | self.message = message 4 | def __str__(self): 5 | return self.message 6 | 7 | class RobotServerError(Exception): 8 | def __init__(self, service): 9 | if service == "set_state": 10 | self.message = "set_state service call failed." 11 | elif service == "send_action": 12 | self.message = "send_action service call failed." 13 | elif service == "get_state": 14 | self.message = "get_state service call failed." 15 | else: 16 | self.message = "service call failed" 17 | def __str__(self): 18 | return self.message 19 | 20 | class InvalidActionError(Exception): 21 | def __init__(self, message = "The action is not contained in the action space."): 22 | self.message = message 23 | def __str__(self): 24 | return self.message -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 JOANNEUM RESEARCH Forschungsgesellschaft mbH 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | import sys, os 3 | 4 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "robo_gym")) 5 | from version import VERSION 6 | 7 | setuptools.setup( 8 | name="robo-gym", 9 | version=VERSION, 10 | description="robo-gym: an open source toolkit for Distributed Deep Reinforcement Learning on real and simulated robots.", 11 | url="https://github.com/jr-robotics/robo-gym", 12 | author="Matteo Lucchi, Friedemann Zindler, Bernhard Reiterer, Mara Vukadinovic, Christian Bauer, Thomas Gallien, Benjamim Breiling", 13 | author_email="bernhard.reiterer@joanneum.at", 14 | packages=setuptools.find_packages(), 15 | include_package_data=True, 16 | classifiers=[ 17 | "Programming Language :: Python :: 3", 18 | "Programming Language :: Python :: 3.8", 19 | "Programming Language :: Python :: 3.9", 20 | "Programming Language :: Python :: 3.10", 21 | "Programming Language :: Python :: 3.11", 22 | ], 23 | install_requires=[ 24 | "gymnasium", 25 | "robo-gym-server-modules", 26 | "numpy", 27 | "scipy", 28 | "pyyaml", 29 | ], 30 | python_requires=">=3.8", 31 | scripts=["bin/run-rs-side-standard"], 32 | ) 33 | -------------------------------------------------------------------------------- /robo_gym/utils/managed_rs_client.py: -------------------------------------------------------------------------------- 1 | import robo_gym_server_modules.robot_server.client as rs_client 2 | import robo_gym_server_modules.server_manager.client as sm_client 3 | 4 | 5 | class ManagedClient(rs_client.Client): 6 | 7 | def __init__( 8 | self, 9 | *, 10 | server_manager_host: str, 11 | server_manager_port: int = 50100, 12 | launch_cmd: str, 13 | gui: bool = False 14 | ): 15 | if not server_manager_host: 16 | raise Exception("ManagedClient: missing server manager host - cannot work!") 17 | print( 18 | "Command for new simulation robot server: " 19 | + launch_cmd 20 | + " gui:=" 21 | + ("true" if gui else "false") 22 | ) 23 | self.sm_client = sm_client.Client( 24 | server_manager_host, server_manager_port, server_manager_port + 1 25 | ) 26 | self.cmd = launch_cmd 27 | self.gui = gui 28 | self.rs_address = self.sm_client.start_new_server(cmd=self.cmd, gui=self.gui) 29 | super().__init__(self.rs_address) 30 | 31 | def kill(self): 32 | self.sm_client.kill_server(self.rs_address) 33 | # TODO might want to try async killing, too 34 | -------------------------------------------------------------------------------- /robo_gym/utils/panda_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import numpy as np 4 | from numpy.typing import NDArray 5 | import yaml 6 | import os 7 | import copy 8 | 9 | from robo_gym.utils.manipulator_model import * 10 | 11 | 12 | class Panda(ManipulatorModel): 13 | """Franka Emika Panda model class. 14 | 15 | Attributes: 16 | max_joint_positions (np.array): Maximum joint position values (rad)`. 17 | min_joint_positions (np.array): Minimum joint position values (rad)`. 18 | max_joint_velocities (np.array): Maximum joint velocity values (rad/s)`. 19 | min_joint_velocities (np.array): Minimum joint velocity values (rad/s)`. 20 | joint_names (list): Joint names (Standard Indexing)`. 
21 | 22 | Joint Names (ROS Indexing): 23 | [panda_joint1, panda_joint2, panda_joint3, panda_joint4, panda_joint5, 24 | panda_joint6, panda_joint7] 25 | 26 | NOTE: Where not specified, Standard Indexing is used. 27 | """ 28 | 29 | def __init__(self, model_key: str): 30 | 31 | assert model_key == "panda" 32 | 33 | self.model = model_key 34 | 35 | file_name = model_key + ".yaml" 36 | file_path = os.path.join( 37 | os.path.dirname(__file__), "panda_parameters", file_name 38 | ) 39 | 40 | super().__init__(model_key, file_path) 41 | -------------------------------------------------------------------------------- /tests/robo-gym/utils/test_ur.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from robo_gym.utils.ur_utils import UR 3 | import numpy as np 4 | 5 | 6 | class TestUrUtils: 7 | # TODO more assertions on test results 8 | 9 | def test_random_workspace_pose(self): 10 | ur = UR(model_key="ur10") 11 | 12 | pose = ur.get_random_workspace_pose_rpy() 13 | assert 6 == len(pose) 14 | 15 | pose = ur.get_random_workspace_pose_rpy(np_random=np.random.default_rng()) 16 | assert 6 == len(pose) 17 | 18 | def test_random_offset_joint_positions(self): 19 | ur = UR(model_key="ur10") 20 | joint_positions = np.array( 21 | [ 22 | 0.0, 23 | -2.5, 24 | 1.5, 25 | 0.0, 26 | -1.4, 27 | 0.0, 28 | ], 29 | dtype=np.float32, 30 | ) 31 | random_offset = np.array( 32 | [ 33 | 1.0, 34 | 1.0, 35 | 1.0, 36 | 1.0, 37 | 1.0, 38 | 1.0, 39 | ], 40 | dtype=np.float32, 41 | ) 42 | 43 | positions = ur.get_random_offset_joint_positions(joint_positions, random_offset) 44 | assert 6 == len(positions) 45 | 46 | positions = ur.get_random_offset_joint_positions( 47 | joint_positions, random_offset, np_random=np.random.default_rng() 48 | ) 49 | assert 6 == len(positions) 50 | -------------------------------------------------------------------------------- /docs/examples/stable-baselines/td3_load.py: -------------------------------------------------------------------------------- 1 | import gymnasium as gym 2 | import robo_gym 3 | from stable_baselines3 import TD3 4 | from stable_baselines3.td3.policies import MlpPolicy 5 | 6 | # specify the ip of the machine running the robot-server 7 | target_machine_ip = "127.0.0.1" # or other xxx.xxx.xxx.xxx 8 | 9 | # initialize environment 10 | env = gym.make("NoObstacleNavigationMir100Sim-v0", ip=target_machine_ip, gui=True) 11 | 12 | model = TD3.load("td3_mir_basic") 13 | 14 | timesteps = 1000 15 | 16 | passed_timesteps = 0 17 | passed_episodes = 0 18 | 19 | while True: 20 | print("Starting episode {} - reset... 
".format((passed_episodes + 1))) 21 | observation, info = env.reset() 22 | done = False 23 | episode_timesteps = 0 24 | print("Reset finished, entering execution.".format((passed_episodes + 1))) 25 | while not done: 26 | action, _ = model.predict(observation=observation) 27 | observation, reward, terminated, truncated, info = env.step(action) 28 | done = terminated or truncated 29 | passed_timesteps += 1 30 | episode_timesteps += 1 31 | if passed_timesteps >= timesteps: 32 | break 33 | if done: 34 | passed_episodes += 1 35 | print( 36 | "Episode {} terminated after {} timesteps with reward {}".format( 37 | passed_episodes, episode_timesteps, reward 38 | ) 39 | ) 40 | 41 | if passed_timesteps >= timesteps: 42 | break 43 | 44 | env.close() 45 | -------------------------------------------------------------------------------- /robo_gym/envs/__init__.py: -------------------------------------------------------------------------------- 1 | # Example 2 | from robo_gym.envs.example.example_env import ExampleEnvSim, ExampleEnvRob 3 | 4 | # MiR100 5 | from robo_gym.envs.mir100.mir100 import ( 6 | NoObstacleNavigationMir100Sim, 7 | NoObstacleNavigationMir100Rob, 8 | ) 9 | from robo_gym.envs.mir100.mir100 import ( 10 | ObstacleAvoidanceMir100Sim, 11 | ObstacleAvoidanceMir100Rob, 12 | ) 13 | 14 | # UR 15 | from robo_gym.envs.ur.ur_base_env import EmptyEnvironmentURSim, EmptyEnvironmentURRob 16 | from robo_gym.envs.ur.ur_ee_positioning import ( 17 | EndEffectorPositioningURSim, 18 | EndEffectorPositioningURRob, 19 | ) 20 | from robo_gym.envs.ur.ur_avoidance_basic import BasicAvoidanceURSim, BasicAvoidanceURRob 21 | from robo_gym.envs.ur.ur_avoidance_raad import ( 22 | AvoidanceRaad2022URSim, 23 | AvoidanceRaad2022URRob, 24 | ) 25 | from robo_gym.envs.ur.ur_avoidance_raad import ( 26 | AvoidanceRaad2022TestURSim, 27 | AvoidanceRaad2022TestURRob, 28 | ) 29 | 30 | from robo_gym.envs.ur.ur_base import EmptyEnvironment2URSim, EmptyEnvironment2URRob 31 | from robo_gym.envs.ur.ur_ee_pos import ( 32 | EndEffectorPositioning2URSim, 33 | EndEffectorPositioning2URRob, 34 | ) 35 | from robo_gym.envs.ur.ur_isaac_reach import IsaacReachURSim, IsaacReachURRob 36 | 37 | from robo_gym.envs.panda.panda_base import ( 38 | EmptyEnvironmentPandaSim, 39 | EmptyEnvironmentPandaRob, 40 | ) 41 | from robo_gym.envs.panda.panda_ee_pos import ( 42 | EndEffectorPositioningPandaSim, 43 | EndEffectorPositioningPandaRob, 44 | ) 45 | from robo_gym.envs.panda.panda_isaac_reach import IsaacReachPandaSim, IsaacReachPandaRob 46 | -------------------------------------------------------------------------------- /docs/examples/rllib/policy_from_cp_ur.py: -------------------------------------------------------------------------------- 1 | import gymnasium as gym 2 | from ray.rllib.policy.policy import Policy 3 | import robo_gym 4 | import ray 5 | import sys 6 | import numpy as np 7 | import torch 8 | import torch.nn 9 | from ray.rllib.core.rl_module import RLModule 10 | 11 | if __name__ == "__main__": 12 | 13 | policy_cp_path = sys.argv[1] 14 | 15 | policy = Policy.from_checkpoint(policy_cp_path) 16 | 17 | env = gym.make("EndEffectorPositioningURSim-v0", ip="127.0.0.1", gui=True, ur_model="ur5", rs_state_to_info=False) 18 | obs, _ = env.reset() 19 | 20 | num_episodes = 0 21 | episode_reward = 0.0 22 | 23 | count_overall = 0 24 | count_clipped = 0 25 | 26 | while num_episodes < 5: 27 | torch_obs_batch = torch.from_numpy(np.array([obs])) 28 | a = policy.compute_single_action(torch_obs_batch) 29 | a_np = np.array(a[0], dtype=np.float32) 30 | 31 | 
clipped_a_np = a_np.clip(-1, 1) 32 | 33 | if not np.array_equal(clipped_a_np, a_np): 34 | count_clipped += 1 35 | 36 | count_overall += 1 37 | 38 | # Send the computed action `a` to the env. 39 | obs, reward, done, truncated, _ = env.step(clipped_a_np) 40 | episode_reward += reward 41 | # Is the episode `done`? -> Reset. 42 | if done: 43 | print(f"Episode done: Total reward = {episode_reward}") 44 | obs, _ = env.reset() 45 | num_episodes += 1 46 | episode_reward = 0.0 47 | 48 | env.close() 49 | 50 | ray.shutdown() 51 | print(f"clipped {count_clipped} of {count_overall} actions") -------------------------------------------------------------------------------- /docs/creating_environments.md: -------------------------------------------------------------------------------- 1 | # Creating Environments 2 | 3 | One of the benefits of the modular architecture of robo-gym is that it is fairly simple to add new environments to it. 4 | 5 | All the environments are stored under the [envs](robo_gym/envs) directory, where they are organized in a separate folder for each robot. 6 | 7 | See also the newer documentation in [Modular Environments](modular_environments.md) to learn about the benefits of basing environments on `RoboGymEnv` rather than directly on `gym.env`. 8 | 9 | All the environments are based on the `gym.env` class; you can base your environment class directly on `gym.env` or start from one of the classes already included in robo-gym and extend them. 10 | 11 | Once you have created the class for your environment, it has to be registered in order to make it available. Following what has already been done for the existing environments, add the import of your classes to [robo_gym/envs/\_\_init\_\_.py](robo_gym/envs/__init__.py) and the registration of the environment to [robo_gym/\_\_init\_\_.py](robo_gym/__init__.py); a sketch of both steps is shown at the end of this page. 12 | 13 | ## Example 14 | 15 | We provide a base Environment together with its Robot Server with minimum functionality as a base for you to start with the implementation of your own Robot Server. 16 | 17 | [Robot Server Example](https://github.com/jr-robotics/robo-gym-robot-servers/tree/master/example_robot_server) 18 | 19 | [Environment Example](../robo_gym/envs/example/example_env.py) 20 | 21 | ## Integrating new robots and sensors 22 | 23 | Integrating new robots and sensors is possible but requires knowledge of ROS and Gazebo or alternatives.
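As a rough sketch of the two registration steps described above - `MyRobotEnvSim`, the module path, and the `register` arguments are hypothetical placeholders to adapt to your own environment:

```python
# in robo_gym/envs/__init__.py - expose the new environment class (hypothetical names)
from robo_gym.envs.my_robot.my_robot_env import MyRobotEnvSim

# in robo_gym/__init__.py - register it so that gym.make("MyRobotEnvSim-v0") can find it
from gymnasium.envs.registration import register

register(
    id="MyRobotEnvSim-v0",
    entry_point="robo_gym.envs:MyRobotEnvSim",
)
```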
24 | -------------------------------------------------------------------------------- /docs/examples/rllib/policy_from_cp_mir.py: -------------------------------------------------------------------------------- 1 | import gymnasium as gym 2 | from ray.rllib.policy.policy import Policy 3 | import robo_gym 4 | import ray 5 | import sys 6 | import numpy as np 7 | from robo_gym.envs.simulation_wrapper import Simulation 8 | import torch 9 | import torch.nn 10 | from ray.rllib.core.rl_module import RLModule 11 | 12 | if __name__ == "__main__": 13 | 14 | policy_cp_path = sys.argv[1] 15 | 16 | policy = Policy.from_checkpoint(policy_cp_path) 17 | 18 | env = gym.make("NoObstacleNavigationMir100Sim-v0", ip="127.0.0.1", gui=True) 19 | obs, _ = env.reset() 20 | 21 | num_episodes = 0 22 | episode_reward = 0.0 23 | 24 | count_overall = 0 25 | count_clipped = 0 26 | 27 | while num_episodes < 5: 28 | torch_obs_batch = torch.from_numpy(np.array([obs])) 29 | a = policy.compute_single_action(torch_obs_batch) 30 | a_np = np.array(a[0], dtype=np.float32) 31 | 32 | clipped_a_np = a_np.clip(-1, 1) 33 | 34 | if not np.array_equal(clipped_a_np, a_np): 35 | count_clipped += 1 36 | 37 | count_overall += 1 38 | 39 | # Send the computed action `a` to the env. 40 | obs, reward, done, truncated, _ = env.step(clipped_a_np) 41 | episode_reward += reward 42 | # Is the episode `done`? -> Reset. 43 | if done: 44 | print(f"Episode done: Total reward = {episode_reward}") 45 | obs, _ = env.reset() 46 | env.render() 47 | num_episodes += 1 48 | episode_reward = 0.0 49 | 50 | env.close() 51 | 52 | ray.shutdown() 53 | print(f"clipped {count_clipped} of {count_overall} actions") -------------------------------------------------------------------------------- /docs/managing_multiple_python_vers.md: -------------------------------------------------------------------------------- 1 | # Managing Multiple Python Versions 2 | 3 | 4 | To manage multiple Python versions on the same machine we suggest using [pyenv](https://github.com/pyenv/pyenv). 5 | 6 | There is a great article on pyenv [here](https://realpython.com/intro-to-pyenv/). 7 | 8 | We recommend using [pyenv-installer](https://github.com/pyenv/pyenv-installer) to install pyenv on your machine. 9 | 10 | Once pyenv is installed you can check the available Python versions with: 11 | 12 | ``` 13 | pyenv versions 14 | ``` 15 | 16 | If this is the first time you are using pyenv, the previous command should return: 17 | 18 | ``` 19 | * system (set by /home/username/.pyenv/version) 20 | ``` 21 | This shows that the only Python installation available is the system default Python. 22 | 23 | The Standard Installation of the Robot Server Side has to be performed on 24 | the system default Python, which should be Python 2.7. 25 | 26 | The Environment Side installation requires Python >= 3.5. Let's first install a suitable Python version (e.g. 3.6.10) with: 27 | 28 | ``` 29 | pyenv install 3.6.10 30 | ``` 31 | 32 | Now we can create a virtual environment in which to install and run robo-gym: 33 | 34 | ``` 35 | pyenv virtualenv 3.6.10 robo-gym 36 | ``` 37 | 38 | Here, 3.6.10 is the desired Python version and robo-gym is the name of the virtual 39 | environment. 40 | 41 | To activate the virtual environment use: 42 | 43 | ``` 44 | pyenv activate robo-gym 45 | ``` 46 | 47 | You should see in your shell something like: 48 | 49 | ``` 50 | (robo-gym) user@machine:$ 51 | ``` 52 | 53 | This means that you are now within the virtual environment.
54 | This is the virtual environment in which you should perform the Environment Side 55 | installation. When you run `pip install robo-gym` the robo-gym package will 56 | be installed in the virtual environment. 57 | -------------------------------------------------------------------------------- /docs/examples/stable-baselines/basic_example.md: -------------------------------------------------------------------------------- 1 | # Getting Started Using Stable-Baselines3 2 | 3 | ## Prerequisites 4 | ### Installation of robo-gym 5 | Follow the installation instructions: https://github.com/jr-robotics/robo-gym#installation 6 | 7 | ### Installation of stable-baselines3: 8 | Follow the installation instructions: https://github.com/DLR-RM/stable-baselines3 9 | 10 | ## 1. Start the robot-server-manager 11 | Start the robot-server-manager on the machine where the simulation is supposed to run by entering the start command in a shell: 12 | 13 | ```bash 14 | start-server-manager 15 | ``` 16 | 17 | 18 | ## 2. Example of simulation training using the MiR100 base environment 19 | Here is a quick example running TD3 in the MiR100 base environment. 20 | 21 | (_Python >=3.8_ / _robo-gym_ virtual environment) 22 | ```python 23 | import gymnasium as gym 24 | import robo_gym 25 | from stable_baselines3 import TD3 26 | from stable_baselines3.td3.policies import MlpPolicy 27 | 28 | # specify the ip of the machine running the robot-server 29 | target_machine_ip = '127.0.0.1' 30 | 31 | # initialize environment (to render the environment set gui=True) 32 | env = gym.make('NoObstacleNavigationMir100Sim-v0', ip=target_machine_ip, gui=False) 33 | 34 | # follow the instructions provided by stable-baselines 35 | model = TD3(MlpPolicy, env, verbose=1) 36 | model.learn(total_timesteps=15000) 37 | 38 | # saving and loading a model 39 | model.save('td3_mir_basic') 40 | del model 41 | model = TD3.load("td3_mir_basic") 42 | 43 | # reinitialize simulation environment with rendering enabled 44 | env.kill_sim() 45 | env = gym.make('NoObstacleNavigationMir100Sim-v0', ip=target_machine_ip, gui=True) 46 | 47 | # run the environment 10 times using the trained model 48 | num_episodes = 10 49 | for episode in range(num_episodes): 50 | obs, _ = env.reset() 51 | done = False 52 | while not done: 53 | action, _states = model.predict(obs) 54 | obs, rewards, terminated, truncated, info = env.step(action) 55 | done = terminated or truncated 56 | ``` -------------------------------------------------------------------------------- /robo_gym/utils/mir100_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import numpy as np 4 | from robo_gym.utils import utils 5 | 6 | class Mir100(): 7 | """Mobile Industrial Robots MiR100 utilities. 8 | 9 | Attributes: 10 | max_lin_vel (float): Maximum robot's linear velocity (m/s). 11 | min_lin_vel (float): Minimum robot's linear velocity (m/s). 12 | max_ang_vel (float): Maximum robot's angular velocity (rad/s). 13 | min_ang_vel (float): Minimum robot's angular velocity (rad/s).
14 | 15 | """ 16 | def __init__(self): 17 | 18 | self.max_lin_vel = 1.0 19 | self.min_lin_vel = -1.0 20 | 21 | self.max_ang_vel = 1.5 22 | self.min_ang_vel = -1.5 23 | 24 | def get_max_lin_vel(self): 25 | 26 | return self.max_lin_vel 27 | 28 | def get_min_lin_vel(self): 29 | 30 | return self.min_lin_vel 31 | 32 | def get_max_ang_vel(self): 33 | 34 | return self.max_ang_vel 35 | 36 | def get_min_ang_vel(self): 37 | 38 | return self.min_ang_vel 39 | 40 | def get_corners_positions(self,x,y,yaw): 41 | """Get robot's corners coordinates given the coordinates of its center. 42 | 43 | Args: 44 | x (float): x coordinate of robot's geometric center. 45 | y (float): y coordinate of robot's geometric center. 46 | yaw (float): yaw angle of robot's geometric center. 47 | 48 | The coordinates are given with respect to the map origin and cartesian system. 49 | 50 | Returns: 51 | list[list]: x and y coordinates of the 4 robot's corners. 52 | 53 | """ 54 | 55 | robot_x_dimension = 0.9 56 | robot_y_dimension = 0.58 57 | dx = robot_x_dimension/2 58 | dy = robot_y_dimension/2 59 | 60 | delta_corners = [[dx,-dy],[dx,dy],[-dx,dy],[-dx,-dy]] 61 | corners = [] 62 | 63 | for corner_xy in delta_corners: 64 | # Rotate point around origin 65 | r_xy = utils.rotate_point(corner_xy[0],corner_xy[1],yaw) 66 | # Translate back from origin to corner 67 | corners.append([sum(x) for x in zip(r_xy,[x,y])]) 68 | 69 | return corners 70 | -------------------------------------------------------------------------------- /robo_gym/envs/ur/ur_ee_pos.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from robo_gym.envs.base.robogym_env import * 4 | from robo_gym.envs.manipulator.ee_pos_base import ManipulatorEePosEnv 5 | from robo_gym.envs.ur.ur_base import URBaseEnv2 6 | 7 | 8 | class EndEffectorPositioning2UR(ManipulatorEePosEnv): 9 | def __init__(self, **kwargs): 10 | # not too nice - repeated in super init 11 | self._config = kwargs 12 | 13 | obs_nodes: list[ObservationNode] | None = kwargs.get( 14 | RoboGymEnv.KW_OBSERVATION_NODES 15 | ) 16 | if obs_nodes is None: 17 | # having this hardcoded index is suboptimal; 1 should be correct since 0 will be the manipulator observation node 18 | obs_nodes = [LastActionObservationNode(**self.get_obs_node_setup_kwargs(1))] 19 | kwargs[RoboGymEnv.KW_OBSERVATION_NODES] = obs_nodes 20 | 21 | URBaseEnv2.set_robot_defaults(kwargs) 22 | kwargs.setdefault(RoboGymEnv.KW_ACTION_RATE, 10.0) 23 | kwargs.setdefault( 24 | ManipulatorEePosEnv.KW_RANDOM_JOINT_OFFSET, [1.5, 0.25, 0.5, 1.0, 0.4, 3.14] 25 | ) 26 | 27 | super().__init__(**kwargs) 28 | 29 | def get_launch_cmd(self) -> str: 30 | # TODO make string composition more dynamic 31 | return f"roslaunch ur_robot_server ur_robot_server.launch \ 32 | rviz_gui:={self._config.get(self.KW_RVIZ_GUI_FLAG, True)} \ 33 | gazebo_gui:={self._config.get(self.KW_GAZEBO_GUI_FLAG, True)} \ 34 | world_name:=tabletop_sphere50_no_collision.world \ 35 | reference_frame:=base_link \ 36 | max_velocity_scale_factor:={self._config.get(self.KW_MAX_VELOCITY_SCALE_FACTOR, .1)} \ 37 | action_cycle_rate:={self.action_rate} \ 38 | objects_controller:=true \ 39 | rs_mode:=1object \ 40 | n_objects:=1.0 \ 41 | object_0_model_name:=sphere50_no_collision \ 42 | object_0_frame:=target \ 43 | ur_model:={self.ur_model_key}" 44 | 45 | @property 46 | def ur_model_key(self) -> str: 47 | return self._config.get(URBaseEnv2.KW_UR_MODEL_KEY) 48 | 49 | 50 | class 
EndEffectorPositioning2URSim(EndEffectorPositioning2UR): 51 | 52 | def __init__(self, **kwargs): 53 | kwargs[self.KW_REAL_ROBOT] = False 54 | super().__init__(**kwargs) 55 | 56 | 57 | class EndEffectorPositioning2URRob(EndEffectorPositioning2UR): 58 | 59 | def __init__(self, **kwargs): 60 | kwargs[self.KW_REAL_ROBOT] = True 61 | super().__init__(**kwargs) 62 | -------------------------------------------------------------------------------- /robo_gym/envs/panda/panda_base.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from robo_gym.envs.base.robogym_env import * 4 | from robo_gym.envs.manipulator.manipulator_base import ( 5 | ManipulatorBaseEnv, 6 | ManipulatorActionNode, 7 | ) 8 | from robo_gym.utils.panda_utils import Panda 9 | 10 | 11 | class PandaBaseEnv(ManipulatorBaseEnv): 12 | 13 | KW_PANDA_MODEL_KEY = "panda_model" 14 | 15 | def __init__(self, **kwargs): 16 | 17 | PandaBaseEnv.set_robot_defaults(kwargs) 18 | 19 | super().__init__(**kwargs) 20 | 21 | @staticmethod 22 | def set_robot_defaults(kwargs: dict[str, Any]): 23 | 24 | # prepare robot model set it in the kwargs 25 | robot_model: Panda | None = kwargs.get(RoboGymEnv.KW_ROBOT_MODEL_OBJECT) 26 | if robot_model is None: 27 | robot_model = Panda( 28 | model_key=kwargs.get(PandaBaseEnv.KW_PANDA_MODEL_KEY, "panda") 29 | ) 30 | kwargs[RoboGymEnv.KW_ROBOT_MODEL_OBJECT] = robot_model 31 | 32 | # default action rate 33 | kwargs.setdefault(RoboGymEnv.KW_ACTION_RATE, 30.0) 34 | 35 | # default max episode steps 36 | kwargs.setdefault(RewardNode.KW_MAX_EPISODE_STEPS, 300) 37 | 38 | # default fixed joints: last joint 39 | prefix_fix = ManipulatorActionNode.KW_PREFIX_FIX_JOINT 40 | last_joint_name = robot_model.joint_names[-1] 41 | kw_fix_last_joint = prefix_fix + last_joint_name 42 | kwargs.setdefault(kw_fix_last_joint, True) 43 | 44 | # default joint positions 45 | value = [-0.018, -0.76, 0.02, -2.342, 0.03, 1.541, 0.753] 46 | 47 | kwargs.setdefault(ManipulatorBaseEnv.KW_JOINT_POSITIONS, value) 48 | 49 | def get_launch_cmd(self) -> str: 50 | # TODO make string composition more dynamic 51 | return f"roslaunch panda_robot_server panda_robot_server.launch \ 52 | rviz_gui:={self._config.get(self.KW_RVIZ_GUI_FLAG, True)} \ 53 | gazebo_gui:={self._config.get(self.KW_GAZEBO_GUI_FLAG, True)} \ 54 | world_name:=empty_no_gravity.world \ 55 | reference_frame:=world \ 56 | max_velocity_scale_factor:={self._config.get(self.KW_MAX_VELOCITY_SCALE_FACTOR, .2)} \ 57 | action_cycle_rate:={self.action_rate} \ 58 | rs_mode:=only_robot \ 59 | action_mode:={self.action_mode}" 60 | 61 | 62 | class EmptyEnvironmentPandaSim(PandaBaseEnv): 63 | 64 | def __init__(self, **kwargs): 65 | kwargs[self.KW_REAL_ROBOT] = False 66 | super().__init__(**kwargs) 67 | 68 | 69 | class EmptyEnvironmentPandaRob(PandaBaseEnv): 70 | 71 | def __init__(self, **kwargs): 72 | kwargs[self.KW_REAL_ROBOT] = True 73 | super().__init__(**kwargs) 74 | -------------------------------------------------------------------------------- /robo_gym/utils/ur_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import numpy as np 4 | from numpy.typing import NDArray 5 | import yaml 6 | import os 7 | import copy 8 | 9 | from robo_gym.utils.manipulator_model import * 10 | 11 | 12 | class UR(ManipulatorModel): 13 | """Universal Robots model class. 14 | 15 | Attributes: 16 | max_joint_positions (np.array): Maximum joint position values (rad)`. 
17 | min_joint_positions (np.array): Minimum joint position values (rad)`. 18 | max_joint_velocities (np.array): Maximum joint velocity values (rad/s)`. 19 | min_joint_velocities (np.array): Minimum joint velocity values (rad/s)`. 20 | joint_names (list): Joint names (Standard Indexing)`. 21 | 22 | Joint Names (ROS Indexing): 23 | [elbow_joint, shoulder_lift_joint, shoulder_pan_joint, wrist_1_joint, wrist_2_joint, 24 | wrist_3_joint] 25 | 26 | NOTE: Where not specified, Standard Indexing is used. 27 | """ 28 | 29 | def __init__(self, model_key: str): 30 | 31 | assert model_key in ["ur3", "ur3e", "ur5", "ur5e", "ur10", "ur10e", "ur16e"] 32 | 33 | self.model = model_key 34 | 35 | file_name = model_key + ".yaml" 36 | file_path = os.path.join(os.path.dirname(__file__), "ur_parameters", file_name) 37 | 38 | super().__init__(model_key, file_path) 39 | 40 | def _swap_base_and_elbow(self, thetas: NDArray) -> NDArray: 41 | return np.array( 42 | [thetas[2], thetas[1], thetas[0], thetas[3], thetas[4], thetas[5]] 43 | ) 44 | 45 | def reorder_joints_from_rs(self, ros_thetas): 46 | """Transform joint angles list from ROS indexing to standard indexing. 47 | 48 | Rearrange a list containing the joints values from the joint indexes used 49 | in the ROS join_states messages to the standard joint indexing going from 50 | base to end effector. 51 | 52 | Args: 53 | ros_thetas (list): Joint angles with ROS indexing. 54 | 55 | Returns: 56 | np.array: Joint angles with standard indexing. 57 | 58 | """ 59 | 60 | return self._swap_base_and_elbow(ros_thetas) 61 | 62 | def reorder_joints_for_rs(self, thetas): 63 | """Transform joint angles list from standard indexing to ROS indexing. 64 | 65 | Rearrange a list containing the joints values from the standard joint indexing 66 | going from base to end effector to the indexing used in the ROS 67 | join_states messages. 68 | 69 | Args: 70 | thetas (list): Joint angles with standard indexing. 71 | 72 | Returns: 73 | np.array: Joint angles with ROS indexing. 
74 | 75 | """ 76 | 77 | return self._swap_base_and_elbow(thetas) 78 | -------------------------------------------------------------------------------- /robo_gym/utils/panda_parameters/panda.yaml: -------------------------------------------------------------------------------- 1 | # Sources: 2 | # 3 | # - Franka Emika Panda Documentation 4 | # https://wiki.happylab.at/images/5/5e/Panda_Robot_Manual_English.pdf 5 | joint_names: 6 | - panda_joint1 7 | - panda_joint2 8 | - panda_joint3 9 | - panda_joint4 10 | - panda_joint5 11 | - panda_joint6 12 | - panda_joint7 13 | remote_joint_names: 14 | - joint1 15 | - joint2 16 | - joint3 17 | - joint4 18 | - joint5 19 | - joint6 20 | - joint7 21 | workspace_area: 22 | # Values for the recommended workspace area 23 | r: 0.755 24 | min_r: 0.200 25 | 26 | # From https://frankaemika.github.io/docs/control_parameters.html 27 | # Joints limits 28 | joint_limits: 29 | panda_joint1: 30 | has_acceleration_limits: true 31 | has_effort_limits: true 32 | has_position_limits: true 33 | has_velocity_limits: true 34 | max_acceleration: 15.0 35 | max_effort: 87.0 36 | max_position: 2.8973 37 | max_velocity: 2.1750 38 | min_position: -2.8973 39 | panda_joint2: 40 | has_acceleration_limits: true 41 | has_effort_limits: true 42 | has_position_limits: true 43 | has_velocity_limits: true 44 | max_acceleration: 7.5 45 | max_effort: 87.0 46 | max_position: 1.7628 47 | max_velocity: 2.1750 48 | min_position: -1.7628 49 | panda_joint3: 50 | has_acceleration_limits: true 51 | has_effort_limits: true 52 | has_position_limits: true 53 | has_velocity_limits: true 54 | max_acceleration: 10.0 55 | max_effort: 87.0 56 | max_position: 2.8973 57 | max_velocity: 2.1750 58 | min_position: -2.8973 59 | panda_joint4: 60 | has_acceleration_limits: true 61 | has_effort_limits: true 62 | has_position_limits: true 63 | has_velocity_limits: true 64 | max_acceleration: 12.5 65 | max_effort: 87.0 66 | max_position: -0.0698 67 | max_velocity: 2.1750 68 | min_position: -3.0718 69 | panda_joint5: 70 | has_acceleration_limits: true 71 | has_effort_limits: true 72 | has_position_limits: true 73 | has_velocity_limits: true 74 | max_acceleration: 15.0 75 | max_effort: 15.0 76 | max_position: 2.8973 77 | max_velocity: 2.6100 78 | min_position: -2.8973 79 | panda_joint6: 80 | has_acceleration_limits: true 81 | has_effort_limits: true 82 | has_position_limits: true 83 | has_velocity_limits: true 84 | max_acceleration: 20.0 85 | max_effort: 12.0 86 | max_position: 3.7525 87 | max_velocity: 2.6100 88 | min_position: -0.0175 89 | panda_joint7: 90 | has_acceleration_limits: true 91 | has_effort_limits: true 92 | has_position_limits: true 93 | has_velocity_limits: true 94 | max_acceleration: 20.0 95 | max_effort: 12.0 96 | max_position: 2.8973 97 | max_velocity: 2.6100 98 | min_position: -2.8973 -------------------------------------------------------------------------------- /robo_gym/envs/panda/panda_ee_pos.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from robo_gym.envs.base.robogym_env import * 4 | from robo_gym.envs.manipulator.ee_pos_base import ManipulatorEePosEnv 5 | from robo_gym.envs.panda.panda_base import PandaBaseEnv 6 | 7 | 8 | class EndEffectorPositioningPanda(ManipulatorEePosEnv): 9 | def __init__(self, **kwargs): 10 | # not too nice - repeated in super init 11 | self._config = kwargs 12 | 13 | obs_nodes: list[ObservationNode] | None = kwargs.get( 14 | RoboGymEnv.KW_OBSERVATION_NODES 15 | ) 16 | if obs_nodes is 
None: 17 | # having this hardcoded index is suboptimal; 1 should be correct since 0 will be the manipulator observation node 18 | obs_nodes = [LastActionObservationNode(**self.get_obs_node_setup_kwargs(1))] 19 | kwargs[RoboGymEnv.KW_OBSERVATION_NODES] = obs_nodes 20 | 21 | PandaBaseEnv.set_robot_defaults(kwargs) 22 | 23 | # these are probably not too meaningful since first 6 joints are equal to UR 24 | value = [1.5, 0.25, 0.5, 1.0, 0.4, 3.14, 1.0] 25 | kwargs.setdefault(ManipulatorEePosEnv.KW_RANDOM_JOINT_OFFSET, value) 26 | 27 | kwargs.setdefault(ManipulatorEePosEnv.KW_EE_TARGET_VOLUME_BOUNDING_BOX, True) 28 | kwargs.setdefault(ManipulatorEePosEnv.KW_EE_POSITION_X_RANGE, [0.35, 0.65]) 29 | kwargs.setdefault(ManipulatorEePosEnv.KW_EE_POSITION_Y_RANGE, [-0.2, 0.2]) 30 | kwargs.setdefault(ManipulatorEePosEnv.KW_EE_POSITION_Z_RANGE, [0.15, 0.5]) 31 | 32 | super().__init__(**kwargs) 33 | 34 | def get_launch_cmd(self) -> str: 35 | # TODO make string composition more dynamic 36 | return f"roslaunch panda_robot_server panda_robot_server.launch \ 37 | rviz_gui:={self._config.get(self.KW_RVIZ_GUI_FLAG, True)} \ 38 | gazebo_gui:={self._config.get(self.KW_GAZEBO_GUI_FLAG, True)} \ 39 | world_name:=tabletop_sphere50_no_collision_no_gravity.world \ 40 | reference_frame:=world \ 41 | max_velocity_scale_factor:={self._config.get(self.KW_MAX_VELOCITY_SCALE_FACTOR, .2)} \ 42 | action_cycle_rate:={self.action_rate} \ 43 | objects_controller:=true \ 44 | rs_mode:=1object \ 45 | action_mode:={self.action_mode}\ 46 | n_objects:=1.0 \ 47 | object_0_model_name:=sphere50_no_collision \ 48 | object_0_frame:=target " 49 | 50 | 51 | class EndEffectorPositioningPandaSim(EndEffectorPositioningPanda): 52 | 53 | def __init__(self, **kwargs): 54 | kwargs[self.KW_REAL_ROBOT] = False 55 | super().__init__(**kwargs) 56 | 57 | 58 | class EndEffectorPositioningPandaRob(EndEffectorPositioningPanda): 59 | 60 | def __init__(self, **kwargs): 61 | kwargs[self.KW_REAL_ROBOT] = True 62 | super().__init__(**kwargs) 63 | -------------------------------------------------------------------------------- /robo_gym/envs/ur/ur_base.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from robo_gym.envs.base.robogym_env import * 4 | from robo_gym.envs.manipulator.manipulator_base import ( 5 | ManipulatorBaseEnv, 6 | ManipulatorActionNode, 7 | ) 8 | from robo_gym.utils.ur_utils import UR 9 | 10 | 11 | class URBaseEnv2(ManipulatorBaseEnv): 12 | 13 | KW_UR_MODEL_KEY = "ur_model" 14 | 15 | def __init__(self, **kwargs): 16 | 17 | URBaseEnv2.set_robot_defaults(kwargs) 18 | 19 | super().__init__(**kwargs) 20 | 21 | @staticmethod 22 | def set_robot_defaults(kwargs: dict[str, Any]): 23 | 24 | # prepare UR model depending on ur_model and set it in the kwargs 25 | robot_model: UR | None = kwargs.get(RoboGymEnv.KW_ROBOT_MODEL_OBJECT) 26 | if robot_model is None: 27 | robot_model = UR(model_key=kwargs.get(URBaseEnv2.KW_UR_MODEL_KEY)) 28 | kwargs[RoboGymEnv.KW_ROBOT_MODEL_OBJECT] = robot_model 29 | 30 | # default action rate 31 | kwargs.setdefault(RoboGymEnv.KW_ACTION_RATE, 20.0) 32 | 33 | # default max episode steps 34 | kwargs.setdefault(RewardNode.KW_MAX_EPISODE_STEPS, 300) 35 | 36 | # default fixed joints: last joint 37 | prefix_fix = ManipulatorActionNode.KW_PREFIX_FIX_JOINT 38 | last_joint_name = robot_model.joint_names[-1] 39 | kw_fix_last_joint = prefix_fix + last_joint_name 40 | kwargs.setdefault(kw_fix_last_joint, True) 41 | 42 | # default joint positions 43 | value = 
[ 44 | 0.0, 45 | -2.5, 46 | 1.5, 47 | 0.0, 48 | -1.4, 49 | 0.0, 50 | ] 51 | kwargs.setdefault(ManipulatorBaseEnv.KW_JOINT_POSITIONS, value) 52 | 53 | def get_launch_cmd(self) -> str: 54 | # TODO make string composition more dynamic 55 | return f"roslaunch ur_robot_server ur_robot_server.launch \ 56 | rviz_gui:={self._config.get(self.KW_RVIZ_GUI_FLAG, True)} \ 57 | gazebo_gui:={self._config.get(self.KW_GAZEBO_GUI_FLAG, True)} \ 58 | world_name:=empty.world \ 59 | reference_frame:=base_link \ 60 | max_velocity_scale_factor:={self._config.get(self.KW_MAX_VELOCITY_SCALE_FACTOR, .2)} \ 61 | action_cycle_rate:={self.action_rate} \ 62 | rs_mode:=only_robot \ 63 | ur_model:={self.ur_model_key}" 64 | 65 | @property 66 | def ur_model_key(self) -> str: 67 | return self._config.get(URBaseEnv2.KW_UR_MODEL_KEY) 68 | 69 | 70 | class EmptyEnvironment2URSim(URBaseEnv2): 71 | 72 | def __init__(self, **kwargs): 73 | kwargs[self.KW_REAL_ROBOT] = False 74 | super().__init__(**kwargs) 75 | 76 | 77 | class EmptyEnvironment2URRob(URBaseEnv2): 78 | 79 | def __init__(self, **kwargs): 80 | kwargs[self.KW_REAL_ROBOT] = True 81 | super().__init__(**kwargs) 82 | -------------------------------------------------------------------------------- /.gitlab-ci.yml: -------------------------------------------------------------------------------- 1 | stages: 2 | - build 3 | - deploy 4 | 5 | variables: 6 | PYTHON_VER: "3.8" 7 | ROS_DISTRO: "noetic" 8 | ROBO_GYM_IMAGE: $CI_REGISTRY_IMAGE:$PYTHON_VER-$CI_COMMIT_SHA 9 | ROBOT_SERVERS_IMAGE: "$CI_REGISTRY/robo-gym/robot-servers:$ROS_DISTRO-develop-latest" 10 | 11 | before_script: 12 | # docker login asks for the password to be passed through stdin for security 13 | # we use $CI_JOB_TOKEN here which is a special token provided by GitLab 14 | - echo -n $CI_JOB_TOKEN | docker login -u gitlab-ci-token --password-stdin $CI_REGISTRY 15 | 16 | # The pipeline needs at minimum one job to run 17 | dummy: 18 | stage: build 19 | variables: 20 | GIT_STRATEGY: none 21 | script: 22 | - ":" 23 | only: 24 | - /^docs.*$/ 25 | 26 | .build_robo-gym_image: &build_robo-gym_image_def 27 | image: docker:18.09 28 | services: 29 | - docker:18.09-dind 30 | tags: 31 | - docker-executor 32 | stage: build 33 | retry: 34 | max: 2 35 | script: 36 | # fetch the latest image (not failing if image is not found) 37 | - docker pull $CI_REGISTRY_IMAGE:$PYTHON_VER-latest || true 38 | - > 39 | docker build 40 | --pull 41 | --cache-from $CI_REGISTRY_IMAGE:$PYTHON_VER-latest 42 | --tag $ROBO_GYM_IMAGE 43 | --build-arg PYTHON_VER=$PYTHON_VER 44 | --build-arg CACHEBUST=$CI_COMMIT_SHA 45 | . 
46 | - docker push $ROBO_GYM_IMAGE 47 | - docker tag $ROBO_GYM_IMAGE $CI_REGISTRY_IMAGE:$PYTHON_VER-latest 48 | - docker push $CI_REGISTRY_IMAGE:$PYTHON_VER-latest 49 | except: 50 | - /^docs.*$/ 51 | 52 | build_robo-gym_image_py38: 53 | <<: *build_robo-gym_image_def 54 | variables: 55 | PYTHON_VER: "3.8" 56 | only: 57 | - schedules 58 | 59 | build_robo-gym_image_py39: 60 | <<: *build_robo-gym_image_def 61 | variables: 62 | PYTHON_VER: "3.9" 63 | only: 64 | - schedules 65 | 66 | deploy_docker_image: 67 | image: docker:18.09 68 | services: 69 | - docker:18.09-dind 70 | tags: 71 | - docker-executor 72 | stage: deploy 73 | variables: 74 | GIT_STRATEGY: none 75 | script: 76 | - echo -n $CI_JOB_TOKEN | docker login -u gitlab-ci-token --password-stdin $CI_REGISTRY 77 | - docker pull $ROBO_GYM_IMAGE 78 | - docker tag $ROBO_GYM_IMAGE $CI_REGISTRY_IMAGE/$CI_COMMIT_BRANCH:latest 79 | - docker push $CI_REGISTRY_IMAGE/$CI_COMMIT_BRANCH:latest 80 | only: 81 | - master 82 | - develop 83 | except: 84 | - /^docs.*$/ 85 | 86 | deploy_production: 87 | image: python:3.8-slim-buster 88 | before_script: 89 | - pip3 install twine 90 | - python3 setup.py sdist 91 | stage: deploy 92 | variables: 93 | TWINE_USERNAME: $PYPI_USERNAME 94 | TWINE_PASSWORD: $PYPI_PASSWORD 95 | script: 96 | - twine upload --repository-url https://upload.pypi.org/legacy/ dist/* 97 | only: 98 | - /^v.*$/ 99 | except: 100 | refs: 101 | - branches 102 | - triggers 103 | -------------------------------------------------------------------------------- /robo_gym/utils/table.py: -------------------------------------------------------------------------------- 1 | import csv 2 | 3 | 4 | class RowIterator(object): 5 | def __init__(self, table, starting_row_index=-1): 6 | self.table = table 7 | self.current_row_index = starting_row_index 8 | 9 | def __next__(self): 10 | self.current_row_index += 1 11 | if not self.has_current_row(): 12 | raise StopIteration 13 | result = self.get_current_row() 14 | return result 15 | 16 | def get_current_row(self): 17 | return self.table.get_row(self.current_row_index) 18 | 19 | def has_current_row(self): 20 | return self.table and self.current_row_index < self.table.count_rows() 21 | 22 | def has_next_row(self): 23 | return self.table and self.current_row_index + 1 < self.table.count_rows() 24 | 25 | def get_columns(self): 26 | return self.table.get_columns() 27 | 28 | 29 | class Table(object): 30 | 31 | def __init__(self, rows=None, columns=None): 32 | self.rows = rows if rows is not None else []  # list of dicts; avoid a shared mutable default that add_row would mutate across instances 33 | self.columns = columns if columns is not None else [] 34 | if not self.columns: 35 | if self.rows: 36 | self.columns = list(self.rows[0].keys())  # keys() must be called, not stored as a bound method 37 | 38 | def __iter__(self): 39 | return RowIterator(self) 40 | 41 | def get_row(self, row_index): 42 | return self.rows[row_index] 43 | 44 | def add_row(self, row): 45 | self.rows.append(row) 46 | if not self.columns: 47 | self.columns = row.keys() 48 | 49 | def count_rows(self): 50 | return len(self.rows) 51 | 52 | def get_columns(self): 53 | return self.columns 54 | 55 | def clear_rows(self): 56 | self.rows = [] 57 | 58 | 59 | def write_csv(table, file_path, delimiter=",", value_formatters=None): 60 | with open(file_path, "w", newline="") as file: 61 | writer = csv.DictWriter(file, table.columns, delimiter=delimiter) 62 | writer.writeheader() 63 | for row in table: 64 | processed_row = row 65 | for column in table.columns: 66 | value = row[column] 67 | if value_formatters is None: 68 | value = str(value) 69 | elif callable(value_formatters): 70 | value = value_formatters(value) 71 | elif column in value_formatters and
callable(value_formatters[column]): 72 | value = value_formatters[column](value) 73 | processed_row[column] = value 74 | writer.writerow(processed_row) 75 | 76 | 77 | def read_csv(file_path, delimiter=",", value_parsers=None): 78 | with open(file_path, "r") as file: 79 | reader = csv.DictReader(file, delimiter=delimiter) 80 | table = Table([], reader.fieldnames) 81 | for row in reader: 82 | processed_row = row 83 | for column in reader.fieldnames: 84 | value = row[column] 85 | if value_parsers is None: 86 | value = float(value) 87 | elif callable(value_parsers): 88 | value = value_parsers(value) 89 | elif column in value_parsers and callable(value_parsers[column]): 90 | value = value_parsers[column](value) 91 | processed_row[column] = value 92 | table.add_row(processed_row) 93 | return table 94 | -------------------------------------------------------------------------------- /robo_gym/envs/ur/ur_isaac_reach.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from robo_gym.envs.base.robogym_env import * 4 | from robo_gym.envs.manipulator.isaac_reach import * 5 | from robo_gym.envs.ur.ur_base import URBaseEnv2 6 | from robo_gym.utils.ur_utils import UR 7 | 8 | 9 | class IsaacReachUR(IsaacReachEnv): 10 | def __init__(self, **kwargs): 11 | # not too nice - repeated in super init 12 | self._config = kwargs 13 | 14 | kwargs.setdefault(URBaseEnv2.KW_UR_MODEL_KEY, "ur10") 15 | 16 | IsaacReachUR.set_robot_defaults(kwargs) 17 | 18 | obs_nodes: list[ObservationNode] | None = kwargs.get( 19 | RoboGymEnv.KW_OBSERVATION_NODES 20 | ) 21 | if obs_nodes is None: 22 | # having this hardcoded index is suboptimal; 1 should be correct since 0 will be the manipulator observation node 23 | obs_nodes = [LastActionObservationNode(**self.get_obs_node_setup_kwargs(1))] 24 | kwargs[RoboGymEnv.KW_OBSERVATION_NODES] = obs_nodes 25 | 26 | super().__init__(**kwargs) 27 | 28 | @staticmethod 29 | def set_robot_defaults(kwargs: dict[str, Any]): 30 | 31 | # prepare UR model depending on ur_model and set it in the kwargs 32 | ur_model: UR | None = kwargs.get(RoboGymEnv.KW_ROBOT_MODEL_OBJECT) 33 | if ur_model is None: 34 | ur_model = UR(model_key=kwargs.get(URBaseEnv2.KW_UR_MODEL_KEY)) 35 | kwargs[RoboGymEnv.KW_ROBOT_MODEL_OBJECT] = ur_model 36 | 37 | # default action rate 38 | kwargs.setdefault(RoboGymEnv.KW_ACTION_RATE, 30.0) 39 | 40 | # default max episode steps 41 | kwargs.setdefault(RewardNode.KW_MAX_EPISODE_STEPS, 600) 42 | 43 | kwargs.setdefault(ManipulatorEePosEnv.KW_CONTINUE_EXCEPT_COLLISION, True) 44 | # default joint positions 45 | kwargs.setdefault( 46 | ManipulatorBaseEnv.KW_JOINT_POSITIONS, [0, -1.7120, 1.7120, 0, 0, 0] 47 | ) 48 | 49 | kwargs.setdefault(ManipulatorEePosEnv.KW_EE_ROTATION_PITCH_RANGE, math.pi / 2) 50 | 51 | def get_launch_cmd(self) -> str: 52 | # TODO make string composition more dynamic 53 | # TODO duplicated from EndEffectorPositioning2UR 54 | return f"roslaunch ur_robot_server ur_robot_server.launch \ 55 | rviz_gui:={self._config.get(self.KW_RVIZ_GUI_FLAG, True)} \ 56 | gazebo_gui:={self._config.get(self.KW_GAZEBO_GUI_FLAG, True)} \ 57 | world_name:=isaactabletop_sphere50_no_collision.world \ 58 | reference_frame:=base_link \ 59 | ee_frame:=tool1 \ 60 | max_velocity_scale_factor:={self._config.get(self.KW_MAX_VELOCITY_SCALE_FACTOR, .2)} \ 61 | action_cycle_rate:={self.action_rate} \ 62 | objects_controller:=true \ 63 | rs_mode:=1object \ 64 | n_objects:=1.0 \ 65 | object_0_model_name:=sphere50_no_collision \ 66 | 
object_0_frame:=target \ 67 | z:=0.0 \ 68 | ur_model:={self.ur_model_key}" 69 | 70 | @property 71 | def ur_model_key(self) -> str: 72 | return self._config.get(URBaseEnv2.KW_UR_MODEL_KEY) 73 | 74 | 75 | class IsaacReachURSim(IsaacReachUR): 76 | 77 | def __init__(self, **kwargs): 78 | kwargs[self.KW_REAL_ROBOT] = False 79 | super().__init__(**kwargs) 80 | 81 | 82 | class IsaacReachURRob(IsaacReachUR): 83 | 84 | def __init__(self, **kwargs): 85 | kwargs[self.KW_REAL_ROBOT] = True 86 | super().__init__(**kwargs) 87 | -------------------------------------------------------------------------------- /docs/examples/rllib/README.md: -------------------------------------------------------------------------------- 1 | # Getting started using RLlib with robo-gym 2 | 3 | ## Prerequisites 4 | 5 | Tested on Ubuntu 20.04 using Python 3.8. 6 | 7 | ### Installation of robo-gym 8 | 9 | Install an up-to-date version of robo-gym-server-modules. It is recommended to check out version 0.3.0.0 or newer and install it from source using 10 | 11 | pip install -e . 12 | 13 | Install robo-gym from your local copy using 14 | 15 | pip install -e . 16 | 17 | ### Installation of RLlib 18 | 19 | Follow [instructions from RLlib](https://docs.ray.io/en/latest/rllib/index.html). 20 | 21 | It is recommended to also install gputil for use by RLlib: 22 | 23 | pip install gputil 24 | 25 | At runtime, you may encounter this error: 26 | 27 | (...) ray/_private/accelerators/nvidia_gpu.py", line 71, in get_current_node_accelerator_type 28 | device_name = device_name.decode("utf-8") 29 | UnicodeDecodeError: 'utf-8' codec can't decode byte 0xf8 in position 0: invalid start byte 30 | 31 | It is typically resolved by updating the NVIDIA GPU driver. If the error occurs in a WSL environment, this typically concerns the Windows driver. 32 | 33 | ## Using RLlib with robo-gym 34 | 35 | The scripts here are based on examples from RLlib. They are tested against Ray/RLlib v2.1.0. The API may differ if you use a different version. 36 | 37 | Note that setting the environment argument `rs_state_to_info` to `False` when initializing UR environments is required for being able to save checkpoints. 38 | 39 | ### Scripts for training followed by inference 40 | 41 | * [Training, saving and inference for MiR100](./training_inference_mir.py) 42 | * [Training, saving and inference for UR5](./training_inference_ur.py) 43 | 44 | Each training script does a few inference runs in its last phase, applying the trained model to a new instance of the respective environment. The training scripts support a number of arguments to customize the training and inference runs. Call a script with `-h` or `--help` to see the available arguments. 45 | 46 | In addition, the environment variable `RLLIB_NUM_GPUS` can be set to the desired value (default: 0) to set the number of GPUs that RLlib should use. 47 | 48 | The inference scripts for the policy-only approach may need to be adjusted if a non-default framework (i.e., not Torch) was used for the training. 49 | 50 | Note that the default environment registration from Gymnasium (establishing the link from environment names to the implementing Python classes) is not sufficient for using the environments in RLlib agents. This is why each of the training scripts uses RLlib's own registration mechanism and consequently has to import the respective environment class, as sketched below.
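For illustration, the registration pattern used by the training scripts boils down to the following sketch (taken from the UR training script; adjust the environment class and name for other environments):

    from ray.tune.registry import register_env
    from robo_gym.envs.ur.ur_ee_positioning import EndEffectorPositioningURSim

    def env_creator_ur(config):
        # RLlib passes its env_config dict; forward it to the environment constructor.
        return EndEffectorPositioningURSim(**config)

    register_env(name="EndEffectorPositioningURSim-v0", env_creator=env_creator_ur)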
51 | 52 | ### Scripts for inference from policy checkpoint 53 | 54 | The saved checkpoints from the training scripts can also be used in the standalone inference scripts. 55 | 56 | Pass the policy checkpoint path as an argument to the script. When using a policy from an algorithm checkpoint, the policy checkpoint folder is typically obtained by appending `policies/default_policy` to the algorithm checkpoint path. 57 | 58 | * [Inference from a saved policy checkpoint for MiR100](./policy_from_cp_mir.py) 59 | * [Inference from a saved policy checkpoint for UR5](./policy_from_cp_ur.py) 60 | 61 | Inference from saved algorithm checkpoints is possible as an alternative to the restoring of policy checkpoints shown here. The inference code can be derived from the inference section of the training scripts. However, configured algorithms in RLlib contain their environment; for robo-gym environments, this implies that a simulation server is started for each worker instance. -------------------------------------------------------------------------------- /robo_gym/envs/panda/panda_isaac_reach.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import numpy as np 4 | 5 | from robo_gym.envs.base.robogym_env import * 6 | from robo_gym.envs.manipulator.isaac_reach import * 7 | from robo_gym.envs.panda.panda_base import PandaBaseEnv 8 | from robo_gym.utils.panda_utils import Panda 9 | 10 | 11 | class IsaacReachPanda(IsaacReachEnv): 12 | def __init__(self, **kwargs): 13 | # not too nice - repeated in super init 14 | self._config = kwargs 15 | 16 | IsaacReachPanda.set_robot_defaults(kwargs) 17 | 18 | obs_nodes: list[ObservationNode] | None = kwargs.get( 19 | RoboGymEnv.KW_OBSERVATION_NODES 20 | ) 21 | if obs_nodes is None: 22 | # having this hardcoded index is suboptimal; 1 should be correct since 0 will be the manipulator observation node 23 | obs_nodes = [LastActionObservationNode(**self.get_obs_node_setup_kwargs(1))] 24 | kwargs[RoboGymEnv.KW_OBSERVATION_NODES] = obs_nodes 25 | 26 | super().__init__(**kwargs) 27 | 28 | @staticmethod 29 | def set_robot_defaults(kwargs: dict[str, Any]): 30 | 31 | # prepare the robot model and set it in the kwargs 32 | robot_model: Panda | None = kwargs.get(RoboGymEnv.KW_ROBOT_MODEL_OBJECT) 33 | if robot_model is None: 34 | robot_model = Panda( 35 | model_key=kwargs.get(PandaBaseEnv.KW_PANDA_MODEL_KEY, "panda") 36 | ) 37 | kwargs[RoboGymEnv.KW_ROBOT_MODEL_OBJECT] = robot_model 38 | 39 | # default action rate 40 | kwargs.setdefault(RoboGymEnv.KW_ACTION_RATE, 30.0) 41 | 42 | # default max episode steps 43 | kwargs.setdefault(RewardNode.KW_MAX_EPISODE_STEPS, 600) 44 | 45 | kwargs.setdefault(ManipulatorEePosEnv.KW_CONTINUE_EXCEPT_COLLISION, False) 46 | # default joint positions 47 | # panda_finger_joint.*: 0.04 48 | kwargs.setdefault( 49 | ManipulatorBaseEnv.KW_JOINT_POSITIONS, 50 | [0, -0.569, 0, -2.81, 0, 3.037, 0.741], 51 | ) 52 | 53 | kwargs.setdefault(ManipulatorEePosEnv.KW_EE_ROTATION_PITCH_RANGE, math.pi) 54 | 55 | # values for finger joints in observation 56 | kwargs.setdefault( 57 | IsaacReachEnv.KW_ISAAC_OBS_EXTRA_STATIC_JOINTS, 58 | # not sure about min (-1) and max (1) 59 | np.array([[0.04, -1, 1], [0.04, -1, 1]], dtype=np.float32), 60 | ) 61 | 62 | def get_launch_cmd(self) -> str: 63 | # TODO make string composition more dynamic 64 | return f"roslaunch panda_robot_server panda_robot_server.launch \ 65 | rviz_gui:={self._config.get(self.KW_RVIZ_GUI_FLAG, True)} \ 66 | gazebo_gui:={self._config.get(self.KW_GAZEBO_GUI_FLAG,
True)} \ 67 | world_name:=isaactabletop_sphere50_no_collision.world \ 68 | reference_frame:=world \ 69 | max_velocity_scale_factor:={self._config.get(self.KW_MAX_VELOCITY_SCALE_FACTOR, .2)} \ 70 | action_cycle_rate:={self.action_rate} \ 71 | objects_controller:=true \ 72 | rs_mode:=1object \ 73 | action_mode:={self.action_mode} \ 74 | n_objects:=1.0 \ 75 | object_0_model_name:=sphere50_no_collision \ 76 | object_0_frame:=target \ 77 | z:=0.0 " 78 | 79 | 80 | class IsaacReachPandaSim(IsaacReachPanda): 81 | 82 | def __init__(self, **kwargs): 83 | kwargs[self.KW_REAL_ROBOT] = False 84 | super().__init__(**kwargs) 85 | 86 | 87 | class IsaacReachPandaRob(IsaacReachPanda): 88 | 89 | def __init__(self, **kwargs): 90 | kwargs[self.KW_REAL_ROBOT] = True 91 | super().__init__(**kwargs) 92 | -------------------------------------------------------------------------------- /robo_gym/utils/ur_parameters/ur10.yaml: -------------------------------------------------------------------------------- 1 | # Sources: 2 | # 3 | # - UR10 User Manual, Universal Robots, UR10/CB3 4 | # https://s3-eu-west-1.amazonaws.com/ur-support-site/77490/99203_UR10_User_Manual_en_Global.pdf 5 | joint_names: 6 | - shoulder_pan 7 | - shoulder_lift 8 | - elbow_joint 9 | - wrist_1 10 | - wrist_2 11 | - wrist_3 12 | remote_joint_names: 13 | - base_joint 14 | - shoulder_joint 15 | - elbow_joint 16 | - wrist_1_joint 17 | - wrist_2_joint 18 | - wrist_3_joint 19 | workspace_area: 20 | # Values for the recommended workspace area 21 | r: 1.300 22 | min_r: 0.300 23 | 24 | # From https://github.com/ros-industrial/universal_robot 25 | # Joints limits 26 | # 27 | # Sources: 28 | # 29 | # - UR10 User Manual, Universal Robots, UR10/CB3, Version 3.13 30 | # https://s3-eu-west-1.amazonaws.com/ur-support-site/69442/99203_UR10_User_Manual_en_Global.pdf 31 | # - Support > Articles > UR articles > Max. joint torques 32 | # https://www.universal-robots.com/articles/ur-articles/max-joint-torques 33 | # retrieved: 2020-06-16, last modified: 2020-06-09 34 | joint_limits: 35 | shoulder_pan: 36 | # acceleration limits are not publicly available 37 | has_acceleration_limits: false 38 | has_effort_limits: true 39 | has_position_limits: true 40 | has_velocity_limits: true 41 | max_effort: 330.0 42 | max_position: 6.28318530718 43 | max_velocity: 2.0943951 44 | min_position: -6.28318530718 45 | shoulder_lift: 46 | # acceleration limits are not publicly available 47 | has_acceleration_limits: false 48 | has_effort_limits: true 49 | has_position_limits: true 50 | has_velocity_limits: true 51 | max_effort: 330.0 52 | max_position: 6.28318530718 53 | max_velocity: 2.0943951 54 | min_position: -6.28318530718 55 | elbow_joint: 56 | # acceleration limits are not publicly available 57 | has_acceleration_limits: false 58 | has_effort_limits: true 59 | has_position_limits: true 60 | has_velocity_limits: true 61 | max_effort: 150.0 62 | # we artificially limit this joint to half its actual joint position limit 63 | # to avoid (MoveIt/OMPL) planning problems, as due to the physical 64 | # construction of the robot, it's impossible to rotate the 'elbow_joint' 65 | # over more than approx +- 1 pi (the shoulder lift joint gets in the way). 66 | # 67 | # This leads to planning problems as the search space will be divided into 68 | # two sections, with no connections from one to the other. 69 | # 70 | # Refer to https://github.com/ros-industrial/universal_robot/issues/265 for 71 | # more information.
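# For reference: position limits are expressed in radians and velocity limits
# in rad/s; 6.28318530718 is approx. 2*pi, 3.14159265359 is approx. pi, and a
# max_velocity of 2.0943951 rad/s corresponds to 120 deg/s.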
72 | max_position: 3.14159265359 73 | max_velocity: 3.14159265359 74 | min_position: -3.14159265359 75 | wrist_1: 76 | # acceleration limits are not publicly available 77 | has_acceleration_limits: false 78 | has_effort_limits: true 79 | has_position_limits: true 80 | has_velocity_limits: true 81 | max_effort: 56.0 82 | max_position: 6.28318530718 83 | max_velocity: 3.14159265359 84 | min_position: -6.28318530718 85 | wrist_2: 86 | # acceleration limits are not publicly available 87 | has_acceleration_limits: false 88 | has_effort_limits: true 89 | has_position_limits: true 90 | has_velocity_limits: true 91 | max_effort: 56.0 92 | max_position: 6.28318530718 93 | max_velocity: 3.14159265359 94 | min_position: -6.28318530718 95 | wrist_3: 96 | # acceleration limits are not publicly available 97 | has_acceleration_limits: false 98 | has_effort_limits: true 99 | has_position_limits: true 100 | has_velocity_limits: true 101 | max_effort: 56.0 102 | max_position: 6.28318530718 103 | max_velocity: 3.14159265359 104 | min_position: -6.28318530718 -------------------------------------------------------------------------------- /robo_gym/utils/ur_parameters/ur3.yaml: -------------------------------------------------------------------------------- 1 | # Sources: 2 | # 3 | # - UR3 User Manual, Universal Robots, UR3/CB3 4 | # https://s3-eu-west-1.amazonaws.com/ur-support-site/77348/99241_UR3_User_Manual_en_Global.pdf 5 | joint_names: 6 | - shoulder_pan 7 | - shoulder_lift 8 | - elbow_joint 9 | - wrist_1 10 | - wrist_2 11 | - wrist_3 12 | remote_joint_names: 13 | - base_joint 14 | - shoulder_joint 15 | - elbow_joint 16 | - wrist_1_joint 17 | - wrist_2_joint 18 | - wrist_3_joint 19 | workspace_area: 20 | # Values for the recommended workspace area 21 | r: 0.450 22 | min_r: 0.200 23 | 24 | # From https://github.com/ros-industrial/universal_robot 25 | # Joints limits 26 | # 27 | # Sources: 28 | # 29 | # - UR3 User Manual, Universal Robots, UR3/CB3, Version 3.13 30 | # https://s3-eu-west-1.amazonaws.com/ur-support-site/69300/99241_UR3_User_Manual_en_Global.pdf 31 | # - Support > Articles > UR articles > Max. joint torques 32 | # https://www.universal-robots.com/articles/ur-articles/max-joint-torques 33 | # retrieved: 2020-06-16, last modified: 2020-06-09 34 | joint_limits: 35 | shoulder_pan: 36 | # acceleration limits are not publicly available 37 | has_acceleration_limits: false 38 | has_effort_limits: true 39 | has_position_limits: true 40 | has_velocity_limits: true 41 | max_effort: 56.0 42 | max_position: 6.28318530718 43 | max_velocity: 3.14159265359 44 | min_position: -6.28318530718 45 | shoulder_lift: 46 | # acceleration limits are not publicly available 47 | has_acceleration_limits: false 48 | has_effort_limits: true 49 | has_position_limits: true 50 | has_velocity_limits: true 51 | max_effort: 56.0 52 | max_position: 6.28318530718 53 | max_velocity: 3.14159265359 54 | min_position: -6.28318530718 55 | elbow_joint: 56 | # acceleration limits are not publicly available 57 | has_acceleration_limits: false 58 | has_effort_limits: true 59 | has_position_limits: true 60 | has_velocity_limits: true 61 | max_effort: 28.0 62 | # we artificially limit this joint to half its actual joint position limit 63 | # to avoid (MoveIt/OMPL) planning problems, as due to the physical 64 | # construction of the robot, it's impossible to rotate the 'elbow_joint' 65 | # over more than approx +- 1 pi (the shoulder lift joint gets in the way). 
66 | # 67 | # This leads to planning problems as the search space will be divided into 68 | # two sections, with no connections from one to the other. 69 | # 70 | # Refer to https://github.com/ros-industrial/universal_robot/issues/265 for 71 | # more information. 72 | max_position: 3.14159265359 73 | max_velocity: 3.14159265359 74 | min_position: -3.14159265359 75 | wrist_1: 76 | # acceleration limits are not publicly available 77 | has_acceleration_limits: false 78 | has_effort_limits: true 79 | has_position_limits: true 80 | has_velocity_limits: true 81 | max_effort: 12.0 82 | max_position: 6.28318530718 83 | max_velocity: 6.28318530718 84 | min_position: -6.28318530718 85 | wrist_2: 86 | # acceleration limits are not publicly available 87 | has_acceleration_limits: false 88 | has_effort_limits: true 89 | has_position_limits: true 90 | has_velocity_limits: true 91 | max_effort: 12.0 92 | max_position: 6.28318530718 93 | max_velocity: 6.28318530718 94 | min_position: -6.28318530718 95 | wrist_3: 96 | # acceleration limits are not publicly available 97 | has_acceleration_limits: false 98 | has_effort_limits: true 99 | has_position_limits: true 100 | has_velocity_limits: true 101 | max_effort: 12.0 102 | max_position: 6.28318530718 103 | max_velocity: 6.28318530718 104 | min_position: -6.28318530718 -------------------------------------------------------------------------------- /robo_gym/utils/ur_parameters/ur10e.yaml: -------------------------------------------------------------------------------- 1 | # Sources: 2 | # 3 | # - Universal Robots e-Series, User Manual, UR10e 4 | # https://s3-eu-west-1.amazonaws.com/ur-support-site/77195/99405_UR10e_User_Manual_en_Global.pdf 5 | joint_names: 6 | - shoulder_pan 7 | - shoulder_lift 8 | - elbow_joint 9 | - wrist_1 10 | - wrist_2 11 | - wrist_3 12 | remote_joint_names: 13 | - base_joint 14 | - shoulder_joint 15 | - elbow_joint 16 | - wrist_1_joint 17 | - wrist_2_joint 18 | - wrist_3_joint 19 | workspace_area: 20 | # Values for the recommended workspace area 21 | r: 1.300 22 | min_r: 0.300 23 | 24 | # From https://github.com/ros-industrial/universal_robot 25 | # Joints limits 26 | # 27 | # Sources: 28 | # 29 | # - Universal Robots e-Series, User Manual, UR10e, Version 5.8 30 | # https://s3-eu-west-1.amazonaws.com/ur-support-site/69139/99405_UR10e_User_Manual_en_Global.pdf 31 | # - Support > Articles > UR articles > Max. 
joint torques 32 | # https://www.universal-robots.com/articles/ur-articles/max-joint-torques 33 | # retrieved: 2020-06-16, last modified: 2020-06-09 34 | joint_limits: 35 | shoulder_pan: 36 | # acceleration limits are not publicly available 37 | has_acceleration_limits: false 38 | has_effort_limits: true 39 | has_position_limits: true 40 | has_velocity_limits: true 41 | max_effort: 330.0 42 | max_position: 6.28318530718 43 | max_velocity: 2.0943951 44 | min_position: -6.28318530718 45 | shoulder_lift: 46 | # acceleration limits are not publicly available 47 | has_acceleration_limits: false 48 | has_effort_limits: true 49 | has_position_limits: true 50 | has_velocity_limits: true 51 | max_effort: 330.0 52 | max_position: 6.28318530718 53 | max_velocity: 2.0943951 54 | min_position: -6.28318530718 55 | elbow_joint: 56 | # acceleration limits are not publicly available 57 | has_acceleration_limits: false 58 | has_effort_limits: true 59 | has_position_limits: true 60 | has_velocity_limits: true 61 | max_effort: 150.0 62 | # we artificially limit this joint to half its actual joint position limit 63 | # to avoid (MoveIt/OMPL) planning problems, as due to the physical 64 | # construction of the robot, it's impossible to rotate the 'elbow_joint' 65 | # over more than approx +- 1 pi (the shoulder lift joint gets in the way). 66 | # 67 | # This leads to planning problems as the search space will be divided into 68 | # two sections, with no connections from one to the other. 69 | # 70 | # Refer to https://github.com/ros-industrial/universal_robot/issues/265 for 71 | # more information. 72 | max_position: 3.14159265359 73 | max_velocity: 3.14159265359 74 | min_position: -3.14159265359 75 | wrist_1: 76 | # acceleration limits are not publicly available 77 | has_acceleration_limits: false 78 | has_effort_limits: true 79 | has_position_limits: true 80 | has_velocity_limits: true 81 | max_effort: 56.0 82 | max_position: 6.28318530718 83 | max_velocity: 3.14159265359 84 | min_position: -6.28318530718 85 | wrist_2: 86 | # acceleration limits are not publicly available 87 | has_acceleration_limits: false 88 | has_effort_limits: true 89 | has_position_limits: true 90 | has_velocity_limits: true 91 | max_effort: 56.0 92 | max_position: 6.28318530718 93 | max_velocity: 3.14159265359 94 | min_position: -6.28318530718 95 | wrist_3: 96 | # acceleration limits are not publicly available 97 | has_acceleration_limits: false 98 | has_effort_limits: true 99 | has_position_limits: true 100 | has_velocity_limits: true 101 | max_effort: 56.0 102 | max_position: 6.28318530718 103 | max_velocity: 3.14159265359 104 | min_position: -6.28318530718 105 | -------------------------------------------------------------------------------- /robo_gym/utils/ur_parameters/ur16e.yaml: -------------------------------------------------------------------------------- 1 | # Sources: 2 | # 3 | # - Universal Robots e-Series, User Manual, UR16e 4 | # https://s3-eu-west-1.amazonaws.com/ur-support-site/77243/99473_UR16e_User_Manual_en_Global.pdf 5 | joint_names: 6 | - shoulder_pan 7 | - shoulder_lift 8 | - elbow_joint 9 | - wrist_1 10 | - wrist_2 11 | - wrist_3 12 | remote_joint_names: 13 | - base_joint 14 | - shoulder_joint 15 | - elbow_joint 16 | - wrist_1_joint 17 | - wrist_2_joint 18 | - wrist_3_joint 19 | workspace_area: 20 | # Values for the recommended workspace area 21 | r: 0.800 22 | min_r: 0.300 23 | 24 | # From https://github.com/ros-industrial/universal_robot 25 | # Joints limits 26 | # 27 | # Sources: 28 | # 29 | # - Universal 
Robots e-Series, User Manual, UR16e, Version 5.8 30 | # https://s3-eu-west-1.amazonaws.com/ur-support-site/69187/99473_UR16e_User_Manual_en_Global.pdf 31 | # - Support > Articles > UR articles > Max. joint torques 32 | # https://www.universal-robots.com/articles/ur-articles/max-joint-torques 33 | # retrieved: 2020-06-16, last modified: 2020-06-09 34 | joint_limits: 35 | shoulder_pan: 36 | # acceleration limits are not publicly available 37 | has_acceleration_limits: false 38 | has_effort_limits: true 39 | has_position_limits: true 40 | has_velocity_limits: true 41 | max_effort: 330.0 42 | max_position: 6.28318530718 43 | max_velocity: 2.0943951 44 | min_position: -6.28318530718 45 | shoulder_lift: 46 | # acceleration limits are not publicly available 47 | has_acceleration_limits: false 48 | has_effort_limits: true 49 | has_position_limits: true 50 | has_velocity_limits: true 51 | max_effort: 330.0 52 | max_position: 6.28318530718 53 | max_velocity: 2.0943951 54 | min_position: -6.28318530718 55 | elbow_joint: 56 | # acceleration limits are not publicly available 57 | has_acceleration_limits: false 58 | has_effort_limits: true 59 | has_position_limits: true 60 | has_velocity_limits: true 61 | max_effort: 150.0 62 | # we artificially limit this joint to half its actual joint position limit 63 | # to avoid (MoveIt/OMPL) planning problems, as due to the physical 64 | # construction of the robot, it's impossible to rotate the 'elbow_joint' 65 | # over more than approx +- 1 pi (the shoulder lift joint gets in the way). 66 | # 67 | # This leads to planning problems as the search space will be divided into 68 | # two sections, with no connections from one to the other. 69 | # 70 | # Refer to https://github.com/ros-industrial/universal_robot/issues/265 for 71 | # more information. 
72 | max_position: 3.14159265359 73 | max_velocity: 3.14159265359 74 | min_position: -3.14159265359 75 | wrist_1: 76 | # acceleration limits are not publicly available 77 | has_acceleration_limits: false 78 | has_effort_limits: true 79 | has_position_limits: true 80 | has_velocity_limits: true 81 | max_effort: 56.0 82 | max_position: 6.28318530718 83 | max_velocity: 3.14159265359 84 | min_position: -6.28318530718 85 | wrist_2: 86 | # acceleration limits are not publicly available 87 | has_acceleration_limits: false 88 | has_effort_limits: true 89 | has_position_limits: true 90 | has_velocity_limits: true 91 | max_effort: 56.0 92 | max_position: 6.28318530718 93 | max_velocity: 3.14159265359 94 | min_position: -6.28318530718 95 | wrist_3: 96 | # acceleration limits are not publicly available 97 | has_acceleration_limits: false 98 | has_effort_limits: true 99 | has_position_limits: true 100 | has_velocity_limits: true 101 | max_effort: 56.0 102 | max_position: 6.28318530718 103 | max_velocity: 3.14159265359 104 | min_position: -6.28318530718 105 | -------------------------------------------------------------------------------- /robo_gym/utils/ur_parameters/ur3e.yaml: -------------------------------------------------------------------------------- 1 | # Sources: 2 | # 3 | # - Universal Robots e-Series, User Manual, UR3e 4 | # https://s3-eu-west-1.amazonaws.com/ur-support-site/77099/99403_UR3e_User_Manual_en_Global.pdf 5 | joint_names: 6 | - shoulder_pan 7 | - shoulder_lift 8 | - elbow_joint 9 | - wrist_1 10 | - wrist_2 11 | - wrist_3 12 | remote_joint_names: 13 | - base_joint 14 | - shoulder_joint 15 | - elbow_joint 16 | - wrist_1_joint 17 | - wrist_2_joint 18 | - wrist_3_joint 19 | workspace_area: 20 | # Values for the recommended workspace area 21 | r: 0.450 22 | min_r: 0.200 23 | 24 | # From https://github.com/ros-industrial/universal_robot 25 | # Joints limits 26 | # 27 | # Sources: 28 | # 29 | # - Universal Robots e-Series, User Manual, UR3e, Version 5.8 30 | # https://s3-eu-west-1.amazonaws.com/ur-support-site/69043/99403_UR3e_User_Manual_en_Global.pdf 31 | # - Support > Articles > UR articles > Max. joint torques 32 | # https://www.universal-robots.com/articles/ur-articles/max-joint-torques 33 | # retrieved: 2020-06-16, last modified: 2020-06-09 34 | joint_limits: 35 | shoulder_pan: 36 | # acceleration limits are not publicly available 37 | has_acceleration_limits: false 38 | has_effort_limits: true 39 | has_position_limits: true 40 | has_velocity_limits: true 41 | max_effort: 56.0 42 | max_position: 6.28318530718 43 | max_velocity: 3.14159265359 44 | min_position: -6.28318530718 45 | shoulder_lift: 46 | # acceleration limits are not publicly available 47 | has_acceleration_limits: false 48 | has_effort_limits: true 49 | has_position_limits: true 50 | has_velocity_limits: true 51 | max_effort: 56.0 52 | max_position: 6.28318530718 53 | max_velocity: 3.14159265359 54 | min_position: -6.28318530718 55 | elbow_joint: 56 | # acceleration limits are not publicly available 57 | has_acceleration_limits: false 58 | has_effort_limits: true 59 | has_position_limits: true 60 | has_velocity_limits: true 61 | max_effort: 28.0 62 | # we artificially limit this joint to half its actual joint position limit 63 | # to avoid (MoveIt/OMPL) planning problems, as due to the physical 64 | # construction of the robot, it's impossible to rotate the 'elbow_joint' 65 | # over more than approx +- 1 pi (the shoulder lift joint gets in the way). 
66 | # 67 | # This leads to planning problems as the search space will be divided into 68 | # two sections, with no connections from one to the other. 69 | # 70 | # Refer to https://github.com/ros-industrial/universal_robot/issues/265 for 71 | # more information. 72 | max_position: 3.14159265359 73 | max_velocity: 3.14159265359 74 | min_position: -3.14159265359 75 | wrist_1: 76 | # acceleration limits are not publicly available 77 | has_acceleration_limits: false 78 | has_effort_limits: true 79 | has_position_limits: true 80 | has_velocity_limits: true 81 | max_effort: 12.0 82 | max_position: 6.28318530718 83 | max_velocity: 6.28318530718 84 | min_position: -6.28318530718 85 | wrist_2: 86 | # acceleration limits are not publicly available 87 | has_acceleration_limits: false 88 | has_effort_limits: true 89 | has_position_limits: true 90 | has_velocity_limits: true 91 | max_effort: 12.0 92 | max_position: 6.28318530718 93 | max_velocity: 6.28318530718 94 | min_position: -6.28318530718 95 | wrist_3: 96 | # acceleration limits are not publicly available 97 | has_acceleration_limits: false 98 | has_effort_limits: true 99 | has_position_limits: true 100 | has_velocity_limits: true 101 | max_effort: 12.0 102 | max_position: 6.28318530718 103 | max_velocity: 6.28318530718 104 | min_position: -6.28318530718 105 | -------------------------------------------------------------------------------- /robo_gym/utils/ur_parameters/ur5.yaml: -------------------------------------------------------------------------------- 1 | # Sources: 2 | # 3 | # - UR5 User Manual, Universal Robots, UR5/CB3 4 | # https://s3-eu-west-1.amazonaws.com/ur-support-site/77419/99202_UR5_User_Manual_en_Global.pdf 5 | joint_names: 6 | - shoulder_pan 7 | - shoulder_lift 8 | - elbow_joint 9 | - wrist_1 10 | - wrist_2 11 | - wrist_3 12 | remote_joint_names: 13 | - base_joint 14 | - shoulder_joint 15 | - elbow_joint 16 | - wrist_1_joint 17 | - wrist_2_joint 18 | - wrist_3_joint 19 | workspace_area: 20 | # Values for the recommended workspace area 21 | r: 0.750 22 | min_r: 0.200 23 | 24 | # From https://github.com/ros-industrial/universal_robot 25 | # Joints limits 26 | # 27 | # Sources: 28 | # 29 | # - UR5 User Manual, Universal Robots, UR5/CB3, Version 3.13 30 | # https://s3-eu-west-1.amazonaws.com/ur-support-site/69371/99202_UR5_User_Manual_en_Global.pdf 31 | # - Support > Articles > UR articles > Max. 
joint torques 32 | # https://www.universal-robots.com/articles/ur-articles/max-joint-torques 33 | # retrieved: 2020-06-16, last modified: 2020-06-09 34 | joint_limits: 35 | shoulder_pan: 36 | # acceleration limits are not publicly available 37 | has_acceleration_limits: false 38 | has_effort_limits: true 39 | has_position_limits: true 40 | has_velocity_limits: true 41 | max_effort: 150.0 42 | max_position: 6.28318530718 43 | max_velocity: 3.14159265359 44 | min_position: -6.28318530718 45 | shoulder_lift: 46 | # acceleration limits are not publicly available 47 | has_acceleration_limits: false 48 | has_effort_limits: true 49 | has_position_limits: true 50 | has_velocity_limits: true 51 | max_effort: 150.0 52 | max_position: 6.28318530718 53 | max_velocity: 3.14159265359 54 | min_position: -6.28318530718 55 | elbow_joint: 56 | # acceleration limits are not publicly available 57 | has_acceleration_limits: false 58 | has_effort_limits: true 59 | has_position_limits: true 60 | has_velocity_limits: true 61 | max_effort: 150.0 62 | # we artificially limit this joint to half its actual joint position limit 63 | # to avoid (MoveIt/OMPL) planning problems, as due to the physical 64 | # construction of the robot, it's impossible to rotate the 'elbow_joint' 65 | # over more than approx +- 1 pi (the shoulder lift joint gets in the way). 66 | # 67 | # This leads to planning problems as the search space will be divided into 68 | # two sections, with no connections from one to the other. 69 | # 70 | # Refer to https://github.com/ros-industrial/universal_robot/issues/265 for 71 | # more information. 72 | max_position: 3.14159265359 73 | max_velocity: 3.14159265359 74 | min_position: -3.14159265359 75 | wrist_1: 76 | # acceleration limits are not publicly available 77 | has_acceleration_limits: false 78 | has_effort_limits: true 79 | has_position_limits: true 80 | has_velocity_limits: true 81 | max_effort: 28.0 82 | max_position: 6.28318530718 83 | max_velocity: 3.14159265359 84 | min_position: -6.28318530718 85 | wrist_2: 86 | # acceleration limits are not publicly available 87 | has_acceleration_limits: false 88 | has_effort_limits: true 89 | has_position_limits: true 90 | has_velocity_limits: true 91 | max_effort: 28.0 92 | max_position: 6.28318530718 93 | max_velocity: 3.14159265359 94 | min_position: -6.28318530718 95 | wrist_3: 96 | # acceleration limits are not publicly available 97 | has_acceleration_limits: false 98 | has_effort_limits: true 99 | has_position_limits: true 100 | has_velocity_limits: true 101 | max_effort: 28.0 102 | max_position: 6.28318530718 103 | max_velocity: 3.14159265359 104 | min_position: -6.28318530718 105 | 106 | -------------------------------------------------------------------------------- /robo_gym/utils/ur_parameters/ur5e.yaml: -------------------------------------------------------------------------------- 1 | # Sources: 2 | # 3 | # - Universal Robots e-Series, User Manual, UR5e 4 | # https://s3-eu-west-1.amazonaws.com/ur-support-site/77147/99404_UR5e_User_Manual_en_Global.pdf 5 | joint_names: 6 | - shoulder_pan 7 | - shoulder_lift 8 | - elbow_joint 9 | - wrist_1 10 | - wrist_2 11 | - wrist_3 12 | remote_joint_names: 13 | - base_joint 14 | - shoulder_joint 15 | - elbow_joint 16 | - wrist_1_joint 17 | - wrist_2_joint 18 | - wrist_3_joint 19 | workspace_area: 20 | # Values for the recommended workspace area 21 | r: 0.750 22 | min_r: 0.200 23 | 24 | # From https://github.com/ros-industrial/universal_robot 25 | # Joints limits 26 | # 27 | # Sources: 28 | # 29 | # - 
Universal Robots e-Series, User Manual, UR5e, Version 5.8 30 | # https://s3-eu-west-1.amazonaws.com/ur-support-site/69091/99404_UR5e_User_Manual_en_Global.pdf 31 | # - Support > Articles > UR articles > Max. joint torques 32 | # https://www.universal-robots.com/articles/ur-articles/max-joint-torques 33 | # retrieved: 2020-06-16, last modified: 2020-06-09 34 | joint_limits: 35 | shoulder_pan: 36 | # acceleration limits are not publicly available 37 | has_acceleration_limits: false 38 | has_effort_limits: true 39 | has_position_limits: true 40 | has_velocity_limits: true 41 | max_effort: 150.0 42 | max_position: 6.28318530718 43 | max_velocity: 3.14159265359 44 | min_position: -6.28318530718 45 | shoulder_lift: 46 | # acceleration limits are not publicly available 47 | has_acceleration_limits: false 48 | has_effort_limits: true 49 | has_position_limits: true 50 | has_velocity_limits: true 51 | max_effort: 150.0 52 | max_position: 6.28318530718 53 | max_velocity: 3.14159265359 54 | min_position: -6.28318530718 55 | elbow_joint: 56 | # acceleration limits are not publicly available 57 | has_acceleration_limits: false 58 | has_effort_limits: true 59 | has_position_limits: true 60 | has_velocity_limits: true 61 | max_effort: 150.0 62 | # we artificially limit this joint to half its actual joint position limit 63 | # to avoid (MoveIt/OMPL) planning problems, as due to the physical 64 | # construction of the robot, it's impossible to rotate the 'elbow_joint' 65 | # over more than approx +- 1 pi (the shoulder lift joint gets in the way). 66 | # 67 | # This leads to planning problems as the search space will be divided into 68 | # two sections, with no connections from one to the other. 69 | # 70 | # Refer to https://github.com/ros-industrial/universal_robot/issues/265 for 71 | # more information. 
72 | max_position: 3.14159265359 73 | max_velocity: 3.14159265359 74 | min_position: -3.14159265359 75 | wrist_1: 76 | # acceleration limits are not publicly available 77 | has_acceleration_limits: false 78 | has_effort_limits: true 79 | has_position_limits: true 80 | has_velocity_limits: true 81 | max_effort: 28.0 82 | max_position: 6.28318530718 83 | max_velocity: 3.14159265359 84 | min_position: -6.28318530718 85 | wrist_2: 86 | # acceleration limits are not publicly available 87 | has_acceleration_limits: false 88 | has_effort_limits: true 89 | has_position_limits: true 90 | has_velocity_limits: true 91 | max_effort: 28.0 92 | max_position: 6.28318530718 93 | max_velocity: 3.14159265359 94 | min_position: -6.28318530718 95 | wrist_3: 96 | # acceleration limits are not publicly available 97 | has_acceleration_limits: false 98 | has_effort_limits: true 99 | has_position_limits: true 100 | has_velocity_limits: true 101 | max_effort: 28.0 102 | max_position: 6.28318530718 103 | max_velocity: 3.14159265359 104 | min_position: -6.28318530718 105 | -------------------------------------------------------------------------------- /robo_gym/__init__.py: -------------------------------------------------------------------------------- 1 | from gymnasium.envs.registration import register 2 | 3 | # naming convention: EnvnameRobotSim 4 | 5 | # Example Environments 6 | register( 7 | id="ExampleEnvSim-v0", 8 | entry_point="robo_gym.envs:ExampleEnvSim", 9 | ) 10 | 11 | register( 12 | id="ExampleEnvRob-v0", 13 | entry_point="robo_gym.envs:ExampleEnvRob", 14 | ) 15 | 16 | # MiR100 Environments 17 | register( 18 | id="NoObstacleNavigationMir100Sim-v0", 19 | entry_point="robo_gym.envs:NoObstacleNavigationMir100Sim", 20 | ) 21 | 22 | register( 23 | id="NoObstacleNavigationMir100Rob-v0", 24 | entry_point="robo_gym.envs:NoObstacleNavigationMir100Rob", 25 | ) 26 | 27 | register( 28 | id="ObstacleAvoidanceMir100Sim-v0", 29 | entry_point="robo_gym.envs:ObstacleAvoidanceMir100Sim", 30 | ) 31 | 32 | register( 33 | id="ObstacleAvoidanceMir100Rob-v0", 34 | entry_point="robo_gym.envs:ObstacleAvoidanceMir100Rob", 35 | ) 36 | 37 | # UR Environments 38 | register( 39 | id="EmptyEnvironmentURSim-v0", 40 | entry_point="robo_gym.envs:EmptyEnvironmentURSim", 41 | ) 42 | 43 | register( 44 | id="EmptyEnvironmentURRob-v0", 45 | entry_point="robo_gym.envs:EmptyEnvironmentURRob", 46 | ) 47 | 48 | register( 49 | id="EndEffectorPositioningURSim-v0", 50 | entry_point="robo_gym.envs:EndEffectorPositioningURSim", 51 | ) 52 | 53 | register( 54 | id="EndEffectorPositioningURRob-v0", 55 | entry_point="robo_gym.envs:EndEffectorPositioningURRob", 56 | ) 57 | 58 | register( 59 | id="BasicAvoidanceURSim-v0", 60 | entry_point="robo_gym.envs:BasicAvoidanceURSim", 61 | ) 62 | 63 | register( 64 | id="BasicAvoidanceURRob-v0", 65 | entry_point="robo_gym.envs:BasicAvoidanceURRob", 66 | ) 67 | 68 | register( 69 | id="AvoidanceRaad2022URSim-v0", 70 | entry_point="robo_gym.envs:AvoidanceRaad2022URSim", 71 | ) 72 | 73 | register( 74 | id="AvoidanceRaad2022URRob-v0", 75 | entry_point="robo_gym.envs:AvoidanceRaad2022URRob", 76 | ) 77 | 78 | register( 79 | id="AvoidanceRaad2022TestURSim-v0", 80 | entry_point="robo_gym.envs:AvoidanceRaad2022TestURSim", 81 | ) 82 | 83 | register( 84 | id="AvoidanceRaad2022TestURRob-v0", 85 | entry_point="robo_gym.envs:AvoidanceRaad2022TestURRob", 86 | ) 87 | 88 | # TODO register the following as v1 or v2 of the corresponding v0 ones above instead? 
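# Usage sketch (illustrative, not part of the registration code): once
# registered, any of these environments is created via Gymnasium's factory, e.g.
#   import gymnasium as gym
#   env = gym.make("EndEffectorPositioning2URSim-v0", ip="127.0.0.1")
# where `ip` points to the server manager host, as in the examples under
# docs/examples.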
89 | register( 90 | id="EmptyEnvironment2URSim-v0", 91 | entry_point="robo_gym.envs:EmptyEnvironment2URSim", 92 | ) 93 | 94 | register( 95 | id="EmptyEnvironment2URRob-v0", 96 | entry_point="robo_gym.envs:EmptyEnvironment2URRob", 97 | ) 98 | 99 | register( 100 | id="EndEffectorPositioning2URSim-v0", 101 | entry_point="robo_gym.envs:EndEffectorPositioning2URSim", 102 | ) 103 | 104 | register( 105 | id="EndEffectorPositioning2URRob-v0", 106 | entry_point="robo_gym.envs:EndEffectorPositioning2URRob", 107 | ) 108 | 109 | register( 110 | id="IsaacReachURSim-v0", 111 | entry_point="robo_gym.envs:IsaacReachURSim", 112 | ) 113 | 114 | register( 115 | id="IsaacReachURRob-v0", 116 | entry_point="robo_gym.envs:IsaacReachURRob", 117 | ) 118 | 119 | # Panda Environments 120 | register( 121 | id="EmptyEnvironmentPandaSim-v0", 122 | entry_point="robo_gym.envs:EmptyEnvironmentPandaSim", 123 | ) 124 | 125 | register( 126 | id="EmptyEnvironmentPandaRob-v0", 127 | entry_point="robo_gym.envs:EmptyEnvironmentPandaRob", 128 | ) 129 | 130 | 131 | register( 132 | id="EndEffectorPositioningPandaSim-v0", 133 | entry_point="robo_gym.envs:EndEffectorPositioningPandaSim", 134 | ) 135 | 136 | register( 137 | id="EndEffectorPositioningPandaRob-v0", 138 | entry_point="robo_gym.envs:EndEffectorPositioningPandaRob", 139 | ) 140 | register( 141 | id="IsaacReachPandaSim-v0", 142 | entry_point="robo_gym.envs:IsaacReachPandaSim", 143 | ) 144 | 145 | register( 146 | id="IsaacReachPandaRob-v0", 147 | entry_point="robo_gym.envs:IsaacReachPandaRob", 148 | ) 149 | -------------------------------------------------------------------------------- /docs/examples/rllib/training_inference_mir.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import gymnasium as gym 3 | import os 4 | 5 | import ray 6 | from ray import air, tune 7 | from ray.rllib.algorithms.algorithm import Algorithm 8 | from ray.tune.registry import get_trainable_cls, register_env 9 | 10 | import robo_gym 11 | from robo_gym.envs.mir100.mir100 import NoObstacleNavigationMir100Sim 12 | 13 | def env_creator_mir(config): 14 | return NoObstacleNavigationMir100Sim(**config) 15 | 16 | 17 | register_env( 18 | name="NoObstacleNavigationMir100Sim-v0", 19 | env_creator=env_creator_mir, 20 | ) 21 | 22 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 23 | parser.add_argument( 24 | "--run", type=str, default="PPO", help="The RLlib-registered algorithm to use." 
25 | ) 26 | parser.add_argument("--num-cpus", type=int, default=0) 27 | parser.add_argument( 28 | "--framework", 29 | choices=["tf", "tf2", "torch"], 30 | default="torch", 31 | help="The DL framework specifier.", 32 | ) 33 | parser.add_argument( 34 | "--stop-iters", 35 | type=int, 36 | default=200, 37 | help="Number of iterations to train before we do inference.", 38 | ) 39 | parser.add_argument( 40 | "--stop-timesteps", 41 | type=int, 42 | default=100000, 43 | help="Number of timesteps to train before we do inference.", 44 | ) 45 | parser.add_argument( 46 | "--stop-reward", 47 | type=float, 48 | default=150.0, 49 | help="Reward at which we stop training before we do inference.", 50 | ) 51 | parser.add_argument( 52 | "--explore-during-inference", 53 | action="store_true", 54 | help="Whether the trained policy should use exploration during action " 55 | "inference.", 56 | ) 57 | parser.add_argument( 58 | "--num-episodes-during-inference", 59 | type=int, 60 | default=10, 61 | help="Number of episodes to do inference over after training.", 62 | ) 63 | 64 | if __name__ == "__main__": 65 | args = parser.parse_args() 66 | 67 | ray.init(num_cpus=args.num_cpus or None) 68 | 69 | config = ( 70 | get_trainable_cls(args.run) 71 | .get_default_config() 72 | .environment("NoObstacleNavigationMir100Sim-v0", env_config={"ip": "127.0.0.1", "gui": False},) 73 | # Run with tracing enabled for tf2? 74 | .framework(args.framework) 75 | # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0. 76 | .resources(num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", "0"))) 77 | ) 78 | 79 | stop = { 80 | "training_iteration": args.stop_iters, 81 | "timesteps_total": args.stop_timesteps, 82 | "episode_reward_mean": args.stop_reward, 83 | } 84 | 85 | print("Training policy until desired reward/timesteps/iterations. ...") 86 | tuner = tune.Tuner( 87 | args.run, 88 | param_space=config.to_dict(), 89 | run_config=air.RunConfig( 90 | stop=stop, 91 | verbose=2, 92 | checkpoint_config=air.CheckpointConfig( 93 | checkpoint_frequency=1, checkpoint_at_end=True 94 | ), 95 | ), 96 | ) 97 | results = tuner.fit() 98 | 99 | print("Training completed. Restoring new Algorithm for action inference.") 100 | # Get the last checkpoint from the above training run. 101 | checkpoint = results.get_best_result().checkpoint 102 | # Create new Algorithm and restore its state from the last checkpoint. 103 | algo = Algorithm.from_checkpoint(checkpoint) 104 | 105 | # Create the env to do inference in. 106 | env = gym.make("NoObstacleNavigationMir100Sim-v0", ip="127.0.0.1", gui=True) 107 | obs, info = env.reset() 108 | env.render() 109 | 110 | num_episodes = 0 111 | episode_reward = 0.0 112 | 113 | while num_episodes < args.num_episodes_during_inference: 114 | # Compute an action (`a`). 115 | a = algo.compute_single_action( 116 | observation=obs, 117 | explore=args.explore_during_inference, 118 | policy_id="default_policy", # <- default value 119 | ) 120 | # Send the computed action `a` to the env. 121 | obs, reward, done, truncated, _ = env.step(a) 122 | env.render() 123 | episode_reward += reward 124 | # Is the episode `done`? -> Reset. 
125 | if done or truncated: 126 | print(f"Episode done: Total reward = {episode_reward}") 127 | obs, info = env.reset() 128 | env.render() 129 | num_episodes += 1 130 | episode_reward = 0.0 131 | 132 | algo.stop() 133 | env.close() 134 | ray.shutdown() 135 | -------------------------------------------------------------------------------- /docs/examples/rllib/training_inference_ur.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import gymnasium as gym 3 | import os 4 | 5 | import ray 6 | from ray import air, tune 7 | from ray.rllib.algorithms.algorithm import Algorithm 8 | from ray.tune.registry import get_trainable_cls, register_env 9 | 10 | import robo_gym 11 | from robo_gym.envs.ur.ur_ee_positioning import EndEffectorPositioningURSim 12 | 13 | def env_creator_ur(config): 14 | return EndEffectorPositioningURSim(**config) 15 | 16 | 17 | register_env( 18 | name="EndEffectorPositioningURSim-v0", 19 | env_creator=env_creator_ur, 20 | ) 21 | 22 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 23 | parser.add_argument( 24 | "--run", type=str, default="PPO", help="The RLlib-registered algorithm to use." 25 | ) 26 | parser.add_argument("--num-cpus", type=int, default=0) 27 | parser.add_argument( 28 | "--framework", 29 | choices=["tf", "tf2", "torch"], 30 | default="torch", 31 | help="The DL framework specifier.", 32 | ) 33 | parser.add_argument( 34 | "--stop-iters", 35 | type=int, 36 | default=200, 37 | help="Number of iterations to train before we do inference.", 38 | ) 39 | parser.add_argument( 40 | "--stop-timesteps", 41 | type=int, 42 | default=100000, 43 | help="Number of timesteps to train before we do inference.", 44 | ) 45 | parser.add_argument( 46 | "--stop-reward", 47 | type=float, 48 | default=150.0, 49 | help="Reward at which we stop training before we do inference.", 50 | ) 51 | parser.add_argument( 52 | "--explore-during-inference", 53 | action="store_true", 54 | help="Whether the trained policy should use exploration during action " 55 | "inference.", 56 | ) 57 | parser.add_argument( 58 | "--num-episodes-during-inference", 59 | type=int, 60 | default=10, 61 | help="Number of episodes to do inference over after training.", 62 | ) 63 | 64 | if __name__ == "__main__": 65 | args = parser.parse_args() 66 | 67 | ray.init(num_cpus=args.num_cpus or None) 68 | 69 | config = ( 70 | get_trainable_cls(args.run) 71 | .get_default_config() 72 | .environment("EndEffectorPositioningURSim-v0", env_config={"ip": "127.0.0.1", "gui": False, "ur_model": "ur5", "rs_state_to_info": False},) 73 | # Run with tracing enabled for tf2? 74 | .framework(args.framework) 75 | # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0. 76 | .resources(num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", "0"))) 77 | ) 78 | 79 | stop = { 80 | "training_iteration": args.stop_iters, 81 | "timesteps_total": args.stop_timesteps, 82 | "episode_reward_mean": args.stop_reward, 83 | } 84 | 85 | print("Training policy until desired reward/timesteps/iterations. ...") 86 | tuner = tune.Tuner( 87 | args.run, 88 | param_space=config.to_dict(), 89 | run_config=air.RunConfig( 90 | stop=stop, 91 | verbose=2, 92 | checkpoint_config=air.CheckpointConfig( 93 | checkpoint_frequency=1, checkpoint_at_end=True 94 | ), 95 | ), 96 | ) 97 | results = tuner.fit() 98 | 99 | print("Training completed. Restoring new Algorithm for action inference.") 100 | # Get the last checkpoint from the above training run.
101 | checkpoint = results.get_best_result().checkpoint 102 | # Create new Algorithm and restore its state from the last checkpoint. 103 | algo = Algorithm.from_checkpoint(checkpoint) 104 | 105 | # Create the env to do inference in. 106 | env = gym.make("EndEffectorPositioningURSim-v0", ip="127.0.0.1", gui=True, ur_model="ur5", rs_state_to_info=False) 107 | obs, info = env.reset() 108 | env.render() 109 | 110 | num_episodes = 0 111 | episode_reward = 0.0 112 | 113 | while num_episodes < args.num_episodes_during_inference: 114 | # Compute an action (`a`). 115 | a = algo.compute_single_action( 116 | observation=obs, 117 | explore=args.explore_during_inference, 118 | policy_id="default_policy", # <- default value 119 | ) 120 | # Send the computed action `a` to the env. 121 | obs, reward, done, truncated, _ = env.step(a) 122 | env.render() 123 | episode_reward += reward 124 | # Is the episode `done` or truncated? -> Reset. 125 | if done or truncated: 126 | print(f"Episode done: Total reward = {episode_reward}") 127 | obs, info = env.reset() 128 | env.render() 129 | num_episodes += 1 130 | episode_reward = 0.0 131 | 132 | algo.stop() 133 | env.close() 134 | ray.shutdown() 135 | -------------------------------------------------------------------------------- /docs/isaac_lab_compatibility.md: -------------------------------------------------------------------------------- 1 | # Isaac Lab Compatibility 2 | 3 | The environments of robo-gym include a few that are intended to provide compatibility with agents trained using [Isaac Lab](https://developer.nvidia.com/isaac/lab). The main purpose of this compatibility is to offer robo-gym as a way of transferring the trained policies from Isaac Lab to real robots, with robo-gym simulations as an intermediate step. 4 | 5 | An overview of the robo-gym environments implemented for this feature is included in [Modular Environments](modular_environments.md). 6 | 7 | ## Setup 8 | 9 | ### robo-gym 10 | 11 | #### Agent side 12 | 13 | Install robo-gym version >=2.1.0 and its prerequisites, including robo-gym-server-modules version 0.3.0.1. 14 | 15 | In addition, install Torch: 16 | 17 | `pip install torch==2.5.1 torchvision==0.20.1 --index-url https://download.pytorch.org/whl/cu121` 18 | 19 | #### Server side 20 | 21 | Install robo-gym-robot-servers >=2.1.1 and its prerequisites, including robo-gym-server-modules version 0.3.0.1 and the prerequisites for the robots you want to use, in your ROS Noetic Python environment. 22 | 23 | ### Isaac Lab 24 | 25 | Isaac Lab is not required to run our compatibility environments. 26 | 27 | If you already have a policy that you want to use, you don't need to run Isaac Lab. 28 | 29 | It is recommended to use separate Python environments for Isaac Lab and robo-gym (both sides), as their dependencies may be incompatible. 30 | 31 | Install Isaac Lab following their [instructions](https://isaac-sim.github.io/IsaacLab/main/source/setup/installation/index.html#local-installation). 32 | 33 | Tested with policies from this setup: 34 | * Isaac Sim: 4.5.0, installed via pip 35 | * Isaac Lab: 2.1.0/commit 5af5f388 (main as of 29 April 2025) 36 | * Python: 3.10.11 37 | * OS: Windows 10 Enterprise Build 19045 38 | 39 | ## Training 40 | 41 | Use the rsl_rl train script to perform training on either the Franka Reach or the UR10 Reach task. The commands are executed from the Isaac Lab folder with the corresponding Python environment active. 42 | 43 | See also the [overview of environments in Isaac Lab](https://isaac-sim.github.io/IsaacLab/main/source/overview/environments.html).
44 | 45 | If not running on Windows, replace `isaaclab.bat` with the corresponding script file name for your system. 46 | 47 | ### Franka Emika Panda 48 | 49 | Default training: 50 | 51 | `isaaclab.bat -p scripts/reinforcement_learning/rsl_rl/train.py --task=Isaac-Reach-Franka-v0 --headless` 52 | 53 | Play the environment with the trained policy from the last run (this needs to be run at least once to obtain the exported policy file): 54 | 55 | `isaaclab.bat -p scripts/reinforcement_learning/rsl_rl/play.py --task=Isaac-Reach-Franka-v0` 56 | 57 | ### UR10 58 | 59 | Default training: 60 | 61 | `isaaclab.bat -p scripts/reinforcement_learning/rsl_rl/train.py --task=Isaac-Reach-UR10-v0 --headless` 62 | 63 | Play the environment with the trained policy from the last run (this needs to be run at least once to obtain the exported policy file): 64 | 65 | `isaaclab.bat -p scripts/reinforcement_learning/rsl_rl/play.py --task=Isaac-Reach-UR10-v0` 66 | 67 | ## Running 68 | 69 | ### Server side 70 | 71 | #### Simulation 72 | 73 | In a command line where your robo-gym server-side ROS workspace is ready: 74 | 75 | `start-server-manager && attach-to-server-manager` 76 | 77 | #### Real robot 78 | 79 | The procedure is the same as for the corresponding robot's other environments. Due to limited progress in our own tests with real robots, we do not provide instructions for this case at this time. 80 | 81 | ### Agent side 82 | 83 | We use our [env test script](robo_envtest.py) for convenient testing and to show that the Isaac Lab Compatibility environments are used in the same way as our other environments. 84 | 85 | Arguments that need adaptation for your setup: 86 | * `-i`: server manager host 87 | * `-p`: path to the exported policy 88 | 89 | The commands are executed from this folder with the robo-gym Python environment active. 90 | 91 | #### UR10 92 | 93 | Command to run the [env test script](robo_envtest.py): 94 | 95 | `python robo-envtest.py -i localhost -g 1 -gz 1 -rv 0 -e IsaacReachURSim-v0 -u ur10 -et 500 -t 5000 -p D:\\PATH\\TO\\IsaacLab\\logs\\rsl_rl\\reach_ur10\\2025-04-30_09-36-28\\exported\\policy.pt` 96 | 97 | Your console output should give you information about the progress while a few episodes are executed. During each episode, you should see the UR10 in Gazebo reach the target to a degree comparable to what you see when playing in Isaac Lab. Note that the orientation of the target pose cannot be seen in Gazebo. The console output of the test script should give you a success indication at the end of each episode, with the distance in translation ("dist_coord") in meters and the distance in rotation ("dist_rot") in radians. With the default configuration of the environment, the target is located within a certain cuboid and rotated randomly around the vertical axis. 98 | 99 | The environment identifier usable in the `gym.make` command is `IsaacReachURSim-v0`. 100 | 101 | #### Panda 102 | 103 | Command to run the [env test script](robo_envtest.py): 104 | 105 | `python robo-envtest.py -i localhost -g 1 -gz 1 -rv 0 -e IsaacReachPandaSim-v0 -et 500 -t 5000 -p D:\\PATH\\TO\\IsaacLab\\logs\\rsl_rl\\franka_reach\\2025-05-16_17-53-15\\exported\\policy.pt` 106 | 107 | Note that, while results similar to those for the UR10 should be achievable for the Panda (though significantly longer training may be required), we have not yet achieved satisfactory performance when controlling the Panda robot via this approach.
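For reference, the core of what the test script does on the agent side can be sketched as follows (a simplified sketch, assuming the policy was exported as TorchScript by the rsl_rl play script; the `localhost` server manager host and the `policy.pt` path are placeholders for your setup):

    import gymnasium as gym
    import torch

    import robo_gym  # registers the robo-gym environments

    env = gym.make("IsaacReachPandaSim-v0", ip="localhost")
    policy = torch.jit.load("policy.pt")  # exported policy file

    obs, info = env.reset()
    done = truncated = False
    while not (done or truncated):
        with torch.no_grad():
            # The exported policy maps an observation batch to an action batch.
            action = policy(torch.as_tensor(obs, dtype=torch.float32).unsqueeze(0))
        obs, reward, done, truncated, info = env.step(action.squeeze(0).numpy())
    env.close()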
108 | 109 | The environment identifier usable in the `gym.make` command is `IsaacReachPandaSim-v0`. 110 | -------------------------------------------------------------------------------- /docs/the_framework.md: -------------------------------------------------------------------------------- 1 | # The robo-gym framework 2 | 3 | The following information is extracted from: *M. Lucchi, F. Zindler, S. Mühlbacher-Karrer, H. Pichler, "robo-gym - An Open Source Toolkit for Distributed Deep Reinforcement Learning on Real and Simulated Robots"* Submitted to IROS 2020. Under review. 4 | 5 | ![alt text](https://user-images.githubusercontent.com/36470989/79330117-4498dc80-7f19-11ea-9de4-bed4f6390f3a.jpg "The robo-gym framework") 6 | 7 | ## The Components 8 | 9 | ### Real or Simulated Robot 10 | 11 | This component includes the robot itself, the sensors, and the scene surrounding the robot. 12 | 13 | The interface to the robots and the sensors is implemented in ROS for both real and simulated hardware. The interface of the simulated robots generally corresponds to that of the real robots, augmented with features that would be impractical or too expensive to match in the real world. An example is virtual collision sensors that detect any collision of a robot link. 14 | The simulated and the real robot must use the same controllers. 15 | 16 | The simulated scenes are generated in Gazebo and are described using the SDFormat (SDF), an XML format. These can be created and manipulated in multiple ways: online, via API or GUI, and offline, by editing the SDF files. 17 | 18 | ### Command Handler 19 | 20 | Within the Markov Decision Process (MDP) framework, it is assumed that interactions between agent and environment take place at each of a series of discrete time steps. 21 | In a real-world system, however, time passes in a continuous manner. 22 | It is therefore necessary to make some adjustments to the real-world system so that its behavior comes closer to that defined by the MDP framework. 23 | The Command Handler (CH) implements these aspects. 24 | 25 | The CH uses a queue with capacity for one command message. 26 | When the component receives a command message, it tries to fill the queue with it. 27 | New elements are ignored until the element in the queue is consumed. 28 | The CH continuously publishes command messages to the robot at the frequency required by its controller. 29 | If, at the moment of publishing, the queue is full, the CH retrieves the command, publishes it to the robot the selected number of times, and then empties the queue. 30 | Otherwise, the handler publishes the command corresponding to an interruption of the movement execution: either zero velocities for mobile robots or preemption of the trajectory execution for robot arms. 31 | 32 | The framework's CH supports the standard *diff_drive_controller* and *joint_trajectory_controller* from ROS controllers. 33 | This covers the most common types of robots; nevertheless, this component can be easily implemented for any other ROS controller. A schematic code sketch of the queue mechanism is included further below. 34 | 35 | ### Robot Server 36 | 37 | It exposes a gRPC server that allows interaction with the robot through the integrated ROS bridge. 38 | 39 | The first function of the server is to store updated information regarding the state of the robot, which can be queried at any time via a gRPC service call. 40 | The robot and the sensors constantly publish information via ROS.
41 | The ROS Bridge collects the information from the different sources and stores it in a buffer as an array of values. 42 | The robot and the sensors update their information at different frequencies. 43 | The buffer is managed with a threading mechanism to ensure that the data delivered to the client is consistent and contains the latest information. 44 | 45 | The second function is to set the robot and the scene to a desired state. For instance, the user might want to set the joint positions of a robotic arm to specific values when resetting the environment. 46 | 47 | Lastly, it provides a service to publish commands to the CH. 48 | 49 | ### Environment 50 | 51 | This is the top element of the framework, which provides the standard OpenAI Gym interface to the agent. 52 | 53 | The main function of the Environment component is to define the composition of the state, the initial conditions of an episode and the reward function. 54 | In addition, it includes a gRPC stub which connects to the Robot Server to send actions, and to set or get the state of the robot and the scene. 55 | 56 | Following the framework provided by the Gym, environments are organized in classes, each derived from a common base class. In addition, robo-gym extends this setup with different wrappers for real and simulated robots. These wrappers differ in the constructor that is called. 57 | In the case of the simulated robot environment, the argument for the IP address refers to the Server Manager, whereas in the case of the real robot environment it refers to the IP address of the Robot Server. 58 | The Server Manager for simulated robots provides the address of the Robot Server to which the Environment gRPC stub is then connected. 59 | On the other hand, in the case of the real robot environment, extra attention to the network configuration is needed to guarantee communication with the hardware. Furthermore, environment failures and any emergency stops must be inspected by a human operator. 60 | As a consequence, the Server Manager is currently not employed when using real robots and the Environment gRPC stub is connected directly to the Robot Server, which is started manually. 61 | 62 | ### Server Manager 63 | 64 | It is the orchestrator of the Robot Servers; it exposes gRPC services to spawn, kill, and check Robot Servers. 65 | When used with simulated robots it handles the robot simulations as well. 66 | 67 | Each cluster of Robot Server, CH and real or simulated robot runs on an isolated ROS network. 68 | To achieve this, the Server Manager launches each cluster in an isolated shell environment handled with the help of tmux.
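As a schematic illustration of the Command Handler's queue mechanism described above, consider the following sketch (simplified and illustrative only, not the actual robo-gym implementation):

    import queue

    class CommandHandlerSketch:
        """Simplified sketch of the one-slot command queue described above."""

        def __init__(self, publish, stop_command, repetitions=1):
            self._queue = queue.Queue(maxsize=1)  # capacity for one command message
            self._publish = publish               # publishes one message to the robot
            self._stop_command = stop_command     # zero velocities / trajectory preemption
            self._repetitions = repetitions

        def receive(self, command):
            # New commands are ignored while the queued one is not yet consumed.
            try:
                self._queue.put_nowait(command)
            except queue.Full:
                pass

        def tick(self):
            # Called continuously at the frequency required by the robot controller.
            try:
                command = self._queue.get_nowait()  # consume, i.e. empty the queue
            except queue.Empty:
                self._publish(self._stop_command)
                return
            for _ in range(self._repetitions):
                self._publish(command)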
69 | 70 | This component implements error handling features to automatically restart the Robot Server and the robot simulation in case of: 71 | - an error in the connection to the Robot Server 72 | - an exceeded deadline when calling a Robot Server service 73 | - a non-responding simulation 74 | - data received from simulation out of defined bounds 75 | - a manual request of simulation restart 76 | -------------------------------------------------------------------------------- /tests/robo-gym/envs/ur/test_ur_avoidance_raad_fixed_spline.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import gymnasium as gym 4 | import robo_gym 5 | from robo_gym.utils import ur_utils 6 | import math 7 | import numpy as np 8 | import pathlib 9 | import json 10 | 11 | import pytest 12 | 13 | 14 | robo_gym_path = ( 15 | pathlib.Path(__file__) 16 | .parent.parent.parent.parent.parent.absolute() 17 | .joinpath("robo_gym") 18 | ) 19 | 20 | ur_models = [ 21 | pytest.param("ur3", marks=pytest.mark.skip(reason="not implemented yet")), 22 | pytest.param("ur3e", marks=pytest.mark.skip(reason="not implemented yet")), 23 | pytest.param("ur5", marks=pytest.mark.commit), 24 | pytest.param("ur5e", marks=pytest.mark.skip(reason="not implemented yet")), 25 | pytest.param("ur10", marks=pytest.mark.skip(reason="not implemented yet")), 26 | pytest.param("ur10e", marks=pytest.mark.skip(reason="not implemented yet")), 27 | pytest.param("ur16e", marks=pytest.mark.skip(reason="not implemented yet")), 28 | ] 29 | 30 | 31 | @pytest.fixture(scope="module", params=ur_models) 32 | def env(request): 33 | ip = os.environ.get("ROBOGYM_SERVERS_HOST", "robot-servers") 34 | env = gym.make("AvoidanceRaad2022TestURSim-v0", ip=ip, ur_model=request.param) 35 | env.request_param = request.param 36 | yield env 37 | env.kill_sim() 38 | 39 | 40 | @pytest.mark.commit 41 | def test_initialization(env): 42 | assert env.ur.model_key == env.request_param 43 | env.reset() 44 | done = False 45 | env.step([0, 0, 0, 0, 0]) 46 | for _ in range(10): 47 | if not done: 48 | action = env.action_space.sample() 49 | observation, _, done, _, _ = env.step(action) 50 | 51 | assert env.observation_space.contains(observation) 52 | 53 | 54 | @pytest.mark.skip(reason="This fails only in CI") 55 | @pytest.mark.flaky(reruns=3) 56 | def test_object_collision(env): 57 | params = { 58 | "ur5": {"object_coords": [-0.2, -0.1, 0.5], "n_steps": 250}, 59 | } 60 | 61 | env.reset( 62 | options={"fixed_object_position": params[env.ur.model_key]["object_coords"]} 63 | ) 64 | done = False 65 | i = 0 66 | while (not done) or i <= params[env.ur.model_key]["n_steps"]: 67 | _, _, done, _, info = env.step(np.zeros(5)) 68 | i += 1 69 | assert info["final_status"] == "collision" 70 | 71 | 72 | @pytest.mark.commit 73 | def test_robot_trajectory(env): 74 | params = { 75 | "ur5": { 76 | "traj_relative_path": "envs/ur/robot_trajectories/trajectory_raad_2022.json" 77 | } 78 | } 79 | 80 | env.reset() 81 | # load trajectory 82 | traj_path = robo_gym_path.joinpath(params[env.ur.model_key]["traj_relative_path"]) 83 | with open(traj_path) as json_file: 84 | trajectory = json.load(json_file)["trajectory"] 85 | 86 | action = np.zeros(5) 87 | for i in range(len(trajectory[0])): 88 | traj_joint_positions = trajectory[0][i] 89 | state, _, _, _, _ = env.step(action) 90 | ur_j_pos_norm = state[3:9] 91 | delta_joints = state[9:15] 92 | desired_joints = state[18:24] 93 | # check if robot follows the trajectory in all steps of trajectory segment 0 94 | assert
np.isclose( 95 | env.ur.normalize_joint_values(traj_joint_positions), ur_j_pos_norm, atol=0.1 96 | ).all() 97 | # check if calculation of delta joints is correct 98 | assert np.isclose( 99 | (ur_j_pos_norm - desired_joints), delta_joints, atol=0.01 100 | ).all() 101 | # check that the current trajectory point is a target point 102 | assert state[24] == 1.0 103 | # check that state machine has transitioned to segment 1 of trajectory 104 | traj_joint_positions = trajectory[1][0] 105 | state, _, _, _, _ = env.step(action) 106 | assert np.isclose( 107 | env.ur.normalize_joint_values(traj_joint_positions), state[3:9], atol=0.1 108 | ).all() 109 | 110 | 111 | test_ur_fixed_joints = [ 112 | ( 113 | "AvoidanceRaad2022TestURSim-v0", 114 | False, 115 | False, 116 | False, 117 | False, 118 | False, 119 | True, 120 | "ur5", 121 | ), # fixed wrist_3 122 | ( 123 | "AvoidanceRaad2022TestURSim-v0", 124 | True, 125 | False, 126 | True, 127 | False, 128 | False, 129 | False, 130 | "ur5", 131 | ), # fixed Base and Elbow 132 | ] 133 | 134 | 135 | @pytest.mark.nightly 136 | @pytest.mark.parametrize( 137 | "env_name, fix_base, fix_shoulder, fix_elbow, fix_wrist_1, fix_wrist_2, fix_wrist_3, ur_model", 138 | test_ur_fixed_joints, 139 | ) 140 | @pytest.mark.flaky(reruns=3) 141 | def test_fixed_joints( 142 | env_name, 143 | fix_base, 144 | fix_shoulder, 145 | fix_elbow, 146 | fix_wrist_1, 147 | fix_wrist_2, 148 | fix_wrist_3, 149 | ur_model, 150 | ): 151 | env = gym.make( 152 | env_name, 153 | ip="robot-servers", 154 | fix_base=fix_base, 155 | fix_shoulder=fix_shoulder, 156 | fix_elbow=fix_elbow, 157 | fix_wrist_1=fix_wrist_1, 158 | fix_wrist_2=fix_wrist_2, 159 | fix_wrist_3=fix_wrist_3, 160 | ur_model=ur_model, 161 | ) 162 | state = env.reset() 163 | initial_joint_positions = state[3:9] 164 | # Take 20 actions 165 | action = env.action_space.sample() 166 | for _ in range(20): 167 | state, _, _, _, _ = env.step(action) 168 | joint_positions = state[3:9] 169 | 170 | if fix_base: 171 | assert math.isclose( 172 | initial_joint_positions[0], joint_positions[0], abs_tol=0.05 173 | ) 174 | if fix_shoulder: 175 | assert math.isclose( 176 | initial_joint_positions[1], joint_positions[1], abs_tol=0.05 177 | ) 178 | if fix_elbow: 179 | assert math.isclose( 180 | initial_joint_positions[2], joint_positions[2], abs_tol=0.05 181 | ) 182 | if fix_wrist_1: 183 | assert math.isclose( 184 | initial_joint_positions[3], joint_positions[3], abs_tol=0.05 185 | ) 186 | if fix_wrist_2: 187 | assert math.isclose( 188 | initial_joint_positions[4], joint_positions[4], abs_tol=0.05 189 | ) 190 | if fix_wrist_3: 191 | assert math.isclose( 192 | initial_joint_positions[5], joint_positions[5], abs_tol=0.05 193 | ) 194 | 195 | env.kill_sim() 196 | -------------------------------------------------------------------------------- /tests/robo-gym/utils/test_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import math 3 | 4 | import pytest 5 | from robo_gym.utils import utils 6 | import numpy as np 7 | 8 | 9 | ### normalize_angle_rad ### 10 | test_normalize_angle_rad = [ 11 | (7.123, 0.8398), 12 | (0.8398, 0.8398), 13 | (-2.47, -2.47), 14 | (-47.47, 2.795), 15 | ] 16 | 17 | 18 | @pytest.mark.parametrize("a, expected_a", test_normalize_angle_rad) 19 | def test_normalize_angle_rad(a, expected_a): 20 | normalized_a = utils.normalize_angle_rad(a) 21 | assert abs(expected_a - normalized_a) < 0.01 22 | 23 | 24 | ### point_inside_circle ### 25 | test_inside_circle = [ 26 | (5.47, 3.51, 
6, 8, 5.23, True), 27 | (5.11, 2.23, 6, 8, 5.23, False), 28 | (5.69, 2.78, 6, 8, 5.23, True), 29 | ] 30 | 31 | 32 | @pytest.mark.parametrize( 33 | "x, y, center_x, center_y, radius, expected_result", test_inside_circle 34 | ) 35 | def test_point_inside_circle(x, y, center_x, center_y, radius, expected_result): 36 | is_inside = utils.point_inside_circle(x, y, center_x, center_y, radius) 37 | assert is_inside == expected_result 38 | 39 | 40 | ### rotate_point ### 41 | test_rotate_points = [ 42 | (3.13, 5.83, 0.785398, -1.91, 6.34), 43 | (4, 3, -2.14675498, 0.34, -4.99), 44 | (1, 3, -0.0, 1, 3), 45 | ] 46 | 47 | 48 | @pytest.mark.parametrize("x, y, theta, expected_x, expected_y", test_rotate_points) 49 | def test_rotate_point(x, y, theta, expected_x, expected_y): 50 | new_x, new_y = utils.rotate_point(x, y, theta) 51 | 52 | print(new_x, new_y) 53 | 54 | assert abs(new_x - expected_x) < 0.01 55 | assert abs(new_y - expected_y) < 0.01 56 | 57 | 58 | ### cartesian_to_polar_2d ### 59 | def test_cartesian_to_polar_2d_default_origin(): 60 | target_x = 3 61 | target_y = 4 62 | 63 | polar_r, polar_theta = utils.cartesian_to_polar_2d( 64 | x_target=target_x, y_target=target_y 65 | ) 66 | 67 | assert abs(polar_r - 5) < 0.01 68 | assert abs(polar_theta - 0.927) < 0.01 69 | 70 | 71 | def test_cartesian_to_polar_2d_set_origin(): 72 | target_x = 3 73 | target_y = 4 74 | origin_x = 1 75 | origin_y = 1 76 | 77 | polar_r, polar_theta = utils.cartesian_to_polar_2d( 78 | x_target=target_x, y_target=target_y, x_origin=origin_x, y_origin=origin_y 79 | ) 80 | 81 | assert abs(polar_r - 3.61) < 0.01 82 | assert abs(polar_theta - 0.9827949) < 0.01 83 | 84 | 85 | test_equal_points = [ 86 | ([0, 0], [0, 0], 0, 0), 87 | ([1, 1], [1, 1], 0, 0), 88 | ([3.141, 3.141], [3.141, 3.141], 0, 0), 89 | ] 90 | 91 | 92 | @pytest.mark.parametrize("p1, p2, expected_r, expected_theta", test_equal_points) 93 | def test_cartesian_to_polar_2d_equal_points(p1, p2, expected_r, expected_theta): 94 | 95 | polar_r, polar_theta = utils.cartesian_to_polar_2d( 96 | x_target=p1[0], y_target=p1[1], x_origin=p2[0], y_origin=p2[1] 97 | ) 98 | 99 | assert abs(polar_r - expected_r) < 0.001 100 | assert abs(polar_theta - expected_theta) < 0.001 101 | 102 | 103 | ### cartesian_to_polar_3d ### 104 | def test_cartesian_to_polar_3d(): 105 | cartesian_coordinates = [9, 4, 5] 106 | 107 | r, theta, phi = utils.cartesian_to_polar_3d( 108 | cartesian_coordinates=cartesian_coordinates 109 | ) 110 | 111 | assert abs(r - 11.045361017187) < 0.01 112 | assert abs(phi - 0.41822432957) < 0.01 113 | assert abs(theta - 1.1010291108682) < 0.01 114 | 115 | 116 | ### downsample_list_to_len ### 117 | test_downsample = [ 118 | 2, 119 | 4, 120 | 5, 121 | 6, 122 | 7, 123 | 8, 124 | 9, 125 | 10, 126 | 13, 127 | 15, 128 | 24, 129 | 51, 130 | 100, 131 | 101, 132 | 200, 133 | 201, 134 | 349, 135 | 501, 136 | ] 137 | 138 | 139 | @pytest.mark.parametrize("target_length", test_downsample) 140 | def test_downsample_list_to_len(target_length): 141 | input_length = 1000 142 | sample_list = [i for i in range(input_length)] 143 | 144 | downsampled_list = utils.downsample_list_to_len( 145 | data=sample_list, output_len=target_length 146 | ) 147 | 148 | assert len(downsampled_list) == target_length 149 | 150 | 151 | ### change_reference_frame ### 152 | def test_translation_change_reference_frame(): 153 | 154 | point = [5, 3, 2] 155 | translation = [-11, 6, -1] 156 | quaternion = [0, 0, 0, 1] 157 | 158 | assert ( 159 | utils.change_reference_frame(point, translation, quaternion) == [-6, 9, 
1] 160 | ).all() 161 | 162 | 163 | def test_rotation_change_reference_frame(): 164 | 165 | point = [-0.250, 0.256, 1.118] 166 | translation = [0.0, 0.0, -0.227] 167 | quaternion = [0.0, 0.0, 1.0, 0.0] 168 | 169 | assert np.allclose( 170 | a=utils.change_reference_frame(point, translation, quaternion), 171 | b=[0.250, -0.256, 0.890], 172 | atol=0.001, 173 | ) 174 | 175 | 176 | def test_quat_from_rpy(): 177 | roll = 0.25 178 | pitch = 0.5 179 | yaw = 0.75 180 | 181 | quat_scipy = utils.quat_from_euler_xyz(roll, pitch, yaw) 182 | quat_isaac = utils.quat_from_euler_xyz_isaac(roll, pitch, yaw) 183 | assert np.allclose(a=quat_scipy, b=quat_isaac, atol=0.000001) 184 | 185 | 186 | def test_quat_from_rpy_isaac_panda(): 187 | roll = 0.0 188 | pitch = math.pi 189 | for yaw in [-1.2807, -1.5201, 0.419, -0.6453, 2.2513]: 190 | 191 | quat_scipy = utils.quat_from_euler_xyz(roll, pitch, yaw) 192 | quat_isaac = utils.quat_from_euler_xyz_isaac(roll, pitch, yaw) 193 | assert np.allclose(a=quat_scipy, b=quat_isaac, atol=0.0001) 194 | 195 | 196 | def test_rot_diff(): 197 | r1 = utils.quat_from_euler_xyz(0.0, 0.0, 0.0) 198 | r2 = utils.quat_from_euler_xyz(0.0, 0.2, 0.0) 199 | r3 = utils.quat_from_euler_xyz(0.0, 0.5, 0.0) 200 | 201 | diff_1_2 = utils.rotation_error_magnitude(r1, r2) 202 | diff_1_3 = utils.rotation_error_magnitude(r1, r3) 203 | diff_2_3 = utils.rotation_error_magnitude(r2, r3) 204 | 205 | diff_3_3 = utils.rotation_error_magnitude(r3, r3) 206 | 207 | assert math.isclose(diff_3_3, 0, abs_tol=0.000001) 208 | 209 | assert diff_1_2 < diff_1_3 210 | assert diff_1_2 < diff_2_3 211 | assert diff_2_3 < diff_1_3 212 | 213 | 214 | def test_rot_inv(): 215 | q1 = np.array([0.693359, -0.138306, 0.693359, 0.138306]) 216 | q1_inv = utils.quat_inv(q1) 217 | q_mult = utils.quat_mul(q1, q1_inv) 218 | assert math.isclose(q_mult[0], 0, abs_tol=0.000001) 219 | assert math.isclose(q_mult[1], 0, abs_tol=0.000001) 220 | assert math.isclose(q_mult[2], 0, abs_tol=0.000001) 221 | assert math.isclose(q_mult[3], 1, abs_tol=0.000001) 222 | -------------------------------------------------------------------------------- /robo_gym/wrappers/env_wrappers/ur_ee_positioning_training.py: -------------------------------------------------------------------------------- 1 | import gymnasium as gym 2 | import numpy as np 3 | from typing import Tuple 4 | 5 | 6 | class EndEffectorPositioningURTrainingCurriculum(gym.Wrapper): 7 | def __init__(self, env, print_reward=False): 8 | super().__init__(env) 9 | self.env = env 10 | 11 | # use the episode counter as the metric for leveling up the curriculum 12 | self.episode_counter = 0 13 | 14 | self.reward_composition = {} 15 | self.print_reward = print_reward 16 | 17 | 18 | def reset(self, **kwargs): 19 | if self.episode_counter % 5 == 0:  # every 5th episode starts from a randomized configuration 20 | state = self.env.reset(randomize_start=True) 21 | else: 22 | state = self.env.reset(continue_on_success=True) 23 | 24 | self.reward_composition = { 'goal_reached_weight': 0, 25 | 'collision_weight': 0, 26 | 'distance_weight': 0, 27 | 'smoothness_weight': 0, 28 | 'action_magnitude_weight': 0, 29 | 'velocity_magnitude_weight': 0} 30 | 31 | return state 32 | 33 | def step(self, action): 34 | self.previous_action = self.env.previous_action 35 | next_state, _, _, _, _ = self.env.step(action)  # the wrapped gymnasium env returns a 5-tuple 36 | 37 | action = self.env.add_fixed_joints(action) 38 | reward, done, info = self.reward(rs_state=self.env.rs_state, action=action) 39 | 40 | if done: 41 | self.episode_counter += 1 42 | 43 | if done and self.print_reward: 44 | print(f'Episode counter: {self.episode_counter} Current level: {self.get_level()}')
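            # reward_composition holds the per-term sums accumulated by reward() over the episode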
45 | print(self.reward_composition) 46 | 47 | return next_state, reward, done, False, info  # gymnasium API: (obs, reward, terminated, truncated, info) 48 | 49 | def get_level(self): 50 | level_thresholds = [75, 250, 500, 1000, 1500, 2500] 51 | 52 | for i in range(len(level_thresholds)): 53 | if self.episode_counter < level_thresholds[i]: 54 | return i+1 55 | 56 | return len(level_thresholds) + 1 57 | 58 | def get_weights(self, level): 59 | # weights 60 | 61 | # reward for reaching the goal position 62 | g_w = 2 63 | # penalty for collision (ground, table or self) 64 | c_w = -1 65 | # punishment according to the distance to the goal 66 | d_w = -0.005 67 | # punishment for the delta between two consecutive actions 68 | s_w = -0.0002 69 | # punishment for action magnitude 70 | a_w = -0.0001 71 | # punishment for joint velocity magnitude 72 | v_w = -0.0002 73 | 74 | if level == 1: 75 | s_w = s_w * 0 76 | a_w = a_w * 0 77 | v_w = v_w * 0 78 | self.min_distance = 0.15 79 | if level == 2: 80 | d_w = d_w * 0 81 | self.min_distance = 0.15 82 | if level == 3: 83 | d_w = d_w * 0 84 | s_w = s_w * 5 85 | a_w = a_w * 5 86 | v_w = v_w * 5 87 | self.min_distance = 0.15 88 | if level == 4: 89 | d_w = d_w * 0 90 | s_w = s_w * 10 91 | a_w = a_w * 10 92 | v_w = v_w * 10 93 | self.min_distance = 0.1 94 | if level == 5: 95 | d_w = d_w * 0 96 | s_w = s_w * 15 97 | a_w = a_w * 15 98 | v_w = v_w * 15 99 | self.min_distance = 0.05 100 | if level == 6: 101 | d_w = d_w * 0 102 | s_w = s_w * 20 103 | a_w = a_w * 20 104 | v_w = v_w * 20 105 | self.min_distance = 0.05 106 | if level == 7: 107 | d_w = d_w * 0 108 | s_w = s_w * 25 109 | a_w = a_w * 25 110 | v_w = v_w * 25 111 | self.min_distance = 0.01 112 | 113 | return g_w, c_w, d_w, s_w, a_w, v_w 114 | 115 | 116 | def reward(self, rs_state, action) -> Tuple[float, bool, dict]: 117 | env_state = self.env._robot_server_state_to_env_state(rs_state) 118 | reward = 0 119 | done = False 120 | info = {} 121 | 122 | level = self.get_level() 123 | g_w, c_w, d_w, s_w, a_w, v_w = self.get_weights(level) 124 | 125 | 126 | # Calculate distance to the target 127 | target_coord = np.array([rs_state['object_0_to_ref_translation_x'], rs_state['object_0_to_ref_translation_y'], rs_state['object_0_to_ref_translation_z']]) 128 | ee_coord = np.array([rs_state['ee_to_ref_translation_x'], rs_state['ee_to_ref_translation_y'], rs_state['ee_to_ref_translation_z']]) 129 | euclidean_dist_3d = np.linalg.norm(target_coord - ee_coord) 130 | 131 | joint_velocities = np.array([rs_state['base_joint_velocity'], rs_state['shoulder_joint_velocity'], rs_state['elbow_joint_velocity'], rs_state['wrist_1_joint_velocity'], rs_state['wrist_2_joint_velocity'], rs_state['wrist_3_joint_velocity']]) 132 | 133 | previous_action = self.previous_action 134 | 135 | 136 | # distance weight 137 | x = d_w * euclidean_dist_3d 138 | reward += x 139 | self.reward_composition['distance_weight'] += x 140 | # smoothness 141 | x = s_w * np.linalg.norm(action - previous_action)**2 142 | reward += x 143 | self.reward_composition['smoothness_weight'] += x 144 | # action magnitude 145 | x = a_w * np.linalg.norm(action)**2 146 | reward += x 147 | self.reward_composition['action_magnitude_weight'] += x 148 | # velocity magnitude 149 | x = v_w * np.linalg.norm(joint_velocities)**2 150 | reward += x 151 | self.reward_composition['velocity_magnitude_weight'] += x 152 | 153 | 154 | if euclidean_dist_3d <= self.min_distance: 155 | # goal reached: the terminal reward replaces the accumulated shaping terms 156 | x = g_w * 1 157 | reward = x 158 | self.reward_composition['goal_reached_weight'] += x 159 | 160 | done = True 161 | info['final_status'] = 'success' 162 | info['target_coord']
= target_coord 163 | 164 | if rs_state['in_collision']: 165 | # punishment for collision 166 | x = c_w * 1 167 | reward = x 168 | self.reward_composition['collision_weight'] += x 169 | 170 | done = True 171 | info['final_status'] = 'collision' 172 | info['target_coord'] = target_coord 173 | 174 | if self.elapsed_steps >= self.max_episode_steps: 175 | done = True 176 | info['final_status'] = 'max_steps_exceeded' 177 | info['target_coord'] = target_coord 178 | 179 | return reward, done, info 180 | -------------------------------------------------------------------------------- /tests/robo-gym/envs/ur/test_ur_base_env.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import gymnasium as gym 4 | import robo_gym 5 | import math 6 | import numpy as np 7 | import pytest 8 | 9 | ur_models = [ 10 | pytest.param("ur3", marks=pytest.mark.nightly), 11 | pytest.param("ur3e", marks=pytest.mark.nightly), 12 | pytest.param("ur5", marks=pytest.mark.commit), 13 | pytest.param("ur5e", marks=pytest.mark.nightly), 14 | pytest.param("ur10", marks=pytest.mark.nightly), 15 | pytest.param("ur10e", marks=pytest.mark.nightly), 16 | pytest.param("ur16e", marks=pytest.mark.nightly), 17 | ] 18 | 19 | 20 | @pytest.fixture(scope="module", params=ur_models) 21 | def env(request): 22 | ip = os.environ.get("ROBOGYM_SERVERS_HOST", "robot-servers") 23 | env = gym.make( 24 | "EmptyEnvironmentURSim-v0", ip=ip, ur_model=request.param, fix_wrist_3=True 25 | ) 26 | env.request_param = request.param 27 | yield env 28 | env.kill_sim() 29 | 30 | 31 | @pytest.mark.commit 32 | def test_initialization(env): 33 | assert env.ur.model_key == env.request_param 34 | env.reset() 35 | done = False 36 | env.step([0, 0, 0, 0, 0]) 37 | for _ in range(10): 38 | if not done: 39 | action = env.action_space.sample() 40 | observation, _, done, _, _ = env.step(action) 41 | 42 | assert env.observation_space.contains(observation) 43 | 44 | 45 | @pytest.mark.commit 46 | @pytest.mark.flaky(reruns=3) 47 | def test_self_collision(env): 48 | collision_joint_config = { 49 | "ur3": [0.0, 0.0, -3.14, -1.77, 1.0], 50 | "ur3e": [0.0, -1.88, 2.8, -0.75, -1.88], 51 | "ur5": [0.0, -1.26, -3.14, 0.0, 0.0], 52 | "ur5e": [0.0, -0.50, -3.14, 3.14, 0.0], 53 | "ur10": [0.0, -1.5, 3.14, 0.0, 0.0], 54 | "ur10e": [0.0, -0.15, -2.83, -2.51, 1.63], 55 | "ur16e": [0.0, -1.15, 2.9, -0.19, 0.42], 56 | } 57 | env.reset() 58 | action = env.ur.normalize_joint_values(collision_joint_config[env.ur.model_key]) 59 | done = False 60 | while not done: 61 | _, _, done, _, info = env.step(action) 62 | assert info["final_status"] == "collision" 63 | 64 | 65 | @pytest.mark.commit 66 | @pytest.mark.flaky(reruns=3) 67 | def test_collision_with_ground(env): 68 | collision_joint_config = { 69 | "ur3": [0.0, 2.64, -1.95, -2.98, 0.41], 70 | "ur3e": [1.13, 1.88, -2.19, -3.43, 2.43], 71 | "ur5": [0.0, 1.0, 1.8, 0.0, 0.0], 72 | "ur5e": [0.0, 3.52, -2.58, 0.0, 0.0], 73 | "ur10": [0.0, 1.0, 1.15, 0.0, 0.0], 74 | "ur10e": [-2.14, -0.13, 0.63, -1.13, 1.63], 75 | "ur16e": [0.0, -0.15, 1.32, 0.0, 1.63], 76 | } 77 | env.reset() 78 | action = env.ur.normalize_joint_values(collision_joint_config[env.ur.model_key]) 79 | done = False 80 | while not done: 81 | _, _, done, _, info = env.step(action) 82 | assert info["final_status"] == "collision" 83 | 84 | 85 | @pytest.mark.commit 86 | def test_reset_joint_positions(env): 87 | joint_positions = [0.2, -2.5, 1.1, -2.0, -1.2, 1.2] 88 | 89 | state = env.reset(options={"joint_positions": joint_positions}) 90 | 
assert np.isclose( 91 | env.ur.normalize_joint_values(joint_positions), state[0:6], atol=0.1 92 | ).all() 93 | 94 | 95 | test_ur_fixed_joints = [ 96 | ( 97 | "EmptyEnvironmentURSim-v0", 98 | True, 99 | False, 100 | False, 101 | False, 102 | False, 103 | False, 104 | "ur3", 105 | ), # fixed shoulder_pan 106 | ( 107 | "EmptyEnvironmentURSim-v0", 108 | False, 109 | True, 110 | False, 111 | False, 112 | False, 113 | False, 114 | "ur3e", 115 | ), # fixed shoulder_lift 116 | ( 117 | "EmptyEnvironmentURSim-v0", 118 | False, 119 | False, 120 | False, 121 | False, 122 | False, 123 | True, 124 | "ur5", 125 | ), # fixed wrist_3 126 | ( 127 | "EmptyEnvironmentURSim-v0", 128 | True, 129 | False, 130 | True, 131 | False, 132 | False, 133 | False, 134 | "ur5e", 135 | ), # fixed Base and Elbow 136 | ( 137 | "EmptyEnvironmentURSim-v0", 138 | False, 139 | False, 140 | True, 141 | False, 142 | False, 143 | False, 144 | "ur10", 145 | ), # fixed elbow 146 | ( 147 | "EmptyEnvironmentURSim-v0", 148 | False, 149 | False, 150 | False, 151 | True, 152 | False, 153 | False, 154 | "ur10e", 155 | ), # fixed wrist_1 156 | ( 157 | "EmptyEnvironmentURSim-v0", 158 | False, 159 | False, 160 | False, 161 | False, 162 | True, 163 | False, 164 | "ur16e", 165 | ), # fixed wrist_2 166 | ] 167 | 168 | 169 | @pytest.mark.nightly 170 | @pytest.mark.parametrize( 171 | "env_name, fix_base, fix_shoulder, fix_elbow, fix_wrist_1, fix_wrist_2, fix_wrist_3, ur_model", 172 | test_ur_fixed_joints, 173 | ) 174 | @pytest.mark.flaky(reruns=3) 175 | def test_fixed_joints( 176 | env_name, 177 | fix_base, 178 | fix_shoulder, 179 | fix_elbow, 180 | fix_wrist_1, 181 | fix_wrist_2, 182 | fix_wrist_3, 183 | ur_model, 184 | ): 185 | env = gym.make( 186 | env_name, 187 | ip="robot-servers", 188 | fix_base=fix_base, 189 | fix_shoulder=fix_shoulder, 190 | fix_elbow=fix_elbow, 191 | fix_wrist_1=fix_wrist_1, 192 | fix_wrist_2=fix_wrist_2, 193 | fix_wrist_3=fix_wrist_3, 194 | ur_model=ur_model, 195 | ) 196 | state = env.reset() 197 | initial_joint_positions = state[0:6] 198 | # Take 20 actions 199 | action = env.action_space.sample() 200 | for _ in range(20): 201 | state, _, _, _, _ = env.step(action) 202 | joint_positions = state[0:6] 203 | 204 | if fix_base: 205 | assert math.isclose( 206 | initial_joint_positions[0], joint_positions[0], abs_tol=0.05 207 | ) 208 | if fix_shoulder: 209 | assert math.isclose( 210 | initial_joint_positions[1], joint_positions[1], abs_tol=0.05 211 | ) 212 | if fix_elbow: 213 | assert math.isclose( 214 | initial_joint_positions[2], joint_positions[2], abs_tol=0.05 215 | ) 216 | if fix_wrist_1: 217 | assert math.isclose( 218 | initial_joint_positions[3], joint_positions[3], abs_tol=0.05 219 | ) 220 | if fix_wrist_2: 221 | assert math.isclose( 222 | initial_joint_positions[4], joint_positions[4], abs_tol=0.05 223 | ) 224 | if fix_wrist_3: 225 | assert math.isclose( 226 | initial_joint_positions[5], joint_positions[5], abs_tol=0.05 227 | ) 228 | 229 | env.kill_sim() 230 | -------------------------------------------------------------------------------- /robo_gym/envs/simulation_wrapper.py: -------------------------------------------------------------------------------- 1 | import robo_gym_server_modules.server_manager.client as sm_client 2 | import robo_gym_server_modules.robot_server.client as rs_client 3 | import traceback 4 | 5 | class Simulation: 6 | """Simulation Wrapper Class - can be used to add simulation capability to a robo-gym environment. 
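    Intended to be combined with a concrete environment class via multiple inheritance (see e.g. ExampleEnvSim): the wrapper starts a simulated Robot Server through the Server Manager and stores its address in robot_server_ip for the environment to connect to.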
7 | 8 | Attributes: 9 | ----------- 10 | verbose: bool 11 | A class variable that controls whether certain methods of this class log in a verbose manner, with the purpose of tracking the management of connections to simulation robot servers. 12 | del_try_async_kill: bool 13 | A class variable that controls whether the __del__ method should try to kill a remaining associated simulation in an asynchronous fashion. 14 | _instances_count: int 15 | A class variable that counts the instances constructed so far, used for assigning a name for verbose output. 16 | """ 17 | _instances_count = 0 18 | verbose = False 19 | del_try_async_kill = True 20 | 21 | def __init__(self, cmd, ip=None, lower_bound_port=None, upper_bound_port=None, gui=False, **kwargs): 22 | """ Initializes the simulation-specific aspects of a robo-gym env. 23 | 24 | Args: 25 | cmd (str): roslaunch command to execute to start the simulated Robot Server. 26 | ip (str): IP address of the machine hosting the Server Manager. Defaults to None. 27 | lower_bound_port (str): Lower bound of Server Manager port range. Defaults to None. 28 | upper_bound_port (str): Upper bound of Server Manager port range. Defaults to None. 29 | gui (bool): If True the simulation is started with GUI. Defaults to False. 30 | """ 31 | self.robot_server_ip = None 32 | self.cmd = cmd 33 | self.gui = gui 34 | self.connections_count = 0 35 | self.expect_sim_running = False 36 | self.verbose = Simulation.verbose 37 | self.instance_id = str(chr(ord('A') + Simulation._instances_count)) 38 | Simulation._instances_count += 1 39 | if ip: 40 | if lower_bound_port and upper_bound_port: 41 | self.sm_client = sm_client.Client(ip,lower_bound_port,upper_bound_port) 42 | 43 | else: 44 | self.sm_client = sm_client.Client(ip) 45 | self._start_sim() 46 | 47 | def get_instance_string(self): 48 | if self.robot_server_ip: 49 | return self.instance_id + "/" + self.robot_server_ip 50 | return self.instance_id 51 | 52 | def print(self, msg): 53 | if self.verbose: 54 | print("Simulation " + self.get_instance_string() + ": " + str(msg)) 55 | 56 | def _start_sim(self): 57 | """Start a new simulated Robot Server. 58 | """ 59 | self.print("_start_sim: starting, stack trace:") 60 | if self.verbose: 61 | traceback.print_stack() 62 | self.robot_server_ip = self.sm_client.start_new_server(cmd = self.cmd, gui = self.gui) 63 | self.connections_count += 1 64 | self.expect_sim_running = True 65 | self.print("_start_sim: end") 66 | 67 | 68 | def kill_sim(self): 69 | """Kill the simulated Robot Server. 70 | """ 71 | if not self.connections_count: 72 | return 73 | if not self.expect_sim_running: 74 | return 75 | assert self.sm_client.kill_server(self.robot_server_ip) 76 | self.expect_sim_running = False 77 | 78 | def restart_sim(self): 79 | """Restart the simulated Robot Server.
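        Kills the simulation that is currently expected to be running (if any), starts a new one through the Server Manager and reconnects the Robot Server client to the new server address.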
80 | """ 81 | 82 | if self.expect_sim_running: 83 | self.kill_sim() 84 | self._start_sim() 85 | 86 | # usually initialized in each environment's constructor 87 | self.client = rs_client.Client(self.robot_server_ip) 88 | 89 | def close(self): 90 | self.print("close start") 91 | try: 92 | if not self.expect_sim_running: 93 | self.print("close returning early: I don't expect my simulation to be running!") 94 | return 95 | else: 96 | self.print("close: I expect my simulation to be running.") 97 | if not self.connections_count: 98 | # should be unreachable 99 | self.print("close returning early: I have never been connected!") 100 | return 101 | else: 102 | self.print("close: I have been connected " + str(self.connections_count) + " time(s)") 103 | if True: # hasattr(self, "sm_client") and self.sm_client is not None: 104 | self.print("close calling self.kill_sim") 105 | self.kill_sim() 106 | self.print("close finished self.kill_sim") 107 | finally: 108 | self.print("close end") 109 | 110 | def __del__(self): 111 | # If we reach this point and the simulation is still running, we want to kill it. 112 | # But in some situations normal kill_sim won't work here (because of the state we're in). 113 | # We can try to kill outside of the context of the client object if server modules are upgraded to provide the kill_server_async. 114 | # The flag Simulation.del_try_async_kill can be set to False to skip trying. 115 | self.print("__del__ start") 116 | try: 117 | try: 118 | self.print("__del__ trying self.kill_sim") 119 | self.kill_sim() 120 | self.print("__del__ self.kill_sim returned") 121 | except Exception as e: 122 | self.print("__del__ self.kill_sim threw " + str(e)) 123 | if hasattr(self, "expect_sim_running"): 124 | if not self.expect_sim_running: 125 | self.print("__del__ returning early: I don't expect my simulation to be running!") 126 | return 127 | else: 128 | self.print("__del__: I expect my simulation to be running.") 129 | else: 130 | self.print("__del__: I do not know if I expect my simulation to be running!") 131 | if hasattr(self, "connections_count"): 132 | if not self.connections_count: 133 | self.print("__del__ returning early: I have never been connected!") 134 | return 135 | else: 136 | self.print("__del__: I have been connected " + str(self.connections_count) + " times") 137 | else: 138 | self.print("__del__: I do not know my connections count!") 139 | if Simulation.del_try_async_kill and hasattr(self, "sm_client") and hasattr(sm_client.Client, 'kill_server_async') and callable(getattr(sm_client.Client, 'kill_server_async') and self.sm_client is not None): 140 | self.print("__del__ calling self.sm_client.kill_server_async") 141 | self.sm_client.kill_server_async(self.robot_server_ip) 142 | self.print("__del__ finished self.sm_client.kill_server_async") 143 | finally: 144 | self.print("__del__ end") -------------------------------------------------------------------------------- /tests/robo-gym/envs/ur/test_ur_avoidance_raad_training.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import gymnasium as gym 4 | import robo_gym 5 | import math 6 | import numpy as np 7 | import pathlib 8 | import json 9 | import pytest 10 | 11 | 12 | robo_gym_path = ( 13 | pathlib.Path(__file__) 14 | .parent.parent.parent.parent.parent.absolute() 15 | .joinpath("robo_gym") 16 | ) 17 | 18 | ur_models = [ 19 | pytest.param("ur3", marks=pytest.mark.skip(reason="not implemented yet")), 20 | pytest.param("ur3e", marks=pytest.mark.skip(reason="not 
implemented yet")), 21 | pytest.param("ur5", marks=pytest.mark.commit), 22 | pytest.param("ur5e", marks=pytest.mark.skip(reason="not implemented yet")), 23 | pytest.param("ur10", marks=pytest.mark.skip(reason="not implemented yet")), 24 | pytest.param("ur10e", marks=pytest.mark.skip(reason="not implemented yet")), 25 | pytest.param("ur16e", marks=pytest.mark.skip(reason="not implemented yet")), 26 | ] 27 | 28 | 29 | @pytest.fixture(scope="module", params=ur_models) 30 | def env(request): 31 | ip = os.environ.get("ROBOGYM_SERVERS_HOST", "robot-servers") 32 | env = gym.make("AvoidanceRaad2022URSim-v0", ip=ip, ur_model=request.param) 33 | env.request_param = request.param 34 | yield env 35 | env.kill_sim() 36 | 37 | 38 | @pytest.mark.commit 39 | def test_initialization(env): 40 | assert env.ur.model_key == env.request_param 41 | env.reset() 42 | done = False 43 | env.step([0, 0, 0, 0, 0]) 44 | for _ in range(10): 45 | if not done: 46 | action = env.action_space.sample() 47 | observation, _, done, _, _ = env.step(action) 48 | 49 | assert env.observation_space.contains(observation) 50 | 51 | 52 | @pytest.mark.skip(reason="This fails only in CI") 53 | @pytest.mark.flaky(reruns=3) 54 | def test_object_collision(env): 55 | params = { 56 | "ur5": {"object_coords": [-0.2, -0.1, 0.5], "n_steps": 250}, 57 | } 58 | 59 | env.reset( 60 | options={"fixed_object_position": params[env.ur.model_key]["object_coords"]} 61 | ) 62 | done = False 63 | i = 0 64 | while (not done) or i <= params[env.ur.model_key]["n_steps"]: 65 | _, _, done, _, info = env.step(np.zeros(5)) 66 | i += 1 67 | assert info["final_status"] == "collision" 68 | 69 | 70 | @pytest.mark.commit 71 | def test_object_coordinates(env): 72 | params = { 73 | "ur5": { 74 | "object_coords": [0.3, 0.1, 1.0, 0.0, 0.0, 0.0], 75 | "polar_coords_ee": {"r": 0.980, "theta": 2.488, "phi": 1.246}, 76 | "polar_coords_forearm": {"r": 0.607, "theta": 1.023, "phi": -0.825}, 77 | } 78 | } 79 | 80 | state = env.reset( 81 | options={"fixed_object_position": params[env.ur.model_key]["object_coords"]} 82 | ) 83 | assert np.isclose( 84 | [ 85 | params[env.ur.model_key]["polar_coords_ee"]["r"], 86 | params[env.ur.model_key]["polar_coords_ee"]["theta"], 87 | params[env.ur.model_key]["polar_coords_ee"]["phi"], 88 | ], 89 | state[0:3], 90 | atol=0.1, 91 | ).all() 92 | assert np.isclose( 93 | [ 94 | params[env.ur.model_key]["polar_coords_forearm"]["r"], 95 | params[env.ur.model_key]["polar_coords_forearm"]["theta"], 96 | params[env.ur.model_key]["polar_coords_forearm"]["phi"], 97 | ], 98 | state[15:18], 99 | atol=0.1, 100 | ).all() 101 | 102 | 103 | @pytest.mark.commit 104 | def test_robot_trajectory(env): 105 | params = { 106 | "ur5": { 107 | "traj_relative_path": "envs/ur/robot_trajectories/trajectory_raad_2022.json" 108 | } 109 | } 110 | 111 | env.reset() 112 | # load trajectory 113 | traj_path = robo_gym_path.joinpath(params[env.ur.model_key]["traj_relative_path"]) 114 | with open(traj_path) as json_file: 115 | trajectory = json.load(json_file)["trajectory"] 116 | 117 | action = np.zeros(5) 118 | for i in range(len(trajectory[0])): 119 | traj_joint_positions = trajectory[0][i] 120 | state, _, _, _, _ = env.step(action) 121 | ur_j_pos_norm = state[3:9] 122 | delta_joints = state[9:15] 123 | desired_joints = state[18:24] 124 | # check if robot follows the trajectory in all steps of trajectory segment 0 125 | assert np.isclose( 126 | env.ur.normalize_joint_values(traj_joint_positions), ur_j_pos_norm, atol=0.1 127 | ).all() 128 | # check if calculation of delta joints is 
correct 129 | assert np.isclose( 130 | (ur_j_pos_norm - desired_joints), delta_joints, atol=0.01 131 | ).all() 132 | # check that the current trajectory point is a target point 133 | assert state[24] == 1.0 134 | # check that state machine has transitioned to segment 1 of trajectory 135 | traj_joint_positions = trajectory[1][0] 136 | state, _, _, _, _ = env.step(action) 137 | assert np.isclose( 138 | env.ur.normalize_joint_values(traj_joint_positions), state[3:9], atol=0.1 139 | ).all() 140 | 141 | 142 | test_ur_fixed_joints = [ 143 | ( 144 | "AvoidanceRaad2022URSim-v0", 145 | False, 146 | False, 147 | False, 148 | False, 149 | False, 150 | True, 151 | "ur5", 152 | ), # fixed wrist_3 153 | ( 154 | "AvoidanceRaad2022URSim-v0", 155 | True, 156 | False, 157 | True, 158 | False, 159 | False, 160 | False, 161 | "ur5", 162 | ), # fixed Base and Elbow 163 | ] 164 | 165 | 166 | @pytest.mark.nightly 167 | @pytest.mark.parametrize( 168 | "env_name, fix_base, fix_shoulder, fix_elbow, fix_wrist_1, fix_wrist_2, fix_wrist_3, ur_model", 169 | test_ur_fixed_joints, 170 | ) 171 | @pytest.mark.flaky(reruns=3) 172 | def test_fixed_joints( 173 | env_name, 174 | fix_base, 175 | fix_shoulder, 176 | fix_elbow, 177 | fix_wrist_1, 178 | fix_wrist_2, 179 | fix_wrist_3, 180 | ur_model, 181 | ): 182 | env = gym.make( 183 | env_name, 184 | ip="robot-servers", 185 | fix_base=fix_base, 186 | fix_shoulder=fix_shoulder, 187 | fix_elbow=fix_elbow, 188 | fix_wrist_1=fix_wrist_1, 189 | fix_wrist_2=fix_wrist_2, 190 | fix_wrist_3=fix_wrist_3, 191 | ur_model=ur_model, 192 | ) 193 | state = env.reset() 194 | initial_joint_positions = state[3:9] 195 | # Take 20 actions 196 | action = env.action_space.sample() 197 | for _ in range(20): 198 | state, _, _, _, _ = env.step(action) 199 | joint_positions = state[3:9] 200 | 201 | if fix_base: 202 | assert math.isclose( 203 | initial_joint_positions[0], joint_positions[0], abs_tol=0.05 204 | ) 205 | if fix_shoulder: 206 | assert math.isclose( 207 | initial_joint_positions[1], joint_positions[1], abs_tol=0.05 208 | ) 209 | if fix_elbow: 210 | assert math.isclose( 211 | initial_joint_positions[2], joint_positions[2], abs_tol=0.05 212 | ) 213 | if fix_wrist_1: 214 | assert math.isclose( 215 | initial_joint_positions[3], joint_positions[3], abs_tol=0.05 216 | ) 217 | if fix_wrist_2: 218 | assert math.isclose( 219 | initial_joint_positions[4], joint_positions[4], abs_tol=0.05 220 | ) 221 | if fix_wrist_3: 222 | assert math.isclose( 223 | initial_joint_positions[5], joint_positions[5], abs_tol=0.05 224 | ) 225 | 226 | env.kill_sim() 227 | -------------------------------------------------------------------------------- /robo_gym/envs/manipulator/isaac_reach.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from robo_gym.envs.base.robogym_env import * 4 | from robo_gym.envs.manipulator.ee_pos_base import * 5 | from robo_gym.envs.manipulator.manipulator_base import * 6 | 7 | 8 | class IsaacReachEnv(ManipulatorEePosEnv): 9 | """ 10 | Base class for compatibility environments for agents trained with IsaacLab reach tasks. 11 | Environment actions and observations use the spaces from the corresponding IsaacLab environments. 12 | On the back-end, a UR robot server as for our End Effector Positioning environments is used. 13 | The environment nodes are responsible for achieving compatibility between that robot server and the spaces as used in IsaacLab. 
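    Concretely (see the observation and action nodes below): observations contain the joint positions relative to the default joint positions, the joint velocities, and the target pose command as position plus wxyz quaternion, while actions are interpreted as offsets from the default joint positions scaled by the configurable isaac_scale factor (0.5 by default).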
14 | """ 15 | 16 | KW_ISAAC_SCALE = "isaac_scale" 17 | 18 | # Add an NDArray to add static joints in the env observation that we don't get in the robot server state but we want in the env observation. 19 | # 2-dimensional: for each joint: 3 floats: initial joint positions, min, max position. Velocity is always 0. 20 | KW_ISAAC_OBS_EXTRA_STATIC_JOINTS = "isaac_extra_static_joints" 21 | 22 | def __init__(self, **kwargs): 23 | # not too nice - repeated in super init 24 | self._config = kwargs 25 | 26 | kwargs.setdefault(ManipulatorEePosEnv.KW_EE_ROTATION_MATTERS, True) 27 | kwargs.setdefault(ManipulatorEePosEnv.KW_EE_ROTATION_THRESHOLD, 0.05) 28 | kwargs.setdefault(ManipulatorEePosEnv.KW_EE_DISTANCE_THRESHOLD, 0.02) 29 | 30 | kwargs.setdefault(ManipulatorEePosEnv.KW_EE_TARGET_VOLUME_BOUNDING_BOX, True) 31 | kwargs.setdefault(ManipulatorEePosEnv.KW_EE_POSITION_X_RANGE, [0.35, 0.65]) 32 | kwargs.setdefault(ManipulatorEePosEnv.KW_EE_POSITION_Y_RANGE, [-0.2, 0.2]) 33 | kwargs.setdefault(ManipulatorEePosEnv.KW_EE_POSITION_Z_RANGE, [0.15, 0.5]) 34 | kwargs.setdefault(ManipulatorEePosEnv.KW_EE_ROTATION_ROLL_RANGE, 0) 35 | 36 | # as in IsaacLab reach envs, pi only to the first two fractional digits 37 | kwargs.setdefault(ManipulatorEePosEnv.KW_EE_ROTATION_YAW_RANGE, [-3.14, 3.14]) 38 | 39 | kwargs.setdefault(IsaacReachEnv.KW_ISAAC_SCALE, 0.5) 40 | 41 | # env nodes 42 | action_node: ActionNode | None = kwargs.get(RoboGymEnv.KW_ACTION_NODE) 43 | if not action_node: 44 | action_node = IsaacReachActionNode(**self.get_action_node_setup_kwargs()) 45 | kwargs[RoboGymEnv.KW_ACTION_NODE] = action_node 46 | 47 | super().__init__(**kwargs) 48 | # robot model joint positions were set from the joint_positions kw by super().__init__ 49 | self.default_joint_positions = self._robot_model.joint_positions 50 | 51 | def create_main_observation_node(self, node_index: int = 0, **kwargs): 52 | return IsaacReachObservationNode(**self.get_obs_node_setup_kwargs(node_index)) 53 | 54 | 55 | class IsaacReachObservationNode(ManipulatorEePosObservationNode): 56 | 57 | def __init__(self, **kwargs): 58 | super().__init__(**kwargs) 59 | self.default_joint_positions = np.array( 60 | self._config.get(ManipulatorBaseEnv.KW_JOINT_POSITIONS) 61 | ) 62 | self.obs_extra_static_joints = self._config.get( 63 | IsaacReachEnv.KW_ISAAC_OBS_EXTRA_STATIC_JOINTS 64 | ) 65 | 66 | def get_observation_space_part(self) -> gym.spaces.Box: 67 | num_joints = len(self._robot_model.joint_names) 68 | 69 | # joint positions 70 | tolerance_abs = self._robot_model.denormalize_joint_ranges( 71 | np.array([self.joint_position_tolerance_normalized] * num_joints) 72 | ) 73 | max_joint_positions = ( 74 | self._robot_model.get_max_joint_positions() 75 | + tolerance_abs 76 | - self.default_joint_positions 77 | ) 78 | min_joint_positions = ( 79 | self._robot_model.get_min_joint_positions() 80 | - tolerance_abs 81 | - self.default_joint_positions 82 | ) 83 | 84 | # velocities 85 | max_joint_velocities = np.array([np.inf] * num_joints) 86 | min_joint_velocities = -np.array([np.inf] * num_joints) 87 | 88 | if self.obs_extra_static_joints is not None: 89 | for joint_values in self.obs_extra_static_joints: 90 | min_joint_positions = np.concatenate( 91 | (min_joint_positions, [joint_values[1]]), dtype=np.float32 92 | ) 93 | max_joint_positions = np.concatenate( 94 | (max_joint_positions, [joint_values[2]]), dtype=np.float32 95 | ) 96 | min_joint_velocities = np.concatenate( 97 | (min_joint_velocities, [-np.inf]), dtype=np.float32 98 | ) 99 | max_joint_velocities = 
np.concatenate( 100 | (max_joint_velocities, [np.inf]), dtype=np.float32 101 | ) 102 | 103 | # ee target 104 | ee_target_max = np.concatenate((np.array([np.inf] * 3), np.array([1.0] * 4))) 105 | ee_target_min = -1 * ee_target_max 106 | 107 | obs_space_max = np.concatenate( 108 | (max_joint_positions, max_joint_velocities, ee_target_max) 109 | ).astype(np.float32) 110 | obs_space_min = np.concatenate( 111 | (min_joint_positions, min_joint_velocities, ee_target_min) 112 | ).astype(np.float32) 113 | obs_space = gym.spaces.Box( 114 | high=obs_space_max, low=obs_space_min, dtype=np.float32 115 | ) 116 | return obs_space 117 | 118 | def rs_state_to_observation_part( 119 | self, rs_state_array: NDArray, rs_state_dict: dict[str, float], **kwargs 120 | ) -> NDArray: 121 | # previous action is added by a separate LastActionObservationNode 122 | joint_positions = ( 123 | self.extract_joint_positions_from_rs_state_dict(rs_state_dict) 124 | - self.default_joint_positions 125 | ) 126 | joint_velocities = self.extract_joint_velocities_from_rs_state_dict( 127 | rs_state_dict 128 | ) 129 | if self.obs_extra_static_joints is not None: 130 | for joint_values in self.obs_extra_static_joints: 131 | # position 0 - it stays in initial pose 132 | joint_positions = np.concatenate( 133 | (joint_positions, [0]), dtype=np.float32 134 | ) 135 | # velocity 0 - it does not move 136 | joint_velocities = np.concatenate( 137 | (joint_velocities, [0]), dtype=np.float32 138 | ) 139 | 140 | command = utils.pose_quat_wxyz_from_xyzw(np.array(self.get_target_pose())) 141 | obs = np.concatenate( 142 | (joint_positions, joint_velocities, command), dtype=np.float32 143 | ) 144 | 145 | return obs 146 | 147 | 148 | class IsaacReachActionNode(ManipulatorActionNode): 149 | 150 | def __init__(self, **kwargs): 151 | self.scale = kwargs.get(IsaacReachEnv.KW_ISAAC_SCALE, 0.5) 152 | super().__init__(**kwargs) 153 | self.default_joint_positions = np.array( 154 | self._config.get(ManipulatorBaseEnv.KW_JOINT_POSITIONS) 155 | ) 156 | 157 | def env_action_to_rs_action(self, env_action: NDArray, **kwargs) -> NDArray: 158 | rs_action = self.default_joint_positions + env_action * self.scale 159 | rs_action = self.robot_model.reorder_joints_for_rs(rs_action).astype(np.float32) 160 | return rs_action 161 | 162 | def get_action_space(self) -> gym.spaces.Box: 163 | max_joint_positions = self._robot_model.get_max_joint_positions() 164 | min_joint_positions = self._robot_model.get_min_joint_positions() 165 | 166 | action_max = ( 167 | (max_joint_positions - self.default_joint_positions) / self.scale 168 | ).astype(np.float32) 169 | action_min = ( 170 | (min_joint_positions - self.default_joint_positions) / self.scale 171 | ).astype(np.float32) 172 | 173 | action_space = gym.spaces.Box(high=action_max, low=action_min, dtype=np.float32) 174 | return action_space 175 | -------------------------------------------------------------------------------- /robo_gym/envs/example/example_env.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from __future__ import annotations 3 | import numpy as np 4 | import gymnasium as gym 5 | from typing import Tuple, Any 6 | from robo_gym.utils.exceptions import InvalidStateError, RobotServerError, InvalidActionError 7 | import robo_gym_server_modules.robot_server.client as rs_client 8 | from robo_gym_server_modules.robot_server.grpc_msgs.python import robot_server_pb2 9 | from robo_gym.envs.simulation_wrapper import Simulation 10 | 11 | 12 | class 
ExampleEnv(gym.Env): 13 | """Example environment. 14 | 15 | Args: 16 | rs_address (str): Robot Server address. Formatted as 'ip:port'. Defaults to None. 17 | 18 | Attributes: 19 | 20 | client (:obj:str): Robot Server client. 21 | real_robot (bool): True if the environment is controlling a real robot. 22 | 23 | """ 24 | real_robot = False 25 | max_episode_steps = 300 26 | 27 | def __init__(self, rs_address=None, rs_state_to_info=True, **kwargs): 28 | self.elapsed_steps = 0 29 | 30 | self.rs_state_to_info = rs_state_to_info 31 | 32 | self.observation_space = self._get_observation_space() 33 | self.action_space = self._get_action_space() 34 | 35 | self.rs_state = None 36 | 37 | # Connect to Robot Server 38 | if rs_address: 39 | self.client = rs_client.Client(rs_address) 40 | else: 41 | print("WARNING: No IP and Port passed. Simulation will not be started") 42 | print("WARNING: Use this only to get environment shape") 43 | 44 | def _set_initial_robot_server_state(self, rs_state) -> robot_server_pb2.State: 45 | string_params = {} 46 | float_params = {} 47 | state = {} 48 | 49 | state_msg = robot_server_pb2.State(state=state, float_params=float_params, 50 | string_params=string_params, state_dict=rs_state) 51 | return state_msg 52 | 53 | def reset(self, *, seed: int | None = None, options: dict[str, Any] | None = None) -> tuple[np.ndarray, dict[str, Any]]: 54 | """Environment reset. 55 | 56 | options: 57 | position (list[2] or np.array[2]): [x,y] initial robot position. 58 | 59 | Returns: 60 | np.array: Environment state. 61 | dict: info 62 | 63 | """ 64 | super().reset(seed=seed) 65 | if options is None: 66 | options = {} 67 | position = options["position"] if "position" in options else None 68 | 69 | # Set Robot starting position 70 | if position: 71 | assert len(position) == 2 72 | else: 73 | position = [0, 0] 74 | 75 | self.elapsed_steps = 0 76 | 77 | # Initialize environment state 78 | state_len = self.observation_space.shape[0] 79 | state = np.zeros(state_len) 80 | rs_state = dict.fromkeys(self.get_robot_server_composition(), 0.0) 81 | 82 | # Fill rs_state 83 | rs_state['pos_x'] = position[0] 84 | rs_state['pos_y'] = position[1] 85 | 86 | # Set initial state of the Robot Server 87 | state_msg = self._set_initial_robot_server_state(rs_state) 88 | 89 | if not self.client.set_state_msg(state_msg): 90 | raise RobotServerError("set_state") 91 | 92 | # Get Robot Server state 93 | rs_state = self.client.get_state_msg().state_dict 94 | 95 | # Check if the length and keys of the Robot Server state received is correct 96 | self._check_rs_state_keys(rs_state) 97 | 98 | # Convert the initial state from Robot Server format to environment format 99 | state = self._robot_server_state_to_env_state(rs_state) 100 | 101 | # Check if the environment state is contained in the observation space 102 | if not self.observation_space.contains(state): 103 | raise InvalidStateError() 104 | 105 | self.rs_state = rs_state 106 | 107 | return state, {} 108 | 109 | def reward(self, rs_state, action) -> Tuple[float, bool, dict]: 110 | done = False 111 | info = {} 112 | 113 | if self.elapsed_steps >= self.max_episode_steps: 114 | done = True 115 | info['final_status'] = 'max_steps_exceeded' 116 | 117 | return 0, done, info 118 | 119 | def step(self, action) -> Tuple[np.array, float, bool, bool, dict]: 120 | if type(action) == list: action = np.array(action) 121 | 122 | self.elapsed_steps += 1 123 | 124 | # Check if the action is contained in the action space 125 | if not self.action_space.contains(action): 126 | raise 
InvalidActionError() 127 | 128 | # Send action to Robot Server and get state 129 | rs_state = self.client.send_action_get_state(action.tolist()).state_dict 130 | self._check_rs_state_keys(rs_state) 131 | 132 | # Convert the state from Robot Server format to environment format 133 | state = self._robot_server_state_to_env_state(rs_state) 134 | 135 | # Check if the environment state is contained in the observation space 136 | if not self.observation_space.contains(state): 137 | raise InvalidStateError() 138 | 139 | self.rs_state = rs_state 140 | 141 | # Assign reward 142 | reward = 0 143 | done = False 144 | reward, done, info = self.reward(rs_state=rs_state, action=action) 145 | if self.rs_state_to_info: info['rs_state'] = self.rs_state 146 | 147 | return state, reward, done, False, info 148 | 149 | def get_rs_state(self): 150 | return self.rs_state 151 | 152 | def render(self): 153 | pass 154 | 155 | def get_robot_server_composition(self) -> list: 156 | rs_state_keys = [ 157 | 'pos_x', 158 | 'pos_y', 159 | 'lin_vel', 160 | 'ang_vel', 161 | ] 162 | return rs_state_keys 163 | 164 | def _get_robot_server_state_len(self) -> int: 165 | """Get length of the Robot Server state. 166 | 167 | Describes the composition of the Robot Server state and returns 168 | its length. 169 | """ 170 | return len(self.get_robot_server_composition()) 171 | 172 | def _check_rs_state_keys(self, rs_state) -> None: 173 | keys = self.get_robot_server_composition() 174 | if len(keys) != len(rs_state.keys()): 175 | raise InvalidStateError("Robot Server state keys do not match. Different lengths.") 176 | 177 | for key in keys: 178 | if key not in rs_state.keys(): 179 | raise InvalidStateError("Robot Server state keys do not match") 180 | 181 | def _robot_server_state_to_env_state(self, rs_state) -> np.ndarray: 182 | """Transform state from Robot Server to environment format. 183 | 184 | Args: 185 | rs_state (dict): State in Robot Server format. 186 | 187 | Returns: 188 | numpy.array: State in environment format. 189 | 190 | """ 191 | 192 | pos_x = rs_state['pos_x'] 193 | pos_y = rs_state['pos_y'] 194 | lin_vel = rs_state['lin_vel'] 195 | ang_vel = rs_state['ang_vel'] 196 | 197 | # Compose environment state 198 | state = np.array([pos_x, pos_y, lin_vel, ang_vel]) 199 | 200 | return state 201 | 202 | def _get_observation_space(self) -> gym.spaces.Box: 203 | """Get environment observation space. 204 | 205 | Returns: 206 | gym.spaces: Gym observation space object. 207 | 208 | """ 209 | 210 | # Definition of environment observation_space 211 | max_obs = np.array([np.inf] * 4) 212 | min_obs = -np.array([np.inf] * 4) 213 | 214 | return gym.spaces.Box(low=min_obs, high=max_obs, dtype=np.float32) 215 | 216 | def _get_action_space(self) -> gym.spaces.Box: 217 | """Get environment action space. 218 | 219 | Returns: 220 | gym.spaces: Gym action space object.
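        The action space is a fixed two-dimensional box normalized to [-1.0, 1.0].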
221 | 222 | """ 223 | 224 | return gym.spaces.Box(low=np.full(2, -1.0), high=np.full(2, 1.0), dtype=np.float32) 225 | 226 | 227 | class ExampleEnvSim(ExampleEnv, Simulation): 228 | cmd = "roslaunch example_robot_server robot_server.launch" 229 | 230 | def __init__(self, ip=None, lower_bound_port=None, upper_bound_port=None, gui=False, **kwargs): 231 | Simulation.__init__(self, self.cmd, ip, lower_bound_port, upper_bound_port, gui, **kwargs) 232 | ExampleEnv.__init__(self, rs_address=self.robot_server_ip, **kwargs) 233 | 234 | 235 | class ExampleEnvRob(ExampleEnv): 236 | real_robot = True 237 | 238 | # roslaunch example_robot_server robot_server.launch gui:=true real_robot:=true 239 | -------------------------------------------------------------------------------- /tests/robo-gym/envs/ur/test_ur_avoidance_basic.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import gymnasium as gym 4 | import robo_gym 5 | import math 6 | import numpy as np 7 | import pytest 8 | 9 | ur_models = [ 10 | pytest.param("ur3", marks=pytest.mark.nightly), 11 | pytest.param("ur3e", marks=pytest.mark.nightly), 12 | pytest.param("ur5", marks=pytest.mark.commit), 13 | pytest.param("ur5e", marks=pytest.mark.nightly), 14 | pytest.param("ur10", marks=pytest.mark.nightly), 15 | pytest.param("ur10e", marks=pytest.mark.nightly), 16 | pytest.param("ur16e", marks=pytest.mark.nightly), 17 | ] 18 | 19 | 20 | @pytest.fixture(scope="module", params=ur_models) 21 | def env(request): 22 | ip = os.environ.get("ROBOGYM_SERVERS_HOST", "robot-servers") 23 | env = gym.make("BasicAvoidanceURSim-v0", ip=ip, ur_model=request.param) 24 | env.request_param = request.param 25 | yield env 26 | env.kill_sim() 27 | 28 | 29 | @pytest.mark.commit 30 | def test_initialization(env): 31 | assert env.ur.model_key == env.request_param 32 | env.reset() 33 | done = False 34 | env.step([0, 0, 0, 0, 0]) 35 | for _ in range(10): 36 | if not done: 37 | action = env.action_space.sample() 38 | observation, _, done, _, _ = env.step(action) 39 | 40 | assert env.observation_space.contains(observation) 41 | 42 | 43 | @pytest.mark.skip(reason="This fails only in CI") 44 | @pytest.mark.flaky(reruns=3) 45 | def test_object_collision(env): 46 | params = { 47 | "ur5": { 48 | "joint_positions": [0.0, -1.57, 1.57, -1.57, 0.0, 0.0], 49 | "object_coords": [0.2, -0.1, 0.52], 50 | "action": [-1, 0, 0, 0, 0], 51 | "n_steps": 30, 52 | }, 53 | } 54 | 55 | env.reset( 56 | options={ 57 | "desired_joint_positions": params[env.ur.model_key]["joint_positions"], 58 | "fixed_object_position": params[env.ur.model_key]["object_coords"], 59 | } 60 | ) 61 | done = False 62 | i = 0 63 | while (not done) or i <= params[env.ur.model_key]["n_steps"]: 64 | _, _, done, _, info = env.step(params[env.ur.model_key]["action"]) 65 | i += 1 66 | assert info["final_status"] == "collision" 67 | 68 | 69 | @pytest.mark.nightly 70 | def test_reset_joint_positions(env): 71 | joint_positions = [0.5, -2.7, 1.3, -1.7, -1.9, 1.6] 72 | 73 | state = env.reset(options={"joint_positions": joint_positions}) 74 | assert np.isclose( 75 | env.ur.normalize_joint_values(joint_positions), state[3:9], atol=0.1 76 | ).all() 77 | 78 | 79 | @pytest.mark.commit 80 | def test_object_coordinates(env): 81 | 82 | params = { 83 | # ? robot up-right, target_coord_in_ee_frame 0.0, -0.3, 0.2, coordinates of target calculated using official dimensions from DH parameters. 84 | # ? first value is d4+d6 85 | # ? 
second value is: d1+a2+a3+d5 86 | "ur3": { 87 | "joint_positions": [0.0, -1.57, 0.0, -1.57, 0.0, 0.0], 88 | "object_coords": [0.0, (0.194 + 0.2), (0.692 + 0.3), 0.0, 0.0, 0.0], 89 | "polar_coords": {"r": 0.360, "theta": 0.983, "phi": -1.571}, 90 | }, 91 | "ur3e": { 92 | "joint_positions": [0.0, -1.57, 0.0, -1.57, 0.0, 0.0], 93 | "object_coords": [0.0, (0.223 + 0.2), (0.694 + 0.3), 0.0, 0.0, 0.0], 94 | "polar_coords": {"r": 0.360, "theta": 0.983, "phi": -1.571}, 95 | }, 96 | "ur5": { 97 | "joint_positions": [0.0, -1.57, 0.0, -1.57, 0.0, 0.0], 98 | "object_coords": [0.0, (0.191 + 0.2), (1.001 + 0.3), 0.0, 0.0, 0.0], 99 | "polar_coords": {"r": 0.360, "theta": 0.983, "phi": -1.571}, 100 | }, 101 | "ur5e": { 102 | "joint_positions": [0.0, -1.57, 0.0, -1.57, 0.0, 0.0], 103 | "object_coords": [0.0, (0.233 + 0.2), (1.079 + 0.3), 0.0, 0.0, 0.0], 104 | "polar_coords": {"r": 0.360, "theta": 0.983, "phi": -1.571}, 105 | }, 106 | "ur10": { 107 | "joint_positions": [0.0, -1.57, 0.0, -1.57, 0.0, 0.0], 108 | "object_coords": [0.0, (0.256 + 0.2), (1.428 + 0.3), 0.0, 0.0, 0.0], 109 | "polar_coords": {"r": 0.360, "theta": 0.983, "phi": -1.571}, 110 | }, 111 | "ur10e": { 112 | "joint_positions": [0.0, -1.57, 0.0, -1.57, 0.0, 0.0], 113 | "object_coords": [0.0, (0.291 + 0.2), (1.485 + 0.3), 0.0, 0.0, 0.0], 114 | "polar_coords": {"r": 0.360, "theta": 0.983, "phi": -1.571}, 115 | }, 116 | "ur16e": { 117 | "joint_positions": [0.0, -1.57, 0.0, -1.57, 0.0, 0.0], 118 | "object_coords": [0.0, (0.291 + 0.2), (1.139 + 0.3), 0.0, 0.0, 0.0], 119 | "polar_coords": {"r": 0.360, "theta": 0.983, "phi": -1.571}, 120 | }, 121 | } 122 | 123 | state = env.reset( 124 | options={ 125 | "joint_positions": params[env.ur.model_key]["joint_positions"], 126 | "fixed_object_position": params[env.ur.model_key]["object_coords"], 127 | } 128 | ) 129 | assert np.isclose( 130 | [params[env.ur.model_key]["polar_coords"]["r"]], state[0], atol=0.05 131 | ).all() 132 | assert np.isclose( 133 | [ 134 | params[env.ur.model_key]["polar_coords"]["theta"], 135 | params[env.ur.model_key]["polar_coords"]["phi"], 136 | ], 137 | state[1:3], 138 | atol=0.2, 139 | ).all() 140 | 141 | 142 | test_ur_fixed_joints = [ 143 | ( 144 | "BasicAvoidanceURSim-v0", 145 | True, 146 | False, 147 | False, 148 | False, 149 | False, 150 | False, 151 | "ur3", 152 | ), # fixed shoulder_pan 153 | ( 154 | "BasicAvoidanceURSim-v0", 155 | False, 156 | True, 157 | False, 158 | False, 159 | False, 160 | False, 161 | "ur3e", 162 | ), # fixed shoulder_lift 163 | ( 164 | "BasicAvoidanceURSim-v0", 165 | False, 166 | False, 167 | False, 168 | False, 169 | False, 170 | True, 171 | "ur5", 172 | ), # fixed wrist_3 173 | ( 174 | "BasicAvoidanceURSim-v0", 175 | True, 176 | False, 177 | True, 178 | False, 179 | False, 180 | False, 181 | "ur5e", 182 | ), # fixed Base and Elbow 183 | ( 184 | "BasicAvoidanceURSim-v0", 185 | False, 186 | False, 187 | True, 188 | False, 189 | False, 190 | False, 191 | "ur10", 192 | ), # fixed elbow 193 | ( 194 | "BasicAvoidanceURSim-v0", 195 | False, 196 | False, 197 | False, 198 | True, 199 | False, 200 | False, 201 | "ur10e", 202 | ), # fixed wrist_1 203 | ( 204 | "BasicAvoidanceURSim-v0", 205 | False, 206 | False, 207 | False, 208 | False, 209 | True, 210 | False, 211 | "ur16e", 212 | ), # fixed wrist_2 213 | ] 214 | 215 | 216 | @pytest.mark.nightly 217 | @pytest.mark.parametrize( 218 | "env_name, fix_base, fix_shoulder, fix_elbow, fix_wrist_1, fix_wrist_2, fix_wrist_3, ur_model", 219 | test_ur_fixed_joints, 220 | ) 221 | @pytest.mark.flaky(reruns=3) 222 | def 
223 |     env_name,
224 |     fix_base,
225 |     fix_shoulder,
226 |     fix_elbow,
227 |     fix_wrist_1,
228 |     fix_wrist_2,
229 |     fix_wrist_3,
230 |     ur_model,
231 | ):
232 |     env = gym.make(
233 |         env_name,
234 |         ip="robot-servers",
235 |         fix_base=fix_base,
236 |         fix_shoulder=fix_shoulder,
237 |         fix_elbow=fix_elbow,
238 |         fix_wrist_1=fix_wrist_1,
239 |         fix_wrist_2=fix_wrist_2,
240 |         fix_wrist_3=fix_wrist_3,
241 |         ur_model=ur_model,
242 |     )
243 |     state, _ = env.reset()
244 |     initial_joint_positions = state[3:9]
245 |     # Take 20 actions
246 |     action = env.action_space.sample()
247 |     for _ in range(20):
248 |         state, _, _, _, _ = env.step(action)
249 |     joint_positions = state[3:9]
250 | 
251 |     if fix_base:
252 |         assert math.isclose(
253 |             initial_joint_positions[0], joint_positions[0], abs_tol=0.05
254 |         )
255 |     if fix_shoulder:
256 |         assert math.isclose(
257 |             initial_joint_positions[1], joint_positions[1], abs_tol=0.05
258 |         )
259 |     if fix_elbow:
260 |         assert math.isclose(
261 |             initial_joint_positions[2], joint_positions[2], abs_tol=0.05
262 |         )
263 |     if fix_wrist_1:
264 |         assert math.isclose(
265 |             initial_joint_positions[3], joint_positions[3], abs_tol=0.05
266 |         )
267 |     if fix_wrist_2:
268 |         assert math.isclose(
269 |             initial_joint_positions[4], joint_positions[4], abs_tol=0.05
270 |         )
271 |     if fix_wrist_3:
272 |         assert math.isclose(
273 |             initial_joint_positions[5], joint_positions[5], abs_tol=0.05
274 |         )
275 | 
276 |     env.kill_sim()
277 | 
--------------------------------------------------------------------------------
/robo_gym/envs/ur/ur_avoidance_basic.py:
--------------------------------------------------------------------------------
1 | """
2 | Environment for basic obstacle avoidance while controlling a UR robot arm.
3 | 
4 | In this environment the obstacle moves only up and down along a vertical line in front of the robot.
5 | The goal is for the robot to keep at least a predefined minimum distance to the moving obstacle.
6 | When feasible the robot should return to its original configuration,
7 | otherwise it should wait for the obstacle to move away before proceeding.
8 | """
9 | 
10 | from __future__ import annotations
11 | import numpy as np
12 | from typing import Tuple, Any
13 | from robo_gym_server_modules.robot_server.grpc_msgs.python import robot_server_pb2
14 | from robo_gym.envs.simulation_wrapper import Simulation
15 | from robo_gym.envs.ur.ur_base_avoidance_env import URBaseAvoidanceEnv
16 | 
17 | # base, shoulder, elbow, wrist_1, wrist_2, wrist_3
18 | JOINT_POSITIONS = [-1.57, -1.31, -1.31, -2.18, 1.57, 0.0]
19 | DEBUG = True
20 | MINIMUM_DISTANCE = 0.3  # the distance [m] the robot should keep to the obstacle
21 | 
22 | 
23 | class BasicAvoidanceUR(URBaseAvoidanceEnv):
24 |     """Universal Robots UR basic obstacle avoidance environment.
25 | 
26 |     Args:
27 |         rs_address (str): Robot Server address. Formatted as 'ip:port'. Defaults to None.
28 |         fix_base (bool): Whether the base joint stays fixed or is movable. Defaults to False.
29 |         fix_shoulder (bool): Whether the shoulder joint stays fixed or is movable. Defaults to False.
30 |         fix_elbow (bool): Whether the elbow joint stays fixed or is movable. Defaults to False.
31 |         fix_wrist_1 (bool): Whether the wrist 1 joint stays fixed or is movable. Defaults to False.
32 |         fix_wrist_2 (bool): Whether the wrist 2 joint stays fixed or is movable. Defaults to False.
33 |         fix_wrist_3 (bool): Whether the wrist 3 joint stays fixed or is movable. Defaults to True.
34 |         ur_model (str): determines which UR model is used in the environment. Defaults to 'ur5'.
35 |         include_polar_to_elbow (bool): determines whether the polar coordinates to the elbow joint are included in the state. Defaults to False.
36 | 
37 |     Attributes:
38 |         ur (:obj:): Robot utilities object.
39 |         client (:obj:str): Robot Server client.
40 |         real_robot (bool): True if the environment is controlling a real robot.
41 | 
42 |     """
43 | 
44 |     max_episode_steps = 1000
45 | 
46 |     def _set_initial_robot_server_state(
47 |         self, rs_state, fixed_object_position=None
48 |     ) -> robot_server_pb2.State:
49 |         if fixed_object_position:
50 |             state_msg = super()._set_initial_robot_server_state(
51 |                 rs_state=rs_state, fixed_object_position=fixed_object_position
52 |             )
53 |             return state_msg
54 | 
55 |         z_amplitude = np.random.default_rng().uniform(low=0.09, high=0.35)
56 |         z_frequency = 0.125
57 |         z_offset = np.random.default_rng().uniform(low=0.2, high=0.6)
58 | 
59 |         string_params = {"object_0_function": "triangle_wave"}
60 |         float_params = {
61 |             "object_0_x": 0.12,
62 |             "object_0_y": 0.34,
63 |             "object_0_z_amplitude": z_amplitude,
64 |             "object_0_z_frequency": z_frequency,
65 |             "object_0_z_offset": z_offset,
66 |         }
67 |         state = {}
68 | 
69 |         state_msg = robot_server_pb2.State(
70 |             state=state,
71 |             float_params=float_params,
72 |             string_params=string_params,
73 |             state_dict=rs_state,
74 |         )
75 |         return state_msg
76 | 
77 |     def reset(
78 |         self, *, seed: int | None = None, options: dict[str, Any] | None = None
79 |     ) -> tuple[np.ndarray, dict[str, Any]]:
80 |         """Environment reset.
81 | 
82 |         options:
83 |             joint_positions (list[6] or np.array[6]): robot joint positions in radians.
84 |             fixed_object_position (list[3]): x,y,z fixed position of object
85 | 
86 |         Returns:
87 |             np.array: Environment state.
88 |             dict: info
89 |         """
90 |         self.prev_action = np.zeros(6)
91 | 
92 |         initial_state, info = super().reset(seed=seed, options=options)
93 | 
94 |         return initial_state, info
95 | 
96 |     def reward(self, rs_state, action) -> Tuple[float, bool, dict]:
97 |         env_state = self._robot_server_state_to_env_state(rs_state)
98 | 
99 |         reward = 0
100 |         done = False
101 |         info = {}
102 | 
103 |         # Reward weights
104 |         close_distance_weight = -2
105 |         delta_joint_weight = 1
106 |         action_usage_weight = 1
107 |         rapid_action_weight = -0.2
108 | 
109 |         # Difference in joint position: current vs. starting position
110 |         delta_joint_pos = env_state[9:15]
111 | 
112 |         # Calculate distance to the obstacle
113 |         obstacle_coord = np.array(
114 |             [
115 |                 rs_state["object_0_to_ref_translation_x"],
116 |                 rs_state["object_0_to_ref_translation_y"],
117 |                 rs_state["object_0_to_ref_translation_z"],
118 |             ]
119 |         )
120 |         ee_coord = np.array(
121 |             [
122 |                 rs_state["ee_to_ref_translation_x"],
123 |                 rs_state["ee_to_ref_translation_y"],
124 |                 rs_state["ee_to_ref_translation_z"],
125 |             ]
126 |         )
127 |         forearm_coord = np.array(
128 |             [
129 |                 rs_state["forearm_to_ref_translation_x"],
130 |                 rs_state["forearm_to_ref_translation_y"],
131 |                 rs_state["forearm_to_ref_translation_z"],
132 |             ]
133 |         )
134 |         distance_to_ee = np.linalg.norm(obstacle_coord - ee_coord)
135 |         distance_to_forearm = np.linalg.norm(obstacle_coord - forearm_coord)
136 |         distance_to_target = np.min([distance_to_ee, distance_to_forearm])
137 | 
138 |         # Reward staying close to the predefined joint position
139 |         if abs(env_state[-6:]).sum() < 0.1 * action.size:
140 |             reward += (
141 |                 delta_joint_weight
142 |                 * (1 - (abs(delta_joint_pos).sum() / (0.1 * action.size)))
143 |                 * (1 / 1000)
144 |             )
145 | 
146 |         # Reward for not acting
147 |         if abs(action).sum() <= action.size:
148 |             reward += (
149 |                 action_usage_weight
150 |                 * (1 - (np.square(action).sum() / action.size))
151 |                 * (1 / 1000)
152 |             )
153 | 
154 |         # Negative reward if actions change too rapidly between steps
155 |         for i in range(len(action)):
156 |             if abs(action[i] - self.prev_action[i]) > 0.5:
157 |                 reward += rapid_action_weight * (1 / 1000)
158 | 
159 |         # Negative reward if the obstacle is closer than the predefined minimum distance
160 |         if distance_to_target < MINIMUM_DISTANCE:
161 |             reward += close_distance_weight * (1 / self.max_episode_steps)
162 | 
163 |         # Check if there is a collision
164 |         collision = rs_state["in_collision"] == 1
165 |         if collision:
166 |             done = True
167 |             info["final_status"] = "collision"
168 |             info["target_coord"] = obstacle_coord
169 |             self.last_position_on_success = []
170 | 
171 |         elif self.elapsed_steps >= self.max_episode_steps:
172 |             done = True
173 |             info["final_status"] = "success"
174 |             info["target_coord"] = obstacle_coord
175 |             self.last_position_on_success = []
176 | 
177 |         return reward, done, info
178 | 
179 |     def step(self, action) -> Tuple[np.ndarray, float, bool, bool, dict]:
180 |         if isinstance(action, list):
181 |             action = np.array(action)
182 | 
183 |         action = action.astype(np.float32)
184 | 
185 |         state, reward, done, truncated, info = super().step(action)
186 | 
187 |         self.prev_action = self.add_fixed_joints(action)
188 | 
189 |         return state, reward, done, truncated, info
190 | 
191 | 
192 | class BasicAvoidanceURSim(BasicAvoidanceUR, Simulation):
193 |     cmd = "roslaunch ur_robot_server ur_robot_server.launch \
194 |         world_name:=tabletop_sphere50.world \
195 |         reference_frame:=base_link \
196 |         max_velocity_scale_factor:=0.2 \
197 |         action_cycle_rate:=20 \
198 |         rviz_gui:=false \
199 |         gazebo_gui:=true \
200 |         objects_controller:=true \
201 |         rs_mode:=1moving2points \
202 |         n_objects:=1.0 \
203 |         object_0_model_name:=sphere50 \
204 |         object_0_frame:=target"
205 | 
206 |     def __init__(
207 |         self,
208 |         ip=None,
209 |         lower_bound_port=None,
210 |         upper_bound_port=None,
211 |         gui=False,
212 |         ur_model="ur5",
213 |         **kwargs,
214 |     ):
215 |         self.cmd = self.cmd + " " + "ur_model:=" + ur_model
216 |         Simulation.__init__(
217 |             self, self.cmd, ip, lower_bound_port, upper_bound_port, gui, **kwargs
218 |         )
219 |         BasicAvoidanceUR.__init__(
220 |             self, rs_address=self.robot_server_ip, ur_model=ur_model, **kwargs
221 |         )
222 | 
223 | 
224 | class BasicAvoidanceURRob(BasicAvoidanceUR):
225 |     real_robot = True
226 | 
227 | 
228 | # roslaunch ur_robot_server ur_robot_server.launch ur_model:=ur5 real_robot:=true rviz_gui:=true gui:=true reference_frame:=base max_velocity_scale_factor:=0.2 action_cycle_rate:=20 rs_mode:=moving
229 | 
--------------------------------------------------------------------------------
/robo_gym/utils/manipulator_model.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | from __future__ import annotations
3 | 
4 | import numpy as np
5 | from numpy.typing import NDArray
6 | import yaml
7 | import copy
8 | from robo_gym.utils import utils
9 | 
10 | 
11 | class ManipulatorModel:
12 |     """Manipulator robots model class.
13 | 
14 |     Attributes:
15 |         max_joint_positions (np.array): Maximum joint position values (rad).
16 |         min_joint_positions (np.array): Minimum joint position values (rad).
17 |         max_joint_velocities (np.array): Maximum joint velocity values (rad/s).
18 |         min_joint_velocities (np.array): Minimum joint velocity values (rad/s).
19 |         joint_names (list): Joint names (Standard Indexing).
20 | 
21 |     Joint Names (ROS Indexing):
22 |     [elbow_joint, shoulder_lift_joint, shoulder_pan_joint, wrist_1_joint, wrist_2_joint,
23 |     wrist_3_joint]
24 | 
25 |     NOTE: Where not specified, Standard Indexing is used.
26 |     """
27 | 
28 |     def __init__(
29 |         self,
30 |         model_key: str,
31 |         file_path: str,
32 |         norm_min: float = -1.0,
33 |         norm_max: float = 1.0,
34 |         override_joint_names: list[str] | None = None,
35 |     ):
36 | 
37 |         self.model_key = model_key
38 |         self._norm_min = norm_min
39 |         self._norm_max = norm_max
40 | 
41 |         # Load robot parameters
42 |         with open(file_path, "r") as stream:
43 |             try:
44 |                 p = yaml.safe_load(stream)
45 |             except yaml.YAMLError as exc:
46 |                 print(exc)
47 |                 raise
48 |         if override_joint_names:
49 |             self.joint_names = override_joint_names
50 |         else:
51 |             self.joint_names = p.get("joint_names")
52 |         if "remote_joint_names" in p:
53 |             self.remote_joint_names = p["remote_joint_names"]
54 |         else:
55 |             self.remote_joint_names = self.joint_names
56 | 
57 |         # Initialize joint limits attributes
58 |         self.max_joint_positions = np.zeros(len(self.joint_names))
59 |         self.min_joint_positions = np.zeros(len(self.joint_names))
60 |         self.max_joint_velocities = np.zeros(len(self.joint_names))
61 |         self.min_joint_velocities = np.zeros(len(self.joint_names))
62 | 
63 |         self.joint_positions = np.zeros(len(self.joint_names))
64 | 
65 |         for idx, joint in enumerate(self.joint_names):
66 |             self.max_joint_positions[idx] = p["joint_limits"][joint]["max_position"]
67 |             self.min_joint_positions[idx] = p["joint_limits"][joint]["min_position"]
68 |             self.max_joint_velocities[idx] = p["joint_limits"][joint]["max_velocity"]
69 |             self.min_joint_velocities[idx] = -p["joint_limits"][joint]["max_velocity"]
70 | 
71 |         # Workspace parameters
72 |         self.ws_r = p["workspace_area"]["r"]
73 |         self.ws_min_r = p["workspace_area"]["min_r"]
74 | 
75 |     def get_max_joint_positions(self) -> NDArray:
76 | 
77 |         return self.max_joint_positions
78 | 
79 |     def get_min_joint_positions(self) -> NDArray:
80 | 
81 |         return self.min_joint_positions
82 | 
83 |     def get_max_joint_velocities(self) -> NDArray:
84 | 
85 |         return self.max_joint_velocities
86 | 
87 |     def get_min_joint_velocities(self) -> NDArray:
88 | 
89 |         return self.min_joint_velocities
90 | 
91 |     def normalize_joint_value(self, source_value: float, joint_index: int) -> float:
92 |         dest_value = float(
93 |             np.interp(
94 |                 source_value,
95 |                 [
96 |                     self.min_joint_positions[joint_index],
97 |                     self.max_joint_positions[joint_index],
98 |                 ],
99 |                 [self._norm_min, self._norm_max],
100 |             )
101 |         )
102 |         return dest_value
103 | 
104 |     def normalize_joint_values(self, joints: NDArray) -> NDArray:
105 |         """Normalize joint position values.
106 | 
107 |         Args:
108 |             joints (np.array): Joint position values
109 | 
110 |         Returns:
111 |             norm_joints (np.array): Joint position values normalized between [-1, 1]
112 |         """
113 | 
114 |         result = copy.deepcopy(joints)
115 |         for joint_index in range(len(joints)):
116 |             source_value = float(joints[joint_index])
117 |             dest_value = self.normalize_joint_value(source_value, joint_index)
118 |             result[joint_index] = dest_value
119 |         return result
120 | 
121 |     def denormalize_joint_value(self, source_value: float, joint_index: int) -> float:
122 |         dest_value = float(
123 |             np.interp(
124 |                 source_value,
125 |                 [self._norm_min, self._norm_max],
126 |                 [
127 |                     self.min_joint_positions[joint_index],
128 |                     self.max_joint_positions[joint_index],
129 |                 ],
130 |             )
131 |         )
132 |         return dest_value
133 | 
134 |     def denormalize_joint_values(self, joints: NDArray) -> NDArray:
135 |         """Map normalized joint values to joint positions in the min/max range.
136 | 
137 |         Args:
138 |             joints (np.array): Normalized joint position values
139 | 
140 |         Returns:
141 |             joints (np.array): Joint position values mapped back into the joint limits range (rad)
142 |         """
143 | 
144 |         result = copy.deepcopy(joints)
145 |         for joint_index in range(len(joints)):
146 |             source_value = float(joints[joint_index])
147 |             dest_value = self.denormalize_joint_value(source_value, joint_index)
148 |             result[joint_index] = dest_value
149 |         return result
150 | 
151 |     def denormalize_joint_range(self, source_range: float, joint_index: int) -> float:
152 |         dest_value = float(
153 |             np.interp(
154 |                 source_range,
155 |                 [0, 1],
156 |                 [
157 |                     0,
158 |                     abs(
159 |                         self.max_joint_positions[joint_index]
160 |                         - self.min_joint_positions[joint_index]
161 |                     ),
162 |                 ],
163 |             )
164 |         )
165 |         return dest_value
166 | 
167 |     def denormalize_joint_ranges(self, joints: NDArray) -> NDArray:
168 |         result = np.zeros_like(joints)
169 |         for joint_index in range(len(joints)):
170 |             source_value = float(joints[joint_index])
171 |             dest_value = self.denormalize_joint_range(source_value, joint_index)
172 |             result[joint_index] = dest_value
173 |         return result
174 | 
175 |     def get_random_workspace_pose_rpy(
176 |         self,
177 |         np_random: np.random.Generator | None = None,
178 |         roll_range: NDArray | None = None,
179 |         pitch_range: NDArray | None = None,
180 |         yaw_range: NDArray | None = None,
181 |     ) -> NDArray:
182 |         """Get the pose of a random point in the robot workspace.
183 | 
184 |         Returns:
185 |             np.array: [x,y,z,roll,pitch,yaw] pose.
186 | 
187 |         """
188 | 
189 |         if np_random is None:
190 |             np_random = np.random.default_rng()
191 | 
192 |         pose = np.zeros(6)
193 | 
194 |         singularity_area = True
195 | 
196 |         # check if the generated x,y,z lie in the singularity area
197 |         while singularity_area:
198 |             # Generate a random sample uniformly distributed in a hemisphere
199 |             # via inverse transform sampling
200 | 
201 |             phi = np_random.uniform(low=0.0, high=2 * np.pi)
202 |             costheta = np_random.uniform(low=0.0, high=1.0)  # [-1.0,1.0] for a sphere
203 |             u = np_random.uniform(low=0.0, high=1.0)
204 | 
205 |             theta = np.arccos(costheta)
206 |             r = self.ws_r * np.cbrt(u)
207 | 
208 |             x = r * np.sin(theta) * np.cos(phi)
209 |             y = r * np.sin(theta) * np.sin(phi)
210 |             z = r * np.cos(theta)
211 | 
212 |             # TODO this is UR specific
213 |             if (x**2 + y**2) > self.ws_min_r**2:
214 |                 singularity_area = False
215 | 
216 |         roll = utils.get_uniform_from_range(np_random, roll_range, 0.0)
217 |         pitch = utils.get_uniform_from_range(np_random, pitch_range, 0.0)
218 |         yaw = utils.get_uniform_from_range(np_random, yaw_range, 0.0)
219 | 
220 |         pose[0:6] = [x, y, z, roll, pitch, yaw]
221 | 
222 |         return pose
223 | 
224 |     def get_random_workspace_pose_quat(
225 |         self,
226 |         np_random: np.random.Generator | None = None,
227 |         roll_range: NDArray | None = None,
228 |         pitch_range: NDArray | None = None,
229 |         yaw_range: NDArray | None = None,
230 |         seq="xyz",
231 |         quat_unique: bool = False,
232 |     ) -> NDArray:
233 |         pose_rpy = self.get_random_workspace_pose_rpy(
234 |             np_random, roll_range, pitch_range, yaw_range
235 |         )
236 |         pose_quat = utils.quat_from_euler(
237 |             pose_rpy[3], pose_rpy[4], pose_rpy[5], seq, quat_unique
238 |         )
239 |         return pose_quat
240 | 
241 |     def get_random_offset_joint_positions(
242 |         self,
243 |         joint_positions: NDArray,
244 |         random_offset: NDArray,
245 |         np_random: np.random.Generator | None = None,
246 |     ):
247 | 
248 |         if np_random is None:
249 |             np_random = np.random.default_rng()
250 | 
251 |         joint_positions_low = joint_positions - random_offset
252 |         joint_positions_high = joint_positions + random_offset
253 | 
254 |         joint_positions = np_random.uniform(
255 |             low=joint_positions_low, high=joint_positions_high
256 |         )
257 |         return joint_positions
258 | 
259 |     def reorder_joints_from_rs(self, ros_thetas: NDArray) -> NDArray:
260 |         return ros_thetas
261 | 
262 |     def reorder_joints_for_rs(self, thetas: NDArray) -> NDArray:
263 |         return thetas
264 | 
265 |     @property
266 |     def joint_count(self) -> int:
267 |         return len(self.joint_names)
268 | 
--------------------------------------------------------------------------------
/docs/modular_environments.md:
--------------------------------------------------------------------------------
1 | # Modular environments
2 | 
3 | We have started the creation of a new hierarchy of environment classes to improve the modularity and extensibility of our environments.
4 | 
5 | The base environment class is called RoboGymEnv.
6 | It lays out the common ground for robo-gym environments, such as the initialization of the client for the robot server and the functionality of the step and reset methods.
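For orientation, here is a minimal interaction sketch. The environment id and server address are placeholders (the actual ids follow the "Sim"/"Rob" naming convention described further below), and a server manager or compatible robot server must be reachable:

```python
import gymnasium as gym

# Importing robo_gym registers the environments with gymnasium.
import robo_gym

# Placeholder id and address: substitute a registered modular environment id
# and the address of your server manager or robot server.
env = gym.make("EndEffectorPositioning2URSim-v0", ip="robot-servers")

observation, info = env.reset()
for _ in range(100):
    action = env.action_space.sample()
    observation, reward, terminated, truncated, info = env.step(action)
    if terminated or truncated:
        observation, info = env.reset()
env.close()
```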
7 | In terms of structure, the RoboGymEnv has components that we call nodes; they take over elements of the environment's main functionality and can be specialized for different subclasses or configurations of environments:
8 | 
9 | * ActionNodes deal with the action from the agent and map it to the form that is understood by the robot server;
10 | * ObservationNodes build parts of the observation from the state reported by the robot server; and
11 | * RewardNodes provide the reward calculation and related data to be returned by a step.
12 | 
13 | In general, an environment has one ActionNode, one RewardNode, and any number of ObservationNodes (the reasoning for this multiplicity: for any given task, adding extra observations, such as additional sensors, should be a relatively small implementation change).
14 | As with the environment itself, the nodes' initialization is largely driven by processing arguments.
15 | By default, the arguments that the nodes receive are equal to those of the environment, but the environment implementation is free to prepare each node's arguments as required.
16 | 
17 | The functionality of the nodes is integrated into the environment's methods as needed, in a way that minimizes duplicated code as much as possible.
18 | For example, the node types for actions and observations provide the respective methods for mapping messages (in opposite directions) that are used in the base environment's step method, so a specialized environment should not need to (but might) override the base step method.
19 | Also, the action space and observation space are calculated by the respective nodes. Nodes of all three types can contribute to the state data that is sent to the robot server in the course of a reset. This is useful in scenarios such as delegating the management of a target pose to the reward node, which can still provide the target data for the robot server (a reward-node sketch follows at the end of this section).
20 | 
21 | This new environment structure is used in a few concrete environments so far, some of which are designed to be direct replacements of our original environments.
22 | The examples provided so far highlight a key benefit of the class hierarchy: creating a variation of an environment should be possible with small additions, reusing as much existing code as possible without duplicating it. An example would be performing the same task with a different robot. We are already exploiting this potential with new environment implementations for the UR robots and corresponding environments for the same tasks with the Panda robot.
23 | The environment classes that add specifics for a robot type are relatively short and simple, limited to configuring functionality provided by their base classes.
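To make the node composition described above concrete, the following sketch derives a custom reward node from the ManipulatorRewardNode defined in robo_gym/envs/manipulator/manipulator_base.py; the per-step penalty and the wiring comment are illustrative, not part of the library:

```python
from __future__ import annotations

from typing import Tuple

from numpy.typing import NDArray

from robo_gym.envs.base.robogym_env import RoboGymEnv
from robo_gym.envs.manipulator.manipulator_base import ManipulatorRewardNode


class StepPenaltyRewardNode(ManipulatorRewardNode):
    """Illustrative node: keeps the base collision/success logic but adds a
    small constant penalty per step to encourage short episodes."""

    def get_reward(
        self,
        rs_state_array: NDArray,
        rs_state_dict: dict[str, float],
        env_action: NDArray,
        **kwargs,
    ) -> Tuple[float, bool, dict]:
        # Reuse the base node's collision/success handling ...
        reward, done, info = super().get_reward(
            rs_state_array, rs_state_dict, env_action, **kwargs
        )
        # ... and add the extra per-step penalty on top.
        return reward - 0.001, done, info


# An environment picks such a node up from its keyword arguments, e.g. by
# passing it under RoboGymEnv.KW_REWARD_NODE, mirroring how ManipulatorBaseEnv
# installs its default ManipulatorRewardNode when none is provided.
```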
24 | 
25 | # Modular environment classes
26 | 
27 | This diagram sketches the relations between the environment classes provided so far in the hierarchy:
28 | 
29 | ```mermaid
30 | classDiagram
31 | 
32 | class RoboGymEnv
33 | RoboGymEnv: ActionNode actionNode
34 | RoboGymEnv: ObservationNode[] observationNodes
35 | RoboGymEnv: RewardNode rewardNode
36 | RoboGymEnv: Client client
37 | 
38 | RoboGymEnv <|-- ManipulatorBaseEnv
39 | 
40 | ManipulatorBaseEnv <|-- ManipulatorEePosEnv
41 | ManipulatorBaseEnv: ManipulatorActionNode actionNode
42 | ManipulatorBaseEnv: ManipulatorObservationNode[] observationNodes
43 | ManipulatorBaseEnv: ManipulatorRewardNode rewardNode
44 | 
45 | ManipulatorEePosEnv <|-- IsaacReachEnv
46 | 
47 | ManipulatorBaseEnv <|-- URBaseEnv2
48 | URBaseEnv2 <|-- EmptyEnvironment2UR
49 | 
50 | ManipulatorEePosEnv <|-- EndEffectorPositioning2UR
51 | ManipulatorEePosEnv: ManipulatorEePosObservationNode[] observationNodes
52 | ManipulatorEePosEnv: ManipulatorEePosRewardNode rewardNode
53 | 
54 | IsaacReachEnv <|-- IsaacReachUR
55 | IsaacReachEnv: IsaacReachActionNode actionNode
56 | IsaacReachEnv: IsaacReachObservationNode[] observationNodes
57 | 
58 | ManipulatorBaseEnv <|-- PandaBaseEnv
59 | PandaBaseEnv <|-- EmptyEnvironmentPanda
60 | 
61 | ManipulatorEePosEnv <|-- EndEffectorPositioningPanda
62 | 
63 | IsaacReachEnv <|-- IsaacReachPanda
64 | ```
65 | Note that the field indications are neither complete nor technically correct, as they are written like redefinitions with varying types. In fact, the fields are declared solely in the RoboGymEnv class and are only filled with instances of the corresponding subtypes by default. The notation in the diagram serves to illustrate how subclasses of the nodes can be used as needed for a certain environment type. Where possible, a less specialized node implementation may suffice. E.g., there is no special action node for the ManipulatorEePosEnv environment, but there are specific implementations of ObservationNode and RewardNode for it.
66 | 
67 | Our pre-existing environments follow the convention of providing separate subclasses for simulated and real robots for each environment type, with class names and environment ids (for gym.make) obtained by adding the suffix "Sim" or "Rob". Due to changed logic in setting up the client for the robot server, this distinction is not technically necessary for the new hierarchy (setting a flag would be enough), but to have matching names we also provide this specialization. Each leaf class in the diagram has two corresponding subclasses (not depicted) with the appended "Sim" and "Rob".
68 | 
69 | ## RoboGymEnv
70 | 
71 | As indicated above, the RoboGymEnv class is the base class in our hierarchy.
72 | 
73 | Note that, in contrast to our previous environments, it can work with pre-existing simulation robot servers (rather than only obtaining simulation robot servers from the server manager), which can save a lot of time during testing. However, as for a real robot server, it is necessary to make sure that the server is compatible with the environment. The roslaunch command to start the matching robot server for a given environment configuration can be read from the console output when starting the environment with a server manager address. Subsequently, the environment can be used with the address of the manually started server.
74 | 
75 | ## ManipulatorBaseEnv
76 | 
77 | This environment class adds logic to control and observe a manipulator robot.
It matches the previous empty UR environment on a logical level; however, it is independent of the robot type. There are subclasses for UR robots and for the Franka Emika Panda. The reward calculation is limited to surviving the maximum duration of the episode without a collision.
78 | 
79 | ## ManipulatorEePosEnv
80 | 
81 | The end-effector-positioning environment also corresponds to the similarly named UR environment in our pre-existing set of classes, again with a robot-type-agnostic base class and subclasses for UR and Panda. As a new feature, the rotation of the end effector can optionally be considered in the reward calculation. Consequently, the tradition-based class name is accurate only when this feature is disabled. There are richer options for determining the robot's starting pose (and whether the robot should return to it in reset) and for obtaining the target pose.
82 | 
83 | ## IsaacReachEnv
84 | 
85 | This environment type and its subclasses are specializations of the ManipulatorEePosEnv class, intended to allow running agents trained with Isaac Lab in a robo-gym context. The special implementations of ActionNode and ObservationNode provide the corresponding mappings to the spaces as they are used by the supported agents in Isaac Lab. See also [Isaac Lab Compatibility](isaac_lab_compatibility.md).
86 | 
87 | In order to avoid a general dependency on Torch for robo-gym, the short code to load and interpret policies trained with Isaac Lab is included in the [robo_envtest](./robo_envtest.py) example script. It comes into effect when passing the path of an exported trained policy from Isaac Lab. Specifically, the code supports policies trained with the rsl_rl train script that comes with Isaac Lab, tested with V4.5.0. Note that the export folder with this policy file is created upon running the rsl_rl play script and not during training itself.
88 | 
89 | # Open Issues (not comprehensive)
90 | 
91 | In general, we intend to migrate all of our old robo-gym environment implementations to the modular structure. Parts of this effort that lie outside of the initial vertical slice implementation, which ranges from the base environment down to the Isaac Reach task for UR and Panda, remain to be done. While the process is ongoing, legacy implementations persist (causing potentially confusing situations with environment names and mixed directories).
92 | 
93 | In some parts, the side goal of eliminating hardcoded values or other deficiencies is not yet reached. In particular, the construction of the roslaunch command in the robot-specific environment classes leaves a few things to be desired.
94 | 
95 | ## MiR
96 | 
97 | The MiR environments have not yet been rebuilt with the new structure. The plan is to have a base environment class for mobile robots as a sibling of the ManipulatorBaseEnv, with corresponding task-specific environment subclasses below it and MiR-specific subclasses of those, next to other mobile robot types of interest.
98 | 
99 | ## UR
100 | 
101 | The avoidance classes remain to be reconstructed in the modular fashion. Of course, the implementation of avoidance-specific functionality would be robot-agnostic first and thus also compatible with other manipulator types.
102 | 
103 | ## Panda
104 | 
105 | As of now, the Panda environments do not perform well. Attempts to use them with a real robot are strongly discouraged at this point. An overhaul of the server-side implementation may be necessary.
Alternative approaches for action modes (in which actions do not correspond to absolute joint positions) have been tried, as have many options in action execution and state collection; preparations for these can be found on the server side and in unmerged branches, but none has led to satisfying results yet.
106 | 
107 | ## Isaac Lab
108 | 
109 | There are other (example or custom) tasks that we would like to train for in Isaac Lab and provide execution for in robo-gym. It may be desirable to extract fields from configuration files/recordings from Isaac Lab; however, this may be outside of the scope of the environment classes themselves.
110 | 
111 | For the IsaacReachEnv, a shortcoming is that the server-side visualizations of the target do not correctly reflect the rotation of the target.
112 | 
--------------------------------------------------------------------------------
/robo_gym/envs/manipulator/manipulator_base.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 | 
3 | from typing import Tuple
4 | 
5 | import gymnasium as gym
6 | import numpy as np
7 | from numpy.typing import NDArray
8 | 
9 | from robo_gym.envs.base.robogym_env import (
10 |     RoboGymEnv,
11 |     ActionNode,
12 |     ObservationNode,
13 |     RewardNode,
14 | )
15 | from robo_gym.utils.manipulator_model import ManipulatorModel
16 | 
17 | 
18 | class ManipulatorBaseEnv(RoboGymEnv):
19 | 
20 |     KW_RS_JOINT_NAMES = "rs_joint_names"
21 |     KW_JOINT_POSITIONS = "joint_position"
22 |     RS_STATE_KEY_SUFFIX_JOINT_POSITION = "_position"
23 |     RS_STATE_KEY_SUFFIX_JOINT_VELOCITY = "_velocity"
24 | 
25 |     KW_ACTION_MODE = "action_mode"
26 |     ACTION_MODE_ABS_POS = "abs_pos"
27 |     ACTION_MODE_DELTA_POS = "delta_pos"
28 |     ACTION_MODE_ABS_VEL = "abs_vel"
29 |     ACTION_MODE_DELTA_VEL = "delta_vel"
30 | 
31 |     KW_MAX_VELOCITY_SCALE_FACTOR = "max_velocity_scale_factor"
32 | 
33 |     def __init__(self, **kwargs):
34 |         # not too nice - repeated in super init
35 |         self._config = kwargs
36 | 
37 |         self._robot_model: ManipulatorModel | None = kwargs.get(
38 |             RoboGymEnv.KW_ROBOT_MODEL_OBJECT
39 |         )
40 |         if self.KW_JOINT_POSITIONS in kwargs:
41 |             # TODO check values
42 |             self._robot_model.joint_positions = kwargs[self.KW_JOINT_POSITIONS]
43 | 
44 |         self.action_mode = self._config.setdefault(
45 |             self.KW_ACTION_MODE, self.ACTION_MODE_ABS_POS
46 |         )
47 |         # we only support this one action mode for now
48 |         assert self.action_mode == self.ACTION_MODE_ABS_POS
49 | 
50 |         # env nodes
51 |         action_node: ActionNode | None = kwargs.get(RoboGymEnv.KW_ACTION_NODE)
52 |         if not action_node:
53 |             action_node = ManipulatorActionNode(**self.get_action_node_setup_kwargs())
54 |             kwargs[RoboGymEnv.KW_ACTION_NODE] = action_node
55 | 
56 |         RoboGymEnv.assure_instance_of_type_in_list(
57 |             kwargs,
58 |             RoboGymEnv.KW_OBSERVATION_NODES,
59 |             ManipulatorObservationNode,
60 |             True,
61 |             self.create_main_observation_node,
62 |             {},
63 |         )
64 | 
65 |         reward_node: RewardNode | None = kwargs.get(RoboGymEnv.KW_REWARD_NODE)
66 |         if not reward_node:
67 |             reward_node = ManipulatorRewardNode(**self.get_reward_node_setup_kwargs())
68 |             kwargs[RoboGymEnv.KW_REWARD_NODE] = reward_node
69 | 
70 |         super().__init__(**kwargs)
71 | 
72 |     def create_main_observation_node(self, node_index: int = 0, **kwargs):
73 |         return ManipulatorObservationNode(**self.get_obs_node_setup_kwargs(node_index))
74 | 
75 | 
76 | class ManipulatorActionNode(ActionNode):
77 |     # TODO
78 |     # consider other action modes - current impl corresponds to ABS_POS only
79 | 
80 | 
KW_PREFIX_FIX_JOINT = "fix_" 81 | 82 | def __init__(self, **kwargs): 83 | super().__init__(**kwargs) 84 | self._robot_model: ManipulatorModel | None = kwargs.get( 85 | RoboGymEnv.KW_ROBOT_MODEL_OBJECT 86 | ) 87 | assert self._robot_model is not None 88 | self._joint_names: list[str] = self._robot_model.joint_names 89 | self._fixed_joint_names: list[str] = [] 90 | self._controlled_joint_names: list[str] = [] 91 | for joint_name in self._joint_names: 92 | fix_arg_name = self.KW_PREFIX_FIX_JOINT + joint_name 93 | if kwargs.get(fix_arg_name, False): 94 | self._fixed_joint_names.append(joint_name) 95 | else: 96 | self._controlled_joint_names.append(joint_name) 97 | 98 | def get_action_space(self) -> gym.spaces.Box: 99 | length = len(self._controlled_joint_names) 100 | return gym.spaces.Box( 101 | low=np.full(length, -1.0, dtype=np.float32), 102 | high=np.full(length, 1.0, dtype=np.float32), 103 | dtype=np.float32, 104 | ) 105 | 106 | def env_action_to_rs_action(self, env_action: NDArray, **kwargs) -> NDArray: 107 | # optimization potential, but more concise than it was: 108 | # start with default positions and overwrite non-fixed joints with values from env action 109 | normalized_full_action = self._robot_model.normalize_joint_values( 110 | np.array(self._robot_model.joint_positions, dtype=np.float32) 111 | ) 112 | source_index = 0 113 | for joint_index in range(len(self._joint_names)): 114 | joint_name = self._joint_names[joint_index] 115 | if joint_name in self._controlled_joint_names: 116 | normalized_full_action[joint_index] = env_action[source_index] 117 | source_index += 1 118 | 119 | denormalized_full_action = self._robot_model.denormalize_joint_values( 120 | normalized_full_action 121 | ) 122 | result = self._robot_model.reorder_joints_for_rs( 123 | denormalized_full_action 124 | ).astype(np.float32) 125 | return result 126 | 127 | def get_reset_state_part_state_dict(self) -> dict[str, float]: 128 | result: dict[str, float] = {} 129 | for joint_index in range(len(self._joint_names)): 130 | joint_name = self._robot_model.remote_joint_names[joint_index] 131 | state_key = ( 132 | joint_name + ManipulatorBaseEnv.RS_STATE_KEY_SUFFIX_JOINT_POSITION 133 | ) 134 | result[state_key] = self._robot_model.joint_positions[joint_index] 135 | return result 136 | 137 | @property 138 | def robot_model(self): 139 | return self._robot_model 140 | 141 | 142 | class ManipulatorObservationNode(ObservationNode): 143 | KW_JOINT_POSITION_TOLERANCE_NORMALIZED = "joint_position_tolerance_normalized" 144 | 145 | def __init__(self, **kwargs): 146 | super().__init__(**kwargs) 147 | self._robot_model: ManipulatorModel | None = kwargs.get( 148 | RoboGymEnv.KW_ROBOT_MODEL_OBJECT 149 | ) 150 | assert self._robot_model is not None 151 | self._joint_names: list[str] = ( 152 | self._robot_model.joint_names 153 | ) # kwargs.get("joint_names") 154 | 155 | def get_observation_space_part(self) -> gym.spaces.Box: 156 | # Joint position range tolerance 157 | num_joints = len(self._robot_model.joint_names) 158 | pos_tolerance = np.full(num_joints, self.joint_position_tolerance_normalized) 159 | 160 | # Joint positions range used to determine if there is an error in the sensor readings 161 | max_joint_positions = np.add(np.full(num_joints, 1.0), pos_tolerance) 162 | min_joint_positions = np.subtract(np.full(num_joints, -1.0), pos_tolerance) 163 | # Joint velocities range 164 | max_joint_velocities = np.array([np.inf] * num_joints) 165 | min_joint_velocities = -np.array([np.inf] * num_joints) 166 | # Definition of environment 
observation_space 167 | max_obs = np.concatenate( 168 | (max_joint_positions, max_joint_velocities), dtype=np.float32 169 | ) 170 | min_obs = np.concatenate( 171 | (min_joint_positions, min_joint_velocities), dtype=np.float32 172 | ) 173 | 174 | return gym.spaces.Box(low=min_obs, high=max_obs, dtype=np.float32) 175 | 176 | def rs_state_to_observation_part( 177 | self, rs_state_array: NDArray, rs_state_dict: dict[str, float], **kwargs 178 | ) -> NDArray: 179 | # from old impl, but why are only the joint positions normalized? 180 | joint_positions = self.extract_normalized_joint_positions_from_rs_state_dict( 181 | rs_state_dict 182 | ) 183 | 184 | # Joint Velocities 185 | joint_velocities = self.extract_joint_velocities_from_rs_state_dict( 186 | rs_state_dict 187 | ) 188 | 189 | # Compose environment state 190 | state = np.concatenate((joint_positions, joint_velocities)) 191 | 192 | return state.astype(np.float32) 193 | 194 | def extract_joint_velocities_from_rs_state_dict(self, rs_state_dict): 195 | joint_velocities = [] 196 | joint_velocities_keys = [ 197 | joint_name + ManipulatorBaseEnv.RS_STATE_KEY_SUFFIX_JOINT_VELOCITY 198 | for joint_name in self._robot_model.remote_joint_names 199 | ] 200 | for velocity in joint_velocities_keys: 201 | joint_velocities.append(rs_state_dict[velocity]) 202 | joint_velocities = np.array(joint_velocities) 203 | return joint_velocities 204 | 205 | def extract_normalized_joint_positions_from_rs_state_dict(self, rs_state_dict): 206 | joint_positions = self.extract_joint_positions_from_rs_state_dict(rs_state_dict) 207 | # Normalize joint position values 208 | joint_positions = self._robot_model.normalize_joint_values( 209 | joints=joint_positions 210 | ) 211 | return joint_positions 212 | 213 | def extract_joint_positions_from_rs_state_dict(self, rs_state_dict): 214 | joint_positions = [] 215 | joint_positions_keys = [ 216 | joint_name + ManipulatorBaseEnv.RS_STATE_KEY_SUFFIX_JOINT_POSITION 217 | for joint_name in self._robot_model.remote_joint_names 218 | ] 219 | for position in joint_positions_keys: 220 | joint_positions.append(rs_state_dict[position]) 221 | joint_positions = np.array(joint_positions) 222 | return joint_positions 223 | 224 | @property 225 | def joint_position_tolerance_normalized(self) -> float: 226 | return self._config.get( 227 | ManipulatorObservationNode.KW_JOINT_POSITION_TOLERANCE_NORMALIZED, 0.1 228 | ) 229 | 230 | 231 | class ManipulatorRewardNode(RewardNode): 232 | 233 | def __init__(self, **kwargs): 234 | super().__init__(**kwargs) 235 | 236 | def get_reward( 237 | self, 238 | rs_state_array: NDArray, 239 | rs_state_dict: dict[str, float], 240 | env_action: NDArray, 241 | **kwargs, 242 | ) -> Tuple[float, bool, dict]: 243 | 244 | info = self.env.get_default_info() 245 | done = False 246 | 247 | # Check if robot is in collision 248 | collision = rs_state_dict["in_collision"] == 1 249 | if collision: 250 | done = True 251 | info[RoboGymEnv.INFO_KW_FINAL_STATUS] = RoboGymEnv.FINAL_STATUS_COLLISION 252 | 253 | elif ( 254 | self.max_episode_steps is not None 255 | and self.env.elapsed_steps >= self.max_episode_steps 256 | ): 257 | done = True 258 | info[RoboGymEnv.INFO_KW_FINAL_STATUS] = RoboGymEnv.FINAL_STATUS_SUCCESS 259 | 260 | return 0.0, done, info 261 | --------------------------------------------------------------------------------
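The action and observation nodes above lean on the normalization helpers of ManipulatorModel. A minimal round-trip sketch, assuming the package is installed and the interpreter is started from the repository root so that the ur5 parameter file is found:

```python
import numpy as np

from robo_gym.utils.manipulator_model import ManipulatorModel

# Load the UR5 joint limits shipped with the package.
model = ManipulatorModel(
    model_key="ur5",
    file_path="robo_gym/utils/ur_parameters/ur5.yaml",
)

joints = np.array([-1.57, -1.31, -1.31, -2.18, 1.57, 0.0])
normalized = model.normalize_joint_values(joints)      # mapped into [-1, 1]
restored = model.denormalize_joint_values(normalized)  # back to radians

# np.interp is monotonic, so the mapping round-trips within float precision.
assert np.allclose(joints, restored, atol=1e-6)
```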