├── panda ├── rl │ ├── policies │ │ ├── __init__.py │ │ ├── utils.py │ │ ├── distributions.py │ │ ├── mlp_actor_critic.py │ │ └── actor_critic.py │ ├── base_agent.py │ ├── main.py │ ├── dataset.py │ ├── rollouts.py │ ├── normalizer.py │ ├── ppo_agent.py │ ├── sac_agent.py │ └── trainer.py ├── models │ ├── robot │ │ ├── __init__.py │ │ ├── robot.py │ │ └── panda_robot.py │ ├── task │ │ ├── __init__.py │ │ ├── task.py │ │ └── grasping_task.py │ ├── arena │ │ ├── __init__.py │ │ ├── arena.py │ │ └── table_arena.py │ ├── gripper │ │ ├── __init__.py │ │ ├── panda_gripper.py │ │ └── gripper.py │ ├── __init__.py │ ├── assets │ │ ├── textures │ │ │ ├── metal.png │ │ │ ├── dark-wood.png │ │ │ └── light-wood.png │ │ ├── objects │ │ │ ├── meshes │ │ │ │ └── base.stl │ │ │ ├── cube.xml │ │ │ ├── cyl.xml │ │ │ ├── cyl2.xml │ │ │ └── basepart.xml │ │ ├── robot │ │ │ └── panda │ │ │ │ ├── meshes │ │ │ │ ├── hand.stl │ │ │ │ ├── finger.stl │ │ │ │ ├── link0.stl │ │ │ │ ├── link1.stl │ │ │ │ ├── link2.stl │ │ │ │ ├── link3.stl │ │ │ │ ├── link4.stl │ │ │ │ ├── link5.stl │ │ │ │ ├── link6.stl │ │ │ │ ├── link7.stl │ │ │ │ ├── hand_vis.stl │ │ │ │ ├── link0_vis.stl │ │ │ │ ├── link1_vis.stl │ │ │ │ ├── link2_vis.stl │ │ │ │ ├── link3_vis.stl │ │ │ │ ├── link4_vis.stl │ │ │ │ ├── link5_vis.stl │ │ │ │ ├── link6_vis.stl │ │ │ │ ├── link7_vis.stl │ │ │ │ ├── pedestal.stl │ │ │ │ └── finger_vis.stl │ │ │ │ └── robot_torque.xml │ │ ├── gripper │ │ │ ├── meshes │ │ │ │ └── panda_gripper │ │ │ │ │ ├── hand.stl │ │ │ │ │ ├── finger.stl │ │ │ │ │ ├── hand_vis.stl │ │ │ │ │ ├── finger_vis.stl │ │ │ │ │ ├── finger_longer.stl │ │ │ │ │ ├── finger_longer_a.stl │ │ │ │ │ ├── finger_longer_b.stl │ │ │ │ │ ├── finger_longer_c.stl │ │ │ │ │ └── finger_longer_a_old.stl │ │ │ └── panda_gripper.xml │ │ ├── base.xml │ │ └── arena │ │ │ └── table_arena.xml │ ├── objects │ │ ├── __init__.py │ │ ├── xml_objects.py │ │ └── objects.py │ ├── world.py │ └── base.py ├── controller │ ├── __init__.py │ ├── controller.py │ └── arm_controller.py ├── log │ └── rl.111. 
│ │ ├── ckpt_09000000.pt │ │ ├── ckpt_09360000.pt │ │ ├── replay_09000000.pkl │ │ └── replay_09360000.pkl ├── environments │ ├── __init__.py │ ├── action_space.py │ ├── base.py │ ├── panda_grasping.py │ └── panda.py ├── config │ ├── controller_config.hjson │ ├── grasping.py │ └── __init__.py ├── utils │ ├── __init__.py │ ├── logger.py │ ├── mpi.py │ ├── mujoco_py_renderer.py │ ├── mjcf_utils.py │ ├── pytorch.py │ └── transform_utils.py ├── README.md └── environment.yml ├── base_policy.gif ├── exp_image.png ├── adapted_policy.gif └── README.md /panda/rl/policies/__init__.py: -------------------------------------------------------------------------------- 1 | from .mlp_actor_critic import MlpActor, MlpCritic 2 | 3 | 4 | -------------------------------------------------------------------------------- /panda/models/robot/__init__.py: -------------------------------------------------------------------------------- 1 | from .robot import Robot 2 | from .panda_robot import Panda 3 | -------------------------------------------------------------------------------- /base_policy.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/base_policy.gif -------------------------------------------------------------------------------- /exp_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/exp_image.png -------------------------------------------------------------------------------- /panda/controller/__init__.py: -------------------------------------------------------------------------------- 1 | from .controller import Controller 2 | from .arm_controller import * 3 | -------------------------------------------------------------------------------- /panda/models/task/__init__.py: -------------------------------------------------------------------------------- 1 | from .task import Task 2 | 3 | from .grasping_task import GraspingTask 4 | -------------------------------------------------------------------------------- /adapted_policy.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/adapted_policy.gif -------------------------------------------------------------------------------- /panda/models/arena/__init__.py: -------------------------------------------------------------------------------- 1 | from .arena import Arena 2 | from .table_arena import TableArena 3 | 4 | 5 | -------------------------------------------------------------------------------- /panda/models/gripper/__init__.py: -------------------------------------------------------------------------------- 1 | from .gripper import Gripper 2 | from .panda_gripper import PandaGripper 3 | 4 | -------------------------------------------------------------------------------- /panda/log/rl.111./ckpt_09000000.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/log/rl.111./ckpt_09000000.pt -------------------------------------------------------------------------------- /panda/log/rl.111./ckpt_09360000.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/log/rl.111./ckpt_09360000.pt 
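The `panda/log/rl.111./` directory holds training artifacts: policy checkpoints (`ckpt_*.pt`) and replay buffer dumps (`replay_*.pkl`). Below is a minimal sketch of inspecting these files offline, assuming the checkpoints were written with `torch.save` and the replay buffers with `pickle`; the exact contents and dictionary keys depend on what `trainer.py` actually stores and are not confirmed by this section.

```python
import pickle
import torch

# Load a checkpoint on the CPU and list what it contains (keys are unknown here).
ckpt = torch.load("panda/log/rl.111./ckpt_09360000.pt", map_location="cpu")
print(type(ckpt), list(ckpt.keys()) if isinstance(ckpt, dict) else ckpt)

# Load the matching replay buffer dump saved alongside it.
with open("panda/log/rl.111./replay_09360000.pkl", "rb") as f:
    replay = pickle.load(f)
print(type(replay))
```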
-------------------------------------------------------------------------------- /panda/log/rl.111./replay_09000000.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/log/rl.111./replay_09000000.pkl -------------------------------------------------------------------------------- /panda/log/rl.111./replay_09360000.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/log/rl.111./replay_09360000.pkl -------------------------------------------------------------------------------- /panda/models/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | from .world import MujocoWorldBase 3 | 4 | assets_root = os.path.join(os.path.dirname(__file__), "assets") 5 | -------------------------------------------------------------------------------- /panda/models/assets/textures/metal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/textures/metal.png -------------------------------------------------------------------------------- /panda/models/assets/textures/dark-wood.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/textures/dark-wood.png -------------------------------------------------------------------------------- /panda/models/assets/objects/meshes/base.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/objects/meshes/base.stl -------------------------------------------------------------------------------- /panda/models/assets/textures/light-wood.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/textures/light-wood.png -------------------------------------------------------------------------------- /panda/environments/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import MujocoEnv 2 | from .base import make 3 | from .panda import PandaEnv 4 | from .panda_grasping import PandaGrasp 5 | -------------------------------------------------------------------------------- /panda/models/assets/robot/panda/meshes/hand.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/robot/panda/meshes/hand.stl -------------------------------------------------------------------------------- /panda/models/objects/__init__.py: -------------------------------------------------------------------------------- 1 | from .objects import MujocoObject, MujocoXMLObject 2 | from .xml_objects import CubeObject, BasePartObject, CylObject, Cyl2Object 3 | -------------------------------------------------------------------------------- /panda/models/assets/robot/panda/meshes/finger.stl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/robot/panda/meshes/finger.stl -------------------------------------------------------------------------------- /panda/models/assets/robot/panda/meshes/link0.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/robot/panda/meshes/link0.stl -------------------------------------------------------------------------------- /panda/models/assets/robot/panda/meshes/link1.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/robot/panda/meshes/link1.stl -------------------------------------------------------------------------------- /panda/models/assets/robot/panda/meshes/link2.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/robot/panda/meshes/link2.stl -------------------------------------------------------------------------------- /panda/models/assets/robot/panda/meshes/link3.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/robot/panda/meshes/link3.stl -------------------------------------------------------------------------------- /panda/models/assets/robot/panda/meshes/link4.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/robot/panda/meshes/link4.stl -------------------------------------------------------------------------------- /panda/models/assets/robot/panda/meshes/link5.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/robot/panda/meshes/link5.stl -------------------------------------------------------------------------------- /panda/models/assets/robot/panda/meshes/link6.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/robot/panda/meshes/link6.stl -------------------------------------------------------------------------------- /panda/models/assets/robot/panda/meshes/link7.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/robot/panda/meshes/link7.stl -------------------------------------------------------------------------------- /panda/models/assets/robot/panda/meshes/hand_vis.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/robot/panda/meshes/hand_vis.stl -------------------------------------------------------------------------------- /panda/models/assets/robot/panda/meshes/link0_vis.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/robot/panda/meshes/link0_vis.stl 
-------------------------------------------------------------------------------- /panda/models/assets/robot/panda/meshes/link1_vis.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/robot/panda/meshes/link1_vis.stl -------------------------------------------------------------------------------- /panda/models/assets/robot/panda/meshes/link2_vis.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/robot/panda/meshes/link2_vis.stl -------------------------------------------------------------------------------- /panda/models/assets/robot/panda/meshes/link3_vis.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/robot/panda/meshes/link3_vis.stl -------------------------------------------------------------------------------- /panda/models/assets/robot/panda/meshes/link4_vis.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/robot/panda/meshes/link4_vis.stl -------------------------------------------------------------------------------- /panda/models/assets/robot/panda/meshes/link5_vis.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/robot/panda/meshes/link5_vis.stl -------------------------------------------------------------------------------- /panda/models/assets/robot/panda/meshes/link6_vis.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/robot/panda/meshes/link6_vis.stl -------------------------------------------------------------------------------- /panda/models/assets/robot/panda/meshes/link7_vis.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/robot/panda/meshes/link7_vis.stl -------------------------------------------------------------------------------- /panda/models/assets/robot/panda/meshes/pedestal.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/robot/panda/meshes/pedestal.stl -------------------------------------------------------------------------------- /panda/models/assets/robot/panda/meshes/finger_vis.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/robot/panda/meshes/finger_vis.stl -------------------------------------------------------------------------------- /panda/models/assets/gripper/meshes/panda_gripper/hand.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/gripper/meshes/panda_gripper/hand.stl 
-------------------------------------------------------------------------------- /panda/models/assets/gripper/meshes/panda_gripper/finger.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/gripper/meshes/panda_gripper/finger.stl -------------------------------------------------------------------------------- /panda/models/assets/gripper/meshes/panda_gripper/hand_vis.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/gripper/meshes/panda_gripper/hand_vis.stl -------------------------------------------------------------------------------- /panda/models/assets/gripper/meshes/panda_gripper/finger_vis.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/gripper/meshes/panda_gripper/finger_vis.stl -------------------------------------------------------------------------------- /panda/models/assets/gripper/meshes/panda_gripper/finger_longer.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/gripper/meshes/panda_gripper/finger_longer.stl -------------------------------------------------------------------------------- /panda/models/assets/gripper/meshes/panda_gripper/finger_longer_a.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/gripper/meshes/panda_gripper/finger_longer_a.stl -------------------------------------------------------------------------------- /panda/models/assets/gripper/meshes/panda_gripper/finger_longer_b.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/gripper/meshes/panda_gripper/finger_longer_b.stl -------------------------------------------------------------------------------- /panda/models/assets/gripper/meshes/panda_gripper/finger_longer_c.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/gripper/meshes/panda_gripper/finger_longer_c.stl -------------------------------------------------------------------------------- /panda/models/assets/gripper/meshes/panda_gripper/finger_longer_a_old.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/gripper/meshes/panda_gripper/finger_longer_a_old.stl -------------------------------------------------------------------------------- /panda/models/world.py: -------------------------------------------------------------------------------- 1 | from models.base import MujocoXML 2 | from utils.mjcf_utils import xml_path_completion 3 | 4 | 5 | class MujocoWorldBase(MujocoXML): 6 | """Base class to inherit all mujoco worlds from.""" 7 | 8 | def __init__(self): 9 | super().__init__(xml_path_completion("base.xml")) 10 | -------------------------------------------------------------------------------- 
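`MujocoWorldBase` and most of the model classes in this package resolve their XML files through `xml_path_completion` from `utils/mjcf_utils.py`, whose implementation is not reproduced in this section. The following is only a plausible sketch of that helper, assuming it lives in `panda/utils/` and resolves relative paths against the `assets` directory that `models/__init__.py` exposes; the repository's actual implementation may differ.

```python
import os

# Assumed location of the asset tree; models/__init__.py builds the equivalent
# path as os.path.join(os.path.dirname(__file__), "assets") next to world.py.
ASSETS_ROOT = os.path.join(os.path.dirname(__file__), "..", "models", "assets")


def xml_path_completion(xml_path):
    """Resolve an XML path against the models/assets directory.

    Absolute paths are returned unchanged; relative paths such as "base.xml"
    or "arena/table_arena.xml" are joined onto the assets root. This is a
    sketch of the expected behaviour, not the repository's implementation.
    """
    if os.path.isabs(xml_path):
        return xml_path
    return os.path.abspath(os.path.join(ASSETS_ROOT, xml_path))
```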
/panda/config/controller_config.hjson: -------------------------------------------------------------------------------- 1 | /* Default values for controller config parameters */ 2 | { 3 | // Joint Velocity controller 4 | "joint_velocity": 5 | { 6 | "control_range": [1, 1, 1, 1, 1, 1, 1], 7 | "kv": [8.0, 7.0, 6.0, 4.0, 2.0, 0.5, 0.1], 8 | "interpolation": "linear" 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /panda/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .mujoco_py_renderer import MujocoPyRenderer 2 | 3 | def str2bool(v): 4 | return v.lower() == 'true' 5 | 6 | 7 | def str2intlist(value): 8 | if not value: 9 | return value 10 | else: 11 | return [int(num) for num in value.split(',')] 12 | 13 | 14 | def str2list(value): 15 | if not value: 16 | return value 17 | else: 18 | return [num for num in value.split(',')] 19 | -------------------------------------------------------------------------------- /panda/models/assets/base.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /panda/models/arena/arena.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from models.base import MujocoXML 3 | from utils.mjcf_utils import array_to_string, string_to_array 4 | 5 | 6 | class Arena(MujocoXML): 7 | """Base arena class.""" 8 | 9 | def set_origin(self, offset): 10 | """Applies a constant offset to all objects.""" 11 | offset = np.array(offset) 12 | for node in self.worldbody.findall("./*[@pos]"): 13 | cur_pos = string_to_array(node.get("pos")) 14 | new_pos = cur_pos + offset 15 | node.set("pos", array_to_string(new_pos)) 16 | -------------------------------------------------------------------------------- /panda/models/assets/objects/cube.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /panda/models/assets/objects/cyl.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /panda/models/assets/objects/cyl2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /panda/models/assets/objects/basepart.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /panda/utils/logger.py: -------------------------------------------------------------------------------- 1 | import time 2 | import logging 3 | 4 | import numpy as np 5 | import colorlog 6 | 7 | 8 | formatter = colorlog.ColoredFormatter( 9 | "%(log_color)s[%(asctime)s] %(message)s", 10 | datefmt=None, 11 | reset=True, 12 | log_colors={'DEBUG': 'cyan', 'INFO': 'white', 'WARNING': 'yellow', 'ERROR': 'red,bold', 'CRITICAL': 'red,bg_white'}, 13 | secondary_log_colors={}, 14 | style='%') 15 | 16 | logger = colorlog.getLogger('gear_assembly') 17 | 
logger.setLevel(logging.DEBUG) 18 | 19 | #fh = logging.FileHandler('log') 20 | #fh.setLevel(logging.DEBUG) 21 | #fh.setFormatter(formatter) 22 | #logger.addHandler(fh) 23 | 24 | ch = colorlog.StreamHandler() 25 | ch.setLevel(logging.DEBUG) 26 | ch.setFormatter(formatter) 27 | logger.addHandler(ch) 28 | -------------------------------------------------------------------------------- /panda/models/task/task.py: -------------------------------------------------------------------------------- 1 | from models.world import MujocoWorldBase 2 | 3 | 4 | class Task(MujocoWorldBase): 5 | """ 6 | Base class for creating MJCF model of a task. 7 | 8 | A task typically involves a robot interacting with objects in an arena 9 | (workshpace). The purpose of a task class is to generate a MJCF model 10 | of the task by combining the MJCF models of each component together and 11 | place them to the right positions. 12 | """ 13 | 14 | def merge_robot(self, mujoco_robot): 15 | """Adds robot model to the MJCF model.""" 16 | pass 17 | 18 | def merge_arena(self, mujoco_arena): 19 | """Adds arena model to the MJCF model.""" 20 | pass 21 | 22 | def merge_objects(self, mujoco_objects): 23 | """Adds physical objects to the MJCF model.""" 24 | pass 25 | -------------------------------------------------------------------------------- /panda/utils/mpi.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from mpi4py import MPI 3 | 4 | 5 | def _mpi_average(x): 6 | buf = np.zeros_like(x) 7 | MPI.COMM_WORLD.Allreduce(x, buf, op=MPI.SUM) 8 | buf /= MPI.COMM_WORLD.Get_size() 9 | return buf 10 | 11 | 12 | # Average across the cpu's data 13 | def mpi_average(x): 14 | if isinstance(x, dict): 15 | keys = sorted(x.keys()) 16 | return {k: _mpi_average(np.array(x[k])) for k in keys} 17 | else: 18 | return _mpi_average(np.array(x)) 19 | 20 | 21 | def _mpi_sum(x): 22 | buf = np.zeros_like(x) 23 | MPI.COMM_WORLD.Allreduce(x, buf, op=MPI.SUM) 24 | return buf 25 | 26 | 27 | # Sum over the cpu's data 28 | def mpi_sum(x): 29 | if isinstance(x, dict): 30 | keys = sorted(x.keys()) 31 | return {k: _mpi_sum(np.array(x[k])) for k in keys} 32 | else: 33 | return _mpi_sum(np.array(x)) 34 | -------------------------------------------------------------------------------- /panda/models/objects/xml_objects.py: -------------------------------------------------------------------------------- 1 | from models.objects import MujocoXMLObject 2 | from utils.mjcf_utils import xml_path_completion 3 | 4 | 5 | class CubeObject(MujocoXMLObject): 6 | """ 7 | Round Gear 8 | """ 9 | 10 | def __init__(self): 11 | super().__init__(xml_path_completion("objects/cube.xml")) 12 | 13 | 14 | class BasePartObject(MujocoXMLObject): 15 | """ 16 | Base of the assembly 17 | """ 18 | 19 | def __init__(self): 20 | super().__init__(xml_path_completion("objects/basepart.xml")) 21 | 22 | class CylObject(MujocoXMLObject): 23 | """ 24 | Clutter 25 | """ 26 | 27 | def __init__(self): 28 | super().__init__(xml_path_completion("objects/cyl.xml")) 29 | 30 | class Cyl2Object(MujocoXMLObject): 31 | """ 32 | Clutter 33 | """ 34 | 35 | def __init__(self): 36 | super().__init__(xml_path_completion("objects/cyl2.xml")) 37 | -------------------------------------------------------------------------------- /panda/controller/controller.py: -------------------------------------------------------------------------------- 1 | import abc # for abstract base class definitions 2 | 3 | 4 | class Controller(metaclass=abc.ABCMeta): 5 | """ 6 | Base 
class for all robot controllers. 7 | Defines basic interface for all controllers to adhere to. 8 | """ 9 | 10 | def __init__(self, bullet_data_path, robot_jpos_getter): 11 | """ 12 | Args: 13 | bullet_data_path (str): base path to bullet data. 14 | 15 | robot_jpos_getter (function): function that returns the position of the joints 16 | as a numpy array of the right dimension. 17 | """ 18 | raise NotImplementedError 19 | 20 | @abc.abstractmethod 21 | def get_control(self, *args, **kwargs): 22 | """ 23 | Retrieve a control input from the controller. 24 | """ 25 | raise NotImplementedError 26 | 27 | @abc.abstractmethod 28 | def sync_state(self): 29 | """ 30 | This function does internal bookkeeping to maintain 31 | consistency between the robot being controlled and 32 | the controller state. 33 | """ 34 | raise NotImplementedError 35 | -------------------------------------------------------------------------------- /panda/rl/policies/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | def fanin_init(tensor): # Initialization 7 | size = tensor.size() 8 | if len(size) == 2: 9 | fan_in = size[0] 10 | elif len(size) > 2: 11 | fan_in = np.prod(size[1:]) 12 | else: 13 | raise Exception("Shape must have dimension at least 2.") 14 | bound = 1. / np.sqrt(fan_in) 15 | return tensor.data.uniform_(-bound, bound) # fills a tensor with a uniform distribution between the bounds 16 | 17 | class MLP(nn.Module): 18 | def __init__(self, config, input_dim, output_dim, hid_dims=[]): 19 | super().__init__() 20 | #activation_fn = getattr(F, config.activation) 21 | activation_fn = nn.ReLU() 22 | 23 | fc = [] 24 | prev_dim = input_dim 25 | for d in hid_dims: 26 | fc.append(nn.Linear(prev_dim, d)) 27 | fanin_init(fc[-1].weight) 28 | fc[-1].bias.data.fill_(0.1) 29 | fc.append(activation_fn) 30 | prev_dim = d 31 | fc.append(nn.Linear(prev_dim, output_dim)) 32 | fc[-1].weight.data.uniform_(-1e-3, 1e-3) 33 | fc[-1].bias.data.uniform_(-1e-3, 1e-3) 34 | self.fc = nn.Sequential(*fc) 35 | 36 | def forward(self, ob): 37 | return self.fc(ob) 38 | -------------------------------------------------------------------------------- /panda/README.md: -------------------------------------------------------------------------------- 1 | ## Config 2 | Config contains configuration details for the environment, the controller and the learning algorithms. 3 | 4 | ## Controller 5 | Controller contains the joint velocity controller class. Actions coming from the learning algorithms are interpreted as joint velocities, but the real robot is equipped with a torque controller. The Controller class converts the joint velocities to joint torques and also interpolates between successive actions coming from the policy if needed. 6 | 7 | ## Environments 8 | Environments include the base environment and the environments for the Panda Robot and the Grasping task. Panda Robot defines everything robot-related. Grasping task defines everything task-related, including a reward function. Environments also contain an action space class that defines the size and limits of the robot's action space used in learning. 9 | 10 | ## Models 11 | Models is where everything is actually defined for simulation. Arena, Gripper, Objects and Robot are the classes that use the corresponding XMLs from Assets. Task merges all the pieces and instantiates a scene model for MuJoCo to perform the simulation. Task is used later on in the environment.
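To make this composition concrete, here is a minimal sketch of assembling a grasping scene from the classes shown in this repository, assuming it is run from the `panda/` package root so the repo-style imports resolve; the gripper mount body name ("link7") is an illustrative assumption, and the environment classes normally do this wiring internally.

```python
from collections import OrderedDict

from models.arena import TableArena
from models.gripper import PandaGripper
from models.objects import CubeObject
from models.robot import Panda
from models.task import GraspingTask

# Build the individual MJCF pieces defined in this package.
robot = Panda()
robot.set_base_xpos([0, 0, 0])               # place the arm base in the world frame
robot.add_gripper("link7", PandaGripper())   # mount body name "link7" is an assumption
arena = TableArena(table_full_size=(0.35, 0.6, 0.02))
objects = OrderedDict([("cube", CubeObject())])

# GraspingTask merges the arena, the robot and the objects into one scene model.
task = GraspingTask(arena, robot, objects)
```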
12 | 13 | ## RL 14 | RL is where the actual learning happens. PPO and SAC are the implemented algorithms. The neural network models are in policies. 15 | 16 | 17 | ## Utils 18 | Utils contains auxiliary functions for both the environment definition and the learning part. 19 | 20 | Note: all the files are heavily commented (with example outputs) to make it easier to understand what is happening. 21 | -------------------------------------------------------------------------------- /panda/models/robot/robot.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | from models.base import MujocoXML 3 | 4 | 5 | class Robot(MujocoXML): 6 | """Base class for all robot models.""" 7 | 8 | def __init__(self, fname): 9 | """Initializes from file @fname.""" 10 | super().__init__(fname) 11 | # key: gripper name and value: gripper model 12 | self.grippers = OrderedDict() 13 | 14 | def add_gripper(self, arm_name, gripper): 15 | """ 16 | Mounts gripper to arm. 17 | 18 | Throws error if robot already has a gripper or gripper type is incorrect. 19 | 20 | Args: 21 | arm_name (str): name of arm mount 22 | gripper (MujocoGripper instance): gripper MJCF model 23 | """ 24 | if arm_name in self.grippers: 25 | raise ValueError("Attempts to add multiple grippers to one body") 26 | 27 | arm_subtree = self.worldbody.find(".//body[@name='{}']".format(arm_name)) 28 | 29 | for body in gripper.worldbody: 30 | arm_subtree.append(body) 31 | 32 | self.merge(gripper, merge_body=False) 33 | self.grippers[arm_name] = gripper 34 | 35 | @property 36 | def dof(self): 37 | """Returns the number of DOF of the robot, not including gripper.""" 38 | raise NotImplementedError 39 | 40 | @property 41 | def joints(self): 42 | """Returns a list of joint names of the robot.""" 43 | raise NotImplementedError 44 | 45 | @property 46 | def init_qpos(self): 47 | """Returns default qpos.""" 48 | raise NotImplementedError 49 | -------------------------------------------------------------------------------- /panda/models/gripper/panda_gripper.py: -------------------------------------------------------------------------------- 1 | """ 2 | Gripper for Franka's Panda (has two fingers). 3 | """ 4 | import numpy as np 5 | from utils.mjcf_utils import xml_path_completion 6 | from models.gripper.gripper import Gripper 7 | 8 | 9 | class PandaGripperBase(Gripper): 10 | """ 11 | Gripper for Franka's Panda (has two fingers). 12 | """ 13 | 14 | def __init__(self): 15 | super().__init__(xml_path_completion("gripper/panda_gripper.xml")) 16 | 17 | def format_action(self, action): 18 | return action 19 | 20 | @property 21 | def init_qpos(self): 22 | return np.array([0.020833, -0.020833]) 23 | 24 | @property 25 | def joints(self): 26 | return ["finger_joint1", "finger_joint2"] 27 | 28 | @property 29 | def dof(self): 30 | return 2 31 | 32 | @property 33 | def visualization_sites(self): 34 | return ["grip_site"] 35 | 36 | def contact_geoms(self): 37 | return ["hand_collision", "finger1_collision", "finger2_collision", "finger1_tip_collision", "finger2_tip_collision"] 38 | 39 | @property 40 | def left_finger_geoms(self): 41 | return ["finger1_tip_collision"] 42 | 43 | @property 44 | def right_finger_geoms(self): 45 | return ["finger2_tip_collision"] 46 | 47 | 48 | class PandaGripper(PandaGripperBase): 49 | """ 50 | Modifies PandaGripperBase to only take one action.
51 | """ 52 | 53 | def format_action(self, action): 54 | """ 55 | 1 => closed, -1 => open 56 | """ 57 | assert len(action) == 1 58 | return np.array([-1 * action[0], 1 * action[0]]) 59 | 60 | @property 61 | def dof(self): 62 | return 1 63 | -------------------------------------------------------------------------------- /panda/models/arena/table_arena.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from models.arena import Arena 3 | from utils.mjcf_utils import xml_path_completion 4 | from utils.mjcf_utils import array_to_string, string_to_array 5 | 6 | 7 | class TableArena(Arena): 8 | """Workspace that contains a tabletop.""" 9 | 10 | def __init__(self, table_full_size=(0.35, 0.6, 0.02), table_friction=(1, 0.005, 0.0001)): 11 | """ 12 | Args: 13 | table_full_size: full dimensions of the table 14 | table_friction: friction parameters of the table 15 | """ 16 | super().__init__(xml_path_completion("arena/table_arena.xml")) 17 | 18 | self.table_full_size = np.array(table_full_size) 19 | self.table_half_size = self.table_full_size / 2 20 | self.table_friction = table_friction 21 | 22 | self.floor = self.worldbody.find("./geom[@name='floor']") 23 | self.table_body = self.worldbody.find("./body[@name='table']") 24 | self.table_collision = self.table_body.find("./geom[@name='table_collision']") 25 | self.table_view = self.table_body.find("./geom[@name='table_view']") 26 | self.configure_location() 27 | 28 | def configure_location(self): 29 | self.bottom_pos = np.array([0, 0, 0]) 30 | self.floor.set("pos", array_to_string(self.bottom_pos)) 31 | self.table_collision.set("size", array_to_string(self.table_full_size)) 32 | self.table_view.set("size", array_to_string(self.table_full_size)) 33 | 34 | @property 35 | def table_top_abs(self): 36 | """ 37 | Returns the absolute position of table top. 
38 | """ 39 | return string_to_array(self.table_body.get("pos")) 40 | -------------------------------------------------------------------------------- /panda/rl/policies/distributions.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | import torch.distributions 7 | 8 | 9 | # Normal 10 | FixedNormal = torch.distributions.Normal 11 | 12 | normal_init = FixedNormal.__init__ 13 | FixedNormal.__init__ = lambda self, mean, std: normal_init(self, mean.double(), std.double()) 14 | 15 | log_prob_normal = FixedNormal.log_prob 16 | FixedNormal.log_probs = lambda self, actions: log_prob_normal(self, actions.double()).sum(-1, keepdim=True).float() 17 | 18 | normal_entropy = FixedNormal.entropy 19 | FixedNormal.entropy = lambda self: normal_entropy(self).sum(-1).float() 20 | 21 | FixedNormal.mode = lambda self: self.mean.float() 22 | 23 | normal_sample = FixedNormal.sample 24 | FixedNormal.sample = lambda self: normal_sample(self).float() 25 | 26 | normal_rsample = FixedNormal.rsample 27 | FixedNormal.rsample = lambda self: normal_rsample(self).float() 28 | 29 | class MixedDistribution(nn.Module): 30 | def __init__(self, distributions): 31 | super().__init__() 32 | assert isinstance(distributions, OrderedDict) 33 | self.distributions = distributions 34 | 35 | def mode(self): 36 | return OrderedDict([(k, dist.mode()) for k, dist in self.distributions.items()]) 37 | 38 | def sample(self): 39 | return OrderedDict([(k, dist.sample()) for k, dist in self.distributions.items()]) 40 | 41 | def rsample(self): 42 | return OrderedDict([(k, dist.rsample()) for k, dist in self.distributions.items()]) 43 | 44 | def log_probs(self, x): 45 | assert isinstance(x, dict) 46 | return OrderedDict([(k, dist.log_probs(x[k])) for k, dist in self.distributions.items()]) 47 | 48 | def entropy(self): 49 | return sum([dist.entropy() for dist in self.distributions.values()]) 50 | 51 | -------------------------------------------------------------------------------- /panda/rl/base_agent.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | from rl.normalizer import Normalizer 3 | 4 | 5 | class BaseAgent(): 6 | def __init__(self, config, ob_space): # ob_space is an ordered dict where keys are ob names and values are dimensions 7 | self._config = config 8 | self._ob_norm = Normalizer(ob_space, default_clip_range=config.clip_range, clip_obs=config.clip_obs) 9 | 10 | def normalize(self, ob): 11 | if self._config.ob_norm: 12 | return self._ob_norm.normalize(ob) 13 | return ob 14 | 15 | def act(self, ob, is_train=True): 16 | ob = self.normalize(ob) 17 | ac, activation = self._actor.act(ob, is_train=is_train) 18 | return ac, activation 19 | 20 | def update_normalizer(self, obs): 21 | if self._config.ob_norm: 22 | self._ob_norm.update(obs) 23 | self._ob_norm.recompute_stats() 24 | 25 | def store_episode(self, rollouts): 26 | raise NotImplementedError() 27 | 28 | def replay_buffer(self): 29 | return self._buffer.state_dict() # gives episodes buffer; buffer is like a rollout but can store more values for multiple episodes 30 | 31 | def load_replay_buffer(self, state_dict): 32 | self._buffer.load_state_dict(state_dict) 33 | 34 | def sync_networks(self): 35 | raise NotImplementedError() 36 | 37 | def train(self): 38 | raise NotImplementedError() 39 | 40 | def _soft_update_target_network(self, target, source, tau): 41 | for 
target_param, source_param in zip(target.parameters(), source.parameters()): 42 | target_param.data.copy_((1 - tau) * source_param.data + tau * target_param.data) 43 | 44 | def _copy_target_network(self, target, source): 45 | for target_param, source_param in zip(target.parameters(), source.parameters()): 46 | target_param.data.copy_(source_param.data) 47 | -------------------------------------------------------------------------------- /panda/models/task/grasping_task.py: -------------------------------------------------------------------------------- 1 | from utils.mjcf_utils import new_joint, array_to_string 2 | from models.task import Task 3 | 4 | 5 | class GraspingTask(Task): 6 | """ 7 | Creates MJCF model of a grasping task. 8 | 9 | A gear assembly task consists of a robot picking up a cube from a table. This class combines 10 | the robot, the arena with table, and the objects into a single MJCF model. 11 | """ 12 | 13 | def __init__(self, mujoco_arena, mujoco_robot, mujoco_objects): 14 | """ 15 | Args: 16 | mujoco_arena: MJCF model of robot workspace 17 | mujoco_robot: MJCF model of robot model 18 | mujoco_objects: a list of MJCF models of physical objects 19 | """ 20 | super().__init__() 21 | 22 | self.merge_arena(mujoco_arena) 23 | self.merge_robot(mujoco_robot) 24 | self.merge_objects(mujoco_objects) 25 | 26 | 27 | def merge_robot(self, mujoco_robot): 28 | """Adds robot model to the MJCF model.""" 29 | self.robot = mujoco_robot 30 | self.merge(mujoco_robot) 31 | 32 | def merge_arena(self, mujoco_arena): 33 | """Adds arena model to the MJCF model.""" 34 | self.arena = mujoco_arena 35 | self.table_offset = mujoco_arena.table_top_abs 36 | self.table_size = mujoco_arena.table_full_size 37 | self.table_body = mujoco_arena.table_body 38 | self.merge(mujoco_arena) 39 | 40 | def merge_objects(self, mujoco_objects): 41 | """Adds physical objects to the MJCF model.""" 42 | self.mujoco_objects = mujoco_objects 43 | self.objects = {} # xml manifestation 44 | self.max_horizontal_radius = 0 45 | for obj_name, obj_mjcf in mujoco_objects.items(): 46 | self.merge_asset(obj_mjcf) 47 | # Load object 48 | obj = obj_mjcf.get_collision(name=obj_name, site=True) 49 | obj.append(new_joint(name=obj_name, type="free", damping="0.0005")) 50 | self.objects[obj_name] = obj 51 | self.worldbody.append(obj) 52 | -------------------------------------------------------------------------------- /panda/environments/action_space.py: -------------------------------------------------------------------------------- 1 | """ Define ActionSpace class to represent action space. """ 2 | 3 | from collections import OrderedDict 4 | import numpy as np 5 | from utils.logger import logger 6 | 7 | 8 | class ActionSpace(object): 9 | """ 10 | Base class for action space 11 | This action space is used in the provided RL training code. 12 | """ 13 | 14 | def __init__(self, size, minimum=-1., maximum=1.): 15 | """ 16 | Loads a mujoco xml from file. 17 | 18 | Args: 19 | size (int): action dimension. 20 | min: minimum values for action. 21 | max: maximum values for action. 22 | """ 23 | self.size = size 24 | self.shape = OrderedDict([('default', size)]) 25 | 26 | self._minimum = np.array(minimum) 27 | self._minimum.setflags(write=False) 28 | 29 | self._maximum = np.array(maximum) 30 | self._maximum.setflags(write=False) 31 | 32 | @property 33 | def minimum(self): 34 | """ 35 | Returns the minimum values of the action. 
36 | """ 37 | return self._minimum 38 | 39 | @property 40 | def maximum(self): 41 | """ 42 | Returns the maximum values of the action. 43 | """ 44 | return self._maximum 45 | 46 | def keys(self): 47 | """ 48 | Returns the keys of the action space. 49 | """ 50 | return self.shape.keys() 51 | 52 | def __repr__(self): 53 | template = ('ActionSpace(shape={},''minimum={}, maximum={})') 54 | return template.format(self.shape, self._minimum, self._maximum) 55 | 56 | def __eq__(self, other): 57 | """ 58 | Returns whether other action space is the same or not. 59 | """ 60 | if not isinstance(other, ActionSpace): 61 | return False 62 | return (self.minimum == other.minimum).all() and (self.maximum == other.maximum).all() 63 | 64 | def sample(self): 65 | """ 66 | Returns a sample from the action space. 67 | """ 68 | return np.random.uniform(low=self.minimum, high=self.maximum, size=self.size) 69 | -------------------------------------------------------------------------------- /panda/config/grasping.py: -------------------------------------------------------------------------------- 1 | from utils import str2bool, str2intlist 2 | 3 | 4 | 5 | def add_argument(parser): 6 | """ 7 | Adds a list of arguments to argparser for the lift environment. 8 | """ 9 | 10 | # training scene 11 | 12 | parser.add_argument('--mode', type=int, default=1, 13 | help='1: nominal cube scene, 2: collision avoidance scene') 14 | 15 | 16 | # mujoco simulation 17 | 18 | parser.add_argument('--table_full_size', type=float, default=(0.35, 0.46, 0.02), 19 | help='x, y, and z dimensions of the table') 20 | parser.add_argument('--gripper_type', type=str, default='PandaGripper', 21 | help='Gripper type of robot') 22 | parser.add_argument('--gripper_visualization', type=str2bool, default=True, 23 | help='using gripper visualization') 24 | 25 | # rendering 26 | 27 | parser.add_argument('--render_collision_mesh', type=str2bool, default=False, 28 | help='if rendering collision meshes in camera') 29 | parser.add_argument('--render_visual_mesh', type=str2bool, default=True, 30 | help='if rendering visual meshes in camera') 31 | 32 | # episode settings 33 | 34 | parser.add_argument('--horizon', type=int, default=600, 35 | help='Every episode lasts for exactly @horizon timesteps') 36 | parser.add_argument('--ignore_done', type=str2bool, default=True, 37 | help='if never terminating the environment (ignore @horizon)') 38 | 39 | # controller 40 | 41 | parser.add_argument('--control_freq', type=int, default= 250, 42 | help='control signals to receive in every simulated second, sets the amount of simulation time that passes between every action input') 43 | 44 | 45 | def get_default_config(): 46 | """ 47 | Gets default configurations for the lift environment. 
48 | """ 49 | import argparse 50 | parser = argparse.ArgumentParser("Default Configuration for lift Environment") 51 | add_argument(parser) 52 | 53 | config = parser.parse_args([]) 54 | return config 55 | -------------------------------------------------------------------------------- /panda/models/gripper/gripper.py: -------------------------------------------------------------------------------- 1 | """ 2 | Defines the base class of gripper 3 | """ 4 | from models.base import MujocoXML 5 | 6 | 7 | class Gripper(MujocoXML): 8 | """Base class for gripper""" 9 | 10 | def __init__(self, fname): 11 | super().__init__(fname) 12 | 13 | def format_action(self, action): 14 | """ 15 | Given (-1,1) abstract control as np-array 16 | returns the (-1,1) control signals 17 | for underlying actuators as 1-d np array 18 | """ 19 | raise NotImplementedError 20 | 21 | @property 22 | def init_qpos(self): 23 | """ 24 | Returns rest(open) qpos of the gripper 25 | """ 26 | raise NotImplementedError 27 | 28 | @property 29 | def dof(self): 30 | """ 31 | Returns the number of DOF of the gripper 32 | """ 33 | raise NotImplementedError 34 | 35 | @property 36 | def joints(self): 37 | """ 38 | Returns a list of joint names of the gripper 39 | """ 40 | raise NotImplementedError 41 | 42 | def contact_geoms(self): 43 | """ 44 | Returns a list of names corresponding to the geoms 45 | used to determine contact with the gripper. 46 | """ 47 | return [] 48 | 49 | @property 50 | def visualization_sites(self): 51 | """ 52 | Returns a list of sites corresponding to the geoms 53 | used to aid visualization by human. 54 | (and should be hidden from robots) 55 | """ 56 | return [] 57 | 58 | @property 59 | def left_finger_geoms(self): 60 | """ 61 | Geoms corresponding to left finger of a gripper 62 | """ 63 | raise NotImplementedError 64 | 65 | @property 66 | def right_finger_geoms(self): 67 | """ 68 | Geoms corresponding to raise finger of a gripper 69 | """ 70 | raise NotImplementedError 71 | 72 | def hide_visualization(self): 73 | """ 74 | Hides all visualization geoms and sites. 75 | This should be called before rendering to agents 76 | """ 77 | for site_name in self.visualization_sites: 78 | site = self.worldbody.find(".//site[@name='{}']".format(site_name)) 79 | site.set("rgba", "0 0 0 0") 80 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Intelligent-Task-Learning 2 | The Repository for the _Autonomous Robots 2022 Journal article:_ 3 | 4 | "[Continuous control actions learning and adaptation for robotic manipulation through reinforcement learning](https://link.springer.com/article/10.1007/s10514-022-10034-z)" 5 | 6 | This contains a python toolkit for learning a grasping task with Franka Emika Panda Robot. The robot can be trained to grasp the cube, avoid obstacles and learn to manage redundancy using modern Reinforcement Learning algorithms of [Proximal Policy Optimization (PPO)](https://arxiv.org/abs/1707.06347) and [Soft Actor-Critic (SAC)](https://arxiv.org/abs/1812.05905). 
It is powered by the [MuJoCo physics engine](http://www.mujoco.org/). 7 | 8 | 9 | # How to cite 10 | ``` 11 | @article{shahid2022continuous, 12 | title={Continuous control actions learning and adaptation for robotic manipulation through reinforcement learning}, 13 | author={Shahid, Asad Ali and Piga, Dario and Braghin, Francesco and Roveda, Loris}, 14 | journal={Autonomous Robots}, 15 | pages={1--16}, 16 | year={2022}, 17 | publisher={Springer} 18 | } 19 | ``` 20 | 21 | # New Experiment (Dynamic Environment) 22 | The adapted policy can grasp the moving cube after 30 minutes of retraining. 23 | ![](adapted_policy.gif) 24 | 25 | Before retraining, the base grasping policy trained on a static cube is not able to grasp the moving cube. 26 | ![](base_policy.gif) 27 | 28 | # Simulation Video 29 | [![Video](https://img.youtube.com/vi/aX55Zc2XMTE/maxres3.jpg)](https://www.youtube.com/watch?v=aX55Zc2XMTE) 30 | 31 | # Experimental validation 32 | [![Video](https://github.com/Asad-Shahid/Intelligent-Task-Learning/blob/master/exp_image.png)](https://drive.google.com/file/d/1zlS-_HIWMlIAvrxqGNGRyMbuDfQrws8z/view) 33 | 34 | 35 | ## Installation 36 | 37 | To use this toolkit, first install [MuJoCo 200](https://www.roboti.us/index.html) and then [mujoco-py](https://github.com/openai/mujoco-py) from OpenAI. mujoco-py allows using MuJoCo from a Python interface. 38 | The installation requires Python 3.6 or higher. It is recommended to install all the required packages in a conda virtual environment. 39 | 40 | 41 | ## References 42 | This toolkit is mainly developed based on the [Surreal Robotics Suite](https://github.com/StanfordVL/robosuite), and the reinforcement learning part is adapted from 43 | [this repo](https://github.com/clvrai/furniture) 44 | -------------------------------------------------------------------------------- /panda/models/robot/panda_robot.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from models.robot.robot import Robot 3 | from utils.mjcf_utils import xml_path_completion, array_to_string 4 | 5 | 6 | class Panda(Robot): 7 | """Panda is a sensitive single-arm robot designed by Franka.""" 8 | 9 | def __init__(self, xml_path="robot/panda/robot_torque.xml"): 10 | 11 | super().__init__(xml_path_completion(xml_path)) 12 | 13 | self.bottom_offset = np.array([0, 0, 0]) # ignore the bottom offset; directly define base_xpos in world frame 14 | self.set_joint_damping() 15 | self._init_qpos = np.array([0, np.pi / 16.0, 0.00, -np.pi / 2.0 - np.pi / 3.0, 0.00, np.pi - 0.2, -np.pi/4]) 16 | 17 | def set_base_xpos(self, pos): 18 | """Places the robot on position @pos.""" 19 | node = self.worldbody.find("./body[@name='link0']") 20 | node.set("pos", array_to_string(pos - self.bottom_offset)) 21 | 22 | def set_joint_damping(self, damping=np.array((0.1, 0.1, 0.1, 0.1, 0.1, 0.01, 0.01))): 23 | """Set joint damping """ 24 | body = self._base_body 25 | for i in range(len(self._link_body)): 26 | body = body.find("./body[@name='{}']".format(self._link_body[i])) 27 | joint = body.find("./joint[@name='{}']".format(self._joints[i])) 28 | joint.set("damping", array_to_string(np.array([damping[i]]))) 29 | 30 | def set_joint_frictionloss(self, friction=np.array((0.1, 0.1, 0.1, 0.1, 0.1, 0.01, 0.01))): 31 | """Set joint friction loss (static friction)""" 32 | body = self._base_body 33 | for i in range(len(self._link_body)): 34 | body = body.find("./body[@name='{}']".format(self._link_body[i])) 35 | joint = 
body.find("./joint[@name='{}']".format(self._joints[i])) 36 | joint.set("frictionloss", array_to_string(np.array([friction[i]]))) 37 | 38 | @property 39 | def dof(self): 40 | return 7 41 | 42 | @property 43 | def joints(self): 44 | return ["joint{}".format(x) for x in range(1, 8)] 45 | 46 | @property 47 | def init_qpos(self): 48 | return self._init_qpos 49 | 50 | @property 51 | def contact_geoms(self): 52 | return ["link{}_collision".format(x) for x in range(1, 8)] 53 | 54 | @property 55 | def _base_body(self): 56 | node = self.worldbody.find("./body[@name='link0']") 57 | return node 58 | 59 | @property 60 | def _link_body(self): 61 | return ["link1", "link2", "link3", "link4", "link5", "link6", "link7"] 62 | 63 | @property 64 | def _joints(self): 65 | return ["joint1", "joint2", "joint3", "joint4", "joint5", "joint6", "joint7"] 66 | -------------------------------------------------------------------------------- /panda/environment.yml: -------------------------------------------------------------------------------- 1 | name: robo 2 | channels: 3 | - defaults 4 | dependencies: 5 | - _libgcc_mutex=0.1=main 6 | - _openmp_mutex=5.1=1_gnu 7 | - ca-certificates=2023.08.22=h06a4308_0 8 | - ld_impl_linux-64=2.38=h1181459_1 9 | - libffi=3.4.4=h6a678d5_0 10 | - libgcc-ng=11.2.0=h1234567_1 11 | - libgfortran-ng=7.5.0=ha8ba4b0_17 12 | - libgfortran4=7.5.0=ha8ba4b0_17 13 | - libgomp=11.2.0=h1234567_1 14 | - libstdcxx-ng=11.2.0=h1234567_1 15 | - mpi=1.0=mpich 16 | - mpi4py=3.1.4=py39hfc96bbd_0 17 | - mpich=3.3.2=hc856adb_0 18 | - ncurses=6.4=h6a678d5_0 19 | - openssl=3.0.10=h7f8727e_2 20 | - pip=23.2.1=py39h06a4308_0 21 | - python=3.9.18=h955ad1f_0 22 | - readline=8.2=h5eee18b_0 23 | - setuptools=68.0.0=py39h06a4308_0 24 | - sqlite=3.41.2=h5eee18b_0 25 | - tk=8.6.12=h1ccaba5_0 26 | - tzdata=2023c=h04d1e81_0 27 | - wheel=0.38.4=py39h06a4308_0 28 | - xz=5.4.2=h5eee18b_0 29 | - zlib=1.2.13=h5eee18b_0 30 | - pip: 31 | - appdirs==1.4.4 32 | - certifi==2023.7.22 33 | - cffi==1.15.1 34 | - charset-normalizer==3.2.0 35 | - click==8.1.7 36 | - cmake==3.27.4.1 37 | - colorlog==6.7.0 38 | - cython==0.29.36 39 | - docker-pycreds==0.4.0 40 | - fasteners==0.18 41 | - filelock==3.12.4 42 | - gitdb==4.0.10 43 | - gitpython==3.1.36 44 | - glfw==2.6.2 45 | - h5py==3.9.0 46 | - hjson==3.1.0 47 | - idna==3.4 48 | - imageio==2.31.3 49 | - jinja2==3.1.2 50 | - lit==16.0.6 51 | - markupsafe==2.1.3 52 | - mpmath==1.3.0 53 | - mujoco-py==2.1.2.14 54 | - networkx==3.1 55 | - numpy==1.25.2 56 | - nvidia-cublas-cu11==11.10.3.66 57 | - nvidia-cuda-cupti-cu11==11.7.101 58 | - nvidia-cuda-nvrtc-cu11==11.7.99 59 | - nvidia-cuda-runtime-cu11==11.7.99 60 | - nvidia-cudnn-cu11==8.5.0.96 61 | - nvidia-cufft-cu11==10.9.0.58 62 | - nvidia-curand-cu11==10.2.10.91 63 | - nvidia-cusolver-cu11==11.4.0.1 64 | - nvidia-cusparse-cu11==11.7.4.91 65 | - nvidia-nccl-cu11==2.14.3 66 | - nvidia-nvtx-cu11==11.7.91 67 | - pathtools==0.1.2 68 | - pillow==10.0.0 69 | - protobuf==4.24.3 70 | - psutil==5.9.5 71 | - pycparser==2.21 72 | - pyyaml==6.0.1 73 | - requests==2.31.0 74 | - scipy==1.11.2 75 | - sentry-sdk==1.31.0 76 | - setproctitle==1.3.2 77 | - six==1.16.0 78 | - smmap==5.0.0 79 | - sympy==1.12 80 | - torch==2.0.1 81 | - torchvision==0.15.2 82 | - tqdm==4.66.1 83 | - triton==2.0.0 84 | - typing-extensions==4.7.1 85 | - urllib3==2.0.4 86 | - wandb==0.15.10 87 | prefix: /home/asad/miniconda3/envs/robo 88 | -------------------------------------------------------------------------------- /panda/models/assets/arena/table_arena.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /panda/rl/main.py: -------------------------------------------------------------------------------- 1 | """ Launch RL training and evaluation. """ 2 | 3 | import sys 4 | import signal 5 | import os 6 | import json 7 | import numpy as np 8 | import torch 9 | from mpi4py import MPI 10 | from config import argparser 11 | from rl.trainer import Trainer 12 | from utils.logger import logger 13 | 14 | 15 | np.set_printoptions(precision=3) 16 | np.set_printoptions(suppress=True) 17 | 18 | 19 | def run(config): 20 | """ 21 | Runs Trainer. 22 | """ 23 | # for parallel workers training (distributed training) 24 | rank = MPI.COMM_WORLD.Get_rank() # Each process is assigned a rank that is unique within Communicator(group of processes) 25 | config.rank = rank 26 | config.is_chef = rank == 0 # Binary 27 | config.seed = config.seed + rank 28 | config.num_workers = MPI.COMM_WORLD.Get_size() # total no. of processes is a size of communicator 29 | 30 | if config.is_chef: 31 | logger.warn('Run a base worker.') 32 | make_log_files(config) 33 | else: 34 | logger.warn('Run worker %d and disable logger.', config.rank) 35 | import logging 36 | logger.setLevel(logging.CRITICAL) 37 | 38 | def shutdown(signal, frame): 39 | logger.warn('Received signal %s: exiting', signal) 40 | sys.exit(128+signal) 41 | 42 | signal.signal(signal.SIGHUP, shutdown) 43 | signal.signal(signal.SIGINT, shutdown) 44 | signal.signal(signal.SIGTERM, shutdown) 45 | 46 | # set global seed 47 | np.random.seed(config.seed) 48 | torch.manual_seed(config.seed) 49 | torch.cuda.manual_seed_all(config.seed) 50 | 51 | # set the display no. configured with gpu 52 | os.environ["DISPLAY"] = ":0" 53 | # use gpu or cpu 54 | if config.gpu is not None: 55 | os.environ["CUDA_VISIBLE_DEVICES"] = "{}".format(config.gpu) 56 | assert torch.cuda.is_available() 57 | config.device = torch.device("cuda") 58 | else: 59 | config.device = torch.device("cpu") 60 | 61 | # build a trainer 62 | trainer = Trainer(config) 63 | if config.is_train: 64 | trainer.train() 65 | logger.info("Finish training") 66 | else: 67 | trainer.evaluate() 68 | logger.info("Finish evaluating") 69 | 70 | 71 | def make_log_files(config): 72 | """ 73 | Sets up log directories and saves git diff and command line. 
74 | """ 75 | config.run_name = '{}.{}.{}'.format(config.prefix, config.seed, config.suffix) 76 | 77 | config.log_dir = os.path.join(config.log_root_dir, config.run_name) 78 | logger.info('Create log directory: %s', config.log_dir) 79 | os.makedirs(config.log_dir, exist_ok=True) 80 | 81 | if config.is_train: 82 | # log config 83 | param_path = os.path.join(config.log_dir, 'params.json') 84 | logger.info('Store parameters in %s', param_path) 85 | with open(param_path, 'w') as fp: 86 | json.dump(config.__dict__, fp, indent=4, sort_keys=True) 87 | 88 | 89 | if __name__ == '__main__': 90 | args, unparsed = argparser() 91 | if len(unparsed): 92 | logger.error('Unparsed argument is detected:\n%s', unparsed) 93 | else: 94 | run(args) 95 | -------------------------------------------------------------------------------- /panda/rl/policies/mlp_actor_critic.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | import torch 3 | import torch.nn as nn 4 | import numpy as np 5 | from rl.policies.utils import MLP 6 | from rl.policies.actor_critic import Actor, Critic 7 | 8 | 9 | class MlpActor(Actor): 10 | def __init__(self, config, ob_space, ac_space, tanh_policy): 11 | super().__init__(config, ob_space, ac_space, tanh_policy) 12 | 13 | self._ac_space = ac_space # e.g. ActionSpace(shape=OrderedDict([('default', 8)]),minimum=-1.0, maximum=1.0) 14 | 15 | # observation # e.g. OrderedDict([('object-state', [10]), ('robot-state', [36])]) 16 | input_dim = sum([np.prod(x) for x in ob_space.values()]) # [[226,226,3],[36],[10]] (226*226*3)+36+10 17 | 18 | # build sequential layers of neural network 19 | self.fc = MLP(config, input_dim, config.rl_hid_size, [config.rl_hid_size]) # inp= 46, output= 64 20 | self.fc_means = nn.ModuleDict() 21 | self.fc_log_stds = nn.ModuleDict() 22 | 23 | for k, size in ac_space.shape.items(): # shape=OrderedDict([('default', 8)]) 24 | self.fc_means.update({k: MLP(config, config.rl_hid_size, size)}) # MLP here defines the out_layer of nn 25 | self.fc_log_stds.update({k: MLP(config, config.rl_hid_size, size)}) 26 | 27 | def forward(self, ob): # extracts input values from ob odict and passes it to nn 28 | inp = list(ob.values()) # [tensor([-0.007,.....,-0.51]), tensor([-0.08,.....,0.08])] 29 | if len(inp[0].shape) == 1: #[36] 30 | inp = [x.unsqueeze(0) for x in inp] # change the tensor shape from e.g. [36] to [1, 36] 31 | 32 | # concatenates the tensor to [1, 46], passes to model and activation_fn; out is a tensor of shape (1, hid_size) 33 | out = self._activation_fn(self.fc(torch.cat(inp, dim=-1))) 34 | out = torch.reshape(out, (out.shape[0], -1)) # [1, 64] 35 | 36 | means, stds = OrderedDict(), OrderedDict() 37 | for k in self._ac_space.keys(): 38 | mean = self.fc_means[k](out) # passes previous layers output to last layer to produce action tensor [1,8] 39 | log_std = self.fc_log_stds[k](out) # passes previous layers output to last layer for s.d. tensor 40 | log_std = torch.clamp(log_std, -10, 2) # clips between min and max 41 | std = torch.exp(log_std.double()) # exponential on s.d. 
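            # e.g. for the 'default' key (an 8-dim action here) mean and std are (1, 8)
            # tensors; downstream, Actor.act()/act_log() in actor_critic.py wrap them as
            # FixedNormal(mean, std) and, when tanh_policy is set, squash the sample,
            # roughly: action = torch.tanh(FixedNormal(mean, std).rsample())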
42 | means[k] = mean 43 | stds[k] = std 44 | 45 | return means, stds 46 | 47 | 48 | class MlpCritic(Critic): 49 | def __init__(self, config, ob_space, ac_space=None): 50 | super().__init__(config) 51 | 52 | input_dim = sum([np.prod(x) for x in ob_space.values()]) 53 | if ac_space is not None: 54 | input_dim += ac_space.size 55 | 56 | self.fc = MLP(config, input_dim, 1, [config.rl_hid_size] * 2) 57 | 58 | def forward(self, ob, ac=None): 59 | inp = list(ob.values()) 60 | if len(inp[0].shape) == 1: 61 | inp = [x.unsqueeze(0) for x in inp] 62 | 63 | if ac is not None: 64 | ac = list(ac.values()) 65 | if len(ac[0].shape) == 1: 66 | ac = [x.unsqueeze(0) for x in ac] 67 | inp.extend(ac) 68 | 69 | out = self.fc(torch.cat(inp, dim=-1)) 70 | out = torch.reshape(out, (out.shape[0], 1)) 71 | 72 | return out 73 | -------------------------------------------------------------------------------- /panda/rl/dataset.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from time import time 3 | import numpy as np 4 | 5 | class ReplayBuffer: 6 | def __init__(self, keys, buffer_size, sample_func): 7 | self._size = buffer_size # buffer size in config 8 | 9 | # memory management 10 | self._idx = 0 11 | self._current_size = 0 12 | self._sample_func = sample_func 13 | 14 | # create the buffer to store info 15 | self._keys = keys 16 | self._buffers = defaultdict(list) 17 | 18 | def clear(self): 19 | self._idx = 0 20 | self._current_size = 0 21 | self._buffers = defaultdict(list) 22 | 23 | # store the episode 24 | def store_episode(self, rollout): # stores an episode given a rollout; calling each time adds a new episode 25 | idx = self._idx = (self._idx + 1) % self._size 26 | self._current_size += 1 27 | 28 | if self._current_size > self._size: 29 | for k in self._keys: 30 | self._buffers[k][idx] = rollout[k] 31 | else: 32 | for k in self._keys: 33 | self._buffers[k].append(rollout[k]) 34 | 35 | # sample the data from the replay buffer 36 | def sample(self, batch_size): 37 | # sample transitions 38 | transitions = self._sample_func(self._buffers, batch_size) # buffers contains rollout(s), batch_size is in config 39 | return transitions 40 | 41 | def state_dict(self): 42 | return self._buffers 43 | 44 | def load_state_dict(self, state_dict): 45 | self._buffers = state_dict 46 | self._current_size = len(self._buffers['ac']) # no. of episodes in buffer 47 | 48 | 49 | class RandomSampler: 50 | def sample_func(self, episode_batch, batch_size_in_transitions): # episode_batch is buffers 51 | rollout_batch_size = len(episode_batch['ac']) # no. of episodes in buffer 52 | batch_size = batch_size_in_transitions 53 | 54 | episode_idxs = np.random.randint(0, rollout_batch_size, batch_size) # selects a list of episode idxs i.e. [2,0,4,0...], length equal to batch_size 55 | t_samples = [np.random.randint(len(episode_batch['ac'][episode_idx])) for episode_idx in episode_idxs] # [135,78,54,180,...] 
list 56 | 57 | transitions = {} 58 | for key in episode_batch.keys(): # selects transitions experiences corresponding to sampled timesteps from episode(s) 59 | # values are lists {'ob': [], 'ac': [], 'done': []}; len of each list is equal to batch_size 60 | transitions[key] = [episode_batch[key][episode_idx][t] for episode_idx, t in zip(episode_idxs, t_samples)] 61 | 62 | # selects next observations corresponding to sampled transitions from an episode(s) 63 | transitions['ob_next'] = [episode_batch['ob'][episode_idx][t + 1] for episode_idx, t in zip(episode_idxs, t_samples)] 64 | 65 | # join sequence of array values in transitions; ob values is a dict {'robot-state': array(), 'object-state': array()}, others are array 66 | new_transitions = {} 67 | for k, v in transitions.items(): 68 | if isinstance(v[0], dict): 69 | sub_keys = v[0].keys() 70 | new_transitions[k] = {sub_key: np.stack([v_[sub_key] for v_ in v]) for sub_key in sub_keys} 71 | else: 72 | new_transitions[k] = np.stack(v) 73 | return new_transitions 74 | -------------------------------------------------------------------------------- /panda/utils/mujoco_py_renderer.py: -------------------------------------------------------------------------------- 1 | from mujoco_py import MjViewer 2 | from mujoco_py.generated import const 3 | import glfw 4 | from collections import defaultdict 5 | 6 | 7 | class CustomMjViewer(MjViewer): 8 | 9 | keypress = defaultdict(list) 10 | keyup = defaultdict(list) 11 | keyrepeat = defaultdict(list) 12 | 13 | def key_callback(self, window, key, scancode, action, mods): 14 | if action == glfw.PRESS: 15 | tgt = self.keypress 16 | elif action == glfw.RELEASE: 17 | tgt = self.keyup 18 | elif action == glfw.REPEAT: 19 | tgt = self.keyrepeat 20 | else: 21 | return 22 | if tgt.get(key): 23 | for fn in tgt[key]: 24 | fn(window, key, scancode, action, mods) 25 | if tgt.get("any"): 26 | for fn in tgt["any"]: 27 | fn(window, key, scancode, action, mods) 28 | # retain functionality for closing the viewer 29 | if key == glfw.KEY_ESCAPE: 30 | super().key_callback(window, key, scancode, action, mods) 31 | else: 32 | # only use default mujoco callbacks if "any" callbacks are unset 33 | super().key_callback(window, key, scancode, action, mods) 34 | 35 | 36 | class MujocoPyRenderer: 37 | def __init__(self, sim): 38 | """ 39 | Args: 40 | sim: MjSim object 41 | """ 42 | 43 | self.viewer = CustomMjViewer(sim) 44 | self.viewer.cam.fixedcamid = 6 45 | self.callbacks = {} 46 | 47 | 48 | def set_camera(self, camera_id): 49 | """ 50 | Set the camera view to the specified camera ID. 51 | """ 52 | self.viewer.cam.fixedcamid = camera_id 53 | self.viewer.cam.type = const.CAMERA_FIXED 54 | 55 | def render(self): 56 | # safe for multiple calls 57 | self.viewer.render() 58 | 59 | def close(self): 60 | """ 61 | Destroys the open window and renders (pun intended) the viewer useless. 62 | """ 63 | glfw.destroy_window(self.viewer.window) 64 | self.viewer = None 65 | 66 | def add_keypress_callback(self, key, fn): 67 | """ 68 | Allows for custom callback functions for the viewer. Called on key down. 69 | Parameter 'any' will ensure that the callback is called on any key down, 70 | and block default mujoco viewer callbacks from executing, except for 71 | the ESC callback to close the viewer. 72 | """ 73 | self.viewer.keypress[key].append(fn) 74 | 75 | def add_keyup_callback(self, key, fn): 76 | """ 77 | Allows for custom callback functions for the viewer. Called on key up. 
78 | Parameter 'any' will ensure that the callback is called on any key up, 79 | and block default mujoco viewer callbacks from executing, except for 80 | the ESC callback to close the viewer. 81 | """ 82 | self.viewer.keyup[key].append(fn) 83 | 84 | def add_keyrepeat_callback(self, key, fn): 85 | """ 86 | Allows for custom callback functions for the viewer. Called on key repeat. 87 | Parameter 'any' will ensure that the callback is called on any key repeat, 88 | and block default mujoco viewer callbacks from executing, except for 89 | the ESC callback to close the viewer. 90 | """ 91 | self.viewer.keyrepeat[key].append(fn) 92 | -------------------------------------------------------------------------------- /panda/config/__init__.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from utils import str2bool, str2list 4 | 5 | 6 | def argparser(): 7 | parser = argparse.ArgumentParser('Grasping Environment', formatter_class=argparse.ArgumentDefaultsHelpFormatter) 8 | 9 | # Grasping 10 | import config.grasping as grasp 11 | grasp.add_argument(parser) 12 | 13 | # training algorithm 14 | parser.add_argument('--algo', type=str, default='ppo', 15 | choices=['sac', 'ppo']) 16 | 17 | # vanilla rl 18 | parser.add_argument('--rl_hid_size', type=int, default= 64) 19 | parser.add_argument('--rl_activation', type=str, default='relu', 20 | choices=['relu', 'elu', 'tanh']) 21 | parser.add_argument('--tanh_policy', type=str2bool, default=True) 22 | 23 | # observation normalization 24 | parser.add_argument('--ob_norm', type=str2bool, default=True) 25 | parser.add_argument('--clip_obs', type=float, default=200, help='the clip range of observation') 26 | parser.add_argument('--clip_range', type=float, default=5, help='the clip range after normalization of observation') 27 | 28 | # off-policy rl 29 | parser.add_argument('--buffer_size', type=int, default=int(1e5), help='the size of the buffer') 30 | parser.add_argument('--discount_factor', type=float, default=0.99, help='the discount factor') 31 | parser.add_argument('--lr_actor', type=float, default=3e-4, help='the learning rate of the actor') 32 | parser.add_argument('--lr_critic', type=float, default=3e-4, help='the learning rate of the critic') 33 | parser.add_argument('--polyak', type=float, default=0.995, help='the average coefficient') 34 | 35 | # training 36 | parser.add_argument('--is_train', type=str2bool, default=False) 37 | parser.add_argument('--num_batches', type=int, default=60, help='the times to update the network per epoch') 38 | parser.add_argument('--batch_size', type=int, default=512, help='the sample batch size') 39 | parser.add_argument('--max_grad_norm', type=float, default=100) 40 | parser.add_argument('--max_global_step', type=int, default=int(1e7)) 41 | parser.add_argument('--gpu', type=int, default=0) 42 | 43 | # ppo 44 | parser.add_argument('--clip_param', type=float, default=0.2) 45 | parser.add_argument('--value_loss_coeff', type=float, default=0.5) 46 | parser.add_argument('--action_loss_coeff', type=float, default=1e-2) 47 | parser.add_argument('--entropy_loss_coeff', type=float, default=1e-4) 48 | parser.add_argument('--rollout_length', type=int, default=1800) 49 | parser.add_argument('--gae_lambda', type=float, default=0.95) 50 | 51 | # sac 52 | parser.add_argument('--reward_scale', type=float, default=1.0, help='reward scale') 53 | 54 | # log 55 | parser.add_argument('--log_interval', type=int, default=1) 56 | parser.add_argument('--evaluate_interval', type=int, 
default=10) 57 | parser.add_argument('--ckpt_interval', type=int, default=200) 58 | parser.add_argument('--log_root_dir', type=str, default='log') 59 | parser.add_argument('--wandb', type=str2bool, default=True, 60 | help='set it True if you want to use wandb') 61 | 62 | # evaluation 63 | parser.add_argument('--ckpt_num', type=int, default=None) 64 | parser.add_argument('--num_eval', type=int, default=10) 65 | parser.add_argument('--num_record_samples', type=int, default=1, help='number of trajectories to collect during eval') 66 | 67 | 68 | # misc 69 | parser.add_argument('--prefix', type=str, default='rl') 70 | parser.add_argument('--suffix', type=str, default='') 71 | parser.add_argument('--notes', type=str, default='') 72 | parser.add_argument('--seed', type=int, default=111, help='Random seed') 73 | 74 | args, unparsed = parser.parse_known_args() 75 | 76 | return args, unparsed 77 | -------------------------------------------------------------------------------- /panda/rl/rollouts.py: -------------------------------------------------------------------------------- 1 | """ 2 | Runs rollouts (RolloutRunner class) and collects transitions using Rollout class. 3 | """ 4 | 5 | from collections import defaultdict 6 | import numpy as np 7 | 8 | 9 | class Rollout(): 10 | """ 11 | Rollout storing an episode.(Stores data for all timesteps in a full episode) 12 | """ 13 | 14 | def __init__(self): 15 | """ Initialize buffer. """ 16 | self._history = defaultdict(list) 17 | 18 | def add(self, data): 19 | """ Add a transition @data to rollout buffer. """ 20 | for key, value in data.items(): 21 | self._history[key].append(value) 22 | 23 | def get(self): 24 | """ Returns rollout buffer and clears buffer. """ 25 | batch = {} 26 | batch['ob'] = self._history['ob'] 27 | batch['ac'] = self._history['ac'] 28 | batch['ac_before_activation'] = self._history['ac_before_activation'] 29 | batch['done'] = self._history['done'] 30 | batch['rew'] = self._history['rew'] 31 | self._history = defaultdict(list) 32 | return batch 33 | 34 | class RolloutRunner(): 35 | """ 36 | Run rollout given environment and policy. 37 | """ 38 | 39 | def __init__(self, config, env, pi): 40 | """ 41 | Args: 42 | config: configurations for the environment. 43 | env: environment. 44 | pi: policy. 45 | """ 46 | 47 | self._config = config 48 | self._env = env 49 | self._pi = pi 50 | 51 | def run_episode(self, max_step=600, is_train=True): 52 | """ 53 | Runs one episode and returns the rollout. 54 | 55 | Args: 56 | max_step: maximum number of steps of the rollout. 57 | is_train: whether rollout is for training or evaluation. 58 | """ 59 | config = self._config 60 | device = config.device 61 | env = self._env 62 | pi = self._pi 63 | 64 | # initialize rollout buffer 65 | rollout = Rollout() 66 | reward_info = defaultdict(list) 67 | acs = [] 68 | done = False 69 | ep_len = 0 70 | ep_rew = 0 71 | ob = self._env.reset() 72 | 73 | # buffer to save qpos 74 | saved_qpos = [] 75 | 76 | # run rollout 77 | while not done and ep_len < max_step: 78 | # sample action from policy 79 | ac, ac_before_activation = pi.act(ob, is_train=is_train) 80 | 81 | rollout.add({'ob': ob, 'ac': ac, 'ac_before_activation': ac_before_activation}) 82 | # joints positions 83 | saved_qpos.append(env.sim.get_state().qpos[:9].copy()) 84 | 85 | # take a step 86 | ob, reward, done, info = env.step(ac) # e.g. 
info = {'episode_success': 0} 87 | env.render() 88 | rollout.add({'done': done, 'rew': reward}) 89 | acs.append(ac) 90 | ep_len += 1 91 | ep_rew += reward 92 | 93 | for key, value in info.items(): 94 | reward_info[key].append(value) 95 | 96 | # last frame 97 | rollout.add({'ob': ob}) 98 | saved_qpos.append(env.sim.get_state().qpos[:9].copy()) 99 | 100 | # compute average/sum of information 101 | ep_info = {'len': ep_len, 'rew': ep_rew, 'saved_qpos': saved_qpos} # qpos contains values for all timesteps, len and rew contain only sum 102 | for key, value in reward_info.items(): 103 | if isinstance(value[0], (int, float, bool)): 104 | if '_mean' in key: 105 | ep_info[key] = np.mean(value) 106 | else: 107 | ep_info[key] = np.sum(value) 108 | 109 | 110 | return rollout.get(), ep_info 111 | -------------------------------------------------------------------------------- /panda/models/objects/objects.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import xml.etree.ElementTree as ET 3 | import numpy as np 4 | from models.base import MujocoXML 5 | from utils.mjcf_utils import string_to_array, array_to_string 6 | 7 | 8 | class MujocoObject: 9 | """ 10 | Base class for all objects. 11 | 12 | We use Mujoco Objects to implement all objects that 13 | 1) may appear for multiple times in a task 14 | 2) can be swapped between different tasks 15 | 16 | Typical methods return copy so the caller can all joints/attributes as wanted 17 | 18 | Attributes: 19 | asset (TYPE): Description 20 | """ 21 | 22 | def __init__(self): 23 | self.asset = ET.Element("asset") 24 | 25 | def get_horizontal_radius(self): 26 | """ 27 | Returns scalar 28 | If object a,b has horizontal distance d 29 | a.get_horizontal_radius() + b.get_horizontal_radius() < d 30 | should mean that a, b has no contact 31 | 32 | Helps us put objects programmatically without them flying away due to 33 | a huge initial contact force 34 | 35 | Returns: 36 | Float: radius 37 | 38 | Raises: 39 | NotImplementedError: Description 40 | """ 41 | raise NotImplementedError 42 | # return 2 43 | 44 | def get_collision(self, name=None, site=False): 45 | """ 46 | Returns a ET.Element 47 | It is a subtree that defines all collision related fields 48 | of this object. 
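        (For xml-based objects this is the <body name="collision"> subtree of the
        object's model file, with its geoms renamed to @name; see
        MujocoXMLObject.get_collision below.)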
49 | 50 | Return is a copy 51 | 52 | Args: 53 | name (None, optional): Assign name to body 54 | site (False, optional): Add a site (with name @name 55 | when applicable) to the returned body 56 | 57 | Returns: 58 | ET.Element: body 59 | 60 | Raises: 61 | NotImplementedError: Description 62 | """ 63 | raise NotImplementedError 64 | 65 | def get_site_attrib_template(self): 66 | """ 67 | Returns attribs of spherical site used to mark body origin 68 | 69 | Returns: 70 | Dictionary of default site attributes 71 | """ 72 | return { 73 | "pos": "0 0 0", 74 | "size": "0.006 0.006 0.006", 75 | "rgba": "1 0 0 1", 76 | "type": "sphere", 77 | } 78 | 79 | 80 | class MujocoXMLObject(MujocoXML, MujocoObject): 81 | """ 82 | MujocoObjects that are loaded from xml files 83 | """ 84 | 85 | def __init__(self, fname): 86 | """ 87 | Args: 88 | fname (TYPE): XML File path 89 | """ 90 | MujocoXML.__init__(self, fname) 91 | 92 | def get_horizontal_radius(self): 93 | horizontal_radius_site = self.worldbody.find("./body/site[@name='horizontal_radius_site']") 94 | return string_to_array(horizontal_radius_site.get("pos"))[0] 95 | 96 | def get_collision(self, name=None, site=False): 97 | 98 | collision = copy.deepcopy(self.worldbody.find("./body/body[@name='collision']")) 99 | collision.attrib.pop("name") 100 | if name is not None: 101 | collision.attrib["name"] = name 102 | geoms = collision.findall("geom") 103 | if len(geoms) == 1: 104 | geoms[0].set("name", name) 105 | else: 106 | for i in range(len(geoms)): 107 | geoms[i].set("name", "{}-{}".format(name, i)) 108 | if site: 109 | # add a site as well 110 | template = self.get_site_attrib_template() 111 | template["rgba"] = "1 1 0 1" 112 | if name is not None: 113 | template["name"] = name 114 | collision.append(ET.Element("site", attrib=template)) 115 | return collision 116 | -------------------------------------------------------------------------------- /panda/models/assets/gripper/panda_gripper.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /panda/rl/policies/actor_critic.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from rl.policies.distributions import FixedNormal, MixedDistribution 7 | from utils.pytorch import to_tensor 8 | 9 | 10 | class Actor(nn.Module): 11 | def __init__(self, config, ob_space, ac_space, tanh_policy): 12 | super().__init__() 13 | self._config = config 14 | self._activation_fn = getattr(F, config.rl_activation) # elu, relu, tanh 15 | self._tanh = tanh_policy 16 | 17 | @property 18 | def info(self): 19 | return {} 20 | 21 | def act(self, ob, is_train=True, return_log_prob=False): 22 | ob = to_tensor(ob, self._config.device) 23 | self._ob = ob 24 | means, stds = self.forward(ob) # OrderedDict([('default', tensor()] 25 | dists = OrderedDict() 26 | actions = OrderedDict() 27 | 28 | for k in self._ac_space.keys(): 29 | dists[k] = FixedNormal(means[k], stds[k]) # Normal dist with double args Normal(loc: , scale: ) 30 | # dists - OrderedDict([('default', Normal(loc: torch.Size([1, 8]), 
scale: torch.Size([1, 8])))]) 31 | 32 | mixed_dist = MixedDistribution(dists) # a normal distribution 33 | 34 | activations = mixed_dist.sample() # OrderedDict([('default', tensor()] 35 | 36 | if return_log_prob: 37 | log_probs = mixed_dist.log_probs(activations) 38 | 39 | for k in self._ac_space.keys(): 40 | z = activations[k] 41 | if self._tanh: 42 | action = torch.tanh(z) 43 | if return_log_prob: 44 | # follow the Appendix C. Enforcing Action Bounds 45 | log_det_jacobian = 2 * (np.log(2.) - z - F.softplus(-2. * z)).sum(dim=-1, keepdim=True) 46 | log_probs[k] = log_probs[k] - log_det_jacobian 47 | else: 48 | action = z 49 | 50 | # convert tensor to numpy array 51 | actions[k] = action.detach().cpu().numpy().squeeze(0) 52 | activations[k] = z.detach().cpu().numpy().squeeze(0) 53 | 54 | if return_log_prob: 55 | log_probs_ = torch.cat(list(log_probs.values()), -1).sum(-1, keepdim=True) 56 | if log_probs_.min() < -100: 57 | print('sampling an action with a probability of 1e-100') 58 | import ipdb; ipdb.set_trace() 59 | 60 | log_probs_ = log_probs_.detach().cpu().numpy().squeeze(0) 61 | return actions, activations, log_probs_ 62 | else: 63 | return actions, activations 64 | 65 | def act_log(self, ob, activations=None): 66 | self._ob = ob.copy() 67 | means, stds = self.forward(ob) 68 | dists = OrderedDict() 69 | actions = OrderedDict() 70 | 71 | for k in self._ac_space.keys(): 72 | dists[k] = FixedNormal(means[k], stds[k]) 73 | 74 | mixed_dist = MixedDistribution(dists) 75 | # activations is OrderedDict([('default', tensor()]; tensor size is (batch_size, 8) 76 | activations_ = mixed_dist.rsample() if activations is None else activations 77 | 78 | for k in activations_.keys(): 79 | if len(activations_[k].shape) == 1: 80 | activations_[k] = activations_[k].unsqueeze(0) 81 | 82 | # log_probs is OrderedDict([('default', tensor()]; tensor size is (batch_size, 1) 83 | log_probs = mixed_dist.log_probs(activations_) 84 | 85 | for k in self._ac_space.keys(): 86 | z = activations_[k] 87 | if self._tanh: 88 | action = torch.tanh(z) 89 | # follow the Appendix C. Enforcing Action Bounds 90 | log_det_jacobian = 2 * (np.log(2.) - z - F.softplus(-2. * z)).sum(dim=-1, keepdim=True) 91 | log_probs[k] = log_probs[k] - log_det_jacobian 92 | else: 93 | action = z 94 | 95 | actions[k] = action 96 | 97 | ents = mixed_dist.entropy() # ents is tensor of shape (batch_size) 98 | log_probs_ = torch.cat(list(log_probs.values()), -1).sum(-1, keepdim=True) # log_probs_ is a tensor of shape (batch_size, 1) 99 | if activations is None: 100 | return actions, log_probs_ 101 | else: 102 | return log_probs_, ents 103 | 104 | class Critic(nn.Module): 105 | def __init__(self, config): 106 | super().__init__() 107 | self._config = config 108 | -------------------------------------------------------------------------------- /panda/utils/mjcf_utils.py: -------------------------------------------------------------------------------- 1 | # utility functions for manipulating MJCF XML models 2 | 3 | import xml.etree.ElementTree as ET 4 | import os 5 | import numpy as np 6 | 7 | import models 8 | 9 | RED = [1, 0, 0, 1] 10 | GREEN = [0, 1, 0, 1] 11 | BLUE = [0, 0, 1, 1] 12 | 13 | 14 | def xml_path_completion(xml_path): 15 | """ 16 | Takes in a local xml path and returns a full path. 
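    e.g. "arena/table_arena.xml" resolves to
    os.path.join(models.assets_root, "arena/table_arena.xml"), while an absolute
    path such as "/tmp/model.xml" is returned unchanged.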
17 | if @xml_path is absolute, do nothing 18 | if @xml_path is not absolute, load xml that is shipped by the package 19 | """ 20 | if xml_path.startswith("/"): 21 | full_path = xml_path 22 | else: 23 | full_path = os.path.join(models.assets_root, xml_path) 24 | return full_path 25 | 26 | 27 | def array_to_string(array): 28 | """ 29 | Converts a numeric array into the string format in mujoco. 30 | 31 | Examples: 32 | [0, 1, 2] => "0 1 2" 33 | """ 34 | return " ".join(["{}".format(x) for x in array]) 35 | 36 | 37 | def string_to_array(string): 38 | """ 39 | Converts a array string in mujoco xml to np.array. 40 | 41 | Examples: 42 | "0 1 2" => [0, 1, 2] 43 | """ 44 | return np.array([float(x) for x in string.split(" ")]) 45 | 46 | 47 | def set_alpha(node, alpha=0.1): 48 | """ 49 | Sets all a(lpha) field of the rgba attribute to be @alpha 50 | for @node and all subnodes 51 | used for managing display 52 | """ 53 | for child_node in node.findall(".//*[@rgba]"): 54 | rgba_orig = string_to_array(child_node.get("rgba")) 55 | child_node.set("rgba", array_to_string(list(rgba_orig[0:3]) + [alpha])) 56 | 57 | 58 | def new_joint(**kwargs): 59 | """ 60 | Creates a joint tag with attributes specified by @**kwargs. 61 | """ 62 | 63 | element = ET.Element("joint", attrib=kwargs) 64 | return element 65 | 66 | 67 | def new_actuator(joint, act_type="actuator", **kwargs): 68 | """ 69 | Creates an actuator tag with attributes specified by @**kwargs. 70 | 71 | Args: 72 | joint: type of actuator transmission. 73 | see all types here: http://mujoco.org/book/modeling.html#actuator 74 | act_type (str): actuator type. Defaults to "actuator" 75 | 76 | """ 77 | element = ET.Element(act_type, attrib=kwargs) 78 | element.set("joint", joint) 79 | return element 80 | 81 | 82 | def new_site(name, rgba=RED, pos=(0, 0, 0), size=(0.005,), **kwargs): 83 | """ 84 | Creates a site element with attributes specified by @**kwargs. 85 | 86 | Args: 87 | name (str): site name. 88 | rgba: color and transparency. Defaults to solid red. 89 | pos: 3d position of the site. 90 | size ([float]): site size (sites are spherical by default). 91 | """ 92 | kwargs["rgba"] = array_to_string(rgba) 93 | kwargs["pos"] = array_to_string(pos) 94 | kwargs["size"] = array_to_string(size) 95 | kwargs["name"] = name 96 | element = ET.Element("site", attrib=kwargs) 97 | return element 98 | 99 | 100 | def new_geom(geom_type, size, pos=(0, 0, 0), rgba=RED, group=0, **kwargs): 101 | """ 102 | Creates a geom element with attributes specified by @**kwargs. 103 | 104 | Args: 105 | geom_type (str): type of the geom. 106 | see all types here: http://mujoco.org/book/modeling.html#geom 107 | size: geom size parameters. 108 | pos: 3d position of the geom frame. 109 | rgba: color and transparency. Defaults to solid red. 110 | group: the integrer group that the geom belongs to. useful for 111 | separating visual and physical elements. 112 | """ 113 | kwargs["type"] = str(geom_type) 114 | kwargs["size"] = array_to_string(size) 115 | kwargs["rgba"] = array_to_string(rgba) 116 | kwargs["group"] = str(group) 117 | kwargs["pos"] = array_to_string(pos) 118 | element = ET.Element("geom", attrib=kwargs) 119 | return element 120 | 121 | 122 | def new_body(name=None, pos=None, **kwargs): 123 | """ 124 | Creates a body element with attributes specified by @**kwargs. 125 | 126 | Args: 127 | name (str): body name. 128 | pos: 3d position of the body frame. 
129 | """ 130 | if name is not None: 131 | kwargs["name"] = name 132 | if pos is not None: 133 | kwargs["pos"] = array_to_string(pos) 134 | element = ET.Element("body", attrib=kwargs) 135 | return element 136 | 137 | 138 | def new_inertial(name=None, pos=(0, 0, 0), mass=None, **kwargs): 139 | """ 140 | Creates a inertial element with attributes specified by @**kwargs. 141 | 142 | Args: 143 | mass: The mass of inertial 144 | """ 145 | if mass is not None: 146 | kwargs["mass"] = str(mass) 147 | kwargs["pos"] = array_to_string(pos) 148 | element = ET.Element("inertial", attrib=kwargs) 149 | return element 150 | -------------------------------------------------------------------------------- /panda/models/base.py: -------------------------------------------------------------------------------- 1 | import os 2 | import xml.dom.minidom 3 | import xml.etree.ElementTree as ET 4 | import io 5 | import numpy as np 6 | 7 | 8 | class MujocoXML(object): 9 | """ 10 | Base class of Mujoco xml file 11 | Wraps around ElementTree and provides additional functionality for merging different models. 12 | Specially, we keep track of , and 13 | """ 14 | 15 | def __init__(self, fname): 16 | """ 17 | Loads a mujoco xml from file. 18 | 19 | Args: 20 | fname (str): path to the MJCF xml file. 21 | """ 22 | self.file = fname 23 | self.folder = os.path.dirname(fname) 24 | self.tree = ET.parse(fname) 25 | self.root = self.tree.getroot() 26 | self.name = self.root.get("model") 27 | self.worldbody = self.create_default_element("worldbody") 28 | self.actuator = self.create_default_element("actuator") 29 | self.asset = self.create_default_element("asset") 30 | self.equality = self.create_default_element("equality") 31 | self.sensor = self.create_default_element("sensor") 32 | self.contact = self.create_default_element("contact") 33 | self.default = self.create_default_element("default") 34 | self.resolve_asset_dependency() 35 | 36 | def resolve_asset_dependency(self): 37 | """ 38 | Converts every file dependency into absolute path so when we merge we don't break things. 39 | """ 40 | 41 | for node in self.asset.findall("./*[@file]"): 42 | file = node.get("file") 43 | abs_path = os.path.abspath(self.folder) 44 | abs_path = os.path.join(abs_path, file) 45 | node.set("file", abs_path) 46 | 47 | def create_default_element(self, name): 48 | """ 49 | Creates a <@name/> tag under root if there is none. 50 | """ 51 | 52 | found = self.root.find(name) 53 | if found is not None: 54 | return found 55 | ele = ET.Element(name) 56 | self.root.append(ele) 57 | return ele 58 | 59 | def merge(self, other, merge_body=True): 60 | """ 61 | Default merge method. 62 | 63 | Args: 64 | other: another MujocoXML instance 65 | raises XML error if @other is not a MujocoXML instance. 66 | merges , and of @other into @self 67 | merge_body: True if merging child bodies of @other. Defaults to True. 
68 | """ 69 | if merge_body: 70 | for body in other.worldbody: 71 | self.worldbody.append(body) 72 | self.merge_asset(other) 73 | for one_actuator in other.actuator: 74 | self.actuator.append(one_actuator) 75 | for one_equality in other.equality: 76 | self.equality.append(one_equality) 77 | for one_sensor in other.sensor: 78 | self.sensor.append(one_sensor) 79 | for one_contact in other.contact: 80 | self.contact.append(one_contact) 81 | for one_default in other.default: 82 | self.default.append(one_default) 83 | # self.config.append(other.config) 84 | 85 | def get_model(self, mode="mujoco_py"): 86 | """ 87 | Returns a MjModel instance from the current xml tree. 88 | """ 89 | 90 | available_modes = ["mujoco_py"] 91 | with io.StringIO() as string: 92 | string.write(ET.tostring(self.root, encoding="unicode")) 93 | if mode == "mujoco_py": 94 | from mujoco_py import load_model_from_xml 95 | 96 | model = load_model_from_xml(string.getvalue()) 97 | return model 98 | raise ValueError( 99 | "Unkown model mode: {}. Available options are: {}".format( 100 | mode, ",".join(available_modes) 101 | ) 102 | ) 103 | 104 | def get_xml(self): 105 | """ 106 | Returns a string of the MJCF XML file. 107 | """ 108 | with io.StringIO() as string: 109 | string.write(ET.tostring(self.root, encoding="unicode")) 110 | return string.getvalue() 111 | 112 | def save_model(self, fname, pretty=False): 113 | """ 114 | Saves the xml to file. 115 | 116 | Args: 117 | fname: output file location 118 | pretty: attempts!! to pretty print the output 119 | """ 120 | with open(fname, "w") as f: 121 | xml_str = ET.tostring(self.root, encoding="unicode") 122 | if pretty: 123 | # TODO: get a better pretty print library 124 | parsed_xml = xml.dom.minidom.parseString(xml_str) 125 | xml_str = parsed_xml.toprettyxml(newl="") 126 | f.write(xml_str) 127 | 128 | def merge_asset(self, other): 129 | """ 130 | Useful for merging other files in a custom logic. 
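        Each asset element of @other is appended only if no element with the same
        tag and name already exists in self.asset, so repeated merges do not
        duplicate textures or meshes.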
131 | """ 132 | for asset in other.asset: 133 | asset_name = asset.get("name") 134 | asset_type = asset.tag 135 | # Avoids duplication 136 | pattern = "./{}[@name='{}']".format(asset_type, asset_name) 137 | if self.asset.find(pattern) is None: 138 | self.asset.append(asset) 139 | -------------------------------------------------------------------------------- /panda/controller/arm_controller.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from enum import Enum 3 | import mujoco_py 4 | from scipy.interpolate import CubicSpline 5 | 6 | 7 | class ControllerType(str, Enum): 8 | JOINT_VEL = 'joint_velocity' 9 | 10 | 11 | class Controller(): 12 | def __init__(self, control_max, control_min, max_action, min_action, policy_freq=250, interpolation=None): 13 | 14 | # Upper and lower limits to the input action (only pos/ori) 15 | self.control_max = control_max # array([1, 1, 1, 1, 1, 1, 1]) 16 | self.control_min = control_min # array([-1, -1, -1, -1, -1, -1, -1]) 17 | 18 | # Dimensionality of the action 19 | self.control_dim = self.control_max.shape[0] # 7 20 | 21 | # Limits to the policy outputs 22 | self.input_max = max_action # 1 23 | self.input_min = min_action # -1 24 | 25 | # This handles when the mean of max and min control is not zero -> actions are around that mean 26 | self.action_scale = abs(self.control_max - self.control_min) / abs(max_action - min_action) # array([1., 1., 1., 1., 1., 1., 1.]) 27 | self.action_output_transform = (self.control_max + self.control_min) / 2.0 # array([0., 0., 0., 0., 0., 0., 0.]) 28 | self.action_input_transform = (max_action + min_action) / 2.0 # 0 29 | 30 | # Frequency at which actions from the robot policy are fed into this controller 31 | self.policy_freq = policy_freq 32 | self.interpolation = interpolation # "linear" 33 | 34 | self.ramp_ratio = 1 # Percentage of the time between policy timesteps used for interpolation 35 | 36 | # Initialize the remaining attributes 37 | self.model_timestep = None 38 | self.interpolation_steps = None 39 | self.current_joint_velocity = None 40 | 41 | def reset(self): 42 | """ 43 | Resets the internal values of the controller 44 | """ 45 | pass 46 | 47 | def transform_action(self, action): 48 | """ 49 | Scale the action to go to the right min and max 50 | """ 51 | action = np.clip(action, self.input_min, self.input_max) # -1, 1 52 | transformed_action = (action - self.action_input_transform) * self.action_scale + self.action_output_transform 53 | 54 | return transformed_action 55 | 56 | def update_model(self, sim, joint_index): 57 | """ 58 | Updates the state of the robot used to compute the control command 59 | """ 60 | self.sim_freq = 1 / sim.model.opt.timestep 61 | self.interpolation_steps = np.floor(self.ramp_ratio * self.sim_freq / self.policy_freq) 62 | self.current_joint_velocity = sim.data.qvel[joint_index] 63 | 64 | 65 | def linear_interpolate(self, last_goal, goal): 66 | """ 67 | Set self.linear to be a function interpolating between last_goal and goal based on the ramp_ratio 68 | """ 69 | # We interpolate to reach the commanded desired position in self.ramp_ratio % of the time we have this goal 70 | delta_x_per_step = (goal - last_goal) / self.interpolation_steps 71 | self.linear = np.array([(last_goal + i * delta_x_per_step) for i in range(1, int(self.interpolation_steps) + 1)]) 72 | 73 | def action_to_torques(self, action, policy_step): 74 | raise NotImplementedError 75 | 76 | @property 77 | def action_dim(self): 78 | """ 79 | Returns 
dimensionality of the actions 80 | """ 81 | dim = self.control_dim 82 | return dim 83 | class JointVelocityController(Controller): 84 | """ 85 | Class to interprete actions as joint velocities 86 | """ 87 | ## "joint_velocity":{"control_range": [1, 1, 1, 1, 1, 1, 1], "kv": [8.0, 7.0, 6.0, 4.0, 2.0, 0.5, 0.1], "interpolation": "linear"}, 88 | 89 | def __init__(self, control_range, kv, max_action=1, min_action=-1, interpolation=None): 90 | super(JointVelocityController, self).__init__( 91 | control_max=np.array(control_range), 92 | control_min=-1 * np.array(control_range), 93 | max_action=max_action, 94 | min_action=min_action, 95 | interpolation=interpolation) 96 | 97 | self.kv = np.array(kv) 98 | self.interpolate = True 99 | 100 | self.last_goal = np.zeros(self.control_dim) 101 | self.step = 0 102 | 103 | def reset(self): 104 | super().reset() 105 | self.step = 0 106 | self.last_goal = np.zeros(self.control_dim) 107 | 108 | def action_to_torques(self, action, policy_step): 109 | action = self.transform_action(action) # clipping is done 110 | if policy_step: 111 | self.step = 0 112 | self.goal = np.array((action)) 113 | 114 | if self.interpolation == "linear": 115 | self.linear_interpolate(self.last_goal, self.goal) 116 | else: 117 | self.last_goal = np.array((self.goal)) 118 | 119 | if self.interpolation == "linear": 120 | self.last_goal = self.linear[self.step] 121 | 122 | if self.step < self.interpolation_steps - 1: 123 | self.step += 1 124 | # Torques for each joint are kv*(q_dot_desired - q_dot) 125 | torques = np.multiply(self.kv, (self.last_goal - self.current_joint_velocity)) 126 | 127 | return torques 128 | -------------------------------------------------------------------------------- /panda/rl/normalizer.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | import numpy as np 3 | from utils.mpi import mpi_average 4 | 5 | 6 | class SubNormalizer: 7 | def __init__(self, size, eps=1e-2, default_clip_range=np.inf, clip_obs=np.inf): 8 | if isinstance(size, list): # size is ob_space dimension i-e [10], [36] 9 | self.size = size 10 | else: 11 | self.size = [size] 12 | self.eps = eps 13 | self.default_clip_range = default_clip_range 14 | self.clip_obs = clip_obs 15 | 16 | # some local information 17 | self.local_sum = np.zeros(self.size, np.float32) 18 | self.local_sumsq = np.zeros(self.size, np.float32) 19 | self.local_count = np.zeros(1, np.float32) 20 | # get the total sum sumsq and sum count 21 | self.total_sum = np.zeros(self.size, np.float32) 22 | self.total_sumsq = np.zeros(self.size, np.float32) 23 | self.total_count = np.ones(1, np.float32) 24 | # get the mean and std 25 | self.mean = np.zeros(self.size, np.float32) 26 | self.std = np.ones(self.size, np.float32) 27 | 28 | def _clip(self, v): # clips the observation in range defined by clip_obs e.g.(-200, 200) 29 | return np.clip(v, -self.clip_obs, self.clip_obs) 30 | 31 | # update the parameters of the normalizer 32 | def update(self, v): # takes in observation array i-e. v['robot-state']; shape is e.g. (201,36) 33 | v = self._clip(v) 34 | v = v.reshape([-1] + self.size) # size is dim. of observation e.g. [36], [10], depending on obs key 35 | # do the computing 36 | self.local_sum += v.sum(axis=0) # sum obs along first axis(timesteps); e.g. shape is 36 37 | self.local_sumsq += (np.square(v)).sum(axis=0) 38 | self.local_count[0] += v.shape[0] # e.g. 
201 39 | 40 | # sync the parameters across the cpus 41 | def sync(self, local_sum, local_sumsq, local_count): 42 | local_sum[...] = mpi_average(local_sum) 43 | local_sumsq[...] = mpi_average(local_sumsq) 44 | local_count[...] = mpi_average(local_count) 45 | return local_sum, local_sumsq, local_count 46 | 47 | def recompute_stats(self): 48 | local_count = self.local_count.copy() 49 | local_sum = self.local_sum.copy() 50 | local_sumsq = self.local_sumsq.copy() 51 | # reset 52 | self.local_count[...] = 0 53 | self.local_sum[...] = 0 54 | self.local_sumsq[...] = 0 55 | # synrc the stats 56 | sync_sum, sync_sumsq, sync_count = self.sync(local_sum, local_sumsq, local_count) 57 | # update the total stuff 58 | self.total_sum += sync_sum 59 | self.total_sumsq += sync_sumsq 60 | self.total_count += sync_count 61 | # calculate the new mean and std 62 | self.mean = self.total_sum / self.total_count 63 | self.std = np.sqrt(np.maximum(np.square(self.eps), (self.total_sumsq / self.total_count) - np.square(self.total_sum / self.total_count))) 64 | 65 | # normalize the observation 66 | def normalize(self, v, clip_range=None): # v is observation array 67 | v = self._clip(v) 68 | if clip_range is None: 69 | clip_range = self.default_clip_range 70 | return np.clip((v - self.mean) / (self.std), -clip_range, clip_range) 71 | 72 | def state_dict(self): 73 | return {'sum': self.total_sum, 'sumsq': self.total_sumsq, 'count': self.total_count} 74 | 75 | def load_state_dict(self, state_dict): 76 | self.total_sum = state_dict['sum'] 77 | self.total_sumsq = state_dict['sumsq'] 78 | self.total_count = state_dict['count'] 79 | self.mean = self.total_sum / self.total_count 80 | self.std = np.sqrt(np.maximum(np.square(self.eps), (self.total_sumsq / self.total_count) - np.square(self.total_sum / self.total_count))) 81 | 82 | 83 | class Normalizer: 84 | def __init__(self, shape, eps=1e-2, default_clip_range=np.inf, clip_obs=np.inf): 85 | self._shape = shape # shape = ODict([('object-state', [10]), ('robot-state', [36])]) 86 | if not isinstance(shape, dict): 87 | self._shape = {'': shape} 88 | print('New ob_norm with shape', self._shape) 89 | 90 | self._keys = sorted(self._shape.keys()) # ['object-state', 'robot-state'] 91 | 92 | self.sub_norm = {} # i-e {'object-state': SubNormalizer([10], eps, 5, 200), 'robot-state': SubNormalizer([36], eps, 5, 200)} 93 | for key in self._keys: 94 | self.sub_norm[key] = SubNormalizer(self._shape[key], eps, default_clip_range, clip_obs) 95 | 96 | # update the parameters of the normalizer 97 | def update(self, v): # v is rollout['ob'] 98 | if isinstance(v, list): 99 | if isinstance(v[0], dict): 100 | # stacks obs arrays values acc. 
to keys i-e ['robot-state'] arrays for all time steps stacked together 101 | v = OrderedDict([(k, np.asarray([x[k] for x in v])) for k in self._keys]) 102 | else: 103 | v = np.asarray(v) 104 | 105 | if isinstance(v, dict): 106 | for k, v_ in v.items(): 107 | if k in self._keys: 108 | self.sub_norm[k].update(v_) 109 | else: 110 | self.sub_norm[''].update(v) 111 | 112 | def recompute_stats(self): 113 | for k in self._keys: 114 | self.sub_norm[k].recompute_stats() 115 | 116 | # normalize the observation 117 | def _normalize(self, v, clip_range=None): 118 | if not isinstance(v, dict): 119 | return self.sub_norm[''].normalize(v, clip_range) 120 | 121 | return OrderedDict([(k, self.sub_norm[k].normalize(v_, clip_range)) for k, v_ in v.items() if k in self._keys]) 122 | 123 | def normalize(self, v, clip_range=None): 124 | # v is rollout['ob'], a list where each element(ref. to timestep) is ODict([('robot-state', array()), ('obj-state', array())]) OR 125 | # transitions['ob'], a dict {'robot-state': array(), 'object-state': array()} array length is equal to batch_size 126 | if isinstance(v, list): 127 | return [self._normalize(x, clip_range) for x in v] # x is a odict 128 | else: 129 | return self._normalize(v, clip_range) 130 | 131 | def state_dict(self): 132 | return OrderedDict([(k, self.sub_norm[k].state_dict()) for k in self._keys]) 133 | 134 | def load_state_dict(self, state_dict): 135 | for k in self._keys: 136 | self.sub_norm[k].load_state_dict(state_dict[k]) 137 | -------------------------------------------------------------------------------- /panda/environments/base.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | from mujoco_py import MjSim, MjRenderContextOffscreen 4 | from mujoco_py import load_model_from_xml 5 | from mujoco_py import GlfwContext 6 | import numpy as np 7 | from utils import MujocoPyRenderer 8 | 9 | GlfwContext(offscreen=True) 10 | REGISTERED_ENVS = {} 11 | 12 | def register_env(target_class): 13 | REGISTERED_ENVS[target_class.__name__] = target_class 14 | 15 | def get_env(name): 16 | """Try to get the equivalent functionality of gym.make in a sloppy way.""" 17 | 18 | return REGISTERED_ENVS[name] 19 | 20 | def make(name, config=None): 21 | """ 22 | Creates a new environment instance with @name and @config. 23 | """ 24 | env = get_env(name) 25 | 26 | # get default configuration 27 | if config is None: 28 | import argparse 29 | import config.grasping as grasp 30 | 31 | parser = argparse.ArgumentParser() 32 | grasp.add_argument(parser) 33 | 34 | config, unparsed = parser.parse_known_args() 35 | 36 | return env(config) 37 | 38 | class EnvMeta(type): 39 | """Metaclass for registering environments""" 40 | 41 | def __new__(meta, name, bases, class_dict): 42 | cls = super().__new__(meta, name, bases, class_dict) 43 | 44 | # List all environments that should not be registered here. 
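        # (all other subclasses, e.g. the grasping environment defined in
        #  environments/panda_grasping.py, are added to REGISTERED_ENVS by this
        #  metaclass, so make() can instantiate them by class name)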
45 | _unregistered_envs = ["MujocoEnv", "PandaEnv"] 46 | 47 | if cls.__name__ not in _unregistered_envs: 48 | register_env(cls) 49 | return cls 50 | 51 | 52 | class MujocoEnv(metaclass=EnvMeta): 53 | """Initializes a Mujoco Environment.""" 54 | 55 | def __init__(self, config): 56 | 57 | self.render_collision_mesh = config.render_collision_mesh 58 | self.render_visual_mesh = config.render_visual_mesh 59 | self.control_freq = config.control_freq 60 | self.horizon = config.horizon 61 | self.ignore_done = config.ignore_done 62 | self.viewer = None 63 | self.model = None 64 | 65 | # Load the model 66 | self._load_model() 67 | 68 | # Initialize the simulation 69 | self._initialize_sim() 70 | 71 | # Run all further internal (re-)initialization required 72 | self._reset_internal() 73 | 74 | def initialize_time(self, control_freq): 75 | """ 76 | Initializes the time constants used for simulation. 77 | """ 78 | self.cur_time = 0 79 | self.model_timestep = self.sim.model.opt.timestep #0.002 is default simulation timestep in mujoco 80 | self.control_freq = control_freq 81 | self.control_timestep = 1. / control_freq # 0.01 if control freq is 100 82 | 83 | def _load_model(self): 84 | """Loads an xml model, puts it in self.model""" 85 | pass 86 | 87 | def _get_reference(self): 88 | """ 89 | Sets up references to important components. A reference is typically an 90 | index or a list of indices that point to the corresponding elements 91 | in a flatten array, which is how MuJoCo stores physical simulation data. 92 | """ 93 | pass 94 | 95 | def _initialize_sim(self, xml_string=None): 96 | """ 97 | Creates a MjSim object and stores it in self.sim. If @xml_string is specified, the MjSim object will be created 98 | from the specified xml_string. Else, it will pull from self.model to instantiate the simulation 99 | """ 100 | # if we have an xml string, use that to create the sim. 
Otherwise, use the local model 101 | self.mjpy_model = self.model.get_model() 102 | 103 | # Create the simulation instance and run a single step to make sure changes have propagated through sim state 104 | self.sim = MjSim(self.mjpy_model) 105 | #self.sim.step() 106 | 107 | # Setup sim time based on control frequency 108 | self.initialize_time(self.control_freq) 109 | 110 | def reset(self): 111 | """Resets simulation.""" 112 | self._reset_internal() 113 | self.sim.forward() 114 | return self._get_observation() 115 | 116 | def _reset_internal(self): 117 | """Resets simulation internal configurations.""" 118 | 119 | # create visualization screen or renderer 120 | if self.viewer is None: 121 | self.viewer = MujocoPyRenderer(self.sim) 122 | self.viewer.viewer.vopt.geomgroup[0] = 1 if self.render_collision_mesh else 0 123 | self.viewer.viewer.vopt.geomgroup[1] = 1 if self.render_visual_mesh else 0 124 | # hiding the overlay speeds up rendering significantly 125 | self.viewer.viewer._hide_overlay = True 126 | 127 | # make sure mujoco-py doesn't block rendering frames 128 | self.viewer.viewer._render_every_frame = True 129 | 130 | # additional housekeeping 131 | self.sim_state_initial = self.sim.get_state() # copy of simulator state (qpos(23), qvel(21)) 132 | self._get_reference() 133 | self.cur_time = 0 134 | self.timestep = 0 135 | self.done = False 136 | 137 | def _get_observation(self): 138 | """Returns an OrderedDict containing observations [(name_string, np.array), ...].""" 139 | return OrderedDict() 140 | 141 | def step(self, action): 142 | """Takes a step in simulation with control command @action.""" 143 | if self.done: 144 | raise ValueError("executing action in terminated episode") 145 | 146 | if isinstance(action, list): 147 | action = {key: val for ac_i in action for key, val in ac_i.items()} 148 | if isinstance(action, dict): 149 | action = np.concatenate([action[key] for key in self.action_space.shape.keys()]) 150 | 151 | self.timestep += 1 152 | # If the env.step frequency is lower than the mjsim timestep frequency(1/0.002), the internal controller will output 153 | # multiple torque commands in between new high level action commands. 
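        # e.g. with control_freq=100 (control_timestep=0.01 s) and the default
        # model_timestep of 0.002 s, the loop below performs 5 sim.step() calls per
        # env.step(), and only the first one is flagged as a policy step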
Therefore, we need to denote via 154 | # 'policy_step' whether the current step we're taking is simply an internal update of the controller, 155 | # or an actual policy update 156 | 157 | # Loop through the simulation at the model timestep rate until we're ready to take the next policy step 158 | # (as defined by the control frequency specified at the environment level) 159 | policy_step = True 160 | for i in range(int(self.control_timestep / self.model_timestep)): 161 | self._pre_action(action, policy_step) 162 | self.sim.step() # advances simulation 163 | policy_step = False 164 | 165 | # Note: this is done all at once to avoid floating point inaccuracies 166 | self.cur_time += self.control_timestep 167 | 168 | reward, done, info = self._post_action(action) 169 | return self._get_observation(), reward, done, info 170 | 171 | def _pre_action(self, action, policy_step=False): 172 | """Do any preprocessing before taking an action.""" 173 | self.sim.data.ctrl[:] = action #sim.data.ctrl is the control signal that is sent to the actuators in the simulation 174 | 175 | def _post_action(self, action): 176 | """Do any housekeeping after taking an action.""" 177 | reward = self.reward(action) 178 | info = {} 179 | info['episode_success'] = int(self._check_success()) 180 | info['grasp'] = self.has_grasp 181 | info['phase'] = self.phase 182 | 183 | # done if number of elapsed timesteps is greater than horizon 184 | self.done = (self.timestep >= self.horizon) and not self.ignore_done 185 | return reward, self.done, info 186 | 187 | def reward(self, action): 188 | """Reward should be a function of state and action.""" 189 | return 0 190 | 191 | def render(self): 192 | """ 193 | Renders to an on-screen window.a 194 | """ 195 | self.viewer.render() 196 | 197 | def _check_success(self): 198 | """ 199 | Returns True if task has been completed. 200 | """ 201 | return False 202 | -------------------------------------------------------------------------------- /panda/rl/ppo_agent.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | import torch.optim as optim 6 | from rl.dataset import ReplayBuffer, RandomSampler 7 | from rl.base_agent import BaseAgent 8 | from utils.logger import logger 9 | from utils.mpi import mpi_average 10 | from utils.pytorch import optimizer_cuda, count_parameters, \ 11 | compute_gradient_norm, compute_weight_norm, sync_networks, sync_grads, \ 12 | obs2tensor, to_tensor 13 | 14 | 15 | 16 | class PPOAgent(BaseAgent): 17 | def __init__(self, config, ob_space, ac_space, actor, critic): 18 | super().__init__(config, ob_space) 19 | 20 | self._ac_space = ac_space # e.g. 
ActionSpace(shape=OrderedDict([('default', 8)]),minimum=-1.0, maximum=1.0) 21 | 22 | # build up networks 23 | self._actor = actor(config, ob_space, ac_space, config.tanh_policy) # actor model (MLP) 24 | self._old_actor = actor(config, ob_space, ac_space, config.tanh_policy) 25 | self._critic = critic(config, ob_space) # critic model (MLP) 26 | self._network_cuda(config.device) 27 | 28 | self._actor_optim = optim.Adam(self._actor.parameters(), lr=config.lr_actor) 29 | self._critic_optim = optim.Adam(self._critic.parameters(), lr=config.lr_critic) 30 | 31 | sampler = RandomSampler() 32 | buffer_keys = ['ob', 'ac', 'done', 'rew', 'ret', 'adv', 'ac_before_activation'] 33 | self._buffer = ReplayBuffer(buffer_keys, config.buffer_size, sampler.sample_func) 34 | 35 | if config.is_chef: 36 | logger.info('Creating a PPO agent') 37 | logger.info('The actor has %d parameters', count_parameters(self._actor)) 38 | logger.info('The critic has %d parameters', count_parameters(self._critic)) 39 | 40 | def store_episode(self, rollouts): 41 | self._compute_gae(rollouts) 42 | self._buffer.store_episode(rollouts) 43 | 44 | def _compute_gae(self, rollouts): 45 | T = len(rollouts['done']) 46 | ob = rollouts['ob'] 47 | ob = self.normalize(ob) 48 | ob = obs2tensor(ob, self._config.device) 49 | vpred = self._critic(ob).detach().cpu().numpy()[:,0] 50 | assert len(vpred) == T + 1 51 | 52 | done = rollouts['done'] 53 | rew = rollouts['rew'] 54 | adv = np.empty((T, ) , 'float32') 55 | lastgaelam = 0 56 | for t in reversed(range(T)): 57 | nonterminal = 1 - done[t] 58 | delta = rew[t] + self._config.discount_factor * vpred[t + 1] * nonterminal - vpred[t] 59 | adv[t] = lastgaelam = delta + self._config.discount_factor * self._config.gae_lambda * nonterminal * lastgaelam 60 | 61 | ret = adv + vpred[:-1] 62 | 63 | assert np.isfinite(adv).all() 64 | assert np.isfinite(ret).all() 65 | 66 | # update rollouts 67 | rollouts['adv'] = ((adv - adv.mean()) / adv.std()).tolist() 68 | rollouts['ret'] = ret.tolist() 69 | 70 | def state_dict(self): 71 | return { 72 | 'actor_state_dict': self._actor.state_dict(), # state_dict contains info about model params (learnable tensors: weights&biases) 73 | 'critic_state_dict': self._critic.state_dict(), 74 | 'actor_optim_state_dict': self._actor_optim.state_dict(), # state_dict contains info about optim state and hyperparams 75 | 'critic_optim_state_dict': self._critic_optim.state_dict(), 76 | 'ob_norm_state_dict': self._ob_norm.state_dict(), 77 | } 78 | 79 | def load_state_dict(self, ckpt): 80 | self._actor.load_state_dict(ckpt['actor_state_dict']) 81 | self._critic.load_state_dict(ckpt['critic_state_dict']) 82 | self._ob_norm.load_state_dict(ckpt['ob_norm_state_dict']) 83 | self._network_cuda(self._config.device) 84 | 85 | self._actor_optim.load_state_dict(ckpt['actor_optim_state_dict']) 86 | self._critic_optim.load_state_dict(ckpt['critic_optim_state_dict']) 87 | optimizer_cuda(self._actor_optim, self._config.device) # required when loading optim state from checkpoint 88 | optimizer_cuda(self._critic_optim, self._config.device) 89 | 90 | def _network_cuda(self, device): 91 | self._actor.to(device) 92 | self._old_actor.to(device) 93 | self._critic.to(device) 94 | 95 | def sync_networks(self): 96 | sync_networks(self._actor) 97 | sync_networks(self._critic) 98 | 99 | def train(self): 100 | self._soft_update_target_network(self._old_actor, self._actor, 0.0) 101 | 102 | for _ in range(self._config.num_batches): 103 | transitions = self._buffer.sample(self._config.batch_size) 104 | train_info = 
self._update_network(transitions) 105 | 106 | self._buffer.clear() 107 | 108 | train_info.update({ 109 | 'actor_grad_norm': compute_gradient_norm(self._actor), 110 | 'actor_weight_norm': compute_weight_norm(self._actor), 111 | 'critic_grad_norm': compute_gradient_norm(self._critic), 112 | 'critic_weight_norm': compute_weight_norm(self._critic), 113 | }) 114 | return train_info 115 | 116 | def _update_network(self, transitions): 117 | info = {} 118 | 119 | # pre-process observations 120 | o = transitions['ob'] # dict {'robot-state': array(), 'object-state': array()} coming from Sampler 121 | o = self.normalize(o) 122 | 123 | bs = len(transitions['done']) # batch size 124 | _to_tensor = lambda x: to_tensor(x, self._config.device) 125 | o = _to_tensor(o) 126 | ac = _to_tensor(transitions['ac']) 127 | a_z = _to_tensor(transitions['ac_before_activation']) # is OrderedDict([('default', tensor()]; tensor shape is (batch_size, 8) 128 | ret = _to_tensor(transitions['ret']).reshape(bs, 1) 129 | adv = _to_tensor(transitions['adv']).reshape(bs, 1) 130 | 131 | log_pi, ent = self._actor.act_log(o, a_z) # log_pi is tensor of shape (batch_size, 1) 132 | old_log_pi, _ = self._old_actor.act_log(o, a_z) 133 | 134 | if old_log_pi.min() < -100: 135 | import ipdb; ipdb.set_trace() 136 | 137 | # the actor loss 138 | entropy_loss = self._config.entropy_loss_coeff * ent.mean() 139 | ratio = torch.exp(log_pi - old_log_pi) 140 | surr1 = ratio * adv 141 | surr2 = torch.clamp(ratio, 1.0 - self._config.clip_param, 1.0 + self._config.clip_param) * adv 142 | actor_loss = -torch.min(surr1, surr2).mean() 143 | 144 | if not np.isfinite(ratio.cpu().detach()).all() or not np.isfinite(adv.cpu().detach()).all(): 145 | import ipdb; ipdb.set_trace() 146 | info['entropy_loss'] = entropy_loss.cpu().item() 147 | info['actor_loss'] = actor_loss.cpu().item() 148 | actor_loss += entropy_loss 149 | 150 | #custom_loss = self._actor.custom_loss() 151 | #if custom_loss is not None: 152 | #actor_loss += custom_loss * self._config.custom_loss_weight 153 | #info['custom_loss'] = custom_loss.cpu().item() 154 | 155 | # the q loss 156 | value_pred = self._critic(o) 157 | value_loss = self._config.value_loss_coeff * (ret - value_pred).pow(2).mean() 158 | 159 | info['value_target'] = ret.mean().cpu().item() 160 | info['value_predicted'] = value_pred.mean().cpu().item() 161 | info['value_loss'] = value_loss.cpu().item() 162 | 163 | # update the actor 164 | self._actor_optim.zero_grad() 165 | actor_loss.backward() 166 | #torch.nn.utils.clip_grad_norm_(self._actor.parameters(), self._config.max_grad_norm) 167 | sync_grads(self._actor) 168 | self._actor_optim.step() 169 | 170 | # update the critic 171 | self._critic_optim.zero_grad() 172 | value_loss.backward() 173 | #torch.nn.utils.clip_grad_norm_(self._critic1.parameters(), self._config.max_grad_norm) 174 | sync_grads(self._critic) 175 | self._critic_optim.step() 176 | 177 | # include info from policy 178 | info.update(self._actor.info) 179 | 180 | return mpi_average(info) 181 | -------------------------------------------------------------------------------- /panda/models/assets/robot/panda/robot_torque.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 
66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | -------------------------------------------------------------------------------- /panda/utils/pytorch.py: -------------------------------------------------------------------------------- 1 | import os 2 | import io 3 | from glob import glob 4 | from collections import OrderedDict, defaultdict 5 | 6 | import numpy as np 7 | import torch 8 | import torch.distributed as dist 9 | import torchvision.utils as vutils 10 | import torchvision.transforms.functional as TF 11 | import PIL.Image 12 | from mpi4py import MPI 13 | 14 | 15 | # Note! This is l2 square, not l2 16 | def l2(a, b): 17 | return torch.pow(torch.abs(a - b), 2).sum(dim=1) 18 | 19 | 20 | # required when we load optimizer from a checkpoint 21 | def optimizer_cuda(optimizer, device): 22 | for state in optimizer.state.values(): 23 | for k, v in state.items(): 24 | if isinstance(v, torch.Tensor): 25 | state[k] = v.to(device) 26 | 27 | 28 | def get_ckpt_path(base_dir, ckpt_num): 29 | if ckpt_num is None: 30 | return get_recent_ckpt_path(base_dir) 31 | files = glob(os.path.join(base_dir, "*.pt")) 32 | for f in files: 33 | if 'ckpt_%08d.pt' % ckpt_num in f: 34 | return f, ckpt_num 35 | raise Exception("Did not find ckpt_%s.pt" % ckpt_num) 36 | 37 | 38 | def get_recent_ckpt_path(base_dir): 39 | files = glob(os.path.join(base_dir, "*.pt")) 40 | files.sort() 41 | if len(files) == 0: 42 | return None, None 43 | max_step = max([f.rsplit('_', 1)[-1].split('.')[0] for f in files]) 44 | paths = [f for f in files if max_step in f] 45 | if len(paths) == 1: 46 | return paths[0], int(max_step) 47 | else: 48 | raise Exception("Multiple most recent ckpts %s" % paths) 49 | 50 | 51 | def image_grid(image, n=4): 52 | return vutils.make_grid(image[:n], nrow=n).cpu().detach().numpy() 53 | 54 | 55 | def count_parameters(model): 56 | return sum(p.numel() for p in model.parameters() if p.requires_grad) 57 | 58 | 59 | def slice_tensor(input, indices): 60 | ret = {} 61 | for k, v in input.items(): 62 | ret[k] = v[indices] 63 | return ret 64 | 65 | 66 | def average_gradients(model): 67 | size = float(dist.get_world_size()) 68 | for p in model.parameters(): 69 | if p.grad is not None: 70 | dist.all_reduce(p.grad.data, op=dist.ReduceOp.SUM) 71 | p.grad.data /= size 72 | 73 | 74 | def ensure_shared_grads(model, shared_model): 75 | """for A3C""" 76 | for param, shared_param in zip(model.parameters(), shared_model.parameters()): 77 | if shared_param.grad is not None: 78 | return 79 | shared_param._grad = param.grad 80 | 81 | 82 | def compute_gradient_norm(model): 83 | grad_norm = 0 84 | for p in model.parameters(): 85 | if p.grad is not None: 86 | grad_norm += (p.grad.data ** 2).sum().item() 87 | return grad_norm 88 | 89 | 90 | def compute_weight_norm(model): 91 | weight_norm = 0 92 | for p in model.parameters(): 93 | if p.data is not None: 94 | weight_norm += (p.data ** 2).sum().item() 95 | return weight_norm 96 | 97 | 98 | def compute_weight_sum(model): 99 | weight_sum = 0 100 | for p in model.parameters(): 101 | if p.data is not None: 102 | weight_sum += p.data.abs().sum().item() 103 | return weight_sum 104 | 105 | 106 | # sync_networks across the different cores 107 | def sync_networks(network): 108 | """ 109 | network is the network you want to sync 110 | """ 111 | comm = MPI.COMM_WORLD 112 | flat_params, 
params_shape = _get_flat_params(network) 113 | comm.Bcast(flat_params, root=0) 114 | # set the flat params back to the network 115 | _set_flat_params(network, params_shape, flat_params) 116 | 117 | 118 | # get the flat params from the network 119 | def _get_flat_params(network): 120 | param_shape = {} 121 | flat_params = None 122 | for key_name, value in network.named_parameters(): 123 | param_shape[key_name] = value.cpu().detach().numpy().shape 124 | if flat_params is None: 125 | flat_params = value.cpu().detach().numpy().flatten() 126 | else: 127 | flat_params = np.append(flat_params, value.cpu().detach().numpy().flatten()) 128 | return flat_params, param_shape 129 | 130 | 131 | # set the params from the network 132 | def _set_flat_params(network, params_shape, params): 133 | pointer = 0 134 | if hasattr(network, '_config'): 135 | device = network._config.device 136 | else: 137 | device = torch.device("cpu") 138 | 139 | for key_name, values in network.named_parameters(): 140 | # get the length of the parameters 141 | len_param = np.prod(params_shape[key_name]) 142 | copy_params = params[pointer:pointer + len_param].reshape(params_shape[key_name]) 143 | copy_params = torch.tensor(copy_params).to(device) 144 | # copy the params 145 | values.data.copy_(copy_params.data) 146 | # update the pointer 147 | pointer += len_param 148 | 149 | 150 | # sync gradients across the different cores 151 | def sync_grads(network): 152 | flat_grads, grads_shape = _get_flat_grads(network) 153 | comm = MPI.COMM_WORLD 154 | global_grads = np.zeros_like(flat_grads) 155 | comm.Allreduce(flat_grads, global_grads, op=MPI.SUM) 156 | _set_flat_grads(network, grads_shape, global_grads) 157 | 158 | 159 | def _set_flat_grads(network, grads_shape, flat_grads): 160 | pointer = 0 161 | if hasattr(network, '_config'): 162 | device = network._config.device 163 | else: 164 | device = torch.device("cpu") 165 | 166 | for key_name, value in network.named_parameters(): 167 | len_grads = np.prod(grads_shape[key_name]) 168 | copy_grads = flat_grads[pointer:pointer + len_grads].reshape(grads_shape[key_name]) 169 | copy_grads = torch.tensor(copy_grads).to(device) 170 | # copy the grads 171 | value.grad.data.copy_(copy_grads.data) 172 | pointer += len_grads 173 | 174 | 175 | def _get_flat_grads(network): 176 | grads_shape = {} 177 | flat_grads = None 178 | for key_name, value in network.named_parameters(): 179 | try: 180 | grads_shape[key_name] = value.grad.data.cpu().numpy().shape 181 | except: 182 | print('Cannot get grad of tensor {}'.format(key_name)) 183 | import pdb; pdb.set_trace() 184 | if flat_grads is None: 185 | flat_grads = value.grad.data.cpu().numpy().flatten() 186 | else: 187 | flat_grads = np.append(flat_grads, value.grad.data.cpu().numpy().flatten()) 188 | return flat_grads, grads_shape 189 | 190 | 191 | def fig2tensor(draw_func): 192 | def decorate(*args, **kwargs): 193 | tmp = io.BytesIO() 194 | fig = draw_func(*args, **kwargs) 195 | fig.savefig(tmp, dpi=88) 196 | tmp.seek(0) 197 | fig.clf() 198 | return TF.to_tensor(PIL.Image.open(tmp)) 199 | 200 | return decorate 201 | 202 | 203 | def tensor2np(t): 204 | if isinstance(t, torch.Tensor): 205 | return t.clone().detach().cpu().numpy() 206 | else: 207 | return t 208 | 209 | 210 | def tensor2img(tensor): 211 | if len(tensor.shape) == 4: 212 | assert tensor.shape[0] == 1 213 | tensor = tensor.squeeze(0) 214 | img = tensor.permute(1, 2, 0).detach().cpu().numpy() 215 | import cv2 216 | cv2.imwrite('tensor.png', img) 217 | 218 | 219 | def obs2tensor(obs, device): 220 | if 
isinstance(obs, list): 221 | obs = list2dict(obs) 222 | 223 | return OrderedDict([(k, torch.tensor(np.stack(v), dtype=torch.float32).to(device)) for k, v in obs.items()]) 224 | 225 | 226 | # transfer a numpy array/list into a tensor 227 | def to_tensor(x, device): 228 | if isinstance(x, dict): 229 | return OrderedDict([(k, torch.tensor(v, dtype=torch.float32).to(device)) for k, v in x.items()]) 230 | 231 | if isinstance(x, list): 232 | return [torch.tensor(v, dtype=torch.float32).to(device) for v in x] 233 | 234 | return torch.tensor(x, dtype=torch.float32).to(device) 235 | 236 | 237 | def list2dict(rollout): 238 | ret = OrderedDict() 239 | for k in rollout[0].keys(): 240 | ret[k] = [] 241 | for transition in rollout: 242 | for k, v in transition.items(): 243 | ret[k].append(v) 244 | return ret 245 | 246 | 247 | # From softlearning repo 248 | def flatten(unflattened, parent_key='', separator='/'): 249 | items = [] 250 | for k, v in unflattened.items(): 251 | if separator in k: 252 | raise ValueError( 253 | "Found separator ({}) from key ({})".format(separator, k)) 254 | 255 | new_key = parent_key + separator + k if parent_key else k 256 | 257 | if isinstance(v, collections.MutableMapping) and v: 258 | items.extend(flatten(v, new_key, separator=separator).items()) 259 | else: 260 | items.append((new_key, v)) 261 | 262 | return OrderedDict(items) 263 | 264 | 265 | # From softlearning repo 266 | def unflatten(flattened, separator='.'): 267 | result = {} 268 | for key, value in flattened.items(): 269 | parts = key.split(separator) 270 | d = result 271 | for part in parts[:-1]: 272 | if part not in d: 273 | d[part] = {} 274 | d = d[part] 275 | d[parts[-1]] = value 276 | 277 | return result 278 | -------------------------------------------------------------------------------- /panda/rl/sac_agent.py: -------------------------------------------------------------------------------- 1 | # SAC training code reference 2 | # https://github.com/vitchyr/rlkit/blob/master/rlkit/torch/sac/sac.py 3 | 4 | from collections import OrderedDict 5 | 6 | import numpy as np 7 | import torch 8 | import torch.nn as nn 9 | import torch.optim as optim 10 | from rl.dataset import ReplayBuffer, RandomSampler 11 | from rl.base_agent import BaseAgent 12 | from utils.logger import logger 13 | from utils.mpi import mpi_average 14 | from utils.pytorch import optimizer_cuda, count_parameters, compute_gradient_norm, compute_weight_norm, sync_networks, sync_grads, to_tensor 15 | 16 | 17 | class SACAgent(BaseAgent): 18 | def __init__(self, config, ob_space, ac_space, actor, critic): 19 | super().__init__(config, ob_space) 20 | 21 | self._ob_space = ob_space 22 | self._ac_space = ac_space 23 | 24 | self._target_entropy = -ac_space.size 25 | self._log_alpha = torch.zeros(1, requires_grad=True, device=config.device) 26 | self._alpha_optim = optim.Adam([self._log_alpha], lr=config.lr_actor) 27 | 28 | # build up networks 29 | self._build_actor(actor) 30 | self._critic1 = critic(config, ob_space, ac_space) 31 | self._critic2 = critic(config, ob_space, ac_space) 32 | 33 | # build up target networks 34 | self._critic1_target = critic(config, ob_space, ac_space) 35 | self._critic2_target = critic(config, ob_space, ac_space) 36 | self._critic1_target.load_state_dict(self._critic1.state_dict()) 37 | self._critic2_target.load_state_dict(self._critic2.state_dict()) 38 | self._network_cuda(config.device) 39 | 40 | self._actor_optim = optim.Adam(self._actor.parameters(), lr=config.lr_actor) 41 | self._critic1_optim = 
optim.Adam(self._critic1.parameters(), lr=config.lr_critic) 42 | self._critic2_optim = optim.Adam(self._critic2.parameters(), lr=config.lr_critic) 43 | 44 | sampler = RandomSampler() 45 | buffer_keys = ['ob', 'ac', 'done', 'rew'] 46 | self._buffer = ReplayBuffer(buffer_keys, 47 | config.buffer_size, 48 | sampler.sample_func) 49 | 50 | self._log_creation() 51 | 52 | def _log_creation(self): 53 | if self._config.is_chef: 54 | logger.info('Creating a SAC agent') 55 | logger.info('The actor has %d parameters', count_parameters(self._actor)) 56 | logger.info('The critic1 has %d parameters', count_parameters(self._critic1)) 57 | logger.info('The critic2 has %d parameters', count_parameters(self._critic2)) 58 | 59 | def _build_actor(self, actor): 60 | self._actor = actor(self._config, self._ob_space, self._ac_space, self._config.tanh_policy) 61 | 62 | def store_episode(self, rollouts): 63 | self._buffer.store_episode(rollouts) 64 | 65 | def state_dict(self): 66 | return { 67 | 'log_alpha': self._log_alpha.cpu().detach().numpy(), 68 | 'actor_state_dict': self._actor.state_dict(), 69 | 'critic1_state_dict': self._critic1.state_dict(), 70 | 'critic2_state_dict': self._critic2.state_dict(), 71 | 'alpha_optim_state_dict': self._alpha_optim.state_dict(), 72 | 'actor_optim_state_dict': self._actor_optim.state_dict(), 73 | 'critic1_optim_state_dict': self._critic1_optim.state_dict(), 74 | 'critic2_optim_state_dict': self._critic2_optim.state_dict(), 75 | 'ob_norm_state_dict': self._ob_norm.state_dict(), 76 | } 77 | 78 | def load_state_dict(self, ckpt): 79 | self._log_alpha.data = torch.tensor(ckpt['log_alpha'], requires_grad=True, 80 | device=self._config.device) 81 | self._actor.load_state_dict(ckpt['actor_state_dict']) 82 | self._critic1.load_state_dict(ckpt['critic1_state_dict']) 83 | self._critic2.load_state_dict(ckpt['critic2_state_dict']) 84 | self._critic1_target.load_state_dict(self._critic1.state_dict()) 85 | self._critic2_target.load_state_dict(self._critic2.state_dict()) 86 | self._ob_norm.load_state_dict(ckpt['ob_norm_state_dict']) 87 | self._network_cuda(self._config.device) 88 | 89 | self._alpha_optim.load_state_dict(ckpt['alpha_optim_state_dict']) 90 | self._actor_optim.load_state_dict(ckpt['actor_optim_state_dict']) 91 | self._critic1_optim.load_state_dict(ckpt['critic1_optim_state_dict']) 92 | self._critic2_optim.load_state_dict(ckpt['critic2_optim_state_dict']) 93 | optimizer_cuda(self._alpha_optim, self._config.device) 94 | optimizer_cuda(self._actor_optim, self._config.device) 95 | optimizer_cuda(self._critic1_optim, self._config.device) 96 | optimizer_cuda(self._critic2_optim, self._config.device) 97 | 98 | def _network_cuda(self, device): 99 | self._actor.to(device) 100 | self._critic1.to(device) 101 | self._critic2.to(device) 102 | self._critic1_target.to(device) 103 | self._critic2_target.to(device) 104 | 105 | def sync_networks(self): 106 | sync_networks(self._actor) 107 | sync_networks(self._critic1) 108 | sync_networks(self._critic2) 109 | 110 | def train(self): 111 | for _ in range(self._config.num_batches): 112 | transitions = self._buffer.sample(self._config.batch_size) 113 | train_info = self._update_network(transitions) 114 | self._soft_update_target_network(self._critic1_target, self._critic1, self._config.polyak) 115 | self._soft_update_target_network(self._critic2_target, self._critic2, self._config.polyak) 116 | 117 | train_info.update({ 118 | 'actor_grad_norm': np.mean(compute_gradient_norm(self._actor)), 119 | 'actor_weight_norm': 
np.mean(compute_weight_norm(self._actor)), 120 | 'critic1_grad_norm': compute_gradient_norm(self._critic1), 121 | 'critic2_grad_norm': compute_gradient_norm(self._critic2), 122 | 'critic1_weight_norm': compute_weight_norm(self._critic1), 123 | 'critic2_weight_norm': compute_weight_norm(self._critic2), 124 | }) 125 | return train_info 126 | 127 | def act_log(self, ob): 128 | return self._actor.act_log(ob) 129 | 130 | def _update_network(self, transitions): 131 | info = {} 132 | 133 | # pre-process observations 134 | o, o_next = transitions['ob'], transitions['ob_next'] 135 | o = self.normalize(o) 136 | o_next = self.normalize(o_next) 137 | 138 | bs = len(transitions['done']) 139 | _to_tensor = lambda x: to_tensor(x, self._config.device) 140 | o = _to_tensor(o) 141 | o_next = _to_tensor(o_next) 142 | ac = _to_tensor(transitions['ac']) 143 | done = _to_tensor(transitions['done']).reshape(bs, 1) 144 | rew = _to_tensor(transitions['rew']).reshape(bs, 1) 145 | 146 | # update alpha 147 | actions_real, log_pi = self.act_log(o) 148 | alpha_loss = -(self._log_alpha * (log_pi + self._target_entropy).detach()).mean() 149 | self._alpha_optim.zero_grad() 150 | alpha_loss.backward() 151 | self._alpha_optim.step() 152 | alpha = self._log_alpha.exp() 153 | 154 | # the actor loss 155 | entropy_loss = (alpha * log_pi).mean() 156 | actor_loss = -torch.min(self._critic1(o, actions_real), 157 | self._critic2(o, actions_real)).mean() 158 | info['entropy_alpha'] = alpha.cpu().item() 159 | info['entropy_loss'] = entropy_loss.cpu().item() 160 | info['actor_loss'] = actor_loss.cpu().item() 161 | actor_loss += entropy_loss 162 | 163 | # calculate the target Q value function 164 | with torch.no_grad(): 165 | actions_next, log_pi_next = self.act_log(o_next) 166 | q_next_value1 = self._critic1_target(o_next, actions_next) 167 | q_next_value2 = self._critic2_target(o_next, actions_next) 168 | q_next_value = torch.min(q_next_value1, q_next_value2) - alpha * log_pi_next 169 | target_q_value = rew * self._config.reward_scale + (1 - done) * self._config.discount_factor * q_next_value 170 | target_q_value = target_q_value.detach() 171 | ## clip the q value 172 | clip_return = 10 / (1 - self._config.discount_factor) 173 | target_q_value = torch.clamp(target_q_value, -clip_return, clip_return) 174 | 175 | # the q loss 176 | real_q_value1 = self._critic1(o, ac) 177 | real_q_value2 = self._critic2(o, ac) 178 | critic1_loss = 0.5 * (target_q_value - real_q_value1).pow(2).mean() 179 | critic2_loss = 0.5 * (target_q_value - real_q_value2).pow(2).mean() 180 | 181 | info['min_target_q'] = target_q_value.min().cpu().item() 182 | info['target_q'] = target_q_value.mean().cpu().item() 183 | info['min_real1_q'] = real_q_value1.min().cpu().item() 184 | info['min_real2_q'] = real_q_value2.min().cpu().item() 185 | info['real1_q'] = real_q_value1.mean().cpu().item() 186 | info['real2_q'] = real_q_value2.mean().cpu().item() 187 | info['critic1_loss'] = critic1_loss.cpu().item() 188 | info['critic2_loss'] = critic2_loss.cpu().item() 189 | 190 | # update the actor 191 | self._actor_optim.zero_grad() 192 | actor_loss.backward() 193 | #torch.nn.utils.clip_grad_norm_(self._actor.parameters(), self._config.max_grad_norm) 194 | sync_grads(self._actor) 195 | self._actor_optim.step() 196 | 197 | # update the critic 198 | self._critic1_optim.zero_grad() 199 | critic1_loss.backward() 200 | #torch.nn.utils.clip_grad_norm_(self._critic1.parameters(), self._config.max_grad_norm) 201 | sync_grads(self._critic1) 202 | self._critic1_optim.step() 203 | 204 | 
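# Note: the second critic below gets exactly the same treatment as critic1 above; both regress
# onto the shared clipped target_q_value, and taking the min over two independent critics (in the
# target and in the actor loss) is what curbs Q-value overestimation in SAC. After
# _update_network() returns, train() refreshes both target critics via
# _soft_update_target_network(target, source, polyak) (defined in BaseAgent, not shown here),
# which presumably blends target_param <- polyak * target_param + (1 - polyak) * source_param,
# so a polyak close to 1 keeps the targets slowly moving.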
self._critic2_optim.zero_grad() 205 | critic2_loss.backward() 206 | #torch.nn.utils.clip_grad_norm_(self._critic2.parameters(), self._config.max_grad_norm) 207 | sync_grads(self._critic2) 208 | self._critic2_optim.step() 209 | 210 | # include info from policy 211 | info.update(self._actor.info) 212 | return mpi_average(info) 213 | -------------------------------------------------------------------------------- /panda/environments/panda_grasping.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | import random 3 | import numpy as np 4 | 5 | import utils.transform_utils as T 6 | from utils.mjcf_utils import string_to_array 7 | from utils.transform_utils import convert_quat 8 | from environments.panda import PandaEnv 9 | 10 | from models.arena import TableArena 11 | from models.objects import CubeObject, BasePartObject, CylObject, Cyl2Object 12 | from models.robot import Panda 13 | from models.task import GraspingTask 14 | 15 | import hjson 16 | import os 17 | 18 | 19 | class PandaGrasp(PandaEnv): 20 | """ 21 | This class corresponds to the grasping task for the 🐼️ robot arm. 22 | """ 23 | 24 | def __init__(self, config): 25 | """ 26 | Args: 27 | config = configuration file of environment and task parameters 28 | """ 29 | 30 | # settings for table top 31 | self.config = config 32 | self.table_full_size = config.table_full_size 33 | 34 | # Load the controller parameter configuration files 35 | controller_filepath = os.path.join(os.path.dirname(__file__), '..','config/controller_config.hjson') 36 | super().__init__(config, controller_config_file=controller_filepath) 37 | 38 | 39 | def _load_model(self): 40 | super()._load_model() 41 | # set the robot base pos in world ref frame; World frame is right at the center of floor 42 | self.mujoco_robot.set_base_xpos([-0.5, 0, 0.913]) 43 | 44 | # load model for table top workspace 45 | self.mujoco_arena = TableArena(table_full_size=self.table_full_size) 46 | self.mujoco_arena.set_origin([0, 0, 0]) 47 | 48 | # define mujoco objects 49 | Cube = CubeObject() 50 | Base = BasePartObject() # This object has no function in learning its just for viewing purpose 51 | 52 | self.mujoco_objects = OrderedDict([("BasePart", Base), ("Cube", Cube)]) 53 | 54 | # For collision avoidance scenario 55 | if self.config.mode == 2: 56 | Cylinder = CylObject() 57 | Cylinder2 = Cyl2Object() 58 | self.mujoco_objects = OrderedDict([("BasePart", Base), ("Cube", Cube), ("Cylinder", Cylinder), ("Cylinder2", Cylinder2)]) 59 | 60 | # task includes arena, robot, and objects of interest 61 | self.model = GraspingTask(self.mujoco_arena, self.mujoco_robot, self.mujoco_objects) 62 | self.table_pos = string_to_array(self.model.table_body.get("pos")) 63 | 64 | 65 | def _get_reference(self): 66 | super()._get_reference() 67 | 68 | self.cube_body_id = self.sim.model.body_name2id("Cube") # 'cube': 23 69 | self.cube_geom_id = self.sim.model.geom_name2id("Cube") # 'cube': 41 70 | 71 | #information of objects 72 | self.object_names = list(self.mujoco_objects.keys()) 73 | self.object_site_ids = [self.sim.model.site_name2id(ob_name) for ob_name in self.object_names] 74 | 75 | # clutter objects ids 76 | if self.config.mode == 2: 77 | self.cyl_geom_id = self.sim.model.geom_name2id("Cylinder") 78 | self.cyl2_geom_id = self.sim.model.geom_name2id("Cylinder2") 79 | 80 | # id of grippers for contact checking 81 | # ['hand_collision', 'finger1_collision', 'finger2_collision', 'finger1_tip_collision', 'finger2_tip_collision'] 82 | 
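# These gripper geom names are resolved to integer ids with sim.model.geom_name2id() below;
# MuJoCo reports contacts in sim.data.contact as (geom1, geom2) id pairs, so caching the
# left/right finger ids lets reward() detect finger-cube contact with simple id membership
# tests (see the touch_left_finger / touch_right_finger loop there).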
if self.has_gripper: 83 | self.finger_names = self.gripper.contact_geoms() 84 | self.l_finger_geom_ids = [self.sim.model.geom_name2id(x) for x in self.gripper.left_finger_geoms] # finger1_visual': 34 85 | self.r_finger_geom_ids = [self.sim.model.geom_name2id(x) for x in self.gripper.right_finger_geoms] # 'finger2_visual': 37 86 | 87 | # self.sim.data.contact # list, geom1, geom2 88 | self.collision_check_geom_names = self.sim.model._geom_name2id.keys() # {...,'hand_collision': 33 ,...,'BasePart': 40, 'cube': 41} 89 | self.collision_check_geom_ids = [self.sim.model._geom_name2id[k] for k in self.collision_check_geom_names] 90 | 91 | 92 | def _reset_internal(self): 93 | """ 94 | Resets simulation internal configurations. 95 | """ 96 | super()._reset_internal() 97 | 98 | init_pos = np.array([0, np.pi / 16.0, 0.00, -np.pi / 2.0 - np.pi / 3.0, 0.00, np.pi - 0.2, np.pi / 4]) 99 | 100 | # Uncomment below line for adding randomization to initial position of robot position 101 | #init_pos += np.random.randn(init_pos.shape[0]) * 0.02 102 | 103 | self.sim.data.qpos[self._ref_joint_pos_indexes] = np.array(init_pos) 104 | 105 | # reset position of objects on table top 106 | 107 | # Cube 108 | self.sim.data.qpos[16:19] = np.array([0 ,-0.1 ,0.85]) 109 | self.sim.data.qpos[19:23] = np.array([1 ,0 ,0, 0]) 110 | 111 | # Random Base Part 112 | self.sim.data.qpos[9:12] = np.array([0 ,0.1 ,0.82]) 113 | self.sim.data.qpos[12:16] = np.array([1 ,0 ,0, 0]) 114 | 115 | if self.config.mode == 2: 116 | # reset cylinders pos 117 | self.sim.data.qpos[23:26] = np.array([0 ,0.03 ,0.9]) 118 | self.sim.data.qpos[26:30] = np.array([1 ,0 ,0, 0]) 119 | 120 | self.sim.data.qpos[30:33] = np.array([0 ,-0.23 ,0.9]) 121 | self.sim.data.qpos[33:37] = np.array([1 ,0 ,0, 0]) 122 | 123 | # reset the phase and grasp state 124 | self.phase = 0 125 | self.has_grasp = False 126 | 127 | def reward(self, action=None): 128 | """ 129 | Reward function for the task. 
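The dense reward is shaped in two phases. In phase 0 (reaching) it combines a reaching term
1 - tanh(10 * dist) between the grip site and the cube, a slow-down term 1 - tanh(10 * vel)
once the gripper is within 0.08 m, and a small bonus for keeping the gripper open; when
dist < 0.025 the task switches to phase 1 (grasping + lifting), which rewards reaching and
slowing down at full weight, rewards closing the gripper, adds 0.5 while both fingers touch
the cube, adds 5.0 on success, and (in mode 2) subtracts 5.0 for collisions with the clutter
cylinders.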
130 | Returns: 131 | reward (float): the reward 132 | """ 133 | # reaching reward 134 | cube_pos = self.sim.data.body_xpos[self.cube_body_id] 135 | gripper_site_pos = self.sim.data.site_xpos[self.eef_site_id] 136 | dist = np.linalg.norm(gripper_site_pos - cube_pos) # Eucledian distance b/w cube and grip site 137 | reaching_reward = 1 - np.tanh(10.0 * dist) # tanh function on distance 138 | 139 | # slow down reward 140 | vel = np.sum(abs(self.ee_v)) / 6 141 | vel_reward = 1 - np.tanh(10.0 * vel) 142 | 143 | # Two phases of the task (0.Reaching and 1.Grasping+Lifiting) 144 | if self.phase == 0: 145 | 146 | reward = 0.6 * reaching_reward 147 | 148 | if dist < 0.08: 149 | reward += 0.3 * vel_reward 150 | 151 | # gripper open reward 152 | if action[-1] < 0: 153 | reward += 0.1 * abs(action[-1]) 154 | 155 | if dist < 0.025: 156 | self.phase = 1 157 | 158 | elif self.phase == 1: 159 | 160 | reward = reaching_reward + vel_reward 161 | 162 | # gripper closing reward 163 | if action[-1] > 0: 164 | reward += 0.5 * action[-1] 165 | 166 | # check contact between fingers and cube 167 | touch_left_finger = False 168 | touch_right_finger = False 169 | 170 | for i in range(self.sim.data.ncon): 171 | c = self.sim.data.contact[i] 172 | if c.geom1 in self.l_finger_geom_ids and c.geom2 == self.cube_geom_id: 173 | touch_left_finger = True 174 | if c.geom1 == self.cube_geom_id and c.geom2 in self.l_finger_geom_ids: 175 | touch_left_finger = True 176 | if c.geom1 in self.r_finger_geom_ids and c.geom2 == self.cube_geom_id: 177 | touch_right_finger = True 178 | if c.geom1 == self.cube_geom_id and c.geom2 in self.r_finger_geom_ids: 179 | touch_right_finger = True 180 | 181 | self.has_grasp = touch_left_finger and touch_right_finger 182 | 183 | # grasping reward 184 | if self.has_grasp: 185 | reward += 0.5 186 | 187 | # success reward 188 | if self._check_success(): 189 | reward += 5.0 190 | 191 | # stay within joint limits! This is used only in retraining condition 192 | #if self._check_q_limits(): 193 | #reward -= 1.0 194 | 195 | # Collision avoidance scene 196 | if self.config.mode == 2: 197 | # collision penalty 198 | collision = False 199 | for contact in self.sim.data.contact[:self.sim.data.ncon]: 200 | # hand collision check 201 | if self.sim.model.geom_id2name(contact.geom1) in self.hand_names and contact.geom2 == self.cyl_geom_id: 202 | collision = True 203 | if self.sim.model.geom_id2name(contact.geom2) in self.hand_names and contact.geom1 == self.cyl_geom_id: 204 | collision = True 205 | if self.sim.model.geom_id2name(contact.geom1) in self.hand_names and contact.geom2 == self.cyl2_geom_id: 206 | collision = True 207 | if self.sim.model.geom_id2name(contact.geom2) in self.hand_names and contact.geom1 == self.cyl2_geom_id: 208 | collision = True 209 | # cube collision check 210 | if contact.geom1 == self.cyl_geom_id and contact.geom2 == self.cube_geom_id: 211 | collision = True 212 | if contact.geom1 == self.cube_geom_id and contact.geom2 == self.cyl_geom_id: 213 | collision = True 214 | if contact.geom1 == self.cyl2_geom_id and contact.geom2 == self.cube_geom_id: 215 | collision = True 216 | if contact.geom1 == self.cube_geom_id and contact.geom2 == self.cyl2_geom_id: 217 | collision = True 218 | 219 | if collision: 220 | reward -= 5.0 221 | 222 | return reward 223 | 224 | def _check_success(self): 225 | """ 226 | Returns True if task has been completed. 
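Success means the cube is grasped (both finger pads in contact, tracked by self.has_grasp)
while its center is lifted above z = 0.86 m in the world frame.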
227 | """ 228 | cube_pos = self.sim.data.body_xpos[self.cube_body_id] 229 | gripper_site_pos = self.sim.data.site_xpos[self.eef_site_id] 230 | dist = np.linalg.norm(gripper_site_pos - cube_pos) 231 | 232 | return self.has_grasp and cube_pos[2] > 0.86 233 | 234 | 235 | def _get_observation(self): 236 | """ 237 | Returns an OrderedDict containing object observations 238 | """ 239 | state = super()._get_observation() 240 | di = OrderedDict() 241 | 242 | # low-level object information 243 | # position and rotation of object 244 | cube_pos = np.array(self.sim.data.body_xpos[self.cube_body_id]) 245 | cube_quat = convert_quat(np.array(self.sim.data.body_xquat[self.cube_body_id]), to="xyzw") 246 | di["cube_pos"] = cube_pos 247 | di["cube_quat"] = cube_quat 248 | 249 | gripper_site_pos = np.array(self.sim.data.site_xpos[self.eef_site_id]) 250 | # gripper to cube distance 251 | di["gripper_to_cube"] = gripper_site_pos - cube_pos 252 | 253 | state["object-state"] = np.concatenate([cube_pos, cube_quat, di["gripper_to_cube"]]) 254 | 255 | return state 256 | -------------------------------------------------------------------------------- /panda/rl/trainer.py: -------------------------------------------------------------------------------- 1 | """ Base code for RL training. Collects rollouts and updates policy networks. """ 2 | 3 | import os 4 | from time import time 5 | from collections import defaultdict, OrderedDict 6 | import gzip 7 | import pickle 8 | import h5py 9 | import torch 10 | import wandb 11 | import numpy as np 12 | from tqdm import tqdm, trange 13 | from rl.policies import MlpActor, MlpCritic 14 | from rl.ppo_agent import PPOAgent 15 | from rl.sac_agent import SACAgent 16 | from rl.rollouts import RolloutRunner 17 | from utils.logger import logger 18 | from utils.pytorch import get_ckpt_path 19 | from utils.mpi import mpi_sum 20 | from environments import make 21 | 22 | class Trainer(): 23 | """ 24 | Trainer class for SAC and PPO in PyTorch. 25 | """ 26 | 27 | def __init__(self, config): 28 | """ 29 | Initializes class with the configuration. 30 | """ 31 | self._config = config 32 | self._is_chef = config.is_chef 33 | 34 | # create a new environment 35 | self._env = make("PandaGrasp", config) 36 | ob_space = self._env.observation_space # e.g. OrderedDict([('object-state', [10]), ('robot-state', [36])]) 37 | ac_space = self._env.action_space # e.g. 
ActionSpace(shape=OrderedDict([('default', 8)]),minimum=-1.0, maximum=1.0) 38 | print('***', ac_space) 39 | 40 | # get actor and critic networks 41 | actor, critic = MlpActor, MlpCritic 42 | 43 | # build up networks for PPO agent 44 | if self._config.algo == 'sac': 45 | self._agent = SACAgent(config, ob_space, ac_space, actor, critic) 46 | else: 47 | self._agent = PPOAgent(config, ob_space, ac_space, actor, critic) 48 | 49 | # build rollout runner 50 | self._runner = RolloutRunner(config, self._env, self._agent) 51 | 52 | # setup log 53 | if self._is_chef and self._config.is_train: 54 | exclude = ['device'] 55 | if not self._config.wandb: 56 | os.environ['WANDB_MODE'] = 'dryrun' 57 | 58 | # Weights and Biases (wandb) is used for logging, set the account details below or dry run above 59 | # user or team name 60 | entity = 'panda' 61 | # project name 62 | project = 'robo' 63 | 64 | wandb.init( 65 | resume=config.run_name, 66 | project=project, 67 | config={k: v for k, v in config.__dict__.items() if k not in exclude}, 68 | dir=config.log_dir, 69 | entity=entity, 70 | notes=config.notes 71 | ) 72 | 73 | def _save_ckpt(self, ckpt_num, update_iter): 74 | """ 75 | Save checkpoint to log directory. 76 | 77 | Args: 78 | ckpt_num: number appended to checkpoint name. The number of environment step is used in this code. 79 | update_iter: number of policy update. It will be used for resuming training. 80 | """ 81 | ckpt_path = os.path.join(self._config.log_dir, 'ckpt_%08d.pt' % ckpt_num) 82 | state_dict = {'step': ckpt_num, 'update_iter': update_iter} 83 | state_dict['agent'] = self._agent.state_dict() 84 | torch.save(state_dict, ckpt_path) 85 | logger.warn('Save checkpoint: %s', ckpt_path) 86 | 87 | replay_path = os.path.join(self._config.log_dir, 'replay_%08d.pkl' % ckpt_num) 88 | with gzip.open(replay_path, 'wb') as f: 89 | replay_buffers = {'replay': self._agent.replay_buffer()} 90 | pickle.dump(replay_buffers, f) 91 | 92 | def _load_ckpt(self, ckpt_num=None): 93 | """ 94 | Loads checkpoint with index number @ckpt_num. If @ckpt_num is None, 95 | it loads and returns the checkpoint with the largest index number. 96 | """ 97 | ckpt_path, ckpt_num = get_ckpt_path(self._config.log_dir, ckpt_num) 98 | 99 | if ckpt_path is not None: 100 | logger.warn('Load checkpoint %s', ckpt_path) 101 | ckpt = torch.load(ckpt_path) # ckpt is a dict with keys (step, update_iter, agent) 102 | self._agent.load_state_dict(ckpt['agent']) 103 | 104 | if self._config.is_train: 105 | replay_path = os.path.join(self._config.log_dir, 'replay_%08d.pkl' % ckpt_num) 106 | logger.warn('Load replay_buffer %s', replay_path) 107 | with gzip.open(replay_path, 'rb') as f: 108 | replay_buffers = pickle.load(f) 109 | self._agent.load_replay_buffer(replay_buffers['replay']) 110 | 111 | return ckpt['step'], ckpt['update_iter'] 112 | else: 113 | logger.warn('Randomly initialize models') 114 | return 0, 0 115 | 116 | def _log_ep(self, step, ep_info): 117 | """ 118 | Logs episode information to wandb. 119 | Args: 120 | step: the number of environment steps. 121 | ep_info: episode information to log, such as reward, episode time. 122 | """ 123 | for k, v in ep_info.items(): 124 | wandb.log({'train_ep/%s' % k: np.mean(v)}, step=step) 125 | wandb.log({'train_ep_max/%s' % k: np.max(v)}, step=step) 126 | 127 | def _log_train(self, step, train_info): 128 | """ 129 | Logs training information to wandb. 130 | Args: 131 | step: the number of environment steps. 132 | train_info: training information to log, such as loss, gradient. 
133 | """ 134 | for k, v in train_info.items(): 135 | if np.isscalar(v) or (hasattr(v, 'shape') and np.prod(v.shape) == 1): 136 | wandb.log({'train_rl/%s' % k: v}, step=step) 137 | else: 138 | wandb.log({'train_rl/%s' % k: [wandb.Image(v)]}, step=step) 139 | 140 | def _log_test(self, step, ep_info): 141 | """ 142 | Logs episode information during testing to wandb. 143 | Args: 144 | step: the number of environment steps. 145 | ep_info: episode information to log, such as reward, episode time. 146 | """ 147 | if self._config.is_train: 148 | for k, v in ep_info.items(): 149 | wandb.log({'test_ep/%s' % k: np.mean(v)}, step=step) 150 | 151 | def train(self): 152 | """ Trains an agent. """ 153 | config = self._config 154 | num_batches = config.num_batches 155 | 156 | # load checkpoint 157 | step, update_iter = self._load_ckpt() 158 | 159 | # sync the networks across the cpus 160 | self._agent.sync_networks() 161 | 162 | logger.info("Start training at step=%d", step) 163 | if self._is_chef: 164 | pbar = tqdm(initial=step, total=config.max_global_step, desc=config.run_name) 165 | ep_info = defaultdict(list) 166 | 167 | # decide how many episodes or how long rollout to collect 168 | if self._config.algo == 'sac': 169 | run_ep_max = 1 170 | run_step_max = 600 171 | else: 172 | run_step_max = self._config.rollout_length 173 | run_ep_max = 3 174 | 175 | # dummy run for preventing weird error in a cold run 176 | self._runner.run_episode() 177 | 178 | st_time = time() 179 | st_step = step 180 | log_step = 0 181 | while step < config.max_global_step: 182 | # collect rollouts 183 | run_ep = 0 184 | run_step = 0 185 | while run_step < run_step_max and run_ep < run_ep_max: 186 | # return one episode and info{len:, rew:, qpos:[], success:} about that episode 187 | rollout, info = self._runner.run_episode() 188 | run_step += info['len'] 189 | run_ep += 1 190 | log_step += info['len'] 191 | 192 | for k, v in info.items(): 193 | if isinstance(v, list): 194 | ep_info[k].extend(v) 195 | else: 196 | ep_info[k].append(v) 197 | 198 | self._log_ep(log_step, ep_info) 199 | ep_info = defaultdict(list) 200 | logger.info('rollout: %s', {k: v for k, v in info.items() if not 'qpos' in k}) 201 | self._agent.store_episode(rollout) 202 | self._update_normalizer(rollout) 203 | 204 | step_per_batch = mpi_sum(run_step) 205 | 206 | # train an agent 207 | logger.info('Update networks %d', update_iter) 208 | train_info = self._agent.train() 209 | logger.info('Update networks done') 210 | 211 | step += step_per_batch 212 | update_iter += 1 213 | 214 | # log training and episode information or evaluate 215 | if self._is_chef: 216 | pbar.update(step_per_batch) 217 | if update_iter % config.log_interval == 0: 218 | train_info.update({ 219 | 'sec': (time() - st_time) / config.log_interval, 220 | 'steps_per_sec': (step - st_step) / (time() - st_time), 221 | 'update_iter': update_iter 222 | }) 223 | st_time = time() 224 | st_step = step 225 | self._log_train(step, train_info) 226 | 227 | if update_iter % config.evaluate_interval == 1: 228 | logger.info('Evaluate at %d', update_iter) 229 | rollout, info = self._evaluate(step=step)## 230 | self._log_test(step, info) 231 | 232 | if update_iter % config.ckpt_interval == 0: 233 | self._save_ckpt(step, update_iter) 234 | 235 | logger.info('Reached %s steps. worker %d stopped.', step, config.rank) 236 | 237 | def _update_normalizer(self, rollout): 238 | """ Updates normalizer with @rollout. 
""" 239 | if self._config.ob_norm: 240 | self._agent.update_normalizer(rollout['ob']) 241 | 242 | def _evaluate(self, step=None, idx=None): 243 | """ 244 | Runs one rollout if in eval mode (@idx is not None). 245 | Runs num_record_samples rollouts if in train mode (@idx is None). 246 | 247 | Args: 248 | step: the number of environment steps. 249 | """ 250 | for i in range(self._config.num_record_samples): 251 | rollout, info = self._runner.run_episode(is_train=False) 252 | 253 | if idx is not None: 254 | break 255 | logger.info('rollout: %s', {k: v for k, v in info.items() if not 'qpos' in k}) 256 | return rollout, info 257 | 258 | def evaluate(self): 259 | """ Evaluates an agent stored in chekpoint with @self._config.ckpt_num. """ 260 | 261 | step, update_iter = self._load_ckpt(ckpt_num=self._config.ckpt_num) 262 | logger.info('Run %d evaluations at step=%d, update_iter=%d', self._config.num_eval, step, update_iter) 263 | 264 | for i in trange(self._config.num_eval): 265 | logger.warn("Evalute run %d", i+1) 266 | rollout, info = self._evaluate(step=step, idx=i) 267 | -------------------------------------------------------------------------------- /panda/environments/panda.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | import numpy as np 3 | import utils.transform_utils as T 4 | from environments import MujocoEnv 5 | from models.gripper import PandaGripper 6 | from models.robot import Panda 7 | from environments.action_space import ActionSpace 8 | from controller.arm_controller import * 9 | from collections import deque 10 | import hjson 11 | 12 | 13 | class PandaEnv(MujocoEnv): 14 | """Initializes a Panda robot environment.""" 15 | 16 | def __init__(self, config, controller_config_file): 17 | """ 18 | Args: 19 | All configuratin parameters are defined in config file 20 | 21 | controller_config_file (str): filepath to the corresponding controller config file that contains the 22 | associated controller parameters 23 | """ 24 | self.control_freq = config.control_freq 25 | self.has_gripper = config.gripper_type is not None 26 | self.gripper_visualization = config.gripper_visualization 27 | 28 | # Load the appropriate controller 29 | self._load_controller('joint_velocity', controller_config_file) 30 | super().__init__(config) 31 | 32 | def _load_controller(self, controller_type, controller_file): 33 | """ 34 | Loads controller to be used for dynamic trajectories 35 | Controller_type is a specified controller, and controller_params is a config file containing the appropriate 36 | parameters for that controller 37 | """ 38 | # Load the controller config file 39 | try: 40 | with open(controller_file) as f: 41 | params = hjson.load(f) 42 | except FileNotFoundError: 43 | print("Controller config file '{}' not found. Please check filepath and try again.".format(controller_file)) 44 | 45 | controller_params = params[controller_type] 46 | self.controller = JointVelocityController(**controller_params) 47 | 48 | def _load_model(self): 49 | """ 50 | Loads robot and add gripper. 
51 | """ 52 | super()._load_model() 53 | # Use xml that has motor torque actuators enabled 54 | self.mujoco_robot = Panda(xml_path="robot/panda/robot_torque.xml") 55 | 56 | if self.has_gripper: 57 | self.gripper = PandaGripper() 58 | if not self.gripper_visualization: 59 | self.gripper.hide_visualization() 60 | self.mujoco_robot.add_gripper("hand", self.gripper) 61 | 62 | def _reset_internal(self): 63 | """ 64 | Sets initial pose of arm and grippers. 65 | """ 66 | super()._reset_internal() 67 | self.sim.data.qpos[self._ref_joint_pos_indexes] = self.mujoco_robot.init_qpos 68 | self.sim.data.qvel[self._ref_joint_vel_indexes] = np.zeros(len(self._ref_joint_vel_indexes)) 69 | if self.has_gripper: 70 | self.sim.data.qpos[self._ref_gripper_joint_pos_indexes] = self.gripper.init_qpos 71 | self.sim.data.qvel[self._ref_gripper_joint_vel_indexes] = np.zeros(len(self._ref_gripper_joint_vel_indexes)) 72 | 73 | 74 | self.controller.reset() 75 | self.total_joint_torque = 0 76 | self.joint_torques = 0 77 | self.ee_v = np.zeros(6) 78 | 79 | def _get_reference(self): 80 | """ 81 | Sets up necessary reference for robot, gripper, and objects. 82 | """ 83 | super()._get_reference() 84 | # indices for joints in qpos, qvel 85 | self.robot_joints = list(self.mujoco_robot.joints) # ['joint1', 'joint2', 'joint3', 'joint4', 'joint5', 'joint6', 'joint7'] 86 | self._ref_joint_pos_indexes = [self.sim.model.get_joint_qpos_addr(x) for x in self.robot_joints] # [0, 1, 2, 3, 4, 5, 6] 87 | self._ref_joint_vel_indexes = [self.sim.model.get_joint_qvel_addr(x) for x in self.robot_joints] # [0, 1, 2, 3, 4, 5, 6] 88 | 89 | # indices for grippers in qpos, qvel 90 | if self.has_gripper: 91 | self.gripper_joints = list(self.gripper.joints) # ['finger_joint1', 'finger_joint2'] 92 | self._ref_gripper_joint_pos_indexes = [self.sim.model.get_joint_qpos_addr(x) for x in self.gripper_joints] # [7, 8] 93 | self._ref_gripper_joint_vel_indexes = [self.sim.model.get_joint_qvel_addr(x) for x in self.gripper_joints] # [7, 8] 94 | self._ref_joint_gripper_actuator_indexes = [self.sim.model.actuator_name2id(actuator) for actuator in self.sim.model.actuator_names 95 | if actuator.startswith("gripper")] # [7, 8] 96 | # IDs of sites for gripper visualization 97 | self.eef_site_id = self.sim.model.site_name2id("grip_site") # 5 98 | 99 | 100 | def _pre_action(self, action, policy_step): 101 | """ 102 | Overrides the superclass method to actuate the robot with the 103 | passed joint velocities and gripper control. 104 | 105 | Args: 106 | action (numpy array): The control to apply to the robot. The first 107 | @self.mujoco_robot.dof dimensions should be the desired 108 | normalized joint velocities and the next @self.gripper.dof dimensions should be 109 | actuation controls for the gripper. 
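The joint part of the action is converted to torques by the JointVelocityController, the
gripper part is mirrored by gripper.format_action() and rescaled to the actuator ctrl range,
and gravity/Coriolis compensation from sim.data.qfrc_bias is added before everything is
written into sim.data.ctrl.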
110 | """ 111 | 112 | self.policy_step = policy_step 113 | 114 | # Make sure action length is correct 115 | assert len(action) == self.dof, "environment got invalid action dimension" 116 | 117 | # using controller 118 | # Split action into joint control and peripheral (i.e.: gripper) control 119 | gripper_action = [], 120 | if self.has_gripper: 121 | gripper_action = action[self.controller.control_dim:] # all indexes past controller dimension indexes 122 | action = action[:self.controller.control_dim] 123 | 124 | action = action.copy() # ensure that we don't change the action outside of this scope 125 | self.controller.update_model(self.sim, joint_index=self._ref_joint_pos_indexes) 126 | torques = self.controller.action_to_torques(action, self.policy_step) # this scales and clips the actions correctly 127 | self.total_joint_torque += np.sum(abs(torques)) 128 | self.joint_torques = torques 129 | 130 | # Get gripper action 131 | if self.has_gripper: 132 | gripper_action_actual = self.gripper.format_action(gripper_action) # mirrors the commanded action i-e [0.54] ==> [-0.54 0.54] 133 | 134 | # rescale normalized gripper action to control ranges 135 | ctrl_range = self.sim.model.actuator_ctrlrange[self._ref_gripper_joint_vel_indexes] # e.g.[[0. 0.04] [-0.04 -0.]] 136 | bias = 0.5 * (ctrl_range[:, 1] + ctrl_range[:, 0]) # e.g.[0.02 -0.02] 137 | weight = 0.5 * (ctrl_range[:, 1] - ctrl_range[:, 0]) # e.g.[0.02 0.02] 138 | applied_gripper_action = bias + weight * gripper_action_actual 139 | self.sim.data.ctrl[self._ref_gripper_joint_vel_indexes] = [applied_gripper_action] #[1, -1] 140 | 141 | # Now,control the joints, qfrc_bias accounts for gravity compensation and other internal forces (coirolis, centrifugal) 142 | self.sim.data.ctrl[self._ref_joint_vel_indexes] = self.sim.data.qfrc_bias[self._ref_joint_vel_indexes] + torques 143 | 144 | # velocity of hand and joints to be used in reward 145 | self.ee_v = np.concatenate([self.sim.data.body_xvelp[self.sim.model.body_name2id("hand")], 146 | self.sim.data.body_xvelr[self.sim.model.body_name2id("hand")]]) 147 | self.q_vel = self.sim.data.qvel[self._ref_joint_vel_indexes] 148 | 149 | def _post_action(self, action): 150 | """ 151 | (Optional) does gripper visualization after actions. 
152 | """ 153 | ret = super()._post_action(action) 154 | if self.gripper_visualization: 155 | self._gripper_visualization() 156 | return ret 157 | 158 | def _get_observation(self): 159 | """ 160 | Returns an OrderedDict containing robot observations 161 | """ 162 | 163 | state = super()._get_observation() 164 | di = OrderedDict() 165 | 166 | # proprioceptive features 167 | di["joint_pos"] = np.array([self.sim.data.qpos[x] for x in self._ref_joint_pos_indexes]) 168 | di["joint_vel"] = np.array([self.sim.data.qvel[x] for x in self._ref_joint_vel_indexes]) 169 | 170 | # 7 x 3 (sinq, cosq, qdot) = 21 171 | robot_states = [np.sin(di["joint_pos"]), np.cos(di["joint_pos"]), di["joint_vel"]] 172 | 173 | if self.has_gripper: 174 | di["gripper_qpos"] = np.array([self.sim.data.qpos[x] for x in self._ref_gripper_joint_pos_indexes]) 175 | di["gripper_qvel"] = np.array([self.sim.data.qvel[x] for x in self._ref_gripper_joint_vel_indexes]) 176 | 177 | di["eef_pos"] = np.array(self.sim.data.body_xpos[self.sim.model.body_name2id('hand')]) 178 | di["eef_quat"] = T.convert_quat(self.sim.data.get_body_xquat("hand"), to="xyzw") 179 | di["eef_vlin"] = np.array(self.sim.data.get_body_xvelp('hand')) 180 | di["eef_vang"] = np.array(self.sim.data.get_body_xvelr('hand')) 181 | 182 | # add in gripper information (2(gripper) + 3(end-effector pos) + 4(end-effector rot) + 3(end-effector vel) + 3(end-effector w)) 183 | robot_states.extend([di["gripper_qpos"], di["eef_pos"], di["eef_quat"], di["eef_vlin"], di["eef_vang"]]) 184 | 185 | state["robot-state"] = np.concatenate(robot_states) 186 | 187 | return state 188 | 189 | 190 | @property 191 | def observation_space(self): 192 | """ 193 | Returns dict where keys are ob names and values are dimensions. 194 | """ 195 | ob_space = OrderedDict() 196 | observation = self._get_observation() 197 | ob_space['object-state'] = [observation['object-state'].size] 198 | ob_space['robot-state'] = [observation['robot-state'].size] 199 | return ob_space 200 | 201 | @property 202 | def dof(self): 203 | """ 204 | Returns the DoF of the robot (with grippers). 205 | """ 206 | dof = self.controller.action_dim 207 | 208 | if self.has_gripper: 209 | dof += self.gripper.dof 210 | return dof 211 | 212 | @property 213 | def action_space(self): 214 | """ 215 | Returns ActionSpec of action space, see 216 | action_spec.py for more documentation. 217 | """ 218 | return ActionSpace(self.dof) 219 | 220 | 221 | def _gripper_visualization(self): 222 | """ 223 | Do any needed visualization here. 224 | """ 225 | 226 | # By default, don't do any coloring. 227 | self.sim.model.site_rgba[self.eef_site_id] = [0., 1., 0., 1.] 228 | 229 | 230 | def _check_q_limits(self): 231 | """ 232 | Returns True if the arm is in joint limits or very close to. 233 | """ 234 | joint_limits = False 235 | tolerance = 0.15 236 | for (q, q_limits) in zip(self.sim.data.qpos[self._ref_joint_pos_indexes], self.sim.model.jnt_range[:7]): 237 | if (q < q_limits[0] + tolerance) or (q > abs(q_limits[1]) - tolerance): 238 | joint_limits = True 239 | return joint_limits 240 | -------------------------------------------------------------------------------- /panda/utils/transform_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility functions of matrix and vector transformations. 3 | 4 | NOTE: convention for quaternions is (x, y, z, w) 5 | """ 6 | 7 | import math 8 | import numpy as np 9 | 10 | 11 | PI = np.pi 12 | EPS = np.finfo(float).eps * 4. 
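# Example of the convention handled by convert_quat() below: MuJoCo stores quaternions as
# (w, x, y, z), so the identity rotation maps to this module's (x, y, z, w) ordering:
#   >>> convert_quat(np.array([1., 0., 0., 0.]), to="xyzw")
#   array([0., 0., 0., 1.])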
13 | 14 | # axis sequences for Euler angles 15 | _NEXT_AXIS = [1, 2, 0, 1] 16 | 17 | # map axes strings to/from tuples of inner axis, parity, repetition, frame 18 | _AXES2TUPLE = { 19 | "sxyz": (0, 0, 0, 0), 20 | "sxyx": (0, 0, 1, 0), 21 | "sxzy": (0, 1, 0, 0), 22 | "sxzx": (0, 1, 1, 0), 23 | "syzx": (1, 0, 0, 0), 24 | "syzy": (1, 0, 1, 0), 25 | "syxz": (1, 1, 0, 0), 26 | "syxy": (1, 1, 1, 0), 27 | "szxy": (2, 0, 0, 0), 28 | "szxz": (2, 0, 1, 0), 29 | "szyx": (2, 1, 0, 0), 30 | "szyz": (2, 1, 1, 0), 31 | "rzyx": (0, 0, 0, 1), 32 | "rxyx": (0, 0, 1, 1), 33 | "ryzx": (0, 1, 0, 1), 34 | "rxzx": (0, 1, 1, 1), 35 | "rxzy": (1, 0, 0, 1), 36 | "ryzy": (1, 0, 1, 1), 37 | "rzxy": (1, 1, 0, 1), 38 | "ryxy": (1, 1, 1, 1), 39 | "ryxz": (2, 0, 0, 1), 40 | "rzxz": (2, 0, 1, 1), 41 | "rxyz": (2, 1, 0, 1), 42 | "rzyz": (2, 1, 1, 1), 43 | } 44 | 45 | _TUPLE2AXES = dict((v, k) for k, v in _AXES2TUPLE.items()) 46 | 47 | 48 | def convert_quat(q, to="xyzw"): 49 | """ 50 | Converts quaternion from one convention to another. 51 | The convention to convert TO is specified as an optional argument. 52 | If to == 'xyzw', then the input is in 'wxyz' format, and vice-versa. 53 | 54 | Args: 55 | q: a 4-dim numpy array corresponding to a quaternion 56 | to: a string, either 'xyzw' or 'wxyz', determining 57 | which convention to convert to. 58 | """ 59 | if to == "xyzw": 60 | return q[[1, 2, 3, 0]] 61 | if to == "wxyz": 62 | return q[[3, 0, 1, 2]] 63 | raise Exception("convert_quat: choose a valid `to` argument (xyzw or wxyz)") 64 | 65 | 66 | def quat_multiply(quaternion1, quaternion0): 67 | """Return multiplication of two quaternions. 68 | >>> q = quat_multiply([1, -2, 3, 4], [-5, 6, 7, 8]) 69 | >>> np.allclose(q, [-44, -14, 48, 28]) 70 | True 71 | """ 72 | x0, y0, z0, w0 = quaternion0 73 | x1, y1, z1, w1 = quaternion1 74 | return np.array( 75 | ( 76 | x1 * w0 + y1 * z0 - z1 * y0 + w1 * x0, 77 | -x1 * z0 + y1 * w0 + z1 * x0 + w1 * y0, 78 | x1 * y0 - y1 * x0 + z1 * w0 + w1 * z0, 79 | -x1 * x0 - y1 * y0 - z1 * z0 + w1 * w0, 80 | ), 81 | dtype=np.float32, 82 | ) 83 | 84 | 85 | def quat_conjugate(quaternion): 86 | """Return conjugate of quaternion. 87 | >>> q0 = random_quaternion() 88 | >>> q1 = quat_conjugate(q0) 89 | >>> q1[3] == q0[3] and all(q1[:3] == -q0[:3]) 90 | True 91 | """ 92 | return np.array( 93 | (-quaternion[0], -quaternion[1], -quaternion[2], quaternion[3]), 94 | dtype=np.float32, 95 | ) 96 | 97 | 98 | def quat_inverse(quaternion): 99 | """Return inverse of quaternion. 100 | >>> q0 = random_quaternion() 101 | >>> q1 = quat_inverse(q0) 102 | >>> np.allclose(quat_multiply(q0, q1), [0, 0, 0, 1]) 103 | True 104 | """ 105 | return quat_conjugate(quaternion) / np.dot(quaternion, quaternion) 106 | 107 | 108 | def quat_slerp(quat0, quat1, fraction, spin=0, shortestpath=True): 109 | """Return spherical linear interpolation between two quaternions. 
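With both inputs normalized and spin = 0, the result is
q = (sin((1 - fraction) * theta) * q0 + sin(fraction * theta) * q1) / sin(theta),
where theta = acos(q0 . q1) (negated to take the shorter arc when shortestpath is True),
which is what the implementation below computes.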
110 | >>> q0 = random_quat() 111 | >>> q1 = random_quat() 112 | >>> q = quat_slerp(q0, q1, 0.0) 113 | >>> np.allclose(q, q0) 114 | True 115 | >>> q = quat_slerp(q0, q1, 1.0, 1) 116 | >>> np.allclose(q, q1) 117 | True 118 | >>> q = quat_slerp(q0, q1, 0.5) 119 | >>> angle = math.acos(np.dot(q0, q)) 120 | >>> np.allclose(2.0, math.acos(np.dot(q0, q1)) / angle) or \ 121 | np.allclose(2.0, math.acos(-np.dot(q0, q1)) / angle) 122 | True 123 | """ 124 | q0 = unit_vector(quat0[:4]) 125 | q1 = unit_vector(quat1[:4]) 126 | if fraction == 0.0: 127 | return q0 128 | elif fraction == 1.0: 129 | return q1 130 | d = np.dot(q0, q1) 131 | if abs(abs(d) - 1.0) < EPS: 132 | return q0 133 | if shortestpath and d < 0.0: 134 | # invert rotation 135 | d = -d 136 | q1 *= -1.0 137 | angle = math.acos(d) + spin * math.pi 138 | if abs(angle) < EPS: 139 | return q0 140 | isin = 1.0 / math.sin(angle) 141 | q0 *= math.sin((1.0 - fraction) * angle) * isin 142 | q1 *= math.sin(fraction * angle) * isin 143 | q0 += q1 144 | return q0 145 | 146 | 147 | def random_quat(rand=None): 148 | """Return uniform random unit quaternion. 149 | rand: array like or None 150 | Three independent random variables that are uniformly distributed 151 | between 0 and 1. 152 | >>> q = random_quat() 153 | >>> np.allclose(1.0, vector_norm(q)) 154 | True 155 | >>> q = random_quat(np.random.random(3)) 156 | >>> q.shape 157 | (4,) 158 | """ 159 | if rand is None: 160 | rand = np.random.rand(3) 161 | else: 162 | assert len(rand) == 3 163 | r1 = np.sqrt(1.0 - rand[0]) 164 | r2 = np.sqrt(rand[0]) 165 | pi2 = math.pi * 2.0 166 | t1 = pi2 * rand[1] 167 | t2 = pi2 * rand[2] 168 | return np.array( 169 | (np.sin(t1) * r1, np.cos(t1) * r1, np.sin(t2) * r2, np.cos(t2) * r2), 170 | dtype=np.float32, 171 | ) 172 | 173 | 174 | def vec(values): 175 | """ 176 | Converts value tuple into a numpy vector. 177 | 178 | Args: 179 | values: a tuple of numbers 180 | 181 | Returns: 182 | a numpy vector of given values 183 | """ 184 | return np.array(values, dtype=np.float32) 185 | 186 | 187 | def mat4(array): 188 | """ 189 | Converts an array to 4x4 matrix. 190 | 191 | Args: 192 | array: the array in form of vec, list, or tuple 193 | 194 | Returns: 195 | a 4x4 numpy matrix 196 | """ 197 | return np.array(array, dtype=np.float32).reshape((4, 4)) 198 | 199 | 200 | def mat2pose(hmat): 201 | """ 202 | Converts a homogeneous 4x4 matrix into pose. 203 | 204 | Args: 205 | hmat: a 4x4 homogeneous matrix 206 | 207 | Returns: 208 | (pos, orn) tuple where pos is vec3 float in cartesian, 209 | orn is vec4 float quaternion 210 | """ 211 | pos = hmat[:3, 3] 212 | orn = mat2quat(hmat[:3, :3]) 213 | return pos, orn 214 | 215 | 216 | def mat2quat(rmat, precise=False): 217 | """ 218 | Converts given rotation matrix to quaternion. 219 | 220 | Args: 221 | rmat: 3x3 rotation matrix 222 | precise: If precise is True, the input matrix is assumed to be a precise 223 | rotation matrix and a faster algorithm is used.


def mat2quat(rmat, precise=False):
    """
    Converts the given rotation matrix to a quaternion.

    Args:
        rmat: 3x3 rotation matrix
        precise: If True, the input matrix is assumed to be a precise
            rotation matrix and a faster algorithm is used.

    Returns:
        vec4 float quaternion in (x, y, z, w) format
    """
    M = np.array(rmat, dtype=np.float32, copy=False)[:3, :3]
    if precise:
        # the trace of the equivalent 4x4 homogeneous matrix is trace(M) + 1
        q = np.empty((4,))
        t = np.trace(M) + 1.0
        if t > 1.0:
            q[0] = t
            q[3] = M[1, 0] - M[0, 1]
            q[2] = M[0, 2] - M[2, 0]
            q[1] = M[2, 1] - M[1, 2]
        else:
            i, j, k = 0, 1, 2
            if M[1, 1] > M[0, 0]:
                i, j, k = 1, 2, 0
            if M[2, 2] > M[i, i]:
                i, j, k = 2, 0, 1
            t = M[i, i] - (M[j, j] + M[k, k]) + 1.0
            q[i] = t
            q[j] = M[i, j] + M[j, i]
            q[k] = M[k, i] + M[i, k]
            q[3] = M[k, j] - M[j, k]
            q = q[[3, 0, 1, 2]]
        q *= 0.5 / math.sqrt(t)
    else:
        m00 = M[0, 0]
        m01 = M[0, 1]
        m02 = M[0, 2]
        m10 = M[1, 0]
        m11 = M[1, 1]
        m12 = M[1, 2]
        m20 = M[2, 0]
        m21 = M[2, 1]
        m22 = M[2, 2]
        # symmetric matrix K
        K = np.array(
            [
                [m00 - m11 - m22, 0.0, 0.0, 0.0],
                [m01 + m10, m11 - m00 - m22, 0.0, 0.0],
                [m02 + m20, m12 + m21, m22 - m00 - m11, 0.0],
                [m21 - m12, m02 - m20, m10 - m01, m00 + m11 + m22],
            ]
        )
        K /= 3.0
        # quaternion is the eigenvector of K that corresponds to the largest eigenvalue
        w, V = np.linalg.eigh(K)
        q = V[[3, 0, 1, 2], np.argmax(w)]
    if q[0] < 0.0:
        np.negative(q, q)
    return q[[1, 2, 3, 0]]


def euler2mat(euler):
    """
    Converts euler angles (assumed x-y-z convention, in radians) to a rotation matrix.
    """
    euler = np.asarray(euler, dtype=np.float64)
    assert euler.shape[-1] == 3, "Invalid shaped euler {}".format(euler)

    ai, aj, ak = -euler[..., 2], -euler[..., 1], -euler[..., 0]
    si, sj, sk = np.sin(ai), np.sin(aj), np.sin(ak)
    ci, cj, ck = np.cos(ai), np.cos(aj), np.cos(ak)
    cc, cs = ci * ck, ci * sk
    sc, ss = si * ck, si * sk

    mat = np.empty(euler.shape[:-1] + (3, 3), dtype=np.float64)
    mat[..., 2, 2] = cj * ck
    mat[..., 2, 1] = sj * sc - cs
    mat[..., 2, 0] = sj * cc + ss
    mat[..., 1, 2] = cj * sk
    mat[..., 1, 1] = sj * ss + cc
    mat[..., 1, 0] = sj * cs - sc
    mat[..., 0, 2] = -sj
    mat[..., 0, 1] = cj * si
    mat[..., 0, 0] = cj * ci
    return mat
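
# --- Illustrative usage (editor's sketch, not part of the original module) ---
# Round-tripping a rotation through euler2mat -> mat2quat -> quat2mat should
# reproduce the same matrix up to numerical precision; the euler angles below are
# arbitrary example values and the function name is hypothetical.
def _example_rotation_roundtrip():
    R = euler2mat(np.array([0.1, -0.4, 0.7]))  # example angles in radians
    q = mat2quat(R)                            # (x, y, z, w)
    assert np.allclose(quat2mat(q), R, atol=1e-4)
    return q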


def mat2euler(rmat, axes="sxyz"):
    """
    Converts the given rotation matrix to euler angles in radians.

    Args:
        rmat: 3x3 rotation matrix
        axes: One of 24 axis sequences as string or encoded tuple

    Returns:
        converted euler angles in radians, vec3 float
    """
    try:
        firstaxis, parity, repetition, frame = _AXES2TUPLE[axes.lower()]
    except (AttributeError, KeyError):
        firstaxis, parity, repetition, frame = axes

    i = firstaxis
    j = _NEXT_AXIS[i + parity]
    k = _NEXT_AXIS[i - parity + 1]

    M = np.array(rmat, dtype=np.float32, copy=False)[:3, :3]
    if repetition:
        sy = math.sqrt(M[i, j] * M[i, j] + M[i, k] * M[i, k])
        if sy > EPS:
            ax = math.atan2(M[i, j], M[i, k])
            ay = math.atan2(sy, M[i, i])
            az = math.atan2(M[j, i], -M[k, i])
        else:
            ax = math.atan2(-M[j, k], M[j, j])
            ay = math.atan2(sy, M[i, i])
            az = 0.0
    else:
        cy = math.sqrt(M[i, i] * M[i, i] + M[j, i] * M[j, i])
        if cy > EPS:
            ax = math.atan2(M[k, j], M[k, k])
            ay = math.atan2(-M[k, i], cy)
            az = math.atan2(M[j, i], M[i, i])
        else:
            ax = math.atan2(-M[j, k], M[j, j])
            ay = math.atan2(-M[k, i], cy)
            az = 0.0

    if parity:
        ax, ay, az = -ax, -ay, -az
    if frame:
        ax, az = az, ax
    return vec((ax, ay, az))


def pose2mat(pose):
    """
    Converts a pose to a homogeneous matrix.

    Args:
        pose: a (pos, orn) tuple where pos is vec3 float cartesian, and
            orn is vec4 float quaternion.

    Returns:
        4x4 homogeneous matrix
    """
    homo_pose_mat = np.zeros((4, 4), dtype=np.float32)
    homo_pose_mat[:3, :3] = quat2mat(pose[1])
    homo_pose_mat[:3, 3] = np.array(pose[0], dtype=np.float32)
    homo_pose_mat[3, 3] = 1.
    return homo_pose_mat


def quat2mat(quaternion):
    """
    Converts the given quaternion (x, y, z, w) to a rotation matrix.

    Args:
        quaternion: vec4 float (x, y, z, w) quaternion

    Returns:
        3x3 rotation matrix
    """
    q = np.array(quaternion, dtype=np.float32, copy=True)[[3, 0, 1, 2]]
    n = np.dot(q, q)
    if n < EPS:
        return np.identity(3)
    q *= math.sqrt(2.0 / n)
    q = np.outer(q, q)
    return np.array(
        [
            [1.0 - q[2, 2] - q[3, 3], q[1, 2] - q[3, 0], q[1, 3] + q[2, 0]],
            [q[1, 2] + q[3, 0], 1.0 - q[1, 1] - q[3, 3], q[2, 3] - q[1, 0]],
            [q[1, 3] - q[2, 0], q[2, 3] + q[1, 0], 1.0 - q[1, 1] - q[2, 2]],
        ]
    )


def pose_in_A_to_pose_in_B(pose_A, pose_A_in_B):
    """
    Converts a homogeneous matrix corresponding to a point C in frame A
    to a homogeneous matrix corresponding to the same point C in frame B.

    Args:
        pose_A: numpy array of shape (4,4) corresponding to the pose of C in frame A
        pose_A_in_B: numpy array of shape (4,4) corresponding to the pose of A in frame B

    Returns:
        numpy array of shape (4,4) corresponding to the pose of C in frame B
    """

    # the pose of A in B takes a point expressed in A and expresses it in B

    # pose of C in B = pose of A in B * pose of C in A
    # take a point in C, transform it to A, then to B
    # T_B^C = T_B^A * T_A^C
    return pose_A_in_B.dot(pose_A)
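
# --- Illustrative usage (editor's sketch, not part of the original module) ---
# Expressing a gripper pose known in the robot base frame in the world frame, using
# hypothetical poses with identity orientation; shows the multiplication order that
# pose_in_A_to_pose_in_B implements.
def _example_change_of_frame():
    gripper_in_base = pose2mat((np.array([0.5, 0.0, 0.3]), np.array([0.0, 0.0, 0.0, 1.0])))
    base_in_world = pose2mat((np.array([0.0, 0.0, 0.8]), np.array([0.0, 0.0, 0.0, 1.0])))
    gripper_in_world = pose_in_A_to_pose_in_B(gripper_in_base, base_in_world)
    assert np.allclose(gripper_in_world[:3, 3], [0.5, 0.0, 1.1])
    return gripper_in_world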


def pose_inv(pose):
    """
    Computes the inverse of a homogeneous matrix corresponding to the pose of some
    frame B in frame A. The inverse is the pose of frame A in frame B.

    Args:
        pose: numpy array of shape (4,4) for the pose to invert

    Returns:
        numpy array of shape (4,4) for the inverse pose
    """

    # Note: the inverse of a pose matrix is the following
    # [R t; 0 1]^-1 = [R.T -R.T*t; 0 1]

    # Intuitively, this makes sense.
    # The original pose matrix rotates a point by R and then translates it by t.
    # To undo this, we translate back by -t and rotate back by R^-1 = R.T;
    # composed into a single homogeneous matrix, that is a rotation of R.T and a
    # translation of -R.T*t.

    pose_inv = np.zeros((4, 4))
    pose_inv[:3, :3] = pose[:3, :3].T
    pose_inv[:3, 3] = -pose_inv[:3, :3].dot(pose[:3, 3])
    pose_inv[3, 3] = 1.0
    return pose_inv


def _skew_symmetric_translation(pos_A_in_B):
    """
    Helper function to get a skew-symmetric translation matrix for converting quantities
    between frames.
    """
    return np.array(
        [
            0.,
            -pos_A_in_B[2],
            pos_A_in_B[1],
            pos_A_in_B[2],
            0.,
            -pos_A_in_B[0],
            -pos_A_in_B[1],
            pos_A_in_B[0],
            0.,
        ]
    ).reshape((3, 3))


def vel_in_A_to_vel_in_B(vel_A, ang_vel_A, pose_A_in_B):
    """
    Converts linear and angular velocity of a point in frame A to the equivalent in frame B.

    Args:
        vel_A: 3-dim iterable for linear velocity in A
        ang_vel_A: 3-dim iterable for angular velocity in A
        pose_A_in_B: numpy array of shape (4,4) corresponding to the pose of A in frame B

    Returns:
        vel_B, ang_vel_B: two numpy arrays of shape (3,) for the velocities in B
    """
    pos_A_in_B = pose_A_in_B[:3, 3]
    rot_A_in_B = pose_A_in_B[:3, :3]
    skew_symm = _skew_symmetric_translation(pos_A_in_B)
    vel_B = rot_A_in_B.dot(vel_A) + skew_symm.dot(rot_A_in_B.dot(ang_vel_A))
    ang_vel_B = rot_A_in_B.dot(ang_vel_A)
    return vel_B, ang_vel_B


def force_in_A_to_force_in_B(force_A, torque_A, pose_A_in_B):
    """
    Converts linear force and rotational force (moment) at a point in frame A to the
    equivalent in frame B.

    Args:
        force_A: 3-dim iterable for linear force in A
        torque_A: 3-dim iterable for rotational force (moment) in A
        pose_A_in_B: numpy array of shape (4,4) corresponding to the pose of A in frame B

    Returns:
        force_B, torque_B: two numpy arrays of shape (3,) for the forces in B
    """
    pos_A_in_B = pose_A_in_B[:3, 3]
    rot_A_in_B = pose_A_in_B[:3, :3]
    skew_symm = _skew_symmetric_translation(pos_A_in_B)
    force_B = rot_A_in_B.T.dot(force_A)
    torque_B = -rot_A_in_B.T.dot(skew_symm.dot(force_A)) + rot_A_in_B.T.dot(torque_A)
    return force_B, torque_B
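
# --- Illustrative usage (editor's sketch, not part of the original module) ---
# pose_inv composed with the original pose gives the identity, and mapping a pose
# into frame B and back with the inverse recovers the original; poses below are
# arbitrary example values and the function name is hypothetical.
def _example_pose_inverse():
    A_in_B = make_pose(np.array([0.2, -0.1, 0.5]), euler2mat(np.array([0.0, 0.0, 0.3])))
    assert np.allclose(A_in_B.dot(pose_inv(A_in_B)), np.eye(4), atol=1e-6)
    C_in_A = make_pose(np.array([0.1, 0.0, 0.0]), np.eye(3))
    C_in_B = pose_in_A_to_pose_in_B(C_in_A, A_in_B)
    assert np.allclose(pose_in_A_to_pose_in_B(C_in_B, pose_inv(A_in_B)), C_in_A, atol=1e-6)
    return C_in_B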


def rotation_matrix(angle, direction, point=None):
    """
    Returns a matrix to rotate about an axis defined by point and direction.

    Examples:

    >>> angle = (np.random.random() - 0.5) * (2*math.pi)
    >>> direc = np.random.random(3) - 0.5
    >>> point = np.random.random(3) - 0.5
    >>> R0 = rotation_matrix(angle, direc, point)
    >>> R1 = rotation_matrix(angle-2*math.pi, direc, point)
    >>> np.allclose(R0, R1)
    True
    >>> R0 = rotation_matrix(angle, direc, point)
    >>> R1 = rotation_matrix(-angle, -direc, point)
    >>> np.allclose(R0, R1)
    True
    >>> I = np.identity(4, np.float32)
    >>> np.allclose(I, rotation_matrix(math.pi*2, direc))
    True
    >>> np.allclose(2., np.trace(rotation_matrix(math.pi/2,
    ...                                          direc, point)))
    True

    """
    sina = math.sin(angle)
    cosa = math.cos(angle)
    direction = unit_vector(direction[:3])
    # rotation matrix around unit vector
    R = np.array(
        ((cosa, 0.0, 0.0), (0.0, cosa, 0.0), (0.0, 0.0, cosa)), dtype=np.float32
    )
    R += np.outer(direction, direction) * (1.0 - cosa)
    direction *= sina
    R += np.array(
        (
            (0.0, -direction[2], direction[1]),
            (direction[2], 0.0, -direction[0]),
            (-direction[1], direction[0], 0.0),
        ),
        dtype=np.float32,
    )
    M = np.identity(4)
    M[:3, :3] = R
    if point is not None:
        # rotation not around origin
        point = np.array(point[:3], dtype=np.float32, copy=False)
        M[:3, 3] = point - np.dot(R, point)
    return M


def make_pose(translation, rotation):
    """
    Makes a homogeneous pose matrix from a translation vector and a rotation matrix.

    Args:
        translation: a 3-dim iterable
        rotation: a 3x3 matrix

    Returns:
        pose: a 4x4 homogeneous matrix
    """
    pose = np.zeros((4, 4))
    pose[:3, :3] = rotation
    pose[:3, 3] = translation
    pose[3, 3] = 1.0
    return pose


def unit_vector(data, axis=None, out=None):
    """
    Returns an ndarray normalized by its length, i.e. its Euclidean norm, along axis.

    Examples:

    >>> v0 = np.random.random(3)
    >>> v1 = unit_vector(v0)
    >>> np.allclose(v1, v0 / np.linalg.norm(v0))
    True
    >>> v0 = np.random.rand(5, 4, 3)
    >>> v1 = unit_vector(v0, axis=-1)
    >>> v2 = v0 / np.expand_dims(np.sqrt(np.sum(v0*v0, axis=2)), 2)
    >>> np.allclose(v1, v2)
    True
    >>> v1 = unit_vector(v0, axis=1)
    >>> v2 = v0 / np.expand_dims(np.sqrt(np.sum(v0*v0, axis=1)), 1)
    >>> np.allclose(v1, v2)
    True
    >>> v1 = np.empty((5, 4, 3), dtype=np.float32)
    >>> unit_vector(v0, axis=1, out=v1)
    >>> np.allclose(v1, v2)
    True
    >>> list(unit_vector([]))
    []
    >>> list(unit_vector([1.0]))
    [1.0]

    """
    if out is None:
        data = np.array(data, dtype=np.float32, copy=True)
        if data.ndim == 1:
            data /= math.sqrt(np.dot(data, data))
            return data
    else:
        if out is not data:
            out[:] = np.array(data, copy=False)
        data = out
    length = np.atleast_1d(np.sum(data * data, axis))
    np.sqrt(length, length)
    if axis is not None:
        length = np.expand_dims(length, axis)
    data /= length
    if out is None:
        return data
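
# --- Illustrative usage (editor's sketch, not part of the original module) ---
# A rotation of 90 degrees about the z-axis through a pivot point leaves the pivot
# fixed and moves an off-axis point; the axis and points are arbitrary example
# values and the function name is hypothetical.
def _example_rotation_about_axis():
    axis = unit_vector(np.array([0.0, 0.0, 1.0]))
    pivot = np.array([1.0, 0.0, 0.0])
    T = rotation_matrix(math.pi / 2.0, axis, point=pivot)
    on_axis = np.append(pivot, 1.0)                 # homogeneous coordinates
    assert np.allclose(T.dot(on_axis)[:3], pivot, atol=1e-6)
    off_axis = np.array([2.0, 0.0, 0.0, 1.0])
    assert np.allclose(T.dot(off_axis)[:3], [1.0, 1.0, 0.0], atol=1e-6)
    return T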


def get_orientation_error(target_orn, current_orn):
    """
    Returns the difference between two quaternion orientations as a 3 DOF numpy array.
    For use in an impedance controller / task-space PD controller.

    Args:
        target_orn: 4-dim iterable, desired orientation as a (x, y, z, w) quaternion
        current_orn: 4-dim iterable, current orientation as a (x, y, z, w) quaternion

    Returns:
        orn_error: 3-dim numpy array for the current orientation error, corresponding to
            (target_orn - current_orn)
    """
    current_orn = np.array(
        [current_orn[3], current_orn[0], current_orn[1], current_orn[2]]
    )
    target_orn = np.array([target_orn[3], target_orn[0], target_orn[1], target_orn[2]])

    pinv = np.zeros((3, 4))
    pinv[0, :] = [-current_orn[1], current_orn[0], -current_orn[3], current_orn[2]]
    pinv[1, :] = [-current_orn[2], current_orn[3], current_orn[0], -current_orn[1]]
    pinv[2, :] = [-current_orn[3], -current_orn[2], current_orn[1], current_orn[0]]
    orn_error = 2.0 * pinv.dot(np.array(target_orn))
    return orn_error


def get_pose_error(target_pose, current_pose):
    """
    Computes the error corresponding to target pose - current pose as a 6-dim vector.
    The first 3 components correspond to the translational error while the last 3
    components correspond to the rotational error.

    Args:
        target_pose: a 4x4 homogeneous matrix for the target pose
        current_pose: a 4x4 homogeneous matrix for the current pose

    Returns:
        A 6-dim numpy array for the pose error.
    """
    error = np.zeros(6)

    # compute translational error
    target_pos = target_pose[:3, 3]
    current_pos = current_pose[:3, 3]
    pos_err = target_pos - current_pos

    # compute rotational error
    r1 = current_pose[:3, 0]
    r2 = current_pose[:3, 1]
    r3 = current_pose[:3, 2]
    r1d = target_pose[:3, 0]
    r2d = target_pose[:3, 1]
    r3d = target_pose[:3, 2]
    rot_err = 0.5 * (np.cross(r1, r1d) + np.cross(r2, r2d) + np.cross(r3, r3d))

    error[:3] = pos_err
    error[3:] = rot_err
    return error

--------------------------------------------------------------------------------