├── panda ├── rl │ ├── policies │ │ ├── __init__.py │ │ ├── utils.py │ │ ├── distributions.py │ │ ├── mlp_actor_critic.py │ │ └── actor_critic.py │ ├── base_agent.py │ ├── main.py │ ├── dataset.py │ ├── rollouts.py │ ├── normalizer.py │ ├── ppo_agent.py │ ├── sac_agent.py │ └── trainer.py ├── models │ ├── robot │ │ ├── __init__.py │ │ ├── robot.py │ │ └── panda_robot.py │ ├── task │ │ ├── __init__.py │ │ ├── task.py │ │ └── grasping_task.py │ ├── arena │ │ ├── __init__.py │ │ ├── arena.py │ │ └── table_arena.py │ ├── gripper │ │ ├── __init__.py │ │ ├── panda_gripper.py │ │ └── gripper.py │ ├── __init__.py │ ├── assets │ │ ├── textures │ │ │ ├── metal.png │ │ │ ├── dark-wood.png │ │ │ └── light-wood.png │ │ ├── objects │ │ │ ├── meshes │ │ │ │ └── base.stl │ │ │ ├── cube.xml │ │ │ ├── cyl.xml │ │ │ ├── cyl2.xml │ │ │ └── basepart.xml │ │ ├── robot │ │ │ └── panda │ │ │ │ ├── meshes │ │ │ │ ├── hand.stl │ │ │ │ ├── finger.stl │ │ │ │ ├── link0.stl │ │ │ │ ├── link1.stl │ │ │ │ ├── link2.stl │ │ │ │ ├── link3.stl │ │ │ │ ├── link4.stl │ │ │ │ ├── link5.stl │ │ │ │ ├── link6.stl │ │ │ │ ├── link7.stl │ │ │ │ ├── hand_vis.stl │ │ │ │ ├── link0_vis.stl │ │ │ │ ├── link1_vis.stl │ │ │ │ ├── link2_vis.stl │ │ │ │ ├── link3_vis.stl │ │ │ │ ├── link4_vis.stl │ │ │ │ ├── link5_vis.stl │ │ │ │ ├── link6_vis.stl │ │ │ │ ├── link7_vis.stl │ │ │ │ ├── pedestal.stl │ │ │ │ └── finger_vis.stl │ │ │ │ └── robot_torque.xml │ │ ├── gripper │ │ │ ├── meshes │ │ │ │ └── panda_gripper │ │ │ │ │ ├── hand.stl │ │ │ │ │ ├── finger.stl │ │ │ │ │ ├── hand_vis.stl │ │ │ │ │ ├── finger_vis.stl │ │ │ │ │ ├── finger_longer.stl │ │ │ │ │ ├── finger_longer_a.stl │ │ │ │ │ ├── finger_longer_b.stl │ │ │ │ │ ├── finger_longer_c.stl │ │ │ │ │ └── finger_longer_a_old.stl │ │ │ └── panda_gripper.xml │ │ ├── base.xml │ │ └── arena │ │ │ └── table_arena.xml │ ├── objects │ │ ├── __init__.py │ │ ├── xml_objects.py │ │ └── objects.py │ ├── world.py │ └── base.py ├── controller │ ├── __init__.py │ ├── controller.py │ └── arm_controller.py ├── log │ └── rl.111. 
│ │ ├── ckpt_09000000.pt │ │ ├── ckpt_09360000.pt │ │ ├── replay_09000000.pkl │ │ └── replay_09360000.pkl ├── environments │ ├── __init__.py │ ├── action_space.py │ ├── base.py │ ├── panda_grasping.py │ └── panda.py ├── config │ ├── controller_config.hjson │ ├── grasping.py │ └── __init__.py ├── utils │ ├── __init__.py │ ├── logger.py │ ├── mpi.py │ ├── mujoco_py_renderer.py │ ├── mjcf_utils.py │ ├── pytorch.py │ └── transform_utils.py ├── README.md └── environment.yml ├── base_policy.gif ├── exp_image.png ├── adapted_policy.gif └── README.md /panda/rl/policies/__init__.py: -------------------------------------------------------------------------------- 1 | from .mlp_actor_critic import MlpActor, MlpCritic 2 | 3 | 4 | -------------------------------------------------------------------------------- /panda/models/robot/__init__.py: -------------------------------------------------------------------------------- 1 | from .robot import Robot 2 | from .panda_robot import Panda 3 | -------------------------------------------------------------------------------- /base_policy.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/base_policy.gif -------------------------------------------------------------------------------- /exp_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/exp_image.png -------------------------------------------------------------------------------- /panda/controller/__init__.py: -------------------------------------------------------------------------------- 1 | from .controller import Controller 2 | from .arm_controller import * 3 | -------------------------------------------------------------------------------- /panda/models/task/__init__.py: -------------------------------------------------------------------------------- 1 | from .task import Task 2 | 3 | from .grasping_task import GraspingTask 4 | -------------------------------------------------------------------------------- /adapted_policy.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/adapted_policy.gif -------------------------------------------------------------------------------- /panda/models/arena/__init__.py: -------------------------------------------------------------------------------- 1 | from .arena import Arena 2 | from .table_arena import TableArena 3 | 4 | 5 | -------------------------------------------------------------------------------- /panda/models/gripper/__init__.py: -------------------------------------------------------------------------------- 1 | from .gripper import Gripper 2 | from .panda_gripper import PandaGripper 3 | 4 | -------------------------------------------------------------------------------- /panda/log/rl.111./ckpt_09000000.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/log/rl.111./ckpt_09000000.pt -------------------------------------------------------------------------------- /panda/log/rl.111./ckpt_09360000.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/log/rl.111./ckpt_09360000.pt 
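The `panda/log/rl.111./` directory holds training artifacts: policy checkpoints (`ckpt_*.pt`) and replay buffer dumps (`replay_*.pkl`). Below is a minimal sketch of inspecting these files offline, assuming the checkpoints were written with `torch.save` and the replay buffers with `pickle`; the exact contents and dictionary keys depend on what `trainer.py` actually stores and are not confirmed by this section.

```python
import pickle
import torch

# Load a checkpoint on the CPU and list what it contains (keys are unknown here).
ckpt = torch.load("panda/log/rl.111./ckpt_09360000.pt", map_location="cpu")
print(type(ckpt), list(ckpt.keys()) if isinstance(ckpt, dict) else ckpt)

# Load the matching replay buffer dump saved alongside it.
with open("panda/log/rl.111./replay_09360000.pkl", "rb") as f:
    replay = pickle.load(f)
print(type(replay))
```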
-------------------------------------------------------------------------------- /panda/log/rl.111./replay_09000000.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/log/rl.111./replay_09000000.pkl -------------------------------------------------------------------------------- /panda/log/rl.111./replay_09360000.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/log/rl.111./replay_09360000.pkl -------------------------------------------------------------------------------- /panda/models/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | from .world import MujocoWorldBase 3 | 4 | assets_root = os.path.join(os.path.dirname(__file__), "assets") 5 | -------------------------------------------------------------------------------- /panda/models/assets/textures/metal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/textures/metal.png -------------------------------------------------------------------------------- /panda/models/assets/textures/dark-wood.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/textures/dark-wood.png -------------------------------------------------------------------------------- /panda/models/assets/objects/meshes/base.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/objects/meshes/base.stl -------------------------------------------------------------------------------- /panda/models/assets/textures/light-wood.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/textures/light-wood.png -------------------------------------------------------------------------------- /panda/environments/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import MujocoEnv 2 | from .base import make 3 | from .panda import PandaEnv 4 | from .panda_grasping import PandaGrasp 5 | -------------------------------------------------------------------------------- /panda/models/assets/robot/panda/meshes/hand.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/robot/panda/meshes/hand.stl -------------------------------------------------------------------------------- /panda/models/objects/__init__.py: -------------------------------------------------------------------------------- 1 | from .objects import MujocoObject, MujocoXMLObject 2 | from .xml_objects import CubeObject, BasePartObject, CylObject, Cyl2Object 3 | -------------------------------------------------------------------------------- /panda/models/assets/robot/panda/meshes/finger.stl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/robot/panda/meshes/finger.stl -------------------------------------------------------------------------------- /panda/models/assets/robot/panda/meshes/link0.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/robot/panda/meshes/link0.stl -------------------------------------------------------------------------------- /panda/models/assets/robot/panda/meshes/link1.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/robot/panda/meshes/link1.stl -------------------------------------------------------------------------------- /panda/models/assets/robot/panda/meshes/link2.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/robot/panda/meshes/link2.stl -------------------------------------------------------------------------------- /panda/models/assets/robot/panda/meshes/link3.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/robot/panda/meshes/link3.stl -------------------------------------------------------------------------------- /panda/models/assets/robot/panda/meshes/link4.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/robot/panda/meshes/link4.stl -------------------------------------------------------------------------------- /panda/models/assets/robot/panda/meshes/link5.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/robot/panda/meshes/link5.stl -------------------------------------------------------------------------------- /panda/models/assets/robot/panda/meshes/link6.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/robot/panda/meshes/link6.stl -------------------------------------------------------------------------------- /panda/models/assets/robot/panda/meshes/link7.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/robot/panda/meshes/link7.stl -------------------------------------------------------------------------------- /panda/models/assets/robot/panda/meshes/hand_vis.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/robot/panda/meshes/hand_vis.stl -------------------------------------------------------------------------------- /panda/models/assets/robot/panda/meshes/link0_vis.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/robot/panda/meshes/link0_vis.stl 
-------------------------------------------------------------------------------- /panda/models/assets/robot/panda/meshes/link1_vis.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/robot/panda/meshes/link1_vis.stl -------------------------------------------------------------------------------- /panda/models/assets/robot/panda/meshes/link2_vis.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/robot/panda/meshes/link2_vis.stl -------------------------------------------------------------------------------- /panda/models/assets/robot/panda/meshes/link3_vis.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/robot/panda/meshes/link3_vis.stl -------------------------------------------------------------------------------- /panda/models/assets/robot/panda/meshes/link4_vis.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/robot/panda/meshes/link4_vis.stl -------------------------------------------------------------------------------- /panda/models/assets/robot/panda/meshes/link5_vis.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/robot/panda/meshes/link5_vis.stl -------------------------------------------------------------------------------- /panda/models/assets/robot/panda/meshes/link6_vis.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/robot/panda/meshes/link6_vis.stl -------------------------------------------------------------------------------- /panda/models/assets/robot/panda/meshes/link7_vis.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/robot/panda/meshes/link7_vis.stl -------------------------------------------------------------------------------- /panda/models/assets/robot/panda/meshes/pedestal.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/robot/panda/meshes/pedestal.stl -------------------------------------------------------------------------------- /panda/models/assets/robot/panda/meshes/finger_vis.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/robot/panda/meshes/finger_vis.stl -------------------------------------------------------------------------------- /panda/models/assets/gripper/meshes/panda_gripper/hand.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/gripper/meshes/panda_gripper/hand.stl 
-------------------------------------------------------------------------------- /panda/models/assets/gripper/meshes/panda_gripper/finger.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/gripper/meshes/panda_gripper/finger.stl -------------------------------------------------------------------------------- /panda/models/assets/gripper/meshes/panda_gripper/hand_vis.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/gripper/meshes/panda_gripper/hand_vis.stl -------------------------------------------------------------------------------- /panda/models/assets/gripper/meshes/panda_gripper/finger_vis.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/gripper/meshes/panda_gripper/finger_vis.stl -------------------------------------------------------------------------------- /panda/models/assets/gripper/meshes/panda_gripper/finger_longer.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/gripper/meshes/panda_gripper/finger_longer.stl -------------------------------------------------------------------------------- /panda/models/assets/gripper/meshes/panda_gripper/finger_longer_a.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/gripper/meshes/panda_gripper/finger_longer_a.stl -------------------------------------------------------------------------------- /panda/models/assets/gripper/meshes/panda_gripper/finger_longer_b.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/gripper/meshes/panda_gripper/finger_longer_b.stl -------------------------------------------------------------------------------- /panda/models/assets/gripper/meshes/panda_gripper/finger_longer_c.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/gripper/meshes/panda_gripper/finger_longer_c.stl -------------------------------------------------------------------------------- /panda/models/assets/gripper/meshes/panda_gripper/finger_longer_a_old.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Asad-Shahid/Intelligent-Task-Learning/HEAD/panda/models/assets/gripper/meshes/panda_gripper/finger_longer_a_old.stl -------------------------------------------------------------------------------- /panda/models/world.py: -------------------------------------------------------------------------------- 1 | from models.base import MujocoXML 2 | from utils.mjcf_utils import xml_path_completion 3 | 4 | 5 | class MujocoWorldBase(MujocoXML): 6 | """Base class to inherit all mujoco worlds from.""" 7 | 8 | def __init__(self): 9 | super().__init__(xml_path_completion("base.xml")) 10 | -------------------------------------------------------------------------------- 
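`MujocoWorldBase` and most of the model classes in this package resolve their XML files through `xml_path_completion` from `utils/mjcf_utils.py`, whose implementation is not reproduced in this section. The following is only a plausible sketch of that helper, assuming it lives in `panda/utils/` and resolves relative paths against the `assets` directory that `models/__init__.py` exposes; the repository's actual implementation may differ.

```python
import os

# Assumed location of the asset tree; models/__init__.py builds the equivalent
# path as os.path.join(os.path.dirname(__file__), "assets") next to world.py.
ASSETS_ROOT = os.path.join(os.path.dirname(__file__), "..", "models", "assets")


def xml_path_completion(xml_path):
    """Resolve an XML path against the models/assets directory.

    Absolute paths are returned unchanged; relative paths such as "base.xml"
    or "arena/table_arena.xml" are joined onto the assets root. This is a
    sketch of the expected behaviour, not the repository's implementation.
    """
    if os.path.isabs(xml_path):
        return xml_path
    return os.path.abspath(os.path.join(ASSETS_ROOT, xml_path))
```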
/panda/config/controller_config.hjson: -------------------------------------------------------------------------------- 1 | /* Default values for controller config parameters */ 2 | { 3 | // Joint Velocity controller 4 | "joint_velocity": 5 | { 6 | "control_range": [1, 1, 1, 1, 1, 1, 1], 7 | "kv": [8.0, 7.0, 6.0, 4.0, 2.0, 0.5, 0.1], 8 | "interpolation": "linear" 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /panda/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .mujoco_py_renderer import MujocoPyRenderer 2 | 3 | def str2bool(v): 4 | return v.lower() == 'true' 5 | 6 | 7 | def str2intlist(value): 8 | if not value: 9 | return value 10 | else: 11 | return [int(num) for num in value.split(',')] 12 | 13 | 14 | def str2list(value): 15 | if not value: 16 | return value 17 | else: 18 | return [num for num in value.split(',')] 19 | -------------------------------------------------------------------------------- /panda/models/assets/base.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /panda/models/arena/arena.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from models.base import MujocoXML 3 | from utils.mjcf_utils import array_to_string, string_to_array 4 | 5 | 6 | class Arena(MujocoXML): 7 | """Base arena class.""" 8 | 9 | def set_origin(self, offset): 10 | """Applies a constant offset to all objects.""" 11 | offset = np.array(offset) 12 | for node in self.worldbody.findall("./*[@pos]"): 13 | cur_pos = string_to_array(node.get("pos")) 14 | new_pos = cur_pos + offset 15 | node.set("pos", array_to_string(new_pos)) 16 | -------------------------------------------------------------------------------- /panda/models/assets/objects/cube.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /panda/models/assets/objects/cyl.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /panda/models/assets/objects/cyl2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /panda/models/assets/objects/basepart.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /panda/utils/logger.py: -------------------------------------------------------------------------------- 1 | import time 2 | import logging 3 | 4 | import numpy as np 5 | import colorlog 6 | 7 | 8 | formatter = colorlog.ColoredFormatter( 9 | "%(log_color)s[%(asctime)s] %(message)s", 10 | datefmt=None, 11 | reset=True, 12 | log_colors={'DEBUG': 'cyan', 'INFO': 'white', 'WARNING': 'yellow', 'ERROR': 'red,bold', 'CRITICAL': 'red,bg_white'}, 13 | secondary_log_colors={}, 14 | style='%') 15 | 16 | logger = colorlog.getLogger('gear_assembly') 17 | 
logger.setLevel(logging.DEBUG) 18 | 19 | #fh = logging.FileHandler('log') 20 | #fh.setLevel(logging.DEBUG) 21 | #fh.setFormatter(formatter) 22 | #logger.addHandler(fh) 23 | 24 | ch = colorlog.StreamHandler() 25 | ch.setLevel(logging.DEBUG) 26 | ch.setFormatter(formatter) 27 | logger.addHandler(ch) 28 | -------------------------------------------------------------------------------- /panda/models/task/task.py: -------------------------------------------------------------------------------- 1 | from models.world import MujocoWorldBase 2 | 3 | 4 | class Task(MujocoWorldBase): 5 | """ 6 | Base class for creating MJCF model of a task. 7 | 8 | A task typically involves a robot interacting with objects in an arena 9 | (workshpace). The purpose of a task class is to generate a MJCF model 10 | of the task by combining the MJCF models of each component together and 11 | place them to the right positions. 12 | """ 13 | 14 | def merge_robot(self, mujoco_robot): 15 | """Adds robot model to the MJCF model.""" 16 | pass 17 | 18 | def merge_arena(self, mujoco_arena): 19 | """Adds arena model to the MJCF model.""" 20 | pass 21 | 22 | def merge_objects(self, mujoco_objects): 23 | """Adds physical objects to the MJCF model.""" 24 | pass 25 | -------------------------------------------------------------------------------- /panda/utils/mpi.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from mpi4py import MPI 3 | 4 | 5 | def _mpi_average(x): 6 | buf = np.zeros_like(x) 7 | MPI.COMM_WORLD.Allreduce(x, buf, op=MPI.SUM) 8 | buf /= MPI.COMM_WORLD.Get_size() 9 | return buf 10 | 11 | 12 | # Average across the cpu's data 13 | def mpi_average(x): 14 | if isinstance(x, dict): 15 | keys = sorted(x.keys()) 16 | return {k: _mpi_average(np.array(x[k])) for k in keys} 17 | else: 18 | return _mpi_average(np.array(x)) 19 | 20 | 21 | def _mpi_sum(x): 22 | buf = np.zeros_like(x) 23 | MPI.COMM_WORLD.Allreduce(x, buf, op=MPI.SUM) 24 | return buf 25 | 26 | 27 | # Sum over the cpu's data 28 | def mpi_sum(x): 29 | if isinstance(x, dict): 30 | keys = sorted(x.keys()) 31 | return {k: _mpi_sum(np.array(x[k])) for k in keys} 32 | else: 33 | return _mpi_sum(np.array(x)) 34 | -------------------------------------------------------------------------------- /panda/models/objects/xml_objects.py: -------------------------------------------------------------------------------- 1 | from models.objects import MujocoXMLObject 2 | from utils.mjcf_utils import xml_path_completion 3 | 4 | 5 | class CubeObject(MujocoXMLObject): 6 | """ 7 | Round Gear 8 | """ 9 | 10 | def __init__(self): 11 | super().__init__(xml_path_completion("objects/cube.xml")) 12 | 13 | 14 | class BasePartObject(MujocoXMLObject): 15 | """ 16 | Base of the assembly 17 | """ 18 | 19 | def __init__(self): 20 | super().__init__(xml_path_completion("objects/basepart.xml")) 21 | 22 | class CylObject(MujocoXMLObject): 23 | """ 24 | Clutter 25 | """ 26 | 27 | def __init__(self): 28 | super().__init__(xml_path_completion("objects/cyl.xml")) 29 | 30 | class Cyl2Object(MujocoXMLObject): 31 | """ 32 | Clutter 33 | """ 34 | 35 | def __init__(self): 36 | super().__init__(xml_path_completion("objects/cyl2.xml")) 37 | -------------------------------------------------------------------------------- /panda/controller/controller.py: -------------------------------------------------------------------------------- 1 | import abc # for abstract base class definitions 2 | 3 | 4 | class Controller(metaclass=abc.ABCMeta): 5 | """ 6 | Base 
class for all robot controllers. 7 | Defines basic interface for all controllers to adhere to. 8 | """ 9 | 10 | def __init__(self, bullet_data_path, robot_jpos_getter): 11 | """ 12 | Args: 13 | bullet_data_path (str): base path to bullet data. 14 | 15 | robot_jpos_getter (function): function that returns the position of the joints 16 | as a numpy array of the right dimension. 17 | """ 18 | raise NotImplementedError 19 | 20 | @abc.abstractmethod 21 | def get_control(self, *args, **kwargs): 22 | """ 23 | Retrieve a control input from the controller. 24 | """ 25 | raise NotImplementedError 26 | 27 | @abc.abstractmethod 28 | def sync_state(self): 29 | """ 30 | This function does internal bookkeeping to maintain 31 | consistency between the robot being controlled and 32 | the controller state. 33 | """ 34 | raise NotImplementedError 35 | -------------------------------------------------------------------------------- /panda/rl/policies/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | def fanin_init(tensor): # Initialization 7 | size = tensor.size() 8 | if len(size) == 2: 9 | fan_in = size[0] 10 | elif len(size) > 2: 11 | fan_in = np.prod(size[1:]) 12 | else: 13 | raise Exception("Shape must have dimension at least 2.") 14 | bound = 1. / np.sqrt(fan_in) 15 | return tensor.data.uniform_(-bound, bound) # fills a tensor with a uniform distribution between the bounds 16 | 17 | class MLP(nn.Module): 18 | def __init__(self, config, input_dim, output_dim, hid_dims=[]): 19 | super().__init__() 20 | #activation_fn = getattr(F, config.activation) 21 | activation_fn = nn.ReLU() 22 | 23 | fc = [] 24 | prev_dim = input_dim 25 | for d in hid_dims: 26 | fc.append(nn.Linear(prev_dim, d)) 27 | fanin_init(fc[-1].weight) 28 | fc[-1].bias.data.fill_(0.1) 29 | fc.append(activation_fn) 30 | prev_dim = d 31 | fc.append(nn.Linear(prev_dim, output_dim)) 32 | fc[-1].weight.data.uniform_(-1e-3, 1e-3) 33 | fc[-1].bias.data.uniform_(-1e-3, 1e-3) 34 | self.fc = nn.Sequential(*fc) 35 | 36 | def forward(self, ob): 37 | return self.fc(ob) 38 | -------------------------------------------------------------------------------- /panda/README.md: -------------------------------------------------------------------------------- 1 | ## Config 2 | Config contains configuration details for the environment, the controller and the learning algorithms. 3 | 4 | ## Controller 5 | Controller contains the joint velocity controller class. Actions coming from the learning algorithms are interpreted as joint velocities, but the real robot is equipped with a torque controller. The Controller class converts the joint velocities to joint torques and also interpolates between successive actions coming from the policy if needed. 6 | 7 | ## Environments 8 | Environments include the base environment and the environments for the Panda Robot and the Grasping task. Panda Robot defines everything robot-related. Grasping task defines everything task-related, including a reward function. Environments also contain an action space class that defines the size and limits of the robot's action space used in learning. 9 | 10 | ## Models 11 | Models is where everything is actually defined for simulation. Arena, Gripper, Objects and Robot are the classes that use the corresponding XMLs from Assets. Task merges all the pieces and instantiates a scene model for MuJoCo to perform the simulation. Task is used later on in the environment.
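To make this composition concrete, here is a minimal sketch of assembling a grasping scene from the classes shown in this repository, assuming it is run from the `panda/` package root so the repo-style imports resolve; the gripper mount body name ("link7") is an illustrative assumption, and the environment classes normally do this wiring internally.

```python
from collections import OrderedDict

from models.arena import TableArena
from models.gripper import PandaGripper
from models.objects import CubeObject
from models.robot import Panda
from models.task import GraspingTask

# Build the individual MJCF pieces defined in this package.
robot = Panda()
robot.set_base_xpos([0, 0, 0])               # place the arm base in the world frame
robot.add_gripper("link7", PandaGripper())   # mount body name "link7" is an assumption
arena = TableArena(table_full_size=(0.35, 0.6, 0.02))
objects = OrderedDict([("cube", CubeObject())])

# GraspingTask merges the arena, the robot and the objects into one scene model.
task = GraspingTask(arena, robot, objects)
```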
12 | 13 | ## RL 14 | RL is where the actual learning happens. PPO and SAC are the implemented algorithms. The neural network models are in policies. 15 | 16 | 17 | ## Utils 18 | Utils contains auxiliary functions for both the environment definition and the learning part. 19 | 20 | Note: all the files are heavily commented (with example outputs) to make it easier to understand what is happening. 21 | -------------------------------------------------------------------------------- /panda/models/robot/robot.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | from models.base import MujocoXML 3 | 4 | 5 | class Robot(MujocoXML): 6 | """Base class for all robot models.""" 7 | 8 | def __init__(self, fname): 9 | """Initializes from file @fname.""" 10 | super().__init__(fname) 11 | # key: gripper name and value: gripper model 12 | self.grippers = OrderedDict() 13 | 14 | def add_gripper(self, arm_name, gripper): 15 | """ 16 | Mounts gripper to arm. 17 | 18 | Throws error if robot already has a gripper or gripper type is incorrect. 19 | 20 | Args: 21 | arm_name (str): name of arm mount 22 | gripper (MujocoGripper instance): gripper MJCF model 23 | """ 24 | if arm_name in self.grippers: 25 | raise ValueError("Attempts to add multiple grippers to one body") 26 | 27 | arm_subtree = self.worldbody.find(".//body[@name='{}']".format(arm_name)) 28 | 29 | for body in gripper.worldbody: 30 | arm_subtree.append(body) 31 | 32 | self.merge(gripper, merge_body=False) 33 | self.grippers[arm_name] = gripper 34 | 35 | @property 36 | def dof(self): 37 | """Returns the number of DOF of the robot, not including gripper.""" 38 | raise NotImplementedError 39 | 40 | @property 41 | def joints(self): 42 | """Returns a list of joint names of the robot.""" 43 | raise NotImplementedError 44 | 45 | @property 46 | def init_qpos(self): 47 | """Returns default qpos.""" 48 | raise NotImplementedError 49 | -------------------------------------------------------------------------------- /panda/models/gripper/panda_gripper.py: -------------------------------------------------------------------------------- 1 | """ 2 | Gripper for Franka's Panda (has two fingers). 3 | """ 4 | import numpy as np 5 | from utils.mjcf_utils import xml_path_completion 6 | from models.gripper.gripper import Gripper 7 | 8 | 9 | class PandaGripperBase(Gripper): 10 | """ 11 | Gripper for Franka's Panda (has two fingers). 12 | """ 13 | 14 | def __init__(self): 15 | super().__init__(xml_path_completion("gripper/panda_gripper.xml")) 16 | 17 | def format_action(self, action): 18 | return action 19 | 20 | @property 21 | def init_qpos(self): 22 | return np.array([0.020833, -0.020833]) 23 | 24 | @property 25 | def joints(self): 26 | return ["finger_joint1", "finger_joint2"] 27 | 28 | @property 29 | def dof(self): 30 | return 2 31 | 32 | @property 33 | def visualization_sites(self): 34 | return ["grip_site"] 35 | 36 | def contact_geoms(self): 37 | return ["hand_collision", "finger1_collision", "finger2_collision", "finger1_tip_collision", "finger2_tip_collision"] 38 | 39 | @property 40 | def left_finger_geoms(self): 41 | return ["finger1_tip_collision"] 42 | 43 | @property 44 | def right_finger_geoms(self): 45 | return ["finger2_tip_collision"] 46 | 47 | 48 | class PandaGripper(PandaGripperBase): 49 | """ 50 | Modifies PandaGripperBase to only take one action.
51 | """ 52 | 53 | def format_action(self, action): 54 | """ 55 | 1 => closed, -1 => open 56 | """ 57 | assert len(action) == 1 58 | return np.array([-1 * action[0], 1 * action[0]]) 59 | 60 | @property 61 | def dof(self): 62 | return 1 63 | -------------------------------------------------------------------------------- /panda/models/arena/table_arena.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from models.arena import Arena 3 | from utils.mjcf_utils import xml_path_completion 4 | from utils.mjcf_utils import array_to_string, string_to_array 5 | 6 | 7 | class TableArena(Arena): 8 | """Workspace that contains a tabletop.""" 9 | 10 | def __init__(self, table_full_size=(0.35, 0.6, 0.02), table_friction=(1, 0.005, 0.0001)): 11 | """ 12 | Args: 13 | table_full_size: full dimensions of the table 14 | table_friction: friction parameters of the table 15 | """ 16 | super().__init__(xml_path_completion("arena/table_arena.xml")) 17 | 18 | self.table_full_size = np.array(table_full_size) 19 | self.table_half_size = self.table_full_size / 2 20 | self.table_friction = table_friction 21 | 22 | self.floor = self.worldbody.find("./geom[@name='floor']") 23 | self.table_body = self.worldbody.find("./body[@name='table']") 24 | self.table_collision = self.table_body.find("./geom[@name='table_collision']") 25 | self.table_view = self.table_body.find("./geom[@name='table_view']") 26 | self.configure_location() 27 | 28 | def configure_location(self): 29 | self.bottom_pos = np.array([0, 0, 0]) 30 | self.floor.set("pos", array_to_string(self.bottom_pos)) 31 | self.table_collision.set("size", array_to_string(self.table_full_size)) 32 | self.table_view.set("size", array_to_string(self.table_full_size)) 33 | 34 | @property 35 | def table_top_abs(self): 36 | """ 37 | Returns the absolute position of table top. 
38 | """ 39 | return string_to_array(self.table_body.get("pos")) 40 | -------------------------------------------------------------------------------- /panda/rl/policies/distributions.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | import torch.distributions 7 | 8 | 9 | # Normal 10 | FixedNormal = torch.distributions.Normal 11 | 12 | normal_init = FixedNormal.__init__ 13 | FixedNormal.__init__ = lambda self, mean, std: normal_init(self, mean.double(), std.double()) 14 | 15 | log_prob_normal = FixedNormal.log_prob 16 | FixedNormal.log_probs = lambda self, actions: log_prob_normal(self, actions.double()).sum(-1, keepdim=True).float() 17 | 18 | normal_entropy = FixedNormal.entropy 19 | FixedNormal.entropy = lambda self: normal_entropy(self).sum(-1).float() 20 | 21 | FixedNormal.mode = lambda self: self.mean.float() 22 | 23 | normal_sample = FixedNormal.sample 24 | FixedNormal.sample = lambda self: normal_sample(self).float() 25 | 26 | normal_rsample = FixedNormal.rsample 27 | FixedNormal.rsample = lambda self: normal_rsample(self).float() 28 | 29 | class MixedDistribution(nn.Module): 30 | def __init__(self, distributions): 31 | super().__init__() 32 | assert isinstance(distributions, OrderedDict) 33 | self.distributions = distributions 34 | 35 | def mode(self): 36 | return OrderedDict([(k, dist.mode()) for k, dist in self.distributions.items()]) 37 | 38 | def sample(self): 39 | return OrderedDict([(k, dist.sample()) for k, dist in self.distributions.items()]) 40 | 41 | def rsample(self): 42 | return OrderedDict([(k, dist.rsample()) for k, dist in self.distributions.items()]) 43 | 44 | def log_probs(self, x): 45 | assert isinstance(x, dict) 46 | return OrderedDict([(k, dist.log_probs(x[k])) for k, dist in self.distributions.items()]) 47 | 48 | def entropy(self): 49 | return sum([dist.entropy() for dist in self.distributions.values()]) 50 | 51 | -------------------------------------------------------------------------------- /panda/rl/base_agent.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | from rl.normalizer import Normalizer 3 | 4 | 5 | class BaseAgent(): 6 | def __init__(self, config, ob_space): # ob_space is an ordered dict where keys are ob names and values are dimensions 7 | self._config = config 8 | self._ob_norm = Normalizer(ob_space, default_clip_range=config.clip_range, clip_obs=config.clip_obs) 9 | 10 | def normalize(self, ob): 11 | if self._config.ob_norm: 12 | return self._ob_norm.normalize(ob) 13 | return ob 14 | 15 | def act(self, ob, is_train=True): 16 | ob = self.normalize(ob) 17 | ac, activation = self._actor.act(ob, is_train=is_train) 18 | return ac, activation 19 | 20 | def update_normalizer(self, obs): 21 | if self._config.ob_norm: 22 | self._ob_norm.update(obs) 23 | self._ob_norm.recompute_stats() 24 | 25 | def store_episode(self, rollouts): 26 | raise NotImplementedError() 27 | 28 | def replay_buffer(self): 29 | return self._buffer.state_dict() # gives episodes buffer; buffer is like a rollout but can store more values for multiple episodes 30 | 31 | def load_replay_buffer(self, state_dict): 32 | self._buffer.load_state_dict(state_dict) 33 | 34 | def sync_networks(self): 35 | raise NotImplementedError() 36 | 37 | def train(self): 38 | raise NotImplementedError() 39 | 40 | def _soft_update_target_network(self, target, source, tau): 41 | for 
target_param, source_param in zip(target.parameters(), source.parameters()): 42 | target_param.data.copy_((1 - tau) * source_param.data + tau * target_param.data) 43 | 44 | def _copy_target_network(self, target, source): 45 | for target_param, source_param in zip(target.parameters(), source.parameters()): 46 | target_param.data.copy_(source_param.data) 47 | -------------------------------------------------------------------------------- /panda/models/task/grasping_task.py: -------------------------------------------------------------------------------- 1 | from utils.mjcf_utils import new_joint, array_to_string 2 | from models.task import Task 3 | 4 | 5 | class GraspingTask(Task): 6 | """ 7 | Creates MJCF model of a grasping task. 8 | 9 | A gear assembly task consists of a robot picking up a cube from a table. This class combines 10 | the robot, the arena with table, and the objects into a single MJCF model. 11 | """ 12 | 13 | def __init__(self, mujoco_arena, mujoco_robot, mujoco_objects): 14 | """ 15 | Args: 16 | mujoco_arena: MJCF model of robot workspace 17 | mujoco_robot: MJCF model of robot model 18 | mujoco_objects: a list of MJCF models of physical objects 19 | """ 20 | super().__init__() 21 | 22 | self.merge_arena(mujoco_arena) 23 | self.merge_robot(mujoco_robot) 24 | self.merge_objects(mujoco_objects) 25 | 26 | 27 | def merge_robot(self, mujoco_robot): 28 | """Adds robot model to the MJCF model.""" 29 | self.robot = mujoco_robot 30 | self.merge(mujoco_robot) 31 | 32 | def merge_arena(self, mujoco_arena): 33 | """Adds arena model to the MJCF model.""" 34 | self.arena = mujoco_arena 35 | self.table_offset = mujoco_arena.table_top_abs 36 | self.table_size = mujoco_arena.table_full_size 37 | self.table_body = mujoco_arena.table_body 38 | self.merge(mujoco_arena) 39 | 40 | def merge_objects(self, mujoco_objects): 41 | """Adds physical objects to the MJCF model.""" 42 | self.mujoco_objects = mujoco_objects 43 | self.objects = {} # xml manifestation 44 | self.max_horizontal_radius = 0 45 | for obj_name, obj_mjcf in mujoco_objects.items(): 46 | self.merge_asset(obj_mjcf) 47 | # Load object 48 | obj = obj_mjcf.get_collision(name=obj_name, site=True) 49 | obj.append(new_joint(name=obj_name, type="free", damping="0.0005")) 50 | self.objects[obj_name] = obj 51 | self.worldbody.append(obj) 52 | -------------------------------------------------------------------------------- /panda/environments/action_space.py: -------------------------------------------------------------------------------- 1 | """ Define ActionSpace class to represent action space. """ 2 | 3 | from collections import OrderedDict 4 | import numpy as np 5 | from utils.logger import logger 6 | 7 | 8 | class ActionSpace(object): 9 | """ 10 | Base class for action space 11 | This action space is used in the provided RL training code. 12 | """ 13 | 14 | def __init__(self, size, minimum=-1., maximum=1.): 15 | """ 16 | Loads a mujoco xml from file. 17 | 18 | Args: 19 | size (int): action dimension. 20 | min: minimum values for action. 21 | max: maximum values for action. 22 | """ 23 | self.size = size 24 | self.shape = OrderedDict([('default', size)]) 25 | 26 | self._minimum = np.array(minimum) 27 | self._minimum.setflags(write=False) 28 | 29 | self._maximum = np.array(maximum) 30 | self._maximum.setflags(write=False) 31 | 32 | @property 33 | def minimum(self): 34 | """ 35 | Returns the minimum values of the action. 
36 | """ 37 | return self._minimum 38 | 39 | @property 40 | def maximum(self): 41 | """ 42 | Returns the maximum values of the action. 43 | """ 44 | return self._maximum 45 | 46 | def keys(self): 47 | """ 48 | Returns the keys of the action space. 49 | """ 50 | return self.shape.keys() 51 | 52 | def __repr__(self): 53 | template = ('ActionSpace(shape={},''minimum={}, maximum={})') 54 | return template.format(self.shape, self._minimum, self._maximum) 55 | 56 | def __eq__(self, other): 57 | """ 58 | Returns whether other action space is the same or not. 59 | """ 60 | if not isinstance(other, ActionSpace): 61 | return False 62 | return (self.minimum == other.minimum).all() and (self.maximum == other.maximum).all() 63 | 64 | def sample(self): 65 | """ 66 | Returns a sample from the action space. 67 | """ 68 | return np.random.uniform(low=self.minimum, high=self.maximum, size=self.size) 69 | -------------------------------------------------------------------------------- /panda/config/grasping.py: -------------------------------------------------------------------------------- 1 | from utils import str2bool, str2intlist 2 | 3 | 4 | 5 | def add_argument(parser): 6 | """ 7 | Adds a list of arguments to argparser for the lift environment. 8 | """ 9 | 10 | # training scene 11 | 12 | parser.add_argument('--mode', type=int, default=1, 13 | help='1: nominal cube scene, 2: collision avoidance scene') 14 | 15 | 16 | # mujoco simulation 17 | 18 | parser.add_argument('--table_full_size', type=float, default=(0.35, 0.46, 0.02), 19 | help='x, y, and z dimensions of the table') 20 | parser.add_argument('--gripper_type', type=str, default='PandaGripper', 21 | help='Gripper type of robot') 22 | parser.add_argument('--gripper_visualization', type=str2bool, default=True, 23 | help='using gripper visualization') 24 | 25 | # rendering 26 | 27 | parser.add_argument('--render_collision_mesh', type=str2bool, default=False, 28 | help='if rendering collision meshes in camera') 29 | parser.add_argument('--render_visual_mesh', type=str2bool, default=True, 30 | help='if rendering visual meshes in camera') 31 | 32 | # episode settings 33 | 34 | parser.add_argument('--horizon', type=int, default=600, 35 | help='Every episode lasts for exactly @horizon timesteps') 36 | parser.add_argument('--ignore_done', type=str2bool, default=True, 37 | help='if never terminating the environment (ignore @horizon)') 38 | 39 | # controller 40 | 41 | parser.add_argument('--control_freq', type=int, default= 250, 42 | help='control signals to receive in every simulated second, sets the amount of simulation time that passes between every action input') 43 | 44 | 45 | def get_default_config(): 46 | """ 47 | Gets default configurations for the lift environment. 
48 | """ 49 | import argparse 50 | parser = argparse.ArgumentParser("Default Configuration for lift Environment") 51 | add_argument(parser) 52 | 53 | config = parser.parse_args([]) 54 | return config 55 | -------------------------------------------------------------------------------- /panda/models/gripper/gripper.py: -------------------------------------------------------------------------------- 1 | """ 2 | Defines the base class of gripper 3 | """ 4 | from models.base import MujocoXML 5 | 6 | 7 | class Gripper(MujocoXML): 8 | """Base class for gripper""" 9 | 10 | def __init__(self, fname): 11 | super().__init__(fname) 12 | 13 | def format_action(self, action): 14 | """ 15 | Given (-1,1) abstract control as np-array 16 | returns the (-1,1) control signals 17 | for underlying actuators as 1-d np array 18 | """ 19 | raise NotImplementedError 20 | 21 | @property 22 | def init_qpos(self): 23 | """ 24 | Returns rest(open) qpos of the gripper 25 | """ 26 | raise NotImplementedError 27 | 28 | @property 29 | def dof(self): 30 | """ 31 | Returns the number of DOF of the gripper 32 | """ 33 | raise NotImplementedError 34 | 35 | @property 36 | def joints(self): 37 | """ 38 | Returns a list of joint names of the gripper 39 | """ 40 | raise NotImplementedError 41 | 42 | def contact_geoms(self): 43 | """ 44 | Returns a list of names corresponding to the geoms 45 | used to determine contact with the gripper. 46 | """ 47 | return [] 48 | 49 | @property 50 | def visualization_sites(self): 51 | """ 52 | Returns a list of sites corresponding to the geoms 53 | used to aid visualization by human. 54 | (and should be hidden from robots) 55 | """ 56 | return [] 57 | 58 | @property 59 | def left_finger_geoms(self): 60 | """ 61 | Geoms corresponding to left finger of a gripper 62 | """ 63 | raise NotImplementedError 64 | 65 | @property 66 | def right_finger_geoms(self): 67 | """ 68 | Geoms corresponding to raise finger of a gripper 69 | """ 70 | raise NotImplementedError 71 | 72 | def hide_visualization(self): 73 | """ 74 | Hides all visualization geoms and sites. 75 | This should be called before rendering to agents 76 | """ 77 | for site_name in self.visualization_sites: 78 | site = self.worldbody.find(".//site[@name='{}']".format(site_name)) 79 | site.set("rgba", "0 0 0 0") 80 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Intelligent-Task-Learning 2 | The Repository for the _Autonomous Robots 2022 Journal article:_ 3 | 4 | "[Continuous control actions learning and adaptation for robotic manipulation through reinforcement learning](https://link.springer.com/article/10.1007/s10514-022-10034-z)" 5 | 6 | This contains a python toolkit for learning a grasping task with Franka Emika Panda Robot. The robot can be trained to grasp the cube, avoid obstacles and learn to manage redundancy using modern Reinforcement Learning algorithms of [Proximal Policy Optimization (PPO)](https://arxiv.org/abs/1707.06347) and [Soft Actor-Critic (SAC)](https://arxiv.org/abs/1812.05905). 
It is powered by the [MuJoCo physics engine](http://www.mujoco.org/). 7 | 8 | 9 | # How to cite 10 | ``` 11 | @article{shahid2022continuous, 12 | title={Continuous control actions learning and adaptation for robotic manipulation through reinforcement learning}, 13 | author={Shahid, Asad Ali and Piga, Dario and Braghin, Francesco and Roveda, Loris}, 14 | journal={Autonomous Robots}, 15 | pages={1--16}, 16 | year={2022}, 17 | publisher={Springer} 18 | } 19 | ``` 20 | 21 | # New Experiment (Dynamic Environment) 22 | The adapted policy can grasp the moving cube after 30 minutes of retraining. 23 | ![](adapted_policy.gif) 24 | 25 | Before retraining, the base grasping policy trained on a static cube is not able to grasp the moving cube. 26 | ![](base_policy.gif) 27 | 28 | # Simulation Video 29 | [![Video](https://img.youtube.com/vi/aX55Zc2XMTE/maxres3.jpg)](https://www.youtube.com/watch?v=aX55Zc2XMTE) 30 | 31 | # Experimental validation 32 | [![Video](https://github.com/Asad-Shahid/Intelligent-Task-Learning/blob/master/exp_image.png)](https://drive.google.com/file/d/1zlS-_HIWMlIAvrxqGNGRyMbuDfQrws8z/view) 33 | 34 | 35 | ## Installation 36 | 37 | To use this toolkit, first install [MuJoCo 200](https://www.roboti.us/index.html) and then [mujoco-py](https://github.com/openai/mujoco-py) from OpenAI. mujoco-py allows using MuJoCo from a Python interface. 38 | The installation requires Python 3.6 or higher. It is recommended to install all the required packages in a conda virtual environment. 39 | 40 | 41 | ## References 42 | This toolkit is mainly developed based on the [Surreal Robotics Suite](https://github.com/StanfordVL/robosuite), and the reinforcement learning part is adapted from 43 | [this repo](https://github.com/clvrai/furniture) 44 | -------------------------------------------------------------------------------- /panda/models/robot/panda_robot.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from models.robot.robot import Robot 3 | from utils.mjcf_utils import xml_path_completion, array_to_string 4 | 5 | 6 | class Panda(Robot): 7 | """Panda is a sensitive single-arm robot designed by Franka.""" 8 | 9 | def __init__(self, xml_path="robot/panda/robot_torque.xml"): 10 | 11 | super().__init__(xml_path_completion(xml_path)) 12 | 13 | self.bottom_offset = np.array([0, 0, 0]) # ignore the bottom offset; directly define base_xpos in world frame 14 | self.set_joint_damping() 15 | self._init_qpos = np.array([0, np.pi / 16.0, 0.00, -np.pi / 2.0 - np.pi / 3.0, 0.00, np.pi - 0.2, -np.pi/4]) 16 | 17 | def set_base_xpos(self, pos): 18 | """Places the robot on position @pos.""" 19 | node = self.worldbody.find("./body[@name='link0']") 20 | node.set("pos", array_to_string(pos - self.bottom_offset)) 21 | 22 | def set_joint_damping(self, damping=np.array((0.1, 0.1, 0.1, 0.1, 0.1, 0.01, 0.01))): 23 | """Set joint damping """ 24 | body = self._base_body 25 | for i in range(len(self._link_body)): 26 | body = body.find("./body[@name='{}']".format(self._link_body[i])) 27 | joint = body.find("./joint[@name='{}']".format(self._joints[i])) 28 | joint.set("damping", array_to_string(np.array([damping[i]]))) 29 | 30 | def set_joint_frictionloss(self, friction=np.array((0.1, 0.1, 0.1, 0.1, 0.1, 0.01, 0.01))): 31 | """Set joint friction loss (static friction)""" 32 | body = self._base_body 33 | for i in range(len(self._link_body)): 34 | body = body.find("./body[@name='{}']".format(self._link_body[i])) 35 | joint = 
body.find("./joint[@name='{}']".format(self._joints[i])) 36 | joint.set("frictionloss", array_to_string(np.array([friction[i]]))) 37 | 38 | @property 39 | def dof(self): 40 | return 7 41 | 42 | @property 43 | def joints(self): 44 | return ["joint{}".format(x) for x in range(1, 8)] 45 | 46 | @property 47 | def init_qpos(self): 48 | return self._init_qpos 49 | 50 | @property 51 | def contact_geoms(self): 52 | return ["link{}_collision".format(x) for x in range(1, 8)] 53 | 54 | @property 55 | def _base_body(self): 56 | node = self.worldbody.find("./body[@name='link0']") 57 | return node 58 | 59 | @property 60 | def _link_body(self): 61 | return ["link1", "link2", "link3", "link4", "link5", "link6", "link7"] 62 | 63 | @property 64 | def _joints(self): 65 | return ["joint1", "joint2", "joint3", "joint4", "joint5", "joint6", "joint7"] 66 | -------------------------------------------------------------------------------- /panda/environment.yml: -------------------------------------------------------------------------------- 1 | name: robo 2 | channels: 3 | - defaults 4 | dependencies: 5 | - _libgcc_mutex=0.1=main 6 | - _openmp_mutex=5.1=1_gnu 7 | - ca-certificates=2023.08.22=h06a4308_0 8 | - ld_impl_linux-64=2.38=h1181459_1 9 | - libffi=3.4.4=h6a678d5_0 10 | - libgcc-ng=11.2.0=h1234567_1 11 | - libgfortran-ng=7.5.0=ha8ba4b0_17 12 | - libgfortran4=7.5.0=ha8ba4b0_17 13 | - libgomp=11.2.0=h1234567_1 14 | - libstdcxx-ng=11.2.0=h1234567_1 15 | - mpi=1.0=mpich 16 | - mpi4py=3.1.4=py39hfc96bbd_0 17 | - mpich=3.3.2=hc856adb_0 18 | - ncurses=6.4=h6a678d5_0 19 | - openssl=3.0.10=h7f8727e_2 20 | - pip=23.2.1=py39h06a4308_0 21 | - python=3.9.18=h955ad1f_0 22 | - readline=8.2=h5eee18b_0 23 | - setuptools=68.0.0=py39h06a4308_0 24 | - sqlite=3.41.2=h5eee18b_0 25 | - tk=8.6.12=h1ccaba5_0 26 | - tzdata=2023c=h04d1e81_0 27 | - wheel=0.38.4=py39h06a4308_0 28 | - xz=5.4.2=h5eee18b_0 29 | - zlib=1.2.13=h5eee18b_0 30 | - pip: 31 | - appdirs==1.4.4 32 | - certifi==2023.7.22 33 | - cffi==1.15.1 34 | - charset-normalizer==3.2.0 35 | - click==8.1.7 36 | - cmake==3.27.4.1 37 | - colorlog==6.7.0 38 | - cython==0.29.36 39 | - docker-pycreds==0.4.0 40 | - fasteners==0.18 41 | - filelock==3.12.4 42 | - gitdb==4.0.10 43 | - gitpython==3.1.36 44 | - glfw==2.6.2 45 | - h5py==3.9.0 46 | - hjson==3.1.0 47 | - idna==3.4 48 | - imageio==2.31.3 49 | - jinja2==3.1.2 50 | - lit==16.0.6 51 | - markupsafe==2.1.3 52 | - mpmath==1.3.0 53 | - mujoco-py==2.1.2.14 54 | - networkx==3.1 55 | - numpy==1.25.2 56 | - nvidia-cublas-cu11==11.10.3.66 57 | - nvidia-cuda-cupti-cu11==11.7.101 58 | - nvidia-cuda-nvrtc-cu11==11.7.99 59 | - nvidia-cuda-runtime-cu11==11.7.99 60 | - nvidia-cudnn-cu11==8.5.0.96 61 | - nvidia-cufft-cu11==10.9.0.58 62 | - nvidia-curand-cu11==10.2.10.91 63 | - nvidia-cusolver-cu11==11.4.0.1 64 | - nvidia-cusparse-cu11==11.7.4.91 65 | - nvidia-nccl-cu11==2.14.3 66 | - nvidia-nvtx-cu11==11.7.91 67 | - pathtools==0.1.2 68 | - pillow==10.0.0 69 | - protobuf==4.24.3 70 | - psutil==5.9.5 71 | - pycparser==2.21 72 | - pyyaml==6.0.1 73 | - requests==2.31.0 74 | - scipy==1.11.2 75 | - sentry-sdk==1.31.0 76 | - setproctitle==1.3.2 77 | - six==1.16.0 78 | - smmap==5.0.0 79 | - sympy==1.12 80 | - torch==2.0.1 81 | - torchvision==0.15.2 82 | - tqdm==4.66.1 83 | - triton==2.0.0 84 | - typing-extensions==4.7.1 85 | - urllib3==2.0.4 86 | - wandb==0.15.10 87 | prefix: /home/asad/miniconda3/envs/robo 88 | -------------------------------------------------------------------------------- /panda/models/assets/arena/table_arena.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /panda/rl/main.py: -------------------------------------------------------------------------------- 1 | """ Launch RL training and evaluation. """ 2 | 3 | import sys 4 | import signal 5 | import os 6 | import json 7 | import numpy as np 8 | import torch 9 | from mpi4py import MPI 10 | from config import argparser 11 | from rl.trainer import Trainer 12 | from utils.logger import logger 13 | 14 | 15 | np.set_printoptions(precision=3) 16 | np.set_printoptions(suppress=True) 17 | 18 | 19 | def run(config): 20 | """ 21 | Runs Trainer. 22 | """ 23 | # for parallel workers training (distributed training) 24 | rank = MPI.COMM_WORLD.Get_rank() # Each process is assigned a rank that is unique within Communicator(group of processes) 25 | config.rank = rank 26 | config.is_chef = rank == 0 # Binary 27 | config.seed = config.seed + rank 28 | config.num_workers = MPI.COMM_WORLD.Get_size() # total no. of processes is a size of communicator 29 | 30 | if config.is_chef: 31 | logger.warn('Run a base worker.') 32 | make_log_files(config) 33 | else: 34 | logger.warn('Run worker %d and disable logger.', config.rank) 35 | import logging 36 | logger.setLevel(logging.CRITICAL) 37 | 38 | def shutdown(signal, frame): 39 | logger.warn('Received signal %s: exiting', signal) 40 | sys.exit(128+signal) 41 | 42 | signal.signal(signal.SIGHUP, shutdown) 43 | signal.signal(signal.SIGINT, shutdown) 44 | signal.signal(signal.SIGTERM, shutdown) 45 | 46 | # set global seed 47 | np.random.seed(config.seed) 48 | torch.manual_seed(config.seed) 49 | torch.cuda.manual_seed_all(config.seed) 50 | 51 | # set the display no. configured with gpu 52 | os.environ["DISPLAY"] = ":0" 53 | # use gpu or cpu 54 | if config.gpu is not None: 55 | os.environ["CUDA_VISIBLE_DEVICES"] = "{}".format(config.gpu) 56 | assert torch.cuda.is_available() 57 | config.device = torch.device("cuda") 58 | else: 59 | config.device = torch.device("cpu") 60 | 61 | # build a trainer 62 | trainer = Trainer(config) 63 | if config.is_train: 64 | trainer.train() 65 | logger.info("Finish training") 66 | else: 67 | trainer.evaluate() 68 | logger.info("Finish evaluating") 69 | 70 | 71 | def make_log_files(config): 72 | """ 73 | Sets up log directories and saves git diff and command line. 
74 | """ 75 | config.run_name = '{}.{}.{}'.format(config.prefix, config.seed, config.suffix) 76 | 77 | config.log_dir = os.path.join(config.log_root_dir, config.run_name) 78 | logger.info('Create log directory: %s', config.log_dir) 79 | os.makedirs(config.log_dir, exist_ok=True) 80 | 81 | if config.is_train: 82 | # log config 83 | param_path = os.path.join(config.log_dir, 'params.json') 84 | logger.info('Store parameters in %s', param_path) 85 | with open(param_path, 'w') as fp: 86 | json.dump(config.__dict__, fp, indent=4, sort_keys=True) 87 | 88 | 89 | if __name__ == '__main__': 90 | args, unparsed = argparser() 91 | if len(unparsed): 92 | logger.error('Unparsed argument is detected:\n%s', unparsed) 93 | else: 94 | run(args) 95 | -------------------------------------------------------------------------------- /panda/rl/policies/mlp_actor_critic.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | import torch 3 | import torch.nn as nn 4 | import numpy as np 5 | from rl.policies.utils import MLP 6 | from rl.policies.actor_critic import Actor, Critic 7 | 8 | 9 | class MlpActor(Actor): 10 | def __init__(self, config, ob_space, ac_space, tanh_policy): 11 | super().__init__(config, ob_space, ac_space, tanh_policy) 12 | 13 | self._ac_space = ac_space # e.g. ActionSpace(shape=OrderedDict([('default', 8)]),minimum=-1.0, maximum=1.0) 14 | 15 | # observation # e.g. OrderedDict([('object-state', [10]), ('robot-state', [36])]) 16 | input_dim = sum([np.prod(x) for x in ob_space.values()]) # [[226,226,3],[36],[10]] (226*226*3)+36+10 17 | 18 | # build sequential layers of neural network 19 | self.fc = MLP(config, input_dim, config.rl_hid_size, [config.rl_hid_size]) # inp= 46, output= 64 20 | self.fc_means = nn.ModuleDict() 21 | self.fc_log_stds = nn.ModuleDict() 22 | 23 | for k, size in ac_space.shape.items(): # shape=OrderedDict([('default', 8)]) 24 | self.fc_means.update({k: MLP(config, config.rl_hid_size, size)}) # MLP here defines the out_layer of nn 25 | self.fc_log_stds.update({k: MLP(config, config.rl_hid_size, size)}) 26 | 27 | def forward(self, ob): # extracts input values from ob odict and passes it to nn 28 | inp = list(ob.values()) # [tensor([-0.007,.....,-0.51]), tensor([-0.08,.....,0.08])] 29 | if len(inp[0].shape) == 1: #[36] 30 | inp = [x.unsqueeze(0) for x in inp] # change the tensor shape from e.g. [36] to [1, 36] 31 | 32 | # concatenates the tensor to [1, 46], passes to model and activation_fn; out is a tensor of shape (1, hid_size) 33 | out = self._activation_fn(self.fc(torch.cat(inp, dim=-1))) 34 | out = torch.reshape(out, (out.shape[0], -1)) # [1, 64] 35 | 36 | means, stds = OrderedDict(), OrderedDict() 37 | for k in self._ac_space.keys(): 38 | mean = self.fc_means[k](out) # passes previous layers output to last layer to produce action tensor [1,8] 39 | log_std = self.fc_log_stds[k](out) # passes previous layers output to last layer for s.d. tensor 40 | log_std = torch.clamp(log_std, -10, 2) # clips between min and max 41 | std = torch.exp(log_std.double()) # exponential on s.d. 
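            # e.g. for the 'default' key (an 8-dim action here) mean and std are (1, 8)
            # tensors; downstream, Actor.act()/act_log() in actor_critic.py wrap them as
            # FixedNormal(mean, std) and, when tanh_policy is set, squash the sample,
            # roughly: action = torch.tanh(FixedNormal(mean, std).rsample())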
42 | means[k] = mean 43 | stds[k] = std 44 | 45 | return means, stds 46 | 47 | 48 | class MlpCritic(Critic): 49 | def __init__(self, config, ob_space, ac_space=None): 50 | super().__init__(config) 51 | 52 | input_dim = sum([np.prod(x) for x in ob_space.values()]) 53 | if ac_space is not None: 54 | input_dim += ac_space.size 55 | 56 | self.fc = MLP(config, input_dim, 1, [config.rl_hid_size] * 2) 57 | 58 | def forward(self, ob, ac=None): 59 | inp = list(ob.values()) 60 | if len(inp[0].shape) == 1: 61 | inp = [x.unsqueeze(0) for x in inp] 62 | 63 | if ac is not None: 64 | ac = list(ac.values()) 65 | if len(ac[0].shape) == 1: 66 | ac = [x.unsqueeze(0) for x in ac] 67 | inp.extend(ac) 68 | 69 | out = self.fc(torch.cat(inp, dim=-1)) 70 | out = torch.reshape(out, (out.shape[0], 1)) 71 | 72 | return out 73 | -------------------------------------------------------------------------------- /panda/rl/dataset.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from time import time 3 | import numpy as np 4 | 5 | class ReplayBuffer: 6 | def __init__(self, keys, buffer_size, sample_func): 7 | self._size = buffer_size # buffer size in config 8 | 9 | # memory management 10 | self._idx = 0 11 | self._current_size = 0 12 | self._sample_func = sample_func 13 | 14 | # create the buffer to store info 15 | self._keys = keys 16 | self._buffers = defaultdict(list) 17 | 18 | def clear(self): 19 | self._idx = 0 20 | self._current_size = 0 21 | self._buffers = defaultdict(list) 22 | 23 | # store the episode 24 | def store_episode(self, rollout): # stores an episode given a rollout; calling each time adds a new episode 25 | idx = self._idx = (self._idx + 1) % self._size 26 | self._current_size += 1 27 | 28 | if self._current_size > self._size: 29 | for k in self._keys: 30 | self._buffers[k][idx] = rollout[k] 31 | else: 32 | for k in self._keys: 33 | self._buffers[k].append(rollout[k]) 34 | 35 | # sample the data from the replay buffer 36 | def sample(self, batch_size): 37 | # sample transitions 38 | transitions = self._sample_func(self._buffers, batch_size) # buffers contains rollout(s), batch_size is in config 39 | return transitions 40 | 41 | def state_dict(self): 42 | return self._buffers 43 | 44 | def load_state_dict(self, state_dict): 45 | self._buffers = state_dict 46 | self._current_size = len(self._buffers['ac']) # no. of episodes in buffer 47 | 48 | 49 | class RandomSampler: 50 | def sample_func(self, episode_batch, batch_size_in_transitions): # episode_batch is buffers 51 | rollout_batch_size = len(episode_batch['ac']) # no. of episodes in buffer 52 | batch_size = batch_size_in_transitions 53 | 54 | episode_idxs = np.random.randint(0, rollout_batch_size, batch_size) # selects a list of episode idxs i.e. [2,0,4,0...], length equal to batch_size 55 | t_samples = [np.random.randint(len(episode_batch['ac'][episode_idx])) for episode_idx in episode_idxs] # [135,78,54,180,...] 
list 56 | 57 | transitions = {} 58 | for key in episode_batch.keys(): # selects transitions experiences corresponding to sampled timesteps from episode(s) 59 | # values are lists {'ob': [], 'ac': [], 'done': []}; len of each list is equal to batch_size 60 | transitions[key] = [episode_batch[key][episode_idx][t] for episode_idx, t in zip(episode_idxs, t_samples)] 61 | 62 | # selects next observations corresponding to sampled transitions from an episode(s) 63 | transitions['ob_next'] = [episode_batch['ob'][episode_idx][t + 1] for episode_idx, t in zip(episode_idxs, t_samples)] 64 | 65 | # join sequence of array values in transitions; ob values is a dict {'robot-state': array(), 'object-state': array()}, others are array 66 | new_transitions = {} 67 | for k, v in transitions.items(): 68 | if isinstance(v[0], dict): 69 | sub_keys = v[0].keys() 70 | new_transitions[k] = {sub_key: np.stack([v_[sub_key] for v_ in v]) for sub_key in sub_keys} 71 | else: 72 | new_transitions[k] = np.stack(v) 73 | return new_transitions 74 | -------------------------------------------------------------------------------- /panda/utils/mujoco_py_renderer.py: -------------------------------------------------------------------------------- 1 | from mujoco_py import MjViewer 2 | from mujoco_py.generated import const 3 | import glfw 4 | from collections import defaultdict 5 | 6 | 7 | class CustomMjViewer(MjViewer): 8 | 9 | keypress = defaultdict(list) 10 | keyup = defaultdict(list) 11 | keyrepeat = defaultdict(list) 12 | 13 | def key_callback(self, window, key, scancode, action, mods): 14 | if action == glfw.PRESS: 15 | tgt = self.keypress 16 | elif action == glfw.RELEASE: 17 | tgt = self.keyup 18 | elif action == glfw.REPEAT: 19 | tgt = self.keyrepeat 20 | else: 21 | return 22 | if tgt.get(key): 23 | for fn in tgt[key]: 24 | fn(window, key, scancode, action, mods) 25 | if tgt.get("any"): 26 | for fn in tgt["any"]: 27 | fn(window, key, scancode, action, mods) 28 | # retain functionality for closing the viewer 29 | if key == glfw.KEY_ESCAPE: 30 | super().key_callback(window, key, scancode, action, mods) 31 | else: 32 | # only use default mujoco callbacks if "any" callbacks are unset 33 | super().key_callback(window, key, scancode, action, mods) 34 | 35 | 36 | class MujocoPyRenderer: 37 | def __init__(self, sim): 38 | """ 39 | Args: 40 | sim: MjSim object 41 | """ 42 | 43 | self.viewer = CustomMjViewer(sim) 44 | self.viewer.cam.fixedcamid = 6 45 | self.callbacks = {} 46 | 47 | 48 | def set_camera(self, camera_id): 49 | """ 50 | Set the camera view to the specified camera ID. 51 | """ 52 | self.viewer.cam.fixedcamid = camera_id 53 | self.viewer.cam.type = const.CAMERA_FIXED 54 | 55 | def render(self): 56 | # safe for multiple calls 57 | self.viewer.render() 58 | 59 | def close(self): 60 | """ 61 | Destroys the open window and renders (pun intended) the viewer useless. 62 | """ 63 | glfw.destroy_window(self.viewer.window) 64 | self.viewer = None 65 | 66 | def add_keypress_callback(self, key, fn): 67 | """ 68 | Allows for custom callback functions for the viewer. Called on key down. 69 | Parameter 'any' will ensure that the callback is called on any key down, 70 | and block default mujoco viewer callbacks from executing, except for 71 | the ESC callback to close the viewer. 72 | """ 73 | self.viewer.keypress[key].append(fn) 74 | 75 | def add_keyup_callback(self, key, fn): 76 | """ 77 | Allows for custom callback functions for the viewer. Called on key up. 
78 | Parameter 'any' will ensure that the callback is called on any key up, 79 | and block default mujoco viewer callbacks from executing, except for 80 | the ESC callback to close the viewer. 81 | """ 82 | self.viewer.keyup[key].append(fn) 83 | 84 | def add_keyrepeat_callback(self, key, fn): 85 | """ 86 | Allows for custom callback functions for the viewer. Called on key repeat. 87 | Parameter 'any' will ensure that the callback is called on any key repeat, 88 | and block default mujoco viewer callbacks from executing, except for 89 | the ESC callback to close the viewer. 90 | """ 91 | self.viewer.keyrepeat[key].append(fn) 92 | -------------------------------------------------------------------------------- /panda/config/__init__.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from utils import str2bool, str2list 4 | 5 | 6 | def argparser(): 7 | parser = argparse.ArgumentParser('Grasping Environment', formatter_class=argparse.ArgumentDefaultsHelpFormatter) 8 | 9 | # Grasping 10 | import config.grasping as grasp 11 | grasp.add_argument(parser) 12 | 13 | # training algorithm 14 | parser.add_argument('--algo', type=str, default='ppo', 15 | choices=['sac', 'ppo']) 16 | 17 | # vanilla rl 18 | parser.add_argument('--rl_hid_size', type=int, default= 64) 19 | parser.add_argument('--rl_activation', type=str, default='relu', 20 | choices=['relu', 'elu', 'tanh']) 21 | parser.add_argument('--tanh_policy', type=str2bool, default=True) 22 | 23 | # observation normalization 24 | parser.add_argument('--ob_norm', type=str2bool, default=True) 25 | parser.add_argument('--clip_obs', type=float, default=200, help='the clip range of observation') 26 | parser.add_argument('--clip_range', type=float, default=5, help='the clip range after normalization of observation') 27 | 28 | # off-policy rl 29 | parser.add_argument('--buffer_size', type=int, default=int(1e5), help='the size of the buffer') 30 | parser.add_argument('--discount_factor', type=float, default=0.99, help='the discount factor') 31 | parser.add_argument('--lr_actor', type=float, default=3e-4, help='the learning rate of the actor') 32 | parser.add_argument('--lr_critic', type=float, default=3e-4, help='the learning rate of the critic') 33 | parser.add_argument('--polyak', type=float, default=0.995, help='the average coefficient') 34 | 35 | # training 36 | parser.add_argument('--is_train', type=str2bool, default=False) 37 | parser.add_argument('--num_batches', type=int, default=60, help='the times to update the network per epoch') 38 | parser.add_argument('--batch_size', type=int, default=512, help='the sample batch size') 39 | parser.add_argument('--max_grad_norm', type=float, default=100) 40 | parser.add_argument('--max_global_step', type=int, default=int(1e7)) 41 | parser.add_argument('--gpu', type=int, default=0) 42 | 43 | # ppo 44 | parser.add_argument('--clip_param', type=float, default=0.2) 45 | parser.add_argument('--value_loss_coeff', type=float, default=0.5) 46 | parser.add_argument('--action_loss_coeff', type=float, default=1e-2) 47 | parser.add_argument('--entropy_loss_coeff', type=float, default=1e-4) 48 | parser.add_argument('--rollout_length', type=int, default=1800) 49 | parser.add_argument('--gae_lambda', type=float, default=0.95) 50 | 51 | # sac 52 | parser.add_argument('--reward_scale', type=float, default=1.0, help='reward scale') 53 | 54 | # log 55 | parser.add_argument('--log_interval', type=int, default=1) 56 | parser.add_argument('--evaluate_interval', type=int, 
default=10) 57 | parser.add_argument('--ckpt_interval', type=int, default=200) 58 | parser.add_argument('--log_root_dir', type=str, default='log') 59 | parser.add_argument('--wandb', type=str2bool, default=True, 60 | help='set it True if you want to use wandb') 61 | 62 | # evaluation 63 | parser.add_argument('--ckpt_num', type=int, default=None) 64 | parser.add_argument('--num_eval', type=int, default=10) 65 | parser.add_argument('--num_record_samples', type=int, default=1, help='number of trajectories to collect during eval') 66 | 67 | 68 | # misc 69 | parser.add_argument('--prefix', type=str, default='rl') 70 | parser.add_argument('--suffix', type=str, default='') 71 | parser.add_argument('--notes', type=str, default='') 72 | parser.add_argument('--seed', type=int, default=111, help='Random seed') 73 | 74 | args, unparsed = parser.parse_known_args() 75 | 76 | return args, unparsed 77 | -------------------------------------------------------------------------------- /panda/rl/rollouts.py: -------------------------------------------------------------------------------- 1 | """ 2 | Runs rollouts (RolloutRunner class) and collects transitions using Rollout class. 3 | """ 4 | 5 | from collections import defaultdict 6 | import numpy as np 7 | 8 | 9 | class Rollout(): 10 | """ 11 | Rollout storing an episode.(Stores data for all timesteps in a full episode) 12 | """ 13 | 14 | def __init__(self): 15 | """ Initialize buffer. """ 16 | self._history = defaultdict(list) 17 | 18 | def add(self, data): 19 | """ Add a transition @data to rollout buffer. """ 20 | for key, value in data.items(): 21 | self._history[key].append(value) 22 | 23 | def get(self): 24 | """ Returns rollout buffer and clears buffer. """ 25 | batch = {} 26 | batch['ob'] = self._history['ob'] 27 | batch['ac'] = self._history['ac'] 28 | batch['ac_before_activation'] = self._history['ac_before_activation'] 29 | batch['done'] = self._history['done'] 30 | batch['rew'] = self._history['rew'] 31 | self._history = defaultdict(list) 32 | return batch 33 | 34 | class RolloutRunner(): 35 | """ 36 | Run rollout given environment and policy. 37 | """ 38 | 39 | def __init__(self, config, env, pi): 40 | """ 41 | Args: 42 | config: configurations for the environment. 43 | env: environment. 44 | pi: policy. 45 | """ 46 | 47 | self._config = config 48 | self._env = env 49 | self._pi = pi 50 | 51 | def run_episode(self, max_step=600, is_train=True): 52 | """ 53 | Runs one episode and returns the rollout. 54 | 55 | Args: 56 | max_step: maximum number of steps of the rollout. 57 | is_train: whether rollout is for training or evaluation. 58 | """ 59 | config = self._config 60 | device = config.device 61 | env = self._env 62 | pi = self._pi 63 | 64 | # initialize rollout buffer 65 | rollout = Rollout() 66 | reward_info = defaultdict(list) 67 | acs = [] 68 | done = False 69 | ep_len = 0 70 | ep_rew = 0 71 | ob = self._env.reset() 72 | 73 | # buffer to save qpos 74 | saved_qpos = [] 75 | 76 | # run rollout 77 | while not done and ep_len < max_step: 78 | # sample action from policy 79 | ac, ac_before_activation = pi.act(ob, is_train=is_train) 80 | 81 | rollout.add({'ob': ob, 'ac': ac, 'ac_before_activation': ac_before_activation}) 82 | # joints positions 83 | saved_qpos.append(env.sim.get_state().qpos[:9].copy()) 84 | 85 | # take a step 86 | ob, reward, done, info = env.step(ac) # e.g. 
info = {'episode_success': 0} 87 | env.render() 88 | rollout.add({'done': done, 'rew': reward}) 89 | acs.append(ac) 90 | ep_len += 1 91 | ep_rew += reward 92 | 93 | for key, value in info.items(): 94 | reward_info[key].append(value) 95 | 96 | # last frame 97 | rollout.add({'ob': ob}) 98 | saved_qpos.append(env.sim.get_state().qpos[:9].copy()) 99 | 100 | # compute average/sum of information 101 | ep_info = {'len': ep_len, 'rew': ep_rew, 'saved_qpos': saved_qpos} # qpos contains values for all timesteps, len and rew contain only sum 102 | for key, value in reward_info.items(): 103 | if isinstance(value[0], (int, float, bool)): 104 | if '_mean' in key: 105 | ep_info[key] = np.mean(value) 106 | else: 107 | ep_info[key] = np.sum(value) 108 | 109 | 110 | return rollout.get(), ep_info 111 | -------------------------------------------------------------------------------- /panda/models/objects/objects.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import xml.etree.ElementTree as ET 3 | import numpy as np 4 | from models.base import MujocoXML 5 | from utils.mjcf_utils import string_to_array, array_to_string 6 | 7 | 8 | class MujocoObject: 9 | """ 10 | Base class for all objects. 11 | 12 | We use Mujoco Objects to implement all objects that 13 | 1) may appear for multiple times in a task 14 | 2) can be swapped between different tasks 15 | 16 | Typical methods return copy so the caller can all joints/attributes as wanted 17 | 18 | Attributes: 19 | asset (TYPE): Description 20 | """ 21 | 22 | def __init__(self): 23 | self.asset = ET.Element("asset") 24 | 25 | def get_horizontal_radius(self): 26 | """ 27 | Returns scalar 28 | If object a,b has horizontal distance d 29 | a.get_horizontal_radius() + b.get_horizontal_radius() < d 30 | should mean that a, b has no contact 31 | 32 | Helps us put objects programmatically without them flying away due to 33 | a huge initial contact force 34 | 35 | Returns: 36 | Float: radius 37 | 38 | Raises: 39 | NotImplementedError: Description 40 | """ 41 | raise NotImplementedError 42 | # return 2 43 | 44 | def get_collision(self, name=None, site=False): 45 | """ 46 | Returns a ET.Element 47 | It is a subtree that defines all collision related fields 48 | of this object. 
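        (For xml-based objects this is the <body name="collision"> subtree of the
        object's model file, with its geoms renamed to @name; see
        MujocoXMLObject.get_collision below.)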
49 | 50 | Return is a copy 51 | 52 | Args: 53 | name (None, optional): Assign name to body 54 | site (False, optional): Add a site (with name @name 55 | when applicable) to the returned body 56 | 57 | Returns: 58 | ET.Element: body 59 | 60 | Raises: 61 | NotImplementedError: Description 62 | """ 63 | raise NotImplementedError 64 | 65 | def get_site_attrib_template(self): 66 | """ 67 | Returns attribs of spherical site used to mark body origin 68 | 69 | Returns: 70 | Dictionary of default site attributes 71 | """ 72 | return { 73 | "pos": "0 0 0", 74 | "size": "0.006 0.006 0.006", 75 | "rgba": "1 0 0 1", 76 | "type": "sphere", 77 | } 78 | 79 | 80 | class MujocoXMLObject(MujocoXML, MujocoObject): 81 | """ 82 | MujocoObjects that are loaded from xml files 83 | """ 84 | 85 | def __init__(self, fname): 86 | """ 87 | Args: 88 | fname (TYPE): XML File path 89 | """ 90 | MujocoXML.__init__(self, fname) 91 | 92 | def get_horizontal_radius(self): 93 | horizontal_radius_site = self.worldbody.find("./body/site[@name='horizontal_radius_site']") 94 | return string_to_array(horizontal_radius_site.get("pos"))[0] 95 | 96 | def get_collision(self, name=None, site=False): 97 | 98 | collision = copy.deepcopy(self.worldbody.find("./body/body[@name='collision']")) 99 | collision.attrib.pop("name") 100 | if name is not None: 101 | collision.attrib["name"] = name 102 | geoms = collision.findall("geom") 103 | if len(geoms) == 1: 104 | geoms[0].set("name", name) 105 | else: 106 | for i in range(len(geoms)): 107 | geoms[i].set("name", "{}-{}".format(name, i)) 108 | if site: 109 | # add a site as well 110 | template = self.get_site_attrib_template() 111 | template["rgba"] = "1 1 0 1" 112 | if name is not None: 113 | template["name"] = name 114 | collision.append(ET.Element("site", attrib=template)) 115 | return collision 116 | -------------------------------------------------------------------------------- /panda/models/assets/gripper/panda_gripper.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /panda/rl/policies/actor_critic.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from rl.policies.distributions import FixedNormal, MixedDistribution 7 | from utils.pytorch import to_tensor 8 | 9 | 10 | class Actor(nn.Module): 11 | def __init__(self, config, ob_space, ac_space, tanh_policy): 12 | super().__init__() 13 | self._config = config 14 | self._activation_fn = getattr(F, config.rl_activation) # elu, relu, tanh 15 | self._tanh = tanh_policy 16 | 17 | @property 18 | def info(self): 19 | return {} 20 | 21 | def act(self, ob, is_train=True, return_log_prob=False): 22 | ob = to_tensor(ob, self._config.device) 23 | self._ob = ob 24 | means, stds = self.forward(ob) # OrderedDict([('default', tensor()] 25 | dists = OrderedDict() 26 | actions = OrderedDict() 27 | 28 | for k in self._ac_space.keys(): 29 | dists[k] = FixedNormal(means[k], stds[k]) # Normal dist with double args Normal(loc: , scale: ) 30 | # dists - OrderedDict([('default', Normal(loc: torch.Size([1, 8]), 
scale: torch.Size([1, 8])))]) 31 | 32 | mixed_dist = MixedDistribution(dists) # a normal distribution 33 | 34 | activations = mixed_dist.sample() # OrderedDict([('default', tensor()] 35 | 36 | if return_log_prob: 37 | log_probs = mixed_dist.log_probs(activations) 38 | 39 | for k in self._ac_space.keys(): 40 | z = activations[k] 41 | if self._tanh: 42 | action = torch.tanh(z) 43 | if return_log_prob: 44 | # follow the Appendix C. Enforcing Action Bounds 45 | log_det_jacobian = 2 * (np.log(2.) - z - F.softplus(-2. * z)).sum(dim=-1, keepdim=True) 46 | log_probs[k] = log_probs[k] - log_det_jacobian 47 | else: 48 | action = z 49 | 50 | # convert tensor to numpy array 51 | actions[k] = action.detach().cpu().numpy().squeeze(0) 52 | activations[k] = z.detach().cpu().numpy().squeeze(0) 53 | 54 | if return_log_prob: 55 | log_probs_ = torch.cat(list(log_probs.values()), -1).sum(-1, keepdim=True) 56 | if log_probs_.min() < -100: 57 | print('sampling an action with a probability of 1e-100') 58 | import ipdb; ipdb.set_trace() 59 | 60 | log_probs_ = log_probs_.detach().cpu().numpy().squeeze(0) 61 | return actions, activations, log_probs_ 62 | else: 63 | return actions, activations 64 | 65 | def act_log(self, ob, activations=None): 66 | self._ob = ob.copy() 67 | means, stds = self.forward(ob) 68 | dists = OrderedDict() 69 | actions = OrderedDict() 70 | 71 | for k in self._ac_space.keys(): 72 | dists[k] = FixedNormal(means[k], stds[k]) 73 | 74 | mixed_dist = MixedDistribution(dists) 75 | # activations is OrderedDict([('default', tensor()]; tensor size is (batch_size, 8) 76 | activations_ = mixed_dist.rsample() if activations is None else activations 77 | 78 | for k in activations_.keys(): 79 | if len(activations_[k].shape) == 1: 80 | activations_[k] = activations_[k].unsqueeze(0) 81 | 82 | # log_probs is OrderedDict([('default', tensor()]; tensor size is (batch_size, 1) 83 | log_probs = mixed_dist.log_probs(activations_) 84 | 85 | for k in self._ac_space.keys(): 86 | z = activations_[k] 87 | if self._tanh: 88 | action = torch.tanh(z) 89 | # follow the Appendix C. Enforcing Action Bounds 90 | log_det_jacobian = 2 * (np.log(2.) - z - F.softplus(-2. * z)).sum(dim=-1, keepdim=True) 91 | log_probs[k] = log_probs[k] - log_det_jacobian 92 | else: 93 | action = z 94 | 95 | actions[k] = action 96 | 97 | ents = mixed_dist.entropy() # ents is tensor of shape (batch_size) 98 | log_probs_ = torch.cat(list(log_probs.values()), -1).sum(-1, keepdim=True) # log_probs_ is a tensor of shape (batch_size, 1) 99 | if activations is None: 100 | return actions, log_probs_ 101 | else: 102 | return log_probs_, ents 103 | 104 | class Critic(nn.Module): 105 | def __init__(self, config): 106 | super().__init__() 107 | self._config = config 108 | -------------------------------------------------------------------------------- /panda/utils/mjcf_utils.py: -------------------------------------------------------------------------------- 1 | # utility functions for manipulating MJCF XML models 2 | 3 | import xml.etree.ElementTree as ET 4 | import os 5 | import numpy as np 6 | 7 | import models 8 | 9 | RED = [1, 0, 0, 1] 10 | GREEN = [0, 1, 0, 1] 11 | BLUE = [0, 0, 1, 1] 12 | 13 | 14 | def xml_path_completion(xml_path): 15 | """ 16 | Takes in a local xml path and returns a full path. 
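    e.g. "arena/table_arena.xml" resolves to
    os.path.join(models.assets_root, "arena/table_arena.xml"), while an absolute
    path such as "/tmp/model.xml" is returned unchanged.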
17 | if @xml_path is absolute, do nothing 18 | if @xml_path is not absolute, load xml that is shipped by the package 19 | """ 20 | if xml_path.startswith("/"): 21 | full_path = xml_path 22 | else: 23 | full_path = os.path.join(models.assets_root, xml_path) 24 | return full_path 25 | 26 | 27 | def array_to_string(array): 28 | """ 29 | Converts a numeric array into the string format in mujoco. 30 | 31 | Examples: 32 | [0, 1, 2] => "0 1 2" 33 | """ 34 | return " ".join(["{}".format(x) for x in array]) 35 | 36 | 37 | def string_to_array(string): 38 | """ 39 | Converts a array string in mujoco xml to np.array. 40 | 41 | Examples: 42 | "0 1 2" => [0, 1, 2] 43 | """ 44 | return np.array([float(x) for x in string.split(" ")]) 45 | 46 | 47 | def set_alpha(node, alpha=0.1): 48 | """ 49 | Sets all a(lpha) field of the rgba attribute to be @alpha 50 | for @node and all subnodes 51 | used for managing display 52 | """ 53 | for child_node in node.findall(".//*[@rgba]"): 54 | rgba_orig = string_to_array(child_node.get("rgba")) 55 | child_node.set("rgba", array_to_string(list(rgba_orig[0:3]) + [alpha])) 56 | 57 | 58 | def new_joint(**kwargs): 59 | """ 60 | Creates a joint tag with attributes specified by @**kwargs. 61 | """ 62 | 63 | element = ET.Element("joint", attrib=kwargs) 64 | return element 65 | 66 | 67 | def new_actuator(joint, act_type="actuator", **kwargs): 68 | """ 69 | Creates an actuator tag with attributes specified by @**kwargs. 70 | 71 | Args: 72 | joint: type of actuator transmission. 73 | see all types here: http://mujoco.org/book/modeling.html#actuator 74 | act_type (str): actuator type. Defaults to "actuator" 75 | 76 | """ 77 | element = ET.Element(act_type, attrib=kwargs) 78 | element.set("joint", joint) 79 | return element 80 | 81 | 82 | def new_site(name, rgba=RED, pos=(0, 0, 0), size=(0.005,), **kwargs): 83 | """ 84 | Creates a site element with attributes specified by @**kwargs. 85 | 86 | Args: 87 | name (str): site name. 88 | rgba: color and transparency. Defaults to solid red. 89 | pos: 3d position of the site. 90 | size ([float]): site size (sites are spherical by default). 91 | """ 92 | kwargs["rgba"] = array_to_string(rgba) 93 | kwargs["pos"] = array_to_string(pos) 94 | kwargs["size"] = array_to_string(size) 95 | kwargs["name"] = name 96 | element = ET.Element("site", attrib=kwargs) 97 | return element 98 | 99 | 100 | def new_geom(geom_type, size, pos=(0, 0, 0), rgba=RED, group=0, **kwargs): 101 | """ 102 | Creates a geom element with attributes specified by @**kwargs. 103 | 104 | Args: 105 | geom_type (str): type of the geom. 106 | see all types here: http://mujoco.org/book/modeling.html#geom 107 | size: geom size parameters. 108 | pos: 3d position of the geom frame. 109 | rgba: color and transparency. Defaults to solid red. 110 | group: the integrer group that the geom belongs to. useful for 111 | separating visual and physical elements. 112 | """ 113 | kwargs["type"] = str(geom_type) 114 | kwargs["size"] = array_to_string(size) 115 | kwargs["rgba"] = array_to_string(rgba) 116 | kwargs["group"] = str(group) 117 | kwargs["pos"] = array_to_string(pos) 118 | element = ET.Element("geom", attrib=kwargs) 119 | return element 120 | 121 | 122 | def new_body(name=None, pos=None, **kwargs): 123 | """ 124 | Creates a body element with attributes specified by @**kwargs. 125 | 126 | Args: 127 | name (str): body name. 128 | pos: 3d position of the body frame. 
129 | """ 130 | if name is not None: 131 | kwargs["name"] = name 132 | if pos is not None: 133 | kwargs["pos"] = array_to_string(pos) 134 | element = ET.Element("body", attrib=kwargs) 135 | return element 136 | 137 | 138 | def new_inertial(name=None, pos=(0, 0, 0), mass=None, **kwargs): 139 | """ 140 | Creates a inertial element with attributes specified by @**kwargs. 141 | 142 | Args: 143 | mass: The mass of inertial 144 | """ 145 | if mass is not None: 146 | kwargs["mass"] = str(mass) 147 | kwargs["pos"] = array_to_string(pos) 148 | element = ET.Element("inertial", attrib=kwargs) 149 | return element 150 | -------------------------------------------------------------------------------- /panda/models/base.py: -------------------------------------------------------------------------------- 1 | import os 2 | import xml.dom.minidom 3 | import xml.etree.ElementTree as ET 4 | import io 5 | import numpy as np 6 | 7 | 8 | class MujocoXML(object): 9 | """ 10 | Base class of Mujoco xml file 11 | Wraps around ElementTree and provides additional functionality for merging different models. 12 | Specially, we keep track of , and 13 | """ 14 | 15 | def __init__(self, fname): 16 | """ 17 | Loads a mujoco xml from file. 18 | 19 | Args: 20 | fname (str): path to the MJCF xml file. 21 | """ 22 | self.file = fname 23 | self.folder = os.path.dirname(fname) 24 | self.tree = ET.parse(fname) 25 | self.root = self.tree.getroot() 26 | self.name = self.root.get("model") 27 | self.worldbody = self.create_default_element("worldbody") 28 | self.actuator = self.create_default_element("actuator") 29 | self.asset = self.create_default_element("asset") 30 | self.equality = self.create_default_element("equality") 31 | self.sensor = self.create_default_element("sensor") 32 | self.contact = self.create_default_element("contact") 33 | self.default = self.create_default_element("default") 34 | self.resolve_asset_dependency() 35 | 36 | def resolve_asset_dependency(self): 37 | """ 38 | Converts every file dependency into absolute path so when we merge we don't break things. 39 | """ 40 | 41 | for node in self.asset.findall("./*[@file]"): 42 | file = node.get("file") 43 | abs_path = os.path.abspath(self.folder) 44 | abs_path = os.path.join(abs_path, file) 45 | node.set("file", abs_path) 46 | 47 | def create_default_element(self, name): 48 | """ 49 | Creates a <@name/> tag under root if there is none. 50 | """ 51 | 52 | found = self.root.find(name) 53 | if found is not None: 54 | return found 55 | ele = ET.Element(name) 56 | self.root.append(ele) 57 | return ele 58 | 59 | def merge(self, other, merge_body=True): 60 | """ 61 | Default merge method. 62 | 63 | Args: 64 | other: another MujocoXML instance 65 | raises XML error if @other is not a MujocoXML instance. 66 | merges , and of @other into @self 67 | merge_body: True if merging child bodies of @other. Defaults to True. 
68 | """ 69 | if merge_body: 70 | for body in other.worldbody: 71 | self.worldbody.append(body) 72 | self.merge_asset(other) 73 | for one_actuator in other.actuator: 74 | self.actuator.append(one_actuator) 75 | for one_equality in other.equality: 76 | self.equality.append(one_equality) 77 | for one_sensor in other.sensor: 78 | self.sensor.append(one_sensor) 79 | for one_contact in other.contact: 80 | self.contact.append(one_contact) 81 | for one_default in other.default: 82 | self.default.append(one_default) 83 | # self.config.append(other.config) 84 | 85 | def get_model(self, mode="mujoco_py"): 86 | """ 87 | Returns a MjModel instance from the current xml tree. 88 | """ 89 | 90 | available_modes = ["mujoco_py"] 91 | with io.StringIO() as string: 92 | string.write(ET.tostring(self.root, encoding="unicode")) 93 | if mode == "mujoco_py": 94 | from mujoco_py import load_model_from_xml 95 | 96 | model = load_model_from_xml(string.getvalue()) 97 | return model 98 | raise ValueError( 99 | "Unkown model mode: {}. Available options are: {}".format( 100 | mode, ",".join(available_modes) 101 | ) 102 | ) 103 | 104 | def get_xml(self): 105 | """ 106 | Returns a string of the MJCF XML file. 107 | """ 108 | with io.StringIO() as string: 109 | string.write(ET.tostring(self.root, encoding="unicode")) 110 | return string.getvalue() 111 | 112 | def save_model(self, fname, pretty=False): 113 | """ 114 | Saves the xml to file. 115 | 116 | Args: 117 | fname: output file location 118 | pretty: attempts!! to pretty print the output 119 | """ 120 | with open(fname, "w") as f: 121 | xml_str = ET.tostring(self.root, encoding="unicode") 122 | if pretty: 123 | # TODO: get a better pretty print library 124 | parsed_xml = xml.dom.minidom.parseString(xml_str) 125 | xml_str = parsed_xml.toprettyxml(newl="") 126 | f.write(xml_str) 127 | 128 | def merge_asset(self, other): 129 | """ 130 | Useful for merging other files in a custom logic. 
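        Each asset element of @other is appended only if no element with the same
        tag and name already exists in self.asset, so repeated merges do not
        duplicate textures or meshes.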
131 | """ 132 | for asset in other.asset: 133 | asset_name = asset.get("name") 134 | asset_type = asset.tag 135 | # Avoids duplication 136 | pattern = "./{}[@name='{}']".format(asset_type, asset_name) 137 | if self.asset.find(pattern) is None: 138 | self.asset.append(asset) 139 | -------------------------------------------------------------------------------- /panda/controller/arm_controller.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from enum import Enum 3 | import mujoco_py 4 | from scipy.interpolate import CubicSpline 5 | 6 | 7 | class ControllerType(str, Enum): 8 | JOINT_VEL = 'joint_velocity' 9 | 10 | 11 | class Controller(): 12 | def __init__(self, control_max, control_min, max_action, min_action, policy_freq=250, interpolation=None): 13 | 14 | # Upper and lower limits to the input action (only pos/ori) 15 | self.control_max = control_max # array([1, 1, 1, 1, 1, 1, 1]) 16 | self.control_min = control_min # array([-1, -1, -1, -1, -1, -1, -1]) 17 | 18 | # Dimensionality of the action 19 | self.control_dim = self.control_max.shape[0] # 7 20 | 21 | # Limits to the policy outputs 22 | self.input_max = max_action # 1 23 | self.input_min = min_action # -1 24 | 25 | # This handles when the mean of max and min control is not zero -> actions are around that mean 26 | self.action_scale = abs(self.control_max - self.control_min) / abs(max_action - min_action) # array([1., 1., 1., 1., 1., 1., 1.]) 27 | self.action_output_transform = (self.control_max + self.control_min) / 2.0 # array([0., 0., 0., 0., 0., 0., 0.]) 28 | self.action_input_transform = (max_action + min_action) / 2.0 # 0 29 | 30 | # Frequency at which actions from the robot policy are fed into this controller 31 | self.policy_freq = policy_freq 32 | self.interpolation = interpolation # "linear" 33 | 34 | self.ramp_ratio = 1 # Percentage of the time between policy timesteps used for interpolation 35 | 36 | # Initialize the remaining attributes 37 | self.model_timestep = None 38 | self.interpolation_steps = None 39 | self.current_joint_velocity = None 40 | 41 | def reset(self): 42 | """ 43 | Resets the internal values of the controller 44 | """ 45 | pass 46 | 47 | def transform_action(self, action): 48 | """ 49 | Scale the action to go to the right min and max 50 | """ 51 | action = np.clip(action, self.input_min, self.input_max) # -1, 1 52 | transformed_action = (action - self.action_input_transform) * self.action_scale + self.action_output_transform 53 | 54 | return transformed_action 55 | 56 | def update_model(self, sim, joint_index): 57 | """ 58 | Updates the state of the robot used to compute the control command 59 | """ 60 | self.sim_freq = 1 / sim.model.opt.timestep 61 | self.interpolation_steps = np.floor(self.ramp_ratio * self.sim_freq / self.policy_freq) 62 | self.current_joint_velocity = sim.data.qvel[joint_index] 63 | 64 | 65 | def linear_interpolate(self, last_goal, goal): 66 | """ 67 | Set self.linear to be a function interpolating between last_goal and goal based on the ramp_ratio 68 | """ 69 | # We interpolate to reach the commanded desired position in self.ramp_ratio % of the time we have this goal 70 | delta_x_per_step = (goal - last_goal) / self.interpolation_steps 71 | self.linear = np.array([(last_goal + i * delta_x_per_step) for i in range(1, int(self.interpolation_steps) + 1)]) 72 | 73 | def action_to_torques(self, action, policy_step): 74 | raise NotImplementedError 75 | 76 | @property 77 | def action_dim(self): 78 | """ 79 | Returns 
dimensionality of the actions 80 | """ 81 | dim = self.control_dim 82 | return dim 83 | class JointVelocityController(Controller): 84 | """ 85 | Class to interprete actions as joint velocities 86 | """ 87 | ## "joint_velocity":{"control_range": [1, 1, 1, 1, 1, 1, 1], "kv": [8.0, 7.0, 6.0, 4.0, 2.0, 0.5, 0.1], "interpolation": "linear"}, 88 | 89 | def __init__(self, control_range, kv, max_action=1, min_action=-1, interpolation=None): 90 | super(JointVelocityController, self).__init__( 91 | control_max=np.array(control_range), 92 | control_min=-1 * np.array(control_range), 93 | max_action=max_action, 94 | min_action=min_action, 95 | interpolation=interpolation) 96 | 97 | self.kv = np.array(kv) 98 | self.interpolate = True 99 | 100 | self.last_goal = np.zeros(self.control_dim) 101 | self.step = 0 102 | 103 | def reset(self): 104 | super().reset() 105 | self.step = 0 106 | self.last_goal = np.zeros(self.control_dim) 107 | 108 | def action_to_torques(self, action, policy_step): 109 | action = self.transform_action(action) # clipping is done 110 | if policy_step: 111 | self.step = 0 112 | self.goal = np.array((action)) 113 | 114 | if self.interpolation == "linear": 115 | self.linear_interpolate(self.last_goal, self.goal) 116 | else: 117 | self.last_goal = np.array((self.goal)) 118 | 119 | if self.interpolation == "linear": 120 | self.last_goal = self.linear[self.step] 121 | 122 | if self.step < self.interpolation_steps - 1: 123 | self.step += 1 124 | # Torques for each joint are kv*(q_dot_desired - q_dot) 125 | torques = np.multiply(self.kv, (self.last_goal - self.current_joint_velocity)) 126 | 127 | return torques 128 | -------------------------------------------------------------------------------- /panda/rl/normalizer.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | import numpy as np 3 | from utils.mpi import mpi_average 4 | 5 | 6 | class SubNormalizer: 7 | def __init__(self, size, eps=1e-2, default_clip_range=np.inf, clip_obs=np.inf): 8 | if isinstance(size, list): # size is ob_space dimension i-e [10], [36] 9 | self.size = size 10 | else: 11 | self.size = [size] 12 | self.eps = eps 13 | self.default_clip_range = default_clip_range 14 | self.clip_obs = clip_obs 15 | 16 | # some local information 17 | self.local_sum = np.zeros(self.size, np.float32) 18 | self.local_sumsq = np.zeros(self.size, np.float32) 19 | self.local_count = np.zeros(1, np.float32) 20 | # get the total sum sumsq and sum count 21 | self.total_sum = np.zeros(self.size, np.float32) 22 | self.total_sumsq = np.zeros(self.size, np.float32) 23 | self.total_count = np.ones(1, np.float32) 24 | # get the mean and std 25 | self.mean = np.zeros(self.size, np.float32) 26 | self.std = np.ones(self.size, np.float32) 27 | 28 | def _clip(self, v): # clips the observation in range defined by clip_obs e.g.(-200, 200) 29 | return np.clip(v, -self.clip_obs, self.clip_obs) 30 | 31 | # update the parameters of the normalizer 32 | def update(self, v): # takes in observation array i-e. v['robot-state']; shape is e.g. (201,36) 33 | v = self._clip(v) 34 | v = v.reshape([-1] + self.size) # size is dim. of observation e.g. [36], [10], depending on obs key 35 | # do the computing 36 | self.local_sum += v.sum(axis=0) # sum obs along first axis(timesteps); e.g. shape is 36 37 | self.local_sumsq += (np.square(v)).sum(axis=0) 38 | self.local_count[0] += v.shape[0] # e.g. 
201 39 | 40 | # sync the parameters across the cpus 41 | def sync(self, local_sum, local_sumsq, local_count): 42 | local_sum[...] = mpi_average(local_sum) 43 | local_sumsq[...] = mpi_average(local_sumsq) 44 | local_count[...] = mpi_average(local_count) 45 | return local_sum, local_sumsq, local_count 46 | 47 | def recompute_stats(self): 48 | local_count = self.local_count.copy() 49 | local_sum = self.local_sum.copy() 50 | local_sumsq = self.local_sumsq.copy() 51 | # reset 52 | self.local_count[...] = 0 53 | self.local_sum[...] = 0 54 | self.local_sumsq[...] = 0 55 | # synrc the stats 56 | sync_sum, sync_sumsq, sync_count = self.sync(local_sum, local_sumsq, local_count) 57 | # update the total stuff 58 | self.total_sum += sync_sum 59 | self.total_sumsq += sync_sumsq 60 | self.total_count += sync_count 61 | # calculate the new mean and std 62 | self.mean = self.total_sum / self.total_count 63 | self.std = np.sqrt(np.maximum(np.square(self.eps), (self.total_sumsq / self.total_count) - np.square(self.total_sum / self.total_count))) 64 | 65 | # normalize the observation 66 | def normalize(self, v, clip_range=None): # v is observation array 67 | v = self._clip(v) 68 | if clip_range is None: 69 | clip_range = self.default_clip_range 70 | return np.clip((v - self.mean) / (self.std), -clip_range, clip_range) 71 | 72 | def state_dict(self): 73 | return {'sum': self.total_sum, 'sumsq': self.total_sumsq, 'count': self.total_count} 74 | 75 | def load_state_dict(self, state_dict): 76 | self.total_sum = state_dict['sum'] 77 | self.total_sumsq = state_dict['sumsq'] 78 | self.total_count = state_dict['count'] 79 | self.mean = self.total_sum / self.total_count 80 | self.std = np.sqrt(np.maximum(np.square(self.eps), (self.total_sumsq / self.total_count) - np.square(self.total_sum / self.total_count))) 81 | 82 | 83 | class Normalizer: 84 | def __init__(self, shape, eps=1e-2, default_clip_range=np.inf, clip_obs=np.inf): 85 | self._shape = shape # shape = ODict([('object-state', [10]), ('robot-state', [36])]) 86 | if not isinstance(shape, dict): 87 | self._shape = {'': shape} 88 | print('New ob_norm with shape', self._shape) 89 | 90 | self._keys = sorted(self._shape.keys()) # ['object-state', 'robot-state'] 91 | 92 | self.sub_norm = {} # i-e {'object-state': SubNormalizer([10], eps, 5, 200), 'robot-state': SubNormalizer([36], eps, 5, 200)} 93 | for key in self._keys: 94 | self.sub_norm[key] = SubNormalizer(self._shape[key], eps, default_clip_range, clip_obs) 95 | 96 | # update the parameters of the normalizer 97 | def update(self, v): # v is rollout['ob'] 98 | if isinstance(v, list): 99 | if isinstance(v[0], dict): 100 | # stacks obs arrays values acc. 
to keys i-e ['robot-state'] arrays for all time steps stacked together 101 | v = OrderedDict([(k, np.asarray([x[k] for x in v])) for k in self._keys]) 102 | else: 103 | v = np.asarray(v) 104 | 105 | if isinstance(v, dict): 106 | for k, v_ in v.items(): 107 | if k in self._keys: 108 | self.sub_norm[k].update(v_) 109 | else: 110 | self.sub_norm[''].update(v) 111 | 112 | def recompute_stats(self): 113 | for k in self._keys: 114 | self.sub_norm[k].recompute_stats() 115 | 116 | # normalize the observation 117 | def _normalize(self, v, clip_range=None): 118 | if not isinstance(v, dict): 119 | return self.sub_norm[''].normalize(v, clip_range) 120 | 121 | return OrderedDict([(k, self.sub_norm[k].normalize(v_, clip_range)) for k, v_ in v.items() if k in self._keys]) 122 | 123 | def normalize(self, v, clip_range=None): 124 | # v is rollout['ob'], a list where each element(ref. to timestep) is ODict([('robot-state', array()), ('obj-state', array())]) OR 125 | # transitions['ob'], a dict {'robot-state': array(), 'object-state': array()} array length is equal to batch_size 126 | if isinstance(v, list): 127 | return [self._normalize(x, clip_range) for x in v] # x is a odict 128 | else: 129 | return self._normalize(v, clip_range) 130 | 131 | def state_dict(self): 132 | return OrderedDict([(k, self.sub_norm[k].state_dict()) for k in self._keys]) 133 | 134 | def load_state_dict(self, state_dict): 135 | for k in self._keys: 136 | self.sub_norm[k].load_state_dict(state_dict[k]) 137 | -------------------------------------------------------------------------------- /panda/environments/base.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | from mujoco_py import MjSim, MjRenderContextOffscreen 4 | from mujoco_py import load_model_from_xml 5 | from mujoco_py import GlfwContext 6 | import numpy as np 7 | from utils import MujocoPyRenderer 8 | 9 | GlfwContext(offscreen=True) 10 | REGISTERED_ENVS = {} 11 | 12 | def register_env(target_class): 13 | REGISTERED_ENVS[target_class.__name__] = target_class 14 | 15 | def get_env(name): 16 | """Try to get the equivalent functionality of gym.make in a sloppy way.""" 17 | 18 | return REGISTERED_ENVS[name] 19 | 20 | def make(name, config=None): 21 | """ 22 | Creates a new environment instance with @name and @config. 23 | """ 24 | env = get_env(name) 25 | 26 | # get default configuration 27 | if config is None: 28 | import argparse 29 | import config.grasping as grasp 30 | 31 | parser = argparse.ArgumentParser() 32 | grasp.add_argument(parser) 33 | 34 | config, unparsed = parser.parse_known_args() 35 | 36 | return env(config) 37 | 38 | class EnvMeta(type): 39 | """Metaclass for registering environments""" 40 | 41 | def __new__(meta, name, bases, class_dict): 42 | cls = super().__new__(meta, name, bases, class_dict) 43 | 44 | # List all environments that should not be registered here. 
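        # (all other subclasses, e.g. the grasping environment defined in
        #  environments/panda_grasping.py, are added to REGISTERED_ENVS by this
        #  metaclass, so make() can instantiate them by class name)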
45 | _unregistered_envs = ["MujocoEnv", "PandaEnv"] 46 | 47 | if cls.__name__ not in _unregistered_envs: 48 | register_env(cls) 49 | return cls 50 | 51 | 52 | class MujocoEnv(metaclass=EnvMeta): 53 | """Initializes a Mujoco Environment.""" 54 | 55 | def __init__(self, config): 56 | 57 | self.render_collision_mesh = config.render_collision_mesh 58 | self.render_visual_mesh = config.render_visual_mesh 59 | self.control_freq = config.control_freq 60 | self.horizon = config.horizon 61 | self.ignore_done = config.ignore_done 62 | self.viewer = None 63 | self.model = None 64 | 65 | # Load the model 66 | self._load_model() 67 | 68 | # Initialize the simulation 69 | self._initialize_sim() 70 | 71 | # Run all further internal (re-)initialization required 72 | self._reset_internal() 73 | 74 | def initialize_time(self, control_freq): 75 | """ 76 | Initializes the time constants used for simulation. 77 | """ 78 | self.cur_time = 0 79 | self.model_timestep = self.sim.model.opt.timestep #0.002 is default simulation timestep in mujoco 80 | self.control_freq = control_freq 81 | self.control_timestep = 1. / control_freq # 0.01 if control freq is 100 82 | 83 | def _load_model(self): 84 | """Loads an xml model, puts it in self.model""" 85 | pass 86 | 87 | def _get_reference(self): 88 | """ 89 | Sets up references to important components. A reference is typically an 90 | index or a list of indices that point to the corresponding elements 91 | in a flatten array, which is how MuJoCo stores physical simulation data. 92 | """ 93 | pass 94 | 95 | def _initialize_sim(self, xml_string=None): 96 | """ 97 | Creates a MjSim object and stores it in self.sim. If @xml_string is specified, the MjSim object will be created 98 | from the specified xml_string. Else, it will pull from self.model to instantiate the simulation 99 | """ 100 | # if we have an xml string, use that to create the sim. 
Otherwise, use the local model 101 | self.mjpy_model = self.model.get_model() 102 | 103 | # Create the simulation instance and run a single step to make sure changes have propagated through sim state 104 | self.sim = MjSim(self.mjpy_model) 105 | #self.sim.step() 106 | 107 | # Setup sim time based on control frequency 108 | self.initialize_time(self.control_freq) 109 | 110 | def reset(self): 111 | """Resets simulation.""" 112 | self._reset_internal() 113 | self.sim.forward() 114 | return self._get_observation() 115 | 116 | def _reset_internal(self): 117 | """Resets simulation internal configurations.""" 118 | 119 | # create visualization screen or renderer 120 | if self.viewer is None: 121 | self.viewer = MujocoPyRenderer(self.sim) 122 | self.viewer.viewer.vopt.geomgroup[0] = 1 if self.render_collision_mesh else 0 123 | self.viewer.viewer.vopt.geomgroup[1] = 1 if self.render_visual_mesh else 0 124 | # hiding the overlay speeds up rendering significantly 125 | self.viewer.viewer._hide_overlay = True 126 | 127 | # make sure mujoco-py doesn't block rendering frames 128 | self.viewer.viewer._render_every_frame = True 129 | 130 | # additional housekeeping 131 | self.sim_state_initial = self.sim.get_state() # copy of simulator state (qpos(23), qvel(21)) 132 | self._get_reference() 133 | self.cur_time = 0 134 | self.timestep = 0 135 | self.done = False 136 | 137 | def _get_observation(self): 138 | """Returns an OrderedDict containing observations [(name_string, np.array), ...].""" 139 | return OrderedDict() 140 | 141 | def step(self, action): 142 | """Takes a step in simulation with control command @action.""" 143 | if self.done: 144 | raise ValueError("executing action in terminated episode") 145 | 146 | if isinstance(action, list): 147 | action = {key: val for ac_i in action for key, val in ac_i.items()} 148 | if isinstance(action, dict): 149 | action = np.concatenate([action[key] for key in self.action_space.shape.keys()]) 150 | 151 | self.timestep += 1 152 | # If the env.step frequency is lower than the mjsim timestep frequency(1/0.002), the internal controller will output 153 | # multiple torque commands in between new high level action commands. 
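        # e.g. with control_freq=100 (control_timestep=0.01 s) and the default
        # model_timestep of 0.002 s, the loop below performs 5 sim.step() calls per
        # env.step(), and only the first one is flagged as a policy step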
Therefore, we need to denote via 154 | # 'policy_step' whether the current step we're taking is simply an internal update of the controller, 155 | # or an actual policy update 156 | 157 | # Loop through the simulation at the model timestep rate until we're ready to take the next policy step 158 | # (as defined by the control frequency specified at the environment level) 159 | policy_step = True 160 | for i in range(int(self.control_timestep / self.model_timestep)): 161 | self._pre_action(action, policy_step) 162 | self.sim.step() # advances simulation 163 | policy_step = False 164 | 165 | # Note: this is done all at once to avoid floating point inaccuracies 166 | self.cur_time += self.control_timestep 167 | 168 | reward, done, info = self._post_action(action) 169 | return self._get_observation(), reward, done, info 170 | 171 | def _pre_action(self, action, policy_step=False): 172 | """Do any preprocessing before taking an action.""" 173 | self.sim.data.ctrl[:] = action #sim.data.ctrl is the control signal that is sent to the actuators in the simulation 174 | 175 | def _post_action(self, action): 176 | """Do any housekeeping after taking an action.""" 177 | reward = self.reward(action) 178 | info = {} 179 | info['episode_success'] = int(self._check_success()) 180 | info['grasp'] = self.has_grasp 181 | info['phase'] = self.phase 182 | 183 | # done if number of elapsed timesteps is greater than horizon 184 | self.done = (self.timestep >= self.horizon) and not self.ignore_done 185 | return reward, self.done, info 186 | 187 | def reward(self, action): 188 | """Reward should be a function of state and action.""" 189 | return 0 190 | 191 | def render(self): 192 | """ 193 | Renders to an on-screen window.a 194 | """ 195 | self.viewer.render() 196 | 197 | def _check_success(self): 198 | """ 199 | Returns True if task has been completed. 200 | """ 201 | return False 202 | -------------------------------------------------------------------------------- /panda/rl/ppo_agent.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | import torch.optim as optim 6 | from rl.dataset import ReplayBuffer, RandomSampler 7 | from rl.base_agent import BaseAgent 8 | from utils.logger import logger 9 | from utils.mpi import mpi_average 10 | from utils.pytorch import optimizer_cuda, count_parameters, \ 11 | compute_gradient_norm, compute_weight_norm, sync_networks, sync_grads, \ 12 | obs2tensor, to_tensor 13 | 14 | 15 | 16 | class PPOAgent(BaseAgent): 17 | def __init__(self, config, ob_space, ac_space, actor, critic): 18 | super().__init__(config, ob_space) 19 | 20 | self._ac_space = ac_space # e.g. 
ActionSpace(shape=OrderedDict([('default', 8)]),minimum=-1.0, maximum=1.0) 21 | 22 | # build up networks 23 | self._actor = actor(config, ob_space, ac_space, config.tanh_policy) # actor model (MLP) 24 | self._old_actor = actor(config, ob_space, ac_space, config.tanh_policy) 25 | self._critic = critic(config, ob_space) # critic model (MLP) 26 | self._network_cuda(config.device) 27 | 28 | self._actor_optim = optim.Adam(self._actor.parameters(), lr=config.lr_actor) 29 | self._critic_optim = optim.Adam(self._critic.parameters(), lr=config.lr_critic) 30 | 31 | sampler = RandomSampler() 32 | buffer_keys = ['ob', 'ac', 'done', 'rew', 'ret', 'adv', 'ac_before_activation'] 33 | self._buffer = ReplayBuffer(buffer_keys, config.buffer_size, sampler.sample_func) 34 | 35 | if config.is_chef: 36 | logger.info('Creating a PPO agent') 37 | logger.info('The actor has %d parameters', count_parameters(self._actor)) 38 | logger.info('The critic has %d parameters', count_parameters(self._critic)) 39 | 40 | def store_episode(self, rollouts): 41 | self._compute_gae(rollouts) 42 | self._buffer.store_episode(rollouts) 43 | 44 | def _compute_gae(self, rollouts): 45 | T = len(rollouts['done']) 46 | ob = rollouts['ob'] 47 | ob = self.normalize(ob) 48 | ob = obs2tensor(ob, self._config.device) 49 | vpred = self._critic(ob).detach().cpu().numpy()[:,0] 50 | assert len(vpred) == T + 1 51 | 52 | done = rollouts['done'] 53 | rew = rollouts['rew'] 54 | adv = np.empty((T, ) , 'float32') 55 | lastgaelam = 0 56 | for t in reversed(range(T)): 57 | nonterminal = 1 - done[t] 58 | delta = rew[t] + self._config.discount_factor * vpred[t + 1] * nonterminal - vpred[t] 59 | adv[t] = lastgaelam = delta + self._config.discount_factor * self._config.gae_lambda * nonterminal * lastgaelam 60 | 61 | ret = adv + vpred[:-1] 62 | 63 | assert np.isfinite(adv).all() 64 | assert np.isfinite(ret).all() 65 | 66 | # update rollouts 67 | rollouts['adv'] = ((adv - adv.mean()) / adv.std()).tolist() 68 | rollouts['ret'] = ret.tolist() 69 | 70 | def state_dict(self): 71 | return { 72 | 'actor_state_dict': self._actor.state_dict(), # state_dict contains info about model params (learnable tensors: weights&biases) 73 | 'critic_state_dict': self._critic.state_dict(), 74 | 'actor_optim_state_dict': self._actor_optim.state_dict(), # state_dict contains info about optim state and hyperparams 75 | 'critic_optim_state_dict': self._critic_optim.state_dict(), 76 | 'ob_norm_state_dict': self._ob_norm.state_dict(), 77 | } 78 | 79 | def load_state_dict(self, ckpt): 80 | self._actor.load_state_dict(ckpt['actor_state_dict']) 81 | self._critic.load_state_dict(ckpt['critic_state_dict']) 82 | self._ob_norm.load_state_dict(ckpt['ob_norm_state_dict']) 83 | self._network_cuda(self._config.device) 84 | 85 | self._actor_optim.load_state_dict(ckpt['actor_optim_state_dict']) 86 | self._critic_optim.load_state_dict(ckpt['critic_optim_state_dict']) 87 | optimizer_cuda(self._actor_optim, self._config.device) # required when loading optim state from checkpoint 88 | optimizer_cuda(self._critic_optim, self._config.device) 89 | 90 | def _network_cuda(self, device): 91 | self._actor.to(device) 92 | self._old_actor.to(device) 93 | self._critic.to(device) 94 | 95 | def sync_networks(self): 96 | sync_networks(self._actor) 97 | sync_networks(self._critic) 98 | 99 | def train(self): 100 | self._soft_update_target_network(self._old_actor, self._actor, 0.0) 101 | 102 | for _ in range(self._config.num_batches): 103 | transitions = self._buffer.sample(self._config.batch_size) 104 | train_info = 
self._update_network(transitions) 105 | 106 | self._buffer.clear() 107 | 108 | train_info.update({ 109 | 'actor_grad_norm': compute_gradient_norm(self._actor), 110 | 'actor_weight_norm': compute_weight_norm(self._actor), 111 | 'critic_grad_norm': compute_gradient_norm(self._critic), 112 | 'critic_weight_norm': compute_weight_norm(self._critic), 113 | }) 114 | return train_info 115 | 116 | def _update_network(self, transitions): 117 | info = {} 118 | 119 | # pre-process observations 120 | o = transitions['ob'] # dict {'robot-state': array(), 'object-state': array()} coming from Sampler 121 | o = self.normalize(o) 122 | 123 | bs = len(transitions['done']) # batch size 124 | _to_tensor = lambda x: to_tensor(x, self._config.device) 125 | o = _to_tensor(o) 126 | ac = _to_tensor(transitions['ac']) 127 | a_z = _to_tensor(transitions['ac_before_activation']) # is OrderedDict([('default', tensor()]; tensor shape is (batch_size, 8) 128 | ret = _to_tensor(transitions['ret']).reshape(bs, 1) 129 | adv = _to_tensor(transitions['adv']).reshape(bs, 1) 130 | 131 | log_pi, ent = self._actor.act_log(o, a_z) # log_pi is tensor of shape (batch_size, 1) 132 | old_log_pi, _ = self._old_actor.act_log(o, a_z) 133 | 134 | if old_log_pi.min() < -100: 135 | import ipdb; ipdb.set_trace() 136 | 137 | # the actor loss 138 | entropy_loss = self._config.entropy_loss_coeff * ent.mean() 139 | ratio = torch.exp(log_pi - old_log_pi) 140 | surr1 = ratio * adv 141 | surr2 = torch.clamp(ratio, 1.0 - self._config.clip_param, 1.0 + self._config.clip_param) * adv 142 | actor_loss = -torch.min(surr1, surr2).mean() 143 | 144 | if not np.isfinite(ratio.cpu().detach()).all() or not np.isfinite(adv.cpu().detach()).all(): 145 | import ipdb; ipdb.set_trace() 146 | info['entropy_loss'] = entropy_loss.cpu().item() 147 | info['actor_loss'] = actor_loss.cpu().item() 148 | actor_loss += entropy_loss 149 | 150 | #custom_loss = self._actor.custom_loss() 151 | #if custom_loss is not None: 152 | #actor_loss += custom_loss * self._config.custom_loss_weight 153 | #info['custom_loss'] = custom_loss.cpu().item() 154 | 155 | # the q loss 156 | value_pred = self._critic(o) 157 | value_loss = self._config.value_loss_coeff * (ret - value_pred).pow(2).mean() 158 | 159 | info['value_target'] = ret.mean().cpu().item() 160 | info['value_predicted'] = value_pred.mean().cpu().item() 161 | info['value_loss'] = value_loss.cpu().item() 162 | 163 | # update the actor 164 | self._actor_optim.zero_grad() 165 | actor_loss.backward() 166 | #torch.nn.utils.clip_grad_norm_(self._actor.parameters(), self._config.max_grad_norm) 167 | sync_grads(self._actor) 168 | self._actor_optim.step() 169 | 170 | # update the critic 171 | self._critic_optim.zero_grad() 172 | value_loss.backward() 173 | #torch.nn.utils.clip_grad_norm_(self._critic1.parameters(), self._config.max_grad_norm) 174 | sync_grads(self._critic) 175 | self._critic_optim.step() 176 | 177 | # include info from policy 178 | info.update(self._actor.info) 179 | 180 | return mpi_average(info) 181 | -------------------------------------------------------------------------------- /panda/models/assets/robot/panda/robot_torque.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 
66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | -------------------------------------------------------------------------------- /panda/utils/pytorch.py: -------------------------------------------------------------------------------- 1 | import os 2 | import io 3 | from glob import glob 4 | from collections import OrderedDict, defaultdict 5 | 6 | import numpy as np 7 | import torch 8 | import torch.distributed as dist 9 | import torchvision.utils as vutils 10 | import torchvision.transforms.functional as TF 11 | import PIL.Image 12 | from mpi4py import MPI 13 | 14 | 15 | # Note! This is l2 square, not l2 16 | def l2(a, b): 17 | return torch.pow(torch.abs(a - b), 2).sum(dim=1) 18 | 19 | 20 | # required when we load optimizer from a checkpoint 21 | def optimizer_cuda(optimizer, device): 22 | for state in optimizer.state.values(): 23 | for k, v in state.items(): 24 | if isinstance(v, torch.Tensor): 25 | state[k] = v.to(device) 26 | 27 | 28 | def get_ckpt_path(base_dir, ckpt_num): 29 | if ckpt_num is None: 30 | return get_recent_ckpt_path(base_dir) 31 | files = glob(os.path.join(base_dir, "*.pt")) 32 | for f in files: 33 | if 'ckpt_%08d.pt' % ckpt_num in f: 34 | return f, ckpt_num 35 | raise Exception("Did not find ckpt_%s.pt" % ckpt_num) 36 | 37 | 38 | def get_recent_ckpt_path(base_dir): 39 | files = glob(os.path.join(base_dir, "*.pt")) 40 | files.sort() 41 | if len(files) == 0: 42 | return None, None 43 | max_step = max([f.rsplit('_', 1)[-1].split('.')[0] for f in files]) 44 | paths = [f for f in files if max_step in f] 45 | if len(paths) == 1: 46 | return paths[0], int(max_step) 47 | else: 48 | raise Exception("Multiple most recent ckpts %s" % paths) 49 | 50 | 51 | def image_grid(image, n=4): 52 | return vutils.make_grid(image[:n], nrow=n).cpu().detach().numpy() 53 | 54 | 55 | def count_parameters(model): 56 | return sum(p.numel() for p in model.parameters() if p.requires_grad) 57 | 58 | 59 | def slice_tensor(input, indices): 60 | ret = {} 61 | for k, v in input.items(): 62 | ret[k] = v[indices] 63 | return ret 64 | 65 | 66 | def average_gradients(model): 67 | size = float(dist.get_world_size()) 68 | for p in model.parameters(): 69 | if p.grad is not None: 70 | dist.all_reduce(p.grad.data, op=dist.ReduceOp.SUM) 71 | p.grad.data /= size 72 | 73 | 74 | def ensure_shared_grads(model, shared_model): 75 | """for A3C""" 76 | for param, shared_param in zip(model.parameters(), shared_model.parameters()): 77 | if shared_param.grad is not None: 78 | return 79 | shared_param._grad = param.grad 80 | 81 | 82 | def compute_gradient_norm(model): 83 | grad_norm = 0 84 | for p in model.parameters(): 85 | if p.grad is not None: 86 | grad_norm += (p.grad.data ** 2).sum().item() 87 | return grad_norm 88 | 89 | 90 | def compute_weight_norm(model): 91 | weight_norm = 0 92 | for p in model.parameters(): 93 | if p.data is not None: 94 | weight_norm += (p.data ** 2).sum().item() 95 | return weight_norm 96 | 97 | 98 | def compute_weight_sum(model): 99 | weight_sum = 0 100 | for p in model.parameters(): 101 | if p.data is not None: 102 | weight_sum += p.data.abs().sum().item() 103 | return weight_sum 104 | 105 | 106 | # sync_networks across the different cores 107 | def sync_networks(network): 108 | """ 109 | network is the network you want to sync 110 | """ 111 | comm = MPI.COMM_WORLD 112 | flat_params, 
params_shape = _get_flat_params(network) 113 | comm.Bcast(flat_params, root=0) 114 | # set the flat params back to the network 115 | _set_flat_params(network, params_shape, flat_params) 116 | 117 | 118 | # get the flat params from the network 119 | def _get_flat_params(network): 120 | param_shape = {} 121 | flat_params = None 122 | for key_name, value in network.named_parameters(): 123 | param_shape[key_name] = value.cpu().detach().numpy().shape 124 | if flat_params is None: 125 | flat_params = value.cpu().detach().numpy().flatten() 126 | else: 127 | flat_params = np.append(flat_params, value.cpu().detach().numpy().flatten()) 128 | return flat_params, param_shape 129 | 130 | 131 | # set the params from the network 132 | def _set_flat_params(network, params_shape, params): 133 | pointer = 0 134 | if hasattr(network, '_config'): 135 | device = network._config.device 136 | else: 137 | device = torch.device("cpu") 138 | 139 | for key_name, values in network.named_parameters(): 140 | # get the length of the parameters 141 | len_param = np.prod(params_shape[key_name]) 142 | copy_params = params[pointer:pointer + len_param].reshape(params_shape[key_name]) 143 | copy_params = torch.tensor(copy_params).to(device) 144 | # copy the params 145 | values.data.copy_(copy_params.data) 146 | # update the pointer 147 | pointer += len_param 148 | 149 | 150 | # sync gradients across the different cores 151 | def sync_grads(network): 152 | flat_grads, grads_shape = _get_flat_grads(network) 153 | comm = MPI.COMM_WORLD 154 | global_grads = np.zeros_like(flat_grads) 155 | comm.Allreduce(flat_grads, global_grads, op=MPI.SUM) 156 | _set_flat_grads(network, grads_shape, global_grads) 157 | 158 | 159 | def _set_flat_grads(network, grads_shape, flat_grads): 160 | pointer = 0 161 | if hasattr(network, '_config'): 162 | device = network._config.device 163 | else: 164 | device = torch.device("cpu") 165 | 166 | for key_name, value in network.named_parameters(): 167 | len_grads = np.prod(grads_shape[key_name]) 168 | copy_grads = flat_grads[pointer:pointer + len_grads].reshape(grads_shape[key_name]) 169 | copy_grads = torch.tensor(copy_grads).to(device) 170 | # copy the grads 171 | value.grad.data.copy_(copy_grads.data) 172 | pointer += len_grads 173 | 174 | 175 | def _get_flat_grads(network): 176 | grads_shape = {} 177 | flat_grads = None 178 | for key_name, value in network.named_parameters(): 179 | try: 180 | grads_shape[key_name] = value.grad.data.cpu().numpy().shape 181 | except: 182 | print('Cannot get grad of tensor {}'.format(key_name)) 183 | import pdb; pdb.set_trace() 184 | if flat_grads is None: 185 | flat_grads = value.grad.data.cpu().numpy().flatten() 186 | else: 187 | flat_grads = np.append(flat_grads, value.grad.data.cpu().numpy().flatten()) 188 | return flat_grads, grads_shape 189 | 190 | 191 | def fig2tensor(draw_func): 192 | def decorate(*args, **kwargs): 193 | tmp = io.BytesIO() 194 | fig = draw_func(*args, **kwargs) 195 | fig.savefig(tmp, dpi=88) 196 | tmp.seek(0) 197 | fig.clf() 198 | return TF.to_tensor(PIL.Image.open(tmp)) 199 | 200 | return decorate 201 | 202 | 203 | def tensor2np(t): 204 | if isinstance(t, torch.Tensor): 205 | return t.clone().detach().cpu().numpy() 206 | else: 207 | return t 208 | 209 | 210 | def tensor2img(tensor): 211 | if len(tensor.shape) == 4: 212 | assert tensor.shape[0] == 1 213 | tensor = tensor.squeeze(0) 214 | img = tensor.permute(1, 2, 0).detach().cpu().numpy() 215 | import cv2 216 | cv2.imwrite('tensor.png', img) 217 | 218 | 219 | def obs2tensor(obs, device): 220 | if 
isinstance(obs, list): 221 | obs = list2dict(obs) 222 | 223 | return OrderedDict([(k, torch.tensor(np.stack(v), dtype=torch.float32).to(device)) for k, v in obs.items()]) 224 | 225 | 226 | # transfer a numpy array/list into a tensor 227 | def to_tensor(x, device): 228 | if isinstance(x, dict): 229 | return OrderedDict([(k, torch.tensor(v, dtype=torch.float32).to(device)) for k, v in x.items()]) 230 | 231 | if isinstance(x, list): 232 | return [torch.tensor(v, dtype=torch.float32).to(device) for v in x] 233 | 234 | return torch.tensor(x, dtype=torch.float32).to(device) 235 | 236 | 237 | def list2dict(rollout): 238 | ret = OrderedDict() 239 | for k in rollout[0].keys(): 240 | ret[k] = [] 241 | for transition in rollout: 242 | for k, v in transition.items(): 243 | ret[k].append(v) 244 | return ret 245 | 246 | 247 | # From softlearning repo 248 | def flatten(unflattened, parent_key='', separator='/'): 249 | items = [] 250 | for k, v in unflattened.items(): 251 | if separator in k: 252 | raise ValueError( 253 | "Found separator ({}) from key ({})".format(separator, k)) 254 | 255 | new_key = parent_key + separator + k if parent_key else k 256 | 257 | if isinstance(v, collections.MutableMapping) and v: 258 | items.extend(flatten(v, new_key, separator=separator).items()) 259 | else: 260 | items.append((new_key, v)) 261 | 262 | return OrderedDict(items) 263 | 264 | 265 | # From softlearning repo 266 | def unflatten(flattened, separator='.'): 267 | result = {} 268 | for key, value in flattened.items(): 269 | parts = key.split(separator) 270 | d = result 271 | for part in parts[:-1]: 272 | if part not in d: 273 | d[part] = {} 274 | d = d[part] 275 | d[parts[-1]] = value 276 | 277 | return result 278 | -------------------------------------------------------------------------------- /panda/rl/sac_agent.py: -------------------------------------------------------------------------------- 1 | # SAC training code reference 2 | # https://github.com/vitchyr/rlkit/blob/master/rlkit/torch/sac/sac.py 3 | 4 | from collections import OrderedDict 5 | 6 | import numpy as np 7 | import torch 8 | import torch.nn as nn 9 | import torch.optim as optim 10 | from rl.dataset import ReplayBuffer, RandomSampler 11 | from rl.base_agent import BaseAgent 12 | from utils.logger import logger 13 | from utils.mpi import mpi_average 14 | from utils.pytorch import optimizer_cuda, count_parameters, compute_gradient_norm, compute_weight_norm, sync_networks, sync_grads, to_tensor 15 | 16 | 17 | class SACAgent(BaseAgent): 18 | def __init__(self, config, ob_space, ac_space, actor, critic): 19 | super().__init__(config, ob_space) 20 | 21 | self._ob_space = ob_space 22 | self._ac_space = ac_space 23 | 24 | self._target_entropy = -ac_space.size 25 | self._log_alpha = torch.zeros(1, requires_grad=True, device=config.device) 26 | self._alpha_optim = optim.Adam([self._log_alpha], lr=config.lr_actor) 27 | 28 | # build up networks 29 | self._build_actor(actor) 30 | self._critic1 = critic(config, ob_space, ac_space) 31 | self._critic2 = critic(config, ob_space, ac_space) 32 | 33 | # build up target networks 34 | self._critic1_target = critic(config, ob_space, ac_space) 35 | self._critic2_target = critic(config, ob_space, ac_space) 36 | self._critic1_target.load_state_dict(self._critic1.state_dict()) 37 | self._critic2_target.load_state_dict(self._critic2.state_dict()) 38 | self._network_cuda(config.device) 39 | 40 | self._actor_optim = optim.Adam(self._actor.parameters(), lr=config.lr_actor) 41 | self._critic1_optim = 
optim.Adam(self._critic1.parameters(), lr=config.lr_critic) 42 | self._critic2_optim = optim.Adam(self._critic2.parameters(), lr=config.lr_critic) 43 | 44 | sampler = RandomSampler() 45 | buffer_keys = ['ob', 'ac', 'done', 'rew'] 46 | self._buffer = ReplayBuffer(buffer_keys, 47 | config.buffer_size, 48 | sampler.sample_func) 49 | 50 | self._log_creation() 51 | 52 | def _log_creation(self): 53 | if self._config.is_chef: 54 | logger.info('Creating a SAC agent') 55 | logger.info('The actor has %d parameters', count_parameters(self._actor)) 56 | logger.info('The critic1 has %d parameters', count_parameters(self._critic1)) 57 | logger.info('The critic2 has %d parameters', count_parameters(self._critic2)) 58 | 59 | def _build_actor(self, actor): 60 | self._actor = actor(self._config, self._ob_space, self._ac_space, self._config.tanh_policy) 61 | 62 | def store_episode(self, rollouts): 63 | self._buffer.store_episode(rollouts) 64 | 65 | def state_dict(self): 66 | return { 67 | 'log_alpha': self._log_alpha.cpu().detach().numpy(), 68 | 'actor_state_dict': self._actor.state_dict(), 69 | 'critic1_state_dict': self._critic1.state_dict(), 70 | 'critic2_state_dict': self._critic2.state_dict(), 71 | 'alpha_optim_state_dict': self._alpha_optim.state_dict(), 72 | 'actor_optim_state_dict': self._actor_optim.state_dict(), 73 | 'critic1_optim_state_dict': self._critic1_optim.state_dict(), 74 | 'critic2_optim_state_dict': self._critic2_optim.state_dict(), 75 | 'ob_norm_state_dict': self._ob_norm.state_dict(), 76 | } 77 | 78 | def load_state_dict(self, ckpt): 79 | self._log_alpha.data = torch.tensor(ckpt['log_alpha'], requires_grad=True, 80 | device=self._config.device) 81 | self._actor.load_state_dict(ckpt['actor_state_dict']) 82 | self._critic1.load_state_dict(ckpt['critic1_state_dict']) 83 | self._critic2.load_state_dict(ckpt['critic2_state_dict']) 84 | self._critic1_target.load_state_dict(self._critic1.state_dict()) 85 | self._critic2_target.load_state_dict(self._critic2.state_dict()) 86 | self._ob_norm.load_state_dict(ckpt['ob_norm_state_dict']) 87 | self._network_cuda(self._config.device) 88 | 89 | self._alpha_optim.load_state_dict(ckpt['alpha_optim_state_dict']) 90 | self._actor_optim.load_state_dict(ckpt['actor_optim_state_dict']) 91 | self._critic1_optim.load_state_dict(ckpt['critic1_optim_state_dict']) 92 | self._critic2_optim.load_state_dict(ckpt['critic2_optim_state_dict']) 93 | optimizer_cuda(self._alpha_optim, self._config.device) 94 | optimizer_cuda(self._actor_optim, self._config.device) 95 | optimizer_cuda(self._critic1_optim, self._config.device) 96 | optimizer_cuda(self._critic2_optim, self._config.device) 97 | 98 | def _network_cuda(self, device): 99 | self._actor.to(device) 100 | self._critic1.to(device) 101 | self._critic2.to(device) 102 | self._critic1_target.to(device) 103 | self._critic2_target.to(device) 104 | 105 | def sync_networks(self): 106 | sync_networks(self._actor) 107 | sync_networks(self._critic1) 108 | sync_networks(self._critic2) 109 | 110 | def train(self): 111 | for _ in range(self._config.num_batches): 112 | transitions = self._buffer.sample(self._config.batch_size) 113 | train_info = self._update_network(transitions) 114 | self._soft_update_target_network(self._critic1_target, self._critic1, self._config.polyak) 115 | self._soft_update_target_network(self._critic2_target, self._critic2, self._config.polyak) 116 | 117 | train_info.update({ 118 | 'actor_grad_norm': np.mean(compute_gradient_norm(self._actor)), 119 | 'actor_weight_norm': 
np.mean(compute_weight_norm(self._actor)), 120 | 'critic1_grad_norm': compute_gradient_norm(self._critic1), 121 | 'critic2_grad_norm': compute_gradient_norm(self._critic2), 122 | 'critic1_weight_norm': compute_weight_norm(self._critic1), 123 | 'critic2_weight_norm': compute_weight_norm(self._critic2), 124 | }) 125 | return train_info 126 | 127 | def act_log(self, ob): 128 | return self._actor.act_log(ob) 129 | 130 | def _update_network(self, transitions): 131 | info = {} 132 | 133 | # pre-process observations 134 | o, o_next = transitions['ob'], transitions['ob_next'] 135 | o = self.normalize(o) 136 | o_next = self.normalize(o_next) 137 | 138 | bs = len(transitions['done']) 139 | _to_tensor = lambda x: to_tensor(x, self._config.device) 140 | o = _to_tensor(o) 141 | o_next = _to_tensor(o_next) 142 | ac = _to_tensor(transitions['ac']) 143 | done = _to_tensor(transitions['done']).reshape(bs, 1) 144 | rew = _to_tensor(transitions['rew']).reshape(bs, 1) 145 | 146 | # update alpha 147 | actions_real, log_pi = self.act_log(o) 148 | alpha_loss = -(self._log_alpha * (log_pi + self._target_entropy).detach()).mean() 149 | self._alpha_optim.zero_grad() 150 | alpha_loss.backward() 151 | self._alpha_optim.step() 152 | alpha = self._log_alpha.exp() 153 | 154 | # the actor loss 155 | entropy_loss = (alpha * log_pi).mean() 156 | actor_loss = -torch.min(self._critic1(o, actions_real), 157 | self._critic2(o, actions_real)).mean() 158 | info['entropy_alpha'] = alpha.cpu().item() 159 | info['entropy_loss'] = entropy_loss.cpu().item() 160 | info['actor_loss'] = actor_loss.cpu().item() 161 | actor_loss += entropy_loss 162 | 163 | # calculate the target Q value function 164 | with torch.no_grad(): 165 | actions_next, log_pi_next = self.act_log(o_next) 166 | q_next_value1 = self._critic1_target(o_next, actions_next) 167 | q_next_value2 = self._critic2_target(o_next, actions_next) 168 | q_next_value = torch.min(q_next_value1, q_next_value2) - alpha * log_pi_next 169 | target_q_value = rew * self._config.reward_scale + (1 - done) * self._config.discount_factor * q_next_value 170 | target_q_value = target_q_value.detach() 171 | ## clip the q value 172 | clip_return = 10 / (1 - self._config.discount_factor) 173 | target_q_value = torch.clamp(target_q_value, -clip_return, clip_return) 174 | 175 | # the q loss 176 | real_q_value1 = self._critic1(o, ac) 177 | real_q_value2 = self._critic2(o, ac) 178 | critic1_loss = 0.5 * (target_q_value - real_q_value1).pow(2).mean() 179 | critic2_loss = 0.5 * (target_q_value - real_q_value2).pow(2).mean() 180 | 181 | info['min_target_q'] = target_q_value.min().cpu().item() 182 | info['target_q'] = target_q_value.mean().cpu().item() 183 | info['min_real1_q'] = real_q_value1.min().cpu().item() 184 | info['min_real2_q'] = real_q_value2.min().cpu().item() 185 | info['real1_q'] = real_q_value1.mean().cpu().item() 186 | info['real2_q'] = real_q_value2.mean().cpu().item() 187 | info['critic1_loss'] = critic1_loss.cpu().item() 188 | info['critic2_loss'] = critic2_loss.cpu().item() 189 | 190 | # update the actor 191 | self._actor_optim.zero_grad() 192 | actor_loss.backward() 193 | #torch.nn.utils.clip_grad_norm_(self._actor.parameters(), self._config.max_grad_norm) 194 | sync_grads(self._actor) 195 | self._actor_optim.step() 196 | 197 | # update the critic 198 | self._critic1_optim.zero_grad() 199 | critic1_loss.backward() 200 | #torch.nn.utils.clip_grad_norm_(self._critic1.parameters(), self._config.max_grad_norm) 201 | sync_grads(self._critic1) 202 | self._critic1_optim.step() 203 | 204 | 
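# Note: the second critic below gets exactly the same treatment as critic1 above; both regress
# onto the shared clipped target_q_value, and taking the min over two independent critics (in the
# target and in the actor loss) is what curbs Q-value overestimation in SAC. After
# _update_network() returns, train() refreshes both target critics via
# _soft_update_target_network(target, source, polyak) (defined in BaseAgent, not shown here),
# which presumably blends target_param <- polyak * target_param + (1 - polyak) * source_param,
# so a polyak close to 1 keeps the targets slowly moving.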
self._critic2_optim.zero_grad() 205 | critic2_loss.backward() 206 | #torch.nn.utils.clip_grad_norm_(self._critic2.parameters(), self._config.max_grad_norm) 207 | sync_grads(self._critic2) 208 | self._critic2_optim.step() 209 | 210 | # include info from policy 211 | info.update(self._actor.info) 212 | return mpi_average(info) 213 | -------------------------------------------------------------------------------- /panda/environments/panda_grasping.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | import random 3 | import numpy as np 4 | 5 | import utils.transform_utils as T 6 | from utils.mjcf_utils import string_to_array 7 | from utils.transform_utils import convert_quat 8 | from environments.panda import PandaEnv 9 | 10 | from models.arena import TableArena 11 | from models.objects import CubeObject, BasePartObject, CylObject, Cyl2Object 12 | from models.robot import Panda 13 | from models.task import GraspingTask 14 | 15 | import hjson 16 | import os 17 | 18 | 19 | class PandaGrasp(PandaEnv): 20 | """ 21 | This class corresponds to the grasping task for the 🐼️ robot arm. 22 | """ 23 | 24 | def __init__(self, config): 25 | """ 26 | Args: 27 | config = configuration file of environment and task parameters 28 | """ 29 | 30 | # settings for table top 31 | self.config = config 32 | self.table_full_size = config.table_full_size 33 | 34 | # Load the controller parameter configuration files 35 | controller_filepath = os.path.join(os.path.dirname(__file__), '..','config/controller_config.hjson') 36 | super().__init__(config, controller_config_file=controller_filepath) 37 | 38 | 39 | def _load_model(self): 40 | super()._load_model() 41 | # set the robot base pos in world ref frame; World frame is right at the center of floor 42 | self.mujoco_robot.set_base_xpos([-0.5, 0, 0.913]) 43 | 44 | # load model for table top workspace 45 | self.mujoco_arena = TableArena(table_full_size=self.table_full_size) 46 | self.mujoco_arena.set_origin([0, 0, 0]) 47 | 48 | # define mujoco objects 49 | Cube = CubeObject() 50 | Base = BasePartObject() # This object has no function in learning its just for viewing purpose 51 | 52 | self.mujoco_objects = OrderedDict([("BasePart", Base), ("Cube", Cube)]) 53 | 54 | # For collision avoidance scenario 55 | if self.config.mode == 2: 56 | Cylinder = CylObject() 57 | Cylinder2 = Cyl2Object() 58 | self.mujoco_objects = OrderedDict([("BasePart", Base), ("Cube", Cube), ("Cylinder", Cylinder), ("Cylinder2", Cylinder2)]) 59 | 60 | # task includes arena, robot, and objects of interest 61 | self.model = GraspingTask(self.mujoco_arena, self.mujoco_robot, self.mujoco_objects) 62 | self.table_pos = string_to_array(self.model.table_body.get("pos")) 63 | 64 | 65 | def _get_reference(self): 66 | super()._get_reference() 67 | 68 | self.cube_body_id = self.sim.model.body_name2id("Cube") # 'cube': 23 69 | self.cube_geom_id = self.sim.model.geom_name2id("Cube") # 'cube': 41 70 | 71 | #information of objects 72 | self.object_names = list(self.mujoco_objects.keys()) 73 | self.object_site_ids = [self.sim.model.site_name2id(ob_name) for ob_name in self.object_names] 74 | 75 | # clutter objects ids 76 | if self.config.mode == 2: 77 | self.cyl_geom_id = self.sim.model.geom_name2id("Cylinder") 78 | self.cyl2_geom_id = self.sim.model.geom_name2id("Cylinder2") 79 | 80 | # id of grippers for contact checking 81 | # ['hand_collision', 'finger1_collision', 'finger2_collision', 'finger1_tip_collision', 'finger2_tip_collision'] 82 | 
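# These gripper geom names are resolved to integer ids with sim.model.geom_name2id() below;
# MuJoCo reports contacts in sim.data.contact as (geom1, geom2) id pairs, so caching the
# left/right finger ids lets reward() detect finger-cube contact with simple id membership
# tests (see the touch_left_finger / touch_right_finger loop there).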
if self.has_gripper: 83 | self.finger_names = self.gripper.contact_geoms() 84 | self.l_finger_geom_ids = [self.sim.model.geom_name2id(x) for x in self.gripper.left_finger_geoms] # finger1_visual': 34 85 | self.r_finger_geom_ids = [self.sim.model.geom_name2id(x) for x in self.gripper.right_finger_geoms] # 'finger2_visual': 37 86 | 87 | # self.sim.data.contact # list, geom1, geom2 88 | self.collision_check_geom_names = self.sim.model._geom_name2id.keys() # {...,'hand_collision': 33 ,...,'BasePart': 40, 'cube': 41} 89 | self.collision_check_geom_ids = [self.sim.model._geom_name2id[k] for k in self.collision_check_geom_names] 90 | 91 | 92 | def _reset_internal(self): 93 | """ 94 | Resets simulation internal configurations. 95 | """ 96 | super()._reset_internal() 97 | 98 | init_pos = np.array([0, np.pi / 16.0, 0.00, -np.pi / 2.0 - np.pi / 3.0, 0.00, np.pi - 0.2, np.pi / 4]) 99 | 100 | # Uncomment below line for adding randomization to initial position of robot position 101 | #init_pos += np.random.randn(init_pos.shape[0]) * 0.02 102 | 103 | self.sim.data.qpos[self._ref_joint_pos_indexes] = np.array(init_pos) 104 | 105 | # reset position of objects on table top 106 | 107 | # Cube 108 | self.sim.data.qpos[16:19] = np.array([0 ,-0.1 ,0.85]) 109 | self.sim.data.qpos[19:23] = np.array([1 ,0 ,0, 0]) 110 | 111 | # Random Base Part 112 | self.sim.data.qpos[9:12] = np.array([0 ,0.1 ,0.82]) 113 | self.sim.data.qpos[12:16] = np.array([1 ,0 ,0, 0]) 114 | 115 | if self.config.mode == 2: 116 | # reset cylinders pos 117 | self.sim.data.qpos[23:26] = np.array([0 ,0.03 ,0.9]) 118 | self.sim.data.qpos[26:30] = np.array([1 ,0 ,0, 0]) 119 | 120 | self.sim.data.qpos[30:33] = np.array([0 ,-0.23 ,0.9]) 121 | self.sim.data.qpos[33:37] = np.array([1 ,0 ,0, 0]) 122 | 123 | # reset the phase and grasp state 124 | self.phase = 0 125 | self.has_grasp = False 126 | 127 | def reward(self, action=None): 128 | """ 129 | Reward function for the task. 
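The dense reward is shaped in two phases. In phase 0 (reaching) it combines a reaching term
1 - tanh(10 * dist) between the grip site and the cube, a slow-down term 1 - tanh(10 * vel)
once the gripper is within 0.08 m, and a small bonus for keeping the gripper open; when
dist < 0.025 the task switches to phase 1 (grasping + lifting), which rewards reaching and
slowing down at full weight, rewards closing the gripper, adds 0.5 while both fingers touch
the cube, adds 5.0 on success, and (in mode 2) subtracts 5.0 for collisions with the clutter
cylinders.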
130 | Returns: 131 | reward (float): the reward 132 | """ 133 | # reaching reward 134 | cube_pos = self.sim.data.body_xpos[self.cube_body_id] 135 | gripper_site_pos = self.sim.data.site_xpos[self.eef_site_id] 136 | dist = np.linalg.norm(gripper_site_pos - cube_pos) # Eucledian distance b/w cube and grip site 137 | reaching_reward = 1 - np.tanh(10.0 * dist) # tanh function on distance 138 | 139 | # slow down reward 140 | vel = np.sum(abs(self.ee_v)) / 6 141 | vel_reward = 1 - np.tanh(10.0 * vel) 142 | 143 | # Two phases of the task (0.Reaching and 1.Grasping+Lifiting) 144 | if self.phase == 0: 145 | 146 | reward = 0.6 * reaching_reward 147 | 148 | if dist < 0.08: 149 | reward += 0.3 * vel_reward 150 | 151 | # gripper open reward 152 | if action[-1] < 0: 153 | reward += 0.1 * abs(action[-1]) 154 | 155 | if dist < 0.025: 156 | self.phase = 1 157 | 158 | elif self.phase == 1: 159 | 160 | reward = reaching_reward + vel_reward 161 | 162 | # gripper closing reward 163 | if action[-1] > 0: 164 | reward += 0.5 * action[-1] 165 | 166 | # check contact between fingers and cube 167 | touch_left_finger = False 168 | touch_right_finger = False 169 | 170 | for i in range(self.sim.data.ncon): 171 | c = self.sim.data.contact[i] 172 | if c.geom1 in self.l_finger_geom_ids and c.geom2 == self.cube_geom_id: 173 | touch_left_finger = True 174 | if c.geom1 == self.cube_geom_id and c.geom2 in self.l_finger_geom_ids: 175 | touch_left_finger = True 176 | if c.geom1 in self.r_finger_geom_ids and c.geom2 == self.cube_geom_id: 177 | touch_right_finger = True 178 | if c.geom1 == self.cube_geom_id and c.geom2 in self.r_finger_geom_ids: 179 | touch_right_finger = True 180 | 181 | self.has_grasp = touch_left_finger and touch_right_finger 182 | 183 | # grasping reward 184 | if self.has_grasp: 185 | reward += 0.5 186 | 187 | # success reward 188 | if self._check_success(): 189 | reward += 5.0 190 | 191 | # stay within joint limits! This is used only in retraining condition 192 | #if self._check_q_limits(): 193 | #reward -= 1.0 194 | 195 | # Collision avoidance scene 196 | if self.config.mode == 2: 197 | # collision penalty 198 | collision = False 199 | for contact in self.sim.data.contact[:self.sim.data.ncon]: 200 | # hand collision check 201 | if self.sim.model.geom_id2name(contact.geom1) in self.hand_names and contact.geom2 == self.cyl_geom_id: 202 | collision = True 203 | if self.sim.model.geom_id2name(contact.geom2) in self.hand_names and contact.geom1 == self.cyl_geom_id: 204 | collision = True 205 | if self.sim.model.geom_id2name(contact.geom1) in self.hand_names and contact.geom2 == self.cyl2_geom_id: 206 | collision = True 207 | if self.sim.model.geom_id2name(contact.geom2) in self.hand_names and contact.geom1 == self.cyl2_geom_id: 208 | collision = True 209 | # cube collision check 210 | if contact.geom1 == self.cyl_geom_id and contact.geom2 == self.cube_geom_id: 211 | collision = True 212 | if contact.geom1 == self.cube_geom_id and contact.geom2 == self.cyl_geom_id: 213 | collision = True 214 | if contact.geom1 == self.cyl2_geom_id and contact.geom2 == self.cube_geom_id: 215 | collision = True 216 | if contact.geom1 == self.cube_geom_id and contact.geom2 == self.cyl2_geom_id: 217 | collision = True 218 | 219 | if collision: 220 | reward -= 5.0 221 | 222 | return reward 223 | 224 | def _check_success(self): 225 | """ 226 | Returns True if task has been completed. 
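Success means the cube is grasped (both finger pads in contact, tracked by self.has_grasp)
while its center is lifted above z = 0.86 m in the world frame.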
227 | """ 228 | cube_pos = self.sim.data.body_xpos[self.cube_body_id] 229 | gripper_site_pos = self.sim.data.site_xpos[self.eef_site_id] 230 | dist = np.linalg.norm(gripper_site_pos - cube_pos) 231 | 232 | return self.has_grasp and cube_pos[2] > 0.86 233 | 234 | 235 | def _get_observation(self): 236 | """ 237 | Returns an OrderedDict containing object observations 238 | """ 239 | state = super()._get_observation() 240 | di = OrderedDict() 241 | 242 | # low-level object information 243 | # position and rotation of object 244 | cube_pos = np.array(self.sim.data.body_xpos[self.cube_body_id]) 245 | cube_quat = convert_quat(np.array(self.sim.data.body_xquat[self.cube_body_id]), to="xyzw") 246 | di["cube_pos"] = cube_pos 247 | di["cube_quat"] = cube_quat 248 | 249 | gripper_site_pos = np.array(self.sim.data.site_xpos[self.eef_site_id]) 250 | # gripper to cube distance 251 | di["gripper_to_cube"] = gripper_site_pos - cube_pos 252 | 253 | state["object-state"] = np.concatenate([cube_pos, cube_quat, di["gripper_to_cube"]]) 254 | 255 | return state 256 | -------------------------------------------------------------------------------- /panda/rl/trainer.py: -------------------------------------------------------------------------------- 1 | """ Base code for RL training. Collects rollouts and updates policy networks. """ 2 | 3 | import os 4 | from time import time 5 | from collections import defaultdict, OrderedDict 6 | import gzip 7 | import pickle 8 | import h5py 9 | import torch 10 | import wandb 11 | import numpy as np 12 | from tqdm import tqdm, trange 13 | from rl.policies import MlpActor, MlpCritic 14 | from rl.ppo_agent import PPOAgent 15 | from rl.sac_agent import SACAgent 16 | from rl.rollouts import RolloutRunner 17 | from utils.logger import logger 18 | from utils.pytorch import get_ckpt_path 19 | from utils.mpi import mpi_sum 20 | from environments import make 21 | 22 | class Trainer(): 23 | """ 24 | Trainer class for SAC and PPO in PyTorch. 25 | """ 26 | 27 | def __init__(self, config): 28 | """ 29 | Initializes class with the configuration. 30 | """ 31 | self._config = config 32 | self._is_chef = config.is_chef 33 | 34 | # create a new environment 35 | self._env = make("PandaGrasp", config) 36 | ob_space = self._env.observation_space # e.g. OrderedDict([('object-state', [10]), ('robot-state', [36])]) 37 | ac_space = self._env.action_space # e.g. 
ActionSpace(shape=OrderedDict([('default', 8)]),minimum=-1.0, maximum=1.0) 38 | print('***', ac_space) 39 | 40 | # get actor and critic networks 41 | actor, critic = MlpActor, MlpCritic 42 | 43 | # build up networks for PPO agent 44 | if self._config.algo == 'sac': 45 | self._agent = SACAgent(config, ob_space, ac_space, actor, critic) 46 | else: 47 | self._agent = PPOAgent(config, ob_space, ac_space, actor, critic) 48 | 49 | # build rollout runner 50 | self._runner = RolloutRunner(config, self._env, self._agent) 51 | 52 | # setup log 53 | if self._is_chef and self._config.is_train: 54 | exclude = ['device'] 55 | if not self._config.wandb: 56 | os.environ['WANDB_MODE'] = 'dryrun' 57 | 58 | # Weights and Biases (wandb) is used for logging, set the account details below or dry run above 59 | # user or team name 60 | entity = 'panda' 61 | # project name 62 | project = 'robo' 63 | 64 | wandb.init( 65 | resume=config.run_name, 66 | project=project, 67 | config={k: v for k, v in config.__dict__.items() if k not in exclude}, 68 | dir=config.log_dir, 69 | entity=entity, 70 | notes=config.notes 71 | ) 72 | 73 | def _save_ckpt(self, ckpt_num, update_iter): 74 | """ 75 | Save checkpoint to log directory. 76 | 77 | Args: 78 | ckpt_num: number appended to checkpoint name. The number of environment step is used in this code. 79 | update_iter: number of policy update. It will be used for resuming training. 80 | """ 81 | ckpt_path = os.path.join(self._config.log_dir, 'ckpt_%08d.pt' % ckpt_num) 82 | state_dict = {'step': ckpt_num, 'update_iter': update_iter} 83 | state_dict['agent'] = self._agent.state_dict() 84 | torch.save(state_dict, ckpt_path) 85 | logger.warn('Save checkpoint: %s', ckpt_path) 86 | 87 | replay_path = os.path.join(self._config.log_dir, 'replay_%08d.pkl' % ckpt_num) 88 | with gzip.open(replay_path, 'wb') as f: 89 | replay_buffers = {'replay': self._agent.replay_buffer()} 90 | pickle.dump(replay_buffers, f) 91 | 92 | def _load_ckpt(self, ckpt_num=None): 93 | """ 94 | Loads checkpoint with index number @ckpt_num. If @ckpt_num is None, 95 | it loads and returns the checkpoint with the largest index number. 96 | """ 97 | ckpt_path, ckpt_num = get_ckpt_path(self._config.log_dir, ckpt_num) 98 | 99 | if ckpt_path is not None: 100 | logger.warn('Load checkpoint %s', ckpt_path) 101 | ckpt = torch.load(ckpt_path) # ckpt is a dict with keys (step, update_iter, agent) 102 | self._agent.load_state_dict(ckpt['agent']) 103 | 104 | if self._config.is_train: 105 | replay_path = os.path.join(self._config.log_dir, 'replay_%08d.pkl' % ckpt_num) 106 | logger.warn('Load replay_buffer %s', replay_path) 107 | with gzip.open(replay_path, 'rb') as f: 108 | replay_buffers = pickle.load(f) 109 | self._agent.load_replay_buffer(replay_buffers['replay']) 110 | 111 | return ckpt['step'], ckpt['update_iter'] 112 | else: 113 | logger.warn('Randomly initialize models') 114 | return 0, 0 115 | 116 | def _log_ep(self, step, ep_info): 117 | """ 118 | Logs episode information to wandb. 119 | Args: 120 | step: the number of environment steps. 121 | ep_info: episode information to log, such as reward, episode time. 122 | """ 123 | for k, v in ep_info.items(): 124 | wandb.log({'train_ep/%s' % k: np.mean(v)}, step=step) 125 | wandb.log({'train_ep_max/%s' % k: np.max(v)}, step=step) 126 | 127 | def _log_train(self, step, train_info): 128 | """ 129 | Logs training information to wandb. 130 | Args: 131 | step: the number of environment steps. 132 | train_info: training information to log, such as loss, gradient. 
133 | """ 134 | for k, v in train_info.items(): 135 | if np.isscalar(v) or (hasattr(v, 'shape') and np.prod(v.shape) == 1): 136 | wandb.log({'train_rl/%s' % k: v}, step=step) 137 | else: 138 | wandb.log({'train_rl/%s' % k: [wandb.Image(v)]}, step=step) 139 | 140 | def _log_test(self, step, ep_info): 141 | """ 142 | Logs episode information during testing to wandb. 143 | Args: 144 | step: the number of environment steps. 145 | ep_info: episode information to log, such as reward, episode time. 146 | """ 147 | if self._config.is_train: 148 | for k, v in ep_info.items(): 149 | wandb.log({'test_ep/%s' % k: np.mean(v)}, step=step) 150 | 151 | def train(self): 152 | """ Trains an agent. """ 153 | config = self._config 154 | num_batches = config.num_batches 155 | 156 | # load checkpoint 157 | step, update_iter = self._load_ckpt() 158 | 159 | # sync the networks across the cpus 160 | self._agent.sync_networks() 161 | 162 | logger.info("Start training at step=%d", step) 163 | if self._is_chef: 164 | pbar = tqdm(initial=step, total=config.max_global_step, desc=config.run_name) 165 | ep_info = defaultdict(list) 166 | 167 | # decide how many episodes or how long rollout to collect 168 | if self._config.algo == 'sac': 169 | run_ep_max = 1 170 | run_step_max = 600 171 | else: 172 | run_step_max = self._config.rollout_length 173 | run_ep_max = 3 174 | 175 | # dummy run for preventing weird error in a cold run 176 | self._runner.run_episode() 177 | 178 | st_time = time() 179 | st_step = step 180 | log_step = 0 181 | while step < config.max_global_step: 182 | # collect rollouts 183 | run_ep = 0 184 | run_step = 0 185 | while run_step < run_step_max and run_ep < run_ep_max: 186 | # return one episode and info{len:, rew:, qpos:[], success:} about that episode 187 | rollout, info = self._runner.run_episode() 188 | run_step += info['len'] 189 | run_ep += 1 190 | log_step += info['len'] 191 | 192 | for k, v in info.items(): 193 | if isinstance(v, list): 194 | ep_info[k].extend(v) 195 | else: 196 | ep_info[k].append(v) 197 | 198 | self._log_ep(log_step, ep_info) 199 | ep_info = defaultdict(list) 200 | logger.info('rollout: %s', {k: v for k, v in info.items() if not 'qpos' in k}) 201 | self._agent.store_episode(rollout) 202 | self._update_normalizer(rollout) 203 | 204 | step_per_batch = mpi_sum(run_step) 205 | 206 | # train an agent 207 | logger.info('Update networks %d', update_iter) 208 | train_info = self._agent.train() 209 | logger.info('Update networks done') 210 | 211 | step += step_per_batch 212 | update_iter += 1 213 | 214 | # log training and episode information or evaluate 215 | if self._is_chef: 216 | pbar.update(step_per_batch) 217 | if update_iter % config.log_interval == 0: 218 | train_info.update({ 219 | 'sec': (time() - st_time) / config.log_interval, 220 | 'steps_per_sec': (step - st_step) / (time() - st_time), 221 | 'update_iter': update_iter 222 | }) 223 | st_time = time() 224 | st_step = step 225 | self._log_train(step, train_info) 226 | 227 | if update_iter % config.evaluate_interval == 1: 228 | logger.info('Evaluate at %d', update_iter) 229 | rollout, info = self._evaluate(step=step)## 230 | self._log_test(step, info) 231 | 232 | if update_iter % config.ckpt_interval == 0: 233 | self._save_ckpt(step, update_iter) 234 | 235 | logger.info('Reached %s steps. worker %d stopped.', step, config.rank) 236 | 237 | def _update_normalizer(self, rollout): 238 | """ Updates normalizer with @rollout. 
""" 239 | if self._config.ob_norm: 240 | self._agent.update_normalizer(rollout['ob']) 241 | 242 | def _evaluate(self, step=None, idx=None): 243 | """ 244 | Runs one rollout if in eval mode (@idx is not None). 245 | Runs num_record_samples rollouts if in train mode (@idx is None). 246 | 247 | Args: 248 | step: the number of environment steps. 249 | """ 250 | for i in range(self._config.num_record_samples): 251 | rollout, info = self._runner.run_episode(is_train=False) 252 | 253 | if idx is not None: 254 | break 255 | logger.info('rollout: %s', {k: v for k, v in info.items() if not 'qpos' in k}) 256 | return rollout, info 257 | 258 | def evaluate(self): 259 | """ Evaluates an agent stored in chekpoint with @self._config.ckpt_num. """ 260 | 261 | step, update_iter = self._load_ckpt(ckpt_num=self._config.ckpt_num) 262 | logger.info('Run %d evaluations at step=%d, update_iter=%d', self._config.num_eval, step, update_iter) 263 | 264 | for i in trange(self._config.num_eval): 265 | logger.warn("Evalute run %d", i+1) 266 | rollout, info = self._evaluate(step=step, idx=i) 267 | -------------------------------------------------------------------------------- /panda/environments/panda.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | import numpy as np 3 | import utils.transform_utils as T 4 | from environments import MujocoEnv 5 | from models.gripper import PandaGripper 6 | from models.robot import Panda 7 | from environments.action_space import ActionSpace 8 | from controller.arm_controller import * 9 | from collections import deque 10 | import hjson 11 | 12 | 13 | class PandaEnv(MujocoEnv): 14 | """Initializes a Panda robot environment.""" 15 | 16 | def __init__(self, config, controller_config_file): 17 | """ 18 | Args: 19 | All configuratin parameters are defined in config file 20 | 21 | controller_config_file (str): filepath to the corresponding controller config file that contains the 22 | associated controller parameters 23 | """ 24 | self.control_freq = config.control_freq 25 | self.has_gripper = config.gripper_type is not None 26 | self.gripper_visualization = config.gripper_visualization 27 | 28 | # Load the appropriate controller 29 | self._load_controller('joint_velocity', controller_config_file) 30 | super().__init__(config) 31 | 32 | def _load_controller(self, controller_type, controller_file): 33 | """ 34 | Loads controller to be used for dynamic trajectories 35 | Controller_type is a specified controller, and controller_params is a config file containing the appropriate 36 | parameters for that controller 37 | """ 38 | # Load the controller config file 39 | try: 40 | with open(controller_file) as f: 41 | params = hjson.load(f) 42 | except FileNotFoundError: 43 | print("Controller config file '{}' not found. Please check filepath and try again.".format(controller_file)) 44 | 45 | controller_params = params[controller_type] 46 | self.controller = JointVelocityController(**controller_params) 47 | 48 | def _load_model(self): 49 | """ 50 | Loads robot and add gripper. 
51 | """ 52 | super()._load_model() 53 | # Use xml that has motor torque actuators enabled 54 | self.mujoco_robot = Panda(xml_path="robot/panda/robot_torque.xml") 55 | 56 | if self.has_gripper: 57 | self.gripper = PandaGripper() 58 | if not self.gripper_visualization: 59 | self.gripper.hide_visualization() 60 | self.mujoco_robot.add_gripper("hand", self.gripper) 61 | 62 | def _reset_internal(self): 63 | """ 64 | Sets initial pose of arm and grippers. 65 | """ 66 | super()._reset_internal() 67 | self.sim.data.qpos[self._ref_joint_pos_indexes] = self.mujoco_robot.init_qpos 68 | self.sim.data.qvel[self._ref_joint_vel_indexes] = np.zeros(len(self._ref_joint_vel_indexes)) 69 | if self.has_gripper: 70 | self.sim.data.qpos[self._ref_gripper_joint_pos_indexes] = self.gripper.init_qpos 71 | self.sim.data.qvel[self._ref_gripper_joint_vel_indexes] = np.zeros(len(self._ref_gripper_joint_vel_indexes)) 72 | 73 | 74 | self.controller.reset() 75 | self.total_joint_torque = 0 76 | self.joint_torques = 0 77 | self.ee_v = np.zeros(6) 78 | 79 | def _get_reference(self): 80 | """ 81 | Sets up necessary reference for robot, gripper, and objects. 82 | """ 83 | super()._get_reference() 84 | # indices for joints in qpos, qvel 85 | self.robot_joints = list(self.mujoco_robot.joints) # ['joint1', 'joint2', 'joint3', 'joint4', 'joint5', 'joint6', 'joint7'] 86 | self._ref_joint_pos_indexes = [self.sim.model.get_joint_qpos_addr(x) for x in self.robot_joints] # [0, 1, 2, 3, 4, 5, 6] 87 | self._ref_joint_vel_indexes = [self.sim.model.get_joint_qvel_addr(x) for x in self.robot_joints] # [0, 1, 2, 3, 4, 5, 6] 88 | 89 | # indices for grippers in qpos, qvel 90 | if self.has_gripper: 91 | self.gripper_joints = list(self.gripper.joints) # ['finger_joint1', 'finger_joint2'] 92 | self._ref_gripper_joint_pos_indexes = [self.sim.model.get_joint_qpos_addr(x) for x in self.gripper_joints] # [7, 8] 93 | self._ref_gripper_joint_vel_indexes = [self.sim.model.get_joint_qvel_addr(x) for x in self.gripper_joints] # [7, 8] 94 | self._ref_joint_gripper_actuator_indexes = [self.sim.model.actuator_name2id(actuator) for actuator in self.sim.model.actuator_names 95 | if actuator.startswith("gripper")] # [7, 8] 96 | # IDs of sites for gripper visualization 97 | self.eef_site_id = self.sim.model.site_name2id("grip_site") # 5 98 | 99 | 100 | def _pre_action(self, action, policy_step): 101 | """ 102 | Overrides the superclass method to actuate the robot with the 103 | passed joint velocities and gripper control. 104 | 105 | Args: 106 | action (numpy array): The control to apply to the robot. The first 107 | @self.mujoco_robot.dof dimensions should be the desired 108 | normalized joint velocities and the next @self.gripper.dof dimensions should be 109 | actuation controls for the gripper. 
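The joint part of the action is converted to torques by the JointVelocityController, the
gripper part is mirrored by gripper.format_action() and rescaled to the actuator ctrl range,
and gravity/Coriolis compensation from sim.data.qfrc_bias is added before everything is
written into sim.data.ctrl.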
110 | """ 111 | 112 | self.policy_step = policy_step 113 | 114 | # Make sure action length is correct 115 | assert len(action) == self.dof, "environment got invalid action dimension" 116 | 117 | # using controller 118 | # Split action into joint control and peripheral (i.e.: gripper) control 119 | gripper_action = [], 120 | if self.has_gripper: 121 | gripper_action = action[self.controller.control_dim:] # all indexes past controller dimension indexes 122 | action = action[:self.controller.control_dim] 123 | 124 | action = action.copy() # ensure that we don't change the action outside of this scope 125 | self.controller.update_model(self.sim, joint_index=self._ref_joint_pos_indexes) 126 | torques = self.controller.action_to_torques(action, self.policy_step) # this scales and clips the actions correctly 127 | self.total_joint_torque += np.sum(abs(torques)) 128 | self.joint_torques = torques 129 | 130 | # Get gripper action 131 | if self.has_gripper: 132 | gripper_action_actual = self.gripper.format_action(gripper_action) # mirrors the commanded action i-e [0.54] ==> [-0.54 0.54] 133 | 134 | # rescale normalized gripper action to control ranges 135 | ctrl_range = self.sim.model.actuator_ctrlrange[self._ref_gripper_joint_vel_indexes] # e.g.[[0. 0.04] [-0.04 -0.]] 136 | bias = 0.5 * (ctrl_range[:, 1] + ctrl_range[:, 0]) # e.g.[0.02 -0.02] 137 | weight = 0.5 * (ctrl_range[:, 1] - ctrl_range[:, 0]) # e.g.[0.02 0.02] 138 | applied_gripper_action = bias + weight * gripper_action_actual 139 | self.sim.data.ctrl[self._ref_gripper_joint_vel_indexes] = [applied_gripper_action] #[1, -1] 140 | 141 | # Now,control the joints, qfrc_bias accounts for gravity compensation and other internal forces (coirolis, centrifugal) 142 | self.sim.data.ctrl[self._ref_joint_vel_indexes] = self.sim.data.qfrc_bias[self._ref_joint_vel_indexes] + torques 143 | 144 | # velocity of hand and joints to be used in reward 145 | self.ee_v = np.concatenate([self.sim.data.body_xvelp[self.sim.model.body_name2id("hand")], 146 | self.sim.data.body_xvelr[self.sim.model.body_name2id("hand")]]) 147 | self.q_vel = self.sim.data.qvel[self._ref_joint_vel_indexes] 148 | 149 | def _post_action(self, action): 150 | """ 151 | (Optional) does gripper visualization after actions. 
152 | """ 153 | ret = super()._post_action(action) 154 | if self.gripper_visualization: 155 | self._gripper_visualization() 156 | return ret 157 | 158 | def _get_observation(self): 159 | """ 160 | Returns an OrderedDict containing robot observations 161 | """ 162 | 163 | state = super()._get_observation() 164 | di = OrderedDict() 165 | 166 | # proprioceptive features 167 | di["joint_pos"] = np.array([self.sim.data.qpos[x] for x in self._ref_joint_pos_indexes]) 168 | di["joint_vel"] = np.array([self.sim.data.qvel[x] for x in self._ref_joint_vel_indexes]) 169 | 170 | # 7 x 3 (sinq, cosq, qdot) = 21 171 | robot_states = [np.sin(di["joint_pos"]), np.cos(di["joint_pos"]), di["joint_vel"]] 172 | 173 | if self.has_gripper: 174 | di["gripper_qpos"] = np.array([self.sim.data.qpos[x] for x in self._ref_gripper_joint_pos_indexes]) 175 | di["gripper_qvel"] = np.array([self.sim.data.qvel[x] for x in self._ref_gripper_joint_vel_indexes]) 176 | 177 | di["eef_pos"] = np.array(self.sim.data.body_xpos[self.sim.model.body_name2id('hand')]) 178 | di["eef_quat"] = T.convert_quat(self.sim.data.get_body_xquat("hand"), to="xyzw") 179 | di["eef_vlin"] = np.array(self.sim.data.get_body_xvelp('hand')) 180 | di["eef_vang"] = np.array(self.sim.data.get_body_xvelr('hand')) 181 | 182 | # add in gripper information (2(gripper) + 3(end-effector pos) + 4(end-effector rot) + 3(end-effector vel) + 3(end-effector w)) 183 | robot_states.extend([di["gripper_qpos"], di["eef_pos"], di["eef_quat"], di["eef_vlin"], di["eef_vang"]]) 184 | 185 | state["robot-state"] = np.concatenate(robot_states) 186 | 187 | return state 188 | 189 | 190 | @property 191 | def observation_space(self): 192 | """ 193 | Returns dict where keys are ob names and values are dimensions. 194 | """ 195 | ob_space = OrderedDict() 196 | observation = self._get_observation() 197 | ob_space['object-state'] = [observation['object-state'].size] 198 | ob_space['robot-state'] = [observation['robot-state'].size] 199 | return ob_space 200 | 201 | @property 202 | def dof(self): 203 | """ 204 | Returns the DoF of the robot (with grippers). 205 | """ 206 | dof = self.controller.action_dim 207 | 208 | if self.has_gripper: 209 | dof += self.gripper.dof 210 | return dof 211 | 212 | @property 213 | def action_space(self): 214 | """ 215 | Returns ActionSpec of action space, see 216 | action_spec.py for more documentation. 217 | """ 218 | return ActionSpace(self.dof) 219 | 220 | 221 | def _gripper_visualization(self): 222 | """ 223 | Do any needed visualization here. 224 | """ 225 | 226 | # By default, don't do any coloring. 227 | self.sim.model.site_rgba[self.eef_site_id] = [0., 1., 0., 1.] 228 | 229 | 230 | def _check_q_limits(self): 231 | """ 232 | Returns True if the arm is in joint limits or very close to. 233 | """ 234 | joint_limits = False 235 | tolerance = 0.15 236 | for (q, q_limits) in zip(self.sim.data.qpos[self._ref_joint_pos_indexes], self.sim.model.jnt_range[:7]): 237 | if (q < q_limits[0] + tolerance) or (q > abs(q_limits[1]) - tolerance): 238 | joint_limits = True 239 | return joint_limits 240 | -------------------------------------------------------------------------------- /panda/utils/transform_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility functions of matrix and vector transformations. 3 | 4 | NOTE: convention for quaternions is (x, y, z, w) 5 | """ 6 | 7 | import math 8 | import numpy as np 9 | 10 | 11 | PI = np.pi 12 | EPS = np.finfo(float).eps * 4. 
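# Example of the convention handled by convert_quat() below: MuJoCo stores quaternions as
# (w, x, y, z), so the identity rotation maps to this module's (x, y, z, w) ordering:
#   >>> convert_quat(np.array([1., 0., 0., 0.]), to="xyzw")
#   array([0., 0., 0., 1.])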
13 | 14 | # axis sequences for Euler angles 15 | _NEXT_AXIS = [1, 2, 0, 1] 16 | 17 | # map axes strings to/from tuples of inner axis, parity, repetition, frame 18 | _AXES2TUPLE = { 19 | "sxyz": (0, 0, 0, 0), 20 | "sxyx": (0, 0, 1, 0), 21 | "sxzy": (0, 1, 0, 0), 22 | "sxzx": (0, 1, 1, 0), 23 | "syzx": (1, 0, 0, 0), 24 | "syzy": (1, 0, 1, 0), 25 | "syxz": (1, 1, 0, 0), 26 | "syxy": (1, 1, 1, 0), 27 | "szxy": (2, 0, 0, 0), 28 | "szxz": (2, 0, 1, 0), 29 | "szyx": (2, 1, 0, 0), 30 | "szyz": (2, 1, 1, 0), 31 | "rzyx": (0, 0, 0, 1), 32 | "rxyx": (0, 0, 1, 1), 33 | "ryzx": (0, 1, 0, 1), 34 | "rxzx": (0, 1, 1, 1), 35 | "rxzy": (1, 0, 0, 1), 36 | "ryzy": (1, 0, 1, 1), 37 | "rzxy": (1, 1, 0, 1), 38 | "ryxy": (1, 1, 1, 1), 39 | "ryxz": (2, 0, 0, 1), 40 | "rzxz": (2, 0, 1, 1), 41 | "rxyz": (2, 1, 0, 1), 42 | "rzyz": (2, 1, 1, 1), 43 | } 44 | 45 | _TUPLE2AXES = dict((v, k) for k, v in _AXES2TUPLE.items()) 46 | 47 | 48 | def convert_quat(q, to="xyzw"): 49 | """ 50 | Converts quaternion from one convention to another. 51 | The convention to convert TO is specified as an optional argument. 52 | If to == 'xyzw', then the input is in 'wxyz' format, and vice-versa. 53 | 54 | Args: 55 | q: a 4-dim numpy array corresponding to a quaternion 56 | to: a string, either 'xyzw' or 'wxyz', determining 57 | which convention to convert to. 58 | """ 59 | if to == "xyzw": 60 | return q[[1, 2, 3, 0]] 61 | if to == "wxyz": 62 | return q[[3, 0, 1, 2]] 63 | raise Exception("convert_quat: choose a valid `to` argument (xyzw or wxyz)") 64 | 65 | 66 | def quat_multiply(quaternion1, quaternion0): 67 | """Return multiplication of two quaternions. 68 | >>> q = quat_multiply([1, -2, 3, 4], [-5, 6, 7, 8]) 69 | >>> np.allclose(q, [-44, -14, 48, 28]) 70 | True 71 | """ 72 | x0, y0, z0, w0 = quaternion0 73 | x1, y1, z1, w1 = quaternion1 74 | return np.array( 75 | ( 76 | x1 * w0 + y1 * z0 - z1 * y0 + w1 * x0, 77 | -x1 * z0 + y1 * w0 + z1 * x0 + w1 * y0, 78 | x1 * y0 - y1 * x0 + z1 * w0 + w1 * z0, 79 | -x1 * x0 - y1 * y0 - z1 * z0 + w1 * w0, 80 | ), 81 | dtype=np.float32, 82 | ) 83 | 84 | 85 | def quat_conjugate(quaternion): 86 | """Return conjugate of quaternion. 87 | >>> q0 = random_quaternion() 88 | >>> q1 = quat_conjugate(q0) 89 | >>> q1[3] == q0[3] and all(q1[:3] == -q0[:3]) 90 | True 91 | """ 92 | return np.array( 93 | (-quaternion[0], -quaternion[1], -quaternion[2], quaternion[3]), 94 | dtype=np.float32, 95 | ) 96 | 97 | 98 | def quat_inverse(quaternion): 99 | """Return inverse of quaternion. 100 | >>> q0 = random_quaternion() 101 | >>> q1 = quat_inverse(q0) 102 | >>> np.allclose(quat_multiply(q0, q1), [0, 0, 0, 1]) 103 | True 104 | """ 105 | return quat_conjugate(quaternion) / np.dot(quaternion, quaternion) 106 | 107 | 108 | def quat_slerp(quat0, quat1, fraction, spin=0, shortestpath=True): 109 | """Return spherical linear interpolation between two quaternions. 
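With both inputs normalized and spin = 0, the result is
q = (sin((1 - fraction) * theta) * q0 + sin(fraction * theta) * q1) / sin(theta),
where theta = acos(q0 . q1) (negated to take the shorter arc when shortestpath is True),
which is what the implementation below computes.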
110 | >>> q0 = random_quat() 111 | >>> q1 = random_quat() 112 | >>> q = quat_slerp(q0, q1, 0.0) 113 | >>> np.allclose(q, q0) 114 | True 115 | >>> q = quat_slerp(q0, q1, 1.0, 1) 116 | >>> np.allclose(q, q1) 117 | True 118 | >>> q = quat_slerp(q0, q1, 0.5) 119 | >>> angle = math.acos(np.dot(q0, q)) 120 | >>> np.allclose(2.0, math.acos(np.dot(q0, q1)) / angle) or \ 121 | np.allclose(2.0, math.acos(-np.dot(q0, q1)) / angle) 122 | True 123 | """ 124 | q0 = unit_vector(quat0[:4]) 125 | q1 = unit_vector(quat1[:4]) 126 | if fraction == 0.0: 127 | return q0 128 | elif fraction == 1.0: 129 | return q1 130 | d = np.dot(q0, q1) 131 | if abs(abs(d) - 1.0) < EPS: 132 | return q0 133 | if shortestpath and d < 0.0: 134 | # invert rotation 135 | d = -d 136 | q1 *= -1.0 137 | angle = math.acos(d) + spin * math.pi 138 | if abs(angle) < EPS: 139 | return q0 140 | isin = 1.0 / math.sin(angle) 141 | q0 *= math.sin((1.0 - fraction) * angle) * isin 142 | q1 *= math.sin(fraction * angle) * isin 143 | q0 += q1 144 | return q0 145 | 146 | 147 | def random_quat(rand=None): 148 | """Return uniform random unit quaternion. 149 | rand: array like or None 150 | Three independent random variables that are uniformly distributed 151 | between 0 and 1. 152 | >>> q = random_quat() 153 | >>> np.allclose(1.0, vector_norm(q)) 154 | True 155 | >>> q = random_quat(np.random.random(3)) 156 | >>> q.shape 157 | (4,) 158 | """ 159 | if rand is None: 160 | rand = np.random.rand(3) 161 | else: 162 | assert len(rand) == 3 163 | r1 = np.sqrt(1.0 - rand[0]) 164 | r2 = np.sqrt(rand[0]) 165 | pi2 = math.pi * 2.0 166 | t1 = pi2 * rand[1] 167 | t2 = pi2 * rand[2] 168 | return np.array( 169 | (np.sin(t1) * r1, np.cos(t1) * r1, np.sin(t2) * r2, np.cos(t2) * r2), 170 | dtype=np.float32, 171 | ) 172 | 173 | 174 | def vec(values): 175 | """ 176 | Converts value tuple into a numpy vector. 177 | 178 | Args: 179 | values: a tuple of numbers 180 | 181 | Returns: 182 | a numpy vector of given values 183 | """ 184 | return np.array(values, dtype=np.float32) 185 | 186 | 187 | def mat4(array): 188 | """ 189 | Converts an array to 4x4 matrix. 190 | 191 | Args: 192 | array: the array in form of vec, list, or tuple 193 | 194 | Returns: 195 | a 4x4 numpy matrix 196 | """ 197 | return np.array(array, dtype=np.float32).reshape((4, 4)) 198 | 199 | 200 | def mat2pose(hmat): 201 | """ 202 | Converts a homogeneous 4x4 matrix into pose. 203 | 204 | Args: 205 | hmat: a 4x4 homogeneous matrix 206 | 207 | Returns: 208 | (pos, orn) tuple where pos is vec3 float in cartesian, 209 | orn is vec4 float quaternion 210 | """ 211 | pos = hmat[:3, 3] 212 | orn = mat2quat(hmat[:3, :3]) 213 | return pos, orn 214 | 215 | 216 | def mat2quat(rmat, precise=False): 217 | """ 218 | Converts given rotation matrix to quaternion. 219 | 220 | Args: 221 | rmat: 3x3 rotation matrix 222 | precise: If precise is True, the input matrix is assumed to be a precise 223 | rotation matrix and a faster algorithm is used.


def mat2quat(rmat, precise=False):
    """
    Converts the given rotation matrix to a quaternion.

    Args:
        rmat: 3x3 rotation matrix
        precise: If True, the input matrix is assumed to be a precise
            rotation matrix and a faster algorithm is used.

    Returns:
        vec4 float quaternion in (x, y, z, w) format
    """
    M = np.array(rmat, dtype=np.float32, copy=False)[:3, :3]
    if precise:
        # the trace of the equivalent 4x4 homogeneous matrix is trace(M) + 1
        q = np.empty((4,))
        t = np.trace(M) + 1.0
        if t > 1.0:
            q[0] = t
            q[3] = M[1, 0] - M[0, 1]
            q[2] = M[0, 2] - M[2, 0]
            q[1] = M[2, 1] - M[1, 2]
        else:
            i, j, k = 0, 1, 2
            if M[1, 1] > M[0, 0]:
                i, j, k = 1, 2, 0
            if M[2, 2] > M[i, i]:
                i, j, k = 2, 0, 1
            t = M[i, i] - (M[j, j] + M[k, k]) + 1.0
            q[i] = t
            q[j] = M[i, j] + M[j, i]
            q[k] = M[k, i] + M[i, k]
            q[3] = M[k, j] - M[j, k]
            q = q[[3, 0, 1, 2]]
        q *= 0.5 / math.sqrt(t)
    else:
        m00 = M[0, 0]
        m01 = M[0, 1]
        m02 = M[0, 2]
        m10 = M[1, 0]
        m11 = M[1, 1]
        m12 = M[1, 2]
        m20 = M[2, 0]
        m21 = M[2, 1]
        m22 = M[2, 2]
        # symmetric matrix K
        K = np.array(
            [
                [m00 - m11 - m22, 0.0, 0.0, 0.0],
                [m01 + m10, m11 - m00 - m22, 0.0, 0.0],
                [m02 + m20, m12 + m21, m22 - m00 - m11, 0.0],
                [m21 - m12, m02 - m20, m10 - m01, m00 + m11 + m22],
            ]
        )
        K /= 3.0
        # quaternion is the eigenvector of K that corresponds to the largest eigenvalue
        w, V = np.linalg.eigh(K)
        q = V[[3, 0, 1, 2], np.argmax(w)]
    if q[0] < 0.0:
        np.negative(q, q)
    return q[[1, 2, 3, 0]]


def euler2mat(euler):
    """
    Converts euler angles (assumed x-y-z convention, in radians) to a rotation matrix.
    """
    euler = np.asarray(euler, dtype=np.float64)
    assert euler.shape[-1] == 3, "Invalid shaped euler {}".format(euler)

    ai, aj, ak = -euler[..., 2], -euler[..., 1], -euler[..., 0]
    si, sj, sk = np.sin(ai), np.sin(aj), np.sin(ak)
    ci, cj, ck = np.cos(ai), np.cos(aj), np.cos(ak)
    cc, cs = ci * ck, ci * sk
    sc, ss = si * ck, si * sk

    mat = np.empty(euler.shape[:-1] + (3, 3), dtype=np.float64)
    mat[..., 2, 2] = cj * ck
    mat[..., 2, 1] = sj * sc - cs
    mat[..., 2, 0] = sj * cc + ss
    mat[..., 1, 2] = cj * sk
    mat[..., 1, 1] = sj * ss + cc
    mat[..., 1, 0] = sj * cs - sc
    mat[..., 0, 2] = -sj
    mat[..., 0, 1] = cj * si
    mat[..., 0, 0] = cj * ci
    return mat
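
# --- Illustrative usage (editor's sketch, not part of the original module) ---
# Round-tripping a rotation through euler2mat -> mat2quat -> quat2mat should
# reproduce the same matrix up to numerical precision; the euler angles below are
# arbitrary example values and the function name is hypothetical.
def _example_rotation_roundtrip():
    R = euler2mat(np.array([0.1, -0.4, 0.7]))  # example angles in radians
    q = mat2quat(R)                            # (x, y, z, w)
    assert np.allclose(quat2mat(q), R, atol=1e-4)
    return q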


def mat2euler(rmat, axes="sxyz"):
    """
    Converts the given rotation matrix to euler angles in radians.

    Args:
        rmat: 3x3 rotation matrix
        axes: One of 24 axis sequences as string or encoded tuple

    Returns:
        converted euler angles in radians, vec3 float
    """
    try:
        firstaxis, parity, repetition, frame = _AXES2TUPLE[axes.lower()]
    except (AttributeError, KeyError):
        firstaxis, parity, repetition, frame = axes

    i = firstaxis
    j = _NEXT_AXIS[i + parity]
    k = _NEXT_AXIS[i - parity + 1]

    M = np.array(rmat, dtype=np.float32, copy=False)[:3, :3]
    if repetition:
        sy = math.sqrt(M[i, j] * M[i, j] + M[i, k] * M[i, k])
        if sy > EPS:
            ax = math.atan2(M[i, j], M[i, k])
            ay = math.atan2(sy, M[i, i])
            az = math.atan2(M[j, i], -M[k, i])
        else:
            ax = math.atan2(-M[j, k], M[j, j])
            ay = math.atan2(sy, M[i, i])
            az = 0.0
    else:
        cy = math.sqrt(M[i, i] * M[i, i] + M[j, i] * M[j, i])
        if cy > EPS:
            ax = math.atan2(M[k, j], M[k, k])
            ay = math.atan2(-M[k, i], cy)
            az = math.atan2(M[j, i], M[i, i])
        else:
            ax = math.atan2(-M[j, k], M[j, j])
            ay = math.atan2(-M[k, i], cy)
            az = 0.0

    if parity:
        ax, ay, az = -ax, -ay, -az
    if frame:
        ax, az = az, ax
    return vec((ax, ay, az))


def pose2mat(pose):
    """
    Converts a pose to a homogeneous matrix.

    Args:
        pose: a (pos, orn) tuple where pos is vec3 float cartesian, and
            orn is vec4 float quaternion.

    Returns:
        4x4 homogeneous matrix
    """
    homo_pose_mat = np.zeros((4, 4), dtype=np.float32)
    homo_pose_mat[:3, :3] = quat2mat(pose[1])
    homo_pose_mat[:3, 3] = np.array(pose[0], dtype=np.float32)
    homo_pose_mat[3, 3] = 1.
    return homo_pose_mat


def quat2mat(quaternion):
    """
    Converts the given quaternion (x, y, z, w) to a rotation matrix.

    Args:
        quaternion: vec4 float (x, y, z, w) quaternion

    Returns:
        3x3 rotation matrix
    """
    q = np.array(quaternion, dtype=np.float32, copy=True)[[3, 0, 1, 2]]
    n = np.dot(q, q)
    if n < EPS:
        return np.identity(3)
    q *= math.sqrt(2.0 / n)
    q = np.outer(q, q)
    return np.array(
        [
            [1.0 - q[2, 2] - q[3, 3], q[1, 2] - q[3, 0], q[1, 3] + q[2, 0]],
            [q[1, 2] + q[3, 0], 1.0 - q[1, 1] - q[3, 3], q[2, 3] - q[1, 0]],
            [q[1, 3] - q[2, 0], q[2, 3] + q[1, 0], 1.0 - q[1, 1] - q[2, 2]],
        ]
    )


def pose_in_A_to_pose_in_B(pose_A, pose_A_in_B):
    """
    Converts a homogeneous matrix corresponding to a point C in frame A
    to a homogeneous matrix corresponding to the same point C in frame B.

    Args:
        pose_A: numpy array of shape (4,4) corresponding to the pose of C in frame A
        pose_A_in_B: numpy array of shape (4,4) corresponding to the pose of A in frame B

    Returns:
        numpy array of shape (4,4) corresponding to the pose of C in frame B
    """

    # the pose of A in B takes a point expressed in A and expresses it in B

    # pose of C in B = pose of A in B * pose of C in A
    # take a point in C, transform it to A, then to B
    # T_B^C = T_B^A * T_A^C
    return pose_A_in_B.dot(pose_A)
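
# --- Illustrative usage (editor's sketch, not part of the original module) ---
# Expressing a gripper pose known in the robot base frame in the world frame, using
# hypothetical poses with identity orientation; shows the multiplication order that
# pose_in_A_to_pose_in_B implements.
def _example_change_of_frame():
    gripper_in_base = pose2mat((np.array([0.5, 0.0, 0.3]), np.array([0.0, 0.0, 0.0, 1.0])))
    base_in_world = pose2mat((np.array([0.0, 0.0, 0.8]), np.array([0.0, 0.0, 0.0, 1.0])))
    gripper_in_world = pose_in_A_to_pose_in_B(gripper_in_base, base_in_world)
    assert np.allclose(gripper_in_world[:3, 3], [0.5, 0.0, 1.1])
    return gripper_in_world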


def pose_inv(pose):
    """
    Computes the inverse of a homogeneous matrix corresponding to the pose of some
    frame B in frame A. The inverse is the pose of frame A in frame B.

    Args:
        pose: numpy array of shape (4,4) for the pose to invert

    Returns:
        numpy array of shape (4,4) for the inverse pose
    """

    # Note: the inverse of a pose matrix is the following
    # [R t; 0 1]^-1 = [R.T -R.T*t; 0 1]

    # Intuitively, this makes sense.
    # The original pose matrix rotates a point by R and then translates it by t.
    # To undo this, we translate back by -t and rotate back by R^-1 = R.T;
    # composed into a single homogeneous matrix, that is a rotation of R.T and a
    # translation of -R.T*t.

    pose_inv = np.zeros((4, 4))
    pose_inv[:3, :3] = pose[:3, :3].T
    pose_inv[:3, 3] = -pose_inv[:3, :3].dot(pose[:3, 3])
    pose_inv[3, 3] = 1.0
    return pose_inv


def _skew_symmetric_translation(pos_A_in_B):
    """
    Helper function to get a skew-symmetric translation matrix for converting quantities
    between frames.
    """
    return np.array(
        [
            0.,
            -pos_A_in_B[2],
            pos_A_in_B[1],
            pos_A_in_B[2],
            0.,
            -pos_A_in_B[0],
            -pos_A_in_B[1],
            pos_A_in_B[0],
            0.,
        ]
    ).reshape((3, 3))


def vel_in_A_to_vel_in_B(vel_A, ang_vel_A, pose_A_in_B):
    """
    Converts linear and angular velocity of a point in frame A to the equivalent in frame B.

    Args:
        vel_A: 3-dim iterable for linear velocity in A
        ang_vel_A: 3-dim iterable for angular velocity in A
        pose_A_in_B: numpy array of shape (4,4) corresponding to the pose of A in frame B

    Returns:
        vel_B, ang_vel_B: two numpy arrays of shape (3,) for the velocities in B
    """
    pos_A_in_B = pose_A_in_B[:3, 3]
    rot_A_in_B = pose_A_in_B[:3, :3]
    skew_symm = _skew_symmetric_translation(pos_A_in_B)
    vel_B = rot_A_in_B.dot(vel_A) + skew_symm.dot(rot_A_in_B.dot(ang_vel_A))
    ang_vel_B = rot_A_in_B.dot(ang_vel_A)
    return vel_B, ang_vel_B


def force_in_A_to_force_in_B(force_A, torque_A, pose_A_in_B):
    """
    Converts linear force and rotational force (moment) at a point in frame A to the
    equivalent in frame B.

    Args:
        force_A: 3-dim iterable for linear force in A
        torque_A: 3-dim iterable for rotational force (moment) in A
        pose_A_in_B: numpy array of shape (4,4) corresponding to the pose of A in frame B

    Returns:
        force_B, torque_B: two numpy arrays of shape (3,) for the forces in B
    """
    pos_A_in_B = pose_A_in_B[:3, 3]
    rot_A_in_B = pose_A_in_B[:3, :3]
    skew_symm = _skew_symmetric_translation(pos_A_in_B)
    force_B = rot_A_in_B.T.dot(force_A)
    torque_B = -rot_A_in_B.T.dot(skew_symm.dot(force_A)) + rot_A_in_B.T.dot(torque_A)
    return force_B, torque_B
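
# --- Illustrative usage (editor's sketch, not part of the original module) ---
# pose_inv composed with the original pose gives the identity, and mapping a pose
# into frame B and back with the inverse recovers the original; poses below are
# arbitrary example values and the function name is hypothetical.
def _example_pose_inverse():
    A_in_B = make_pose(np.array([0.2, -0.1, 0.5]), euler2mat(np.array([0.0, 0.0, 0.3])))
    assert np.allclose(A_in_B.dot(pose_inv(A_in_B)), np.eye(4), atol=1e-6)
    C_in_A = make_pose(np.array([0.1, 0.0, 0.0]), np.eye(3))
    C_in_B = pose_in_A_to_pose_in_B(C_in_A, A_in_B)
    assert np.allclose(pose_in_A_to_pose_in_B(C_in_B, pose_inv(A_in_B)), C_in_A, atol=1e-6)
    return C_in_B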


def rotation_matrix(angle, direction, point=None):
    """
    Returns a matrix to rotate about an axis defined by point and direction.

    Examples:

    >>> angle = (np.random.random() - 0.5) * (2*math.pi)
    >>> direc = np.random.random(3) - 0.5
    >>> point = np.random.random(3) - 0.5
    >>> R0 = rotation_matrix(angle, direc, point)
    >>> R1 = rotation_matrix(angle-2*math.pi, direc, point)
    >>> np.allclose(R0, R1)
    True
    >>> R0 = rotation_matrix(angle, direc, point)
    >>> R1 = rotation_matrix(-angle, -direc, point)
    >>> np.allclose(R0, R1)
    True
    >>> I = np.identity(4, np.float32)
    >>> np.allclose(I, rotation_matrix(math.pi*2, direc))
    True
    >>> np.allclose(2., np.trace(rotation_matrix(math.pi/2,
    ...                                          direc, point)))
    True

    """
    sina = math.sin(angle)
    cosa = math.cos(angle)
    direction = unit_vector(direction[:3])
    # rotation matrix around unit vector
    R = np.array(
        ((cosa, 0.0, 0.0), (0.0, cosa, 0.0), (0.0, 0.0, cosa)), dtype=np.float32
    )
    R += np.outer(direction, direction) * (1.0 - cosa)
    direction *= sina
    R += np.array(
        (
            (0.0, -direction[2], direction[1]),
            (direction[2], 0.0, -direction[0]),
            (-direction[1], direction[0], 0.0),
        ),
        dtype=np.float32,
    )
    M = np.identity(4)
    M[:3, :3] = R
    if point is not None:
        # rotation not around origin
        point = np.array(point[:3], dtype=np.float32, copy=False)
        M[:3, 3] = point - np.dot(R, point)
    return M


def make_pose(translation, rotation):
    """
    Makes a homogeneous pose matrix from a translation vector and a rotation matrix.

    Args:
        translation: a 3-dim iterable
        rotation: a 3x3 matrix

    Returns:
        pose: a 4x4 homogeneous matrix
    """
    pose = np.zeros((4, 4))
    pose[:3, :3] = rotation
    pose[:3, 3] = translation
    pose[3, 3] = 1.0
    return pose


def unit_vector(data, axis=None, out=None):
    """
    Returns an ndarray normalized by its length, i.e. its Euclidean norm, along axis.

    Examples:

    >>> v0 = np.random.random(3)
    >>> v1 = unit_vector(v0)
    >>> np.allclose(v1, v0 / np.linalg.norm(v0))
    True
    >>> v0 = np.random.rand(5, 4, 3)
    >>> v1 = unit_vector(v0, axis=-1)
    >>> v2 = v0 / np.expand_dims(np.sqrt(np.sum(v0*v0, axis=2)), 2)
    >>> np.allclose(v1, v2)
    True
    >>> v1 = unit_vector(v0, axis=1)
    >>> v2 = v0 / np.expand_dims(np.sqrt(np.sum(v0*v0, axis=1)), 1)
    >>> np.allclose(v1, v2)
    True
    >>> v1 = np.empty((5, 4, 3), dtype=np.float32)
    >>> unit_vector(v0, axis=1, out=v1)
    >>> np.allclose(v1, v2)
    True
    >>> list(unit_vector([]))
    []
    >>> list(unit_vector([1.0]))
    [1.0]

    """
    if out is None:
        data = np.array(data, dtype=np.float32, copy=True)
        if data.ndim == 1:
            data /= math.sqrt(np.dot(data, data))
            return data
    else:
        if out is not data:
            out[:] = np.array(data, copy=False)
        data = out
    length = np.atleast_1d(np.sum(data * data, axis))
    np.sqrt(length, length)
    if axis is not None:
        length = np.expand_dims(length, axis)
    data /= length
    if out is None:
        return data
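
# --- Illustrative usage (editor's sketch, not part of the original module) ---
# A rotation of 90 degrees about the z-axis through a pivot point leaves the pivot
# fixed and moves an off-axis point; the axis and points are arbitrary example
# values and the function name is hypothetical.
def _example_rotation_about_axis():
    axis = unit_vector(np.array([0.0, 0.0, 1.0]))
    pivot = np.array([1.0, 0.0, 0.0])
    T = rotation_matrix(math.pi / 2.0, axis, point=pivot)
    on_axis = np.append(pivot, 1.0)                 # homogeneous coordinates
    assert np.allclose(T.dot(on_axis)[:3], pivot, atol=1e-6)
    off_axis = np.array([2.0, 0.0, 0.0, 1.0])
    assert np.allclose(T.dot(off_axis)[:3], [1.0, 1.0, 0.0], atol=1e-6)
    return T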


def get_orientation_error(target_orn, current_orn):
    """
    Returns the difference between two quaternion orientations as a 3 DOF numpy array.
    For use in an impedance controller / task-space PD controller.

    Args:
        target_orn: 4-dim iterable, desired orientation as a (x, y, z, w) quaternion
        current_orn: 4-dim iterable, current orientation as a (x, y, z, w) quaternion

    Returns:
        orn_error: 3-dim numpy array for the current orientation error, corresponding to
            (target_orn - current_orn)
    """
    current_orn = np.array(
        [current_orn[3], current_orn[0], current_orn[1], current_orn[2]]
    )
    target_orn = np.array([target_orn[3], target_orn[0], target_orn[1], target_orn[2]])

    pinv = np.zeros((3, 4))
    pinv[0, :] = [-current_orn[1], current_orn[0], -current_orn[3], current_orn[2]]
    pinv[1, :] = [-current_orn[2], current_orn[3], current_orn[0], -current_orn[1]]
    pinv[2, :] = [-current_orn[3], -current_orn[2], current_orn[1], current_orn[0]]
    orn_error = 2.0 * pinv.dot(np.array(target_orn))
    return orn_error


def get_pose_error(target_pose, current_pose):
    """
    Computes the error corresponding to target pose - current pose as a 6-dim vector.
    The first 3 components correspond to the translational error while the last 3
    components correspond to the rotational error.

    Args:
        target_pose: a 4x4 homogeneous matrix for the target pose
        current_pose: a 4x4 homogeneous matrix for the current pose

    Returns:
        A 6-dim numpy array for the pose error.
    """
    error = np.zeros(6)

    # compute translational error
    target_pos = target_pose[:3, 3]
    current_pos = current_pose[:3, 3]
    pos_err = target_pos - current_pos

    # compute rotational error
    r1 = current_pose[:3, 0]
    r2 = current_pose[:3, 1]
    r3 = current_pose[:3, 2]
    r1d = target_pose[:3, 0]
    r2d = target_pose[:3, 1]
    r3d = target_pose[:3, 2]
    rot_err = 0.5 * (np.cross(r1, r1d) + np.cross(r2, r2d) + np.cross(r3, r3d))

    error[:3] = pos_err
    error[3:] = rot_err
    return error

--------------------------------------------------------------------------------