├── .gitignore ├── .gitmodules ├── LICENSE ├── assets └── 1_logo.gif ├── env ├── benchmark.py ├── bodies_for_visualization.py ├── body.py ├── camera.py ├── hand.py ├── handover_env.py ├── objects.py ├── panda.py ├── status_checker.py ├── table.py ├── tools │ ├── process_dexycb.py │ └── sdf.py └── utils │ ├── hand_collision_filter.py │ ├── robot_kinematics.py │ ├── scene.py │ ├── sdf_loss.py │ └── transform.py ├── evaluate.py ├── evaluate_config.py ├── models ├── encoders │ └── pointnet2.py ├── loss.py ├── policy_network.py └── utils.py ├── policies ├── base_policy.py ├── base_policy_config.py ├── offline_policy.py ├── offline_policy_config.py ├── pointnet2_policy.py ├── pointnet2_policy_config.py └── utils │ └── point_cloud.py ├── policy_runner.py ├── readme.md └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | data 3 | log 4 | tmp 5 | *.bullet 6 | wandb -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "env/third_party/orocos_kinematics_dynamics"] 2 | path = env/third_party/orocos_kinematics_dynamics 3 | url = git@github.com:orocos/orocos_kinematics_dynamics.git 4 | [submodule "third_party/Pointnet2_PyTorch"] 5 | path = third_party/Pointnet2_PyTorch 6 | url = git@github.com:erikwijmans/Pointnet2_PyTorch.git 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 GenH2R 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /assets/1_logo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenjy2003/genh2r/4b0187bed706d3c6a0c22654ae64c11840edf408/assets/1_logo.gif -------------------------------------------------------------------------------- /env/benchmark.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from typing import List, Optional 3 | from .utils.scene import load_scene_data 4 | 5 | def get_dexycb_scene_ids(setup: str, split: str) -> List[int]: 6 | _EVAL_SKIP_OBJECT = [0, 15] 7 | if setup == "s0": # Seen subjects, camera views, grasped objects. 
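# Scene-id layout (inferred from the index filtering at the end of this function):
# each DexYCB scene id i in [0, 1000) appears to encode i // 100 -> subject index,
# i % 100 -> sequence index within that subject, and i // 5 % 20 -> grasped-object
# index (ids whose object index is in _EVAL_SKIP_OBJECT are skipped). For i % 5 in
# (0, 1) the hand side is right, for (2, 3) it is left, and for i % 5 == 4 it is
# looked up via load_scene_data(i)["hand_side"].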
8 | if split == "train": 9 | subject_ind = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] 10 | sequence_ind = [i for i in range(100) if i % 5 != 4] 11 | if split == "val": 12 | subject_ind = [0, 1] 13 | sequence_ind = [i for i in range(100) if i % 5 == 4] 14 | if split == "test": 15 | subject_ind = [2, 3, 4, 5, 6, 7, 8, 9] 16 | sequence_ind = [i for i in range(100) if i % 5 == 4] 17 | mano_side = ["right", "left"] 18 | elif setup == "s1": # Unseen subjects. 19 | if split == "train": 20 | subject_ind = [0, 1, 2, 3, 4, 5, 9] 21 | if split == "val": 22 | subject_ind = [6] 23 | if split == "test": 24 | subject_ind = [7, 8] 25 | sequence_ind = [*range(100)] 26 | mano_side = ["right", "left"] 27 | elif setup == "s2": # Unseen handedness. 28 | if split == "train": 29 | subject_ind = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] 30 | mano_side = ["right"] 31 | if split == "val": 32 | subject_ind = [0, 1] 33 | mano_side = ["left"] 34 | if split == "test": 35 | subject_ind = [2, 3, 4, 5, 6, 7, 8, 9] 36 | mano_side = ["left"] 37 | sequence_ind = [*range(100)] 38 | elif setup == "s3": # Unseen grasped objects. 39 | subject_ind = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] 40 | if split == "train": 41 | sequence_ind = [i for i in range(100) if i // 5 not in (3, 7, 11, 15, 19)] 42 | if split == "val": 43 | sequence_ind = [i for i in range(100) if i // 5 in (3, 19)] 44 | if split == "test": 45 | sequence_ind = [i for i in range(100) if i // 5 in (7, 11, 15)] 46 | mano_side = ["right", "left"] 47 | 48 | scene_ids = [] 49 | for i in range(1000): 50 | if i // 5 % 20 in _EVAL_SKIP_OBJECT: 51 | continue 52 | if i // 100 in subject_ind and i % 100 in sequence_ind: 53 | if mano_side == ["right", "left"]: 54 | scene_ids.append(i) 55 | else: 56 | if i % 5 != 4: 57 | if (i % 5 in (0, 1) and mano_side == ["right"] or i % 5 in (2, 3) and mano_side == ["left"]): 58 | scene_ids.append(i) 59 | elif mano_side[0] == load_scene_data(i)["hand_side"]: 60 | scene_ids.append(i) 61 | return scene_ids 62 | 63 | def get_genh2rsim_scene_ids(setup: str, split: str, start_object_idx: Optional[int]=None, end_object_idx: Optional[int]=None, start_traj_idx: Optional[int]=None, end_traj_idx: Optional[int]=None) -> List[int]: 64 | if setup == "t0": 65 | start_id = 1000000 66 | num_objects = 8836 # number of acronym objects 67 | num_trajs_per_object = 128 68 | all_scene_ids = np.arange(start_id, start_id+num_objects*num_trajs_per_object) 69 | if split == "train": 70 | object_mask = np.arange(num_objects)%10<8 71 | elif split == "val": 72 | object_mask = np.arange(num_objects)%10==8 73 | elif split == "test": 74 | object_mask = np.arange(num_objects)%10==9 75 | else: 76 | raise ValueError(split) 77 | if start_object_idx is not None and end_object_idx is not None: 78 | object_idxs = np.arange(num_objects) 79 | object_mask &= (start_object_idx<=object_idxs)&(object_idxs List[int]: 88 | if setup in ["s0", "s1", "s2", "s3"]: 89 | return get_dexycb_scene_ids(setup, split) 90 | elif setup in ["t0"]: 91 | return get_genh2rsim_scene_ids(setup, split, start_object_idx, end_object_idx, start_traj_idx, end_traj_idx) 92 | else: 93 | raise ValueError(setup) 94 | -------------------------------------------------------------------------------- /env/bodies_for_visualization.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from numpy.typing import NDArray 4 | from pybullet_utils.bullet_client import BulletClient 5 | from dataclasses import dataclass 6 | from typing import Tuple, Optional, Any 7 | from omegaconf import 
MISSING 8 | from scipy.spatial.transform import Rotation as Rt 9 | 10 | from .body import Body, BodyConfig 11 | from .utils.transform import mat_to_pos_ros_quat 12 | 13 | @dataclass 14 | class GraspConfig(BodyConfig): 15 | name: str = "grasp" 16 | urdf_file: str = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data", "assets", "grasp", "grasp_simplified.urdf") 17 | # collision 18 | collision_mask: int = 0 19 | # base 20 | use_fixed_base: bool = True 21 | base_position: Tuple[float] = MISSING 22 | base_orientation: Tuple[float] = MISSING 23 | # visual 24 | link_color: Tuple[float] = MISSING 25 | 26 | def get_grasp_config(pose_mat: NDArray, color=[1., 0., 0., 1.]): # NDArray is not supported by OmegaConf 27 | pos, orn = mat_to_pos_ros_quat(pose_mat) 28 | grasp_config = GraspConfig(base_position=tuple(pos.tolist()), base_orientation=tuple(orn.tolist()), link_color=color) 29 | return grasp_config 30 | 31 | class Grasp(Body): 32 | def __init__(self, bullet_client: BulletClient, cfg: GraspConfig): 33 | super().__init__(bullet_client, cfg) 34 | self.cfg: GraspConfig 35 | self.load() 36 | 37 | @dataclass 38 | class SphereConfig(BodyConfig): 39 | name: str = "sphere" 40 | urdf_file: str = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data", "assets", "sphere", "sphere.urdf") 41 | scale: float = MISSING 42 | # collision 43 | collision_mask: int = 0 44 | # base 45 | use_fixed_base: bool = True 46 | base_position: Tuple[float] = MISSING 47 | base_orientation: Tuple[float] = (0., 0., 0., 1.) 48 | # visual 49 | link_color: Tuple[float] = MISSING 50 | 51 | class Sphere(Body): 52 | def __init__(self, bullet_client: BulletClient, cfg: SphereConfig): 53 | super().__init__(bullet_client, cfg) 54 | self.cfg: SphereConfig 55 | self.load() 56 | 57 | def debug(): 58 | from omegaconf import OmegaConf 59 | import pybullet 60 | import code 61 | grasp_cfg = OmegaConf.to_object(OmegaConf.structured(get_grasp_config(pose_mat=np.eye(4)))) 62 | sphere_cfg = OmegaConf.to_object(OmegaConf.structured(SphereConfig(scale=1., base_position=(0., 0., 0.1), link_color=(1., 0., 0., 1.)))) 63 | bullet_client = BulletClient(connection_mode=pybullet.GUI) # or pybullet.DIRECT 64 | grasp = Grasp(bullet_client, grasp_cfg) 65 | sphere = Sphere(bullet_client, sphere_cfg) 66 | code.interact(local=dict(globals(), **locals())) 67 | 68 | if __name__ == "__main__": 69 | debug() 70 | 71 | """ 72 | python -m env.bodies_for_visualization 73 | 74 | CUDA_VISIBLE_DEVICES=2 python -m evaluate evaluate.setup s0 evaluate.split train evaluate.use_ray False policy.name offline policy.offline.demo_dir data/demo/s0/train/pointnet2_simultaneous_dart/0 env.visualize True env.show_trajectory True 75 | scene_id 5, step 68, status 1, reward 1.0, reached frame 6760, done frame 9184 76 | """ -------------------------------------------------------------------------------- /env/body.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Tuple, Optional 3 | import numpy as np 4 | from numpy.typing import NDArray 5 | import torch 6 | from dataclasses import dataclass 7 | from omegaconf import MISSING, OmegaConf 8 | import pybullet 9 | from pybullet_utils.bullet_client import BulletClient 10 | import code 11 | 12 | from .utils.transform import pos_ros_quat_to_mat 13 | from .utils.sdf_loss import SDFData 14 | 15 | OmegaConf.register_new_resolver("concat_tuples", lambda *tuples: sum(tuples, start=tuple())) 16 | 17 | @dataclass 18 | class BodyConfig: 19 | name: str = MISSING 20 | 
urdf_file: str = MISSING 21 | scale: float = 1. 22 | verbose: bool = False 23 | # collision 24 | use_self_collision: bool = False 25 | collision_mask: Optional[int] = None 26 | # base 27 | use_fixed_base: bool = False 28 | base_position: Optional[Tuple[float]] = None # (3, ) 29 | base_orientation: Optional[Tuple[float]] = None # (4, ), xyzw, ros quaternion 30 | base_linear_velocity: Optional[Tuple[float]] = None 31 | base_angular_velocity: Optional[Tuple[float]] = None 32 | # links 33 | num_dofs: int = 0 34 | dof_position: Optional[Tuple[float]] = None 35 | dof_velocity: Optional[Tuple[float]] = None 36 | link_lateral_friction: Optional[Tuple[float]] = None 37 | link_spinning_friction: Optional[Tuple[float]] = None 38 | link_rolling_friction: Optional[Tuple[float]] = None 39 | link_restitution: Optional[Tuple[float]] = None 40 | link_linear_damping: Optional[float] = None 41 | link_angular_damping: Optional[float] = None 42 | dof_max_force: Optional[Tuple[float]] = None 43 | dof_position_gain: Optional[Tuple[float]] = None 44 | dof_velocity_gain: Optional[Tuple[float]] = None 45 | dof_max_velocity: Optional[Tuple[float]] = None 46 | # visual 47 | link_color: Optional[Tuple[float]] = None 48 | 49 | class Body: 50 | def __init__(self, bullet_client: BulletClient, cfg: BodyConfig): 51 | self.bullet_client = bullet_client 52 | self.cfg = cfg 53 | self.body_id = None 54 | self.sdf_data: Optional[SDFData] = None 55 | 56 | def base_reset(self): 57 | self.body_id = None 58 | # self.clear() # bodies are already removed by bullet_client.resetSimulation() 59 | 60 | def set_collision_mask(self, collision_mask): 61 | for i in range(-1, self.num_links-1): 62 | self.bullet_client.setCollisionFilterGroupMask(self.body_id, i, collision_mask, collision_mask) 63 | 64 | def load(self): 65 | # assert self.body_id is None 66 | 67 | kwargs = {} 68 | if self.cfg.use_self_collision: 69 | kwargs["flags"] = pybullet.URDF_USE_SELF_COLLISION 70 | kwargs["useFixedBase"] = self.cfg.use_fixed_base 71 | if self.cfg.scale != 1.: 72 | kwargs["globalScaling"] = self.cfg.scale 73 | self.body_id = self.bullet_client.loadURDF(self.cfg.urdf_file, **kwargs) 74 | 75 | dof_indices = [] 76 | for j in range(self.bullet_client.getNumJoints(self.body_id)): 77 | joint_info = self.bullet_client.getJointInfo(self.body_id, j) 78 | if joint_info[2] != pybullet.JOINT_FIXED: 79 | dof_indices.append(j) 80 | self.dof_indices = np.array(dof_indices, dtype=np.int64) 81 | self.num_links = self.bullet_client.getNumJoints(self.body_id)+1 82 | 83 | # Reset base state. 84 | if self.cfg.base_position is not None and self.cfg.base_orientation is not None: 85 | self.bullet_client.resetBasePositionAndOrientation(self.body_id, self.cfg.base_position, self.cfg.base_orientation) 86 | self.bullet_client.resetBaseVelocity(self.body_id, linearVelocity=self.cfg.base_linear_velocity, angularVelocity=self.cfg.base_angular_velocity) 87 | 88 | # Reset DoF state. 89 | if self.cfg.dof_position is not None: 90 | for i, j in enumerate(self.dof_indices): 91 | kwargs = {} 92 | if self.cfg.dof_velocity is not None: 93 | kwargs["targetVelocity"] = self.cfg.dof_velocity[i] 94 | self.bullet_client.resetJointState(self.body_id, j, self.cfg.dof_position[i], **kwargs) 95 | 96 | # Set collision mask. 97 | if self.cfg.collision_mask is not None: 98 | self.set_collision_mask(self.cfg.collision_mask) 99 | 100 | # Set link dynamics. 
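# The per-link dynamics tuples in the config include an entry for the base link,
# so the value for pybullet link index i is read from tuple index i+1 in the loop
# below (the base link has index -1 in Bullet).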
101 | kwargs = {} 102 | if self.cfg.link_lateral_friction is not None: 103 | kwargs["lateralFriction"] = self.cfg.link_lateral_friction 104 | if self.cfg.link_spinning_friction is not None: 105 | kwargs["spinningFriction"] = self.cfg.link_spinning_friction 106 | if self.cfg.link_rolling_friction is not None: 107 | kwargs["rollingFriction"] = self.cfg.link_rolling_friction 108 | if self.cfg.link_restitution is not None: 109 | kwargs["restitution"] = self.cfg.link_restitution 110 | if len(kwargs) > 0: 111 | for i in range(-1, self.num_links-1): 112 | self.bullet_client.changeDynamics(self.body_id, i, **{k: v[i+1] for k, v in kwargs.items()}) 113 | # Bullet only sets `linearDamping` and `angularDamping` for link index -1. See: 114 | # https://github.com/bulletphysics/bullet3/blob/740d2b978352b16943b24594572586d95d476466/examples/SharedMemory/PhysicsClientC_API.cpp#L3419 115 | # https://github.com/bulletphysics/bullet3/blob/740d2b978352b16943b24594572586d95d476466/examples/SharedMemory/PhysicsClientC_API.cpp#L3430 116 | kwargs = {} 117 | if self.cfg.link_linear_damping is not None: 118 | kwargs["linearDamping"] = self.cfg.link_linear_damping 119 | if self.cfg.link_angular_damping is not None: 120 | kwargs["angularDamping"] = self.cfg.link_angular_damping 121 | if len(kwargs) > 0: 122 | self.bullet_client.changeDynamics(self.body_id, -1, **kwargs) 123 | 124 | # Set link color. 125 | if self.cfg.link_color is not None: 126 | for i in range(-1, self.num_links-1): 127 | self.bullet_client.changeVisualShape(self.body_id, i, rgbaColor=self.cfg.link_color) 128 | 129 | def clear(self): 130 | if self.body_id is not None: 131 | self.bullet_client.removeBody(self.body_id) 132 | self.body_id = None 133 | 134 | def set_dof_target(self, dof_target_position=None, dof_target_velocity=None): 135 | kwargs = {} 136 | if dof_target_position is not None: 137 | kwargs["targetPositions"] = dof_target_position 138 | if dof_target_velocity is not None: 139 | kwargs["targetVelocities"] = dof_target_velocity 140 | if self.cfg.dof_position_gain is not None: 141 | kwargs["positionGains"] = self.cfg.dof_position_gain 142 | if self.cfg.dof_velocity_gain is not None: 143 | kwargs["velocityGains"] = self.cfg.dof_velocity_gain 144 | if self.cfg.dof_max_force is not None: 145 | kwargs["forces"] = self.cfg.dof_max_force 146 | # The redundant if-else block below is an artifact due to `setJointMotorControlArray()` 147 | # not supporting `maxVelocity`. `setJointMotorControlArray()` is still preferred when 148 | # `maxVelocity` is not needed due to better speed performance. 149 | if self.cfg.dof_max_velocity is None: 150 | self.bullet_client.setJointMotorControlArray(self.body_id, self.dof_indices, pybullet.POSITION_CONTROL, **kwargs) 151 | else: # For Bullet, 'dof_max_velocity' has no effect when not in the POSITION_CONROL mode. 
152 | kwargs["maxVelocity"] = self.cfg.dof_max_velocity 153 | for i, j in enumerate(self.dof_indices): 154 | self.bullet_client.setJointMotorControl2(self.body_id, j, pybullet.POSITION_CONTROL, **{k: v[i] for k, v in kwargs.items()}) 155 | 156 | def get_link_pos(self, link_id) -> NDArray[np.float64]: 157 | if self.body_id is None: 158 | pos = np.nan*np.ones(3, dtype=np.float64) 159 | else: 160 | link_state = self.bullet_client.getLinkState(self.body_id, link_id, computeForwardKinematics=1) 161 | pos = np.array(link_state[4], dtype=np.float64) 162 | return pos 163 | 164 | def get_link_orn(self, link_id) -> NDArray[np.float64]: 165 | if self.body_id is None: 166 | orn = np.nan*np.ones(4, dtype=np.float64) 167 | else: 168 | link_state = self.bullet_client.getLinkState(self.body_id, link_id, computeForwardKinematics=1) 169 | orn = np.array(link_state[5], dtype=np.float64) 170 | return orn 171 | 172 | def get_link_pos_orn(self, link_id: int) -> Tuple[NDArray[np.float64], NDArray[np.float64]]: 173 | if self.body_id is None: 174 | pos, orn = np.nan*np.ones(3, dtype=np.float64), np.nan*np.ones(4, dtype=np.float64) 175 | else: 176 | link_state = self.bullet_client.getLinkState(self.body_id, link_id, computeForwardKinematics=1) 177 | pos, orn = np.array(link_state[4], dtype=np.float64), np.array(link_state[5], dtype=np.float64) 178 | return pos, orn 179 | 180 | def get_link_pose(self, link_id: int) -> NDArray[np.float64]: 181 | pos, orn = self.get_link_pos_orn(link_id) 182 | pose = pos_ros_quat_to_mat(pos, orn) 183 | return pose 184 | 185 | def get_link_states(self): 186 | link_indices = list(range(0, self.num_links-1)) 187 | return self.bullet_client.getLinkStates(self.body_id, link_indices, computeLinkVelocity=1, computeForwardKinematics=1) 188 | 189 | def get_joint_positions(self) -> NDArray[np.float64]: 190 | if self.body_id is None: 191 | joint_positions = np.nan*np.ones(self.cfg.num_dofs, dtype=np.float64) 192 | else: 193 | joint_states = self.bullet_client.getJointStates(self.body_id, self.dof_indices) 194 | joint_positions = np.array([joint_state[0] for joint_state in joint_states], dtype=np.float64) 195 | return joint_positions 196 | 197 | def get_sdf(self) -> SDFData: 198 | if self.sdf_data is None: 199 | sdf_path = os.path.join(os.path.dirname(self.cfg.urdf_file), "sdf.npz") 200 | assert os.path.exists(sdf_path), f"{self.cfg.name} doesn't have SDF in {sdf_path}" 201 | self.sdf_data = SDFData(sdf_path) 202 | return self.sdf_data 203 | 204 | def pre_step(self): 205 | pass 206 | 207 | def post_step(self): 208 | pass 209 | 210 | def debug(): 211 | from omegaconf import OmegaConf 212 | class BodyTest(Body): 213 | pass 214 | cli_cfg = OmegaConf.from_cli() 215 | body_base_cfg = OmegaConf.structured(BodyConfig) 216 | body_cfg = OmegaConf.merge(body_base_cfg, cli_cfg) 217 | bullet_client = BulletClient(connection_mode=pybullet.GUI) # or pybullet.DIRECT 218 | body_test = BodyTest(bullet_client, body_cfg) 219 | body_test.load() 220 | code.interact(local=dict(globals(), **locals())) 221 | 222 | if __name__ == "__main__": 223 | debug() 224 | 225 | """ 226 | DISPLAY="localhost:11.0" python -m env.body name=table urdf_file=data/assets/table/table.urdf 227 | """ -------------------------------------------------------------------------------- /env/camera.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from typing import Tuple, Optional, Union, List 3 | from dataclasses import dataclass 4 | from omegaconf import OmegaConf, MISSING 5 | import 
pybullet 6 | from pybullet_utils.bullet_client import BulletClient 7 | from scipy.spatial.transform import Rotation as Rt 8 | import imageio 9 | import code 10 | 11 | # def get_view_matrix(pos: Optional[Tuple[float]], orn: Optional[Tuple[float]], target: Optional[Tuple[float]], up_vector: Optional[Tuple[float]]) -> Tuple[float]: 12 | # if pos is not None and orn is not None: 13 | # assert target is None and up_vector is None 14 | # urdf_to_opengl = Rt.from_euler("XYZ", (-np.pi/2, 0.0, -np.pi)).as_matrix() 15 | # R = Rt.from_quat(orn).as_matrix()@urdf_to_opengl 16 | # t = -np.array(pos).dot(R) 17 | # view_matrix = np.eye(4, dtype=float) 18 | # view_matrix[:3, :3] = R 19 | # view_matrix[3, :3] = t 20 | # view_matrix = tuple(view_matrix.flatten()) 21 | # return view_matrix 22 | # elif pos is not None and target is not None and up_vector is not None: 23 | # assert orn is None 24 | # view_matrix = pybullet.computeViewMatrix(cameraEyePosition=pos, cameraTargetPosition=target, cameraUpVector=up_vector) 25 | # return view_matrix 26 | # else: 27 | # raise NotImplementedError 28 | # OmegaConf.register_new_resolver("get_view_matrix", get_view_matrix) 29 | 30 | @dataclass 31 | class CameraConfig: 32 | width: int = MISSING 33 | height: int = MISSING 34 | fov: float = MISSING 35 | near: float = MISSING 36 | far: float = MISSING 37 | pos: Optional[Tuple[float]] = None 38 | orn: Optional[Tuple[float]] = None 39 | target: Optional[Tuple[float]] = None 40 | up_vector: Optional[Tuple[float]] = None 41 | step_time: float = MISSING 42 | @property 43 | def view_matrix(self) -> Tuple[float]: 44 | if self.pos is not None and self.orn is not None: 45 | assert self.target is None and self.up_vector is None 46 | urdf_to_opengl = Rt.from_euler("XYZ", (-np.pi/2, 0.0, -np.pi)).as_matrix() 47 | R = Rt.from_quat(self.orn).as_matrix()@urdf_to_opengl 48 | t = -np.array(self.pos).dot(R) 49 | view_matrix = np.eye(4, dtype=float) 50 | view_matrix[:3, :3] = R 51 | view_matrix[3, :3] = t 52 | view_matrix = tuple(view_matrix.flatten()) 53 | return view_matrix 54 | elif self.pos is not None and self.target is not None and self.up_vector is not None: 55 | assert self.orn is None 56 | view_matrix = pybullet.computeViewMatrix(cameraEyePosition=self.pos, cameraTargetPosition=self.target, cameraUpVector=self.up_vector) 57 | return view_matrix 58 | else: 59 | raise NotImplementedError 60 | # view_matrix: Tuple[float] = "${get_view_matrix:${.pos},${.orn},${.target},${.up_vector}}" # the "." 
is necessary in nested configs, see https://github.com/omry/omegaconf/issues/1099#issuecomment-1624496194 61 | 62 | class Camera: 63 | def __init__(self, bullet_client: BulletClient, cfg: CameraConfig): 64 | self.bullet_client = bullet_client 65 | self.cfg = cfg 66 | 67 | self.projection_matrix = self.bullet_client.computeProjectionMatrixFOV(fov=self.cfg.fov, aspect=self.cfg.width/self.cfg.height, nearVal=self.cfg.near, farVal=self.cfg.far) 68 | 69 | self.urdf_to_opengl = Rt.from_euler("XYZ", (-np.pi/2, 0.0, -np.pi)).as_matrix() 70 | 71 | K = np.eye(3, dtype=float) 72 | K[0, 0] = self.cfg.width/2/np.tan(np.deg2rad(self.cfg.fov)*self.cfg.width/self.cfg.height/2) 73 | K[1, 1] = self.cfg.height/2/np.tan(np.deg2rad(self.cfg.fov)/2) 74 | K[0, 2] = self.cfg.width/2 75 | K[1, 2] = self.cfg.height/2 76 | K_inv = np.linalg.inv(K) 77 | x, y = np.meshgrid(np.arange(self.cfg.width), np.arange(self.cfg.height)) 78 | x, y = x.astype(float), y.astype(float) 79 | ones = np.ones((self.cfg.height, self.cfg.width), dtype=float) 80 | xy1s = np.stack((x, y, ones), axis=2).reshape(self.cfg.width*self.cfg.height, 3).T 81 | self.deproject = np.matmul(K_inv, xy1s).T 82 | 83 | def update_pose(self, pos, orn): 84 | self.cfg.pos, self.cfg.orn = pos, orn 85 | 86 | def process_depth(self, depth): 87 | # depth_1_mask = depth==1.0 88 | depth = self.cfg.far*self.cfg.near/(self.cfg.far-(self.cfg.far-self.cfg.near)*depth) 89 | # depth[depth_1_mask] = 0.0 90 | return depth 91 | 92 | def get_points(self, depth, mask): 93 | points = np.tile(depth[mask].reshape(-1, 1), (1, 3))*self.deproject[mask.reshape(-1), :] 94 | return points 95 | 96 | def render(self, segmentation_ids=[]): 97 | # code.interact(local=dict(globals(), **locals())) 98 | _, _, color, depth, segmentation = self.bullet_client.getCameraImage(width=self.cfg.width, height=self.cfg.height, viewMatrix=self.cfg.view_matrix, projectionMatrix=self.projection_matrix, renderer=pybullet.ER_BULLET_HARDWARE_OPENGL) # (224, 224, 4), (224, 224), (224, 224) 99 | depth = self.process_depth(depth) 100 | points = [] 101 | for segmentation_id in segmentation_ids: 102 | points.append(self.get_points(depth, segmentation==segmentation_id)) 103 | return color, depth, segmentation, points 104 | 105 | def setup_video_writer(self, video_filename, fps=20): 106 | print(f"setting up video writer to {video_filename}...") 107 | self.video_writer = imageio.get_writer(video_filename, mode='I', fps=fps) 108 | self.save_video_interval = int(1/fps/self.cfg.step_time) 109 | self.frame = 0 110 | 111 | def close_video_writer(self): 112 | print("closing video writer...") 113 | if self.video_writer is not None: 114 | self.video_writer.close() 115 | delattr(self, "video_writer") 116 | 117 | def pre_step(self): 118 | pass 119 | 120 | def post_step(self): 121 | if not hasattr(self, "video_writer"): 122 | return 123 | self.frame += 1 124 | if self.frame % self.save_video_interval == 0: 125 | color, depth, segmentation, points = self.render([]) 126 | self.video_writer.append_data(color) 127 | 128 | def debug(): 129 | camera_cfg = OmegaConf.structured(CameraConfig(pos=(0., 0., 0.), orn=(0., 0., 0., 1.))) 130 | code.interact(local=dict(globals(), **locals())) 131 | 132 | if __name__ == "__main__": 133 | debug() 134 | 135 | """ 136 | DISPLAY="localhost:11.0" python -m env.camera 137 | """ -------------------------------------------------------------------------------- /env/hand.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.typing 
import NDArray 3 | import os 4 | from typing import Tuple, Optional, ContextManager 5 | from dataclasses import dataclass 6 | from omegaconf import MISSING 7 | from pybullet_utils.bullet_client import BulletClient 8 | import code 9 | 10 | from .body import Body, BodyConfig 11 | 12 | @dataclass 13 | class HandConfig(BodyConfig): 14 | name: str = "" 15 | urdf_file: str = "" 16 | # collision 17 | collision_mask: int = 2**1 18 | # base 19 | use_fixed_base: bool = True 20 | base_position: Tuple[float] = (0., 0., 0.) 21 | base_orientation: Tuple[float] = (0., 0., 0., 1.) 22 | # links 23 | num_dofs: int = 51 24 | dof_position: Optional[Tuple[float]] = None 25 | dof_velocity: Tuple[float] = (0.0,)*51 26 | link_lateral_friction: Tuple[float] = (5.0,)*53 27 | link_spinning_friction: Tuple[float] = (5.0,)*53 28 | link_restitution: Tuple[float] = (0.5,)*53 29 | link_linear_damping: float = 10.0 30 | link_angular_damping: float = 10.0 31 | 32 | translation_max_force: Tuple[float] = (50.0,)*3 33 | translation_position_gain: Tuple[float] = (0.2,)*3 34 | translation_velocity_gain: Tuple[float] = (1.0,)*3 35 | rotation_max_force: Tuple[float] = (5.0,)*3 36 | rotation_position_gain: Tuple[float] = (0.2,)*3 37 | rotation_velocity_gain: Tuple[float] = (1.0,)*3 38 | joint_max_force: Tuple[float] = (0.5,)*45 39 | joint_position_gain: Tuple[float] = (0.1,)*45 40 | joint_velocity_gain: Tuple[float] = (1.0,)*45 41 | 42 | dof_max_force: Tuple[float] = "${concat_tuples:${.translation_max_force},${.rotation_max_force},${.joint_max_force}}" 43 | dof_position_gain: Tuple[float] = "${concat_tuples:${.translation_position_gain},${.rotation_position_gain},${.joint_position_gain}}" 44 | dof_velocity_gain: Tuple[float] = "${concat_tuples:${.translation_velocity_gain},${.rotation_velocity_gain},${.joint_velocity_gain}}" 45 | # concat_tuples is defined in body.py 46 | # the "." is necessary in nested configs, see https://github.com/omry/omegaconf/issues/1099#issuecomment-1624496194 47 | # visual 48 | link_color: Tuple[float] = (0., 1., 0., 1.) 
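# The 51 hand DoFs are driven in three groups, matching the tuple sizes above:
# 3 base translations + 3 base rotations + 45 MANO joint angles. The
# ${concat_tuples:...} resolvers concatenate the per-group tuples into
# dof_max_force, dof_position_gain, and dof_velocity_gain.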
49 | 50 | class Hand(Body): 51 | def __init__(self, bullet_client: BulletClient, cfg: HandConfig): 52 | super().__init__(bullet_client, cfg) 53 | self.cfg: HandConfig 54 | 55 | def reset(self, name: str, side: str, path: str, pose: NDArray[np.float32]): 56 | self.base_reset() 57 | # config 58 | self.cfg.name = "{}_{}".format(name, side) 59 | self.cfg.urdf_file = path 60 | 61 | # process pose data 62 | self.pose = pose # (num_frames, 51) 63 | nonzero_mask = np.any(self.pose!=0, axis=1) 64 | if np.any(nonzero_mask): 65 | nonzeros = np.where(nonzero_mask)[0] 66 | self.start_frame = nonzeros[0] 67 | self.end_frame = nonzeros[-1]+1 68 | else: 69 | self.start_frame, self.end_frame = -1, -1 70 | self.num_frames = self.pose.shape[0] 71 | self.frame = 0 72 | 73 | if self.frame == self.start_frame: 74 | self.cfg.dof_position = self.pose[self.frame].tolist() # can not use tuple(self.pose[self.frame]), which keeps the type np.float32 for scalars 75 | self.load() 76 | self.set_dof_target(self.pose[self.frame]) 77 | 78 | def pre_step(self, disable_rendering: ContextManager): 79 | self.frame += 1 80 | self.frame = min(self.frame, self.num_frames-1) 81 | 82 | if self.frame == self.start_frame: 83 | self.cfg.dof_position = self.pose[self.frame].tolist() # can not use tuple(self.pose[self.frame]), which keeps the type np.float32 for scalars 84 | with disable_rendering(): 85 | self.load() 86 | if self.start_frame <= self.frame < self.end_frame: 87 | self.set_dof_target(self.pose[self.frame]) 88 | if self.frame == self.end_frame: 89 | self.clear() 90 | 91 | def post_step(self): 92 | pass 93 | 94 | def debug(): 95 | from omegaconf import OmegaConf 96 | import pybullet 97 | import time 98 | from env.utils import load_scene_data 99 | hand_cfg = OmegaConf.structured(HandConfig) 100 | bullet_client = BulletClient(connection_mode=pybullet.GUI) # or pybullet.DIRECT 101 | hand = Hand(bullet_client, hand_cfg) 102 | scene_data = load_scene_data(0) 103 | hand.reset(scene_data["hand_name"], scene_data["hand_side"], scene_data["hand_path"], scene_data["hand_pose"]) 104 | while True: 105 | print(f"frame {hand.frame}") 106 | hand.pre_step() 107 | bullet_client.stepSimulation() 108 | hand.post_step() 109 | time.sleep(0.01) 110 | code.interact(local=dict(globals(), **locals())) 111 | 112 | if __name__ == "__main__": 113 | debug() 114 | 115 | """ 116 | DISPLAY="localhost:11.0" python -m env.hand 117 | """ -------------------------------------------------------------------------------- /env/handover_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.typing import NDArray 3 | from dataclasses import dataclass, field 4 | from omegaconf import OmegaConf 5 | import pybullet 6 | from pybullet_utils.bullet_client import BulletClient 7 | import pybullet_data 8 | from functools import partial 9 | from typing import TypedDict, Callable, List, Tuple, Optional 10 | from scipy.spatial.transform import Rotation as Rt 11 | from contextlib import contextmanager 12 | import time 13 | import code 14 | 15 | from .table import Table, TableConfig 16 | from .panda import Panda, PandaConfig 17 | from .camera import Camera, CameraConfig 18 | from .hand import Hand, HandConfig 19 | from .objects import Objects, ObjectConfig, ObjectsConfig 20 | from .status_checker import StatusChecker, StatusCheckerConfig, EpisodeStatus 21 | from .bodies_for_visualization import Grasp, GraspConfig, get_grasp_config, Sphere, SphereConfig 22 | from .utils.scene import load_scene_data 23 | 24 | 
OmegaConf.register_new_resolver("divide_to_int", lambda x, y: int(x/y) if x is not None else None) 25 | 26 | @dataclass 27 | class GenH2RSimConfig: 28 | gravity: Tuple[float] = (0.0, 0.0, -9.8) 29 | substeps: int = 1 30 | table_height: float = 0.92 31 | 32 | step_time: float = 0.001 33 | max_time: float = 13.0 34 | stop_moving_time: Optional[float] = None 35 | 36 | max_frames: int = "${divide_to_int:${.max_time},${.step_time}}" 37 | stop_moving_frame: Optional[int] = "${divide_to_int:${.stop_moving_time},${.step_time}}" 38 | frame_interval: int = "${divide_to_int:${.step_time},0.001}" 39 | # the "." is necessary in nested configs, see https://github.com/omry/omegaconf/issues/1099#issuecomment-1624496194 40 | 41 | stop_moving_dist: Optional[float] = None 42 | 43 | # GUI 44 | visualize: bool = False 45 | viewer_camera_distance: float = 2.4 46 | viewer_camera_yaw: float = 102. 47 | viewer_camera_pitch: float = -58. 48 | viewer_camera_target: Tuple[float] = (0., 0., 0.) 49 | # DRAW_VIEWER_AXES = True 50 | show_trajectory: bool = False 51 | show_camera: bool = False 52 | 53 | verbose: bool = False 54 | 55 | table: TableConfig = field(default_factory=TableConfig) # the format of nested structured configs must be like this https://omegaconf.readthedocs.io/en/2.3_branch/structured_config.html#nesting-structured-configs 56 | hand: HandConfig = field(default_factory=HandConfig) 57 | object: ObjectConfig = field(default_factory=ObjectConfig) 58 | objects: ObjectsConfig = field(default_factory=ObjectsConfig) 59 | panda: PandaConfig = field(default_factory=lambda: PandaConfig(step_time="${..step_time}")) 60 | third_person_camera: CameraConfig = field(default_factory=lambda: CameraConfig(width=1280, height=720, fov=60., near=0.1, far=10.0, pos=(1.5, -0.1, 1.8), target=(0.6, -0.1, 1.3), up_vector=(0., 0., 1.), step_time="${..step_time}")) 61 | status_checker: StatusCheckerConfig = field(default_factory=lambda: StatusCheckerConfig(table_height="${..table_height}", max_frames="${..max_frames}")) 62 | 63 | class CartesianActionSpace: # not a hard constraint. 
only for policy learning 64 | def __init__(self): 65 | self.high = np.array([ 0.06, 0.06, 0.06, np.pi/6, np.pi/6, np.pi/6]) #, np.pi/10 66 | self.low = np.array([-0.06, -0.06, -0.06, -np.pi/6, -np.pi/6, -np.pi/6]) # , -np.pi/3 67 | self.shape = [6] 68 | self.bounds = np.vstack([self.low, self.high]) 69 | 70 | @dataclass 71 | class Observation: 72 | frame: int 73 | world_to_ee: NDArray[np.float64] 74 | joint_positions: NDArray[np.float64] 75 | get_visual_observation: "GenH2RSim.get_visual_observation" 76 | env: "GenH2RSim" 77 | 78 | class GenH2RSim: 79 | def __init__(self, cfg: GenH2RSimConfig): 80 | self.cfg = cfg 81 | 82 | if self.cfg.visualize: 83 | self.bullet_client = BulletClient(connection_mode=pybullet.GUI) 84 | self.bullet_client.resetDebugVisualizerCamera(cameraDistance=self.cfg.viewer_camera_distance, cameraYaw=self.cfg.viewer_camera_yaw, cameraPitch=self.cfg.viewer_camera_pitch, cameraTargetPosition=self.cfg.viewer_camera_target) 85 | self.bullet_client.rendering_enabled = True 86 | else: 87 | self.bullet_client = BulletClient(connection_mode=pybullet.DIRECT) 88 | self.bullet_client.rendering_enabled = False 89 | self.bullet_client.setAdditionalSearchPath(pybullet_data.getDataPath()) 90 | 91 | self.table = Table(self.bullet_client, cfg.table) 92 | self.panda = Panda(self.bullet_client, cfg.panda) 93 | self.hand = Hand(self.bullet_client, cfg.hand) 94 | self.objects = Objects(self.bullet_client, cfg.objects, cfg.object) 95 | self.status_checker = StatusChecker(self.bullet_client, cfg.status_checker) 96 | self.third_person_camera = Camera(self.bullet_client, cfg.third_person_camera) 97 | 98 | @contextmanager 99 | def disable_rendering(self): # speedup setting up scene 100 | rendering_enabled = self.bullet_client.rendering_enabled 101 | if rendering_enabled: 102 | self.bullet_client.configureDebugVisualizer(pybullet.COV_ENABLE_RENDERING, 0) 103 | self.bullet_client.rendering_enabled = False 104 | yield 105 | if rendering_enabled: 106 | self.bullet_client.configureDebugVisualizer(pybullet.COV_ENABLE_RENDERING, 1) 107 | self.bullet_client.rendering_enabled = True 108 | 109 | def reset(self, scene_id): 110 | self.bullet_client.resetSimulation() # remove all objects from the scene 111 | self.bullet_client.setGravity(*self.cfg.gravity) 112 | self.bullet_client.setPhysicsEngineParameter(fixedTimeStep=self.cfg.step_time, numSubSteps=self.cfg.substeps, deterministicOverlappingPairs=1) 113 | 114 | self.scene_id = scene_id 115 | self.frame = 0 116 | self.scene_data = load_scene_data(scene_id, table_height=self.cfg.table_height, stop_moving_frame=self.cfg.stop_moving_frame, frame_interval=self.cfg.frame_interval) 117 | self.target_object_stopped_because_of_dist = False 118 | 119 | with self.disable_rendering(): 120 | self.ground_id = self.bullet_client.loadURDF("plane_implicit.urdf", basePosition=(0., 0., 0.)) 121 | self.table.reset() 122 | self.panda.reset() 123 | self.hand.reset(self.scene_data["hand_name"], self.scene_data["hand_side"], self.scene_data["hand_path"], self.scene_data["hand_pose"]) 124 | self.objects.reset(self.scene_data["object_names"], self.scene_data["object_paths"], self.scene_data["object_grasp_id"], self.scene_data["object_poses"]) 125 | self.status_checker.reset() 126 | 127 | # bodies for visualization 128 | self.grasps: List[Grasp] = [] 129 | self.spheres: List[Sphere] = [] 130 | 131 | def get_visual_observation(self): 132 | return self.panda.get_visual_observation([self.objects.target_object.body_id, self.hand.body_id]) 133 | 134 | def get_observation(self) -> 
Observation: 135 | # color, depth, segmentation, points = self.panda.get_visual_observation([self.objects.target_object.body_id, self.hand.body_id]) 136 | # "color": color, 137 | # "depth": depth, 138 | # "segmentation": segmentation, 139 | # "object_points": points[0], 140 | # "hand_points": points[1] 141 | if self.cfg.show_camera: 142 | self.get_visual_observation() # to refresh the camera for visualization 143 | observation = Observation( 144 | frame=self.frame, 145 | world_to_ee=self.panda.get_world_to_ee(), 146 | joint_positions=self.panda.get_joint_positions(), 147 | get_visual_observation=self.get_visual_observation, 148 | env=self, 149 | ) 150 | return observation 151 | 152 | # no disable_rendering because there are always multiple loadings together, so disable_rendering is placed there 153 | def load_grasp(self, pose_mat: NDArray[np.float64], color: Tuple[float]) -> Grasp: 154 | grasp_cfg: GraspConfig = OmegaConf.to_object(OmegaConf.structured(get_grasp_config(pose_mat=pose_mat, color=color))) 155 | grasp = Grasp(self.bullet_client, grasp_cfg) 156 | self.grasps.append(grasp) 157 | return grasp 158 | 159 | def clear_grasps(self): 160 | with self.disable_rendering(): 161 | for grasp in self.grasps: 162 | grasp.clear() 163 | self.grasps = [] 164 | 165 | # no disable_rendering because there are always multiple loadings together, so disable_rendering is placed there 166 | def load_sphere(self, pos: NDArray[np.float64], color: Tuple[float], scale: float) -> Sphere: 167 | sphere_cfg: SphereConfig = OmegaConf.to_object(OmegaConf.structured(SphereConfig(base_position=tuple(pos.tolist()), link_color=color, scale=scale))) 168 | sphere = Sphere(self.bullet_client, sphere_cfg) 169 | self.spheres.append(sphere) 170 | return sphere 171 | 172 | def clear_spheres(self): 173 | with self.disable_rendering(): 174 | for sphere in self.spheres: 175 | sphere.clear() 176 | self.spheres = [] 177 | 178 | def get_panda_object_dist(self) -> float: 179 | world_to_tip_pos = self.panda.get_tip_pos() 180 | world_to_object_pc = self.objects.target_object.get_world_to_object_pc() 181 | dists = np.square(world_to_object_pc-world_to_tip_pos).sum(axis=1) 182 | min_dist, min_dist_idx = np.sqrt(dists.min()), dists.argmin() 183 | return min_dist 184 | 185 | def sim_step(self, panda_dof_target_position, increase_frame=True): 186 | info = {} 187 | # pre step 188 | if increase_frame: 189 | self.frame += 1 190 | # self.table.pre_step() 191 | self.panda.pre_step(panda_dof_target_position) 192 | increase_hand_object_frame = increase_frame 193 | if self.cfg.stop_moving_dist is not None: 194 | panda_object_dist = self.get_panda_object_dist() 195 | if panda_object_dist < self.cfg.stop_moving_dist: 196 | increase_hand_object_frame = False 197 | self.target_object_stopped_because_of_dist = panda_object_dist < self.cfg.stop_moving_dist 198 | if self.cfg.verbose: 199 | print(f"frame: {self.frame}, panda object dist {panda_object_dist}") 200 | if increase_hand_object_frame: 201 | self.hand.pre_step(self.disable_rendering) 202 | self.objects.pre_step() 203 | # self.status_checker.pre_step() 204 | # self.third_person_camera.pre_step() 205 | self.bullet_client.stepSimulation() 206 | # post step 207 | # self.table.post_step() 208 | self.panda.post_step() 209 | # self.hand.post_step() 210 | self.objects.post_step() 211 | status, release = self.status_checker.post_step(self.table, self.panda, self.hand, self.objects, self.frame) 212 | self.third_person_camera.post_step() 213 | if release: 214 | self.objects.release() 215 | reward = 
float(status == EpisodeStatus.SUCCESS) 216 | done = status != 0 217 | if self.cfg.verbose and status != 0: print(f"frame {self.frame}, status {status}, reward {reward}, done {done}") 218 | info["status"] = status 219 | return reward, done, info 220 | 221 | def joint_step(self, panda_dof_target_position, repeat, increase_frame=True): 222 | " panda_dof_target_position: (9, ) " 223 | if self.cfg.verbose: 224 | print(f"in joint_step, frame={self.frame}", end=" ") 225 | for position in panda_dof_target_position: 226 | print(position, end=" ") 227 | print("") 228 | if self.cfg.show_trajectory: 229 | with self.disable_rendering(): 230 | self.load_sphere(self.objects.target_object.get_world_to_obj()[:3, 3], color=(1., 0.75, 0., 1.), scale=0.1) 231 | self.load_sphere(self.panda.get_tip_pos(), color=(1., 0., 0., 1.), scale=0.1) 232 | self.load_grasp(self.panda.get_world_to_ee(), color=(1., 0., 0., 1.)) 233 | for _ in range(repeat): 234 | reward, done, info = self.sim_step(panda_dof_target_position, increase_frame) 235 | if done: break 236 | return reward, done, info 237 | 238 | def ego_cartesian_step(self, action, repeat, increase_frame=True): 239 | " action: (7,) pos+euler+width " 240 | if self.cfg.verbose: 241 | print(f"in ego_cartesian_step, frame={self.frame}", end=" ") 242 | for action_i in action: 243 | print(action_i, end=" ") 244 | print("") 245 | panda_dof_target_position = self.panda.ego_cartesian_action_to_dof_target_position(pos=action[:3], orn=action[3:6], width=action[6], orn_type="euler") 246 | reward, done, info = self.joint_step(panda_dof_target_position, repeat, increase_frame) 247 | return reward, done, info 248 | 249 | def world_pos_step(self, action, repeat): 250 | " action: (4,) pos+width " 251 | panda_dof_target_position = self.panda.world_pos_action_to_dof_target_position(pos=action[:3], width=action[3]) 252 | reward, done, info = self.joint_step(panda_dof_target_position, repeat) 253 | return reward, done, info 254 | -------------------------------------------------------------------------------- /env/objects.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from numpy.typing import NDArray 4 | from pybullet_utils.bullet_client import BulletClient 5 | from dataclasses import dataclass 6 | from omegaconf import MISSING 7 | from xml.etree.ElementTree import parse, ElementTree 8 | import copy 9 | from typing import Tuple, List, Optional 10 | import open3d as o3d 11 | import code 12 | 13 | from .body import Body, BodyConfig 14 | from .utils.transform import pos_ros_quat_to_mat, se3_transform_pc 15 | 16 | @dataclass 17 | class ObjectConfig(BodyConfig): 18 | name: str = "" 19 | urdf_file: str = "" 20 | # collision 21 | collision_mask: Optional[int] = None 22 | # base 23 | use_fixed_base: bool = True 24 | base_position: Tuple[float] = (0., 0., 0.) 25 | base_orientation: Tuple[float] = (0., 0., 0., 1.) 
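# Each object is articulated with 6 position-controlled DoFs (3 translations +
# 3 rotations) so that it can track its recorded per-frame pose trajectory; the
# per-group force/gain tuples below are concatenated into the dof_* fields via
# ${concat_tuples:...}, in the same way as in HandConfig.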
26 | # links 27 | num_dofs: int = 6 28 | dof_position: Optional[Tuple[float]] = None 29 | dof_velocity: Tuple[float] = (0.0,)*9 30 | 31 | translation_max_force: Tuple[float] = (50.0,)*3 32 | translation_position_gain: Tuple[float] = (0.2,)*3 33 | translation_velocity_gain: Tuple[float] = (1.0,)*3 34 | rotation_max_force: Tuple[float] = (5.0,)*3 35 | rotation_position_gain: Tuple[float] = (0.2,)*3 36 | rotation_velocity_gain: Tuple[float] = (1.0,)*3 37 | 38 | dof_max_force: Tuple[float] = "${concat_tuples:${.translation_max_force},${.rotation_max_force}}" 39 | dof_position_gain: Tuple[float] = "${concat_tuples:${.translation_position_gain},${.rotation_position_gain}}" 40 | dof_velocity_gain: Tuple[float] = "${concat_tuples:${.translation_velocity_gain},${.rotation_velocity_gain}}" 41 | # concat_tuples is defined in body.py 42 | # the "." is necessary in nested configs, see https://github.com/omry/omegaconf/issues/1099#issuecomment-1624496194 43 | 44 | compute_target_real_displacement: bool = False 45 | 46 | max_target_real_displacement = 0. 47 | 48 | class Object(Body): 49 | def __init__(self, bullet_client: BulletClient, cfg: ObjectConfig): 50 | super().__init__(bullet_client, cfg) 51 | self.cfg: ObjectConfig 52 | 53 | def reset(self, name: str, path: str, pose: NDArray[np.float32], collision_mask: int): 54 | self.base_reset() 55 | # config 56 | self.cfg.name = name 57 | self.cfg.urdf_file = path 58 | self.cfg.collision_mask = collision_mask 59 | 60 | # process pose data 61 | self.pose = pose # (num_frames, 6) 62 | self.num_frames = self.pose.shape[0] 63 | self.frame = 0 64 | 65 | self.cfg.dof_position = self.pose[self.frame].tolist() # can not use tuple(self.pose[self.frame]), which keeps the type np.float32 for scalars 66 | self.load() 67 | self.set_dof_target(self.pose[self.frame]) 68 | 69 | self.object_pc: Optional[NDArray[np.float64]] = None 70 | 71 | def pre_step(self): 72 | self.frame += 1 73 | self.frame = min(self.frame, self.num_frames-1) 74 | self.set_dof_target(self.pose[self.frame]) 75 | 76 | def post_step(self): 77 | if self.cfg.compute_target_real_displacement: 78 | global max_target_real_displacement 79 | max_target_real_displacement = max(max_target_real_displacement, np.abs(self.pose[self.frame][:3]-self.get_joint_positions()[:3]).max()) 80 | print(f"max_target_real_displacement={max_target_real_displacement}") 81 | 82 | def get_world_to_obj(self) -> NDArray[np.float64]: 83 | world_to_obj = self.get_link_pose(5) 84 | return world_to_obj 85 | 86 | def get_world_to_object_pc(self) -> NDArray[np.float64]: 87 | if self.object_pc is None: 88 | tree: ElementTree = parse(self.cfg.urdf_file) 89 | root = tree.getroot() 90 | collision_file_name: str = root.findall("link")[-1].find("collision").find("geometry").find("mesh").get("filename") 91 | collision_file_path = os.path.join(os.path.dirname(self.cfg.urdf_file), collision_file_name) 92 | object_mesh = o3d.io.read_triangle_mesh(collision_file_path) 93 | self.object_pc = np.array(object_mesh.vertices) 94 | world_to_object = self.get_world_to_obj() 95 | world_to_object_pc = se3_transform_pc(world_to_object, self.object_pc) 96 | return world_to_object_pc 97 | 98 | @dataclass 99 | class ObjectsConfig: 100 | collision_masks: Tuple[int] = (2**2, 2**3, 2**4, 2**5, 2**6, 2**7) # allow up to 6 objects 101 | collision_mask_release: int = -1-2**1 102 | 103 | class Objects: 104 | def __init__(self, bullet_client: BulletClient, cfg_objects: ObjectsConfig, cfg_object: ObjectConfig): 105 | self.bullet_client = bullet_client 106 | 
self.cfg_objects = cfg_objects 107 | self.cfg_object = cfg_object 108 | self.objects: List[Object] = [] 109 | 110 | def reset(self, names: List[str], paths: List[str], grasp_id: int, pose: NDArray[np.float32]): 111 | for obj in self.objects: 112 | del obj 113 | self.objects: List[Object] = [] 114 | self.grasp_id = grasp_id 115 | for idx, (name, path) in enumerate(zip(names, paths)): 116 | new_object = Object(self.bullet_client, copy.deepcopy(self.cfg_object)) 117 | new_object.reset(name, path, pose[idx], self.cfg_objects.collision_masks[idx]) 118 | self.objects.append(new_object) 119 | if idx == grasp_id: 120 | self.target_object = new_object 121 | self.released = False 122 | 123 | def pre_step(self): 124 | for obj in self.objects: 125 | obj.pre_step() 126 | 127 | def post_step(self): 128 | for obj in self.objects: 129 | obj.post_step() 130 | 131 | def release(self): 132 | self.target_object.set_collision_mask(self.cfg_objects.collision_mask_release) 133 | self.target_object.cfg.dof_max_force = (0.,)*6 134 | self.target_object.set_dof_target(self.target_object.pose[self.target_object.frame]) # refresh the controller setting 135 | self.released = True 136 | 137 | def debug(): 138 | from omegaconf import OmegaConf 139 | import pybullet 140 | import time 141 | from env.utils import load_scene_data 142 | objects_cfg = OmegaConf.structured(ObjectsConfig) 143 | object_cfg = OmegaConf.structured(ObjectConfig) 144 | bullet_client = BulletClient(connection_mode=pybullet.GUI) # or pybullet.DIRECT 145 | objects = Objects(bullet_client, objects_cfg, object_cfg) 146 | scene_data = load_scene_data(0) 147 | objects.reset(scene_data["object_names"], scene_data["object_paths"], scene_data["object_grasp_id"], scene_data["object_poses"]) 148 | code.interact(local=dict(globals(), **locals())) 149 | while True: 150 | print(f"frame {objects.target_object.frame}") 151 | objects.pre_step() 152 | bullet_client.stepSimulation() 153 | objects.post_step() 154 | # time.sleep(0.01) 155 | 156 | if __name__ == "__main__": 157 | debug() 158 | 159 | """ 160 | DISPLAY="localhost:11.0" python -m env.objects 161 | 162 | CUDA_VISIBLE_DEVICES=0,1,2,3 RAY_DEDUP_LOGS=0 python -m evaluate num_runners=32 env.panda.IK_solver=PyKDL setup=s0 split=train policy=chomp chomp.wait_time=13 env.object.compute_target_real_displacement=True 163 | """ -------------------------------------------------------------------------------- /env/panda.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from numpy.typing import NDArray 4 | from typing import Tuple, Optional, List 5 | from dataclasses import dataclass, field 6 | from omegaconf import MISSING 7 | from pybullet_utils.bullet_client import BulletClient 8 | from scipy.spatial.transform import Rotation as Rt 9 | import ipdb 10 | import code 11 | 12 | from .body import Body, BodyConfig 13 | from .camera import Camera, CameraConfig 14 | from .utils.transform import pos_ros_quat_to_mat, pos_euler_to_mat, mat_to_pos_ros_quat, se3_inverse, se3_transform_pc 15 | from .utils.robot_kinematics import RobotKinematics, RobotKinematicsConfig 16 | 17 | @dataclass 18 | class PandaConfig(BodyConfig): 19 | name: str = "panda" 20 | urdf_file: str = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data", "assets", "franka_panda", "panda_gripper_hand_camera.urdf") 21 | # collision 22 | use_self_collision: bool = True 23 | collision_mask: int = -1 24 | # base 25 | use_fixed_base: bool = True 26 | base_position: Tuple[float] = (0.61, 
-0.50, 0.875) 27 | base_orientation: Tuple[float] = (0.0, 0.0, 0.7071068, 0.7071068) 28 | # links 29 | num_dofs: int = 9 30 | dof_default_position: Tuple[float] = (0.0, -1.285, 0, -2.356, 0.0, 1.571, 0.785, 0.04, 0.04) 31 | dof_position: Optional[Tuple[float]] = None 32 | dof_velocity: Tuple[float] = (0.0,)*9 33 | dof_max_force: Tuple[float] = (250.0,)*9 34 | dof_position_gain: Tuple[float] = (0.01,)*9 35 | dof_velocity_gain: Tuple[float] = (1.0,)*9 36 | 37 | # IK 38 | IK_solver: str = "PyKDL" # "pybullet" 39 | IK_solver_max_iter: int = 100 40 | IK_solver_eps: float = 1e-6 41 | # camera 42 | step_time: float = MISSING 43 | camera: CameraConfig = field(default_factory=lambda: CameraConfig(width=224, height=224, fov=90., near=0.035, far=2.0, step_time="${..step_time}")) # the format of nested structured configs must be like this https://omegaconf.readthedocs.io/en/2.3_branch/structured_config.html#nesting-structured-configs 44 | robot_kinematics: RobotKinematicsConfig = field(default_factory=lambda: RobotKinematicsConfig( 45 | urdf_file="${..urdf_file}", 46 | IK_solver_max_iter="${..IK_solver_max_iter}", 47 | IK_solver_eps="${..IK_solver_eps}", 48 | chain_tip="panda_hand", 49 | )) 50 | 51 | class Panda(Body): 52 | def __init__(self, bullet_client: BulletClient, cfg: PandaConfig): 53 | super().__init__(bullet_client, cfg) 54 | self.cfg: PandaConfig 55 | self.ee_link_id = 7 56 | self.fingers_link_id = (8, 9) 57 | self.camera_link_id = 10 58 | self.world_to_base = pos_ros_quat_to_mat(cfg.base_position, cfg.base_orientation) 59 | self.base_to_world = se3_inverse(self.world_to_base) 60 | 61 | self.camera = Camera(bullet_client, cfg.camera) 62 | self.hand_to_camera = np.eye(4) 63 | self.hand_to_camera[:3, 3] = (0.036, 0.0, 0.036) 64 | self.hand_to_camera[:3, :3] = Rt.from_euler("XYZ", (0.0, 0.0, np.pi/2)).as_matrix() 65 | 66 | if cfg.IK_solver == "PyKDL": 67 | self.robot_kinematics = RobotKinematics(cfg.robot_kinematics) 68 | 69 | def reset(self, joint_state: Optional[Tuple[float]]=None): 70 | self.base_reset() 71 | if joint_state is not None: 72 | self.cfg.dof_position = joint_state 73 | else: 74 | self.cfg.dof_position = self.cfg.dof_default_position 75 | self.load() 76 | self.set_dof_target(self.cfg.dof_position) 77 | camera_link_state = self.bullet_client.getLinkState(self.body_id, self.camera_link_id, computeForwardKinematics=1) 78 | self.camera.update_pose(camera_link_state[4], camera_link_state[5]) 79 | 80 | def pre_step(self, dof_target_position): 81 | self.set_dof_target(dof_target_position) 82 | self.camera.pre_step() 83 | 84 | def post_step(self): 85 | camera_link_state = self.bullet_client.getLinkState(self.body_id, self.camera_link_id, computeForwardKinematics=1) 86 | self.camera.update_pose(camera_link_state[4], camera_link_state[5]) 87 | self.camera.post_step() 88 | 89 | def get_world_to_ee(self): 90 | world_to_ee = self.get_link_pose(self.ee_link_id) 91 | return world_to_ee 92 | 93 | def get_tip_pos(self): 94 | world_to_ee = self.get_world_to_ee() 95 | tip_pos = world_to_ee[:3, 3]+0.108*world_to_ee[:3, 2] 96 | return tip_pos 97 | 98 | def ego_cartesian_action_to_dof_target_position(self, pos: NDArray[np.float64], orn: NDArray[np.float64], width: float, orn_type="euler") -> NDArray[np.float64]: 99 | world_to_ee = self.get_world_to_ee() 100 | if orn_type == "euler": 101 | ee_to_new_ee = pos_euler_to_mat(pos, orn) 102 | else: 103 | raise NotImplementedError 104 | world_to_new_ee = world_to_ee@ee_to_new_ee 105 | 106 | if self.cfg.IK_solver == "PyKDL": 107 | base_to_new_ee = 
self.base_to_world@world_to_new_ee 108 | joint_positions = self.get_joint_positions() 109 | dof_target_position, info = self.robot_kinematics.cartesian_to_joint(base_to_new_ee, seed=joint_positions[:7]) 110 | if self.cfg.verbose and info<0: print(f"PyKDL IK error: {info}") 111 | dof_target_position = np.append(dof_target_position, [width, width]) 112 | elif self.cfg.IK_solver == "pybullet": 113 | world_to_new_ee_pos, world_to_new_ee_ros_quat = mat_to_pos_ros_quat(world_to_new_ee) 114 | dof_target_position = self.bullet_client.calculateInverseKinematics(self.body_id, self.ee_link_id, world_to_new_ee_pos, world_to_new_ee_ros_quat, maxNumIterations=self.cfg.IK_solver_max_iter, residualThreshold=self.cfg.IK_solver_eps) 115 | dof_target_position = np.array(dof_target_position) 116 | dof_target_position[7:9] = width 117 | else: 118 | raise NotImplementedError 119 | return dof_target_position 120 | 121 | def world_pos_action_to_dof_target_position(self, pos, width): 122 | world_to_ee = self.get_world_to_ee() 123 | world_to_new_ee_pos = world_to_ee[:3, 3]+pos 124 | # if self.cfg.IK_solver == "PyKDL": 125 | # base_to_new_ee_pos = se3_transform_pc(self.base_to_world, world_to_new_ee_pos) 126 | # joint_positions = self.get_joint_positions() 127 | # dof_target_position, info = self.robot_kinematics.inverse_kinematics(position=base_to_new_ee_pos, seed=joint_positions[:7]) 128 | # if self.root_cfg.env.verbose and info<0: print(f"PyKDL IK error: {info}") 129 | # dof_target_position = np.append(dof_target_position, [width, width]) 130 | # elif self.cfg.IK_solver == "pybullet": 131 | dof_target_position = self.bullet_client.calculateInverseKinematics(self.body_id, self.ee_link_id, world_to_new_ee_pos) 132 | dof_target_position = np.array(dof_target_position) 133 | dof_target_position[7:9] = width 134 | return dof_target_position 135 | 136 | def get_visual_observation(self, segmentation_ids: List[int]=[]): 137 | camera_link_state = self.bullet_client.getLinkState(self.body_id, self.camera_link_id, computeForwardKinematics=1) 138 | self.camera.update_pose(camera_link_state[4], camera_link_state[5]) 139 | color, depth, segmentation, points = self.camera.render(segmentation_ids) 140 | for i in range(len(points)): 141 | points[i] = se3_transform_pc(self.hand_to_camera, points[i]) 142 | return color, depth, segmentation, points 143 | 144 | def debug(): 145 | from omegaconf import OmegaConf 146 | import pybullet 147 | import copy 148 | import time 149 | default_cfg = OmegaConf.structured(PandaConfig) 150 | cli_cfg = OmegaConf.from_cli() 151 | cfg: PandaConfig = OmegaConf.to_object(OmegaConf.merge(default_cfg, cli_cfg)) 152 | bullet_client = BulletClient(connection_mode=pybullet.GUI) # or pybullet.DIRECT 153 | bullet_client.resetDebugVisualizerCamera(cameraDistance=2.4, cameraPitch=-58, cameraYaw=102, cameraTargetPosition=[0, 0, 0]) 154 | panda = Panda(bullet_client, cfg) 155 | panda.reset() 156 | dof_default_position = np.array(cfg.dof_default_position) 157 | 158 | def move_to(dof_target_position: NDArray[np.float64], steps=1) -> None: 159 | dof_current_position = panda.get_joint_positions() 160 | for i in range(steps): 161 | dof_target_position_i = (dof_target_position-dof_current_position)/steps*(i+1)+dof_current_position 162 | for _ in range(130): 163 | panda.pre_step(dof_target_position_i) 164 | bullet_client.stepSimulation() 165 | panda.post_step() 166 | # time.sleep(0.003) 167 | panda.get_visual_observation() 168 | 169 | # for i in range(len(dof_default_position)-2): # arm links 170 | # print(f"moving link 
{i}") 171 | # dof_target_position = dof_default_position.copy() 172 | # dof_target_position[i] += np.pi/2 173 | # move_to(dof_target_position, 10) 174 | # move_to(dof_default_position, 10) 175 | 176 | # dof_target_position = dof_default_position.copy() 177 | # dof_target_position[7:] = 0. 178 | # move_to(dof_target_position) # gripper 179 | # move_to(dof_default_position) 180 | 181 | for i in range(6): # cartesian action 182 | pos, orn = np.array([0., 0., 0.]), np.array([0., 0., 0.]) 183 | if i<3: 184 | pos[i] = 0.1 185 | else: 186 | orn[i-3] = np.pi/2 187 | dof_target_position = panda.ego_cartesian_action_to_dof_target_position(pos, orn, 0.04) 188 | move_to(dof_target_position, 10) # forward 189 | move_to(dof_default_position, 10) # backward 190 | code.interact(local=dict(globals(), **locals())) 191 | 192 | if __name__ == "__main__": 193 | debug() 194 | 195 | """ 196 | DISPLAY="localhost:11.0" python -m env.panda 197 | DISPLAY="localhost:11.0" python -m env.panda IK_solver=pybullet 198 | """ -------------------------------------------------------------------------------- /env/status_checker.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from pybullet_utils.bullet_client import BulletClient 3 | from dataclasses import dataclass 4 | from omegaconf import MISSING 5 | from typing import Tuple 6 | import code 7 | 8 | from .table import Table 9 | from .panda import Panda 10 | from .hand import Hand 11 | from .objects import Object, Objects 12 | from .utils.transform import se3_transform_pc, pos_ros_quat_to_mat, se3_inverse 13 | 14 | @dataclass 15 | class StatusCheckerConfig: 16 | table_height: float = MISSING # "${..table_height}" 17 | max_frames: int = MISSING # "${..max_frames}" 18 | # release object check 19 | release_force_thresh: float = 0.0 20 | release_time_thresh: float = 0.1 21 | release_step_thresh: int = "${divide_to_int:${.release_time_thresh},${..step_time}}" # divide_to_int is defined in handover_env.py 22 | release_contact_region_range_x: Tuple[float] = (-0.0110, +0.0110) 23 | release_contact_region_range_y: Tuple[float] = (-0.0090, +0.0090) 24 | release_contact_region_range_z: Tuple[float] = (+0.0000, +0.0550) 25 | release_displacement_thresh: float = 0.03 26 | # draw_release_contact = False 27 | # release_contact_region_color = (0.85, 0.19, 0.21, 0.5) 28 | # release_contact_vertex_radius = 0.001 29 | # release_contact_vertex_color = (0.85, 0.19, 0.21, 1.0) 30 | 31 | # collision check for two failure cases: human hand collision and drop 32 | contact_force_thresh: float = 0.0 33 | 34 | # success check 35 | goal_center: Tuple[float] = (0.61, -0.20, 1.25) 36 | goal_radius: float = 0.15 37 | # draw_goal = False 38 | goal_color: Tuple[float] = (0.85, 0.19, 0.21, 0.5) 39 | success_time_thresh: float = 0.1 40 | success_step_thresh: int = "${divide_to_int:${.success_time_thresh},${..step_time}}" # divide_to_int is defined in handover_env.py 41 | 42 | verbose: bool = False 43 | 44 | class EpisodeStatus: 45 | SUCCESS = 1 46 | FAILURE_HUMAN_CONTACT = 2 47 | FAILURE_OBJECT_DROP = 4 48 | FAILURE_TIMEOUT = 8 49 | 50 | class StatusChecker: 51 | def __init__(self, bullet_client: BulletClient, cfg: StatusCheckerConfig): 52 | self.bullet_client = bullet_client 53 | self.cfg = cfg 54 | 55 | self.DTYPE = np.dtype( 56 | { 57 | "names": [ 58 | "body_id_a", 59 | "body_id_b", 60 | "link_id_a", 61 | "link_id_b", 62 | "position_a_world", 63 | "position_b_world", 64 | "position_a_link", 65 | "position_b_link", 66 | "normal", 67 | "force", 68 
| ], 69 | "formats": [ 70 | np.int32, 71 | np.int32, 72 | np.int32, 73 | np.int32, 74 | [("x", np.float32), ("y", np.float32), ("z", np.float32)], 75 | [("x", np.float32), ("y", np.float32), ("z", np.float32)], 76 | [("x", np.float32), ("y", np.float32), ("z", np.float32)], 77 | [("x", np.float32), ("y", np.float32), ("z", np.float32)], 78 | [("x", np.float32), ("y", np.float32), ("z", np.float32)], 79 | np.float32, 80 | ], 81 | "offsets": [0, 4, 8, 12, 16, 28, 40, 52, 64, 76], 82 | "itemsize": 80, 83 | } 84 | ) 85 | 86 | def reset(self): 87 | self.release_step_counter_passive = 0 88 | self.release_step_counter_active = 0 89 | self.dropped = False 90 | self.success_step_counter = 0 91 | 92 | def get_contact(self): 93 | contact_points = self.bullet_client.getContactPoints() 94 | self.contact = np.empty(len(contact_points), dtype=self.DTYPE) 95 | if len(contact_points) == 0: 96 | return 97 | body_id_a, body_id_b, link_id_a, link_id_b, position_a_world, position_b_world, position_a_link, position_b_link, normal, force = [], [], [], [], [], [], [], [], [], [] 98 | for contactFlag, bodyUniqueIdA, bodyUniqueIdB, linkIndexA, linkIndexB, positionOnA, positionOnB, contactNormalOnB, contactDistance, normalForce, lateralFriction1, lateralFrictionDir1, lateralFriction2, lateralFrictionDir2 in contact_points: 99 | body_id_a.append(bodyUniqueIdA) # if bodyUniqueIdA != self.ground_id else -1) 100 | body_id_b.append(bodyUniqueIdB) # if bodyUniqueIdB != self.ground_id else -1) 101 | link_id_a.append(linkIndexA) 102 | link_id_b.append(linkIndexB) 103 | position_a_world.append(positionOnA) 104 | position_b_world.append(positionOnB) 105 | position_a_link.append(np.nan) 106 | position_b_link.append(np.nan) 107 | normal.append(contactNormalOnB) 108 | force.append(normalForce) 109 | # code.interact(local=dict(globals(), **locals())) 110 | self.contact["body_id_a"] = body_id_a 111 | self.contact["body_id_b"] = body_id_b 112 | self.contact["link_id_a"] = link_id_a 113 | self.contact["link_id_b"] = link_id_b 114 | self.contact["position_a_world"] = position_a_world 115 | self.contact["position_b_world"] = position_b_world 116 | self.contact["position_a_link"] = position_a_link 117 | self.contact["position_b_link"] = position_b_link 118 | self.contact["normal"] = normal 119 | self.contact["force"] = force 120 | 121 | def check_object_release(self, target_object: Object, panda: Panda, frame: int): 122 | object_id, panda_id = target_object.body_id, panda.body_id # if hand is not loaded, hand.body_id = None 123 | contact = self.contact[self.contact["force"] > self.cfg.release_force_thresh].copy() 124 | 125 | if len(contact) == 0: 126 | contact_panda_release_region = [False] * len(panda.fingers_link_id) 127 | contact_panda_body = False 128 | else: 129 | contact_1 = contact[(contact["body_id_a"] == object_id) & (contact["body_id_b"] == panda_id)] 130 | contact_2 = contact[(contact["body_id_a"] == panda_id) & (contact["body_id_b"] == object_id)] 131 | contact_2[["body_id_a", "body_id_b"]] = contact_2[["body_id_b", "body_id_a"]] 132 | contact_2[["link_id_a", "link_id_b"]] = contact_2[["link_id_b", "link_id_a"]] 133 | contact_2[["position_a_world", "position_b_world"]] = contact_2[["position_b_world", "position_a_world"]] 134 | contact_2[["position_a_link", "position_b_link"]] = contact_2[["position_b_link", "position_a_link"]] 135 | contact_2["normal"]["x"] *= -1 136 | contact_2["normal"]["y"] *= -1 137 | contact_2["normal"]["z"] *= -1 138 | contact = np.concatenate((contact_1, contact_2)) 139 | 140 | contact_panda_body = 
len(contact) > 0 141 | contact_panda_release_region = [] 142 | 143 | for link_id in panda.fingers_link_id: 144 | contact_link = contact[contact["link_id_b"] == link_id] 145 | if len(contact_link) == 0: 146 | contact_panda_release_region.append(False) 147 | continue 148 | if np.any(np.isnan(contact_link["position_b_link"]["x"])): 149 | pos, orn = panda.get_link_pos_orn(link_id) 150 | world_to_link = pos_ros_quat_to_mat(pos, orn) 151 | link_to_world = se3_inverse(world_to_link) 152 | position = np.ascontiguousarray(contact_link["position_b_world"]).view(np.float32).reshape(-1, 3) 153 | position = se3_transform_pc(link_to_world, position) 154 | else: 155 | position = ( 156 | np.ascontiguousarray(contact_link["position_b_link"]) 157 | .view(np.float32) 158 | .reshape(-1, 3) 159 | ) 160 | 161 | is_in_release_region = ( 162 | (position[:, 0] > self.cfg.release_contact_region_range_x[0]) 163 | & (position[:, 0] < self.cfg.release_contact_region_range_x[1]) 164 | & (position[:, 1] > self.cfg.release_contact_region_range_y[0]) 165 | & (position[:, 1] < self.cfg.release_contact_region_range_y[1]) 166 | & (position[:, 2] > self.cfg.release_contact_region_range_z[0]) 167 | & (position[:, 2] < self.cfg.release_contact_region_range_z[1]) 168 | ) 169 | contact_panda_release_region.append(np.any(is_in_release_region)) 170 | 171 | if not any(contact_panda_release_region) and contact_panda_body: 172 | self.release_step_counter_passive += 1 173 | else: 174 | self.release_step_counter_passive = 0 175 | 176 | if all(contact_panda_release_region): 177 | self.release_step_counter_active += 1 178 | else: 179 | self.release_step_counter_active = 0 180 | if self.cfg.verbose and self.release_step_counter_passive+self.release_step_counter_active > 0: 181 | print(f"frame: {frame}, release step counter passive: {self.release_step_counter_passive}, release step counter active: {self.release_step_counter_active}") 182 | 183 | target_real_displacement = np.abs(target_object.pose[target_object.frame, :3]-target_object.get_joint_positions()[:3]).max() 184 | 185 | return self.release_step_counter_passive >= self.cfg.release_step_thresh or self.release_step_counter_active >= self.cfg.release_step_thresh or target_real_displacement >= self.cfg.release_displacement_thresh 186 | 187 | def check_panda_hand_collision(self, panda: Panda, hand: Hand): 188 | panda_id, hand_id = panda.body_id, hand.body_id # if hand is not loaded, hand.body_id = None 189 | contact_1 = self.contact[(self.contact["body_id_a"] == hand_id) & (self.contact["body_id_b"] == panda_id)] 190 | contact_2 = self.contact[(self.contact["body_id_a"] == panda_id) & (self.contact["body_id_b"] == hand_id)] 191 | contact = np.concatenate((contact_1, contact_2)) 192 | if contact.shape[0] > 0 and contact["force"].max() > self.cfg.contact_force_thresh: 193 | if self.cfg.verbose: print("detect pand hand collision") 194 | return True 195 | else: 196 | return False 197 | 198 | def check_object_dropped(self, objects: Objects, panda: Panda, table: Table) -> Tuple[bool, bool]: 199 | if self.dropped: 200 | return True, False 201 | target_object_id = objects.target_object.body_id 202 | contact_1 = self.contact[self.contact["body_id_a"] == target_object_id].copy() 203 | contact_2 = self.contact[self.contact["body_id_b"] == target_object_id].copy() 204 | contact_2[["body_id_a", "body_id_b"]] = contact_2[["body_id_b", "body_id_a"]] 205 | contact_2[["link_id_a", "link_id_b"]] = contact_2[["link_id_b", "link_id_a"]] 206 | contact_2[["position_a_world", "position_b_world"]] = 
contact_2[["position_b_world", "position_a_world"]] 207 | contact_2[["position_a_link", "position_b_link"]] = contact_2[["position_b_link", "position_a_link"]] 208 | contact_2["normal"]["x"] *= -1 209 | contact_2["normal"]["y"] *= -1 210 | contact_2["normal"]["z"] *= -1 211 | contact = np.concatenate((contact_1, contact_2)) 212 | 213 | contact_panda = contact[contact["body_id_b"] == panda.body_id] 214 | contact_table = contact[contact["body_id_b"] == table.body_id] 215 | contact_other_objects = [] 216 | for i in range(len(objects.objects)): 217 | if i != objects.grasp_id: 218 | contact_other_objects.append(contact[contact["body_id_b"] == objects.objects[i].body_id]) 219 | 220 | panda_link_ind = contact_panda["link_id_b"][contact_panda["force"] > self.cfg.contact_force_thresh] 221 | contact_panda_fingers = set(panda.fingers_link_id).issubset(panda_link_ind) 222 | contact_table = np.any(contact_table["force"] > self.cfg.contact_force_thresh) 223 | if len(contact_other_objects) > 0: 224 | contact_other_objects = np.concatenate(contact_other_objects) 225 | contact_other_objects = np.any(contact_other_objects["force"] > self.cfg.contact_force_thresh) 226 | else: 227 | contact_other_objects = False 228 | 229 | target_object_pos = objects.target_object.get_link_pos(5) 230 | is_below_table = target_object_pos[2] < self.cfg.table_height 231 | 232 | if not contact_panda_fingers and (contact_table or contact_other_objects or is_below_table): 233 | if self.cfg.verbose: 234 | print("detect contact panda fingers") 235 | if contact_table: print("object drop because contact table") 236 | if contact_other_objects: print("object drop because contact other objects") 237 | if is_below_table: print("object drop because is below table") 238 | self.dropped = True 239 | return self.dropped, contact_panda_fingers 240 | 241 | def check_success(self, panda: Panda, contact_panda_fingers: bool): 242 | if not contact_panda_fingers: 243 | self.success_step_counter = 0 244 | return 0 245 | pos = panda.get_link_pos(panda.ee_link_id) 246 | dist = np.linalg.norm(np.array(self.cfg.goal_center)-np.array(pos)) 247 | is_within_goal = dist < self.cfg.goal_radius 248 | if not is_within_goal: 249 | self.success_step_counter = 0 250 | return 0 251 | self.success_step_counter += 1 252 | if self.success_step_counter >= self.cfg.success_step_thresh: 253 | if self.cfg.verbose: print("detect success") 254 | return EpisodeStatus.SUCCESS 255 | else: 256 | return 0 257 | 258 | def pre_step(self): 259 | pass 260 | 261 | def post_step(self, table: Table, panda: Panda, hand: Hand, objects: Objects, frame: int) -> Tuple[int, bool]: 262 | self.get_contact() 263 | release = not objects.released and self.check_object_release(objects.target_object, panda, frame) 264 | status = 0 265 | if self.check_panda_hand_collision(panda, hand): 266 | status |= EpisodeStatus.FAILURE_HUMAN_CONTACT 267 | dropped, contact_panda_fingers = self.check_object_dropped(objects, panda, table) 268 | if dropped: 269 | status |= EpisodeStatus.FAILURE_OBJECT_DROP 270 | if status != 0: 271 | return status, release 272 | if self.check_success(panda, contact_panda_fingers): 273 | status |= EpisodeStatus.SUCCESS 274 | if frame >= self.cfg.max_frames and status != EpisodeStatus.SUCCESS: 275 | status |= EpisodeStatus.FAILURE_TIMEOUT 276 | return status, release 277 | -------------------------------------------------------------------------------- /env/table.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import 
Tuple 3 | from dataclasses import dataclass 4 | import pybullet 5 | from pybullet_utils.bullet_client import BulletClient 6 | import code 7 | 8 | from env.body import Body, BodyConfig 9 | from env.utils.transform import pos_ros_quat_to_mat 10 | 11 | @dataclass 12 | class TableConfig(BodyConfig): 13 | name: str = "table" 14 | urdf_file: str = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data", "assets", "table", "table.urdf") 15 | # collision 16 | collision_mask: int = 2**0 17 | # base 18 | use_fixed_base: bool = True 19 | base_position: Tuple[float] = (0.61, 0.28, 0.0) 20 | base_orientation: Tuple[float] = (0., 0., 0., 1.) 21 | # visual 22 | link_color: Tuple[float] = (1., 1., 1., 1.) 23 | 24 | class Table(Body): 25 | def __init__(self, bullet_client: BulletClient, cfg: TableConfig): 26 | super().__init__(bullet_client, cfg) 27 | self.cfg: TableConfig 28 | self.world_to_table = pos_ros_quat_to_mat(cfg.base_position, cfg.base_orientation) 29 | 30 | def reset(self): 31 | self.base_reset() 32 | self.load() 33 | 34 | def pre_step(self): 35 | pass 36 | 37 | def post_step(self): 38 | pass 39 | 40 | def debug(): 41 | from omegaconf import OmegaConf 42 | table_cfg = OmegaConf.structured(TableConfig) 43 | bullet_client = BulletClient(connection_mode=pybullet.GUI) # or pybullet.DIRECT 44 | table = Table(bullet_client, table_cfg) 45 | table.reset() 46 | code.interact(local=dict(globals(), **locals())) 47 | 48 | if __name__ == "__main__": 49 | debug() 50 | 51 | """ 52 | DISPLAY="localhost:11.0" python -m env.table 53 | """ -------------------------------------------------------------------------------- /env/tools/process_dexycb.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from transforms3d.euler import mat2euler 3 | import json 4 | import os 5 | from tqdm import tqdm 6 | import code 7 | 8 | from ..utils.scene import scene_id_to_scene_dir 9 | from .sdf import gen_sdf 10 | 11 | OBJ_ID_TO_NAME = { 12 | 1: "002_master_chef_can", 13 | 2: "003_cracker_box", 14 | 3: "004_sugar_box", 15 | 4: "005_tomato_soup_can", 16 | 5: "006_mustard_bottle", 17 | 6: "007_tuna_fish_can", 18 | 7: "008_pudding_box", 19 | 8: "009_gelatin_box", 20 | 9: "010_potted_meat_can", 21 | 10: "011_banana", 22 | 11: "019_pitcher_base", 23 | 12: "021_bleach_cleanser", 24 | 13: "024_bowl", 25 | 14: "025_mug", 26 | 15: "035_power_drill", 27 | 16: "036_wood_block", 28 | 17: "037_scissors", 29 | 18: "040_large_marker", 30 | 20: "052_extra_large_clamp", 31 | 21: "061_foam_brick", 32 | } 33 | 34 | data_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "data") 35 | 36 | def rotZ(rotz): 37 | RotZ = np.array( 38 | [ 39 | [np.cos(rotz), -np.sin(rotz), 0, 0], 40 | [np.sin(rotz), np.cos(rotz), 0, 0], 41 | [0, 0, 1, 0], 42 | [0, 0, 0, 1], 43 | ] 44 | ) 45 | return RotZ 46 | 47 | def ycb_special_case(pose_grasp, name): 48 | if name == '037_scissors': # only accept top down for edge cases 49 | z_constraint = np.where((np.abs(pose_grasp[:, 2, 3]) > 0.09) * \ 50 | (np.abs(pose_grasp[:, 1, 3]) > 0.02) * (np.abs(pose_grasp[:, 0, 3]) < 0.05)) 51 | pose_grasp = pose_grasp[z_constraint[0]] 52 | top_down = [] 53 | 54 | for pose in pose_grasp: 55 | top_down.append(mat2euler(pose[:3, :3])) 56 | 57 | top_down = np.array(top_down)[:,1] 58 | rot_constraint = np.where(np.abs(top_down) > 0.06) 59 | pose_grasp = pose_grasp[rot_constraint[0]] 60 | 61 | elif name == '024_bowl' or name == '025_mug': 62 | if name == '024_bowl': 63 | angle = 30 64 | else: 65 | angle 
= 15 66 | top_down = [] 67 | for pose in pose_grasp: 68 | top_down.append(mat2euler(pose[:3, :3])) 69 | top_down = np.array(top_down)[:,1] 70 | rot_constraint = np.where(np.abs(top_down) > angle * np.pi / 180) 71 | pose_grasp = pose_grasp[rot_constraint[0]] 72 | return pose_grasp 73 | 74 | ycb_grasps_dir = os.path.join(data_dir, "tmp", "ycb_grasps") 75 | def process_object(object_name: str): 76 | object_dir = os.path.join(data_dir, "assets", "objects", "ycb", object_name) 77 | # grasps 78 | object_grasp_path_original = os.path.join(ycb_grasps_dir, object_name+".npy") 79 | object_grasp_path = os.path.join(object_dir, "grasps.npy") 80 | if os.path.exists(object_grasp_path): 81 | pass 82 | elif not os.path.exists(object_grasp_path_original): 83 | print(f"grasps not found for {object_name}") 84 | else: 85 | simulator_grasp = np.load( 86 | object_grasp_path_original, 87 | allow_pickle=True, 88 | fix_imports=True, 89 | encoding="bytes", 90 | ) 91 | pose_grasp = simulator_grasp.item()[b"transforms"] 92 | offset_pose = np.array(rotZ(np.pi / 2)) # and 93 | pose_grasp = np.matmul(pose_grasp, offset_pose) # flip x, y 94 | # print(f"load {pose_grasp.shape[0]} grasps") 95 | pose_grasp = ycb_special_case(pose_grasp, object_name) 96 | np.save(object_grasp_path, pose_grasp) 97 | # sdf 98 | object_sdf_path = os.path.join(object_dir, "sdf.npz") 99 | if not os.path.exists(object_sdf_path): 100 | gen_sdf(os.path.join(object_dir, "model_normalized_convex.obj")) 101 | 102 | dexycb_data_dir = os.path.join(data_dir, "tmp", "dex-ycb-cache") 103 | def process(scene_id: int): 104 | meta_path = os.path.join(dexycb_data_dir, f"meta_{scene_id:03d}.json") 105 | with open(meta_path, "r") as f: 106 | meta = json.load(f) 107 | pose_path = os.path.join(dexycb_data_dir, f"pose_{scene_id:03d}.npz") 108 | pose = np.load(pose_path) 109 | 110 | scene_dir = scene_id_to_scene_dir(scene_id) 111 | os.makedirs(scene_dir, exist_ok=True) 112 | scene_data_path = os.path.join(scene_dir, f"{scene_id:08d}.npz") 113 | 114 | hand_name = meta["name"].split("/")[0] 115 | hand_side = meta["mano_sides"][0] 116 | hand_path = os.path.join(f"{hand_name}_{hand_side}", "mano.urdf") 117 | object_names = [] 118 | object_paths = [] 119 | for object_id in meta["ycb_ids"]: 120 | object_name = OBJ_ID_TO_NAME[object_id] 121 | object_path = os.path.join("ycb", object_name, "model_normalized.urdf") 122 | object_names.append(object_name) 123 | object_paths.append(object_path) 124 | object_names = np.array(object_names) 125 | scene_data = { 126 | "hand_name": hand_name, 127 | "hand_side": hand_side, 128 | "hand_path": hand_path, 129 | "hand_pose": pose["pose_m"][:, 0], 130 | "object_names": object_names, 131 | "object_paths": object_paths, 132 | "object_grasp_id": meta["ycb_grasp_ind"], 133 | "object_poses": pose["pose_y"].transpose(1, 0, 2), 134 | "source": "dexycb", 135 | } 136 | np.savez(scene_data_path, **scene_data) 137 | 138 | def main(): 139 | for key, object_name in OBJ_ID_TO_NAME.items(): 140 | process_object(object_name) 141 | 142 | for i in tqdm(range(1000)): 143 | process(i) 144 | 145 | if __name__ == "__main__": 146 | main() 147 | -------------------------------------------------------------------------------- /env/tools/sdf.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from numpy.typing import NDArray 4 | import open3d as o3d 5 | import code 6 | 7 | env_path = os.path.dirname(os.path.dirname(__file__)) 8 | sdfgen_path = os.path.join(env_path, "third_party", "SDFGen", 
"build", "bin", "SDFGen") 9 | 10 | def load_sdf(sdf_path: str): 11 | with open(sdf_path, "r") as f: 12 | nx, ny, nz = map(int, f.readline().split(" ")) 13 | x0, y0, z0 = map(float, f.readline().split(" ")) 14 | delta = float(f.readline().strip()) 15 | sdf = np.loadtxt(f).reshape((nz, ny, nx)).transpose((2, 1, 0)) 16 | return np.array([x0, y0, z0]), delta, sdf 17 | 18 | def gen_sdf(object_path: str, reg_size: float=0.2, dim: int=32, padding: int=20): 19 | " object should be convex decomposed before generating sdf " 20 | object_mesh: o3d.geometry.TriangleMesh = o3d.io.read_triangle_mesh(object_path) 21 | object_vertices: NDArray[np.float64] = np.array(object_mesh.vertices) 22 | min_coords, max_coords = object_vertices.min(axis=0), object_vertices.max(axis=0) 23 | extent: NDArray[np.float64] = max_coords-min_coords 24 | max_extent = extent.max() 25 | scale = max_extent/reg_size 26 | dim = np.clip(dim*scale, 32, 100) 27 | delta = max_extent/dim 28 | padding = min(int(padding*scale), 30) 29 | 30 | print(f"generating sdf for {object_path} with min_coords={min_coords}, max_coords={max_coords}, extent={extent}, delta={delta}, padding={padding}") 31 | sdfgen_cmd = f"{sdfgen_path} \"{object_path}\" {delta} {padding}" 32 | os.system(sdfgen_cmd) # bound box size: min=min_coords-padding*delta, max=max_coords+padding*delta 33 | 34 | sdf_path = object_path[:-4]+".sdf" 35 | sdf_min_coords, sdf_delta, sdf = load_sdf(sdf_path) 36 | print(f"sdf generation complete, sdf_min_coords={sdf_min_coords}, sdf_delta={sdf_delta}") 37 | np.savez(os.path.join(os.path.dirname(object_path), "sdf.npz"), min_coords=sdf_min_coords, delta=sdf_delta, sdf=sdf) 38 | 39 | def vis_sdf(sdf_path: str): 40 | from mayavi import mlab 41 | min_coords, delta, sdf = load_sdf(sdf_path) 42 | 43 | src = mlab.pipeline.scalar_field(sdf) 44 | mlab.pipeline.iso_surface(src, contours=[0, ], opacity=0.3) 45 | mlab.show() 46 | 47 | if __name__ == "__main__": 48 | # gen_sdf("tmp/debug_sdf/box/box.obj") 49 | # sdf_data = np.load("tmp/debug_sdf/box/sdf.npz") 50 | gen_sdf(os.path.join("env", "data", "assets", "table", "table.obj")) 51 | code.interact(local=dict(globals(), **locals())) 52 | # gen_sdf("tmp/debug_sdf/003_cracker_box/model_normalized_convex.obj") 53 | 54 | """ 55 | python -m env.tools.sdf 56 | """ -------------------------------------------------------------------------------- /env/utils/hand_collision_filter.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from numpy.typing import NDArray 4 | import torch 5 | from typing import Dict, List, Optional 6 | from omegaconf import OmegaConf 7 | import open3d as o3d 8 | from dataclasses import dataclass 9 | import code 10 | 11 | from .robot_kinematics import RobotKinematics, RobotKinematicsConfig 12 | from .transform import se3_inverse 13 | from .sdf_loss import SDFData, SDFDataTensor, se3_transform_pc_tensor, compute_distance 14 | 15 | class HandKinematics: 16 | def __init__(self, hand_urdf_path: str): 17 | self.hand_dir = os.path.dirname(hand_urdf_path) 18 | kinematics_cfg: RobotKinematicsConfig = OmegaConf.to_object(OmegaConf.structured(RobotKinematicsConfig(urdf_file=hand_urdf_path, chain_tip=None))) 19 | self.kinematics = RobotKinematics(kinematics_cfg) 20 | 21 | self.link_points_list: List[Optional[NDArray[np.float64]]] = [] 22 | for link in self.kinematics.links: 23 | if link.collision_file_path is None: 24 | self.link_points_list.append(None) 25 | continue 26 | link_collision_file_path = 
os.path.join(self.hand_dir, link.collision_file_path) 27 | link_mesh = o3d.io.read_triangle_mesh(link_collision_file_path) 28 | self.link_points_list.append(np.array(link_mesh.vertices)) 29 | 30 | def get_hand_points(self, hand_joint_values: NDArray[np.float64], device: torch.device) -> torch.DoubleTensor: 31 | hand_joint_values: torch.DoubleTensor = torch.tensor(hand_joint_values, device=device) 32 | base_to_links = self.kinematics.joint_to_cartesian_for_all_links(hand_joint_values) 33 | base_to_link_points_list: List[torch.DoubleTensor] = [] 34 | for base_to_link, link_points in zip(base_to_links, self.link_points_list): 35 | if link_points is None: 36 | continue 37 | link_points: torch.DoubleTensor = torch.tensor(link_points, device=device) 38 | base_to_link_points = se3_transform_pc_tensor(base_to_link, link_points) 39 | base_to_link_points_list.append(base_to_link_points) 40 | hand_points = torch.cat(base_to_link_points_list, dim=0) 41 | return hand_points 42 | 43 | @dataclass 44 | class HandCollisionFilterConfig: 45 | device: str = "cpu" 46 | threshold: float = 0. 47 | use_bbox: bool = True 48 | 49 | class HandCollisionFilter: 50 | def __init__(self, cfg: HandCollisionFilterConfig): 51 | self.cfg = cfg 52 | self.device = torch.device(cfg.device) 53 | 54 | self.hand_kinematics_cache: Dict[str, HandKinematics] = {} 55 | 56 | env_dir = os.path.dirname(os.path.dirname(__file__)) 57 | ee_dir = os.path.join(env_dir, "data", "assets", "franka_panda", "ee") 58 | if cfg.use_bbox: 59 | ee_mesh = o3d.io.read_triangle_mesh(os.path.join(ee_dir, "model.obj")) 60 | self.ee_min_coords = np.array(ee_mesh.vertices).min(axis=0)-cfg.threshold 61 | self.ee_max_coords = np.array(ee_mesh.vertices).max(axis=0)+cfg.threshold 62 | else: 63 | self.ee_sdf_data = SDFData(os.path.join(ee_dir, "sdf.npz")) 64 | 65 | def filter_hand_collision(self, hand_urdf_path: str, hand_joint_values: NDArray[np.float64], world_to_ees: NDArray[np.float64]) -> NDArray[np.bool_]: 66 | if hand_urdf_path not in self.hand_kinematics_cache: 67 | self.hand_kinematics_cache[hand_urdf_path] = HandKinematics(hand_urdf_path) 68 | hand_kinematics = self.hand_kinematics_cache[hand_urdf_path] 69 | hand_points = hand_kinematics.get_hand_points(hand_joint_values, device=self.device) # hand_points is in the world coordinate system, since its base is the origin in the world 70 | 71 | filter_results: List[bool] = [] 72 | if not self.cfg.use_bbox: 73 | ee_sdf_data_tensor = SDFDataTensor(self.ee_sdf_data, self.device) 74 | for world_to_ee in world_to_ees: 75 | ee_to_world = torch.tensor(se3_inverse(world_to_ee), device=self.device) 76 | if not self.cfg.use_bbox: 77 | inside_bbox_mask, inside_bbox_value = compute_distance(hand_points, ee_sdf_data_tensor, ee_to_world) # this bbox is the bbox of SDF 78 | filter_results.append((inside_bbox_value>=self.cfg.threshold).all().item()) 79 | else: 80 | ee_to_hand_points = se3_transform_pc_tensor(ee_to_world, hand_points) 81 | inside_bbox_mask: torch.BoolTensor = ( 82 | (ee_to_hand_points[:, 0] >= self.ee_min_coords[0]) 83 | & (ee_to_hand_points[:, 0] <= self.ee_max_coords[0]) 84 | & (ee_to_hand_points[:, 1] >= self.ee_min_coords[1]) 85 | & (ee_to_hand_points[:, 1] <= self.ee_max_coords[1]) 86 | & (ee_to_hand_points[:, 2] >= self.ee_min_coords[2]) 87 | & (ee_to_hand_points[:, 2] <= self.ee_max_coords[2]) 88 | ) # this bbox is the bbox of ee model 89 | filter_results.append(not inside_bbox_mask.any()) 90 | 91 | filter_results = np.array(filter_results, dtype=bool) 92 | return filter_results 93 | 94 | def debug(): 
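    # A minimal usage sketch for HandCollisionFilter, for illustration only; the config values,
    # variable names, and the (N, 4, 4) pose stack assumed here are not part of the original debug routine:
    #   cfg: HandCollisionFilterConfig = OmegaConf.to_object(OmegaConf.structured(HandCollisionFilterConfig(use_bbox=True)))
    #   collision_filter = HandCollisionFilter(cfg)
    #   keep_mask = collision_filter.filter_hand_collision(hand_urdf_path, hand_joint_values, world_to_ees)
    # Here hand_joint_values is the flattened MANO joint vector for one frame and world_to_ees stacks
    # candidate end-effector poses; keep_mask[i] is True when candidate i is predicted to be collision-free.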
95 | hand_kinematics = HandKinematics("env/data/assets/hand/20200709-subject-01_left/mano.urdf") 96 | # for i in range(51): 97 | # hand_joint_values = np.zeros(51) 98 | # hand_joint_values[i] = np.pi/6 99 | # hand_points = hand_kinematics.get_hand_points(hand_joint_values, device=torch.device("cpu")) 100 | # pcd = o3d.geometry.PointCloud() 101 | # pcd.points = o3d.utility.Vector3dVector(hand_points.numpy()) 102 | # o3d.io.write_point_cloud(f"tmp/debug_hand_collision_filter/{i}.xyz", pcd) 103 | # # o3d.visualization.draw_geometries([pcd]) 104 | 105 | hand_collision_filter = HandCollisionFilter("env/data/assets/hand/20200709-subject-01_left/mano.urdf") 106 | 107 | if __name__ == "__main__": 108 | debug() 109 | 110 | """ 111 | python -m policies.hand_collision_filter 112 | """ -------------------------------------------------------------------------------- /env/utils/robot_kinematics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.typing import NDArray 3 | import torch 4 | from xml.etree.ElementTree import parse, ElementTree 5 | from dataclasses import dataclass 6 | from omegaconf import MISSING 7 | from typing import Tuple, Optional, List, Dict, Union 8 | from scipy.spatial.transform import Rotation as Rt 9 | try: 10 | import PyKDL 11 | except: 12 | print("warning: failed to import PyKDL, the functionality of robot_kinematics will be limited") 13 | import code 14 | 15 | from .transform import mat_to_pos_ros_quat, pos_ros_quat_to_mat 16 | 17 | def str_to_float_list(s): 18 | return list(map(float, s.split(" "))) 19 | 20 | def list_to_jnt_array(q: List[float]): 21 | q_kdl = PyKDL.JntArray(len(q)) 22 | for i, q_i in enumerate(q): 23 | q_kdl[i] = q_i 24 | return q_kdl 25 | 26 | def urdf_origin_to_pose(origin: Optional[ElementTree]) -> NDArray[np.float64]: 27 | pose = np.eye(4) 28 | if origin is not None: 29 | xyz, rpy = str_to_float_list(origin.get("xyz")), str_to_float_list(origin.get("rpy")) 30 | pose[:3, 3] = xyz 31 | pose[:3, :3] = Rt.from_euler("xyz", rpy).as_matrix() 32 | return pose 33 | 34 | def axis_angle_to_rotation_matrix_torch(axis: torch.DoubleTensor, angle: torch.DoubleTensor) -> torch.DoubleTensor: 35 | """ 36 | Axis angle to rotation matrix for pytorch, numpy can use Rt.from_rotvec(...).as_matrix() 37 | Input: 38 | axis: (..., 3), angle: (...) 
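        (axis is assumed to be a unit vector and angle to be in radians, as required by Rodrigues' formula below)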
39 | Return: 40 | rotation_matrix: (..., 3, 3) 41 | """ 42 | # Components of the axis vector 43 | x, y, z = axis[..., 0], axis[..., 1], axis[..., 2] 44 | # Using Rodrigues' rotation formula 45 | cos, sin = angle.cos(), angle.sin() 46 | one_minus_cos = 1-cos 47 | R = torch.stack([ 48 | torch.stack([cos + x*x*one_minus_cos, x*y*one_minus_cos - z*sin, x*z*one_minus_cos + y*sin], dim=-1), # (..., 3) 49 | torch.stack([y*x*one_minus_cos + z*sin, cos + y*y*one_minus_cos, y*z*one_minus_cos - x*sin], dim=-1), # (..., 3) 50 | torch.stack([z*x*one_minus_cos - y*sin, z*y*one_minus_cos + x*sin, cos + z*z*one_minus_cos], dim=-1), # (..., 3) 51 | ], dim=-2) # (..., 3, 3) 52 | return R 53 | 54 | class Link: 55 | name: str 56 | parent: Optional["Link"] 57 | parent_joint: Optional["Joint"] 58 | children: List["Link"] 59 | inertial: ElementTree 60 | idx: int 61 | collision_file_path: Optional[str] 62 | 63 | def __init__(self, cfg: ElementTree, idx: int): 64 | self.name = cfg.get("name") 65 | self.inertial = cfg.find("inertial") 66 | self.parent = None 67 | self.parent_joint = None 68 | self.children = [] 69 | self.idx = idx 70 | try: 71 | self.collision_file_path = cfg.find("collision").find("geometry").find("mesh").get("filename").lstrip("package://") 72 | except: 73 | self.collision_file_path = None 74 | 75 | def __repr__(self) -> str: 76 | return f"Link("\ 77 | f"name={self.name}, "\ 78 | f"parent={self.parent.name if self.parent is not None else None}, "\ 79 | f"parent_joint={self.parent_joint.name if self.parent_joint is not None else None}, "\ 80 | f"children={[child.name for child in self.children]}, "\ 81 | f")" 82 | 83 | class Joint: 84 | name: str 85 | type: str 86 | parent: str 87 | child: str 88 | origin: ElementTree 89 | parent_to_child_default: NDArray[np.float64] 90 | axis: Optional[List[float]] 91 | limit: Optional[ElementTree] 92 | safety_controller: Optional[ElementTree] 93 | dynamics: Optional[ElementTree] 94 | def __init__(self, cfg: ElementTree): 95 | self.name = cfg.get("name") 96 | self.type = cfg.get("type") 97 | self.parent = cfg.find("parent").get("link") 98 | self.child = cfg.find("child").get("link") 99 | 100 | self.origin = cfg.find("origin") 101 | assert self.origin is not None 102 | self.parent_to_child_default = urdf_origin_to_pose(self.origin) 103 | if cfg.find("axis") is None: 104 | self.axis = None 105 | else: 106 | self.axis = str_to_float_list(cfg.find("axis").get("xyz")) 107 | 108 | self.limit = cfg.find("limit") 109 | self.safety_controller = cfg.find("safety_controller") 110 | self.dynamics = cfg.find("dynamics") 111 | 112 | def get_parent_to_child(self, theta: Union[float, NDArray[np.float64], torch.DoubleTensor]) -> Union[NDArray[np.float64], torch.DoubleTensor]: 113 | """ 114 | Get the pose of the child in parent's frame 115 | Input: 116 | theta: (...) 
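        (theta is the joint value: an angle in radians for revolute/continuous joints, a displacement along the joint axis for prismatic joints)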
117 | Return: 118 | parent_to_child: (..., 4, 4) 119 | """ 120 | if self.type in ["revolute", "continuous"]: 121 | if isinstance(theta, torch.DoubleTensor) or isinstance(theta, torch.cuda.DoubleTensor): 122 | device = theta.device 123 | axis = torch.tensor(self.axis, device=device, dtype=torch.float64) 124 | child_default_to_child_rot = axis_angle_to_rotation_matrix_torch(axis, theta) # (..., 3, 3) 125 | parent_to_child = torch.tensor(self.parent_to_child_default, device=device).repeat(theta.shape+(1, 1)) # (..., 4, 4) 126 | parent_to_child[..., :3, :3] = parent_to_child[..., :3, :3].clone()@child_default_to_child_rot # (..., 4, 4) 127 | elif isinstance(theta, float) or (isinstance(theta, np.ndarray) and theta.dtype == np.float64): 128 | if isinstance(theta, float): 129 | theta = np.array(theta) 130 | rotvec = np.array(self.axis)*theta[..., None] # (..., 3) 131 | child_default_to_child_rot = Rt.from_rotvec(rotvec.reshape(-1, 3)).as_matrix().reshape(rotvec.shape[:-1]+(3, 3)) # (..., 3, 3) 132 | parent_to_child = np.tile(self.parent_to_child_default, theta.shape+(1, 1)) # (..., 4, 4) 133 | parent_to_child[..., :3, :3] = parent_to_child[..., :3, :3]@child_default_to_child_rot # (..., 4, 4) 134 | else: 135 | raise TypeError(type(theta)) 136 | elif self.type == "prismatic": 137 | if isinstance(theta, torch.DoubleTensor) or isinstance(theta, torch.cuda.DoubleTensor): 138 | device = theta.device 139 | child_default_to_child_trans = torch.tensor(self.axis, device=device, dtype=torch.float64)*theta[..., None] # (..., 3) 140 | parent_to_child = torch.tensor(self.parent_to_child_default, device=device).repeat(theta.shape+(1, 1)) # (..., 4, 4) 141 | parent_to_child[..., :3, 3] = parent_to_child[..., :3, 3].clone()+child_default_to_child_trans # (..., 4, 4) 142 | elif isinstance(theta, float) or (isinstance(theta, np.ndarray) and theta.dtype == np.float64): 143 | if isinstance(theta, float): 144 | theta = np.array(theta) 145 | child_default_to_child_trans = np.array(self.axis)*theta[..., None] # (..., 3) 146 | parent_to_child = np.tile(self.parent_to_child_default, theta.shape+(1, 1)) # (..., 4, 4) 147 | parent_to_child[..., :3, 3] = parent_to_child[..., :3, 3]+child_default_to_child_trans # (..., 4, 4) 148 | else: 149 | raise TypeError(type(theta)) 150 | else: 151 | raise NotImplementedError 152 | return parent_to_child 153 | 154 | def parse_urdf(urdf_file) -> Tuple[List[Link], Dict[str, Link], List[Joint], Dict[str, Joint], Link]: 155 | tree: ElementTree = parse(urdf_file) 156 | root = tree.getroot() 157 | link_cfgs = root.findall("link") 158 | joint_cfgs = root.findall("joint") 159 | links = [Link(link_cfg, idx) for idx, link_cfg in enumerate(link_cfgs)] 160 | link_map = {link.name: link for link in links} 161 | joints = [Joint(joint_cfg) for joint_cfg in joint_cfgs] 162 | joint_map = {joint.name: joint for joint in joints} 163 | for joint in joints: 164 | parent_link = link_map[joint.parent] 165 | child_link = link_map[joint.child] 166 | child_link.parent = parent_link 167 | child_link.parent_joint = joint 168 | parent_link.children.append(child_link) 169 | root_link = None 170 | for link in links: 171 | if link.parent is None: 172 | assert root_link is None, "multiple roots detected" 173 | root_link = link 174 | assert root_link is not None, "no roots detected" 175 | return links, link_map, joints, joint_map, root_link 176 | 177 | def urdf_origin_to_kdl_frame(origin: Optional[ElementTree]): 178 | if origin is not None: 179 | xyz = str_to_float_list(origin.get("xyz")) 180 | rpy = 
str_to_float_list(origin.get("rpy")) 181 | else: 182 | xyz = [0.0, 0.0, 0.0] 183 | rpy = [0.0, 0.0, 0.0] 184 | return PyKDL.Frame(PyKDL.Rotation.Quaternion(*Rt.from_euler("xyz", rpy).as_quat()), PyKDL.Vector(*xyz)) 185 | 186 | def urdf_joint_to_kdl_joint(joint: Joint): 187 | origin_frame = urdf_origin_to_kdl_frame(joint.origin) 188 | if joint.type == "fixed": 189 | return PyKDL.Joint(joint.name, PyKDL.Joint.Fixed) 190 | axis = PyKDL.Vector(*joint.axis) 191 | if joint.type in ["revolute", "continuous"]: 192 | return PyKDL.Joint(joint.name, origin_frame.p, origin_frame.M*axis, PyKDL.Joint.RotAxis) 193 | if joint.type == "prismatic": 194 | return PyKDL.Joint(joint.name, origin_frame.p, origin_frame.M*axis, PyKDL.Joint.TransAxis) 195 | raise ValueError(f"Unknown joint type: {joint.type}.") 196 | 197 | def urdf_inertial_to_kdl_rbi(inertial: ElementTree): 198 | origin_frame = urdf_origin_to_kdl_frame(inertial.find("origin")) 199 | mass = float(inertial.find("mass").get("value")) 200 | inertia = inertial.find("inertia") 201 | rbi = PyKDL.RigidBodyInertia( 202 | mass, 203 | origin_frame.p, 204 | PyKDL.RotationalInertia( 205 | float(inertia.get("ixx")), 206 | float(inertia.get("iyy")), 207 | float(inertia.get("izz")), 208 | float(inertia.get("ixy")), 209 | float(inertia.get("ixz")), 210 | float(inertia.get("iyz")), 211 | ), 212 | ) 213 | return origin_frame.M*rbi 214 | 215 | def build_kdl_tree(link_map: Dict[str, Link], joints: List[Joint], root_link: Link): 216 | tree = PyKDL.Tree(root_link.name) 217 | for joint in joints: 218 | child_link = link_map[joint.child] 219 | kdl_joint = urdf_joint_to_kdl_joint(joint) 220 | kdl_frame = urdf_origin_to_kdl_frame(joint.origin) 221 | kdl_rbi = urdf_inertial_to_kdl_rbi(child_link.inertial) 222 | kdl_segment = PyKDL.Segment(joint.child, kdl_joint, kdl_frame, kdl_rbi) 223 | tree.addSegment(kdl_segment, joint.parent) 224 | return tree 225 | 226 | def get_chain_joint_limit(tip_link: Link) -> Tuple[List[float], List[float]]: 227 | q_min, q_max = [], [] 228 | cur_link = tip_link 229 | while cur_link.parent_joint is not None: 230 | limit = cur_link.parent_joint.limit 231 | if limit is not None: 232 | q_min.append(float(limit.get("lower"))) 233 | q_max.append(float(limit.get("upper"))) 234 | cur_link = cur_link.parent 235 | q_min.reverse() 236 | q_max.reverse() 237 | return q_min, q_max 238 | 239 | def get_home_pose_and_screw_axes(tip_link: Link) -> Tuple[NDArray[np.float64], NDArray[np.float64]]: 240 | current_link = tip_link 241 | current_link_to_tip_link = np.eye(4) 242 | chain_links: List[Link] = [current_link] 243 | while current_link.parent is not None: 244 | current_link_to_tip_link = current_link.parent_joint.parent_to_child_default@current_link_to_tip_link 245 | current_link = current_link.parent 246 | chain_links.append(current_link) 247 | home_pose = current_link_to_tip_link 248 | # screw axes 249 | chain_links.reverse() 250 | base_link_to_current_link = np.eye(4) 251 | screw_axes: List[NDArray[np.float64]] = [] 252 | for link in chain_links[1:]: 253 | joint = link.parent_joint 254 | base_link_to_current_link = base_link_to_current_link@joint.parent_to_child_default 255 | if joint.type in ["revolute", "continuous"]: 256 | rot_axis = base_link_to_current_link[:3, :3]@np.array(joint.axis) # (3,) 257 | linear_velocity = -np.cross(rot_axis, base_link_to_current_link[:3, 3]) # (3,) 258 | # Reference: MODERN ROBOTICS: MECHANICS, PLANNING, AND CONTROL. 
4.1.1 First Formulation: Screw Axes in the Base Frame 259 | screw_axis = np.concatenate([rot_axis, linear_velocity]) 260 | screw_axes.append(screw_axis) 261 | elif joint.type == "fixed": 262 | pass 263 | else: 264 | raise NotImplementedError 265 | screw_axes: NDArray[np.float64] = np.stack(screw_axes, axis=0) 266 | return home_pose, screw_axes 267 | 268 | def vector_to_so3(vec: NDArray[np.float64]) -> NDArray[np.float64]: 269 | """ 270 | Input: 271 | vec: (..., 3) 272 | Return: 273 | matrices: (..., 3, 3) 274 | Reference: 275 | MODERN ROBOTICS: MECHANICS, PLANNING, AND CONTROL. 3.30 276 | [[ 0, -x2, x1], 277 | [ x2, 0, -x0], 278 | [-x1, x0, 0]] 279 | """ 280 | assert vec.shape[-1] == 3 281 | x0, x1, x2 = vec[..., 0], vec[..., 1], vec[..., 2] 282 | zeros = np.zeros_like(x0) 283 | return np.stack([ 284 | np.stack([zeros, -x2, x1], axis=-1), 285 | np.stack([x2, -zeros, -x0], axis=-1), 286 | np.stack([-x1, x0, zeros], axis=-1), 287 | ], axis=-2) 288 | 289 | def screw_axes_and_thetas_to_matrix_exponentials(screw_axes: NDArray[np.float64], thetas: NDArray[np.float64]) -> NDArray[np.float64]: 290 | """ 291 | Input: 292 | screw_axes: (..., 6) 293 | thetas: (...) 294 | Return: 295 | matrix_exponentials: (..., 4, 4) 296 | Reference: 297 | MODERN ROBOTICS: MECHANICS, PLANNING, AND CONTROL. 3.51 and 3.88 298 | [[I+sin(theta)[w]+(1-cos(theta))[w]^2, (theta*I+(1-cos(theta))[w]+(theta-sin(theta))[w]^2)v] 299 | [0, 1]] 300 | """ 301 | omegas = vector_to_so3(screw_axes[..., :3]) # (..., 3, 3) 302 | omegas_square = omegas@omegas # (..., 3, 3) 303 | sin_thetas = np.sin(thetas) # (...) 304 | one_minus_cos_thetas = 1-np.cos(thetas) # (...) 305 | rotations = np.eye(3)+sin_thetas[..., None, None]*omegas+one_minus_cos_thetas[..., None, None]*omegas_square # (..., 3, 3) 306 | translations = (thetas[..., None, None]*np.eye(3)+one_minus_cos_thetas[..., None, None]*omegas+(thetas-sin_thetas)[..., None, None]*omegas_square)@screw_axes[..., 3:, None] # (..., 3, 1) 307 | last_row = np.tile(np.array([0., 0., 0., 1.]), rotations.shape[:-2]+(1, 1)) # (..., 1, 4) 308 | matrix_exponentials = np.concatenate([ 309 | np.concatenate([rotations, translations], axis=-1), # (..., 3, 4) 310 | last_row 311 | ], axis=-2) # (..., 4, 4) 312 | return matrix_exponentials 313 | 314 | @dataclass 315 | class RobotKinematicsConfig: 316 | urdf_file: str = "env/data/assets/franka_panda/panda_gripper_hand_camera.urdf" 317 | IK_solver_max_iter: int = 100 318 | IK_solver_eps: float = 1e-6 319 | chain_tip: Optional[str] = "panda_hand" 320 | 321 | class RobotKinematics: 322 | def __init__(self, cfg: RobotKinematicsConfig): 323 | self.cfg = cfg 324 | 325 | self.links, self.link_map, self.joints, self.joint_map, self.root_link = parse_urdf(cfg.urdf_file) 326 | 327 | if cfg.chain_tip is not None: 328 | self.kdl_tree = build_kdl_tree(self.link_map, self.joints, self.root_link) 329 | self.arm_chain = self.kdl_tree.getChain(self.root_link.name, cfg.chain_tip) 330 | self.num_joints: int = self.arm_chain.getNrOfJoints() 331 | self.tip_link = self.link_map[cfg.chain_tip] 332 | self.q_min, self.q_max = get_chain_joint_limit(self.tip_link) 333 | q_min_kdl, q_max_kdl = list_to_jnt_array(self.q_min), list_to_jnt_array(self.q_max) 334 | self.fk_p_kdl = PyKDL.ChainFkSolverPos_recursive(self.arm_chain) 335 | self.ik_v_kdl = PyKDL.ChainIkSolverVel_pinv(self.arm_chain) 336 | self.ik_p_kdl = PyKDL.ChainIkSolverPos_NR_JL(self.arm_chain, q_min_kdl, q_max_kdl, self.fk_p_kdl, self.ik_v_kdl, maxiter=cfg.IK_solver_max_iter, eps=cfg.IK_solver_eps) 337 | self.home_pose, 
self.screw_axes = get_home_pose_and_screw_axes(self.tip_link) 338 | 339 | def cartesian_to_joint(self, base_to_ee: NDArray[np.float64], seed: Optional[NDArray[np.float64]]=None) -> Tuple[NDArray[np.float64], int]: 340 | """ 341 | Inverse kinematics 342 | Input: 343 | base_to_ee: (4, 4) 344 | seed: (self.num_joints,) 345 | Return: 346 | result: (self.num_joints,) 347 | info: int 348 | """ 349 | pos, ros_quat = mat_to_pos_ros_quat(base_to_ee) 350 | if seed is None: 351 | seed = np.zeros(self.num_joints) 352 | else: 353 | assert seed.shape[0] == self.num_joints 354 | seed_kdl = list_to_jnt_array(seed) 355 | # Make IK Call 356 | goal_pose = PyKDL.Frame( 357 | PyKDL.Rotation.Quaternion(ros_quat[0], ros_quat[1], ros_quat[2], ros_quat[3]), 358 | PyKDL.Vector(pos[0], pos[1], pos[2]) 359 | ) 360 | result_angles = PyKDL.JntArray(self.num_joints) 361 | info = self.ik_p_kdl.CartToJnt(seed_kdl, goal_pose, result_angles) # E_NOERROR = 0, E_NOT_UP_TO_DATE = -3, E_SIZE_MISMATCH = -4, E_MAX_ITERATIONS_EXCEEDED = -5, E_IKSOLVERVEL_FAILED = -100, E_FKSOLVERPOS_FAILED = -101 362 | result = np.array(list(result_angles)) 363 | return result, info 364 | 365 | def joint_to_cartesian(self, joint_values: NDArray[np.float64]) -> NDArray[np.float64]: 366 | """ 367 | Forward kinematics 368 | Input: 369 | joint_values: (..., self.num_joints) 370 | Return: 371 | base_to_ees: (..., 4, 4) 372 | """ 373 | matrix_exponentials = screw_axes_and_thetas_to_matrix_exponentials(self.screw_axes, joint_values) # (..., self.num_joints, 4, 4) 374 | base_to_ees = self.home_pose 375 | for i in reversed(range(self.num_joints)): 376 | base_to_ees = matrix_exponentials[..., i, :, :]@base_to_ees 377 | return base_to_ees 378 | 379 | def joint_to_cartesian_for_all_links(self, joint_values: torch.DoubleTensor) -> torch.DoubleTensor: 380 | """ 381 | Forward kinematics for all links 382 | Input: 383 | joint_values: (..., number of joints in self.joints that are not fixed) 384 | Return: 385 | base_to_links: (..., len(self.links), 4, 4) 386 | """ 387 | device = joint_values.device 388 | base_to_base = torch.eye(4, dtype=torch.float64, device=device).repeat(joint_values.shape[:-1]+(1, 1)) # (..., 4, 4) 389 | base_to_links: List[torch.DoubleTensor] = [base_to_base] 390 | joint_idx = 0 391 | for link in self.links[1:]: # assume links are in a topological order 392 | parent_link_idx = link.parent.idx 393 | parent_joint = link.parent_joint 394 | base_to_parent_link = base_to_links[parent_link_idx] # (..., 4, 4) 395 | if parent_joint.type == "fixed": 396 | base_to_link = base_to_parent_link@torch.tensor(parent_joint.parent_to_child_default, device=device) # (..., 4, 4) 397 | elif parent_joint.type in ["revolute", "prismatic", "continuous"]: 398 | base_to_link = base_to_parent_link@parent_joint.get_parent_to_child(joint_values[..., joint_idx]) # (..., 4, 4) 399 | joint_idx += 1 400 | else: 401 | raise NotImplementedError(f"forward kinematics not implemented for type {parent_joint.type}") 402 | base_to_links.append(base_to_link) 403 | base_to_links = torch.stack(base_to_links, dim=-3) # (..., len(self.links), 4, 4) 404 | return base_to_links 405 | 406 | def debug_cartesian_to_joint(robot: RobotKinematics): 407 | pos = np.array([ 0.1439894 , -0.00910749, 0.71072687]) 408 | ros_quat = np.array([ 0.96438653, 0.03465594, 0.2612568 , -0.02241564]) 409 | base_to_ee = pos_ros_quat_to_mat(pos, ros_quat) 410 | seed = np.array([ 0. , -1.285, 0. , -2.356, 0. 
, 1.571, 0.785]) 411 | ik_joint_values, info = robot.cartesian_to_joint(base_to_ee, seed) # array([ 0.03839981, -1.29161734, -0.04006584, -2.35181459, 0.01433843, 1.58850796, 0.73411765]) 412 | print(f"ik_joint_values {ik_joint_values}") 413 | 414 | def debug_joint_to_cartesian(robot: RobotKinematics): 415 | print(f"robot.home_pose {robot.home_pose}") 416 | # array([[ 0.70710678, 0.70710678, 0. , 0.088 ], 417 | # [ 0.70710678, -0.70710678, -0. , -0. ], 418 | # [-0. , 0. , -1. , 0.926 ], 419 | # [ 0. , 0. , 0. , 1. ]]) 420 | joint_values = np.array([ 421 | [0., 0., 0., 0., 0., 0., 0.], 422 | [.1, 0., 0., 0., 0., 0., 0.], 423 | [0., .1, 0., 0., 0., 0., 0.], 424 | [0., 0., .1, 0., 0., 0., 0.], 425 | [0., 0., 0., .1, 0., 0., 0.], 426 | [0., 0., 0., 0., .1, 0., 0.], 427 | [0., 0., 0., 0., 0., .1, 0.], 428 | [0., 0., 0., 0., 0., 0., .1], 429 | [.1, .2, .3, .4, .5, .6, .7], 430 | ]) 431 | for joint_value in joint_values: 432 | print(f"joint_value {joint_value} base_to_ee\n{robot.joint_to_cartesian(joint_value)}") 433 | print(f"batch base_to_ee\n{robot.joint_to_cartesian(joint_values)}") 434 | # joint_value [0. 0. 0. 0. 0. 0. 0.], base_to_ee 435 | # [[ 0.70710678 0.70710678 0. 0.088 ] 436 | # [ 0.70710678 -0.70710678 -0. -0. ] 437 | # [-0. 0. -1. 0.926 ] 438 | # [ 0. 0. 0. 1. ]] 439 | # joint_value [0.1 0. 0. 0. 0. 0. 0. ], base_to_ee 440 | # [[ 0.63298131 0.77416708 0. 0.08756037] 441 | # [ 0.77416708 -0.63298131 -0. 0.00878534] 442 | # [-0. 0. -1. 0.926 ] 443 | # [ 0. 0. 0. 1. ]] 444 | # joint_value [0. 0.1 0. 0. 0. 0. 0. ], base_to_ee 445 | # [[ 0.70357419 0.70357419 -0.09983342 0.14676158] 446 | # [ 0.70710678 -0.70710678 -0. -0. ] 447 | # [-0.07059289 -0.07059289 -0.99500417 0.91425213] 448 | # [ 0. 0. 0. 1. ]] 449 | # joint_value [0. 0. 0.1 0. 0. 0. 0. ], base_to_ee 450 | # [[ 0.63298131 0.77416708 0. 0.08756037] 451 | # [ 0.77416708 -0.63298131 -0. 0.00878534] 452 | # [-0. 0. -1. 0.926 ] 453 | # [ 0. 0. 0. 1. ]] 454 | # joint_value [0. 0. 0. 0.1 0. 0. 0. ], base_to_ee 455 | # [[ 0.70357419 0.70357419 0.09983342 0.06031867] 456 | # [ 0.70710678 -0.70710678 -0. -0. ] 457 | # [ 0.07059289 0.07059289 -0.99500417 0.92516524] 458 | # [ 0. 0. 0. 1. ]] 459 | # joint_value [0. 0. 0. 0. 0.1 0. 0. ], base_to_ee 460 | # [[ 0.63298131 0.77416708 0. 0.08756037] 461 | # [ 0.77416708 -0.63298131 -0. 0.00878534] 462 | # [-0. 0. -1. 0.926 ] 463 | # [ 0. 0. 0. 1. ]] 464 | # joint_value [0. 0. 0. 0. 0. 0.1 0. ], base_to_ee 465 | # [[ 0.70357419 0.70357419 0.09983342 0.09824254] 466 | # [ 0.70710678 -0.70710678 -0. -0. ] 467 | # [ 0.07059289 0.07059289 -0.99500417 0.93531989] 468 | # [ 0. 0. 0. 1. ]] 469 | # joint_value [0. 0. 0. 0. 0. 0. 0.1], base_to_ee 470 | # [[ 0.77416708 0.63298131 -0. 0.088 ] 471 | # [ 0.63298131 -0.77416708 -0. -0. ] 472 | # [-0. 0. -1. 0.926 ] 473 | # [ 0. 0. 0. 1. ]] 474 | # joint_value [0.1 0.2 0.3 0.4 0.5 0.6 0.7], base_to_ee 475 | # [[ 0.3429257 0.80404361 0.48571168 0.08508066] 476 | # [ 0.60596605 -0.58444466 0.53965691 0.06370813] 477 | # [ 0.7177793 0.10926257 -0.68764422 0.97517365] 478 | # [ 0. 0. 0. 1. ]] 479 | 480 | # batch base_to_ee 481 | # [[[ 0.70710678 0.70710678 0. 0.088 ] 482 | # [ 0.70710678 -0.70710678 -0. -0. ] 483 | # [-0. 0. -1. 0.926 ] 484 | # [ 0. 0. 0. 1. ]] 485 | 486 | # [[ 0.63298131 0.77416708 0. 0.08756037] 487 | # [ 0.77416708 -0.63298131 -0. 0.00878534] 488 | # [-0. 0. -1. 0.926 ] 489 | # [ 0. 0. 0. 1. ]] 490 | 491 | # [[ 0.70357419 0.70357419 -0.09983342 0.14676158] 492 | # [ 0.70710678 -0.70710678 -0. -0. 
] 493 | # [-0.07059289 -0.07059289 -0.99500417 0.91425213] 494 | # [ 0. 0. 0. 1. ]] 495 | 496 | # [[ 0.63298131 0.77416708 0. 0.08756037] 497 | # [ 0.77416708 -0.63298131 -0. 0.00878534] 498 | # [-0. 0. -1. 0.926 ] 499 | # [ 0. 0. 0. 1. ]] 500 | 501 | # [[ 0.70357419 0.70357419 0.09983342 0.06031867] 502 | # [ 0.70710678 -0.70710678 -0. -0. ] 503 | # [ 0.07059289 0.07059289 -0.99500417 0.92516524] 504 | # [ 0. 0. 0. 1. ]] 505 | 506 | # [[ 0.63298131 0.77416708 0. 0.08756037] 507 | # [ 0.77416708 -0.63298131 -0. 0.00878534] 508 | # [-0. 0. -1. 0.926 ] 509 | # [ 0. 0. 0. 1. ]] 510 | 511 | # [[ 0.70357419 0.70357419 0.09983342 0.09824254] 512 | # [ 0.70710678 -0.70710678 -0. -0. ] 513 | # [ 0.07059289 0.07059289 -0.99500417 0.93531989] 514 | # [ 0. 0. 0. 1. ]] 515 | 516 | # [[ 0.77416708 0.63298131 0. 0.088 ] 517 | # [ 0.63298131 -0.77416708 -0. -0. ] 518 | # [-0. 0. -1. 0.926 ] 519 | # [ 0. 0. 0. 1. ]] 520 | 521 | # [[ 0.3429257 0.80404361 0.48571168 0.08508066] 522 | # [ 0.60596605 -0.58444466 0.53965691 0.06370813] 523 | # [ 0.7177793 0.10926257 -0.68764422 0.97517365] 524 | # [ 0. 0. 0. 1. ]]] 525 | 526 | def debug_joint_to_cartesian_for_all_links(robot: RobotKinematics): 527 | # joint_values = torch.zeros(9, dtype=torch.float64) 528 | joint_values = torch.tensor([.1, .2, .3, .4, .5, .6, .7, .04, .04], dtype=torch.float64) 529 | world_to_links = robot.joint_to_cartesian_for_all_links(joint_values) 530 | # [[[ 1.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], 531 | # [ 0.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00], 532 | # [ 0.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00], 533 | # [ 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00]], 534 | 535 | # [[ 9.9500e-01, -9.9833e-02, 0.0000e+00, 0.0000e+00], 536 | # [ 9.9833e-02, 9.9500e-01, 0.0000e+00, 0.0000e+00], 537 | # [ 0.0000e+00, 0.0000e+00, 1.0000e+00, 3.3300e-01], 538 | # [ 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00]], 539 | 540 | # [[ 9.7517e-01, -1.9768e-01, -9.9833e-02, 0.0000e+00], 541 | # [ 9.7843e-02, -1.9834e-02, 9.9500e-01, 0.0000e+00], 542 | # [-1.9867e-01, -9.8007e-01, 4.8966e-12, 3.3300e-01], 543 | # [ 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00]], 544 | 545 | # [[ 9.0211e-01, -3.8356e-01, 1.9768e-01, 6.2466e-02], 546 | # [ 3.8752e-01, 9.2165e-01, 1.9834e-02, 6.2675e-03], 547 | # [-1.8980e-01, 5.8711e-02, 9.8007e-01, 6.4270e-01], 548 | # [ 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00]], 549 | 550 | # [[ 9.0788e-01, -1.6923e-01, 3.8356e-01, 1.3689e-01], 551 | # [ 3.6465e-01, -1.3264e-01, -9.2165e-01, 3.8238e-02], 552 | # [ 2.0684e-01, 9.7661e-01, -5.8711e-02, 6.2704e-01], 553 | # [ 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00]], 554 | 555 | # [[ 6.1285e-01, -7.7186e-01, -1.6923e-01, -2.9931e-03], 556 | # [ 7.6187e-01, 6.3400e-01, -1.3264e-01, -4.2779e-02], 557 | # [ 2.0967e-01, -4.7642e-02, 9.7661e-01, 9.8500e-01], 558 | # [ 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00]], 559 | 560 | # [[ 4.1026e-01, -4.8571e-01, 7.7186e-01, -2.9931e-03], 561 | # [ 5.5391e-01, -5.3966e-01, -6.3400e-01, -4.2779e-02], 562 | # [ 7.2448e-01, 6.8764e-01, 4.7642e-02, 9.8500e-01], 563 | # [ 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00]], 564 | 565 | # [[ 8.1103e-01, 3.2606e-01, 4.8571e-01, 3.3110e-02], 566 | # [ 1.5218e-02, -8.4175e-01, 5.3966e-01, 5.9648e-03], 567 | # [ 5.8481e-01, -4.3029e-01, -6.8764e-01, 1.0488e+00], 568 | # [ 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00]], 569 | 570 | # [[ 3.4293e-01, 8.0404e-01, 4.8571e-01, 8.5081e-02], 571 | # [ 6.0597e-01, -5.8444e-01, 5.3966e-01, 6.3708e-02], 572 | # [ 7.1778e-01, 1.0926e-01, 
-6.8764e-01, 9.7517e-01], 573 | # [ 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00]], 574 | 575 | # [[ 3.4293e-01, 8.0404e-01, 4.8571e-01, 1.4561e-01], 576 | # [ 6.0597e-01, -5.8444e-01, 5.3966e-01, 7.1846e-02], 577 | # [ 7.1778e-01, 1.0926e-01, -6.8764e-01, 9.3939e-01], 578 | # [ 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00]], 579 | 580 | # [[ 3.4293e-01, 8.0404e-01, 4.8571e-01, 8.1284e-02], 581 | # [ 6.0597e-01, -5.8444e-01, 5.3966e-01, 1.1860e-01], 582 | # [ 7.1778e-01, 1.0926e-01, -6.8764e-01, 9.3064e-01], 583 | # [ 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00]], 584 | 585 | # [[-8.0404e-01, -4.8571e-01, 3.4293e-01, 1.1491e-01], 586 | # [ 5.8444e-01, -5.3966e-01, 6.0597e-01, 1.0495e-01], 587 | # [-1.0926e-01, 6.8764e-01, 7.1778e-01, 9.7626e-01], 588 | # [ 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00]]] 589 | code.interact(local=dict(globals(), **locals())) 590 | 591 | def debug(): 592 | np.set_printoptions(suppress=True) # no scientific notation 593 | from omegaconf import OmegaConf 594 | cfg = OmegaConf.to_object(OmegaConf.structured(RobotKinematicsConfig)) 595 | robot = RobotKinematics(cfg) 596 | # debug_cartesian_to_joint(robot) 597 | # debug_joint_to_cartesian(robot) 598 | debug_joint_to_cartesian_for_all_links(robot) 599 | code.interact(local=dict(globals(), **locals())) 600 | 601 | if __name__ == "__main__": 602 | debug() 603 | 604 | """ 605 | python -m env.utils.robot_kinematics 606 | """ -------------------------------------------------------------------------------- /env/utils/scene.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from numpy.typing import NDArray 4 | from typing import TypedDict, List, Optional 5 | from scipy.spatial.transform import Rotation as Rt 6 | 7 | def scene_id_to_hierarchical_dir(scene_id: int) -> str: 8 | scene_id_str = f"{scene_id:08d}" # "12345678" 9 | hierarchical_dir = os.path.join(*(scene_id_str[i:i+2] for i in range(0, len(scene_id_str)-2, 2))) # "12/34/56/" 10 | return hierarchical_dir 11 | 12 | def scene_id_to_dir(scene_id: int, demo_structure: str) -> str: 13 | if demo_structure == "hierarchical": 14 | scene_dir = scene_id_to_hierarchical_dir(scene_id) 15 | elif demo_structure == "flat": 16 | scene_dir = "" 17 | else: 18 | raise NotImplementedError 19 | return scene_dir 20 | 21 | env_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 22 | scene_root_dir = os.path.join(env_dir, "data", "scene") 23 | def scene_id_to_scene_dir(scene_id: int) -> str: 24 | scene_path = os.path.join(scene_root_dir, scene_id_to_hierarchical_dir(scene_id)) 25 | return scene_path 26 | 27 | class SceneData(TypedDict): 28 | hand_name: str 29 | hand_side: str 30 | hand_path: str 31 | hand_pose: NDArray[np.float32] 32 | object_names: List[str] 33 | object_paths: List[str] 34 | object_grasp_id: int 35 | object_poses: NDArray[np.float32] 36 | endpoints: Optional[NDArray[np.int64]] 37 | 38 | objects_dir = os.path.join(env_dir, "data", "assets", "objects") 39 | hand_dir = os.path.join(env_dir, "data", "assets", "hand") 40 | def load_scene_data(scene_id: int, table_height: float=0., stop_moving_frame: Optional[int]=None, frame_interval: int=1) -> SceneData: 41 | scene_dir = scene_id_to_scene_dir(scene_id) 42 | scene_data_path = os.path.join(scene_dir, f"{scene_id:08d}.npz") 43 | scene_data = dict(np.load(scene_data_path)) # "hand_name", "hand_side", "hand_path", "hand_pose", "object_names", "object_paths", "object_grasp_id", "object_poses", "endpoints", "source" 44 | source = 
scene_data["source"] 45 | scene_data["hand_name"] = scene_data["hand_name"].item() 46 | scene_data["hand_side"] = scene_data["hand_side"].item() 47 | scene_data["hand_path"] = os.path.join(hand_dir, scene_data["hand_path"].item()) 48 | if source == "dexycb": 49 | scene_data["hand_pose"] = scene_data["hand_pose"][::frame_interval] # (T, 51) 50 | elif source == "genh2r": 51 | hand_pose = scene_data["hand_pose"][::frame_interval] 52 | scene_data["hand_pose"] = np.concatenate([hand_pose, np.tile(scene_data["hand_theta"], (hand_pose.shape[0], 1))], axis=1) 53 | else: 54 | raise NotImplementedError 55 | scene_data["object_names"] = scene_data["object_names"].tolist() 56 | scene_data["object_paths"] = [os.path.join(objects_dir, object_path) for object_path in scene_data["object_paths"].tolist()] 57 | scene_data["object_poses"] = scene_data["object_poses"][:, ::frame_interval] # (#objects, T, 6) 58 | 59 | if stop_moving_frame is not None: 60 | scene_data["hand_pose"] = scene_data["hand_pose"][:stop_moving_frame] 61 | scene_data["object_poses"] = scene_data["object_poses"][:, :stop_moving_frame] 62 | if table_height != 0.: 63 | hand_nonzero_mask = np.any(scene_data["hand_pose"]!=0, axis=1) 64 | hand_nonzeros = np.where(hand_nonzero_mask)[0] 65 | hand_start_frame = hand_nonzeros[0] 66 | hand_end_frame = hand_nonzeros[-1]+1 67 | scene_data["hand_pose"][hand_start_frame:hand_end_frame, 2] += table_height 68 | scene_data["object_poses"][:, :, 2] += table_height 69 | 70 | if "endpoints" in scene_data: 71 | scene_data["endpoints"] //= frame_interval 72 | else: 73 | scene_data["endpoints"] = None 74 | 75 | return scene_data 76 | 77 | def six_d_to_mat(six_d: NDArray[np.float64]) -> NDArray[np.float64]: 78 | " (..., 6) " 79 | shape_prefix = six_d.shape[:-1] 80 | mat = np.zeros(shape_prefix+(4, 4)) 81 | mat[..., :3, 3] = six_d[..., :3] 82 | mat[..., :3, :3] = Rt.from_euler("XYZ", six_d[..., 3:].reshape(-1, 3)).as_matrix().reshape(shape_prefix+(3, 3)) 83 | mat[..., 3, 3] = 1 84 | return mat 85 | 86 | def mat_to_six_d(mat: NDArray[np.float64]) -> NDArray[np.float64]: 87 | " (..., 4, 4) " 88 | shape_prefix = mat.shape[:-2] 89 | return np.concatenate([mat[..., :3, 3], Rt.from_matrix(mat[..., :3, :3].reshape(-1, 3, 3)).as_euler("XYZ").reshape(shape_prefix + (3, ))], axis=-1) -------------------------------------------------------------------------------- /env/utils/sdf_loss.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.typing import NDArray 3 | import torch 4 | from typing import List, Tuple 5 | import code 6 | 7 | class SDFData: 8 | def __init__(self, sdf_path: str): 9 | sdf_data = np.load(sdf_path) 10 | self.min_coords: NDArray[np.float64] = sdf_data["min_coords"] 11 | self.delta: NDArray[np.float64] = sdf_data["delta"] 12 | self.sdf: NDArray[np.float64] = sdf_data["sdf"] 13 | 14 | class SDFDataTensor: 15 | def __init__(self, sdf_data: SDFData, device: torch.device=torch.device("cpu")): 16 | self.min_coords: torch.DoubleTensor = torch.tensor(sdf_data.min_coords, device=device) 17 | self.delta: torch.DoubleTensor = torch.tensor(sdf_data.delta, device=device) 18 | self.sdf: torch.DoubleTensor = torch.tensor(sdf_data.sdf, device=device) 19 | 20 | def se3_transform_pc_tensor(T: torch.DoubleTensor, points: torch.DoubleTensor) -> torch.DoubleTensor: 21 | " T: (4, 4) points: (..., 3) " 22 | return points@T[:3, :3].transpose(0, 1)+T[:3, 3] 23 | 24 | def interpolate(data: torch.DoubleTensor, coords_0: torch.LongTensor, coords_frac: 
torch.DoubleTensor) -> torch.DoubleTensor: 25 | """ 26 | Input: 27 | data: (nx, ny, nz) 28 | coords: (N, 3) within data range 29 | coords_frac: (N, 3) 30 | Return: 31 | value: (N, ) 32 | """ 33 | x0, y0, z0 = coords_0.unbind(dim=1) # (N,), (N,), (N,) 34 | x1, y1, z1 = x0+1, y0+1, z0+1 # (N,), (N,), (N,) 35 | xc, yc, zc = coords_frac.unbind(dim=1) # (N,), (N,), (N,) 36 | value_00 = data[x0, y0, z0]*(1-zc)+data[x0, y0, z1]*zc # (N,) 37 | value_01 = data[x0, y1, z0]*(1-zc)+data[x0, y1, z1]*zc # (N,) 38 | value_10 = data[x1, y0, z0]*(1-zc)+data[x1, y0, z1]*zc # (N,) 39 | value_11 = data[x1, y1, z0]*(1-zc)+data[x1, y1, z1]*zc # (N,) 40 | value_0 = value_00*(1-yc)+value_01*yc # (N,) 41 | value_1 = value_10*(1-yc)+value_11*yc # (N,) 42 | value = value_0*(1-xc)+value_1*xc # (N,) 43 | return value 44 | 45 | def compute_distance(points: torch.DoubleTensor, sdf_data: SDFDataTensor, body_to_base: torch.DoubleTensor) -> Tuple[torch.BoolTensor, torch.DoubleTensor]: 46 | """ 47 | Input: 48 | point: (N, 3) 49 | sdf_data 50 | body_to_base: (4, 4) 51 | Return: 52 | inside_bbox_mask: (N,) 53 | inside_bbox_value: (M,) 54 | """ 55 | body_to_points = se3_transform_pc_tensor(body_to_base, points) # (N, 3) 56 | point_coords: torch.DoubleTensor = (body_to_points-sdf_data.min_coords)/sdf_data.delta # (N, 3) 57 | point_coords_0: torch.LongTensor = point_coords.floor().long() # (N, 3) 58 | point_coords_fraction: torch.DoubleTensor = point_coords-point_coords_0 # (N, 3) 59 | nx, ny, nz = sdf_data.sdf.shape 60 | x0, y0, z0 = point_coords_0.unbind(dim=1) # (N,) 61 | 62 | inside_bbox_mask = (x0>=0)&(y0>=0)&(z0>=0)&(x0 torch.DoubleTensor: 67 | " points: (N, 3) " 68 | points = points.detach() 69 | points.requires_grad = True 70 | device = points.device 71 | loss: torch.DoubleTensor = torch.zeros(points.shape[0], dtype=torch.float64, device=device) 72 | grad: torch.DoubleTensor = torch.zeros_like(points) 73 | for sdf_data, body_to_base in zip(sdf_data_list, body_to_base_list): 74 | inside_bbox_mask, inside_bbox_value = compute_distance(points, sdf_data, body_to_base) # (N,), (M,) 75 | inside_object_loss = torch.zeros(inside_bbox_value.shape[0], dtype=torch.float64, device=device) # (M,) 76 | inside_object_mask = inside_bbox_value<=0 # (M,) 77 | inside_object_loss[inside_object_mask] = -inside_bbox_value[inside_object_mask]+0.5*epsilon 78 | within_sdf_epsilon_mask = (inside_bbox_value<=epsilon)&(~inside_object_mask) # (M,) 79 | inside_object_loss[within_sdf_epsilon_mask] = 1/(2*epsilon)*(inside_bbox_value[within_sdf_epsilon_mask]-epsilon)**2 80 | loss[inside_bbox_mask] = loss[inside_bbox_mask]+inside_object_loss 81 | # loss = loss+(-inside_bbox_value[inside_object_mask]+0.5*epsilon).sum() 82 | # loss = loss+(1/(2*epsilon)*(inside_bbox_value[within_sdf_epsilon_mask]-epsilon)**2).sum() 83 | grad = torch.autograd.grad(loss.sum(), points)[0] 84 | return loss.detach(), grad 85 | 86 | def visualize(points, vectors): 87 | import matplotlib.pyplot as plt 88 | 89 | # Plotting 90 | fig = plt.figure() 91 | ax = fig.add_subplot(111, projection='3d') 92 | 93 | # Plot 3D points 94 | ax.scatter(points[:, 0], points[:, 1], points[:, 2], c='r', marker='o') 95 | 96 | # Plot vectors starting from the points 97 | for i, (point, vector) in enumerate(zip(points, vectors)): 98 | ax.quiver(point[0], point[1], point[2], vector[0], vector[1], vector[2], color='b') 99 | 100 | # Set labels and limits 101 | ax.set_xlabel('X') 102 | ax.set_ylabel('Y') 103 | ax.set_zlabel('Z') 104 | # ax.set_xlim(-1, 1) 105 | # ax.set_ylim(-1, 1) 106 | # ax.set_zlim(-1, 
1) 107 | 108 | plt.show() 109 | 110 | def debug(): 111 | import time 112 | # torch.manual_seed(0) 113 | # points = torch.randn(100, 3, dtype=torch.float64)*0.5 114 | # points.requires_grad = True 115 | # points = torch.tensor([ 116 | # [0.1540, 0.0932, -0.2383], 117 | # # [-0.1441, -0.0051, -0.0321], 118 | # # [-0.0338, -0.0653, -0.1720], 119 | # # [-0.0500, 0.0788, -0.2203], 120 | # # [ 0.0766, -0.1635, 0.0624], 121 | # # [ 0.1436, -0.0409, 0.0021], 122 | # # [-0.1285, -0.1161, 0.0298], 123 | # # [-0.0441, 0.0190, -0.2006] 124 | # ], dtype=torch.float64, requires_grad=True) 125 | # sdf_data = SDFData("tmp/debug_sdf/box/sdf.npz") 126 | device = torch.device("cuda") 127 | sdf_data = SDFData("tmp/debug_sdf/003_cracker_box/sdf.npz") 128 | sdf_data_tensor = SDFDataTensor(sdf_data, device=device) 129 | min_coords = sdf_data_tensor.min_coords 130 | max_coords = sdf_data_tensor.min_coords+sdf_data_tensor.delta*torch.tensor(sdf_data_tensor.sdf.shape, device=device) 131 | side_length = 10 132 | points_x, points_y, points_z = torch.meshgrid( 133 | torch.linspace(min_coords[0], max_coords[0], side_length, dtype=torch.float64, device=device), 134 | torch.linspace(min_coords[1], max_coords[1], side_length, dtype=torch.float64, device=device), 135 | torch.linspace(min_coords[2], max_coords[2], side_length, dtype=torch.float64, device=device) 136 | ) 137 | points = torch.stack([points_x, points_y, points_z], dim=3).reshape(-1, 3) 138 | points.requires_grad = True 139 | 140 | body_to_base = torch.eye(4, dtype=torch.float64, device=device) 141 | 142 | torch.cuda.synchronize() 143 | start_time = time.time() 144 | loss, grad = compute_sdf_loss(points, [sdf_data_tensor], [body_to_base]) 145 | torch.cuda.synchronize() 146 | end_time = time.time() 147 | print(f"compute sdf loss takes {end_time-start_time} seconds") 148 | code.interact(local=dict(globals(), **locals())) 149 | visualize(points.cpu().data, grad.cpu().data/10) 150 | 151 | if __name__ == "__main__": 152 | debug() 153 | 154 | """ 155 | DISPLAY="localhost:11.0" CUDA_VISIBLE_DEVICES=3 python -m env.utils.sdf_loss 156 | """ -------------------------------------------------------------------------------- /env/utils/transform.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.typing import NDArray 3 | from typing import Tuple, Union, List 4 | from scipy.spatial.transform import Rotation as Rt 5 | 6 | def se3_transform_pc(T: NDArray[np.float64], points: NDArray[np.float64]) -> NDArray[np.float64]: 7 | " T: (4, 4) points: (..., 3) " 8 | return points@T[:3, :3].transpose()+T[:3, 3] 9 | 10 | def pos_ros_quat_to_mat(pos: Union[NDArray[np.float64], List[float]], ros_quat: Union[NDArray[np.float64], List[float]]) -> NDArray[np.float64]: 11 | " pos: (..., 3) ros_quat: (..., 4) xyzw " 12 | if not isinstance(pos, np.ndarray): 13 | pos = np.array(pos, dtype=np.float64) 14 | if not isinstance(ros_quat, np.ndarray): 15 | ros_quat = np.array(ros_quat, dtype=np.float64) 16 | mat = np.tile(np.eye(4), pos.shape[:-1]+(1, 1)) 17 | mat[..., :3, :3] = Rt.from_quat(ros_quat.reshape(-1, 4)).as_matrix().reshape(ros_quat.shape[:-1]+(3, 3)) 18 | mat[..., :3, 3] = pos 19 | return mat 20 | 21 | def mat_to_pos_ros_quat(mat): 22 | pos = mat[:3, 3].copy() 23 | quat = Rt.from_matrix(mat[:3, :3]).as_quat() 24 | return pos, quat 25 | 26 | def pos_euler_to_mat(pos, euler): 27 | mat = np.eye(4) 28 | mat[:3, 3] = pos 29 | mat[:3, :3] = Rt.from_euler("xyz", euler).as_matrix() # extrinsic euler. 
equivalent to transforms3d.euler.euler2mat 30 | return mat 31 | 32 | def mat_to_pos_euler(mat: NDArray[np.float64]) -> Tuple[NDArray[np.float64], NDArray[np.float64]]: 33 | pos = mat[:3, 3].copy() 34 | euler = Rt.from_matrix(mat[:3, :3]).as_euler("xyz") 35 | return pos, euler 36 | 37 | def se3_inverse(mat: NDArray[np.float64]) -> NDArray[np.float64]: 38 | R = mat[:3, :3] 39 | mat_inv = np.eye(4, dtype=mat.dtype) 40 | mat_inv[:3, :3] = R.transpose() 41 | mat_inv[:3, 3] = -R.transpose()@mat[:3, 3] 42 | return mat_inv 43 | 44 | def to_ros_quat(tf_quat): # wxyz -> xyzw 45 | quat = np.zeros(4) 46 | quat[-1] = tf_quat[0] 47 | quat[:-1] = tf_quat[1:] 48 | return quat 49 | 50 | def to_tf_quat(ros_quat): # xyzw -> wxyz 51 | quat = np.zeros(4) 52 | quat[0] = ros_quat[-1] 53 | quat[1:] = ros_quat[:-1] 54 | return quat 55 | -------------------------------------------------------------------------------- /evaluate.py: -------------------------------------------------------------------------------- 1 | import os 2 | import ray 3 | import numpy as np 4 | from numpy.typing import NDArray 5 | from omegaconf import OmegaConf 6 | import time 7 | import code 8 | 9 | from env.benchmark import get_scene_ids 10 | from env.status_checker import EpisodeStatus 11 | from evaluate_config import EvaluateConfig 12 | from policy_runner import result_dtype, Distributer, PolicyRunner, get_policy_runner_remote 13 | 14 | def print_results(cfg: EvaluateConfig, results: NDArray[result_dtype]): 15 | num_scenes = results.shape[0] 16 | success_cnt = (results["status"] == EpisodeStatus.SUCCESS).sum() 17 | contact_cnt = (results["status"] == EpisodeStatus.FAILURE_HUMAN_CONTACT).sum() 18 | drop_cnt = (results["status"] == EpisodeStatus.FAILURE_OBJECT_DROP).sum() 19 | timeout_cnt = (results["status"] == EpisodeStatus.FAILURE_TIMEOUT).sum() 20 | print(f"success rate: {success_cnt}/{num_scenes}={success_cnt/num_scenes}") 21 | print(f"contact rate: {contact_cnt}/{num_scenes}={contact_cnt/num_scenes}") 22 | print(f" drop rate: {drop_cnt}/{num_scenes}={drop_cnt/num_scenes}") 23 | print(f"timeout rate: {timeout_cnt}/{num_scenes}={timeout_cnt/num_scenes}") 24 | success_mask = results["status"] == EpisodeStatus.SUCCESS 25 | # average_done_frame = results["done_frame"].mean() 26 | average_success_done_frame = results["done_frame"][success_mask].mean() 27 | average_success_reached_frame = results["reached_frame"][success_mask].mean() 28 | average_success_num_steps = results["num_steps"][success_mask].mean() 29 | average_success = ((cfg.env.max_frames-results["done_frame"][success_mask]+1)/cfg.env.max_frames).sum()/num_scenes 30 | # print(f"average done frame : {average_done_frame}") 31 | print(f"average success done frame : {average_success_done_frame}") 32 | print(f"average success reached frame: {average_success_reached_frame}") 33 | print(f"average success num steps : {average_success_num_steps}") 34 | print(f"average success : {average_success}") 35 | if cfg.print_failure_ids: 36 | print(f"contact indices: {sorted(results['scene_id'][results['status'] == EpisodeStatus.FAILURE_HUMAN_CONTACT])}") 37 | print(f" drop indices: {sorted(results['scene_id'][results['status'] == EpisodeStatus.FAILURE_OBJECT_DROP])}") 38 | print(f"timeout indices: {sorted(results['scene_id'][results['status'] == EpisodeStatus.FAILURE_TIMEOUT])}") 39 | 40 | def evaluate(cfg: EvaluateConfig): 41 | start_time = time.time() 42 | if cfg.scene_ids is None: 43 | scene_ids = get_scene_ids(cfg.setup, cfg.split, cfg.start_object_idx, cfg.end_object_idx, cfg.start_traj_idx, 
cfg.end_traj_idx) 44 | else: 45 | scene_ids = cfg.scene_ids 46 | 47 | ray.init() 48 | distributer = Distributer.remote(scene_ids) 49 | if not cfg.use_ray: 50 | policy_runner = PolicyRunner(cfg, distributer) 51 | results = policy_runner.work() 52 | else: 53 | num_gpus = len(os.environ["CUDA_VISIBLE_DEVICES"].split(",")) 54 | num_runners_per_gpu = (cfg.num_runners-1)//num_gpus+1 55 | PolicyRunnerRemote = get_policy_runner_remote(1/num_runners_per_gpu) 56 | policy_runners = [PolicyRunnerRemote.remote(cfg, distributer) for _ in range(cfg.num_runners)] 57 | results = ray.get([policy_runner.work.remote() for policy_runner in policy_runners]) 58 | results: NDArray[result_dtype] = np.concatenate(results) 59 | results = np.sort(results, order="scene_id") 60 | print_results(cfg, results) 61 | if cfg.demo_dir is not None: 62 | # results.tofile(os.path.join(cfg.demo_dir, "results.bin")) 63 | if cfg.start_object_idx is not None or cfg.end_object_idx is not None or cfg.start_traj_idx is not None or cfg.end_traj_idx is not None: 64 | np.save(os.path.join(cfg.demo_dir, f"results_{cfg.start_object_idx}_{cfg.end_object_idx}_{cfg.start_traj_idx}_{cfg.end_traj_idx}.npy"), results) 65 | else: 66 | np.save(os.path.join(cfg.demo_dir, f"results.npy"), results) 67 | print(f"evaluting uses {time.time()-start_time} seconds") 68 | # code.interact(local=dict(globals(), **locals())) 69 | ray.shutdown() 70 | 71 | def main(): 72 | default_cfg = OmegaConf.structured(EvaluateConfig) 73 | cli_cfg = OmegaConf.from_cli() 74 | cfg: EvaluateConfig = OmegaConf.to_object(OmegaConf.merge(default_cfg, cli_cfg)) 75 | if cfg.start_seed is not None and cfg.end_seed is not None and cfg.step_seed is not None: 76 | root_demo_dir = cfg.demo_dir 77 | for seed in range(cfg.start_seed, cfg.end_seed, cfg.step_seed): 78 | print(f"evaluating for seed {seed}") 79 | cfg.seed = seed 80 | cfg.demo_dir = os.path.join(root_demo_dir, str(seed)) 81 | evaluate(cfg) 82 | else: 83 | evaluate(cfg) 84 | 85 | if __name__ == "__main__": 86 | main() 87 | 88 | """ 89 | CUDA_VISIBLE_DEVICES=0 python -m evaluate use_ray=False env.panda.IK_solver=PyKDL setup=s0 split=train policy=offline offline.demo_dir=/data2/haoran/HRI/expert_demo/OMG/s0/know_dest_smooth_0.08 offline.demo_source=handoversim env.visualize=True env.verbose=True 90 | 91 | CUDA_VISIBLE_DEVICES=0,1,2,3 RAY_DEDUP_LOGS=0 python -m evaluate env.panda.IK_solver=PyKDL setup=s0 split=train policy=offline offline.demo_dir=/data2/haoran/HRI/expert_demo/OMG/s0/know_dest_smooth_0.08 offline.demo_source=handoversim 92 | success rate: 567/720=0.7875 93 | contact rate: 37/720=0.05138888888888889 94 | drop rate: 50/720=0.06944444444444445 95 | timeout rate: 66/720=0.09166666666666666 96 | average done frame : 6500.272222222222 97 | average success done frame: 6042.241622574956 98 | average success num steps : 44.16225749559083 99 | average success : 0.42154017094017093 100 | evaluting uses 266.8944182395935 seconds 101 | # original: success 546/720=0.7583333333333333 102 | 103 | python -m evaluate env.panda.IK_solver PyKDL evaluate.SCENE_IDS "[951, 996, 953, 917]" evaluate.use_ray False policy.name offline policy.offline.demo_dir /data2/haoran/HRI/expert_demo/OMG/s0/know_dest_smooth_0.08 policy.offline.demo_structure flat env.visualize True env.verbose True 104 | """ -------------------------------------------------------------------------------- /evaluate_config.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from omegaconf 
import MISSING 3 | from typing import Optional, List 4 | 5 | from env.handover_env import GenH2RSimConfig 6 | from policies.offline_policy_config import OfflinePolicyConfig 7 | from policies.pointnet2_policy_config import PointNet2PolicyConfig 8 | 9 | policy_default_kwargs = { 10 | "init_joint": "${..env.panda.dof_default_position}", 11 | "step_time": "${..env.step_time}", 12 | "goal_center": "${..env.status_checker.goal_center}", 13 | } 14 | 15 | @dataclass 16 | class EvaluateConfig: 17 | use_ray: bool = True 18 | num_runners: int = 32 19 | setup: str = "s0" 20 | split: str = "test" 21 | scene_ids: Optional[List[int]] = None 22 | start_object_idx: Optional[int] = None 23 | end_object_idx: Optional[int] = None 24 | start_traj_idx: Optional[int] = None 25 | end_traj_idx: Optional[int] = None 26 | seed: int = 0 27 | 28 | # config for multiple seeds 29 | start_seed: Optional[int] = None 30 | end_seed: Optional[int] = None 31 | step_seed: Optional[int] = None 32 | 33 | policy: str = MISSING 34 | demo_dir: Optional[str] = None 35 | demo_structure: str = "hierarchical" # "flat" 36 | overwrite_demo: bool = False 37 | record_ego_video: bool = False 38 | record_third_person_video: bool = False 39 | dart: bool = False 40 | dart_min_step: int = 0 41 | dart_max_step: int = 30 # max is 30 42 | dart_ratio: float = 0.5 43 | print_failure_ids: bool = False 44 | save_state: bool = False 45 | show_target_grasp: bool = False 46 | verbose: bool = False 47 | 48 | env: GenH2RSimConfig = field(default_factory=GenH2RSimConfig) 49 | offline: OfflinePolicyConfig = field(default_factory=lambda: OfflinePolicyConfig(**policy_default_kwargs)) 50 | pointnet2: PointNet2PolicyConfig = field(default_factory=lambda: PointNet2PolicyConfig(**policy_default_kwargs)) 51 | -------------------------------------------------------------------------------- /models/encoders/pointnet2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from pointnet2_ops.pointnet2_modules import PointnetSAModule 4 | from dataclasses import dataclass 5 | from omegaconf import MISSING 6 | import code 7 | 8 | @dataclass 9 | class PointNet2EncoderConfig: 10 | in_channel: int = MISSING 11 | 12 | class PointNet2Encoder(nn.Module): 13 | def __init__(self, cfg: PointNet2EncoderConfig): 14 | super().__init__() 15 | self.cfg = cfg 16 | self.in_channel = cfg.in_channel 17 | 18 | self.SA_modules = nn.ModuleList() 19 | self.SA_modules.append( 20 | PointnetSAModule( 21 | npoint=32, 22 | radius=0.02, 23 | nsample=64, 24 | mlp=[self.in_channel, 64, 64, 128], 25 | ) 26 | ) 27 | self.SA_modules.append( 28 | PointnetSAModule( 29 | npoint=32, 30 | radius=0.04, 31 | nsample=128, 32 | mlp=[128, 128, 128, 256], 33 | ) 34 | ) 35 | self.SA_modules.append( 36 | PointnetSAModule( 37 | mlp=[256, 256, 256, 512] 38 | ) 39 | ) 40 | self.fc_layer = nn.Sequential( 41 | nn.Linear(512, 1024), 42 | nn.BatchNorm1d(1024), 43 | nn.ReLU(True), 44 | nn.Linear(1024, 512), 45 | nn.BatchNorm1d(512), 46 | nn.ReLU(True), 47 | ) 48 | 49 | def forward(self, pc): 50 | " pc: (B, N, 3+C) " 51 | xyz = pc[:, :, :3].contiguous() 52 | features = pc.transpose(1, 2).contiguous() if pc.size(-1) > 3 else None 53 | 54 | for module in self.SA_modules: 55 | xyz, features = module(xyz, features) 56 | 57 | return self.fc_layer(features.squeeze(-1)) 58 | 59 | def debug(): 60 | from omegaconf import OmegaConf 61 | default_cfg = OmegaConf.structured(PointNet2EncoderConfig) 62 | cli_cfg = OmegaConf.from_cli() 63 | cfg: 
PointNet2EncoderConfig = OmegaConf.to_object(OmegaConf.merge(default_cfg, cli_cfg)) 64 | encoder = PointNet2Encoder(cfg).cuda() 65 | pc = torch.randn(2, 1024, cfg.in_channel).cuda() 66 | feature = encoder(pc) 67 | code.interact(local=dict(globals(), **locals())) 68 | 69 | if __name__ == "__main__": 70 | debug() 71 | 72 | """ 73 | CUDA_VISIBLE_DEVICES=7 python -m models.encoders.pointnet2 in_channel=10 74 | """ -------------------------------------------------------------------------------- /models/loss.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | def euler_to_rotation_matrix(az, el, th, batched=False): 5 | if batched: 6 | cx = torch.cos(torch.reshape(az, [-1, 1])) 7 | cy = torch.cos(torch.reshape(el, [-1, 1])) 8 | cz = torch.cos(torch.reshape(th, [-1, 1])) 9 | sx = torch.sin(torch.reshape(az, [-1, 1])) 10 | sy = torch.sin(torch.reshape(el, [-1, 1])) 11 | sz = torch.sin(torch.reshape(th, [-1, 1])) 12 | 13 | ones = torch.ones_like(cx) 14 | zeros = torch.zeros_like(cx) 15 | 16 | rx = torch.cat([ones, zeros, zeros, zeros, cx, -sx, zeros, sx, cx], dim=-1) 17 | ry = torch.cat([cy, zeros, sy, zeros, ones, zeros, -sy, zeros, cy], dim=-1) 18 | rz = torch.cat([cz, -sz, zeros, sz, cz, zeros, zeros, zeros, ones], dim=-1) 19 | 20 | rx = torch.reshape(rx, [-1, 3, 3]) 21 | ry = torch.reshape(ry, [-1, 3, 3]) 22 | rz = torch.reshape(rz, [-1, 3, 3]) 23 | 24 | return torch.matmul(rz, torch.matmul(ry, rx)) 25 | else: 26 | cx = torch.cos(az) 27 | cy = torch.cos(el) 28 | cz = torch.cos(th) 29 | sx = torch.sin(az) 30 | sy = torch.sin(el) 31 | sz = torch.sin(th) 32 | 33 | rx = torch.stack([[1.0, 0.0, 0.0], [0, cx, -sx], [0, sx, cx]], dim=0) 34 | ry = torch.stack([[cy, 0, sy], [0, 1, 0], [-sy, 0, cy]], dim=0) 35 | rz = torch.stack([[cz, -sz, 0], [sz, cz, 0], [0, 0, 1]], dim=0) 36 | 37 | return torch.matmul(rz, torch.matmul(ry, rx)) 38 | 39 | def get_control_points(batch_size: int, device: torch.device, rotz=False) -> torch.FloatTensor: 40 | " Outputs a tensor of shape (batch_size x 6 x 3) " 41 | control_points = np.array([ 42 | [ 0. , 0. , 0. ], 43 | [ 0. , 0. , 0. ], 44 | [ 0.053, -0. , 0.075], 45 | [-0.053, 0. , 0.075], 46 | [ 0.053, -0. , 0.105], 47 | [-0.053, 0. , 0.105] 48 | ], dtype=np.float32) 49 | control_points = np.tile(np.expand_dims(control_points, 0), [batch_size, 1, 1]) 50 | 51 | if rotz: 52 | RotZ = np.array([ 53 | [0., -1., 0.], 54 | [1., 0., 0.], 55 | [0., 0., 1.], 56 | ]) 57 | control_points = np.matmul(control_points, RotZ) 58 | 59 | return torch.tensor(control_points, dtype=torch.float32, device=device) 60 | 61 | def rotate_vector_by_quaternion(q, v): 62 | """ 63 | Rotate vector(s) v about the rotation described by ros quaternion(s) q. 64 | Input: 65 | q: (..., 4) 66 | v: (..., 3) 67 | Return: 68 | (..., 3) 69 | """ 70 | qvec = q[..., :3] 71 | uv = torch.cross(qvec, v, dim=-1) # (..., 3) 72 | uuv = torch.cross(qvec, uv, dim=-1) # (..., 3) 73 | return v+2*(q[..., 3:]*uv+uuv) 74 | 75 | def pos_ros_quat_transform_points(pos_ros_quat: torch.FloatTensor, control_points: torch.FloatTensor): 76 | """ 77 | Transforms canonical points using pos and ros_quat. 
78 | Input: 79 | pos_ros_quat: (B, 7) 80 | control_points: (B, N, 3) 81 | Return: 82 | transformed_control_points: (B, N, 3) 83 | """ 84 | pos = pos_ros_quat[:, :3] # (B, 3) 85 | ros_quat = pos_ros_quat[:, 3:] # (B, 4) 86 | rotated_control_points = rotate_vector_by_quaternion(ros_quat[:, None], control_points) # (B, N, 3) 87 | transformed_control_points = rotated_control_points+pos[:, None] # (B, N, 3) 88 | return transformed_control_points 89 | 90 | def control_points_from_cartesian_action(action: torch.FloatTensor): 91 | " action: (B, 6) " 92 | rot = euler_to_rotation_matrix(action[:, 3], action[:, 4], action[:, 5], batched=True) 93 | grasp_pc = get_control_points(action.shape[0], device=action.device) 94 | grasp_pc = torch.matmul(grasp_pc, rot.permute(0, 2, 1)) 95 | grasp_pc += action[:, :3].unsqueeze(1) 96 | return grasp_pc 97 | 98 | def compute_bc_loss(action: torch.FloatTensor, expert_action: torch.FloatTensor) -> torch.FloatTensor: 99 | """ 100 | PM loss for behavior clone 101 | action: (B, 6) 102 | expert_action: (B, 6) 103 | """ 104 | action_control_points = control_points_from_cartesian_action(action) 105 | expert_action_control_points = control_points_from_cartesian_action(expert_action) 106 | return torch.mean(torch.abs(action_control_points-expert_action_control_points).sum(-1)) 107 | 108 | def compute_goal_pred_loss(pred: torch.FloatTensor, target: torch.FloatTensor) -> torch.FloatTensor: 109 | """ 110 | PM loss for grasp pose detection 111 | Input: 112 | pred: (B, 7) 113 | target: (B, 7) 114 | """ 115 | control_points = get_control_points(pred.shape[0], device=pred.device, rotz=True) # (N, 3) 116 | pred_control_points = pos_ros_quat_transform_points(pred, control_points) # (B, N, 3) 117 | target_control_points = pos_ros_quat_transform_points(target, control_points) # (B, N, 3) 118 | return torch.mean(torch.abs(pred_control_points-target_control_points).sum(-1)) 119 | 120 | def debug(): 121 | import code 122 | torch.manual_seed(0) 123 | action = torch.randn(8, 6).cuda() 124 | pos = torch.randn(8, 3).cuda() 125 | rot = torch.randn(8, 4).cuda() 126 | rot = rot/rot.norm(dim=1, keepdim=True) 127 | print(f"action: {action.sum()}, pos: {pos.sum()}") 128 | 129 | bc_loss = compute_bc_loss(action[:4], action[4:]) 130 | goal_loss = compute_goal_pred_loss(torch.cat([pos[:4], rot[:4, 1:], rot[:4, :1]], dim=1), torch.cat([pos[4:], rot[4:, 1:], rot[4:, :1]], dim=1)) 131 | print(f"bc loss {bc_loss}, goal loss {goal_loss}") 132 | code.interact(local=dict(globals(), **locals())) 133 | 134 | if __name__ == "__main__": 135 | debug() 136 | 137 | """ 138 | conda activate genh2r 139 | cd /share1/haoran/HRI/GenH2R 140 | python -m models.loss 141 | """ -------------------------------------------------------------------------------- /models/policy_network.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch import nn 4 | import torch.nn.functional as F 5 | from dataclasses import dataclass, field 6 | from omegaconf import OmegaConf, MISSING 7 | from typing import Optional, Dict, Tuple 8 | import code 9 | 10 | from env.handover_env import CartesianActionSpace 11 | from .encoders.pointnet2 import PointNet2Encoder, PointNet2EncoderConfig 12 | from .utils import get_submodule_weights 13 | from .loss import compute_bc_loss, compute_goal_pred_loss 14 | 15 | def weights_init_(m): 16 | if isinstance(m, nn.Linear): 17 | torch.nn.init.xavier_uniform_(m.weight, gain=1) 18 | torch.nn.init.constant_(m.bias, 0) 19 | 20 | @dataclass 21 | 
class PolicyNetworkConfig: 22 | num_points: int = 1024 23 | pretrained_dir: Optional[str] = None 24 | pretrained_suffix: str = "latest" 25 | pretrained_source: str = "genh2r" # "handoversim" 26 | in_channel: int = MISSING 27 | state_dim: int = 512 28 | hidden_dim: int = 256 29 | goal_pred: bool = True 30 | obj_pose_pred_frame_num: int = 0 31 | obj_pose_pred_coff: float = 0.0 32 | 33 | encoder: PointNet2EncoderConfig = field(default_factory=lambda: PointNet2EncoderConfig( 34 | in_channel="${..in_channel}", 35 | )) 36 | 37 | class PolicyNetwork(nn.Module): 38 | def __init__(self, cfg: PolicyNetworkConfig): 39 | super().__init__() 40 | self.cfg = cfg 41 | num_actions = 6 42 | self.goal_pred_dim = 7 if cfg.goal_pred else 0 43 | self.obj_pose_pred_dim = 6*cfg.obj_pose_pred_frame_num 44 | 45 | self.encoder = PointNet2Encoder(cfg.encoder) 46 | 47 | self.linear1 = nn.Linear(cfg.state_dim, cfg.hidden_dim) 48 | self.linear2 = nn.Linear(cfg.hidden_dim, cfg.hidden_dim) 49 | 50 | self.action_head = nn.Linear(cfg.hidden_dim, num_actions) 51 | 52 | self.goal_pred_head = nn.Linear(cfg.hidden_dim, self.goal_pred_dim) 53 | self.obj_pose_pred_head = nn.Linear(cfg.hidden_dim, self.obj_pose_pred_dim) 54 | 55 | action_space = CartesianActionSpace() 56 | self.action_scale = nn.Parameter(torch.tensor((action_space.high-action_space.low)/2.0, dtype=torch.float32), requires_grad=False) 57 | self.action_bias = nn.Parameter(torch.tensor((action_space.high+action_space.low)/2.0, dtype=torch.float32), requires_grad=False) 58 | 59 | self.init_model() 60 | 61 | def init_model(self): 62 | if self.cfg.pretrained_dir is None: 63 | self.apply(weights_init_) 64 | return 65 | 66 | if self.cfg.pretrained_source == "genh2r": 67 | checkpoint = torch.load(os.path.join(self.cfg.pretrained_dir, f"{self.cfg.pretrained_suffix}.pth")) 68 | self.load_state_dict(checkpoint["state_dict"]) 69 | elif self.cfg.pretrained_source == "handoversim": 70 | actor_weight = torch.load(os.path.join(self.cfg.pretrained_dir, f"BC_actor_PandaYCBEnv_{self.cfg.pretrained_suffix}"))["net"] 71 | encoder_weight = torch.load(os.path.join(self.cfg.pretrained_dir, f"BC_state_feat_PandaYCBEnv_{self.cfg.pretrained_suffix}"))["net"] 72 | self.encoder.SA_modules[0].load_state_dict(get_submodule_weights(encoder_weight, "module.encoder.0.0.")) 73 | self.encoder.SA_modules[1].load_state_dict(get_submodule_weights(encoder_weight, "module.encoder.0.1.")) 74 | self.encoder.SA_modules[2].load_state_dict(get_submodule_weights(encoder_weight, "module.encoder.0.2.")) 75 | self.encoder.fc_layer.load_state_dict(get_submodule_weights(encoder_weight, "module.encoder.1.")) 76 | self.linear1.load_state_dict(get_submodule_weights(actor_weight, "linear1.")) 77 | self.linear2.load_state_dict(get_submodule_weights(actor_weight, "linear2.")) 78 | self.action_head.load_state_dict(get_submodule_weights(actor_weight, "mean.")) 79 | self.goal_pred_head.load_state_dict(get_submodule_weights(actor_weight, "extra_pred.")) 80 | assert (self.obj_pose_pred_dim > 0) == any(key.startswith("extra_pred_obj") for key in actor_weight.keys()) 81 | if self.obj_pose_pred_dim > 0: 82 | self.obj_pose_pred_head.load_state_dict(get_submodule_weights(actor_weight, "extra_pred_obj.")) 83 | else: 84 | raise NotImplementedError 85 | 86 | def forward(self, pc): 87 | state = self.encoder(pc) 88 | x = F.relu(self.linear1(state)) 89 | x = F.relu(self.linear2(x)) 90 | action = self.action_head(x) 91 | action = torch.tanh(action)*self.action_scale+self.action_bias 92 | goal_pred = self.goal_pred_head(x) 93 | if 
self.cfg.goal_pred: # pos+quat 94 | goal_pred = torch.cat([goal_pred[:, :3], F.normalize(goal_pred[:, 3:], p=2, dim=-1)], dim=-1) 95 | obj_pose_pred = self.obj_pose_pred_head(x) 96 | return action, goal_pred, obj_pose_pred 97 | 98 | def compute_loss(self, batch_data: dict) -> Tuple[torch.FloatTensor, Dict[str, torch.FloatTensor]]: 99 | point_cloud, expert_action = batch_data["point_clouds"], batch_data["expert_actions"] 100 | gt_grasp_pose, gt_object_pred_pose = batch_data["grasp_poses"], batch_data["object_pred_poses"] 101 | action, goal_pred, obj_pose_pred = self.forward(point_cloud) 102 | 103 | # print("point_cloud", point_cloud.shape) #(B, 1024, 5 + flow_frame * 3) 104 | # print("expert_action", expert_action.shape, "action", action.shape) #(B, 6) 105 | # print("gt_grasp_pose", gt_grasp_pose.shape, "goal_pred", goal_pred.shape) # (B, 7) 106 | # print("gt_object_pred_pose", gt_object_pred_pose.shape, "obj_pose_pred", obj_pose_pred.shape) # (B, 6 * pred_frame) 107 | 108 | loss = 0 109 | loss_dict: Dict[str, torch.FloatTensor] = {} 110 | bc_loss = compute_bc_loss(action, expert_action) 111 | loss = loss+bc_loss 112 | loss_dict["bc_loss"] = bc_loss.item() 113 | 114 | if self.cfg.goal_pred: 115 | goal_pred_loss = compute_goal_pred_loss(goal_pred, gt_grasp_pose) 116 | loss = loss+goal_pred_loss 117 | loss_dict["goal_pred_loss"] = goal_pred_loss.item() 118 | 119 | if self.cfg.obj_pose_pred_frame_num > 0: 120 | obj_pose_pred_loss = self.cfg.obj_pose_pred_coff*(obj_pose_pred.view(-1)-gt_object_pred_pose.view(-1)).abs().mean() 121 | loss = loss+obj_pose_pred_loss 122 | loss_dict["obj_pose_pred_loss"] = obj_pose_pred_loss.item() 123 | 124 | return loss, loss_dict 125 | 126 | def debug(): 127 | from omegaconf import OmegaConf 128 | default_cfg = OmegaConf.structured(PolicyNetworkConfig) 129 | cli_cfg = OmegaConf.from_cli() 130 | cfg: PolicyNetworkConfig = OmegaConf.to_object(OmegaConf.merge(default_cfg, cli_cfg)) 131 | policy_network = PolicyNetwork(cfg).cuda() 132 | state = torch.load("/share1/haoran/HRI/generalizable_handover/state.pkl") 133 | x = F.relu(policy_network.linear1(state)) 134 | x = F.relu(policy_network.linear2(x)) 135 | action = policy_network.action_head(x) 136 | action = torch.tanh(action)*policy_network.action_scale+policy_network.action_bias 137 | # pc = torch.randn(2, 1024, 17).cuda() 138 | code.interact(local=dict(globals(), **locals())) 139 | 140 | if __name__ == "__main__": 141 | debug() 142 | 143 | """ 144 | conda activate gaddpg 145 | cd /share1/haoran/HRI/generalizable_handover 146 | 147 | OMG_PLANNER_DIR=/share/haoran/HRI/OMG-Planner PYTHONUNBUFFERED=True CUDA_VISIBLE_DEVICES=3 python -m core_multi_frame.train_online_handover TRAIN_DDPG.test True BENCHMARK.SETUP s0 TRAIN_DDPG.num_remotes 1 TRAIN_DDPG.pretrained output/t450_bc_omg_replan5_landmark_smooth0.08_use_hand_flow0_pred0_wd0.0001_pred0.5_300w_13s_no_accum_3 SIM.RENDER False TEST.split test TRAIN_DDPG.occupy False TRAIN_DDPG.debug True 148 | 149 | CUDA_VISIBLE_DEVICES=7 python -m models.policy_network pretrained_dir=/share1/haoran/HRI/generalizable_handover/output/t450_bc_omg_replan5_landmark_smooth0.08_use_hand_flow0_pred0_wd0.0001_pred0.5_300w_13s_no_accum_3 pretrained_source=handoversim in_channel=5 150 | >>> action 151 | tensor([[ 0.0194, -0.0011, 0.0022, -0.0022, -0.0011, -0.0042]], 152 | device='cuda:0', grad_fn=) 153 | """ -------------------------------------------------------------------------------- /models/utils.py: -------------------------------------------------------------------------------- 1 | 
import torch 2 | import collections 3 | 4 | def get_submodule_weights(weights: collections.OrderedDict, prefix: str): 5 | submodule_weights = collections.OrderedDict() 6 | len_prefix = len(prefix) 7 | for key, weight in weights.items(): 8 | if key.startswith(prefix): 9 | submodule_weights[key[len_prefix:]] = weight 10 | return submodule_weights 11 | -------------------------------------------------------------------------------- /policies/base_policy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import code 3 | 4 | from env.handover_env import Observation 5 | from .base_policy_config import BasePolicyConfig 6 | 7 | class BasePolicy: 8 | def __init__(self, cfg: BasePolicyConfig): 9 | self.cfg = cfg 10 | self.init_joint = np.array(cfg.init_joint) 11 | self.wait_steps = int(cfg.wait_time/cfg.step_time) 12 | self.action_repeat_steps = int(cfg.action_repeat_time/cfg.step_time) 13 | self.close_gripper_steps = int(cfg.close_gripper_time/cfg.step_time) 14 | self.np_random = np.random.RandomState() 15 | 16 | def base_reset(self): 17 | self.np_random.seed(self.cfg.seed) 18 | self.init = False 19 | self.reached = False 20 | self.reached_frame = -1 21 | self.grasped = False 22 | self.retreat_step = 0 23 | 24 | def plan(self, observation: Observation): 25 | pass 26 | # return action, action_type, reached, info 27 | 28 | def run_policy(self, observation: Observation): 29 | if not self.init: 30 | self.init = True 31 | if self.wait_steps > 0: 32 | # Wait 33 | action, action_type, repeat, stage, info = self.init_joint, "joint", self.wait_steps, "wait", {} 34 | return action, action_type, repeat, stage, info 35 | 36 | if not self.reached: 37 | action, action_type, reached, info = self.plan(observation) 38 | if not reached: 39 | # Approach object until reaching grasp pose. 
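                # Stage order in run_policy (per the stage strings in this method): "wait" -> "reach" (this branch, until plan() reports the grasp pose is reached) -> "grasp" -> "retreat" -> "retrieve"; the caller repeats each returned action for `repeat` simulation steps.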
40 | repeat = self.action_repeat_steps 41 | stage = "reach" 42 | return action, action_type, repeat, stage, info 43 | else: 44 | self.reached = True 45 | self.reached_frame = observation.frame 46 | 47 | if not self.grasped: 48 | # Close gripper 49 | self.grasped = True 50 | action, action_type, repeat, stage, info = np.zeros(7), "ego_cartesian", self.close_gripper_steps, "grasp", {} 51 | return action, action_type, repeat, stage, info 52 | 53 | # Retreat 54 | if self.retreat_step < self.cfg.retreat_steps: 55 | self.retreat_step += 1 56 | action = np.array([0., 0., -self.cfg.retreat_step_size, 0., 0., 0., 0.]) 57 | action_type, repeat, stage, info = "ego_cartesian", self.action_repeat_steps, "retreat", {} 58 | return action, action_type, repeat, stage, info 59 | 60 | # Retrieve 61 | pos = observation.world_to_ee[:3, 3] 62 | pos_displacement = self.cfg.goal_center-pos 63 | if np.linalg.norm(pos_displacement) <= self.cfg.retrive_step_size: 64 | action_pos = pos_displacement 65 | else: 66 | action_pos = pos_displacement/np.linalg.norm(pos_displacement)*self.cfg.retrive_step_size 67 | action, action_type, repeat, stage, info = np.append(action_pos, 0), "world_pos", self.action_repeat_steps, "retrieve", {} 68 | return action, action_type, repeat, stage, info 69 | 70 | class DebugPolicy(BasePolicy): 71 | def __init__(self, cfg: BasePolicyConfig): 72 | super().__init__(cfg) 73 | 74 | def reset(self, traj_data_path): 75 | self.base_reset() 76 | self.traj_data = np.load(traj_data_path) 77 | self.num_steps = self.traj_data["num_steps"] 78 | self.step = 0 79 | 80 | def plan(self, observation: Observation): 81 | if self.step == self.num_steps: 82 | action, action_type, reached, info = None, None, True, {} 83 | else: 84 | action = np.append(self.traj_data["expert_action"][self.step], 0.04) 85 | action_type, reached, info = "ego_cartesian", False, {} 86 | self.step += 1 87 | return action, action_type, reached, info 88 | 89 | def debug(): 90 | from env.handover_env import GenH2RSim, GenH2RSimConfig 91 | from omegaconf import OmegaConf 92 | from dataclasses import dataclass, field 93 | import ipdb 94 | 95 | @dataclass 96 | class EvalConfig: 97 | env: GenH2RSimConfig = field(default_factory=GenH2RSimConfig) 98 | policy: BasePolicyConfig = field(default_factory=lambda: BasePolicyConfig(name="debug", init_joint="${..env.panda.dof_default_position}", step_time="${..env.step_time}", goal_center="${..env.status_checker.goal_center}")) 99 | 100 | eval_base_cfg = OmegaConf.structured(EvalConfig) 101 | cli_cfg = OmegaConf.from_cli() 102 | eval_cfg = OmegaConf.to_object(OmegaConf.merge(eval_base_cfg, cli_cfg)) 103 | env = GenH2RSim(eval_cfg.env) 104 | policy = DebugPolicy(eval_cfg.policy) 105 | 106 | while True: 107 | scene_id = int(input("scene_id:")) 108 | env.reset(scene_id) 109 | policy.reset(f"/data2/haoran/HRI/expert_demo/OMG/s0/know_dest_smooth_0.08/{scene_id}.npz") 110 | while True: 111 | observation = env.get_observation() 112 | action, action_type, repeat, stage, info = policy.run_policy(observation) 113 | if action_type == "joint": 114 | reward, done, info = env.joint_step(action, repeat) 115 | elif action_type == "ego_cartesian": 116 | reward, done, info = env.ego_cartesian_step(action, repeat) 117 | elif action_type == "world_pos": 118 | reward, done, info = env.world_pos_step(action, repeat) 119 | print(f"reward {reward}, done {done}, info {info}") 120 | if done: 121 | break 122 | 123 | if __name__ == "__main__": 124 | debug() 125 | 126 | """ 127 | python -m policies.base_policy env.verbose=True 
env.panda.IK_solver=PyKDL env.visualize=True 128 | scene_id: 10 129 | frame 7052, status 1, reward 1.0, done True 130 | scene_id: 12 131 | frame 7018, status 1, reward 1.0, done True 132 | """ -------------------------------------------------------------------------------- /policies/base_policy_config.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Tuple 3 | from omegaconf import MISSING 4 | 5 | @dataclass 6 | class BasePolicyConfig: 7 | name: str = MISSING 8 | seed: int = 0 9 | init_joint: Tuple[float] = MISSING 10 | step_time: float = MISSING 11 | wait_time: float = 0. 12 | action_repeat_time: float = 0.13 13 | close_gripper_time: float = 0.5 14 | retreat_step_size: float = 0.03 15 | retrive_step_size: float = 0.03 16 | goal_center: Tuple[float] = MISSING 17 | retreat_steps: int = 0 18 | verbose: bool = False 19 | 20 | """ 21 | It is nice to have all configs in the same file with objects. But in some cases, only the configs are wanted to be imported. 22 | """ 23 | -------------------------------------------------------------------------------- /policies/offline_policy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.typing import NDArray 3 | from typing import Optional, Tuple 4 | import os 5 | import open3d as o3d 6 | 7 | from env.handover_env import Observation 8 | from env.utils.scene import scene_id_to_dir 9 | from .base_policy import BasePolicy 10 | from .offline_policy_config import OfflinePolicyConfig 11 | 12 | def get_o3d_pcd(points: NDArray[np.float64], color: Optional[Tuple[float]]=None): 13 | pcd = o3d.geometry.PointCloud() 14 | pcd.points = o3d.utility.Vector3dVector(points) 15 | if color is not None: 16 | pcd.paint_uniform_color(color) 17 | return pcd 18 | 19 | def scene_id_to_demo_path(scene_id: int, demo_structure: str) -> str: 20 | scene_dir = scene_id_to_dir(scene_id, demo_structure) 21 | scene_path = os.path.join(scene_dir, f"{scene_id:08d}.npz") 22 | return scene_path 23 | 24 | class OfflinePolicy(BasePolicy): 25 | def __init__(self, cfg: OfflinePolicyConfig): 26 | super().__init__(cfg) 27 | self.cfg: OfflinePolicyConfig 28 | self.data_dir = self.cfg.demo_dir 29 | assert self.data_dir is not None 30 | self.data_structure = self.cfg.demo_structure 31 | assert self.data_structure in ["flat", "hierarchical"] 32 | self.data_source = self.cfg.demo_source 33 | 34 | def reset(self, scene_id: int): 35 | self.base_reset() 36 | if self.data_source == "genh2r": 37 | self.data_path = os.path.join(self.data_dir, scene_id_to_demo_path(scene_id, self.data_structure)) 38 | elif self.data_source == "handoversim": 39 | self.data_path = os.path.join(self.data_dir, f"{scene_id}.npz") 40 | else: 41 | raise NotImplementedError 42 | self.traj_data = np.load(self.data_path) 43 | self.num_steps = self.traj_data["num_steps"] 44 | if self.data_source == "genh2r": 45 | self.actions = self.traj_data["action"] 46 | self.world_to_target_grasps = self.traj_data["world_to_target_grasp"] 47 | elif self.data_source == "handoversim": 48 | self.actions = self.traj_data["expert_action"] 49 | else: 50 | raise NotImplementedError 51 | self.step = 0 52 | 53 | def plan(self, observation: Observation): 54 | if self.step == self.num_steps: 55 | action, action_type, reached, info = None, None, True, {} 56 | else: 57 | action = np.append(self.actions[self.step], 0.04) 58 | action_type, reached, info = "ego_cartesian", False, 
{"world_to_target_grasp": self.world_to_target_grasps[self.step]} 59 | if self.cfg.check_input_pcd: 60 | if f"object_points_{self.step}" in self.traj_data: 61 | object_points = self.traj_data[f"object_points_{self.step}"] 62 | else: 63 | object_points = np.zeros((0, 3)) 64 | if f"hand_points_{self.step}" in self.traj_data: 65 | hand_points = self.traj_data[f"hand_points_{self.step}"] 66 | else: 67 | hand_points = np.zeros((0, 3)) 68 | object_pcd = get_o3d_pcd(object_points, color=(0., 0., 1.)) 69 | hand_pcd = get_o3d_pcd(hand_points, (0., 1., 0.)) 70 | info["input_pcd"] = object_pcd+hand_pcd 71 | self.step += 1 72 | return action, action_type, reached, info 73 | 74 | """ 75 | CUDA_VISIBLE_DEVICES=0 python -m evaluate use_ray=False env.panda.IK_solver=PyKDL setup=s0 split=train policy=offline offline.demo_dir=/data2/haoran/HRI/expert_demo/OMG/s0/know_dest_smooth_0.08 offline.demo_source=handoversim env.visualize=True env.verbose=True 76 | 77 | CUDA_VISIBLE_DEVICES=0,1,2,3 RAY_DEDUP_LOGS=0 python -m evaluate env.panda.IK_solver=PyKDL setup=s0 split=train policy=offline offline.demo_dir=/data2/haoran/HRI/expert_demo/OMG/s0/know_dest_smooth_0.08 offline.demo_source=handoversim 78 | """ -------------------------------------------------------------------------------- /policies/offline_policy_config.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from omegaconf import MISSING 3 | from typing import Optional 4 | 5 | from .base_policy_config import BasePolicyConfig 6 | 7 | @dataclass 8 | class OfflinePolicyConfig(BasePolicyConfig): 9 | name: str = "offline" 10 | demo_dir: Optional[str] = None 11 | demo_structure: str = "hierarchical" # "flat" 12 | demo_source: str = "genh2r" # "handoversim" 13 | check_input_pcd: bool = False 14 | -------------------------------------------------------------------------------- /policies/pointnet2_policy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.typing import NDArray 3 | import torch 4 | import code 5 | 6 | from env.handover_env import Observation 7 | from env.utils.transform import se3_transform_pc, se3_inverse, pos_ros_quat_to_mat 8 | from models.policy_network import PolicyNetwork 9 | from .base_policy import BasePolicy 10 | from .pointnet2_policy_config import PointNet2PolicyConfig 11 | from .utils.point_cloud import regularize_pc_point_count, PointCloudProcessor 12 | 13 | class PointNet2Policy(BasePolicy): 14 | def __init__(self, cfg: PointNet2PolicyConfig, device=torch.device("cuda")): 15 | super().__init__(cfg) 16 | self.cfg: PointNet2PolicyConfig 17 | torch.backends.cudnn.deterministic = True 18 | self.device = device 19 | self.policy_network = PolicyNetwork(cfg.model).to(device) 20 | self.policy_network.eval() 21 | self.point_cloud_processor = PointCloudProcessor(cfg.processor) 22 | 23 | def reset(self): 24 | self.base_reset() 25 | self.point_cloud_processor.reset() 26 | 27 | def termination_heuristics(self, world_to_ee: NDArray) -> bool: 28 | ee_to_world = se3_inverse(world_to_ee) 29 | object_points = se3_transform_pc(ee_to_world, self.point_cloud_processor.world_to_object_points) 30 | if object_points.shape[0] == 0: 31 | return False 32 | cage_points_mask = ( 33 | (object_points[:, 2] > +0.06) 34 | & (object_points[:, 2] < +0.11) 35 | & (object_points[:, 1] > -0.05) 36 | & (object_points[:, 1] < +0.05) 37 | & (object_points[:, 0] > -0.02) 38 | & (object_points[:, 0] < +0.02) 39 | ) 40 | 
cage_points_mask_reg = regularize_pc_point_count(cage_points_mask[:, None], 1024, self.np_random) 41 | return np.sum(cage_points_mask_reg) > 50 42 | 43 | @torch.no_grad() 44 | def plan(self, observation: Observation): 45 | info = {} 46 | object_points, hand_points = observation.get_visual_observation()[3] 47 | world_to_ee = observation.world_to_ee 48 | input_points = self.point_cloud_processor.process(object_points, hand_points, world_to_ee) 49 | if input_points is None: 50 | action = np.zeros(6) 51 | else: 52 | input_points_tensor = torch.tensor(input_points, dtype=torch.float32, device=self.device).unsqueeze(0) 53 | action, goal_pred, obj_pose_pred = self.policy_network(input_points_tensor) 54 | action, goal_pred, obj_pose_pred = action[0].cpu().numpy(), goal_pred[0].cpu().numpy(), obj_pose_pred[0].cpu().numpy() 55 | if self.policy_network.cfg.goal_pred: 56 | ee_to_target_grasp = pos_ros_quat_to_mat(goal_pred[:3], goal_pred[3:]) 57 | world_to_target_grasp = world_to_ee@ee_to_target_grasp 58 | info["world_to_target_grasp"] = world_to_target_grasp 59 | action = np.append(action, 0.04) 60 | reached = self.termination_heuristics(world_to_ee) 61 | return action, "ego_cartesian", reached, info 62 | 63 | """ 64 | CUDA_VISIBLE_DEVICES=1 python -m evaluate setup=s0 split=train use_ray=False policy=pointnet2 pointnet2.model.pretrained_dir=/share1/haoran/HRI/generalizable_handover/output/t450_bc_omg_replan5_landmark_smooth0.08_use_hand_flow0_pred0_wd0.0001_pred0.5_300w_13s_no_accum_3 pointnet2.model.pretrained_source=handoversim env.visualize=True 65 | scene_id 5, step 62, status 1, reward 1.0, reached frame 5720, done frame 8380 66 | 67 | CUDA_VISIBLE_DEVICES=0,1,2,3 RAY_DEDUP_LOGS=0 python -m evaluate setup=s0 split=test num_runners=32 policy=pointnet2 pointnet2.model.pretrained_dir=/share1/haoran/HRI/generalizable_handover/output/t450_bc_omg_replan5_landmark_smooth0.08_use_hand_flow0_pred0_wd0.0001_pred0.5_300w_13s_no_accum_3 pointnet2.model.pretrained_source=handoversim 68 | success rate: 123/144=0.8541666666666666 69 | contact rate: 5/144=0.034722222222222224 70 | drop rate: 16/144=0.1111111111111111 71 | timeout rate: 0/144=0.0 72 | average done frame : 6378.659722222223 73 | average success done frame: 6421.861788617886 74 | average success num steps : 47.0650406504065 75 | average success : 0.4322831196581196 76 | evaluting uses 112.1101815700531 seconds 77 | 78 | CUDA_VISIBLE_DEVICES=0,1,2,3 RAY_DEDUP_LOGS=0 python -m evaluate setup=s0 split=train num_runners=32 policy=pointnet2 pointnet2.model.pretrained_dir=/share1/haoran/HRI/generalizable_handover/output/t450_bc_omg_replan5_landmark_smooth0.08_use_hand_flow0_pred0_wd0.0001_pred0.5_300w_13s_no_accum_3 pointnet2.model.pretrained_source=handoversim 79 | success rate: 613/720=0.8513888888888889 80 | contact rate: 20/720=0.027777777777777776 81 | drop rate: 68/720=0.09444444444444444 82 | timeout rate: 19/720=0.02638888888888889 83 | average done frame : 6652.265277777778 84 | average success done frame: 6543.5057096247965 85 | average success num steps : 47.99347471451876 86 | average success : 0.42291068376068375 87 | evaluting uses 370.2707829475403 seconds 88 | """ -------------------------------------------------------------------------------- /policies/pointnet2_policy_config.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | 3 | from .base_policy_config import BasePolicyConfig 4 | from .utils.point_cloud import PointCloudProcessorConfig 5 | from 
models.policy_network import PolicyNetworkConfig 6 | 7 | @dataclass 8 | class PointNet2PolicyConfig(BasePolicyConfig): 9 | name: str = "pointnet2" 10 | 11 | processor: PointCloudProcessorConfig = field(default_factory=PointCloudProcessorConfig) 12 | model: PolicyNetworkConfig = field(default_factory=lambda: PolicyNetworkConfig( 13 | in_channel="${..processor.in_channel}", 14 | )) 15 | -------------------------------------------------------------------------------- /policies/utils/point_cloud.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.typing import NDArray 3 | from typing import Optional 4 | from dataclasses import dataclass 5 | from omegaconf import OmegaConf 6 | 7 | from env.utils.transform import se3_inverse, se3_transform_pc 8 | import open3d as o3d 9 | 10 | def regularize_pc_point_count(pc: NDArray, num_points: int, np_random: np.random.RandomState) -> NDArray: 11 | """ 12 | pc: (N, D) 13 | If point cloud pc has less points than num_points, it oversamples. 14 | Otherwise, it downsample the input pc to have num_points points. 15 | """ 16 | if pc.shape[0] > num_points: 17 | selected_indices = np_random.choice(range(pc.shape[0]), size=num_points, replace=False) 18 | regularized_pc = pc[selected_indices] 19 | elif pc.shape[0] == num_points: 20 | regularized_pc = pc 21 | else: 22 | required = num_points-pc.shape[0] 23 | selected_indices = np_random.choice(range(pc.shape[0]), size=required) 24 | regularized_pc = np.concatenate((pc, pc[selected_indices]), axis=0) 25 | return regularized_pc 26 | 27 | OmegaConf.register_new_resolver("eval", eval) 28 | 29 | @dataclass 30 | class PointCloudProcessorConfig: 31 | num_points: int = 1024 32 | use_hand: bool = True 33 | flow_frame_num: int = 0 34 | in_channel: int = "${eval:'3+2*int(${.use_hand})+3*${.flow_frame_num}'}" 35 | 36 | class PointCloudProcessor: 37 | def __init__(self, cfg: PointCloudProcessorConfig): 38 | self.cfg = cfg 39 | self.np_random = np.random.RandomState(0) 40 | self.world_to_object_points = np.zeros((0, 3)) 41 | self.world_to_hand_points = np.zeros((0, 3)) 42 | self.world_to_previous_object_points = np.zeros((0, 3)) 43 | self.world_to_previous_hand_points = np.zeros((0, 3)) 44 | self.pre_hand_flow_matrix_list = [np.eye(4) for i in range(self.cfg.flow_frame_num)] 45 | self.pre_object_flow_matrix_list = [np.eye(4) for i in range(self.cfg.flow_frame_num)] 46 | 47 | def _get_flow_matrix(self, pre_point, cur_point): 48 | " pre_point: (..., 3) cur_point: (..., 3) " 49 | if pre_point.shape[0] == 0 or cur_point.shape[0] == 0: 50 | return np.eye(4) 51 | pre_pcd = o3d.geometry.PointCloud() 52 | pre_pcd.points = o3d.utility.Vector3dVector(pre_point) 53 | cur_pcd = o3d.geometry.PointCloud() 54 | cur_pcd.points = o3d.utility.Vector3dVector(cur_point) 55 | 56 | threshold = 10 # Correspondence distance threshold 57 | reg_p2p = o3d.pipelines.registration.registration_icp( 58 | pre_pcd, cur_pcd, threshold, np.identity(4), o3d.pipelines.registration.TransformationEstimationPointToPoint()) 59 | 60 | return reg_p2p.transformation 61 | 62 | def _compute_pre_points(self, matrix, points): 63 | return se3_transform_pc(se3_inverse(matrix), points) 64 | 65 | def reset(self): 66 | self.np_random.seed(0) 67 | self.world_to_object_points = np.zeros((0, 3)) 68 | self.world_to_hand_points = np.zeros((0, 3)) 69 | self.world_to_previous_object_points = np.zeros((0, 3)) 70 | self.world_to_previous_hand_points = np.zeros((0, 3)) 71 | self.pre_hand_flow_matrix_list = [np.eye(4) for i in 
range(self.cfg.flow_frame_num)] 72 | self.pre_object_flow_matrix_list = [np.eye(4) for i in range(self.cfg.flow_frame_num)] 73 | 74 | def process(self, object_points, hand_points, world_to_ee) -> Optional[NDArray[np.float32]]: 75 | self.world_to_previous_object_points = self.world_to_object_points 76 | self.world_to_previous_hand_points = self.world_to_hand_points 77 | 78 | # update world to points 79 | if object_points.shape[0] > 0 or hand_points.shape[0] > 0: 80 | world_to_new_object_points = se3_transform_pc(world_to_ee, object_points) 81 | world_to_new_hand_points = se3_transform_pc(world_to_ee, hand_points) 82 | self.world_to_object_points = world_to_new_object_points 83 | self.world_to_hand_points = world_to_new_hand_points 84 | 85 | # convert the stored world_to_points to egocentric 86 | ee_to_world = se3_inverse(world_to_ee) 87 | object_points = se3_transform_pc(ee_to_world, self.world_to_object_points) 88 | hand_points = se3_transform_pc(ee_to_world, self.world_to_hand_points) 89 | 90 | if object_points.shape[0]+hand_points.shape[0] == 0: 91 | return None 92 | 93 | if self.cfg.flow_frame_num > 0: 94 | object_flow_matrix = self._get_flow_matrix(self.world_to_previous_object_points, self.world_to_object_points) 95 | hand_flow_matrix = self._get_flow_matrix(self.world_to_previous_hand_points, self.world_to_hand_points) 96 | self.pre_object_flow_matrix_list.append(np.eye(4)) 97 | self.pre_hand_flow_matrix_list.append(np.eye(4)) 98 | 99 | for i in range(-self.cfg.flow_frame_num, 0): 100 | self.pre_object_flow_matrix_list[i] = object_flow_matrix @ self.pre_object_flow_matrix_list[i] 101 | self.pre_hand_flow_matrix_list[i] = hand_flow_matrix @ self.pre_hand_flow_matrix_list[i] 102 | 103 | input_points = np.zeros((object_points.shape[0]+hand_points.shape[0], 5), dtype=np.float32) 104 | input_points[:object_points.shape[0], :3] = object_points 105 | input_points[:object_points.shape[0], 3] = 1 106 | input_points[object_points.shape[0]:, :3] = hand_points 107 | input_points[object_points.shape[0]:, 4] = 1 108 | input_points = regularize_pc_point_count(input_points, self.cfg.num_points, self.np_random) 109 | 110 | 111 | if self.cfg.flow_frame_num > 0: 112 | point_flow_feature = np.zeros((input_points.shape[0], 3 * self.cfg.flow_frame_num)) 113 | object_mask = input_points[:,3] == True 114 | hand_mask = input_points[:,4] == True 115 | world_to_new_object_points = se3_transform_pc(world_to_ee, input_points[object_mask, :3]) 116 | world_to_new_hand_points = se3_transform_pc(world_to_ee, input_points[hand_mask, :3]) 117 | 118 | for i in range(self.cfg.flow_frame_num): 119 | point_flow_feature[object_mask, i*3 : (i+1)*3] = self._compute_pre_points(self.pre_object_flow_matrix_list[-(i+1)], world_to_new_object_points) 120 | point_flow_feature[hand_mask, i*3 : (i+1)*3] = self._compute_pre_points(self.pre_hand_flow_matrix_list[-(i+1)], world_to_new_hand_points) 121 | point_flow_feature[:, i*3 : (i+1)*3] = se3_transform_pc(ee_to_world, point_flow_feature[:, i*3 : (i+1)*3]) 122 | 123 | input_points = np.concatenate([input_points[:,:3], point_flow_feature, input_points[:,3:]], axis = 1) 124 | 125 | return input_points 126 | -------------------------------------------------------------------------------- /policy_runner.py: -------------------------------------------------------------------------------- 1 | import os 2 | import ray 3 | import numpy as np 4 | from numpy.typing import NDArray 5 | from typing import TypedDict, List, Optional 6 | import open3d as o3d 7 | import code 8 | 9 | from 
env.utils.scene import scene_id_to_dir 10 | from env.status_checker import EpisodeStatus 11 | from env.handover_env import GenH2RSim, Observation 12 | from evaluate_config import EvaluateConfig 13 | 14 | result_dtype = np.dtype({ 15 | "names": ["scene_id", "status", "reward", "reached_frame", "done_frame", "num_steps"], 16 | "formats": [np.int32, np.int32, np.float32, np.int32, np.int32, np.int32], 17 | "offsets": [0, 4, 8, 12, 16, 20], 18 | "itemsize": 24, 19 | }) 20 | 21 | def scene_id_to_demo_path(scene_id: int, demo_structure: str) -> str: 22 | scene_dir = scene_id_to_dir(scene_id, demo_structure) 23 | scene_path = os.path.join(scene_dir, f"{scene_id:08d}.npz") 24 | return scene_path 25 | 26 | @ray.remote(num_cpus=1) 27 | class Distributer: 28 | def __init__(self, scene_ids: List[int]): 29 | self.idx = 0 30 | self.scene_ids = scene_ids 31 | 32 | def get_next_task(self): 33 | if self.idx == len(self.scene_ids): 34 | return None 35 | print(f"idx={self.idx}, scene_id={self.scene_ids[self.idx]}") 36 | scene_id = self.scene_ids[self.idx] 37 | self.idx += 1 38 | return scene_id 39 | 40 | class DemoData(TypedDict): 41 | frame: NDArray[np.int32] 42 | world_to_ee: NDArray[np.float32] 43 | world_to_object: NDArray[np.float32] 44 | world_to_hand: NDArray[np.float32] 45 | joint_positions: NDArray[np.float32] 46 | action: NDArray[np.float32] 47 | world_to_target_grasp: NDArray[np.float32] 48 | num_steps: int 49 | object_points_0: NDArray[np.float32] # and other steps 50 | hand_points_0: NDArray[np.float32] 51 | status: int 52 | reward: float 53 | reached_frame: int 54 | done_frame: int 55 | 56 | class PolicyRunner: 57 | def __init__(self, cfg: EvaluateConfig, distributer: Distributer=None): 58 | self.cfg = cfg 59 | self.env = GenH2RSim(cfg.env) 60 | if cfg.policy == "offline": 61 | from policies.offline_policy import OfflinePolicy 62 | self.policy = OfflinePolicy(cfg.offline) 63 | elif cfg.policy == "pointnet2": 64 | from policies.pointnet2_policy import PointNet2Policy 65 | self.policy = PointNet2Policy(cfg.pointnet2) 66 | else: 67 | raise NotImplementedError 68 | self.distributer = distributer 69 | 70 | self.demo_array_keys = ["frame", "world_to_ee", "world_to_object", "world_to_hand", "joint_positions", "action", "world_to_target_grasp"] 71 | self.np_random = np.random.RandomState() 72 | 73 | def get_dart_action(self, step: int) -> Optional[NDArray[np.float64]]: 74 | if self.cfg.policy == "offline": 75 | if f"dart_action_{step}" in self.policy.traj_data: 76 | dart_action: NDArray[np.float64] = self.policy.traj_data[f"dart_action_{step}"] 77 | else: 78 | dart_action = None 79 | else: 80 | if self.cfg.dart and not self.policy.reached and step >= self.cfg.dart_min_step and step <= self.cfg.dart_max_step and self.np_random.uniform() <= self.cfg.dart_ratio: 81 | trans = self.np_random.uniform(-0.04, 0.04, size=(3,)) 82 | rot = self.np_random.uniform(-0.2, 0.2, size=(3,)) 83 | dart_action = np.concatenate([trans, rot, np.array([0.04])]) 84 | else: 85 | dart_action = None 86 | return dart_action 87 | 88 | def init_demo_data(self, scene_id: int) -> str: 89 | self.demo_data = {key: [] for key in self.demo_array_keys} 90 | self.demo_data["num_steps"] = 0 91 | demo_path = os.path.join(self.cfg.demo_dir, scene_id_to_demo_path(scene_id, self.cfg.demo_structure)) 92 | os.makedirs(os.path.dirname(demo_path), exist_ok=True) 93 | return demo_path 94 | 95 | def clip_object_hand_points(self, object_points, hand_points): 96 | num_points = object_points.shape[0]+hand_points.shape[0] 97 | num_object_points = 
object_points.shape[0] 98 | if num_points <= 1024: 99 | return object_points, hand_points 100 | selected_idxs = np.random.choice(range(num_points), size=1024, replace=False) 101 | object_points = object_points[selected_idxs[selected_idxs<num_object_points]] 102 | hand_points = hand_points[selected_idxs[selected_idxs>=num_object_points]-num_object_points] 103 | return object_points, hand_points 104 | 105 | def add_demo_data(self, stage: str, observation: Observation, action: NDArray, world_to_target_grasp: NDArray, dart_action: NDArray=None, demo_path: str=None): # depends on clip_object_hand_points 106 | step = self.demo_data["num_steps"] 107 | if dart_action is not None: 108 | self.demo_data[f"dart_action_{step}"] = dart_action 109 | if stage == "reach": 110 | self.demo_data["num_steps"] += 1 111 | self.demo_data[f"object_points_{step}"], self.demo_data[f"hand_points_{step}"] = self.clip_object_hand_points(*observation.get_visual_observation()[3]) 112 | self.demo_data["frame"].append(observation.frame) 113 | self.demo_data["world_to_ee"].append(self.env.panda.get_world_to_ee()) 114 | self.demo_data["world_to_object"].append(self.env.objects.target_object.get_world_to_obj()) 115 | self.demo_data["world_to_hand"].append(self.env.hand.get_joint_positions()) 116 | self.demo_data["joint_positions"].append(self.env.panda.get_joint_positions()) 117 | self.demo_data["action"].append(action) 118 | self.demo_data["world_to_target_grasp"].append(world_to_target_grasp) 119 | if demo_path is not None and self.cfg.save_state: 120 | scene_id = int(demo_path.split("/")[-1][:-4]) 121 | self.env.bullet_client.saveBullet(os.path.join(os.path.dirname(demo_path), f"{scene_id}_step_{step}.bullet")) 122 | 123 | def save_demo_data(self, demo_path: str, status: int, reward: float, reached_frame: int, done_frame: int): 124 | for key in self.demo_array_keys: 125 | if len(self.demo_data[key]) == 0: 126 | print(f"no data added for {demo_path}") 127 | self.demo_data[key] = None 128 | else: 129 | self.demo_data[key] = np.stack(self.demo_data[key], axis=0) 130 | self.demo_data.update({"status": status, "reward": reward, "reached_frame": reached_frame, "done_frame": done_frame}) 131 | if status != EpisodeStatus.SUCCESS: 132 | for step in range(self.demo_data["num_steps"]): 133 | del self.demo_data[f"object_points_{step}"], self.demo_data[f"hand_points_{step}"] 134 | np.savez(demo_path, **self.demo_data) 135 | self.demo_data = {} # free the space 136 | 137 | def run(self, scene_id): # depends on init_demo_data, add_demo_data, save_demo_data 138 | self.env.reset(scene_id) 139 | self.np_random.seed(self.cfg.seed+scene_id) 140 | if self.cfg.policy in ["offline", "chomp"]: 141 | self.policy.reset(scene_id) 142 | else: 143 | self.policy.reset() 144 | step = 0 145 | if self.cfg.demo_dir is not None: 146 | demo_path = self.init_demo_data(scene_id) 147 | if self.cfg.record_ego_video: 148 | self.env.panda.camera.setup_video_writer(os.path.join(os.path.dirname(demo_path), f"{scene_id:08d}_ego_rgb.mp4")) 149 | if self.cfg.record_third_person_video: 150 | self.env.third_person_camera.setup_video_writer(os.path.join(os.path.dirname(demo_path), f"{scene_id:08d}_third_person_rgb.mp4")) 151 | while True: 152 | dart_action = self.get_dart_action(step) 153 | if dart_action is not None: 154 | reward, done, info = self.env.ego_cartesian_step(dart_action, self.policy.action_repeat_steps, increase_frame=False) 155 | if self.cfg.policy in ["OMG_original", "chomp"]: 156 | self.policy.reset() 157 | self.env.clear_grasps() 158 | observation = self.env.get_observation() 159 | action, action_type, repeat, stage, info = 
self.policy.run_policy(observation) 160 | if "world_to_target_grasp" in info: 161 | world_to_target_grasp = info["world_to_target_grasp"] 162 | if self.cfg.show_target_grasp: 163 | self.env.load_grasp(world_to_target_grasp, [0., 1., 0., 1.]) 164 | else: 165 | world_to_target_grasp = np.nan*np.ones((4, 4), dtype=np.float32) 166 | if "input_pcd" in info: # offline.check_input_point_cloud 167 | assert self.cfg.demo_dir is not None 168 | if np.array(info["input_pcd"].points).shape[0] > 0: 169 | o3d.io.write_point_cloud(os.path.join(os.path.dirname(demo_path), f"{scene_id:08d}_{step}_input_pcd.pcd"), info["input_pcd"]) 170 | if self.cfg.demo_dir is not None: 171 | self.add_demo_data(stage, observation, action, world_to_target_grasp, dart_action, demo_path) 172 | if self.cfg.policy == "offline": 173 | # print(f"actual joint positions {self.env.panda.get_joint_positions()}") 174 | # print(f"loaded joint positions {self.policy.traj_data['joint_positions'][step]}") 175 | # print(f"actual world to obj {self.env.objects.target_object.get_world_to_obj()}") 176 | # print(f"loaded world to obj {self.policy.traj_data['world_to_object'][step]}") 177 | saved_state_path = os.path.join(os.path.dirname(self.policy.data_path), f"{scene_id}_step_{step}.bullet") 178 | if os.path.exists(saved_state_path): 179 | self.env.bullet_client.saveBullet("tmp.bullet") 180 | print(f"stage {stage} step {step} diff", os.system(f"diff {saved_state_path} tmp.bullet")) 181 | step += 1 182 | if self.cfg.verbose: 183 | print(f"step {step}, frame {observation.frame}, action {action}, repeat {repeat}") 184 | if action_type == "joint": 185 | reward, done, info = self.env.joint_step(action, repeat) 186 | elif action_type == "ego_cartesian": 187 | reward, done, info = self.env.ego_cartesian_step(action, repeat) 188 | elif action_type == "world_pos": 189 | reward, done, info = self.env.world_pos_step(action, repeat) 190 | else: 191 | raise NotImplementedError 192 | if done: 193 | break 194 | reached_frame, done_frame, status = self.policy.reached_frame, self.env.frame, info["status"] 195 | print(f"scene_id {scene_id}, step {step}, status {status}, reward {reward}, reached frame {reached_frame}, done frame {done_frame}") 196 | if self.cfg.policy == "offline": 197 | if status != self.policy.traj_data["status"]: 198 | print(f"scene_id {scene_id}, status {status}, loaded status {self.policy.traj_data['status']}") 199 | if self.cfg.demo_dir is not None: 200 | self.save_demo_data(demo_path, status, reward, reached_frame, done_frame) 201 | if self.cfg.record_ego_video: 202 | self.env.panda.camera.close_video_writer() 203 | if self.cfg.record_third_person_video: 204 | self.env.third_person_camera.close_video_writer() 205 | return status, reward, reached_frame, done_frame, step 206 | 207 | def work(self): 208 | results = [] 209 | while True: 210 | scene_id = ray.get(self.distributer.get_next_task.remote()) 211 | if scene_id is None: break 212 | result: NDArray[result_dtype] = np.empty((1, ), dtype=result_dtype) 213 | result["scene_id"] = scene_id 214 | demo_data_existed = False 215 | if self.cfg.demo_dir is not None: 216 | demo_path = os.path.join(self.cfg.demo_dir, scene_id_to_demo_path(scene_id, self.cfg.demo_structure)) 217 | if os.path.exists(demo_path): 218 | demo_data_existed = True 219 | if demo_data_existed and not self.cfg.overwrite_demo: 220 | demo_data = np.load(demo_path) 221 | result["status"], result["reward"], result["reached_frame"], result["done_frame"], result["num_steps"] = demo_data["status"], demo_data["reward"], 
demo_data["reached_frame"], demo_data["done_frame"], demo_data["num_steps"] 222 | else: 223 | result["status"], result["reward"], result["reached_frame"], result["done_frame"], result["num_steps"] = self.run(scene_id) 224 | results.append(result) 225 | if len(results) > 0: 226 | results: NDArray[result_dtype] = np.stack(results) 227 | else: 228 | results: NDArray[result_dtype] = np.empty((0, 1), dtype=result_dtype) 229 | return results 230 | 231 | def get_policy_runner_remote(num_gpus): 232 | @ray.remote(num_cpus=1, num_gpus=num_gpus) 233 | class PolicyRunnerRemote(PolicyRunner): 234 | pass 235 | return PolicyRunnerRemote 236 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # GenH2R 2 | 3 | GenH2R is the official code for the following CVPR 2024 paper: 4 | 5 | **GenH2R: Learning Generalizable Human-to-Robot Handover via Scalable Simulation, Demonstration, and Imitation** 6 | 7 | Zifan Wang*, Junyu Chen*, Ziqing Chen, Pengwei Xie, Rui Chen, Li Yi 8 | 9 | IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 2024 10 | 11 | [ [website](https://genh2r.github.io/) ] [ [arXiv](https://arxiv.org/abs/2401.00929) ] [ [video](https://www.youtube.com/watch?v=BbphK5QlS1Y) ] 12 | 13 | ![logo](assets/1_logo.gif) 14 | 15 | ## Introduction 16 | 17 | GenH2R is a framework for learning generalizable vision-based human-to-robot (H2R) handover skills. The goal is to equip robots with the ability to reliably receive objects with unseen geometry handed over by humans in various complex trajectories. 18 | 19 | We acquire such generalizability by learning H2R handover at scale with a comprehensive solution including procedural simulation assets creation, automated demonstration generation, and effective imitation learning. We leverage large-scale 3D model repositories, dexterous grasp generation methods, and curve-based 3D animation to create an H2R handover simulation environment named GenH2R-Sim, surpassing the number of scenes in existing simulators by three orders of magnitude. We further introduce a distillation-friendly demonstration generation method that automatically generates a million high-quality demonstrations suitable for learning. Finally, we present a 4D imitation learning method augmented by a future forecasting objective to distill demonstrations into a visuo-motor handover policy. 20 | 21 | ## Release News and Updates 22 | 23 | Building upon [handover-sim](https://github.com/NVlabs/handover-sim), [GA-DDPG](https://github.com/liruiw/GA-DDPG) and [OMG-Planner](https://github.com/liruiw/OMG-Planner), our original codebase is a bit bulky. For better readability and extensibility, we decide to refactor our codebase to provide a simplified version. 24 | 25 | - `2024.06.20` We have released the evaluation scripts and the pre-trained models. 26 | 27 | We are actively cleaning the code for simulation scene construction, demonstration generation and policy training, and will release as soon as possible. 
28 | 29 | ## Usage 30 | 31 | ### Clone Repository 32 | ``` bash 33 | git clone --recursive git@github.com:chenjy2003/genh2r.git 34 | ``` 35 | 36 | ### Create Python Environment 37 | ``` bash 38 | conda create -n genh2r python=3.10 39 | conda activate genh2r 40 | pip install -r requirements.txt 41 | # install pytorch according to your cuda version (https://pytorch.org/get-started/previous-versions/) 42 | pip install torch==1.13.1+cu116 torchvision==0.14.1+cu116 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu116 43 | ``` 44 | 45 | ### Install Third Party Packages 46 | #### PyKDL 47 | We highly recommend installing this third-party package for robot kinematics. However, inference and evaluation of pre-trained models can also be done without it by adding `env.panda.IK_solver=pybullet` to the following commands, with slightly different results. 48 | ``` bash 49 | cd env/third_party/orocos_kinematics_dynamics 50 | sudo apt-get update 51 | sudo apt-get install libeigen3-dev libcppunit-dev 52 | cd orocos_kdl 53 | mkdir build 54 | cd build 55 | cmake .. -DENABLE_TESTS:BOOL=ON 56 | make 57 | sudo make install 58 | make check 59 | cd ../../python_orocos_kdl 60 | mkdir build 61 | cd build 62 | ROS_PYTHON_VERSION=3.10 cmake .. 63 | make 64 | sudo make install 65 | cp PyKDL.so $CONDA_PREFIX/lib/python3.10/site-packages/ 66 | ## test 67 | python3 ../tests/PyKDLtest.py 68 | ``` 69 | #### PointNet++ 70 | ``` bash 71 | cd third_party/Pointnet2_PyTorch 72 | pip install pointnet2_ops_lib/. 73 | cd ../.. 74 | ``` 75 | ### Prepare Data 76 | #### DexYCB 77 | Download [`dex-ycb-cache-20220323.tar.gz`](https://drive.google.com/uc?export=download&id=1Jqe2iqI7inoEdE3BL4vEs25eT5M7aUHd) (from [handover-sim](https://github.com/NVlabs/handover-sim)) to `env/data/tmp`. 78 | 79 | Download [`assets.tar.gz`](https://drive.google.com/file/d/1jLi23goHESWHMIud2wpNiQNdZ45dMQ49/view?usp=drive_link) (the object and hand models are from [handover-sim](https://github.com/NVlabs/handover-sim)) to `env/data`. 80 | 81 | Then run 82 | ``` bash 83 | cd env/data/tmp 84 | tar -xvf dex-ycb-cache-20220323.tar.gz 85 | cd .. 86 | tar -xvf assets.tar.gz 87 | cd ../.. 88 | python -m env.tools.process_dexycb 89 | ``` 90 | 91 | The 1000 processed scenes will be in `data/scene/00/00`, from `data/scene/00/00/00/00000000.npz` to `data/scene/00/00/09/00000999.npz`. 92 | 93 | ### Evaluate Pre-trained Models 94 | Our pre-trained models can be downloaded [here](https://drive.google.com/drive/folders/1kbQR3xXJJp4rUZ-pytH7vTQVBNaJgM4O?usp=drive_link). 95 | 96 | We use [ray](https://github.com/ray-project/ray) for parallel evaluation in order to support larger test sets. Feel free to adjust `CUDA_VISIBLE_DEVICES` (the GPUs to use) and `num_runners` (the total number of runners) according to the local machine; this does not change the evaluation results. 97 | 98 | We observed that the evaluation results can be slightly different on different devices, which unfortunately originates from some third-party packages. Our evaluation is done on an `NVIDIA GeForce RTX 3090`. 99 | #### on DexYCB Test Set (Simultaneous) 100 | ``` bash 101 | CUDA_VISIBLE_DEVICES=0 python -m evaluate \ 102 | setup=s0 split=test num_runners=16 \ 103 | policy=pointnet2 pointnet2.processor.flow_frame_num=3 pointnet2.model.obj_pose_pred_frame_num=3 \ 104 | pointnet2.model.pretrained_dir=${model_dir} \ 105 | pointnet2.model.pretrained_source=handoversim 106 | ``` 107 | Here `model_dir` should be the path of the folder containing the model parameters. 
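For reference, the same evaluation can be spread over multiple GPUs by listing them in `CUDA_VISIBLE_DEVICES` and increasing `num_runners`; the values below are only illustrative and, as noted above, do not change the evaluation results.

``` bash
# illustrative only: two GPUs and 32 runners; model_dir as above
CUDA_VISIBLE_DEVICES=0,1 python -m evaluate \
    setup=s0 split=test num_runners=32 \
    policy=pointnet2 pointnet2.processor.flow_frame_num=3 pointnet2.model.obj_pose_pred_frame_num=3 \
    pointnet2.model.pretrained_dir=${model_dir} \
    pointnet2.model.pretrained_source=handoversim
```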
108 | #### on DexYCB Test Set (Sequential) 109 | ``` bash 110 | CUDA_VISIBLE_DEVICES=0 python -m evaluate \ 111 | setup=s0 split=test num_runners=16 \ 112 | policy=pointnet2 pointnet2.processor.flow_frame_num=3 pointnet2.model.obj_pose_pred_frame_num=3 \ 113 | pointnet2.model.pretrained_dir=${model_dir} \ 114 | pointnet2.model.pretrained_source=handoversim \ 115 | pointnet2.wait_time=3 116 | ``` 117 | ### Visualization 118 | To visualize the handover process, there are two options: 119 | #### Use `pybullet` GUI 120 | To use the GUI, only a single process can be started, so parallel evaluation should be disabled by setting `use_ray=False`. The GUI can then be enabled by setting `env.visualize=True`. Note that one can have finer control over which scenes to evaluate by setting `scene_ids` instead of `setup` and `split`. 121 | ``` bash 122 | CUDA_VISIBLE_DEVICES=0 python -m evaluate \ 123 | scene_ids=[214,219] use_ray=False \ 124 | env.visualize=True \ 125 | policy=pointnet2 pointnet2.processor.flow_frame_num=3 pointnet2.model.obj_pose_pred_frame_num=3 \ 126 | pointnet2.model.pretrained_dir=${model_dir} \ 127 | pointnet2.model.pretrained_source=handoversim 128 | ``` 129 | #### Record Egocentric Video from the Wrist-mounted Camera or Record Video from Third Person View 130 | To record videos, set `demo_dir` (where to store the videos), `record_ego_video=True`, and `record_third_person_video=True`. 131 | ``` bash 132 | CUDA_VISIBLE_DEVICES=0 python -m evaluate \ 133 | setup=s0 split=test num_runners=16 \ 134 | policy=pointnet2 pointnet2.processor.flow_frame_num=3 pointnet2.model.obj_pose_pred_frame_num=3 \ 135 | pointnet2.model.pretrained_dir=${model_dir} \ 136 | pointnet2.model.pretrained_source=handoversim \ 137 | demo_dir=data/tmp record_ego_video=True record_third_person_video=True 138 | ``` 139 | The argument `demo_structure` controls how the demonstration data are arranged. If set to `hierarchical` (the default), the data are stored in a hierarchical way, like `data/tmp/00/00/02/00000209_ego_rgb.mp4`. If set to `flat`, all the data are stored directly in the folder `data/tmp`. 140 | 141 | One can also adjust the position and orientation of the third-person camera by setting `env.third_person_camera.pos` (default `[1.5,-0.1,1.8]`), `env.third_person_camera.target` (default `[0.6,-0.1,1.3]`), and `env.third_person_camera.up_vector` (default `[0.,0.,1.]`). 142 | 143 | ## Acknowledgements 144 | This repo is built based on [handover-sim](https://github.com/NVlabs/handover-sim), [GA-DDPG](https://github.com/liruiw/GA-DDPG), [OMG-Planner](https://github.com/liruiw/OMG-Planner), [acronym](https://github.com/NVlabs/acronym), [orocos_kinematics_dynamics](https://github.com/orocos/orocos_kinematics_dynamics), and [Pointnet2_PyTorch](https://github.com/erikwijmans/Pointnet2_PyTorch). We sincerely appreciate their contributions to open source. 
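A side note on the demonstration data mentioned in the Visualization section above: when `demo_dir` is set, each evaluated scene also gets a `<scene_id>.npz` file written by `PolicyRunner.save_demo_data` in `policy_runner.py` (per-step poses, joint positions, actions, the episode status, and, for successful episodes, per-step point clouds). A minimal sketch for peeking into one of these files, assuming the `demo_dir=data/tmp` run with the hierarchical layout and the example scene shown above:

``` bash
# illustrative only: inspect one saved demonstration file (hierarchical layout, example scene 209)
python -c "
import numpy as np
demo = np.load('data/tmp/00/00/02/00000209.npz')
print('status', demo['status'], 'num_steps', demo['num_steps'])
print('world_to_ee', demo['world_to_ee'].shape)  # stacked end-effector poses, one per step
print('action', demo['action'].shape)            # stacked actions executed by the policy
"
```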
145 | 146 | ## Citation 147 | If GenH2R is useful or relevant to your research, please kindly recognize our contributions by citing our paper: 148 | ``` 149 | @article{wang2024genh2r, 150 | title={GenH2R: Learning Generalizable Human-to-Robot Handover via Scalable Simulation, Demonstration, and Imitation}, 151 | author={Wang, Zifan and Chen, Junyu and Chen, Ziqing and Xie, Pengwei and Chen, Rui and Yi, Li}, 152 | journal={arXiv preprint arXiv:2401.00929}, 153 | year={2024} 154 | } 155 | ``` 156 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | addict==2.4.0 2 | aiosignal==1.3.1 3 | antlr4-python3-runtime==4.9.3 4 | appdirs==1.4.4 5 | apptools==5.2.1 6 | asttokens==2.4.1 7 | attrs==23.2.0 8 | bezier==2023.7.28 9 | blinker==1.7.0 10 | certifi==2023.11.17 11 | charset-normalizer==3.3.2 12 | click==8.1.7 13 | cloudpickle==3.0.0 14 | comm==0.2.1 15 | ConfigArgParse==1.7 16 | configobj==5.0.8 17 | contourpy==1.2.0 18 | cycler==0.12.1 19 | dash==2.16.0 20 | dash-core-components==2.0.0 21 | dash-html-components==2.0.0 22 | dash-table==5.0.0 23 | decorator==5.1.1 24 | docker-pycreds==0.4.0 25 | easydict==1.11 26 | envisage==7.0.3 27 | exceptiongroup==1.2.0 28 | executing==2.0.1 29 | fastjsonschema==2.19.1 30 | filelock==3.13.1 31 | Flask==3.0.2 32 | fonttools==4.49.0 33 | frozenlist==1.4.1 34 | gitdb==4.0.11 35 | GitPython==3.1.42 36 | gym==0.26.2 37 | gym-notices==0.0.8 38 | h5py==3.10.0 39 | idna==3.6 40 | imageio==2.33.1 41 | imageio-ffmpeg==0.4.9 42 | importlib-metadata==7.0.1 43 | ipdb==0.13.13 44 | ipython==8.21.0 45 | ipywidgets==8.1.2 46 | itsdangerous==2.1.2 47 | jedi==0.19.1 48 | Jinja2==3.1.3 49 | joblib==1.3.2 50 | jsonschema==4.21.1 51 | jsonschema-specifications==2023.12.1 52 | jupyter_core==5.7.1 53 | jupyterlab_widgets==3.0.10 54 | kiwisolver==1.4.5 55 | lxml==5.1.0 56 | MarkupSafe==2.1.5 57 | matplotlib==3.8.3 58 | matplotlib-inline==0.1.6 59 | mayavi==4.8.1 60 | msgpack==1.0.7 61 | nbformat==5.9.2 62 | nest-asyncio==1.6.0 63 | numpy==1.26.3 64 | omegaconf==2.3.0 65 | open3d==0.18.0 66 | opencv-python==4.9.0.80 67 | packaging==23.2 68 | pandas==2.2.1 69 | parso==0.8.3 70 | pexpect==4.9.0 71 | pillow==10.2.0 72 | platformdirs==4.2.0 73 | plotly==5.19.0 74 | prompt-toolkit==3.0.43 75 | protobuf==4.25.2 76 | psutil==5.9.8 77 | ptyprocess==0.7.0 78 | pure-eval==0.2.2 79 | pyassimp==4.1.3 80 | pybullet==3.2.6 81 | pyface==8.0.0 82 | Pygments==2.17.2 83 | pyinstrument==4.6.2 84 | pymeshlab==2023.12.post1 85 | PyOpenGL==3.1.7 86 | pyparsing==3.1.1 87 | PyQt5==5.15.10 88 | PyQt5-Qt5==5.15.2 89 | PyQt5-sip==12.13.0 90 | pyquaternion==0.9.9 91 | python-dateutil==2.8.2 92 | pytz==2024.1 93 | PyYAML==6.0.1 94 | ray==2.9.1 95 | referencing==0.33.0 96 | requests==2.31.0 97 | retrying==1.3.4 98 | rpds-py==0.17.1 99 | scikit-learn==1.4.1.post1 100 | scipy==1.12.0 101 | sentry-sdk==1.43.0 102 | setproctitle==1.3.3 103 | sip==6.8.3 104 | six==1.16.0 105 | smmap==5.0.1 106 | stack-data==0.6.3 107 | tenacity==8.2.3 108 | threadpoolctl==3.3.0 109 | tomli==2.0.1 110 | tqdm==4.66.1 111 | traitlets==5.14.1 112 | traits==6.4.3 113 | traitsui==8.0.0 114 | transforms3d==0.4.1 115 | typing_extensions==4.9.0 116 | tzdata==2024.1 117 | urllib3==2.2.0 118 | vtk==9.3.0 119 | wandb==0.16.4 120 | wcwidth==0.2.13 121 | Werkzeug==3.0.1 122 | widgetsnbextension==4.0.10 123 | yacs==0.1.8 124 | zipp==3.17.0 125 | 
--------------------------------------------------------------------------------