├── .gitignore ├── README.md ├── images ├── object.gif ├── relay.gif ├── relaylonger3.gif └── self.gif ├── pointMass ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-36.pyc │ └── __init__.cpython-37.pyc └── envs │ ├── __init__.py │ └── environments.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | /pointMass.egg-info/* 2 | /pointMass/envs/superbasic.py 3 | /pointMass/envs/paper_folding.py 4 | /pointMass/envs/basic_tests.py 5 | /pointMass/envs/externalTorqueSphere.py 6 | /pointMass/envs/__pycache__/* -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pointMass 2 | A pointMass PyBullet RL environment for simple experiments and algorithm verification. 3 | 4 | It is an OpenAI Gym goal-based environment with modifiable difficulty. The easiest environment is a 2D self-navigation task. Further levels of difficulty involve a configurable number of objects which must be pushed to goal locations. 5 | 6 | The action space in all cases is 2D: the positional change applied to the mass in the x and y dimensions. 7 | 8 | In pointMassObject-v0 (class pointMassEnvObject), the gold point mass must push the green block to its goal location, the green sphere. The 'observation' contains the xy positions and velocities of the mass and the block, and the achieved and desired goals are the xy position of the block. 9 | 10 |

11 | Object Manipulation? 12 |

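A minimal interaction loop for the block-pushing task might look like the sketch below. It assumes the package is installed, uses the id registered in pointMass/__init__.py (pointMassObject-v0), and targets the older gym API these environments are written against (GoalEnv dict observations and a four-tuple step return).

```python
import gym
import pointMass  # importing the package runs the register() calls in pointMass/__init__.py

env = gym.make('pointMassObject-v0')
obs = env.reset()

# Goal-based dict observation: positions/velocities of the mass and block,
# plus the achieved and desired xy goal of the block.
print(obs['observation'], obs['achieved_goal'], obs['desired_goal'])

for _ in range(200):
    action = env.action_space.sample()          # 2D shift of the mass in x and y, in [-1, 1]
    obs, reward, done, info = env.step(action)  # info['is_success'] flags goal attainment
    if done:
        obs = env.reset()

env.close()
```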
13 | 14 | In pointMass-v0 (class pointMassEnv), the gold point mass must move itself to its goal location, the green sphere. The 'observation' contains the xy position and velocity of the mass, and the achieved and desired goals are the xy position of the mass. 15 | 16 |

17 | Object Manipulation? 18 |

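Because the environments follow the gym GoalEnv interface and default to a sparse reward, rewards can be recomputed for relabelled goals, which is what hindsight experience replay needs. A sketch under the same assumptions as above:

```python
import gym
import pointMass  # registers the environment ids

env = gym.make('pointMass-v0')
obs = env.reset()
next_obs, reward, done, info = env.step(env.action_space.sample())

# Sparse reward: -1 until the achieved goal is within sparse_rew_thresh of the desired goal, then 0.
print(reward)

# Hindsight relabelling: treat the xy position actually reached as if it had been the goal.
relabelled_goal = next_obs['achieved_goal']
print(env.compute_reward(next_obs['achieved_goal'], relabelled_goal, info))  # 0, i.e. success
```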
19 | 20 | 21 | The env also supports my experiments in hierarchical learning. Here are two examples of hierarchical sub-goal setting based on https://arxiv.org/abs/1910.11956 and https://arxiv.org/abs/1712.00948. 22 | 23 | 24 | 25 |

26 | Object Manipulation? 27 |

28 | 29 | 30 |

31 | Object Manipulation? 32 |

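The sub-goals chosen by the higher level can be drawn in the scene with visualise_sub_goal. A rough sketch, with a hand-picked xy sub-goal standing in for the output of a trained high-level policy (the pybullet GUI is switched on by calling render before reset, as in the module's own main function):

```python
import numpy as np
from pointMass.envs import pointMassEnv

env = pointMassEnv()       # plain self-navigation task
env.render(mode='human')   # open the pybullet GUI; must be called before reset
obs = env.reset()

# A hand-picked xy sub-goal for the point mass; a hierarchical agent would output this.
sub_goal = np.array([1.0, -1.0])
env.visualise_sub_goal(sub_goal, sub_goal_state='full_positional_state')

for _ in range(50):
    obs, reward, done, info = env.step(env.action_space.sample())
```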
33 | -------------------------------------------------------------------------------- /images/object.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sholtodouglas/pointMass/5e063ed7ecaab84c78db7648db3a304322f48547/images/object.gif -------------------------------------------------------------------------------- /images/relay.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sholtodouglas/pointMass/5e063ed7ecaab84c78db7648db3a304322f48547/images/relay.gif -------------------------------------------------------------------------------- /images/relaylonger3.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sholtodouglas/pointMass/5e063ed7ecaab84c78db7648db3a304322f48547/images/relaylonger3.gif -------------------------------------------------------------------------------- /images/self.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sholtodouglas/pointMass/5e063ed7ecaab84c78db7648db3a304322f48547/images/self.gif -------------------------------------------------------------------------------- /pointMass/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.registration import register 2 | 3 | register( 4 | id='pointMass-v0', 5 | entry_point='pointMass.envs:pointMassEnv', 6 | ) 7 | 8 | register( 9 | id='pointMassDense-v0', 10 | entry_point='pointMass.envs:pointMassEnvDense', 11 | ) 12 | 13 | register( 14 | id='pointMassObject-v0', 15 | entry_point='pointMass.envs:pointMassEnvObject', 16 | ) 17 | 18 | register( 19 | id='pointMassObjectDuo-v0', 20 | entry_point='pointMass.envs:pointMassEnvObjectDuo', 21 | ) 22 | 23 | register( 24 | id='pointMassObjectDense-v0', 25 | entry_point='pointMass.envs:pointMassEnvObjectDense', 26 | ) -------------------------------------------------------------------------------- /pointMass/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sholtodouglas/pointMass/5e063ed7ecaab84c78db7648db3a304322f48547/pointMass/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /pointMass/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sholtodouglas/pointMass/5e063ed7ecaab84c78db7648db3a304322f48547/pointMass/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /pointMass/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from pointMass.envs.environments import pointMassEnv 2 | from pointMass.envs.environments import pointMassEnvDense 3 | from pointMass.envs.environments import pointMassEnvObject 4 | from pointMass.envs.environments import pointMassEnvObjectDense 5 | from pointMass.envs.environments import pointMassEnvObjectDuo 6 | from pointMass.envs.environments import pointMassEnvMaze 7 | -------------------------------------------------------------------------------- /pointMass/envs/environments.py: -------------------------------------------------------------------------------- 1 | 2 | import gym, gym.utils, gym.utils.seeding 3 | import pybullet as p 4 | import 
numpy as np 5 | import pybullet_data 6 | import os 7 | import time 8 | from pybullet_utils import bullet_client 9 | urdfRoot=pybullet_data.getDataPath() 10 | import gym.spaces as spaces 11 | import math 12 | 13 | GUI = False 14 | viewMatrix = p.computeViewMatrixFromYawPitchRoll(cameraTargetPosition = [0,0,0], distance = 6, yaw = 0, pitch = -90, roll = 0, upAxisIndex = 2) 15 | 16 | 17 | projectionMatrix = p.computeProjectionMatrixFOV(fov = 50,aspect = 1,nearVal = 0.01,farVal = 10) 18 | 19 | 20 | class pointMassEnv(gym.GoalEnv): 21 | 22 | 23 | metadata = { 24 | 'render.modes': ['human', 'rgb_array'], 25 | 'video.frames_per_second': 60 26 | } 27 | 28 | def __init__(self, render = False, num_objects = 0, sparse = True, TARG_LIMIT = 2, sparse_rew_thresh=0.3, pointMaze=False, deterministic_pos=False): 29 | self.num_objects = num_objects 30 | 31 | action_dim = 2 32 | obs_dim = 4 33 | self.ENVIRONMENT_BOUNDS = 2.5# LENGTH 6 34 | self.sparse_rew_thresh = sparse_rew_thresh 35 | 36 | self._max_episode_steps = 100 + num_objects*100 37 | 38 | obs_dim += 4*num_objects # pos and vel of the other pm that we are knocking around. 39 | self.num_goals = max(num_objects,1) 40 | goal_dim = 2*self.num_goals 41 | 42 | high = np.ones([action_dim]) 43 | self.action_space = spaces.Box(-high, high) 44 | high_obs = self.ENVIRONMENT_BOUNDS * np.ones([obs_dim]) 45 | high_goal = self.ENVIRONMENT_BOUNDS * np.ones([goal_dim]) 46 | 47 | 48 | self.observation_space = spaces.Dict(dict( 49 | desired_goal=spaces.Box(-high_goal, high_goal), 50 | achieved_goal=spaces.Box(-high_goal, high_goal), 51 | observation=spaces.Box(-high_obs, high_obs), 52 | controllable_achieved_goal = spaces.Box( -self.ENVIRONMENT_BOUNDS*np.ones([action_dim]), self.ENVIRONMENT_BOUNDS*np.ones([action_dim])), 53 | full_positional_state = spaces.Box( -self.ENVIRONMENT_BOUNDS*np.ones([action_dim+2*self.num_objects]), self.ENVIRONMENT_BOUNDS*np.ones([action_dim+2*self.num_objects])) 54 | )) 55 | 56 | 57 | 58 | self.isRender = False 59 | self.GUI_ACTIVE = False 60 | self._p = p 61 | self.physics_client_active = 0 62 | self.movable_goal = False 63 | self.roving_goal = False 64 | self.TARG_LIMIT = TARG_LIMIT 65 | self.TARG_MIN = 0.1 66 | self._seed() 67 | self.global_step = 0 68 | self.opposite_goal = False 69 | self.show_goal = True 70 | self.objects = [] 71 | self.num_objects = num_objects 72 | self.state_representation = None 73 | self.sub_goals = None 74 | self.pointMaze = pointMaze 75 | self.deterministic_pos = deterministic_pos 76 | 77 | if sparse: 78 | self.set_sparse_reward() 79 | 80 | def set_state_representation(self, autoencoder): 81 | self.state_representation = autoencoder 82 | if self.state_representation is not None: 83 | self.episodes = np.load('collected_data/120000HER2_pointMassObject-v0_Hidden_128l_2episodes.npz', allow_pickle=True)[ 84 | 'episodes'] 85 | 86 | 87 | def crop(self, num, lim): 88 | if num >= 0 and num < lim: 89 | num = lim 90 | elif num < 0 and num > -lim: 91 | num = -lim 92 | return num 93 | 94 | 95 | def reset_goal_pos(self, goal = None): 96 | 97 | # if self.state_representation: 98 | # random_ep = np.random.randint(0, len(self.episodes)) 99 | # #random_frame = np.random.randint(0, len(self.episodes[random_ep])) 100 | # self.desired_frame = self.episodes[random_ep][-1][0] 101 | # self.desired_state = self.desired_frame['observation'] 102 | # goal = self.desired_frame['achieved_goal'] 103 | # print('set desired') 104 | if goal is None: 105 | self.goal = [] 106 | for g in range(0,self.num_goals): 107 | goal_x = 
self.crop(self.np_random.uniform(low=-self.TARG_LIMIT, high=self.TARG_LIMIT), self.TARG_MIN) 108 | goal_y = self.crop(self.np_random.uniform(low=-self.TARG_LIMIT, high=self.TARG_LIMIT), self.TARG_MIN) 109 | if self.pointMaze: 110 | if self.deterministic_pos: 111 | goal_x , goal_y = -0.6, 1.5 112 | else: 113 | goal_x = self.np_random.uniform(low=-self.TARG_LIMIT, high=-1.5) 114 | goal_y = self.np_random.uniform(low=0.4, high=self.TARG_LIMIT) 115 | 116 | self.goal += [goal_x, goal_y] 117 | 118 | else: 119 | self.goal = goal 120 | 121 | self.goal = np.array(self.goal) 122 | 123 | #self.goal_velocity = self.np_random.uniform(low=0, high=3) 124 | # try: 125 | # self._p.removeUserDebugItem(self.goal) 126 | # except: 127 | # pass 128 | 129 | # self.goal = self._p.addUserDebugText("o", [self.goal_x, self.goal_y, 0.1], 130 | # textColorRGB=[0, 0, 1], 131 | # textSize=2) 132 | 133 | if self.isRender: 134 | if self.show_goal: 135 | index = 0 136 | for g in range(0, self.num_goals): 137 | self._p.resetBasePositionAndOrientation(self.goals[g], [self.goal[index], self.goal[index+1],0.1], [0,0,0,1]) 138 | self._p.changeConstraint(self.goal_cids[g],[self.goal[index], self.goal[index+1],0.1], maxForce = 100) 139 | index += 2 140 | 141 | def reset_object_pos(self, obs = None, extra_info = None, curric = False): 142 | 143 | if obs is None: 144 | index = 0 145 | for obj in self.objects: 146 | current_pos = self._p.getBasePositionAndOrientation(self.mass)[0] 147 | if curric == True: 148 | 149 | vector_to_goal = np.array([self.goal[index]-current_pos[0], self.goal[index+1]-current_pos[1],0.6]) 150 | 151 | pos = np.array(current_pos)+vector_to_goal/2+(np.random.rand(3)*1)-0.5 152 | 153 | # shift it a little if too close to the goal 154 | pos = np.random.rand(3)*4-2 155 | while self.calc_target_distance(pos[0:2], [self.goal[index], self.goal[index+1]]) < 1: 156 | pos = pos + (np.random.rand(3)*1)-0.5 157 | while self.calc_target_distance(pos[0:2], [current_pos[0], current_pos[1]]) < 1: 158 | pos = pos + (np.random.rand(3)*1)-0.5 159 | pos[2] = 0.4 160 | ori = [0,0,0,1] 161 | obs_vel_x, obs_vel_y =0,0 162 | self._p.resetBasePositionAndOrientation(obj, pos,ori) 163 | self._p.resetBaseVelocity(obj, [obs_vel_x, obs_vel_y, 0]) 164 | index += 2 165 | else: 166 | starting_index = 4 # the first object index 167 | for obj in self.objects: 168 | obs_x, obs_y = obs[starting_index], obs[starting_index+1] 169 | obs_z = 0.4 if extra_info is None else extra_info[0] 170 | pos = [obs_x, obs_y, obs_z] 171 | ori = [0,0,0,1] if extra_info is None else extra_info[1:5] 172 | obs_vel_x, obs_vel_y = obs[starting_index+2], obs[starting_index+3] 173 | self._p.resetBasePositionAndOrientation(obj, pos,ori) 174 | self._p.resetBaseVelocity(obj, [obs_vel_x, obs_vel_y, 0]) 175 | starting_index += 4 # go the the next object in the observation 176 | 177 | 178 | def initialize_actor_pos(self,o): 179 | x, y, x_vel, y_vel = o[0], o[1], o[2], o[3] 180 | self._p.resetBasePositionAndOrientation(self.mass, [x, y, -0.1], [0, 0, 0, 1]) 181 | self._p.changeConstraint(self.mass_cid, [x, y, -0.1], maxForce=100) 182 | self._p.resetBaseVelocity(self.mass, [x_vel, y_vel, 0]) 183 | 184 | #TODO change the env initialise start pos to a more general form of the function 185 | 186 | def initialize_start_pos(self, o, extra_info = None): 187 | if type(o) is dict: 188 | o = o['observation'] 189 | self.initialize_actor_pos(o) 190 | if self.num_objects > 0: 191 | self.reset_object_pos(o, extra_info) 192 | 193 | 194 | 195 | def visualise_sub_goal(self, sub_goal, 
sub_goal_state = 'achieved_goal'): 196 | 197 | # in the sub_goal case we either only have the positional info, or we have the full state positional info. 198 | #print(sub_goal) 199 | index = 0 200 | if self.sub_goals is None: 201 | self.sub_goals = [] 202 | self.sub_goal_cids = [] 203 | print('initing') 204 | sphereRadius = 0.15 205 | mass = 1 206 | colSphereId = self._p.createCollisionShape(p.GEOM_SPHERE, radius=sphereRadius) 207 | relativeChildPosition = [0, 0, 0] 208 | relativeChildOrientation = [0, 0, 0, 1] 209 | alpha = 0.5 210 | colors = [[212/250,175/250,55/250,alpha], [0,1,0,alpha], [0,0,1,alpha]] 211 | if sub_goal_state is 'achieved_goal': 212 | colors = [ [0, 1, 0, alpha], [0, 0, 1, alpha]] 213 | 214 | for g in range(0, len(sub_goal)//2): 215 | if g == 0 and sub_goal_state is not 'achieved_goal': # all other ones include sphere 216 | # the sphere 217 | visId = p.createVisualShape(p.GEOM_SPHERE, radius = sphereRadius, 218 | rgbaColor=colors[g]) 219 | else: 220 | 221 | 222 | visId = p.createVisualShape(p.GEOM_BOX, halfExtents=[0.35,0.35,0.35], 223 | rgbaColor=colors[g]) 224 | 225 | 226 | self.sub_goals.append(self._p.createMultiBody(mass, colSphereId, visId, [sub_goal[index], sub_goal[index + 1], 0.1])) 227 | collisionFilterGroup = 0 228 | collisionFilterMask = 0 229 | self._p.setCollisionFilterGroupMask(self.sub_goals[g], -1, collisionFilterGroup, collisionFilterMask) 230 | self.sub_goal_cids.append( 231 | self._p.createConstraint(self.sub_goals[g], -1, -1, -1, self._p.JOINT_FIXED, [sub_goal[index], sub_goal[index + 1], 0.1], [0, 0, 0.1], 232 | relativeChildPosition, relativeChildOrientation)) 233 | index +=2 234 | 235 | 236 | else: 237 | 238 | 239 | for g in range(0, len(sub_goal)//2): 240 | self._p.resetBasePositionAndOrientation(self.sub_goals[g], [sub_goal[index], sub_goal[index + 1], 0.1], 241 | [0, 0, 0, 1]) 242 | self._p.changeConstraint(self.sub_goal_cids[g], [sub_goal[index], sub_goal[index + 1], 0.1], maxForce=100) 243 | index += 2 244 | 245 | 246 | 247 | 248 | def calc_state(self): 249 | 250 | # state will be x,y pos, total velocity, x,y goal. 251 | current_pos = self._p.getBasePositionAndOrientation(self.mass)[0] 252 | x,y = current_pos[0], current_pos[1] 253 | velocity = self._p.getBaseVelocity(self.mass)[0] 254 | x_vel, y_vel = velocity[0], velocity[1] 255 | #print(x_vel, y_vel) 256 | velocity_mag = (np.sum(np.array(self._p.getBaseVelocity(self.mass)[0])[0:2])**2)**(1/2) 257 | obs = [x,y,x_vel, y_vel] 258 | 259 | achieved_goal = [] 260 | extra_info = [] 261 | if self.num_objects > 0: 262 | for o in self.objects: 263 | obj_pose = self._p.getBasePositionAndOrientation(o) 264 | obs_x, obs_y = obj_pose[0][0], obj_pose[0][1] 265 | velocity_obs = self._p.getBaseVelocity(o)[0] 266 | x_vel_obj, y_vel_obj = velocity_obs[0], velocity_obs[1] 267 | 268 | obs += [obs_x, obs_y, x_vel_obj, y_vel_obj] 269 | achieved_goal += [obs_x, obs_y] 270 | extra_info += [list([obj_pose[0][2]] + list(obj_pose[1]))] 271 | 272 | 273 | extra_info = np.squeeze(np.array(extra_info)).astype('float32') # z pos of the object, ori quaternion of the object. 
274 | 275 | else: # ag is the position of our controlled point mass 276 | achieved_goal = np.array([x,y]) 277 | extra_info = None 278 | 279 | # if self.state_representation: 280 | # obs = np.squeeze(self.state_representation(np.expand_dims(obs,0))[0].numpy()) 281 | 282 | if self.num_objects == 0: 283 | full_positional_state = np.array([x,y]) 284 | else: 285 | full_positional_state = np.array([x,y]+list(achieved_goal)) 286 | return_dict= { 287 | 'observation': np.array(obs).copy().astype('float32'), 288 | 'achieved_goal': np.array(achieved_goal).copy().astype('float32'), 289 | 'desired_goal': self.goal.copy().astype('float32'), 290 | 'extra_info': extra_info, 291 | 'controllable_achieved_goal': np.array([x,y]).copy().astype('float32'), # just the x,y pos of the pointmass, the controllable aspects 292 | 'full_positional_state': full_positional_state.astype('float32') 293 | } 294 | 295 | if self.isRender: 296 | 297 | (_, _, px, depth, _) = p.getCameraImage( 298 | 48, 299 | 48, 300 | viewMatrix, 301 | projectionMatrix, 302 | shadow=0, 303 | flags=p.ER_NO_SEGMENTATION_MASK, 304 | renderer=p.ER_BULLET_HARDWARE_OPENGL, 305 | ) 306 | 307 | rgb_array = np.array(px, dtype=np.uint8) 308 | rgb_array = np.reshape(rgb_array, (48, 48, 4)) # reshape to the 48x48 image requested from getCameraImage above 309 | return_dict["image"] = rgb_array[:, :, :3] 310 | 311 | 312 | 313 | return return_dict 314 | 315 | 316 | def calc_target_distance(self, achieved_goal, desired_goal): 317 | distance = np.sum(abs(achieved_goal - desired_goal)) 318 | return distance 319 | 320 | # def calc_velocity_distance(self): 321 | # velocity = (np.sum(np.array(self._p.getBaseVelocity(self.mass)[0])[0:2])**2)**(1/2) 322 | 323 | # return (velocity - self.goal_velocity) 324 | 325 | 326 | def activate_movable_goal(self): 327 | self.movable_goal = True 328 | 329 | def activate_roving_goal(self): 330 | self.roving_goal = True 331 | 332 | def compute_reward(self, achieved_goal, desired_goal, info = None): 333 | 334 | # reward given if new pos is closer than old 335 | 336 | current_distance = np.linalg.norm(achieved_goal - desired_goal, axis=0) 337 | 338 | position_reward = -1000*(current_distance - self.last_target_distance) 339 | self.last_target_distance = current_distance 340 | 341 | # velocity_diff = self.calc_velocity_distance() 342 | # velocity_reward = -100*(velocity_diff - self.last_velocity_distance) 343 | # self.last_velocity_distance = velocity_diff 344 | #velocity_reward = self.calc_velocity_distance() 345 | 346 | #print('Vreward', velocity_reward) 347 | 348 | # if self.state_representation is not None: 349 | # position_reward = -tf.reduce_mean(tf.losses.MAE(self.state_representation(np.expand_dims(self.calc_state()['observation'],0))[0], self.state_representation(np.expand_dims(self.desired_state,0))[0])) 350 | 351 | return position_reward#+velocity_reward 352 | 353 | def set_sparse_reward(self): 354 | print('Environment set to sparse reward') 355 | self.compute_reward = self.compute_reward_sparse 356 | 357 | def compute_reward_sparse(self, achieved_goal, desired_goal, info=None): 358 | 359 | initially_vectorized = True 360 | dimension = 2 361 | if len(achieved_goal.shape) == 1: 362 | achieved_goal = np.expand_dims(np.array(achieved_goal), axis=0) 363 | desired_goal = np.expand_dims(np.array(desired_goal), axis=0) 364 | initially_vectorized = False 365 | 366 | reward = np.zeros(len(achieved_goal)) 367 | for g in range(0, len(achieved_goal[0]) // dimension): # piecewise reward 368 | g = g * dimension # increments of 2 369 | current_distance = np.linalg.norm(achieved_goal[:, g:g + 
dimension] - desired_goal[:, g:g + dimension], axis=1) 370 | reward += np.where(current_distance > self.sparse_rew_thresh, -1, 0) 371 | 372 | if not initially_vectorized: 373 | return reward[0] 374 | else: 375 | return reward 376 | 377 | 378 | def step(self, action): 379 | 380 | action = action *0.1# put it to the correct scale 381 | x_shift, y_shift = action[0], action[1] 382 | current_pos = self._p.getBasePositionAndOrientation(self.mass)[0] 383 | x,y = current_pos[0], current_pos[1] 384 | 385 | new_x, new_y = np.clip(x+x_shift,-self.ENVIRONMENT_BOUNDS, self.ENVIRONMENT_BOUNDS), np.clip(y+y_shift,-self.ENVIRONMENT_BOUNDS, self.ENVIRONMENT_BOUNDS) 386 | self._p.changeConstraint(self.mass_cid,[new_x, new_y, -0.1], maxForce = 10) 387 | 388 | # force = action*10 389 | # print(force) 390 | # self._p.applyExternalForce(self.mass, -1, force, current_pos, flags=self._p.WORLD_FRAME) 391 | # 392 | for i in range(0,20): 393 | #self._p.applyExternalForce(self.mass, -1, force, current_pos, flags=self._p.WORLD_FRAME) 394 | self._p.stepSimulation() 395 | 396 | 397 | obs = self.calc_state() 398 | 399 | r = self.compute_reward(obs['achieved_goal'], obs['desired_goal']) 400 | 401 | current_distance = self.calc_target_distance(obs['achieved_goal'], self.goal) 402 | 403 | if self.movable_goal: 404 | 405 | if r == 0: 406 | self.reset_goal_pos() 407 | 408 | if self.roving_goal: 409 | if self.global_step % 150 == 0: 410 | 411 | self.reset_goal_pos() 412 | 413 | self.global_step += 1 414 | 415 | success = 0 if self.compute_reward_sparse(obs['achieved_goal'], obs['desired_goal']) < 0 else 1 # assuming negative rewards 416 | 417 | # this part is only for interoperability with baselines 418 | if self.global_step == self._max_episode_steps: 419 | done = True 420 | else: 421 | done = False 422 | return obs, r, done, {'is_success': success} 423 | 424 | def reset(self, o = None ): 425 | if o is not None: 426 | self.initialize_start_pos(o) 427 | else: 428 | #self._p.resetSimulation() 429 | self.global_step = 0 430 | 431 | if self.physics_client_active == 0: 432 | 433 | if self.isRender: 434 | self._p = bullet_client.BulletClient(connection_mode=p.GUI) 435 | self._p.configureDebugVisualizer(self._p.COV_ENABLE_SHADOWS, 0) 436 | self.GUI_ACTIVE = True 437 | else: 438 | self._p = bullet_client.BulletClient(connection_mode=p.DIRECT) 439 | 440 | self.physics_client_active = 1 441 | 442 | sphereRadius = 0.2 443 | mass = 1 444 | visualShapeId = 2 445 | colSphereId = self._p.createCollisionShape(p.GEOM_SPHERE,radius=sphereRadius) 446 | self.mass = self._p.createMultiBody(mass,colSphereId,visualShapeId,[0,0,0.4]) 447 | #objects = self._p.loadMJCF("/Users/francisdouglas/bullet3/data/mjcf/sphere.xml") 448 | #self.mass = objects[0] 449 | # self.mass = [p.loadURDF((os.path.join(urdfRoot,"sphere2.urdf")), 0,0.0,1.0,1.00000,0.707107,0.000000,0.707107)] 450 | relativeChildPosition=[0,0,0] 451 | relativeChildOrientation=[0,0,0,1] 452 | self.mass_cid = self._p.createConstraint(self.mass,-1,-1,-1,self._p.JOINT_FIXED,[0,0,0],[0,0,0],relativeChildPosition,relativeChildOrientation) 453 | 454 | alpha = 1 455 | colors = [[0, 1, 0, alpha], [0, 0, 1, alpha]] 456 | 457 | if self.show_goal: 458 | self.goals = [] 459 | self.goal_cids = [] 460 | 461 | 462 | for g in range(0,self.num_goals): 463 | visId = p.createVisualShape(p.GEOM_SPHERE, radius = sphereRadius, 464 | rgbaColor=colors[g]) 465 | self.goals.append(self._p.createMultiBody(mass,colSphereId,visId,[1,1,1.4])) 466 | collisionFilterGroup = 0 467 | collisionFilterMask = 0 468 | 
self._p.setCollisionFilterGroupMask(self.goals[g], -1, collisionFilterGroup, collisionFilterMask) 469 | self.goal_cids.append(self._p.createConstraint(self.goals[g],-1,-1,-1,self._p.JOINT_FIXED,[1,1,1.4],[0,0,0],relativeChildPosition,relativeChildOrientation)) 470 | #self._p.setRealTimeSimulation(1) 471 | 472 | colwallId = self._p.createCollisionShape(p.GEOM_BOX, halfExtents=[0.05, 2.5, 0.5]) 473 | wallvisId = 10 474 | wall = [self._p.createMultiBody(0, colwallId, 10, [self.TARG_LIMIT * 2 + 0.2, 0, 0.0], 475 | p.getQuaternionFromEuler([0, 0, 0]))] 476 | wall = [self._p.createMultiBody(0, colwallId, 10, [-self.TARG_LIMIT * 2 - 0.2, 0, 0.0], 477 | p.getQuaternionFromEuler([0, 0, 0]))] 478 | wall = [ 479 | self._p.createMultiBody(0, colwallId, 10, [0, self.TARG_LIMIT * 2 + 0.2, 0], 480 | p.getQuaternionFromEuler([0, 0, math.pi / 2]))] 481 | wall = [ 482 | self._p.createMultiBody(0, colwallId, 10, [0, -self.TARG_LIMIT * 2 - 0.2, 0], 483 | p.getQuaternionFromEuler([0, 0, math.pi / 2]))] 484 | 485 | 486 | if self.pointMaze: 487 | divider = self._p.createCollisionShape(p.GEOM_BOX, halfExtents=[0.7, 1.2, 0.5]) 488 | divider = [self._p.createMultiBody(0, divider, 10, [-0.8, 0, 0], p.getQuaternionFromEuler([0, 0, math.pi / 2]))] 489 | divider = self._p.createCollisionShape(p.GEOM_BOX, halfExtents=[1.5, 0.6, 0.5]) 490 | divider = [self._p.createMultiBody(0, divider, 10, [-1.4, 1.1, 0], 491 | p.getQuaternionFromEuler([0, 0, math.pi / 2]))] 492 | 493 | if self.num_objects > 0: 494 | colcubeId = self._p.createCollisionShape(p.GEOM_BOX,halfExtents=[0.35,0.35,0.35]) 495 | for i in range(0,self.num_objects): 496 | visId = p.createVisualShape(p.GEOM_BOX, halfExtents=[0.35, 0.35, 0.35], 497 | rgbaColor=colors[i]) 498 | self.objects.append(self._p.createMultiBody(0.1,colcubeId ,visId,[0,0,1.5])) 499 | 500 | 501 | if GUI: 502 | 503 | ACTION_LIMIT = 1 504 | self.x_shift = self._p.addUserDebugParameter("X", -ACTION_LIMIT, ACTION_LIMIT, 0.0) 505 | self.y_shift= self._p.addUserDebugParameter("Y", -ACTION_LIMIT, ACTION_LIMIT, 0.0) 506 | 507 | self._p.configureDebugVisualizer(p.COV_ENABLE_GUI,GUI) 508 | 509 | self._p.setGravity(0,0,-10) 510 | lookat = [0, 0, 0.1] 511 | distance = 7 512 | yaw = 0 513 | self._p.resetDebugVisualizerCamera(distance, yaw, -89, lookat) 514 | colcubeId = self._p.createCollisionShape(p.GEOM_BOX, halfExtents=[5, 5, 0.1]) 515 | visplaneId = self._p.createVisualShape(p.GEOM_BOX, halfExtents=[5, 5, 0.1],rgbaColor=[1,1,1,1]) 516 | plane = self._p.createMultiBody(0, colcubeId, visplaneId, [0, 0, -0.2]) 517 | 518 | #self._p.loadSDF(os.path.join(urdfRoot, "plane_stadium.sdf")) 519 | 520 | 521 | 522 | 523 | 524 | self._p.resetBasePositionAndOrientation(self.mass, [0, 0,0.6], [0,0,0,1]) 525 | self.reset_goal_pos() 526 | 527 | # reset mass location 528 | if self.opposite_goal: 529 | x= -self.goal[0] 530 | y = -self.goal[1] 531 | else: 532 | x = self.crop(self.np_random.uniform(low=-self.TARG_LIMIT, high=self.TARG_LIMIT),self.TARG_MIN) 533 | y = self.crop(self.np_random.uniform(low=-self.TARG_LIMIT, high=self.TARG_LIMIT),self.TARG_MIN) 534 | if self.pointMaze: 535 | x = self.np_random.uniform(low=-2.0, high=-1.5) 536 | y = self.np_random.uniform(low=-2.0, high=-1.5) 537 | 538 | x_vel = 0#self.np_random.uniform(low=-1, high=1) 539 | y_vel = 0#self.np_random.uniform(low=-1, high=1) 540 | 541 | self.initialize_actor_pos([x,y,x_vel,y_vel]) 542 | if self.num_objects > 0: 543 | self.reset_object_pos() 544 | 545 | 546 | #self.last_velocity_distance = self.calc_velocity_distance() 547 | 548 | 549 | 550 | 551 | 
#Instantiate some obstacles. 552 | #self.instaniate_obstacles() 553 | obs = self.calc_state() 554 | self.last_target_distance = self.calc_target_distance(obs['achieved_goal'], obs['desired_goal']) 555 | return obs 556 | 557 | def instaniate_obstacles(self): 558 | colcubeId = self._p.createCollisionShape(p.GEOM_BOX,halfExtents=[1.5,1.5,0.6]) 559 | self.obstacle_center = self._p.createMultiBody(0,colcubeId ,2,[0,0,0.0]) 560 | self.TARG_MIN = 1.5 561 | self.opposite_goal = True 562 | 563 | 564 | def render(self, mode='human'): 565 | 566 | if (mode=="human"): 567 | self.isRender = True 568 | return np.array([]) 569 | if mode == 'rgb_array': 570 | raise NotImplementedError 571 | 572 | 573 | def close(self): 574 | print('closing') 575 | self._p.disconnect() 576 | 577 | 578 | def _seed(self, seed=None): 579 | print('seeding') 580 | self.np_random, seed = gym.utils.seeding.np_random(seed) 581 | 582 | return [seed] 583 | 584 | 585 | 586 | 587 | 588 | # a version where reward is determined by the obstacle. 589 | # need a sub class so we can load it as a openai gym env. 590 | class pointMassEnvObject(pointMassEnv): 591 | def __init__(self, 592 | render=False, 593 | num_objects = 1, sparse = True, TARG_LIMIT = 1.3 594 | ): 595 | super().__init__(render = render, num_objects = num_objects, sparse = sparse, TARG_LIMIT = TARG_LIMIT) 596 | 597 | class pointMassEnvObjectDuo(pointMassEnv): 598 | def __init__(self, 599 | render=False, 600 | num_objects = 2, sparse = True, TARG_LIMIT = 1.3, sparse_rew_thresh = 0.3 # TODO 6 when collecting examples. 601 | ): 602 | super().__init__(render = render, num_objects = num_objects, sparse = sparse, TARG_LIMIT = TARG_LIMIT, sparse_rew_thresh=sparse_rew_thresh) 603 | 604 | class pointMassEnvObjectDense(pointMassEnv): 605 | def __init__(self, 606 | render=False, 607 | num_objects = 1, sparse = False, TARG_LIMIT = 1.3 608 | ): 609 | super().__init__(render = render, num_objects = num_objects, sparse = sparse, TARG_LIMIT = TARG_LIMIT) 610 | 611 | class pointMassEnvDense(pointMassEnv): 612 | def __init__(self, 613 | render=False, 614 | num_objects = 0, sparse = False, TARG_LIMIT = 1.3 615 | ): 616 | super().__init__(render = render, num_objects = num_objects, sparse = sparse, TARG_LIMIT = TARG_LIMIT) 617 | 618 | class pointMassEnvMaze(pointMassEnv): 619 | def __init__(self, 620 | render=False, 621 | num_objects = 0, sparse = True, TARG_LIMIT = 1.3 622 | ): 623 | super().__init__(render = render, num_objects = num_objects, sparse = sparse, TARG_LIMIT = TARG_LIMIT, pointMaze=True, deterministic_pos=True) 624 | 625 | 626 | def main(**kwargs): 627 | 628 | cameraDistance = 1 629 | cameraYaw = 35 630 | cameraPitch = -35 631 | 632 | env = pointMassEnvMaze() 633 | env.render(mode = 'human') 634 | obs = env.reset()['observation'] 635 | 636 | # objects = env._p.loadMJCF("/Users/francisdouglas/bullet3/data/mjcf/sphere.xml") 637 | # sphere = objects[0] 638 | # env._p.resetBasePositionAndOrientation(sphere, [0, 0, 1], [0, 0, 0, 1]) 639 | # env._p.changeDynamics(sphere, -1, linearDamping=0.9) 640 | # env._p.changeVisualShape(sphere, -1, rgbaColor=[1, 0, 0, 1]) 641 | forward = 0 642 | turn = 0 643 | 644 | forwardVec = [2, 0, 0] 645 | cameraDistance = 2 646 | cameraYaw = 35 647 | cameraPitch = -65 648 | steps = 0 649 | while steps<3000: 650 | 651 | spherePos, orn = env._p.getBasePositionAndOrientation(env.mass) 652 | 653 | cameraTargetPosition = spherePos 654 | env._p.resetDebugVisualizerCamera(cameraDistance, cameraYaw, cameraPitch, cameraTargetPosition) 655 | camInfo = 
env._p.getDebugVisualizerCamera() 656 | camForward = camInfo[5] 657 | 658 | keys = env._p.getKeyboardEvents() 659 | for k, v in keys.items(): 660 | 661 | if (k == env._p.B3G_RIGHT_ARROW and (v & env._p.KEY_WAS_TRIGGERED)): 662 | turn = -3 663 | if (k == env._p.B3G_RIGHT_ARROW and (v & env._p.KEY_WAS_RELEASED)): 664 | turn = 0 665 | if (k == env._p.B3G_LEFT_ARROW and (v & env._p.KEY_WAS_TRIGGERED)): 666 | turn = 3 667 | if (k == env._p.B3G_LEFT_ARROW and (v & env._p.KEY_WAS_RELEASED)): 668 | turn = 0 669 | 670 | if (k == env._p.B3G_UP_ARROW and (v & env._p.KEY_WAS_TRIGGERED)): 671 | forward = 1.8 672 | if (k == env._p.B3G_UP_ARROW and (v & env._p.KEY_WAS_RELEASED)): 673 | forward = 0 674 | if (k == env._p.B3G_DOWN_ARROW and (v & env._p.KEY_WAS_TRIGGERED)): 675 | forward = -1.8 676 | if (k == env._p.B3G_DOWN_ARROW and (v & env._p.KEY_WAS_RELEASED)): 677 | forward = 0 678 | 679 | 680 | force = [forward * camForward[0], forward * camForward[1], 0] 681 | cameraYaw = cameraYaw + turn 682 | 683 | # if (forward): 684 | # env._p.applyExternalForce(sphere, -1, force, spherePos, flags=env._p.WORLD_FRAME) 685 | # 686 | # env._p.stepSimulation() 687 | time.sleep(3. / 240.) 688 | 689 | o,r,_,_ = env.step(np.array(force)) 690 | print(o['observation'][0:2])#print(r) 691 | 692 | steps += 1 693 | 694 | 695 | 696 | if __name__ == "__main__": 697 | main() 698 | 699 | 700 | 701 | # env = pointMassEnv() 702 | 703 | 704 | # env.reset() 705 | 706 | # for i in range(0,100): 707 | 708 | # #env._p.stepSimulation() 709 | # time.sleep(0.005) 710 | # #action = [env._p.readUserDebugParameter(env.x_shift), env._p.readUserDebugParameter(env.y_shift)] 711 | # o2, r, d, _ = env.step(np.ones(2)) 712 | # print(o2) 713 | 714 | # test_env = pointMassEnv() 715 | 716 | # test_env.render(mode = 'human') 717 | # test_env.reset() 718 | 719 | # for i in range(0,100000): 720 | 721 | # #env._p.stepSimulation() 722 | # time.sleep(0.005) 723 | # action = np.array([test_env._p.readUserDebugParameter(test_env.x_shift), test_env._p.readUserDebugParameter(test_env.y_shift)]) 724 | # o2, r, d, _ = test_env.step(action) 725 | # print(o2) 726 | 727 | 728 | 729 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | 2 | from setuptools import setup 3 | 4 | setup(name='pointMass', 5 | version='0.0.1', 6 | install_requires=['gym', 'pybullet', 'numpy'] # And any other dependencies foo needs 7 | ) --------------------------------------------------------------------------------