├── .gitignore
├── README.md
├── images
│   ├── object.gif
│   ├── relay.gif
│   ├── relaylonger3.gif
│   └── self.gif
├── pointMass
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── __init__.cpython-36.pyc
│   │   └── __init__.cpython-37.pyc
│   └── envs
│       ├── __init__.py
│       └── environments.py
└── setup.py
/.gitignore:
--------------------------------------------------------------------------------
1 | /pointMass.egg-info/*
2 | /pointMass/envs/superbasic.py
3 | /pointMass/envs/paper_folding.py
4 | /pointMass/envs/basic_tests.py
5 | /pointMass/envs/externalTorqueSphere.py
6 | /pointMass/envs/__pycache__/*
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # pointMass
2 | A pointMass PyBullet RL environment for simple experiments and algorithm verification.
3 |
4 | It is an OpenAI Gym goal-based environment with modifiable difficulty. The easiest version is a 2D self-navigation task; harder versions add a configurable number of objects which must be pushed to goal locations.
5 |
6 | The action space in all cases is 2D: the positional change of the mass in the x and y dimensions.
7 |
8 | In pointMassObject-v0, the gold point mass must push the green block to its goal location, the green sphere. The 'observation' contains the xy positions and velocities of the mass and the block; the achieved and desired goals are the xy position of the block.
9 |
10 | ![pointMassObject-v0](images/object.gif)
11 |
12 |
13 |
14 | In pointMass-v0, the gold point mass must move itself to its goal location, the green sphere. The 'observation' contains the xy position and velocity of the mass; the achieved and desired goals are the xy position of the mass.
15 |
16 | ![pointMass-v0](images/self.gif)
17 |
18 |
19 |
20 |
21 | The env also supports my experiments in hierarchical learning. Here are two examples of hierarchical sub-goal setting, based on https://arxiv.org/abs/1910.11956 and https://arxiv.org/abs/1712.00948.
22 |
23 | ![relay](images/relay.gif)
24 | ![relay](images/relaylonger3.gif)
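25 |
26 | ## Usage
27 |
28 | A minimal sketch of creating and stepping an environment (env ids as registered in pointMass/__init__.py, using the pre-0.26 gym API this repo targets):
29 |
30 | ```python
31 | # from the repo root: pip install -e .
32 | import gym
33 | import pointMass  # importing the package registers the environments
34 |
35 | env = gym.make('pointMassObject-v0')
36 | obs = env.reset()  # goal-based dict observation
37 | action = env.action_space.sample()  # 2D positional change in x and y
38 | obs, reward, done, info = env.step(action)
39 | print(obs['achieved_goal'], obs['desired_goal'], reward, info['is_success'])
40 | ```
41 |
42 | Because the env is goal-based, compute_reward can re-evaluate the (by default sparse) reward for arbitrary relabelled goals, as used in hindsight experience replay; a sketch:
43 |
44 | ```python
45 | relabelled_goal = obs['achieved_goal']  # e.g. pretend the achieved outcome was the goal
46 | reward = env.compute_reward(obs['achieved_goal'], relabelled_goal)  # 0 within the success threshold, else -1
47 | ```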
--------------------------------------------------------------------------------
/images/object.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sholtodouglas/pointMass/5e063ed7ecaab84c78db7648db3a304322f48547/images/object.gif
--------------------------------------------------------------------------------
/images/relay.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sholtodouglas/pointMass/5e063ed7ecaab84c78db7648db3a304322f48547/images/relay.gif
--------------------------------------------------------------------------------
/images/relaylonger3.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sholtodouglas/pointMass/5e063ed7ecaab84c78db7648db3a304322f48547/images/relaylonger3.gif
--------------------------------------------------------------------------------
/images/self.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sholtodouglas/pointMass/5e063ed7ecaab84c78db7648db3a304322f48547/images/self.gif
--------------------------------------------------------------------------------
/pointMass/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.envs.registration import register
2 |
3 | register(
4 | id='pointMass-v0',
5 | entry_point='pointMass.envs:pointMassEnv',
6 | )
7 |
8 | register(
9 | id='pointMassDense-v0',
10 | entry_point='pointMass.envs:pointMassEnvDense',
11 | )
12 |
13 | register(
14 | id='pointMassObject-v0',
15 | entry_point='pointMass.envs:pointMassEnvObject',
16 | )
17 |
18 | register(
19 | id='pointMassObjectDuo-v0',
20 | entry_point='pointMass.envs:pointMassEnvObjectDuo',
21 | )
22 |
23 | register(
24 | id='pointMassObjectDense-v0',
25 | entry_point='pointMass.envs:pointMassEnvObjectDense',
26 | )
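27 |
28 | # Example usage (a sketch): importing this package registers the ids above.
29 | #   import gym, pointMass
30 | #   env = gym.make('pointMass-v0')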
--------------------------------------------------------------------------------
/pointMass/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sholtodouglas/pointMass/5e063ed7ecaab84c78db7648db3a304322f48547/pointMass/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/pointMass/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sholtodouglas/pointMass/5e063ed7ecaab84c78db7648db3a304322f48547/pointMass/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/pointMass/envs/__init__.py:
--------------------------------------------------------------------------------
1 | from pointMass.envs.environments import pointMassEnv
2 | from pointMass.envs.environments import pointMassEnvDense
3 | from pointMass.envs.environments import pointMassEnvObject
4 | from pointMass.envs.environments import pointMassEnvObjectDense
5 | from pointMass.envs.environments import pointMassEnvObjectDuo
6 | from pointMass.envs.environments import pointMassEnvMaze
7 |
--------------------------------------------------------------------------------
/pointMass/envs/environments.py:
--------------------------------------------------------------------------------
1 |
2 | import gym, gym.utils, gym.utils.seeding
3 | import pybullet as p
4 | import numpy as np
5 | import pybullet_data
6 | import os
7 | import time
8 | from pybullet_utils import bullet_client
9 | urdfRoot=pybullet_data.getDataPath()
10 | import gym.spaces as spaces
11 | import math
12 |
13 | GUI = False
14 | viewMatrix = p.computeViewMatrixFromYawPitchRoll(cameraTargetPosition = [0,0,0], distance = 6, yaw = 0, pitch = -90, roll = 0, upAxisIndex = 2)
15 |
16 |
17 | projectionMatrix = p.computeProjectionMatrixFOV(fov = 50,aspect = 1,nearVal = 0.01,farVal = 10)
18 |
19 |
20 | class pointMassEnv(gym.GoalEnv):
21 |
22 |
23 | metadata = {
24 | 'render.modes': ['human', 'rgb_array'],
25 | 'video.frames_per_second': 60
26 | }
27 |
28 | def __init__(self, render = False, num_objects = 0, sparse = True, TARG_LIMIT = 2, sparse_rew_thresh=0.3, pointMaze=False, deterministic_pos=False):
29 | self.num_objects = num_objects
30 |
31 | action_dim = 2
32 | obs_dim = 4
33 | self.ENVIRONMENT_BOUNDS = 2.5 # bound used for the observation space and for clipping positions
34 | self.sparse_rew_thresh = sparse_rew_thresh
35 |
36 | self._max_episode_steps = 100 + num_objects*100
37 |
38 | obs_dim += 4*num_objects # xy position and velocity of each object we are pushing around
39 | self.num_goals = max(num_objects,1)
40 | goal_dim = 2*self.num_goals
41 |
42 | high = np.ones([action_dim])
43 | self.action_space = spaces.Box(-high, high)
44 | high_obs = self.ENVIRONMENT_BOUNDS * np.ones([obs_dim])
45 | high_goal = self.ENVIRONMENT_BOUNDS * np.ones([goal_dim])
46 |
47 |
48 | self.observation_space = spaces.Dict(dict(
49 | desired_goal=spaces.Box(-high_goal, high_goal),
50 | achieved_goal=spaces.Box(-high_goal, high_goal),
51 | observation=spaces.Box(-high_obs, high_obs),
52 | controllable_achieved_goal = spaces.Box( -self.ENVIRONMENT_BOUNDS*np.ones([action_dim]), self.ENVIRONMENT_BOUNDS*np.ones([action_dim])),
53 | full_positional_state = spaces.Box( -self.ENVIRONMENT_BOUNDS*np.ones([action_dim+2*self.num_objects]), self.ENVIRONMENT_BOUNDS*np.ones([action_dim+2*self.num_objects]))
54 | ))
55 |
56 |
57 |
58 | self.isRender = False
59 | self.GUI_ACTIVE = False
60 | self._p = p
61 | self.physics_client_active = 0
62 | self.movable_goal = False
63 | self.roving_goal = False
64 | self.TARG_LIMIT = TARG_LIMIT
65 | self.TARG_MIN = 0.1
66 | self._seed()
67 | self.global_step = 0
68 | self.opposite_goal = False
69 | self.show_goal = True
70 | self.objects = []
71 | self.num_objects = num_objects
72 | self.state_representation = None
73 | self.sub_goals = None
74 | self.pointMaze = pointMaze
75 | self.deterministic_pos = deterministic_pos
76 |
77 | if sparse:
78 | self.set_sparse_reward()
79 |
80 | def set_state_representation(self, autoencoder):
81 | self.state_representation = autoencoder
82 | if self.state_representation is not None:
83 | self.episodes = np.load('collected_data/120000HER2_pointMassObject-v0_Hidden_128l_2episodes.npz', allow_pickle=True)[
84 | 'episodes']
85 |
86 |
87 | def crop(self, num, lim): # push values within lim of zero out to at least +/-lim
88 | if num >= 0 and num < lim:
89 | num = lim
90 | elif num < 0 and num > -lim:
91 | num = -lim
92 | return num
93 |
94 |
95 | def reset_goal_pos(self, goal = None):
96 |
97 | # if self.state_representation:
98 | # random_ep = np.random.randint(0, len(self.episodes))
99 | # #random_frame = np.random.randint(0, len(self.episodes[random_ep]))
100 | # self.desired_frame = self.episodes[random_ep][-1][0]
101 | # self.desired_state = self.desired_frame['observation']
102 | # goal = self.desired_frame['achieved_goal']
103 | # print('set desired')
104 | if goal is None:
105 | self.goal = []
106 | for g in range(0,self.num_goals):
107 | goal_x = self.crop(self.np_random.uniform(low=-self.TARG_LIMIT, high=self.TARG_LIMIT), self.TARG_MIN)
108 | goal_y = self.crop(self.np_random.uniform(low=-self.TARG_LIMIT, high=self.TARG_LIMIT), self.TARG_MIN)
109 | if self.pointMaze:
110 | if self.deterministic_pos:
111 | goal_x , goal_y = -0.6, 1.5
112 | else:
113 | goal_x = self.np_random.uniform(low=-self.TARG_LIMIT, high=-1.5)
114 | goal_y = self.np_random.uniform(low=0.4, high=self.TARG_LIMIT)
115 |
116 | self.goal += [goal_x, goal_y]
117 |
118 | else:
119 | self.goal = goal
120 |
121 | self.goal = np.array(self.goal)
122 |
123 | #self.goal_velocity = self.np_random.uniform(low=0, high=3)
124 | # try:
125 | # self._p.removeUserDebugItem(self.goal)
126 | # except:
127 | # pass
128 |
129 | # self.goal = self._p.addUserDebugText("o", [self.goal_x, self.goal_y, 0.1],
130 | # textColorRGB=[0, 0, 1],
131 | # textSize=2)
132 |
133 | if self.isRender:
134 | if self.show_goal:
135 | index = 0
136 | for g in range(0, self.num_goals):
137 | self._p.resetBasePositionAndOrientation(self.goals[g], [self.goal[index], self.goal[index+1],0.1], [0,0,0,1])
138 | self._p.changeConstraint(self.goal_cids[g],[self.goal[index], self.goal[index+1],0.1], maxForce = 100)
139 | index += 2
140 |
141 | def reset_object_pos(self, obs = None, extra_info = None, curric = False):
142 |
143 | if obs is None:
144 | index = 0
145 | for obj in self.objects:
146 | current_pos = self._p.getBasePositionAndOrientation(self.mass)[0]
147 | if curric:
148 |
149 | vector_to_goal = np.array([self.goal[index]-current_pos[0], self.goal[index+1]-current_pos[1],0.6])
150 |
151 | pos = np.array(current_pos)+vector_to_goal/2+(np.random.rand(3)*1)-0.5
152 |
153 | # shift it a little if too close to the goal
154 | pos = np.random.rand(3)*4-2
155 | while self.calc_target_distance(pos[0:2], [self.goal[index], self.goal[index+1]]) < 1:
156 | pos = pos + (np.random.rand(3)*1)-0.5
157 | while self.calc_target_distance(pos[0:2], [current_pos[0], current_pos[1]]) < 1:
158 | pos = pos + (np.random.rand(3)*1)-0.5
159 | pos[2] = 0.4
160 | ori = [0,0,0,1]
161 | obs_vel_x, obs_vel_y =0,0
162 | self._p.resetBasePositionAndOrientation(obj, pos,ori)
163 | self._p.resetBaseVelocity(obj, [obs_vel_x, obs_vel_y, 0])
164 | index += 2
165 | else:
166 | starting_index = 4 # the first object index
167 | for obj in self.objects:
168 | obs_x, obs_y = obs[starting_index], obs[starting_index+1]
169 | obs_z = 0.4 if extra_info is None else extra_info[0]
170 | pos = [obs_x, obs_y, obs_z]
171 | ori = [0,0,0,1] if extra_info is None else extra_info[1:5]
172 | obs_vel_x, obs_vel_y = obs[starting_index+2], obs[starting_index+3]
173 | self._p.resetBasePositionAndOrientation(obj, pos,ori)
174 | self._p.resetBaseVelocity(obj, [obs_vel_x, obs_vel_y, 0])
175 | starting_index += 4 # go to the next object in the observation
176 |
177 |
178 | def initialize_actor_pos(self,o):
179 | x, y, x_vel, y_vel = o[0], o[1], o[2], o[3]
180 | self._p.resetBasePositionAndOrientation(self.mass, [x, y, -0.1], [0, 0, 0, 1])
181 | self._p.changeConstraint(self.mass_cid, [x, y, -0.1], maxForce=100)
182 | self._p.resetBaseVelocity(self.mass, [x_vel, y_vel, 0])
183 |
184 | # TODO: generalise the env start-position initialisation into one function
185 |
186 | def initialize_start_pos(self, o, extra_info = None):
187 | if isinstance(o, dict):
188 | o = o['observation']
189 | self.initialize_actor_pos(o)
190 | if self.num_objects > 0:
191 | self.reset_object_pos(o, extra_info)
192 |
193 |
194 |
195 | def visualise_sub_goal(self, sub_goal, sub_goal_state = 'achieved_goal'):
196 |
197 | # in the sub_goal case we either only have the positional info, or we have the full state positional info.
198 | #print(sub_goal)
199 | index = 0
200 | if self.sub_goals is None:
201 | self.sub_goals = []
202 | self.sub_goal_cids = []
203 | print('initialising sub-goal markers')
204 | sphereRadius = 0.15
205 | mass = 1
206 | colSphereId = self._p.createCollisionShape(p.GEOM_SPHERE, radius=sphereRadius)
207 | relativeChildPosition = [0, 0, 0]
208 | relativeChildOrientation = [0, 0, 0, 1]
209 | alpha = 0.5
210 | colors = [[212/250,175/250,55/250,alpha], [0,1,0,alpha], [0,0,1,alpha]]
211 | if sub_goal_state == 'achieved_goal':
212 | colors = [ [0, 1, 0, alpha], [0, 0, 1, alpha]]
213 |
214 | for g in range(0, len(sub_goal)//2):
215 | if g == 0 and sub_goal_state != 'achieved_goal': # the first sub-goal (the mass itself) is drawn as a sphere
216 | # the sphere
217 | visId = p.createVisualShape(p.GEOM_SPHERE, radius = sphereRadius,
218 | rgbaColor=colors[g])
219 | else:
220 |
221 |
222 | visId = p.createVisualShape(p.GEOM_BOX, halfExtents=[0.35,0.35,0.35],
223 | rgbaColor=colors[g])
224 |
225 |
226 | self.sub_goals.append(self._p.createMultiBody(mass, colSphereId, visId, [sub_goal[index], sub_goal[index + 1], 0.1]))
227 | collisionFilterGroup = 0
228 | collisionFilterMask = 0
229 | self._p.setCollisionFilterGroupMask(self.sub_goals[g], -1, collisionFilterGroup, collisionFilterMask)
230 | self.sub_goal_cids.append(
231 | self._p.createConstraint(self.sub_goals[g], -1, -1, -1, self._p.JOINT_FIXED, [sub_goal[index], sub_goal[index + 1], 0.1], [0, 0, 0.1],
232 | relativeChildPosition, relativeChildOrientation))
233 | index +=2
234 |
235 |
236 | else:
237 |
238 |
239 | for g in range(0, len(sub_goal)//2):
240 | self._p.resetBasePositionAndOrientation(self.sub_goals[g], [sub_goal[index], sub_goal[index + 1], 0.1],
241 | [0, 0, 0, 1])
242 | self._p.changeConstraint(self.sub_goal_cids[g], [sub_goal[index], sub_goal[index + 1], 0.1], maxForce=100)
243 | index += 2
244 |
245 |
246 |
247 |
248 | def calc_state(self):
249 |
250 | # state will be x,y pos, total velocity, x,y goal.
251 | current_pos = self._p.getBasePositionAndOrientation(self.mass)[0]
252 | x,y = current_pos[0], current_pos[1]
253 | velocity = self._p.getBaseVelocity(self.mass)[0]
254 | x_vel, y_vel = velocity[0], velocity[1]
255 | #print(x_vel, y_vel)
256 | velocity_mag = np.linalg.norm(np.array(velocity)[0:2]) # magnitude of the xy velocity (currently unused)
257 | obs = [x,y,x_vel, y_vel]
258 |
259 | achieved_goal = []
260 | extra_info = []
261 | if self.num_objects > 0:
262 | for o in self.objects:
263 | obj_pose = self._p.getBasePositionAndOrientation(o)
264 | obs_x, obs_y = obj_pose[0][0], obj_pose[0][1]
265 | velocity_obs = self._p.getBaseVelocity(o)[0]
266 | x_vel_obj, y_vel_obj = velocity_obs[0], velocity_obs[1]
267 |
268 | obs += [obs_x, obs_y, x_vel_obj, y_vel_obj]
269 | achieved_goal += [obs_x, obs_y]
270 | extra_info += [list([obj_pose[0][2]] + list(obj_pose[1]))]
271 |
272 |
273 | extra_info = np.squeeze(np.array(extra_info)).astype('float32') # z pos of the object, ori quaternion of the object.
274 |
275 | else: # ag is the position of our controlled point mass
276 | achieved_goal = np.array([x,y])
277 | extra_info = None
278 |
279 | # if self.state_representation:
280 | # obs = np.squeeze(self.state_representation(np.expand_dims(obs,0))[0].numpy())
281 |
282 | if self.num_objects == 0:
283 | full_positional_state = np.array([x,y])
284 | else:
285 | full_positional_state = np.array([x,y]+list(achieved_goal))
286 | return_dict= {
287 | 'observation': np.array(obs).copy().astype('float32'),
288 | 'achieved_goal': np.array(achieved_goal).copy().astype('float32'),
289 | 'desired_goal': self.goal.copy().astype('float32'),
290 | 'extra_info': extra_info,
291 | 'controllable_achieved_goal': np.array([x,y]).copy().astype('float32'), # just the x,y pos of the pointmass, the controllable aspects
292 | 'full_positional_state': full_positional_state.astype('float32')
293 | }
294 |
295 | if self.isRender:
296 |
297 | (_, _, px, depth, _) = p.getCameraImage(
298 | 48,
299 | 48,
300 | viewMatrix,
301 | projectionMatrix,
302 | shadow=0,
303 | flags=p.ER_NO_SEGMENTATION_MASK,
304 | renderer=p.ER_BULLET_HARDWARE_OPENGL,
305 | )
306 |
307 | rgb_array = np.array(px, dtype=np.uint8)
308 | rgb_array = np.reshape(rgb_array, (48, 48, 4)) # match the 48x48 image requested above
309 | return_dict["image"] = rgb_array[:, :, :3]
310 |
311 |
312 |
313 | return return_dict
314 |
315 |
316 | def calc_target_distance(self, achieved_goal, desired_goal):
317 | distance = np.sum(abs(achieved_goal - desired_goal)) # L1 distance
318 | return distance
319 |
320 | # def calc_velocity_distance(self):
321 | # velocity = (np.sum(np.array(self._p.getBaseVelocity(self.mass)[0])[0:2])**2)**(1/2)
322 |
323 | # return (velocity - self.goal_velocity)
324 |
325 |
326 | def activate_movable_goal(self):
327 | self.movable_goal = True
328 |
329 | def activate_roving_goal(self):
330 | self.roving_goal = True
331 |
332 | def compute_reward(self, achieved_goal, desired_goal, info = None):
333 |
334 | # dense reward: positive when the new position is closer to the goal than the old one
335 |
336 | current_distance = np.linalg.norm(achieved_goal - desired_goal, axis=0)
337 |
338 | position_reward = -1000*(current_distance - self.last_target_distance)
339 | self.last_target_distance = current_distance
340 |
341 | # velocity_diff = self.calc_velocity_distance()
342 | # velocity_reward = -100*(velocity_diff - self.last_velocity_distance)
343 | # self.last_velocity_distance = velocity_diff
344 | #velocity_reward = self.calc_velocity_distance()
345 |
346 | #print('Vreward', velocity_reward)
347 |
348 | # if self.state_representation is not None:
349 | # position_reward = -tf.reduce_mean(tf.losses.MAE(self.state_representation(np.expand_dims(self.calc_state()['observation'],0))[0], self.state_representation(np.expand_dims(self.desired_state,0))[0]))
350 |
351 | return position_reward#+velocity_reward
352 |
353 | def set_sparse_reward(self):
354 | print('Environment set to sparse reward')
355 | self.compute_reward = self.compute_reward_sparse
356 |
357 | def compute_reward_sparse(self, achieved_goal, desired_goal, info=None):
358 |
359 | initially_vectorized = True
360 | dimension = 2
361 | if len(achieved_goal.shape) == 1:
362 | achieved_goal = np.expand_dims(np.array(achieved_goal), axis=0)
363 | desired_goal = np.expand_dims(np.array(desired_goal), axis=0)
364 | initially_vectorized = False
365 |
366 | reward = np.zeros(len(achieved_goal))
367 | for g in range(0, len(achieved_goal[0]) // dimension): # piecewise reward
368 | g = g * dimension # increments of 2
369 | current_distance = np.linalg.norm(achieved_goal[:, g:g + dimension] - desired_goal[:, g:g + dimension], axis=1)
370 | reward += np.where(current_distance > self.sparse_rew_thresh, -1, 0)
371 |
372 | if not initially_vectorized:
373 | return reward[0]
374 | else:
375 | return reward
376 |
377 |
378 | def step(self, action):
379 |
380 | action = action * 0.1 # scale the unit action to the per-step position change
381 | x_shift, y_shift = action[0], action[1]
382 | current_pos = self._p.getBasePositionAndOrientation(self.mass)[0]
383 | x,y = current_pos[0], current_pos[1]
384 |
385 | new_x, new_y = np.clip(x+x_shift,-self.ENVIRONMENT_BOUNDS, self.ENVIRONMENT_BOUNDS), np.clip(y+y_shift,-self.ENVIRONMENT_BOUNDS, self.ENVIRONMENT_BOUNDS)
386 | self._p.changeConstraint(self.mass_cid,[new_x, new_y, -0.1], maxForce = 10)
387 |
388 | # force = action*10
389 | # print(force)
390 | # self._p.applyExternalForce(self.mass, -1, force, current_pos, flags=self._p.WORLD_FRAME)
391 | #
392 | for i in range(0,20):
393 | #self._p.applyExternalForce(self.mass, -1, force, current_pos, flags=self._p.WORLD_FRAME)
394 | self._p.stepSimulation()
395 |
396 |
397 | obs = self.calc_state()
398 |
399 | r = self.compute_reward(obs['achieved_goal'], obs['desired_goal'])
400 |
401 | current_distance = self.calc_target_distance(obs['achieved_goal'], self.goal)
402 |
403 | if self.movable_goal:
404 |
405 | if r == 0:
406 | self.reset_goal_pos()
407 |
408 | if self.roving_goal:
409 | if self.global_step % 150 == 0:
410 |
411 | self.reset_goal_pos()
412 |
413 | self.global_step += 1
414 |
415 | success = 0 if self.compute_reward_sparse(obs['achieved_goal'], obs['desired_goal']) < 0 else 1 # assuming negative rewards
416 |
417 | # this part is only for interoperability with baselines
418 | if self.global_step >= self._max_episode_steps:
419 | done = True
420 | else:
421 | done = False
422 | return obs, r, done, {'is_success': success}
423 |
424 | def reset(self, o = None ):
425 | if o is not None:
426 | self.initialize_start_pos(o)
427 | else:
428 | #self._p.resetSimulation()
429 | self.global_step = 0
430 |
431 | if self.physics_client_active == 0:
432 |
433 | if self.isRender:
434 | self._p = bullet_client.BulletClient(connection_mode=p.GUI)
435 | self._p.configureDebugVisualizer(self._p.COV_ENABLE_SHADOWS, 0)
436 | self.GUI_ACTIVE = True
437 | else:
438 | self._p = bullet_client.BulletClient(connection_mode=p.DIRECT)
439 |
440 | self.physics_client_active = 1
441 |
442 | sphereRadius = 0.2
443 | mass = 1
444 | visualShapeId = 2
445 | colSphereId = self._p.createCollisionShape(p.GEOM_SPHERE,radius=sphereRadius)
446 | self.mass = self._p.createMultiBody(mass,colSphereId,visualShapeId,[0,0,0.4])
447 | #objects = self._p.loadMJCF("/Users/francisdouglas/bullet3/data/mjcf/sphere.xml")
448 | #self.mass = objects[0]
449 | # self.mass = [p.loadURDF((os.path.join(urdfRoot,"sphere2.urdf")), 0,0.0,1.0,1.00000,0.707107,0.000000,0.707107)]
450 | relativeChildPosition=[0,0,0]
451 | relativeChildOrientation=[0,0,0,1]
452 | self.mass_cid = self._p.createConstraint(self.mass,-1,-1,-1,self._p.JOINT_FIXED,[0,0,0],[0,0,0],relativeChildPosition,relativeChildOrientation)
453 |
454 | alpha = 1
455 | colors = [[0, 1, 0, alpha], [0, 0, 1, alpha]]
456 |
457 | if self.show_goal:
458 | self.goals = []
459 | self.goal_cids = []
460 |
461 |
462 | for g in range(0,self.num_goals):
463 | visId = p.createVisualShape(p.GEOM_SPHERE, radius = sphereRadius,
464 | rgbaColor=colors[g])
465 | self.goals.append(self._p.createMultiBody(mass,colSphereId,visId,[1,1,1.4]))
466 | collisionFilterGroup = 0
467 | collisionFilterMask = 0
468 | self._p.setCollisionFilterGroupMask(self.goals[g], -1, collisionFilterGroup, collisionFilterMask)
469 | self.goal_cids.append(self._p.createConstraint(self.goals[g],-1,-1,-1,self._p.JOINT_FIXED,[1,1,1.4],[0,0,0],relativeChildPosition,relativeChildOrientation))
470 | #self._p.setRealTimeSimulation(1)
471 |
472 | colwallId = self._p.createCollisionShape(p.GEOM_BOX, halfExtents=[0.05, 2.5, 0.5])
473 | wallvisId = 10
474 | wall = [self._p.createMultiBody(0, colwallId, 10, [self.TARG_LIMIT * 2 + 0.2, 0, 0.0],
475 | p.getQuaternionFromEuler([0, 0, 0]))]
476 | wall = [self._p.createMultiBody(0, colwallId, 10, [-self.TARG_LIMIT * 2 - 0.2, 0, 0.0],
477 | p.getQuaternionFromEuler([0, 0, 0]))]
478 | wall = [
479 | self._p.createMultiBody(0, colwallId, 10, [0, self.TARG_LIMIT * 2 + 0.2, 0],
480 | p.getQuaternionFromEuler([0, 0, math.pi / 2]))]
481 | wall = [
482 | self._p.createMultiBody(0, colwallId, 10, [0, -self.TARG_LIMIT * 2 - 0.2, 0],
483 | p.getQuaternionFromEuler([0, 0, math.pi / 2]))]
484 |
485 |
486 | if self.pointMaze:
487 | divider = self._p.createCollisionShape(p.GEOM_BOX, halfExtents=[0.7, 1.2, 0.5])
488 | divider = [self._p.createMultiBody(0, divider, 10, [-0.8, 0, 0], p.getQuaternionFromEuler([0, 0, math.pi / 2]))]
489 | divider = self._p.createCollisionShape(p.GEOM_BOX, halfExtents=[1.5, 0.6, 0.5])
490 | divider = [self._p.createMultiBody(0, divider, 10, [-1.4, 1.1, 0],
491 | p.getQuaternionFromEuler([0, 0, math.pi / 2]))]
492 |
493 | if self.num_objects > 0:
494 | colcubeId = self._p.createCollisionShape(p.GEOM_BOX,halfExtents=[0.35,0.35,0.35])
495 | for i in range(0,self.num_objects):
496 | visId = p.createVisualShape(p.GEOM_BOX, halfExtents=[0.35, 0.35, 0.35],
497 | rgbaColor=colors[i])
498 | self.objects.append(self._p.createMultiBody(0.1,colcubeId ,visId,[0,0,1.5]))
499 |
500 |
501 | if GUI:
502 |
503 | ACTION_LIMIT = 1
504 | self.x_shift = self._p.addUserDebugParameter("X", -ACTION_LIMIT, ACTION_LIMIT, 0.0)
505 | self.y_shift= self._p.addUserDebugParameter("Y", -ACTION_LIMIT, ACTION_LIMIT, 0.0)
506 |
507 | self._p.configureDebugVisualizer(p.COV_ENABLE_GUI,GUI)
508 |
509 | self._p.setGravity(0,0,-10)
510 | lookat = [0, 0, 0.1]
511 | distance = 7
512 | yaw = 0
513 | self._p.resetDebugVisualizerCamera(distance, yaw, -89, lookat)
514 | colcubeId = self._p.createCollisionShape(p.GEOM_BOX, halfExtents=[5, 5, 0.1])
515 | visplaneId = self._p.createVisualShape(p.GEOM_BOX, halfExtents=[5, 5, 0.1],rgbaColor=[1,1,1,1])
516 | plane = self._p.createMultiBody(0, colcubeId, visplaneId, [0, 0, -0.2])
517 |
518 | #self._p.loadSDF(os.path.join(urdfRoot, "plane_stadium.sdf"))
519 |
520 |
521 |
522 |
523 |
524 | self._p.resetBasePositionAndOrientation(self.mass, [0, 0,0.6], [0,0,0,1])
525 | self.reset_goal_pos()
526 |
527 | # reset mass location
528 | if self.opposite_goal:
529 | x= -self.goal[0]
530 | y = -self.goal[1]
531 | else:
532 | x = self.crop(self.np_random.uniform(low=-self.TARG_LIMIT, high=self.TARG_LIMIT),self.TARG_MIN)
533 | y = self.crop(self.np_random.uniform(low=-self.TARG_LIMIT, high=self.TARG_LIMIT),self.TARG_MIN)
534 | if self.pointMaze:
535 | x = self.np_random.uniform(low=-2.0, high=-1.5)
536 | y = self.np_random.uniform(low=-2.0, high=-1.5)
537 |
538 | x_vel = 0 # self.np_random.uniform(low=-1, high=1)
539 | y_vel = 0 # self.np_random.uniform(low=-1, high=1)
540 |
541 | self.initialize_actor_pos([x,y,x_vel,y_vel])
542 | if self.num_objects > 0:
543 | self.reset_object_pos()
544 |
545 |
546 | #self.last_velocity_distance = self.calc_velocity_distance()
547 |
548 |
549 |
550 |
551 | # Instantiate some obstacles.
552 | #self.instantiate_obstacles()
553 | obs = self.calc_state()
554 | self.last_target_distance = self.calc_target_distance(obs['achieved_goal'], obs['desired_goal'])
555 | return obs
556 |
557 | def instantiate_obstacles(self):
558 | colcubeId = self._p.createCollisionShape(p.GEOM_BOX,halfExtents=[1.5,1.5,0.6])
559 | self.obstacle_center = self._p.createMultiBody(0,colcubeId ,2,[0,0,0.0])
560 | self.TARG_MIN = 1.5
561 | self.opposite_goal = True
562 |
563 |
564 | def render(self, mode='human'):
565 |
566 | if (mode=="human"):
567 | self.isRender = True
568 | return np.array([])
569 | if mode == 'rgb_array':
570 | raise NotImplementedError
571 |
572 |
573 | def close(self):
574 | print('closing')
575 | self._p.disconnect()
576 |
577 |
578 | def _seed(self, seed=None):
579 | print('seeding')
580 | self.np_random, seed = gym.utils.seeding.np_random(seed)
581 |
582 | return [seed]
583 |
584 |
585 |
586 |
587 |
588 | # A version where the reward is determined by the object's position.
589 | # We need subclasses so each variant can be registered as an OpenAI Gym env.
590 | class pointMassEnvObject(pointMassEnv):
591 | def __init__(self,
592 | render=False,
593 | num_objects = 1, sparse = True, TARG_LIMIT = 1.3
594 | ):
595 | super().__init__(render = render, num_objects = num_objects, sparse = sparse, TARG_LIMIT = TARG_LIMIT)
596 |
597 | class pointMassEnvObjectDuo(pointMassEnv):
598 | def __init__(self,
599 | render=False,
600 | num_objects = 2, sparse = True, TARG_LIMIT = 1.3, sparse_rew_thresh = 0.3 # TODO 6 when collecting examples.
601 | ):
602 | super().__init__(render = render, num_objects = num_objects, sparse = sparse, TARG_LIMIT = TARG_LIMIT, sparse_rew_thresh=sparse_rew_thresh)
603 |
604 | class pointMassEnvObjectDense(pointMassEnv):
605 | def __init__(self,
606 | render=False,
607 | num_objects = 1, sparse = False, TARG_LIMIT = 1.3
608 | ):
609 | super().__init__(render = render, num_objects = num_objects, sparse = sparse, TARG_LIMIT = TARG_LIMIT)
610 |
611 | class pointMassEnvDense(pointMassEnv):
612 | def __init__(self,
613 | render=False,
614 | num_objects = 0, sparse = False, TARG_LIMIT = 1.3
615 | ):
616 | super().__init__(render = render, num_objects = num_objects, sparse = sparse, TARG_LIMIT = TARG_LIMIT)
617 |
618 | class pointMassEnvMaze(pointMassEnv):
619 | def __init__(self,
620 | render=False,
621 | num_objects = 0, sparse = True, TARG_LIMIT = 1.3
622 | ):
623 | super().__init__(render = render, num_objects = num_objects, sparse = sparse, TARG_LIMIT = TARG_LIMIT, pointMaze=True, deterministic_pos=True)
624 |
625 |
626 | def main(**kwargs):
627 |
628 | cameraDistance = 1
629 | cameraYaw = 35
630 | cameraPitch = -35
631 |
632 | env = pointMassEnvMaze()
633 | env.render(mode = 'human')
634 | obs = env.reset()['observation']
635 |
636 | # objects = env._p.loadMJCF("/Users/francisdouglas/bullet3/data/mjcf/sphere.xml")
637 | # sphere = objects[0]
638 | # env._p.resetBasePositionAndOrientation(sphere, [0, 0, 1], [0, 0, 0, 1])
639 | # env._p.changeDynamics(sphere, -1, linearDamping=0.9)
640 | # env._p.changeVisualShape(sphere, -1, rgbaColor=[1, 0, 0, 1])
641 | forward = 0
642 | turn = 0
643 |
644 | forwardVec = [2, 0, 0]
645 | cameraDistance = 2
646 | cameraYaw = 35
647 | cameraPitch = -65
648 | steps = 0
649 | while steps<3000:
650 |
651 | spherePos, orn = env._p.getBasePositionAndOrientation(env.mass)
652 |
653 | cameraTargetPosition = spherePos
654 | env._p.resetDebugVisualizerCamera(cameraDistance, cameraYaw, cameraPitch, cameraTargetPosition)
655 | camInfo = env._p.getDebugVisualizerCamera()
656 | camForward = camInfo[5]
657 |
658 | keys = env._p.getKeyboardEvents()
659 | for k, v in keys.items():
660 |
661 | if (k == env._p.B3G_RIGHT_ARROW and (v & env._p.KEY_WAS_TRIGGERED)):
662 | turn = -3
663 | if (k == env._p.B3G_RIGHT_ARROW and (v & env._p.KEY_WAS_RELEASED)):
664 | turn = 0
665 | if (k == env._p.B3G_LEFT_ARROW and (v & env._p.KEY_WAS_TRIGGERED)):
666 | turn = 3
667 | if (k == env._p.B3G_LEFT_ARROW and (v & env._p.KEY_WAS_RELEASED)):
668 | turn = 0
669 |
670 | if (k == env._p.B3G_UP_ARROW and (v & env._p.KEY_WAS_TRIGGERED)):
671 | forward = 1.8
672 | if (k == env._p.B3G_UP_ARROW and (v & env._p.KEY_WAS_RELEASED)):
673 | forward = 0
674 | if (k == env._p.B3G_DOWN_ARROW and (v & env._p.KEY_WAS_TRIGGERED)):
675 | forward = -1.8
676 | if (k == env._p.B3G_DOWN_ARROW and (v & env._p.KEY_WAS_RELEASED)):
677 | forward = 0
678 |
679 |
680 | force = [forward * camForward[0], forward * camForward[1], 0]
681 | cameraYaw = cameraYaw + turn
682 |
683 | # if (forward):
684 | # env._p.applyExternalForce(sphere, -1, force, spherePos, flags=env._p.WORLD_FRAME)
685 | #
686 | # env._p.stepSimulation()
687 | time.sleep(3. / 240.)
688 |
689 | o,r,_,_ = env.step(np.array(force))
690 | print(o['observation'][0:2]) # print(r)
691 |
692 | steps += 1
693 |
694 |
695 |
696 | if __name__ == "__main__":
697 | main()
698 |
699 |
700 |
701 | # env = pointMassEnv()
702 |
703 |
704 | # env.reset()
705 |
706 | # for i in range(0,100):
707 |
708 | # #env._p.stepSimulation()
709 | # time.sleep(0.005)
710 | # #action = [env._p.readUserDebugParameter(env.x_shift), env._p.readUserDebugParameter(env.y_shift)]
711 | # o2, r, d, _ = env.step(np.ones(2))
712 | # print(o2)
713 |
714 | # test_env = pointMassEnv()
715 |
716 | # test_env.render(mode = 'human')
717 | # test_env.reset()
718 |
719 | # for i in range(0,100000):
720 |
721 | # #env._p.stepSimulation()
722 | # time.sleep(0.005)
723 | # action = np.array([test_env._p.readUserDebugParameter(test_env.x_shift), test_env._p.readUserDebugParameter(test_env.y_shift)])
724 | # o2, r, d, _ = test_env.step(action)
725 | # print(o2)
726 |
727 |
728 |
729 |
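730 | # Example (a sketch) of visualising hierarchical sub-goals during an episode,
731 | # mirroring the render-then-reset order used in main() above:
732 | #   env = pointMassEnvObject()
733 | #   env.render(mode='human')
734 | #   obs = env.reset()
735 | #   env.visualise_sub_goal(obs['achieved_goal'], sub_goal_state='achieved_goal')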
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 |
2 | from setuptools import setup
3 |
4 | setup(name='pointMass',
5 | version='0.0.1',
6 | install_requires=['gym', 'pybullet', 'numpy'] # dependencies required by the environments
7 | )
--------------------------------------------------------------------------------