├── setup.py~ ├── gym_baxter ├── __init__.py~ ├── envs │ ├── __init__.py │ ├── baxter_reacher_env.py │ ├── baxter_avoider_env.py │ ├── baxter_utils.py │ └── baxter_env.py └── __init__.py ├── setup.py ├── README.md ├── .travis.yml └── .gitignore /setup.py~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gym_baxter/__init__.py~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup(name='gym_baxter', 4 | version='0.0.1', 5 | install_requires=['gym'] 6 | ) 7 | -------------------------------------------------------------------------------- /gym_baxter/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from gym_baxter.envs.baxter_reacher_env import BaxterReacherEnv 2 | from gym_baxter.envs.baxter_avoider_env import BaxterAvoiderEnv 3 | -------------------------------------------------------------------------------- /gym_baxter/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.registration import register 2 | 3 | register( 4 | id='BaxterReacher-v0', 5 | entry_point='gym_baxter.envs:BaxterReacherEnv', 6 | timestep_limit=10000, 7 | ) 8 | register( 9 | id='BaxterAvoider-v0', 10 | entry_point='gym_baxter.envs:BaxterAvoiderEnv', 11 | timestep_limit=10000, 12 | ) 13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Baxter robot OpenAI Gym Environment 2 | 3 | This package integrates the Gazebo simulator with the OpenAI Gym. The environments require the 4 | [baxter interface](http://sdk.rethinkrobotics.com/wiki/Baxter_Interface) to be installed. For the 5 | most part, the setup transfers to the physical robot, but the **BaxterAvoiderEnv** requires joint 6 | states that are retrieved from a Gazebo topic. 7 | 8 | # Environments 9 | 10 | Before running any environment you should start Gazebo and initialize the Baxter connection. 11 | 12 | ## BaxterReacherEnv 13 | 14 | Simply reach a given location in coordinate space. 15 | 16 | ## BaxterAvoiderEnv 17 | 18 | Reach a given location in coordinate space while avoiding passing though a certain area. 19 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # Config file for automatic testing at travis-ci.org 2 | # This file will be regenerated if you run travis_pypi_setup.py 3 | 4 | language: python 5 | python: 3.5 6 | 7 | env: 8 | - TOXENV=py35 9 | - TOXENV=py34 10 | - TOXENV=py33 11 | - TOXENV=py27 12 | - TOXENV=py26 13 | - TOXENV=pypy 14 | 15 | # command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors 16 | install: pip install -U tox 17 | 18 | # command to run tests, e.g. python setup.py test 19 | script: tox -e ${TOXENV} 20 | 21 | # After you create the Github repo and add it to Travis, run the 22 | # travis_pypi_setup.py script to finish PyPI deployment setup 23 | deploy: 24 | provider: pypi 25 | distributions: sdist bdist_wheel 26 | user: tpbarron 27 | password: 28 | secure: PLEASE_REPLACE_ME 29 | on: 30 | tags: true 31 | repo: tpbarron/gym_baxter 32 | condition: $TOXENV == py27 33 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | 55 | # Sphinx documentation 56 | docs/_build/ 57 | 58 | # PyBuilder 59 | target/ 60 | -------------------------------------------------------------------------------- /gym_baxter/envs/baxter_reacher_env.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym import error, spaces, utils 3 | from gym.utils import seeding 4 | 5 | from .baxter_env import BaxterEnv 6 | import baxter_utils as bu 7 | 8 | class BaxterReacherEnv(BaxterEnv): 9 | """ 10 | An environment to test training the Baxter to reach a given location 11 | """ 12 | 13 | metadata = {'render.modes': ['human']} 14 | 15 | def __init__(self, timesteps, control=bu.POSITION, limbs=bu.BOTH_LIMBS): 16 | super(BaxterReacherEnv, self).__init__(timesteps, control=control, limbs=limbs) 17 | 18 | # left(x, y, z), right(x, y, z) 19 | # x is forward, y is left, z is up 20 | if (self.limbs == bu.BOTH_LIMBS): 21 | self.goal = np.array([np.random.random(), -np.random.random(), 1.0, 22 | np.random.random(), np.random.random(), 1.0]) 23 | elif (self.limbs == bu.LEFT_LIMB): 24 | self.goal = np.array([np.random.random(), -np.random.random(), 1.0]) 25 | elif (self.limbs == bu.RIGHT_LIMB): 26 | self.goal = np.array([np.random.random(), np.random.random(), 1.0]) 27 | 28 | self.t = 0 29 | 30 | 31 | @property 32 | def action_space(self): 33 | # this will be the joint space 34 | return Box(-np.inf, np.inf, (self.joint_space,)) 35 | 36 | 37 | @property 38 | def observation_space(self): 39 | # [baxter joint angles; goal pos] 40 | return Box(-np.inf, np.inf, (self.joint_space + self.goal.size,)) 41 | 42 | 43 | def _step(self, action): 44 | if (self.limbs == bu.BOTH_LIMBS): 45 | laction, raction = self.get_joint_action_dict(action) 46 | assert(len(laction)) == len(self.llimb.joint_angles()) 47 | assert(len(raction)) == len(self.rlimb.joint_angles()) 48 | if (self.control == bu.VELOCITY): 49 | self.llimb.set_joint_velocities(laction) 50 | self.rlimb.set_joint_velocities(raction) 51 | elif (self.control == bu.TORQUE): 52 | self.llimb.set_joint_torques(laction) 53 | self.rlimb.set_joint_torques(raction) 54 | elif (self.control == bu.POSITION): 55 | self.llimb.set_joint_positions(laction) 56 | self.rlimb.set_joint_positions(raction) 57 | elif (self.limbs == bu.LEFT_LIMB): 58 | laction = self.get_joint_action_dict(action) 59 | assert(len(laction)) == len(self.llimb.joint_angles()) 60 | if (self.control == bu.VELOCITY): 61 | self.llimb.set_joint_velocities(laction) 62 | elif (self.control == bu.TORQUE): 63 | self.llimb.set_joint_torques(laction) 64 | elif (self.control == bu.POSITION): 65 | self.llimb.set_joint_positions(laction) 66 | elif (self.limbs == bu.RIGHT_LIMB): 67 | raction = self.get_joint_action_dict(action) 68 | assert(len(raction)) == len(self.rlimb.joint_angles()) 69 | if (self.control == bu.VELOCITY): 70 | self.rlimb.set_joint_velocities(raction) 71 | elif (self.control == bu.TORQUE): 72 | self.rlimb.set_joint_torques(raction) 73 | elif (self.control == bu.POSITION): 74 | self.rlimb.set_joint_positions(raction) 75 | 76 | self.control_rate.sleep() 77 | self.state = np.hstack((self.get_joint_angles(), self.goal)) 78 | 79 | # only consider reward at end of task 80 | # if we are finished, the reward is the distance from the goal, else 0 81 | done = True if self.t == self.timesteps-1 else False 82 | if done: 83 | dist = np.linalg.norm(self.goal-self.get_endeff_position()) 84 | reward = np.exp(-dist) # if at goal reward = 1, else asymptopes to 0 85 | #reward = -dist # closer go goal, smaller the value 86 | else: 87 | reward = 0.0 88 | self.t += 1 89 | 90 | return self.state, reward, done, {} 91 | 92 | 93 | def _reset(self): 94 | if (self.limbs == bu.BOTH_LIMBS): 95 | self.llimb.move_to_neutral() 96 | self.rlimb.move_to_neutral() 97 | self.state = self.get_joint_angles() 98 | return np.hstack((self.state, self.goal)) 99 | elif (self.limbs == bu.LEFT_LIMB): 100 | self.llimb.move_to_neutral() 101 | self.state = self.get_joint_angles() 102 | return np.hstack((self.state, self.goal)) 103 | elif (self.limbs == bu.RIGHT_LIMB): 104 | self.rlimb.move_to_neutral() 105 | self.state = self.get_joint_angles() 106 | return np.hstack((self.state, self.goal)) 107 | self.t = 0 108 | 109 | def _render(self, mode='human', close=False): 110 | pass 111 | -------------------------------------------------------------------------------- /gym_baxter/envs/baxter_avoider_env.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym import error, spaces, utils 3 | from gym.utils import seeding 4 | 5 | import numpy as np 6 | 7 | from .baxter_env import BaxterEnv 8 | import baxter_utils as bu 9 | 10 | class BaxterAvoiderEnv(BaxterEnv): 11 | 12 | """ 13 | An environment to test training the Baxter to reach a given location 14 | """ 15 | 16 | metadata = {'render.modes': ['human']} 17 | 18 | def __init__(self, timesteps, sphere, control=bu.POSITION, limbs=bu.BOTH_LIMBS): 19 | super(BaxterAvoiderEnv, self).__init__(timesteps, sphere, control=control, limbs=limbs) 20 | 21 | # left(x, y, z), right(x, y, z) 22 | # x is forward, y is left, z is up 23 | if (self.limbs == bu.BOTH_LIMBS): 24 | self.goal = np.array([np.random.random(), -np.random.random(), 1.0, 25 | np.random.random(), np.random.random(), 1.0]) 26 | elif (self.limbs == bu.LEFT_LIMB): 27 | self.goal = np.array([np.random.random(), -np.random.random(), 1.0]) 28 | elif (self.limbs == bu.RIGHT_LIMB): 29 | self.goal = np.array([np.random.random(), np.random.random(), 1.0]) 30 | 31 | self.sphere = sphere 32 | self.step_in_sphere = 0 33 | self.t = 0 34 | 35 | 36 | @property 37 | def action_space(self): 38 | # this will be the joint space 39 | return Box(-np.inf, np.inf, (self.joint_space,)) 40 | 41 | 42 | @property 43 | def observation_space(self): 44 | # [baxter joint angles; goal pos] 45 | return Box(-np.inf, np.inf, (self.joint_space + self.goal.size,)) 46 | 47 | 48 | def _step(self, action): 49 | if (self.limbs == bu.BOTH_LIMBS): 50 | laction, raction = self.get_joint_action_dict(action) 51 | assert(len(laction)) == len(self.llimb.joint_angles()) 52 | assert(len(raction)) == len(self.rlimb.joint_angles()) 53 | if (self.control == bu.VELOCITY): 54 | self.llimb.set_joint_velocities(laction) 55 | self.rlimb.set_joint_velocities(raction) 56 | elif (self.control == bu.TORQUE): 57 | self.llimb.set_joint_torques(laction) 58 | self.rlimb.set_joint_torques(raction) 59 | elif (self.control == bu.POSITION): 60 | self.llimb.set_joint_positions(laction) 61 | self.rlimb.set_joint_positions(raction) 62 | elif (self.limbs == bu.LEFT_LIMB): 63 | laction = self.get_joint_action_dict(action) 64 | assert(len(laction)) == len(self.llimb.joint_angles()) 65 | if (self.control == bu.VELOCITY): 66 | self.llimb.set_joint_velocities(laction) 67 | elif (self.control == bu.TORQUE): 68 | self.llimb.set_joint_torques(laction) 69 | elif (self.control == bu.POSITION): 70 | self.llimb.set_joint_positions(laction) 71 | elif (self.limbs == bu.RIGHT_LIMB): 72 | raction = self.get_joint_action_dict(action) 73 | assert(len(raction)) == len(self.rlimb.joint_angles()) 74 | if (self.control == bu.VELOCITY): 75 | self.rlimb.set_joint_velocities(raction) 76 | elif (self.control == bu.TORQUE): 77 | self.rlimb.set_joint_torques(raction) 78 | elif (self.control == bu.POSITION): 79 | self.rlimb.set_joint_positions(raction) 80 | 81 | self.control_rate.sleep() 82 | self.state = np.hstack((self.get_joint_angles(), self.goal)) 83 | 84 | # check if in area 85 | if (self.limbs == bu.BOTH_LIMBS): 86 | if (bu.limb_in_sphere(self.llimb, self.sphere) or 87 | bu.limb_in_sphere(self.rlimb, self.sphere)): 88 | self.step_in_sphere += 1 89 | elif (self.limbs == bu.LEFT_LIMB): 90 | if (bu.limb_in_sphere(self.llimb, self.sphere)): 91 | self.step_in_sphere += 1 92 | elif (self.limbs == bu.RIGHT_LIMB): 93 | if (bu.limb_in_sphere(self.rlimb, self.sphere)): 94 | self.step_in_sphere += 1 95 | 96 | 97 | # only consider reward at end of task 98 | # if we are finished, the reward is weighted combination of the distance 99 | # from the goal and the number of steps in the space 100 | done = True if self.t == self.timesteps-1 else False 101 | if done: 102 | dist = np.linalg.norm(self.goal-self.get_endeff_position()) 103 | dist_reward = np.exp(-dist) 104 | space_reward = np.exp(-float(self.step_in_sphere) / self.timesteps) 105 | print ("num steps in sphere: ", self.step_in_sphere, dist_reward, space_reward) 106 | reward = .5*dist_reward+.5*space_reward # if at goal reward = 1, else asymptopes to 0 107 | else: 108 | reward = 0.0 109 | self.t += 1 110 | 111 | return self.state, reward, done, {} 112 | 113 | 114 | def _reset(self): 115 | self.step_in_sphere = 0 116 | self.t = 0 117 | if (self.limbs == bu.BOTH_LIMBS): 118 | self.llimb.move_to_neutral() 119 | self.rlimb.move_to_neutral() 120 | self.state = self.get_joint_angles() 121 | return np.hstack((self.state, self.goal)) 122 | elif (self.limbs == bu.LEFT_LIMB): 123 | self.llimb.move_to_neutral() 124 | self.state = self.get_joint_angles() 125 | return np.hstack((self.state, self.goal)) 126 | if (self.limbs == bu.RIGHT_LIMB): 127 | self.rlimb.move_to_neutral() 128 | self.state = self.get_joint_angles() 129 | return np.hstack((self.state, self.goal)) 130 | 131 | 132 | def _render(self, mode='human', close=False): 133 | pass 134 | -------------------------------------------------------------------------------- /gym_baxter/envs/baxter_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse, csv, os, sys 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | import rospy 6 | import baxter_interface 7 | from baxter_interface import CHECK_VERSION 8 | # from baxter_trajectory import Trajectory 9 | 10 | POSITION, VELOCITY, TORQUE = range(3) 11 | LEFT_LIMB, RIGHT_LIMB, BOTH_LIMBS = range(3) 12 | LEFT_LIMB_NAME = 'left' 13 | RIGHT_LIMB_NAME = 'right' 14 | 15 | DOF = 17 16 | DOF_NO_TIME = 16 17 | DOF_NO_TIME_NO_GRIPPER = 14 18 | DOF_NO_TIME_LEFT = 8 19 | DOF_NO_TIME_RIGHT = 8 20 | DOF_NO_TIME_NO_GRIPPER_LEFT = 7 21 | DOF_NO_TIME_NO_GRIPPER_RIGHT = 7 22 | 23 | 24 | def limb_in_sphere(limb, sphere): 25 | """ 26 | sphere - a sphere defined by (x, y, z, r) 27 | limb - a baxter limb object 28 | Returns True if any segment of limb is in space 29 | """ 30 | states = limb.get_link_states() 31 | links = limb._link_names[limb.name] 32 | for i in range(len(links)-1): 33 | l1 = links[i] 34 | l2 = links[i+1] 35 | p1 = states[l1].position 36 | # print ("p1 = ", p1) 37 | p2 = states[l2].position 38 | # print ("p2 = ", p2) 39 | p1 = (p1.x, p1.y, p1.z) 40 | p2 = (p2.x, p2.y, p2.z) 41 | if (check_segment(sphere, (p1, p2))): 42 | return True 43 | return False 44 | 45 | 46 | def check_segment(sphere, segment): 47 | """ 48 | segment = ((x1,y1,z1), (x2,y2,z2)) 49 | returns True if intersection 50 | """ 51 | p1, p2 = segment 52 | x1, y1, z1 = p1 53 | x2, y2, z2 = p2 54 | x3, y3, z3, r = sphere 55 | a = (x2-x1)**2.0 + (y2-y1)**2.0 + (z2-z1)**2.0 56 | b = 2*( (x2-x1)*(x1-x3) + (y2-y1)*(y1-y3) + (z2-z1)*(z1-z3) ) 57 | c = x3**2.0 + y3**2.0 + z3**2.0 + x1**2.0 + y1**2.0 + z1**2.0 - 2*(x3*x1 + y3*y1 + z3*z1) - r**2.0 58 | 59 | u1 = (-b + np.sqrt(b*b-4*a*c)) / 2*a 60 | u2 = (-b - np.sqrt(b*b-4*a*c)) / 2*a 61 | if ((u1 < 0 and u2 < 0) or (u1 > 1 and u2 > 1)): 62 | # line seg outside sphere 63 | return False 64 | if ((u1 < 0 and u2 > 1) or (u1 > 1 and u2 < 0)): 65 | # line segment inside sphere 66 | return True 67 | if ((abs(u1) < 1 and (u2 > 1 or u2 < 0)) or (abs(u2) < 1 and (u1 > 1 or u1 < 0))): 68 | # line segment intersects at one point 69 | return True 70 | if (abs(u1) < 1 and abs(u2) < 1): 71 | # intersects at two points 72 | return True 73 | if (abs(u1) < 1 and abs(u2) < 1 and u1 - u2 < .001): 74 | # tangential 75 | return True 76 | return False 77 | 78 | 79 | def load_trajectories(dir): 80 | """ 81 | Load all trajectories from dir into a list of numpy arrays 82 | """ 83 | trajs = [] 84 | keys = None 85 | files = os.listdir(dir) 86 | for f in files: 87 | keys, traj = load_trajectory(os.path.join(dir, f)) 88 | trajs.append(traj) 89 | 90 | return trajs, keys 91 | 92 | 93 | def load_trajectory(fname): 94 | """ 95 | Load the trajectory in the file fname to a numpy array 96 | """ 97 | with open(fname, 'rb') as csvfile: 98 | reader = csv.reader(csvfile) 99 | keys = reader.next() 100 | data = [] 101 | for row in reader: 102 | row = [float(f) for f in row] 103 | data.append(row) 104 | data = np.array(data) 105 | return keys, data 106 | 107 | 108 | def get_trajectories_without_time(trajs, limb=BOTH_LIMBS): 109 | if (limb == BOTH_LIMBS): 110 | trajs = [t[:,1:] for t in trajs] 111 | return trajs 112 | elif (limb == LEFT_LIMB): 113 | # remove time 114 | trajs = [t[:,1:] for t in trajs] 115 | trajs = [t[:,:8] for t in trajs] 116 | return trajs 117 | elif (limb == RIGHT_LIMB): 118 | # remove time 119 | trajs = [t[:,1:] for t in trajs] 120 | trajs = [t[:,8:] for t in trajs] 121 | return trajs 122 | 123 | 124 | def get_trajectories_without_time_and_gripper(trajs, limb=BOTH_LIMBS): 125 | if (limb == BOTH_LIMBS): 126 | trajs = [np.hstack((t[:,1:8],t[:,9:16])) for t in trajs] 127 | return trajs 128 | elif (limb == LEFT_LIMB): 129 | trajs = [t[:,1:8] for t in trajs] 130 | return trajs 131 | elif (limb == RIGHT_LIMB): 132 | trajs = [t[:,9:16] for t in trajs] 133 | return trajs 134 | 135 | 136 | def interpolate_time(secs, timesteps): 137 | time = np.linspace(0, secs, timesteps) 138 | return time 139 | 140 | 141 | def run_trajectory(traj, mode=POSITION): 142 | if (mode == POSITION): 143 | run_position_trajectory(traj) 144 | # elif (mode == VELOCITY): 145 | # run_velocity_trajectory(traj) 146 | 147 | 148 | # def run_velocity_trajectory(traj_data): 149 | # traj = Trajectory() 150 | # traj.parse_traj(self.keys, traj_data) 151 | # #for safe interrupt handling 152 | # rospy.on_shutdown(traj.stop) 153 | # result = True 154 | # print ("Starting trajectory") 155 | # traj.start() 156 | # result = traj.wait() 157 | # print("Result: " + str(result) + ", Playback Complete") 158 | 159 | 160 | def run_position_trajectory(traj, left, right, rate): 161 | # traj is a numpy array where each column is a DOF 162 | # and each row is a timestep 163 | time, lstart, rstart = get_cmds_from_row(traj[0]) 164 | left.move_to_joint_positions(lstart) 165 | right.move_to_joint_positions(rstart) 166 | 167 | start_time = rospy.get_time() 168 | for t in range(traj.shape[0]): # for each row 169 | sys.stdout.write("\r Record %d of %d" % 170 | (t, traj.shape[0])) 171 | sys.stdout.flush() 172 | time, lcmd, rcmd = get_cmds_from_row(traj[t]) 173 | # send these commands until the next frame 174 | while (rospy.get_time() - start_time) < time: 175 | if rospy.is_shutdown(): 176 | print ("ROS shutdown, aborting") 177 | return 178 | left.set_joint_positions(lcmd) 179 | right.set_joint_positions(rcmd) 180 | # TODO: future gripper handling? 181 | rate.sleep() 182 | 183 | 184 | def get_cmds_from_row(row, keys): 185 | limb_dof = 7 186 | row_list = row.tolist() 187 | row_dict = dict(zip(keys, row_list)) 188 | 189 | # create a dictionary of joint to value for left limb 190 | # skip element 0 because it is the time param 191 | # skip elements 8, 16 because they are gripper params 192 | 193 | ldict = {k: row_dict[k] for k in keys[1:limb_dof+1]} 194 | rdict = {k: row_dict[k] for k in keys[limb_dof+2:-1]} 195 | 196 | return row[0], ldict, rdict 197 | 198 | 199 | def get_limb_coordinate(left, right): 200 | return left.endpoint_pose(), right.endpoint_pose() 201 | 202 | 203 | def plot_trajectory(traj): 204 | """ 205 | Plot the given trajectory for each dof 206 | """ 207 | for i in range(traj.shape[1]): # num dofs 208 | plt.subplot(3,6,i+1) 209 | plt.plot(traj[:,i], lw=2) 210 | plt.title("Traj for DOF="+str(i)) 211 | plt.xlabel('timesteps') 212 | plt.ylabel('joint position') 213 | plt.show() 214 | 215 | 216 | 217 | # 218 | # if __name__ == "__main__": 219 | # parser = argparse.ArgumentParser(description='Baxter controller') 220 | # parser.add_argument('-d', '--dir', dest='dir', required=True) 221 | # args = parser.parse_args() 222 | # 223 | # bc = BaxterUtils() 224 | # bc.load_trajectories(args.dir) 225 | # bc.run_trajectory() 226 | #bc.plot_trajectory() 227 | -------------------------------------------------------------------------------- /gym_baxter/envs/baxter_env.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import gym 4 | from gym import error, spaces, utils 5 | from gym.utils import seeding 6 | 7 | import numpy as np 8 | import rospy 9 | from dynamic_reconfigure.server import Server 10 | from std_msgs.msg import Empty 11 | import baxter_interface 12 | from baxter_examples.cfg import JointSpringsExampleConfig 13 | from baxter_interface import CHECK_VERSION 14 | 15 | import baxter_utils as bu 16 | 17 | class BaxterEnv(gym.Env): 18 | """ 19 | The connection with the Baxter or Gazebo must be initialized before using 20 | this environment 21 | 22 | These environments require the usage of a modified Limb class to get 23 | the states of each joint. 24 | """ 25 | 26 | # RIGHT_LIMB = "right" 27 | # LEFT_LIMB = "left" 28 | # BOTH_LIMBS = "both" 29 | # 30 | # TORQUE = "torque" 31 | # VELOCITY = "velocity" 32 | # POSITION = "position" 33 | 34 | def __init__(self, timesteps, sphere, control=None, limbs=None): 35 | print("Initializing node... ") 36 | rospy.init_node("rlcore_baxter_env") 37 | 38 | self.timesteps = timesteps 39 | 40 | if control == None: 41 | control = bu.POSITION 42 | self.control = control 43 | 44 | if limbs == None: 45 | limbs = bu.BOTH_LIMBS 46 | self.limbs = limbs 47 | 48 | # set control rate 49 | self.rate = 100.0 50 | self.missed_cmds = 20000.0 # Missed cycles before triggering timeout 51 | self.control_rate = rospy.Rate(self.rate) 52 | 53 | # create our limb instance 54 | # for safety purposes, set the control rate command timeout. 55 | # if the specified number of command cycles are missed, the robot 56 | # will timeout and disable 57 | if (self.limbs == bu.BOTH_LIMBS): 58 | self.llimb = baxter_interface.Limb(bu.LEFT_LIMB_NAME) 59 | self.rlimb = baxter_interface.Limb(bu.RIGHT_LIMB_NAME) 60 | self.llimb.set_command_timeout((1.0 / self.rate) * self.missed_cmds) 61 | self.rlimb.set_command_timeout((1.0 / self.rate) * self.missed_cmds) 62 | # robot state 63 | self.joint_space = 2*len(self.llimb.joint_angles()) 64 | self.state = np.zeros(self.joint_space) 65 | elif (self.limbs == bu.LEFT_LIMB): 66 | self.llimb = baxter_interface.Limb(bu.LEFT_LIMB_NAME) 67 | self.llimb.set_command_timeout((1.0 / self.rate) * self.missed_cmds) 68 | # robot state 69 | self.joint_space = len(self.llimb.joint_angles()) 70 | self.state = np.zeros(self.joint_space) 71 | elif (self.limbs == bu.RIGHT_LIMB): 72 | self.rlimb = baxter_interface.Limb(bu.RIGHT_LIMB_NAME) 73 | self.rlimb.set_command_timeout((1.0 / self.rate) * self.missed_cmds) 74 | # robot state 75 | self.joint_space = len(self.rlimb.joint_angles()) 76 | self.state = np.zeros(self.joint_space) 77 | 78 | # verify robot is enabled 79 | print("Getting Baxter state... ") 80 | self._rs = baxter_interface.RobotEnable(CHECK_VERSION) 81 | self._init_state = self._rs.state().enabled 82 | print("Enabling Baxter... ") 83 | self._rs.enable() 84 | 85 | # prepare shutdown 86 | rospy.on_shutdown(self.clean_shutdown) 87 | 88 | #self.reset() 89 | # laction, raction = self.get_joint_action_dict(np.zeros(14)) 90 | # print (laction, raction) 91 | # self.llimb.move_to_joint_positions(laction) 92 | # self.rlimb.move_to_joint_positions(raction) 93 | 94 | # print ("Trying velocity") 95 | # acts = np.empty(14) 96 | # acts.fill(.25) 97 | # laction, raction = self.get_joint_action_dict(acts) 98 | # self.llimb.set_joint_velocities(laction) 99 | # self.rlimb.set_joint_velocities(raction) 100 | # 101 | # print ("Trying torque") 102 | # import time 103 | # for i in range(10): 104 | # print (i) 105 | # acts = np.random.normal(0.0, 0.5, (14,)) 106 | # laction, raction = self.get_joint_action_dict(acts) 107 | # self.llimb.set_joint_torques(laction) 108 | # self.rlimb.set_joint_torques(raction) 109 | # self.control_rate.sleep() 110 | # time.sleep(1) 111 | # 112 | # time.sleep(5) 113 | # import sys 114 | # sys.exit(1) 115 | 116 | 117 | def clean_shutdown(self): 118 | """ 119 | Switches out of joint torque mode to exit cleanly 120 | """ 121 | print("Exiting Baxter env...") 122 | if (self.limbs == bu.BOTH_LIMBS): 123 | self.llimb.exit_control_mode() 124 | self.rlimb.exit_control_mode() 125 | elif (self.limbs == bu.LEFT_LIMB): 126 | self.llimb.exit_control_mode() 127 | elif (self.limbs == bu.RIGHT_LIMB): 128 | self.rlimb.exit_control_mode() 129 | 130 | if not self._init_state and self._rs.state().enabled: 131 | print("Disabling robot...") 132 | self._rs.disable() 133 | 134 | 135 | def get_joint_angles(self): 136 | if (self.limbs == bu.BOTH_LIMBS): 137 | ljointdict = self.llimb.joint_angles() 138 | rjointdict = self.rlimb.joint_angles() 139 | ljointangles = [ljointdict[x] for x in self.llimb._joint_names[bu.LEFT_LIMB_NAME]] 140 | rjointangles = [rjointdict[x] for x in self.rlimb._joint_names[bu.RIGHT_LIMB_NAME]] 141 | return np.array(ljointangles + rjointangles) 142 | elif (self.limbs == bu.LEFT_LIMB): 143 | ljointdict = self.llimb.joint_angles() 144 | ljointangles = [ljointdict[x] for x in self.llimb._joint_names[bu.LEFT_LIMB_NAME]] 145 | return np.array(ljointangles) 146 | elif (self.limbs == bu.RIGHT_LIMB): 147 | rjointdict = self.rlimb.joint_angles() 148 | rjointangles = [rjointdict[x] for x in self.rlimb._joint_names[bu.RIGHT_LIMB_NAME]] 149 | return np.array(rjointangles) 150 | 151 | 152 | def get_endeff_position(self): 153 | if (self.limbs == bu.BOTH_LIMBS): 154 | return np.array(list(self.llimb.endpoint_pose()['position']) + 155 | list(self.rlimb.endpoint_pose()['position'])) 156 | elif (self.limbs == bu.LEFT_LIMB): 157 | return np.array(list(self.llimb.endpoint_pose()['position'])) 158 | elif (self.limbs == bu.RIGHT_LIMB): 159 | return np.array(list(self.rlimb.endpoint_pose()['position'])) 160 | 161 | 162 | def get_joint_action_dict(self, action): 163 | if (self.limbs == bu.BOTH_LIMBS): 164 | lkeys = self.llimb._joint_names[bu.LEFT_LIMB_NAME] 165 | ldict = dict(zip(lkeys, action.tolist()[:self.joint_space/2])) 166 | rkeys = self.rlimb._joint_names[bu.RIGHT_LIMB_NAME] 167 | rdict = dict(zip(rkeys, action.tolist()[self.joint_space/2:])) 168 | return ldict, rdict 169 | elif (self.limbs == bu.LEFT_LIMB): 170 | lkeys = self.llimb._joint_names[bu.LEFT_LIMB_NAME] 171 | ldict = dict(zip(lkeys, action.tolist()[:self.joint_space])) 172 | return ldict 173 | elif (self.limbs == bu.RIGHT_LIMB): 174 | rkeys = self.rlimb._joint_names[bu.RIGHT_LIMB_NAME] 175 | rdict = dict(zip(rkeys, action.tolist()[:self.joint_space])) 176 | return rdict 177 | 178 | 179 | def _reset(self): 180 | if (self.limbs == bu.BOTH_LIMBS): 181 | self.llimb.move_to_neutral() 182 | self.rlimb.move_to_neutral() 183 | self.state = self.get_joint_angles() 184 | return self.state 185 | elif (self.limbs == bu.LEFT_LIMB): 186 | self.llimb.move_to_neutral() 187 | self.state = self.get_joint_angles() 188 | return self.state 189 | if (self.limbs == bu.RIGHT_LIMB): 190 | self.rlimb.move_to_neutral() 191 | self.state = self.get_joint_angles() 192 | return self.state 193 | 194 | 195 | def _step(self, action): 196 | raise NotImplementedError 197 | 198 | 199 | def _render(self, mode='human', close=False): 200 | pass 201 | 202 | 203 | @property 204 | def action_space(self): 205 | raise NotImplementedError 206 | 207 | 208 | @property 209 | def observation_space(self): 210 | raise NotImplementedError 211 | --------------------------------------------------------------------------------