├── setup.py~
├── gym_baxter
    ├── __init__.py~
    ├── envs
    │   ├── __init__.py
    │   ├── baxter_reacher_env.py
    │   ├── baxter_avoider_env.py
    │   ├── baxter_utils.py
    │   └── baxter_env.py
    └── __init__.py
├── setup.py
├── README.md
├── .travis.yml
└── .gitignore


/setup.py~:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/gym_baxter/__init__.py~:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | 
3 | setup(name='gym_baxter',
4 |       version='0.0.1',
5 |       install_requires=['gym']
6 | )
7 | 


--------------------------------------------------------------------------------
/gym_baxter/envs/__init__.py:
--------------------------------------------------------------------------------
1 | from gym_baxter.envs.baxter_reacher_env import BaxterReacherEnv
2 | from gym_baxter.envs.baxter_avoider_env import BaxterAvoiderEnv
3 | 


--------------------------------------------------------------------------------
/gym_baxter/__init__.py:
--------------------------------------------------------------------------------
 1 | from gym.envs.registration import register
 2 | 
 3 | register(
 4 |     id='BaxterReacher-v0',
 5 |     entry_point='gym_baxter.envs:BaxterReacherEnv',
 6 |     timestep_limit=10000,
 7 | )
 8 | register(
 9 |     id='BaxterAvoider-v0',
10 |     entry_point='gym_baxter.envs:BaxterAvoiderEnv',
11 |     timestep_limit=10000,
12 | )
13 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Baxter robot OpenAI Gym Environment
 2 | 
 3 | This package integrates the Gazebo simulator with the OpenAI Gym. The environments require the
 4 | [baxter interface](http://sdk.rethinkrobotics.com/wiki/Baxter_Interface) to be installed. For the
 5 | most part, the setup transfers to the physical robot, but the **BaxterAvoiderEnv** requires joint
 6 | states that are retrieved from a Gazebo topic.
 7 | 
 8 | # Environments
 9 | 
10 | Before running any environment you should start Gazebo and initialize the Baxter connection.
11 | 
12 | ## BaxterReacherEnv
13 | 
14 | Simply reach a given location in coordinate space.
15 | 
16 | ## BaxterAvoiderEnv
17 | 
18 | Reach a given location in coordinate space while avoiding passing though a certain area.
19 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | # Config file for automatic testing at travis-ci.org
 2 | # This file will be regenerated if you run travis_pypi_setup.py
 3 | 
 4 | language: python
 5 | python: 3.5
 6 | 
 7 | env:
 8 |   - TOXENV=py35
 9 |   - TOXENV=py34
10 |   - TOXENV=py33
11 |   - TOXENV=py27
12 |   - TOXENV=py26
13 |   - TOXENV=pypy
14 | 
15 | # command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors
16 | install: pip install -U tox
17 | 
18 | # command to run tests, e.g. python setup.py test
19 | script: tox -e ${TOXENV}
20 | 
21 | # After you create the Github repo and add it to Travis, run the
22 | # travis_pypi_setup.py script to finish PyPI deployment setup
23 | deploy:
24 |   provider: pypi
25 |   distributions: sdist bdist_wheel
26 |   user: tpbarron
27 |   password:
28 |     secure: PLEASE_REPLACE_ME
29 |   on:
30 |     tags: true
31 |     repo: tpbarron/gym_baxter
32 |     condition: $TOXENV == py27
33 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | 
 6 | # C extensions
 7 | *.so
 8 | 
 9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | 
27 | # PyInstaller
28 | #  Usually these files are written by a python script from a template
29 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 | 
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 | 
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *,cover
46 | .hypothesis/
47 | 
48 | # Translations
49 | *.mo
50 | *.pot
51 | 
52 | # Django stuff:
53 | *.log
54 | 
55 | # Sphinx documentation
56 | docs/_build/
57 | 
58 | # PyBuilder
59 | target/
60 | 


--------------------------------------------------------------------------------
/gym_baxter/envs/baxter_reacher_env.py:
--------------------------------------------------------------------------------
  1 | import gym
  2 | from gym import error, spaces, utils
  3 | from gym.utils import seeding
  4 | 
  5 | from .baxter_env import BaxterEnv
  6 | import baxter_utils as bu
  7 | 
  8 | class BaxterReacherEnv(BaxterEnv):
  9 |     """
 10 |     An environment to test training the Baxter to reach a given location
 11 |     """
 12 | 
 13 |     metadata = {'render.modes': ['human']}
 14 | 
 15 |     def __init__(self, timesteps, control=bu.POSITION, limbs=bu.BOTH_LIMBS):
 16 |         super(BaxterReacherEnv, self).__init__(timesteps, control=control, limbs=limbs)
 17 | 
 18 |         # left(x, y, z), right(x, y, z)
 19 |         # x is forward, y is left, z is up
 20 |         if (self.limbs == bu.BOTH_LIMBS):
 21 |             self.goal = np.array([np.random.random(), -np.random.random(), 1.0,
 22 |                                   np.random.random(), np.random.random(), 1.0])
 23 |         elif (self.limbs == bu.LEFT_LIMB):
 24 |             self.goal = np.array([np.random.random(), -np.random.random(), 1.0])
 25 |         elif (self.limbs == bu.RIGHT_LIMB):
 26 |             self.goal = np.array([np.random.random(), np.random.random(), 1.0])
 27 | 
 28 |         self.t = 0
 29 | 
 30 | 
 31 |     @property
 32 |     def action_space(self):
 33 |         # this will be the joint space
 34 |         return Box(-np.inf, np.inf, (self.joint_space,))
 35 | 
 36 | 
 37 |     @property
 38 |     def observation_space(self):
 39 |         # [baxter joint angles; goal pos]
 40 |         return Box(-np.inf, np.inf, (self.joint_space + self.goal.size,))
 41 | 
 42 | 
 43 |     def _step(self, action):
 44 |         if (self.limbs == bu.BOTH_LIMBS):
 45 |             laction, raction = self.get_joint_action_dict(action)
 46 |             assert(len(laction)) == len(self.llimb.joint_angles())
 47 |             assert(len(raction)) == len(self.rlimb.joint_angles())
 48 |             if (self.control == bu.VELOCITY):
 49 |                 self.llimb.set_joint_velocities(laction)
 50 |                 self.rlimb.set_joint_velocities(raction)
 51 |             elif (self.control == bu.TORQUE):
 52 |                 self.llimb.set_joint_torques(laction)
 53 |                 self.rlimb.set_joint_torques(raction)
 54 |             elif (self.control == bu.POSITION):
 55 |                 self.llimb.set_joint_positions(laction)
 56 |                 self.rlimb.set_joint_positions(raction)
 57 |         elif (self.limbs == bu.LEFT_LIMB):
 58 |             laction = self.get_joint_action_dict(action)
 59 |             assert(len(laction)) == len(self.llimb.joint_angles())
 60 |             if (self.control == bu.VELOCITY):
 61 |                 self.llimb.set_joint_velocities(laction)
 62 |             elif (self.control == bu.TORQUE):
 63 |                 self.llimb.set_joint_torques(laction)
 64 |             elif (self.control == bu.POSITION):
 65 |                 self.llimb.set_joint_positions(laction)
 66 |         elif (self.limbs == bu.RIGHT_LIMB):
 67 |             raction = self.get_joint_action_dict(action)
 68 |             assert(len(raction)) == len(self.rlimb.joint_angles())
 69 |             if (self.control == bu.VELOCITY):
 70 |                 self.rlimb.set_joint_velocities(raction)
 71 |             elif (self.control == bu.TORQUE):
 72 |                 self.rlimb.set_joint_torques(raction)
 73 |             elif (self.control == bu.POSITION):
 74 |                 self.rlimb.set_joint_positions(raction)
 75 | 
 76 |         self.control_rate.sleep()
 77 |         self.state = np.hstack((self.get_joint_angles(), self.goal))
 78 | 
 79 |         # only consider reward at end of task
 80 |         # if we are finished, the reward is the distance from the goal, else 0
 81 |         done = True if self.t == self.timesteps-1 else False
 82 |         if done:
 83 |             dist = np.linalg.norm(self.goal-self.get_endeff_position())
 84 |             reward = np.exp(-dist) # if at goal reward = 1, else asymptopes to 0
 85 |             #reward = -dist # closer go goal, smaller the value
 86 |         else:
 87 |             reward = 0.0
 88 |         self.t += 1
 89 | 
 90 |         return self.state, reward, done, {}
 91 | 
 92 | 
 93 |     def _reset(self):
 94 |         if (self.limbs == bu.BOTH_LIMBS):
 95 |             self.llimb.move_to_neutral()
 96 |             self.rlimb.move_to_neutral()
 97 |             self.state = self.get_joint_angles()
 98 |             return np.hstack((self.state, self.goal))
 99 |         elif (self.limbs == bu.LEFT_LIMB):
100 |             self.llimb.move_to_neutral()
101 |             self.state = self.get_joint_angles()
102 |             return np.hstack((self.state, self.goal))
103 |         elif (self.limbs == bu.RIGHT_LIMB):
104 |             self.rlimb.move_to_neutral()
105 |             self.state = self.get_joint_angles()
106 |             return np.hstack((self.state, self.goal))
107 |         self.t = 0
108 | 
109 |     def _render(self, mode='human', close=False):
110 |         pass
111 | 


--------------------------------------------------------------------------------
/gym_baxter/envs/baxter_avoider_env.py:
--------------------------------------------------------------------------------
  1 | import gym
  2 | from gym import error, spaces, utils
  3 | from gym.utils import seeding
  4 | 
  5 | import numpy as np
  6 | 
  7 | from .baxter_env import BaxterEnv
  8 | import baxter_utils as bu
  9 | 
 10 | class BaxterAvoiderEnv(BaxterEnv):
 11 | 
 12 |     """
 13 |     An environment to test training the Baxter to reach a given location
 14 |     """
 15 | 
 16 |     metadata = {'render.modes': ['human']}
 17 | 
 18 |     def __init__(self, timesteps, sphere, control=bu.POSITION, limbs=bu.BOTH_LIMBS):
 19 |         super(BaxterAvoiderEnv, self).__init__(timesteps, sphere, control=control, limbs=limbs)
 20 | 
 21 |         # left(x, y, z), right(x, y, z)
 22 |         # x is forward, y is left, z is up
 23 |         if (self.limbs == bu.BOTH_LIMBS):
 24 |             self.goal = np.array([np.random.random(), -np.random.random(), 1.0,
 25 |                                   np.random.random(), np.random.random(), 1.0])
 26 |         elif (self.limbs == bu.LEFT_LIMB):
 27 |             self.goal = np.array([np.random.random(), -np.random.random(), 1.0])
 28 |         elif (self.limbs == bu.RIGHT_LIMB):
 29 |             self.goal = np.array([np.random.random(), np.random.random(), 1.0])
 30 | 
 31 |         self.sphere = sphere
 32 |         self.step_in_sphere = 0
 33 |         self.t = 0
 34 | 
 35 | 
 36 |     @property
 37 |     def action_space(self):
 38 |         # this will be the joint space
 39 |         return Box(-np.inf, np.inf, (self.joint_space,))
 40 | 
 41 | 
 42 |     @property
 43 |     def observation_space(self):
 44 |         # [baxter joint angles; goal pos]
 45 |         return Box(-np.inf, np.inf, (self.joint_space + self.goal.size,))
 46 | 
 47 | 
 48 |     def _step(self, action):
 49 |         if (self.limbs == bu.BOTH_LIMBS):
 50 |             laction, raction = self.get_joint_action_dict(action)
 51 |             assert(len(laction)) == len(self.llimb.joint_angles())
 52 |             assert(len(raction)) == len(self.rlimb.joint_angles())
 53 |             if (self.control == bu.VELOCITY):
 54 |                 self.llimb.set_joint_velocities(laction)
 55 |                 self.rlimb.set_joint_velocities(raction)
 56 |             elif (self.control == bu.TORQUE):
 57 |                 self.llimb.set_joint_torques(laction)
 58 |                 self.rlimb.set_joint_torques(raction)
 59 |             elif (self.control == bu.POSITION):
 60 |                 self.llimb.set_joint_positions(laction)
 61 |                 self.rlimb.set_joint_positions(raction)
 62 |         elif (self.limbs == bu.LEFT_LIMB):
 63 |             laction = self.get_joint_action_dict(action)
 64 |             assert(len(laction)) == len(self.llimb.joint_angles())
 65 |             if (self.control == bu.VELOCITY):
 66 |                 self.llimb.set_joint_velocities(laction)
 67 |             elif (self.control == bu.TORQUE):
 68 |                 self.llimb.set_joint_torques(laction)
 69 |             elif (self.control == bu.POSITION):
 70 |                 self.llimb.set_joint_positions(laction)
 71 |         elif (self.limbs == bu.RIGHT_LIMB):
 72 |             raction = self.get_joint_action_dict(action)
 73 |             assert(len(raction)) == len(self.rlimb.joint_angles())
 74 |             if (self.control == bu.VELOCITY):
 75 |                 self.rlimb.set_joint_velocities(raction)
 76 |             elif (self.control == bu.TORQUE):
 77 |                 self.rlimb.set_joint_torques(raction)
 78 |             elif (self.control == bu.POSITION):
 79 |                 self.rlimb.set_joint_positions(raction)
 80 | 
 81 |         self.control_rate.sleep()
 82 |         self.state = np.hstack((self.get_joint_angles(), self.goal))
 83 | 
 84 |         # check if in area
 85 |         if (self.limbs == bu.BOTH_LIMBS):
 86 |             if (bu.limb_in_sphere(self.llimb, self.sphere) or
 87 |                     bu.limb_in_sphere(self.rlimb, self.sphere)):
 88 |                 self.step_in_sphere += 1
 89 |         elif (self.limbs == bu.LEFT_LIMB):
 90 |             if (bu.limb_in_sphere(self.llimb, self.sphere)):
 91 |                 self.step_in_sphere += 1
 92 |         elif (self.limbs == bu.RIGHT_LIMB):
 93 |             if (bu.limb_in_sphere(self.rlimb, self.sphere)):
 94 |                 self.step_in_sphere += 1
 95 | 
 96 | 
 97 |         # only consider reward at end of task
 98 |         # if we are finished, the reward is weighted combination of the distance
 99 |         # from the goal and the number of steps in the space
100 |         done = True if self.t == self.timesteps-1 else False
101 |         if done:
102 |             dist = np.linalg.norm(self.goal-self.get_endeff_position())
103 |             dist_reward = np.exp(-dist)
104 |             space_reward = np.exp(-float(self.step_in_sphere) / self.timesteps)
105 |             print ("num steps in sphere: ", self.step_in_sphere, dist_reward, space_reward)
106 |             reward = .5*dist_reward+.5*space_reward # if at goal reward = 1, else asymptopes to 0
107 |         else:
108 |             reward = 0.0
109 |         self.t += 1
110 | 
111 |         return self.state, reward, done, {}
112 | 
113 | 
114 |     def _reset(self):
115 |         self.step_in_sphere = 0
116 |         self.t = 0
117 |         if (self.limbs == bu.BOTH_LIMBS):
118 |             self.llimb.move_to_neutral()
119 |             self.rlimb.move_to_neutral()
120 |             self.state = self.get_joint_angles()
121 |             return np.hstack((self.state, self.goal))
122 |         elif (self.limbs == bu.LEFT_LIMB):
123 |             self.llimb.move_to_neutral()
124 |             self.state = self.get_joint_angles()
125 |             return np.hstack((self.state, self.goal))
126 |         if (self.limbs == bu.RIGHT_LIMB):
127 |             self.rlimb.move_to_neutral()
128 |             self.state = self.get_joint_angles()
129 |             return np.hstack((self.state, self.goal))
130 | 
131 | 
132 |     def _render(self, mode='human', close=False):
133 |         pass
134 | 


--------------------------------------------------------------------------------
/gym_baxter/envs/baxter_utils.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | import argparse, csv, os, sys
  3 | import numpy as np
  4 | import matplotlib.pyplot as plt
  5 | import rospy
  6 | import baxter_interface
  7 | from baxter_interface import CHECK_VERSION
  8 | # from baxter_trajectory import Trajectory
  9 | 
 10 | POSITION, VELOCITY, TORQUE = range(3)
 11 | LEFT_LIMB, RIGHT_LIMB, BOTH_LIMBS = range(3)
 12 | LEFT_LIMB_NAME = 'left'
 13 | RIGHT_LIMB_NAME = 'right'
 14 | 
 15 | DOF = 17
 16 | DOF_NO_TIME = 16
 17 | DOF_NO_TIME_NO_GRIPPER = 14
 18 | DOF_NO_TIME_LEFT = 8
 19 | DOF_NO_TIME_RIGHT = 8
 20 | DOF_NO_TIME_NO_GRIPPER_LEFT = 7
 21 | DOF_NO_TIME_NO_GRIPPER_RIGHT = 7
 22 | 
 23 | 
 24 | def limb_in_sphere(limb, sphere):
 25 |     """
 26 |     sphere - a sphere defined by (x, y, z, r)
 27 |     limb - a baxter limb object
 28 |     Returns True if any segment of limb is in space
 29 |     """
 30 |     states = limb.get_link_states()
 31 |     links = limb._link_names[limb.name]
 32 |     for i in range(len(links)-1):
 33 |         l1 = links[i]
 34 |         l2 = links[i+1]
 35 |         p1 = states[l1].position
 36 |         # print ("p1 = ", p1)
 37 |         p2 = states[l2].position
 38 |         # print ("p2 = ", p2)
 39 |         p1 = (p1.x, p1.y, p1.z)
 40 |         p2 = (p2.x, p2.y, p2.z)
 41 |         if (check_segment(sphere, (p1, p2))):
 42 |             return True
 43 |     return False
 44 | 
 45 | 
 46 | def check_segment(sphere, segment):
 47 |     """
 48 |     segment = ((x1,y1,z1), (x2,y2,z2))
 49 |     returns True if intersection
 50 |     """
 51 |     p1, p2 = segment
 52 |     x1, y1, z1 = p1
 53 |     x2, y2, z2 = p2
 54 |     x3, y3, z3, r = sphere
 55 |     a = (x2-x1)**2.0 + (y2-y1)**2.0 + (z2-z1)**2.0
 56 |     b = 2*( (x2-x1)*(x1-x3) + (y2-y1)*(y1-y3) + (z2-z1)*(z1-z3) )
 57 |     c = x3**2.0 + y3**2.0 + z3**2.0 + x1**2.0 + y1**2.0 + z1**2.0 - 2*(x3*x1 + y3*y1 + z3*z1) - r**2.0
 58 | 
 59 |     u1 = (-b + np.sqrt(b*b-4*a*c)) / 2*a
 60 |     u2 = (-b - np.sqrt(b*b-4*a*c)) / 2*a
 61 |     if ((u1 < 0 and u2 < 0) or (u1 > 1 and u2 > 1)):
 62 |         # line seg outside sphere
 63 |         return False
 64 |     if ((u1 < 0 and u2 > 1) or (u1 > 1 and u2 < 0)):
 65 |         # line segment inside sphere
 66 |         return True
 67 |     if ((abs(u1) < 1 and (u2 > 1 or u2 < 0)) or (abs(u2) < 1 and (u1 > 1 or u1 < 0))):
 68 |         # line segment intersects at one point
 69 |         return True
 70 |     if (abs(u1) < 1 and abs(u2) < 1):
 71 |         # intersects at two points
 72 |         return True
 73 |     if (abs(u1) < 1 and abs(u2) < 1 and u1 - u2 < .001):
 74 |         # tangential
 75 |         return True
 76 |     return False
 77 | 
 78 | 
 79 | def load_trajectories(dir):
 80 |     """
 81 |     Load all trajectories from dir into a list of numpy arrays
 82 |     """
 83 |     trajs = []
 84 |     keys = None
 85 |     files = os.listdir(dir)
 86 |     for f in files:
 87 |         keys, traj = load_trajectory(os.path.join(dir, f))
 88 |         trajs.append(traj)
 89 | 
 90 |     return trajs, keys
 91 | 
 92 | 
 93 | def load_trajectory(fname):
 94 |     """
 95 |     Load the trajectory in the file fname to a numpy array
 96 |     """
 97 |     with open(fname, 'rb') as csvfile:
 98 |         reader = csv.reader(csvfile)
 99 |         keys = reader.next()
100 |         data = []
101 |         for row in reader:
102 |             row = [float(f) for f in row]
103 |             data.append(row)
104 |         data = np.array(data)
105 |         return keys, data
106 | 
107 | 
108 | def get_trajectories_without_time(trajs, limb=BOTH_LIMBS):
109 |     if (limb == BOTH_LIMBS):
110 |         trajs = [t[:,1:] for t in trajs]
111 |         return trajs
112 |     elif (limb == LEFT_LIMB):
113 |         # remove time
114 |         trajs = [t[:,1:] for t in trajs]
115 |         trajs = [t[:,:8] for t in trajs]
116 |         return trajs
117 |     elif (limb == RIGHT_LIMB):
118 |         # remove time
119 |         trajs = [t[:,1:] for t in trajs]
120 |         trajs = [t[:,8:] for t in trajs]
121 |         return trajs
122 | 
123 | 
124 | def get_trajectories_without_time_and_gripper(trajs, limb=BOTH_LIMBS):
125 |     if (limb == BOTH_LIMBS):
126 |         trajs = [np.hstack((t[:,1:8],t[:,9:16])) for t in trajs]
127 |         return trajs
128 |     elif (limb == LEFT_LIMB):
129 |         trajs = [t[:,1:8] for t in trajs]
130 |         return trajs
131 |     elif (limb == RIGHT_LIMB):
132 |         trajs = [t[:,9:16] for t in trajs]
133 |         return trajs
134 | 
135 | 
136 | def interpolate_time(secs, timesteps):
137 |     time = np.linspace(0, secs, timesteps)
138 |     return time
139 | 
140 | 
141 | def run_trajectory(traj, mode=POSITION):
142 |     if (mode == POSITION):
143 |         run_position_trajectory(traj)
144 |     # elif (mode == VELOCITY):
145 |     #     run_velocity_trajectory(traj)
146 | 
147 | 
148 | # def run_velocity_trajectory(traj_data):
149 | #     traj = Trajectory()
150 | #     traj.parse_traj(self.keys, traj_data)
151 | #     #for safe interrupt handling
152 | #     rospy.on_shutdown(traj.stop)
153 | #     result = True
154 | #     print ("Starting trajectory")
155 | #     traj.start()
156 | #     result = traj.wait()
157 | #     print("Result: " + str(result) + ", Playback Complete")
158 | 
159 | 
160 | def run_position_trajectory(traj, left, right, rate):
161 |     # traj is a numpy array where each column is a DOF
162 |     # and each row is a timestep
163 |     time, lstart, rstart = get_cmds_from_row(traj[0])
164 |     left.move_to_joint_positions(lstart)
165 |     right.move_to_joint_positions(rstart)
166 | 
167 |     start_time = rospy.get_time()
168 |     for t in range(traj.shape[0]): # for each row
169 |         sys.stdout.write("\r Record %d of %d" %
170 |                          (t, traj.shape[0]))
171 |         sys.stdout.flush()
172 |         time, lcmd, rcmd = get_cmds_from_row(traj[t])
173 |         # send these commands until the next frame
174 |         while (rospy.get_time() - start_time) < time:
175 |             if rospy.is_shutdown():
176 |                 print ("ROS shutdown, aborting")
177 |                 return
178 |             left.set_joint_positions(lcmd)
179 |             right.set_joint_positions(rcmd)
180 |             # TODO: future gripper handling?
181 |             rate.sleep()
182 | 
183 | 
184 | def get_cmds_from_row(row, keys):
185 |     limb_dof = 7
186 |     row_list = row.tolist()
187 |     row_dict = dict(zip(keys, row_list))
188 | 
189 |     # create a dictionary of joint to value for left limb
190 |     # skip element 0 because it is the time param
191 |     # skip elements 8, 16 because they are gripper params
192 | 
193 |     ldict = {k: row_dict[k] for k in keys[1:limb_dof+1]}
194 |     rdict = {k: row_dict[k] for k in keys[limb_dof+2:-1]}
195 | 
196 |     return row[0], ldict, rdict
197 | 
198 | 
199 | def get_limb_coordinate(left, right):
200 |     return left.endpoint_pose(), right.endpoint_pose()
201 | 
202 | 
203 | def plot_trajectory(traj):
204 |     """
205 |     Plot the given trajectory for each dof
206 |     """
207 |     for i in range(traj.shape[1]): # num dofs
208 |         plt.subplot(3,6,i+1)
209 |         plt.plot(traj[:,i], lw=2)
210 |         plt.title("Traj for DOF="+str(i))
211 |         plt.xlabel('timesteps')
212 |         plt.ylabel('joint position')
213 |     plt.show()
214 | 
215 | 
216 | 
217 | #
218 | # if __name__ == "__main__":
219 | # parser = argparse.ArgumentParser(description='Baxter controller')
220 | # parser.add_argument('-d', '--dir', dest='dir', required=True)
221 | # args = parser.parse_args()
222 | #
223 | # bc = BaxterUtils()
224 | # bc.load_trajectories(args.dir)
225 | # bc.run_trajectory()
226 | #bc.plot_trajectory()
227 | 


--------------------------------------------------------------------------------
/gym_baxter/envs/baxter_env.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | 
  3 | import gym
  4 | from gym import error, spaces, utils
  5 | from gym.utils import seeding
  6 | 
  7 | import numpy as np
  8 | import rospy
  9 | from dynamic_reconfigure.server import Server
 10 | from std_msgs.msg import Empty
 11 | import baxter_interface
 12 | from baxter_examples.cfg import JointSpringsExampleConfig
 13 | from baxter_interface import CHECK_VERSION
 14 | 
 15 | import baxter_utils as bu
 16 | 
 17 | class BaxterEnv(gym.Env):
 18 |     """
 19 |     The connection with the Baxter or Gazebo must be initialized before using
 20 |     this environment
 21 | 
 22 |     These environments require the usage of a modified Limb class to get
 23 |     the states of each joint.
 24 |     """
 25 | 
 26 |     # RIGHT_LIMB = "right"
 27 |     # LEFT_LIMB = "left"
 28 |     # BOTH_LIMBS = "both"
 29 |     #
 30 |     # TORQUE = "torque"
 31 |     # VELOCITY = "velocity"
 32 |     # POSITION = "position"
 33 | 
 34 |     def __init__(self, timesteps, sphere, control=None, limbs=None):
 35 |         print("Initializing node... ")
 36 |         rospy.init_node("rlcore_baxter_env")
 37 | 
 38 |         self.timesteps = timesteps
 39 | 
 40 |         if control == None:
 41 |             control = bu.POSITION
 42 |         self.control = control
 43 | 
 44 |         if limbs == None:
 45 |             limbs = bu.BOTH_LIMBS
 46 |         self.limbs = limbs
 47 | 
 48 |         # set control rate
 49 |         self.rate = 100.0
 50 |         self.missed_cmds = 20000.0  # Missed cycles before triggering timeout
 51 |         self.control_rate = rospy.Rate(self.rate)
 52 | 
 53 |         # create our limb instance
 54 |         # for safety purposes, set the control rate command timeout.
 55 |         # if the specified number of command cycles are missed, the robot
 56 |         # will timeout and disable
 57 |         if (self.limbs == bu.BOTH_LIMBS):
 58 |             self.llimb = baxter_interface.Limb(bu.LEFT_LIMB_NAME)
 59 |             self.rlimb = baxter_interface.Limb(bu.RIGHT_LIMB_NAME)
 60 |             self.llimb.set_command_timeout((1.0 / self.rate) * self.missed_cmds)
 61 |             self.rlimb.set_command_timeout((1.0 / self.rate) * self.missed_cmds)
 62 |             # robot state
 63 |             self.joint_space = 2*len(self.llimb.joint_angles())
 64 |             self.state = np.zeros(self.joint_space)
 65 |         elif (self.limbs == bu.LEFT_LIMB):
 66 |             self.llimb = baxter_interface.Limb(bu.LEFT_LIMB_NAME)
 67 |             self.llimb.set_command_timeout((1.0 / self.rate) * self.missed_cmds)
 68 |             # robot state
 69 |             self.joint_space = len(self.llimb.joint_angles())
 70 |             self.state = np.zeros(self.joint_space)
 71 |         elif (self.limbs == bu.RIGHT_LIMB):
 72 |             self.rlimb = baxter_interface.Limb(bu.RIGHT_LIMB_NAME)
 73 |             self.rlimb.set_command_timeout((1.0 / self.rate) * self.missed_cmds)
 74 |             # robot state
 75 |             self.joint_space = len(self.rlimb.joint_angles())
 76 |             self.state = np.zeros(self.joint_space)
 77 | 
 78 |         # verify robot is enabled
 79 |         print("Getting Baxter state... ")
 80 |         self._rs = baxter_interface.RobotEnable(CHECK_VERSION)
 81 |         self._init_state = self._rs.state().enabled
 82 |         print("Enabling Baxter... ")
 83 |         self._rs.enable()
 84 | 
 85 |         # prepare shutdown
 86 |         rospy.on_shutdown(self.clean_shutdown)
 87 | 
 88 |         #self.reset()
 89 |         # laction, raction = self.get_joint_action_dict(np.zeros(14))
 90 |         # print (laction, raction)
 91 |         # self.llimb.move_to_joint_positions(laction)
 92 |         # self.rlimb.move_to_joint_positions(raction)
 93 | 
 94 |         # print ("Trying velocity")
 95 |         # acts = np.empty(14)
 96 |         # acts.fill(.25)
 97 |         # laction, raction = self.get_joint_action_dict(acts)
 98 |         # self.llimb.set_joint_velocities(laction)
 99 |         # self.rlimb.set_joint_velocities(raction)
100 |         #
101 |         # print ("Trying torque")
102 |         # import time
103 |         # for i in range(10):
104 |         #     print (i)
105 |         #     acts = np.random.normal(0.0, 0.5, (14,))
106 |         #     laction, raction = self.get_joint_action_dict(acts)
107 |         #     self.llimb.set_joint_torques(laction)
108 |         #     self.rlimb.set_joint_torques(raction)
109 |         #     self.control_rate.sleep()
110 |         #     time.sleep(1)
111 |         #
112 |         # time.sleep(5)
113 |         # import sys
114 |         # sys.exit(1)
115 | 
116 | 
117 |     def clean_shutdown(self):
118 |         """
119 |         Switches out of joint torque mode to exit cleanly
120 |         """
121 |         print("Exiting Baxter env...")
122 |         if (self.limbs == bu.BOTH_LIMBS):
123 |             self.llimb.exit_control_mode()
124 |             self.rlimb.exit_control_mode()
125 |         elif (self.limbs == bu.LEFT_LIMB):
126 |             self.llimb.exit_control_mode()
127 |         elif (self.limbs == bu.RIGHT_LIMB):
128 |             self.rlimb.exit_control_mode()
129 | 
130 |         if not self._init_state and self._rs.state().enabled:
131 |             print("Disabling robot...")
132 |             self._rs.disable()
133 | 
134 | 
135 |     def get_joint_angles(self):
136 |         if (self.limbs == bu.BOTH_LIMBS):
137 |             ljointdict = self.llimb.joint_angles()
138 |             rjointdict = self.rlimb.joint_angles()
139 |             ljointangles = [ljointdict[x] for x in self.llimb._joint_names[bu.LEFT_LIMB_NAME]]
140 |             rjointangles = [rjointdict[x] for x in self.rlimb._joint_names[bu.RIGHT_LIMB_NAME]]
141 |             return np.array(ljointangles + rjointangles)
142 |         elif (self.limbs == bu.LEFT_LIMB):
143 |             ljointdict = self.llimb.joint_angles()
144 |             ljointangles = [ljointdict[x] for x in self.llimb._joint_names[bu.LEFT_LIMB_NAME]]
145 |             return np.array(ljointangles)
146 |         elif (self.limbs == bu.RIGHT_LIMB):
147 |             rjointdict = self.rlimb.joint_angles()
148 |             rjointangles = [rjointdict[x] for x in self.rlimb._joint_names[bu.RIGHT_LIMB_NAME]]
149 |             return np.array(rjointangles)
150 | 
151 | 
152 |     def get_endeff_position(self):
153 |         if (self.limbs == bu.BOTH_LIMBS):
154 |             return np.array(list(self.llimb.endpoint_pose()['position']) +
155 |                         list(self.rlimb.endpoint_pose()['position']))
156 |         elif (self.limbs == bu.LEFT_LIMB):
157 |             return np.array(list(self.llimb.endpoint_pose()['position']))
158 |         elif (self.limbs == bu.RIGHT_LIMB):
159 |             return np.array(list(self.rlimb.endpoint_pose()['position']))
160 | 
161 | 
162 |     def get_joint_action_dict(self, action):
163 |         if (self.limbs == bu.BOTH_LIMBS):
164 |             lkeys = self.llimb._joint_names[bu.LEFT_LIMB_NAME]
165 |             ldict = dict(zip(lkeys, action.tolist()[:self.joint_space/2]))
166 |             rkeys = self.rlimb._joint_names[bu.RIGHT_LIMB_NAME]
167 |             rdict = dict(zip(rkeys, action.tolist()[self.joint_space/2:]))
168 |             return ldict, rdict
169 |         elif (self.limbs == bu.LEFT_LIMB):
170 |             lkeys = self.llimb._joint_names[bu.LEFT_LIMB_NAME]
171 |             ldict = dict(zip(lkeys, action.tolist()[:self.joint_space]))
172 |             return ldict
173 |         elif (self.limbs == bu.RIGHT_LIMB):
174 |             rkeys = self.rlimb._joint_names[bu.RIGHT_LIMB_NAME]
175 |             rdict = dict(zip(rkeys, action.tolist()[:self.joint_space]))
176 |             return rdict
177 | 
178 | 
179 |     def _reset(self):
180 |         if (self.limbs == bu.BOTH_LIMBS):
181 |             self.llimb.move_to_neutral()
182 |             self.rlimb.move_to_neutral()
183 |             self.state = self.get_joint_angles()
184 |             return self.state
185 |         elif (self.limbs == bu.LEFT_LIMB):
186 |             self.llimb.move_to_neutral()
187 |             self.state = self.get_joint_angles()
188 |             return self.state
189 |         if (self.limbs == bu.RIGHT_LIMB):
190 |             self.rlimb.move_to_neutral()
191 |             self.state = self.get_joint_angles()
192 |             return self.state
193 | 
194 | 
195 |     def _step(self, action):
196 |         raise NotImplementedError
197 | 
198 | 
199 |     def _render(self, mode='human', close=False):
200 |         pass
201 | 
202 | 
203 |     @property
204 |     def action_space(self):
205 |         raise NotImplementedError
206 | 
207 | 
208 |     @property
209 |     def observation_space(self):
210 |         raise NotImplementedError
211 | 


--------------------------------------------------------------------------------