├── .gitignore
├── README.md
├── data
    ├── data.pkl
    ├── data_demo_promp.pkl
    ├── im_data_1d_opt.mat
    └── load_mat_file.py
├── demos
    ├── demo_promp.py
    └── demo_with_matlab_data.py
├── promp
    ├── __init__.py
    ├── discrete_promp.py
    ├── linear_sys_dyn.py
    └── promp_ctrl.py
└── set_env_bash.sh


/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # PROMP library written in python
 2 | 
 3 | * Implementation of Probabilistic Movement Primitives in the [paper](https://link.springer.com/article/10.1007/s10514-017-9648-7)
 4 | 
 5 | * Few of the utility functions are used from following [implementation](https://github.com/baxter-flowers/promplib).
 6 | 
 7 | * Includes the closed form implementation of trajectory controller implementation as well.
 8 | 
 9 | * Since one PROMP can only model one 1D, several of these has to be called to create a multi-dimensional PROMP
10 | 
11 | 
12 | ### Instructions
13 | 
14 | * Clone the repo
15 | 
16 | * From the cloned repo folder; run set_env_bash.sh
17 | 
18 | * Run the file demo_with_matlab_data.py from the demos folder (this uses a demonstration given in the data folder)
19 | 


--------------------------------------------------------------------------------
/data/im_data_1d_opt.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mjm522/promps_python/60259f9ed5201b61894859bfe268849b564c274a/data/im_data_1d_opt.mat


--------------------------------------------------------------------------------
/data/load_mat_file.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import pickle
 3 | from  scipy.io import loadmat
 4 | 
 5 | data = loadmat(os.environ['ROOT_DIR'] + '/data/im_data_1d_opt.mat')['imData']
 6 | 
 7 | 
 8 | steps  = {'numElements': data[0,0][0][0][0]['steps']['numElements'],
 9 |            'actions': data[0,0][0][0][0]['steps']['actions'],
10 |            'nextStates': data[0,0][0][0][0]['steps']['nextStates'],
11 |            'rewards': data[0,0][0][0][0]['steps']['rewards'],
12 |            'states': data[0,0][0][0][0]['steps']['states'],
13 |            'timeSteps': data[0,0][0][0][0]['steps']['timeSteps'],}
14 | 
15 | data_py = {'numElements': data[0,0][0][0][0]['numElements'],
16 |            'iterationNumber': data[0,0][0][0][0]['iterationNumber'],
17 |            'finalRewards': data[0,0][0][0][0]['finalRewards'],
18 |             'dt':data[0,0][0][0][0]['dt'],
19 |             'noise_std':data[0,0][0][0][0]['noise_std'],
20 |             'init_m':data[0,0][0][0][0]['init_m'],
21 |             'init_std':data[0,0][0][0][0]['init_std'],
22 |             'maxAction':data[0,0][0][0][0]['maxAction'],
23 |             'steps':steps, }
24 | 
25 | 
26 | pickle_out = open(os.environ['ROOT_DIR'] + '/data/data.pkl', "wb")
27 | pickle.dump(data_py, pickle_out)
28 | pickle_out.close()


--------------------------------------------------------------------------------
/demos/demo_promp.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import matplotlib.pyplot as plt
  3 | # from aml_lfd.promp.discrete_promp import DiscretePROMP
  4 | 
  5 | from aml_lfd.promp.promp_trial import DiscretePROMP
  6 | from aml_lfd.promp.promp_ctrl import PROMPCtrl
  7 | 
  8 | 
  9 | np.random.seed(0)
 10 | 
 11 | def demo_generate_traj():
 12 | 
 13 |     plt.figure("DiscretePROMP")
 14 | 
 15 |     # Generate and plot trajectory Data
 16 |     x = np.arange(0,1.01,0.01)           # time points for trajectories
 17 |     nrTraj=30                            # number of trajectoreis for training
 18 |     sigmaNoise=0.02                       # noise on training trajectories
 19 |     A = np.array([.2, .2, .01, -.05])
 20 |     X = np.vstack( (np.sin(5*x), x**2, x, np.ones((1,len(x))) ))
 21 |     Y = np.zeros( (nrTraj,len(x)) )
 22 | 
 23 |     demos_list = []
 24 |     for traj in range(0, nrTraj):
 25 |         sample = np.dot(A + sigmaNoise * np.random.randn(1,4), X)[0]
 26 |         plt.plot(sample, 'k')
 27 |         demos_list.append(sample)
 28 | 
 29 | 
 30 |     #create a promb object by passing the data
 31 |     d_promp = DiscretePROMP(data=demos_list)
 32 |     d_promp.train()
 33 | 
 34 |     #add a via point
 35 |     # d_promp.add_viapoint(0.7, 5)
 36 |     # plt.scatter(0.7, 5, marker='*', s=100)
 37 | 
 38 |     #set the start and goal, the spatial scaling
 39 |     d_promp.set_start(0.2)
 40 |     d_promp.set_goal(-0.1)
 41 | 
 42 |     for _ in  range(100):
 43 | 
 44 |         plt.plot(d_promp.generate_trajectory(phase_speed=0.5, randomness=1e-3), 'r')
 45 |         plt.plot(d_promp.generate_trajectory(phase_speed=1., randomness=1e-3), 'g')
 46 |         plt.plot(d_promp.generate_trajectory(phase_speed=2., randomness=1e-3), 'b')
 47 | 
 48 |     plt.legend()
 49 |     plt.show()
 50 | 
 51 | 
 52 | 
 53 | def demo_ctrl_traj():
 54 | 
 55 |     A = np.array([.2, .2, .01, -.05])
 56 |     B = np.random.randn(4,2)
 57 | 
 58 |     x0 = np.zeros(4)
 59 | 
 60 |     demos_list = []
 61 |     T = 100
 62 |     n_demos = 30
 63 |     sigma_noise = 0.01
 64 | 
 65 |     u = np.random.randn(2, T)
 66 | 
 67 |     for _ in range(n_demos):
 68 |         traj = np.zeros([4,T])
 69 | 
 70 |         for k in range(T-1):
 71 | 
 72 |             traj[:, k+1] = np.dot( (A + sigma_noise * np.random.randn(4)), traj[:, k]) + np.dot(B, u[:, k])
 73 | 
 74 |         demos_list.append(traj[0, :].copy())
 75 | 
 76 | 
 77 |     #create a promb object by passing the data
 78 |     d_promp = DiscretePROMP(data=demos_list)
 79 |     d_promp.train()
 80 | 
 81 |     c_promp = PROMPCtrl(promp_obj=d_promp, A=A, B=B)
 82 | 
 83 |     u_computed = np.zeros(T)
 84 | 
 85 |     for t in range(T):
 86 |         u_computed[t] = c_promp.compute_gains(t, add_noise=False)
 87 | 
 88 | 
 89 |     print "Initial u \n", u
 90 |     print "Computed u \n", u_computed
 91 | 
 92 | 
 93 | 
 94 | def main():
 95 | 
 96 |     demo_ctrl_traj()
 97 |     # demo_generate_traj()
 98 | 
 99 | 
100 | if __name__ == '__main__':
101 |     main()
102 | 
103 | 
104 | 


--------------------------------------------------------------------------------
/demos/demo_with_matlab_data.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import pickle
  3 | import numpy as np
  4 | import matplotlib.pyplot as plt
  5 | from promp.discrete_promp import DiscretePROMP
  6 | from promp.linear_sys_dyn import LinearSysDyn
  7 | from promp.promp_ctrl import PROMPCtrl
  8 | 
  9 | 
 10 | pickle_in = open(os.environ['ROOT_DIR'] + '/data/data.pkl',"rb")
 11 | data = pickle.load(pickle_in)
 12 | 
 13 | 
 14 | demos_list    = [data['steps']['states'][k][0][:,0] for k in range(100)]
 15 | Ddemos_list   = [data['steps']['states'][k][0][:,1] for k in range(100)]
 16 | 
 17 | #create a promb object by passing the data
 18 | d_promp = DiscretePROMP(data=demos_list)
 19 | d_promp.train()
 20 | 
 21 | def plot_mean_and_sigma(mean, lower_bound, upper_bound, color_mean=None, color_shading=None):
 22 |     # plot the shaded range of the confidence intervals
 23 |     plt.fill_between(range(mean.shape[0]), lower_bound, upper_bound, color=color_shading, alpha=.5)
 24 |     # plot the mean on top
 25 |     plt.plot(mean, color_mean)
 26 | 
 27 | 
 28 | def demo_generate_traj():
 29 | 
 30 |     #add a via point
 31 |     # d_promp.add_viapoint(0.7, 5)
 32 |     # plt.scatter(0.7, 5, marker='*', s=100)
 33 | 
 34 |     #set the start and goal, the spatial scaling
 35 |     d_promp.set_start(demos_list[0][0])
 36 |     d_promp.set_goal(demos_list[0][-1])
 37 | 
 38 |     #add a via point
 39 |     # d_promp.add_viapoint(0.3, 2.25)
 40 |     # d_promp.add_viapoint(0.6, 2.25)
 41 |     # plt.scatter(0.7, 5, marker='*', s=100)
 42 | 
 43 |     for traj, traj_vel in zip(demos_list, Ddemos_list):
 44 |         plt.figure("ProMP-Pos")
 45 |         plt.plot(traj, 'k', alpha=0.2)
 46 |         plt.figure("ProMP-Vel")
 47 |         plt.plot(traj_vel, 'k', alpha=0.2)
 48 | 
 49 |     for _ in  range(1):
 50 | 
 51 |         pos_1, vel_1, acc_1 = d_promp.generate_trajectory(phase_speed=0.8,  randomness=1e-1)
 52 |         pos_2, vel_2, acc_2 = d_promp.generate_trajectory(phase_speed=1.,   randomness=1e-1)
 53 |         pos_3, vel_3, acc_3 = d_promp.generate_trajectory(phase_speed=1.33, randomness=1e-1)
 54 | 
 55 |         plt.figure("ProMP-Pos")
 56 |         plt.plot(pos_1, 'r')
 57 |         plt.plot(pos_2, 'g')
 58 |         plt.plot(pos_3, 'b')
 59 | 
 60 | 
 61 |         plt.figure("ProMP-Vel")
 62 |         plt.plot(vel_1, 'r')
 63 |         plt.plot(vel_2, 'g')
 64 |         plt.plot(vel_3, 'b')
 65 | 
 66 | 
 67 | def create_demo_traj():
 68 |     """
 69 |     This funciton shows how to compute 
 70 |     closed form control distribution from the trajectory distribution
 71 |     """
 72 | 
 73 |     #this is just used for demo purposes
 74 |     lsd = LinearSysDyn()
 75 | 
 76 |     state  = data['steps']['states'][0][0]
 77 |     action = data['steps']['actions'][0][0]
 78 | 
 79 |     promp_ctl = PROMPCtrl(promp_obj=d_promp)
 80 |     promp_ctl.update_system_matrices(A=lsd._A, B=lsd._B)
 81 | 
 82 |     ctrl_cmds_mean, ctrl_cmds_sigma = promp_ctl.compute_ctrl_traj(state_list=state)
 83 | 
 84 |     plt.figure("Ctrl cmds")
 85 | 
 86 |     for k in range(lsd._action_dim):
 87 |         
 88 |         mean        = ctrl_cmds_mean[:, k]
 89 |         lower_bound = mean - 3.*ctrl_cmds_sigma[:, k, k]
 90 |         upper_bound = mean + 3*ctrl_cmds_sigma[:, k, k]
 91 | 
 92 |         plot_mean_and_sigma(mean=mean, lower_bound=lower_bound, upper_bound=upper_bound, color_mean='g', color_shading='g')
 93 | 
 94 |     plt.plot(action, 'r')
 95 | 
 96 | 
 97 | def main():
 98 | 
 99 |     demo_generate_traj()
100 |     create_demo_traj()
101 |     plt.show()
102 | 
103 | 
104 | if __name__ == '__main__':
105 |     main()


--------------------------------------------------------------------------------
/promp/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mjm522/promps_python/60259f9ed5201b61894859bfe268849b564c274a/promp/__init__.py


--------------------------------------------------------------------------------
/promp/discrete_promp.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import numpy.matlib as npm
  3 | from scipy.interpolate import interp1d
  4 | 
  5 | class Phase():
  6 |     """
  7 |     Utility funciton to generate phase of a
  8 |     trajectory
  9 |     """
 10 | 
 11 |     def __init__(self, dt=0.01, phase_speed=1., time_steps=200):
 12 | 
 13 |         self._dt =  dt
 14 |         self._phase_start = -dt
 15 |         self._time_steps = time_steps
 16 |         self._phase_end = self._phase_start + self._time_steps*dt
 17 | 
 18 |         #derivative of phase variable
 19 |         Dz  = np.ones(time_steps)*phase_speed
 20 | 
 21 |         z   = np.cumsum(Dz)*dt
 22 |         
 23 |         #phase variable
 24 |         # z   = np.arange(self._phase_start, self._phase_end, dt)
 25 | 
 26 |         # Dz  = np.diff(z)/dt
 27 |         # Dz  = np.hstack([Dz, Dz[-1]])
 28 |         #second derivative of phase variable
 29 |         DDz = np.diff(Dz)/dt
 30 |         DDz = np.hstack([DDz, DDz[-1]])
 31 | 
 32 |         self._z   = z
 33 |         self._Dz  = Dz
 34 |         self._DDz = DDz
 35 | 
 36 |     def get_phase_from_time(self, time_steps):
 37 |         return time_steps/self._phase_end
 38 | 
 39 |     def __call__(self):
 40 |         return self
 41 | 
 42 | class DiscretePROMP(object):
 43 |     """
 44 |     Discrete PROMP
 45 |     """
 46 | 
 47 |     def __init__(self, data, num_bfs=35, bfs_sigma=0.0286, num_centers_outside_range=2.):
 48 | 
 49 |         """
 50 |         Constructor of the class
 51 |         Args: 
 52 |         data: a list of demonstrated trajectories
 53 |         num_bfs = number of basis functions
 54 |         bfs_sigma = width of the basis function
 55 |         """
 56 | 
 57 |         #list to store all demo trajectories
 58 |         self._demo_trajs = data
 59 |         
 60 |         #number of demos available
 61 |         self._num_demos = len(self._demo_trajs)
 62 |         #lenght of each demonstrations
 63 | 
 64 |         self._num_centers_out_range = num_centers_outside_range
 65 |         
 66 |         self._traj_len  = len(self._demo_trajs[0])
 67 |         #time step
 68 |         self._dt  = 0.005
 69 |         
 70 |         #list to store all demo traj velocities
 71 |         self._Ddemo_trajs = self.compute_velocities()
 72 | 
 73 |         #number of basis function
 74 |         self._n_bfs = num_bfs
 75 |         #variance of the basis function
 76 |         self._bfs_sigma = bfs_sigma
 77 |         #centers of the basis function
 78 |         self._bfs_centres = np.linspace(0, 1, self._n_bfs)
 79 | 
 80 |         #list that stores all the weights
 81 |         self._W = []
 82 | 
 83 |         #phase variable
 84 |         self._phase = self.compute_phase(dt=self._dt, phase_speed=1.)
 85 | 
 86 |         #mean and sigma of the weights
 87 |         self._mean_W  = None
 88 |         self._sigma_W = None
 89 | 
 90 |         #compute the basis functions
 91 |         self._Phi, self._PhiD, self._PhiDD  = self.generate_basis_function(phase_z=self._phase._z, phase_zd=self._phase._Dz, phase_zdd=self._phase._DDz)
 92 | 
 93 |         #via points
 94 |         self._viapoints = []
 95 | 
 96 | 
 97 |     def generate_basis_function(self, phase_z, phase_zd, phase_zdd):
 98 | 
 99 |         # basis functions
100 |         phase_minus_centre = np.array(map(lambda x: x - self._bfs_centres, np.tile(phase_z, (self._n_bfs, 1)).T)).T
101 | 
102 |         #basis function
103 |         Phi   = np.exp(-0.5 *np.power(phase_minus_centre/self._bfs_sigma, 2)) / (np.sqrt(2.*np.pi)*self._bfs_sigma)
104 |         
105 |         #first derivative of basis function
106 |         PhiD  = np.multiply(Phi,  -phase_minus_centre/(self._bfs_sigma ** 2))
107 | 
108 |         #second derivative of basis function
109 |         PhiDD = Phi/(-self._bfs_sigma ** 2) + np.multiply(-phase_minus_centre/(self._bfs_sigma ** 2), PhiD)
110 | 
111 |         #for normalization purposes
112 |         sum_bfs    = np.sum(Phi,   axis=0)
113 |         sum_bfsD   = np.sum(PhiD,  axis=0)
114 |         sum_bfsDD  = np.sum(PhiDD, axis=0)
115 | 
116 |         # normalize
117 |         PhiD_normalized = ( (PhiD * sum_bfs - Phi * sum_bfsD) * 1./np.power(sum_bfs, 2) ) 
118 |         
119 |         Phi_normalized  = Phi/sum_bfs[None, :]
120 |  
121 |         tmp1 = Phi * (2 * np.power(sum_bfsD, 2) - np.multiply(sum_bfs, sum_bfsDD))
122 | 
123 |         tmp2 = tmp1 + PhiDD * np.power(sum_bfs, 2) - 2 * PhiD * sum_bfs * sum_bfsD
124 | 
125 |         #normalize acceleration
126 |         PhiDD_normalized = tmp2 * (1./(np.power(sum_bfs, 3)))
127 | 
128 |         #adding phase dependency
129 |         PhiDD_normalized = PhiDD_normalized * np.power(phase_zd, 2)  + PhiD_normalized * phase_zdd
130 |         PhiD_normalized  = PhiD_normalized * phase_zd
131 | 
132 |         return Phi_normalized, PhiD_normalized, PhiDD_normalized
133 | 
134 | 
135 |     def compute_velocities(self):
136 |         """
137 |         function to add new demo to the list
138 |         param: traj : a uni dimentional numpy array
139 |         """
140 |         Ddemo_trajs = []
141 | 
142 |         for demo_traj in self._demo_trajs:
143 |             d_traj = np.diff(demo_traj, axis=0)/self._dt
144 |             #append last element to adjust the length
145 |             d_traj = np.hstack([d_traj, d_traj[-1]])
146 |             #add it to the list
147 |             Ddemo_trajs.append(d_traj)
148 | 
149 | 
150 |     def compute_phase(self, dt, phase_speed):
151 |         """
152 |         This function is for adding the temporal scalability for 
153 |         the basis function
154 |         """
155 |         num_time_steps = int(self._traj_len / phase_speed)
156 | 
157 |         phase = Phase(dt=self._dt, phase_speed=phase_speed, time_steps=num_time_steps)
158 | 
159 |         return phase
160 | 
161 | 
162 |     def train(self):
163 |         """
164 |         This function finds the weights of the 
165 |         given demonstration by first interpolating them
166 |         to 0-1 range and then finding the kernel weights
167 |         corresponding to each trajectory
168 | 
169 |         import matplotlib.pyplot as plt
170 |         for k in range(35):
171 |             plt.plot(self._Phi[k, :])
172 |         plt.show()
173 |         """
174 |         
175 |         for demo_traj in self._demo_trajs:
176 | 
177 |             interpolate = interp1d(self._phase._z, demo_traj, kind='cubic')
178 | 
179 |             #strech the trajectory to fit 0 to 1
180 |             stretched_demo = interpolate(self._phase._z)[None,:]
181 | 
182 |             #compute the weights of the trajectory using the basis function
183 |             w_demo_traj = np.dot(np.linalg.inv(np.dot(self._Phi, self._Phi.T) + 1e-12*np.eye(self._n_bfs) ), np.dot(self._Phi, stretched_demo.T)).T  # weights for each trajectory
184 |             
185 |             #append the weights to the list
186 |             self._W.append(w_demo_traj.copy())
187 | 
188 |         self._W =  np.asarray(self._W).squeeze()
189 |         
190 |         # mean of weights
191 |         self._mean_W = np.mean(self._W, axis=0)
192 |         
193 |         # covariance of weights
194 |         # w1 = np.array(map(lambda x: x - self._mean_W.T, self._W))
195 |         # self._sigma_W = np.dot(w1.T, w1)/self._W.shape[0]
196 | 
197 |         self._sigma_W = np.cov(self._W.T)
198 | 
199 | 
200 |     def clear_viapoints(self):
201 |         """
202 |         delete the already stored via points
203 |         """
204 |         del self._viapoints[:]
205 | 
206 |     def add_viapoint(self, t, traj_point, traj_point_sigma=1e-6):
207 |         """
208 |         Add a viapoint to the trajectory
209 |         Observations and corresponding basis activations
210 |         :param t: timestamp of viapoint
211 |         :param traj_point: observed value at time t
212 |         :param sigmay: observation variance (constraint strength)
213 |         :return:
214 |         """
215 |         self._viapoints.append({"t": t, "traj_point": traj_point, "traj_point_sigma": traj_point_sigma})
216 | 
217 |     def set_goal(self, traj_point, traj_point_sigma=1e-6):
218 |         """
219 |         this function is used to set the goal point of the 
220 |         discrete promp. The last value at time step 1
221 |         """
222 |         self.add_viapoint(1., traj_point, traj_point_sigma)
223 | 
224 |     def set_start(self, traj_point, traj_point_sigma=1e-6):
225 |         """
226 |         this function is used to set the start point of the 
227 |         discrete promp. The last value at time step 0
228 |         """
229 |         self.add_viapoint(0., traj_point, traj_point_sigma)
230 | 
231 | 
232 |     def get_mean(self, t_index):
233 |         """
234 |         function to compute mean of a point at a 
235 |         particular time instant
236 |         """
237 |         mean = np.dot(self._Phi.T, self._mean_W)
238 |         return mean[t_index]
239 | 
240 |     def get_basis(self, t_index):
241 |         """
242 |         returns the basis at a particular instant
243 |         """
244 |         return self._Phi[:, t_index], self._PhiD[:, t_index]
245 | 
246 | 
247 |     def get_traj_cov(self):
248 |         """
249 |         return the covariance of a trajectory
250 |         """
251 |         return np.dot(self._Phi.T, np.dot(self._sigma_W, self._Phi))
252 | 
253 | 
254 |     def get_std(self):
255 |         """
256 |         standard deviation of a trajectory
257 |         """
258 |         std = 2 * np.sqrt(np.diag(np.dot(self._Phi.T, np.dot(self._sigma_W, self._Phi))))
259 |         return std
260 | 
261 |     def get_bounds(self, t_index):
262 |         """
263 |         compute bounds of a value at a specific time index
264 |         """
265 |         mean = self.get_mean(t_index)
266 |         std  = self.get_std()
267 |         return mean - std, mean + std
268 | 
269 | 
270 |     def generate_trajectory(self, phase_speed=1., randomness=1e-4):
271 |         """
272 |         Outputs a trajectory
273 |         :param randomness: float between 0. (output will be the mean of gaussians) and 1. (fully randomized inside the variance)
274 |         :return: a 1-D vector of the generated points
275 |         """
276 |         new_mean_W   = self._mean_W
277 |         new_sigma_W  = self._sigma_W
278 | 
279 |         phase = self.compute_phase(dt=self._dt, phase_speed=phase_speed)
280 | 
281 |         #create new basis functions
282 |         Phi, PhiD, PhiDD  = self.generate_basis_function(phase_z=phase._z, phase_zd=phase._Dz, phase_zdd=phase._DDz)
283 | 
284 |         #loop for all viapoints, to transform 
285 |         #the mean and covariance
286 |         for viapoint in self._viapoints:
287 |             # basis functions at observed time points
288 | 
289 |             PhiT, _, _ = self.generate_basis_function(phase_z=phase.get_phase_from_time(viapoint['t']), phase_zd=phase_speed, phase_zdd=0.)
290 | 
291 |             # Conditioning
292 |             aux = viapoint['traj_point_sigma'] + np.dot(np.dot(PhiT.T, new_sigma_W), PhiT)
293 |             new_mean_W  = new_mean_W  + np.dot(np.dot(new_sigma_W, PhiT) * 1 / aux, (viapoint['traj_point'] - np.dot(PhiT.T, new_mean_W)))  # new weight mean conditioned on observations
294 |             new_sigma_W = new_sigma_W - np.dot(np.dot(new_sigma_W, PhiT) * 1 / aux, np.dot(PhiT.T, new_sigma_W))
295 | 
296 |         #get a weight sample from the weight distribution
297 |         sample_W = np.random.multivariate_normal(new_mean_W, randomness*new_sigma_W, 1).T
298 | 
299 |         return np.dot(Phi.T, sample_W), np.dot(PhiD.T, sample_W), np.dot(PhiDD.T, sample_W)
300 | 
301 | 


--------------------------------------------------------------------------------
/promp/linear_sys_dyn.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | class LinearSysDyn():
 5 |     """
 6 |     A simple linear system dynamics
 7 |     a system of mass 1kg and 1D state.
 8 |     This class is a general linear system 
 9 |     if a different system matrix (A) and different control matrix (B) is passed
10 |     """
11 | 
12 |     def __init__(self, A=None, B=None, c=None, dt=0.005, mass=1.):
13 |         """
14 |         Constructor of the class
15 |         Args:
16 |         A = System matrix = > shape: [state_dim x state_dim]
17 |         B = Control matrix = > shape: [state_dim x action_dim]
18 |         c = Sysem drift matrix => shape: [state_dim x action_dim]
19 |         dt = time step
20 |         mass = mass of the body
21 |         """
22 | 
23 |         #system matrix
24 |         if A is None:
25 |             self._A = np.array([ [0.,1.], [0., 0.] ])
26 |         else:
27 |             self._A = A
28 | 
29 |         #control matrix
30 |         if B is None:
31 |             self._B = np.array([ [0.], [1.] ])
32 |         else:
33 |             self._B = B
34 | 
35 |         #drift vector
36 |         if c is None:
37 |             self._c = np.array([ [0.], [0.] ])
38 |         else:
39 |             self._c = c
40 |         
41 |         self._dt = dt
42 | 
43 |         self._mass = mass
44 | 
45 |         self._state_dim, self._action_dim = self._B.shape #position + velocity
46 | 
47 | 
48 |     def compute_next_states(self, state_list, action_list):
49 |         """
50 |         A simple one step integration 
51 |         for computing next state, the operations are done for
52 |         the entire statelist and action list
53 |         Args: 
54 |         state_list = array of states shape => [time_steps, state_dim]
55 |         action_list = array of states shape => [time_steps, action_dim]
56 |         """
57 | 
58 |         #assuming action are forces
59 |         acc = (action_list*1./self._mass).squeeze()
60 | 
61 |         d_state_list = np.zeros_like(state_list)
62 | 
63 |         #assign the velocity
64 |         d_state_list[:, 0:self._state_dim/2] = state_list[:, 0:self._state_dim/2]
65 |         #assign the acceleration
66 |         d_state_list[:, self._state_dim/2:] = acc
67 | 
68 |         #euler integration position
69 |         state_new_list = state_list + self._dt * d_state_list
70 | 
71 |         #euler integration velocity
72 |         state_new_list[:, self._state_dim/2:] += 0.5*acc*self._dt**2
73 | 
74 |         return state_new_list
75 | 
76 | 


--------------------------------------------------------------------------------
/promp/promp_ctrl.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | 
  3 | 
  4 | class PROMPCtrl(object):
  5 |     """
  6 |     This class computes the feedback and feedforward gains of the a PROMP in
  7 |     closed loop. For more reference, see: https://link.springer.com/article/10.1007/s10514-017-9648-7
  8 |     """
  9 | 
 10 |     def __init__(self, promp_obj, dt=0.005):
 11 | 
 12 |         """
 13 |         Constructor of the class:
 14 |         Args:
 15 |         A = linearised system dynamics matrix
 16 |         B = linearised system control matrix
 17 |         D_mu = 
 18 |         D_cov= 
 19 |         promb_obj = Instatiation of the discrete promp class
 20 |         """
 21 | 
 22 |         #promp object
 23 |         self._promp = promp_obj
 24 | 
 25 |         #system matrix
 26 |         self._A = None
 27 | 
 28 |         #control matrix
 29 |         self._B = None
 30 | 
 31 |         #time step
 32 |         self._dt = dt
 33 | 
 34 |         self._Phi   = self._promp._Phi
 35 |         self._PhiD  = self._promp._PhiD
 36 |         self._PhiDD = self._promp._PhiDD
 37 | 
 38 |         self._sigma_W = self._promp._sigma_W
 39 |         self._mean_W  = self._promp._mean_W
 40 | 
 41 |         #time steps
 42 |         self._time_steps = self._Phi.shape[1]
 43 | 
 44 |        
 45 |     def update_system_matrices(self, A, B):
 46 |         """
 47 |         Update the system matrices 
 48 |         this is for the purpose of adding time varying 
 49 |         system matrices
 50 |         """
 51 |         self._A = A
 52 |         self._B = B
 53 | 
 54 | 
 55 |     def get_basis(self, t):
 56 |         """
 57 |         This function creates a basis and Dbasis
 58 |         basis  = [Phi; PhiD]
 59 |         Dbasis = [PhiD; PhiDD]
 60 |         """
 61 |         return np.vstack([self._Phi[:, t], self._PhiD[:, t]]), np.vstack([self._PhiD[:, t], self._PhiDD[:, t]])
 62 | 
 63 | 
 64 | 
 65 |     def compute_gains(self, t, add_noise=True):
 66 |         """
 67 |         the control command is assumed to be of type
 68 |         u = Kx + k + eps
 69 |         """
 70 |         #get the basis funtion at a time step
 71 |         basis, Dbasis = self.get_basis(t)
 72 | 
 73 |         if t < self._time_steps-1:
 74 |             basis_t_dt, _ = self.get_basis(t+1)
 75 |         else:
 76 |             basis_t_dt = np.zeros_like(basis)
 77 | 
 78 | 
 79 |         #part 1 equation 46
 80 |         B_pseudo = np.linalg.pinv(self._B)
 81 | 
 82 |         #equation 12 for t
 83 |         Sigma_t =  np.dot(np.dot(basis, self._sigma_W), basis.T)
 84 | 
 85 |         #equation 12 for t+dt
 86 |         Sigma_t_dt = np.dot(np.dot(basis_t_dt, self._sigma_W), basis_t_dt.T)
 87 | 
 88 |         #Cross correlation between t, t+dt, Equation 49
 89 |         Ct = np.dot(np.dot(basis, self._sigma_W), basis_t_dt.T)
 90 | 
 91 |         #System noise Equation 51
 92 |         Sigma_s = (1./self._dt)* ( Sigma_t_dt - np.dot( np.dot( Ct.T, np.linalg.inv(Sigma_t) ), Ct) )
 93 | 
 94 |         #control noise Equation 52
 95 |         Sigma_u = np.dot(np.dot(B_pseudo, Sigma_s), B_pseudo.T)
 96 | 
 97 |         #part 2 equation 46
 98 |         tmp1 = np.dot(np.dot(Dbasis, self._sigma_W), basis.T)
 99 | 
100 |         #part 3 equation 46
101 |         tmp2 = np.dot(self._A, Sigma_t) + 0.5*Sigma_s
102 | 
103 |         #compute feedback gain; complete equation 46
104 |         K = np.dot( np.dot(B_pseudo, (tmp1-tmp2) ), np.linalg.inv(Sigma_t))
105 | 
106 |         #part 1 equation 48
107 |         tmp3 = np.dot(Dbasis, self._mean_W)
108 | 
109 |         #part 2 equation 48
110 |         tmp4 = np.dot( (self._A + np.dot(self._B, K)), np.dot(basis, self._mean_W) )
111 | 
112 |         #compute feedforward gain; complete equation 48
113 |         k = np.dot(B_pseudo, (tmp3-tmp4))
114 | 
115 |         return K, k, Sigma_u
116 | 
117 | 
118 |     def compute_gain_traj(self):
119 |         """
120 |         This function is to compute the entire gain trajectory
121 |         of a given state distribution
122 |         """
123 |         time_steps = self._Phi.shape[1]
124 |         state_dim, action_dim = self._B.shape
125 | 
126 |         K_traj = np.zeros([time_steps, state_dim, state_dim])
127 |         k_traj = np.zeros([time_steps, action_dim])
128 |         Sigma_u_traj = np.zeros([time_steps, action_dim, action_dim])
129 | 
130 |         for t in range(time_steps):
131 | 
132 |             K_traj[t, :, :], k_traj[t, :], Sigma_u_traj[t, :, :] = self.compute_gains(t)
133 | 
134 |         return K_traj, k_traj, Sigma_u_traj
135 | 
136 | 
137 |     def compute_control_cmd(self, t, state, sample=False):
138 |         """
139 |         This function is compute the specific control
140 |         command at a time step t 
141 |         Args: 
142 |         t : time step
143 |         state : state for which control command needs to be computed
144 |         """
145 | 
146 |         K, k, Sigma_u = self.compute_gains(t)
147 | 
148 |         mean_u = np.dot(K, state) + k
149 | 
150 |         if sample:
151 |             #get a weight sample from the weight distribution
152 |             return np.random.multivariate_normal(mean_u, Sigma_u, 1).T,  Sigma_u
153 | 
154 |         else:
155 | 
156 |             return mean_u, Sigma_u
157 | 
158 | 
159 |     def compute_ctrl_traj(self, state_list):
160 |         """
161 |         This function computes an entire
162 |         control sequence for a given state list
163 |         Args:
164 |         state_list for which control has to be computed
165 |         this assumes that len(state_list) = timesteps in the basis function
166 |         """
167 | 
168 |         time_steps = self._Phi.shape[1]
169 |         _, action_dim = self._B.shape
170 | 
171 |         ctrl_cmds_mean  =  np.zeros([time_steps, action_dim])
172 |         ctrl_cmds_sigma =  np.zeros([time_steps, action_dim, action_dim])
173 | 
174 |         for t in range(time_steps):
175 | 
176 |             ctrl_cmds_mean[t, :], ctrl_cmds_sigma[t, :, :] = self.compute_control_cmd(t, state_list[t,:])
177 | 
178 |         return ctrl_cmds_mean, ctrl_cmds_sigma
179 | 
180 | 
181 | 
182 | 


--------------------------------------------------------------------------------
/set_env_bash.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ROOT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 4 | 
 5 | export PYTHONPATH=$ROOT_DIR:$PYTHONPATH
 6 | 
 7 | cd $ROOT_DIR
 8 | 
 9 | export ROOT_DIR=$ROOT_DIR
10 | 
11 | echo "PYTHON PATH IS: $PYTHONPATH"


--------------------------------------------------------------------------------