├── .gitignore ├── README.md ├── data ├── data.pkl ├── data_demo_promp.pkl ├── im_data_1d_opt.mat └── load_mat_file.py ├── demos ├── demo_promp.py └── demo_with_matlab_data.py ├── promp ├── __init__.py ├── discrete_promp.py ├── linear_sys_dyn.py └── promp_ctrl.py └── set_env_bash.sh /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PROMP library written in python 2 | 3 | * Implementation of Probabilistic Movement Primitives in the [paper](https://link.springer.com/article/10.1007/s10514-017-9648-7) 4 | 5 | * Few of the utility functions are used from following [implementation](https://github.com/baxter-flowers/promplib). 6 | 7 | * Includes the closed form implementation of trajectory controller implementation as well. 8 | 9 | * Since one PROMP can only model one 1D, several of these has to be called to create a multi-dimensional PROMP 10 | 11 | 12 | ### Instructions 13 | 14 | * Clone the repo 15 | 16 | * From the cloned repo folder; run set_env_bash.sh 17 | 18 | * Run the file demo_with_matlab_data.py from the demos folder (this uses a demonstration given in the data folder) 19 | -------------------------------------------------------------------------------- /data/im_data_1d_opt.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjm522/promps_python/60259f9ed5201b61894859bfe268849b564c274a/data/im_data_1d_opt.mat -------------------------------------------------------------------------------- /data/load_mat_file.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | from scipy.io import loadmat 4 | 5 | data = loadmat(os.environ['ROOT_DIR'] + '/data/im_data_1d_opt.mat')['imData'] 6 | 7 | 8 | steps = {'numElements': data[0,0][0][0][0]['steps']['numElements'], 9 | 'actions': data[0,0][0][0][0]['steps']['actions'], 10 | 'nextStates': data[0,0][0][0][0]['steps']['nextStates'], 11 | 'rewards': data[0,0][0][0][0]['steps']['rewards'], 12 | 'states': data[0,0][0][0][0]['steps']['states'], 13 | 'timeSteps': data[0,0][0][0][0]['steps']['timeSteps'],} 14 | 15 | data_py = {'numElements': data[0,0][0][0][0]['numElements'], 16 | 'iterationNumber': data[0,0][0][0][0]['iterationNumber'], 17 | 'finalRewards': data[0,0][0][0][0]['finalRewards'], 18 | 'dt':data[0,0][0][0][0]['dt'], 19 | 'noise_std':data[0,0][0][0][0]['noise_std'], 20 | 'init_m':data[0,0][0][0][0]['init_m'], 21 | 'init_std':data[0,0][0][0][0]['init_std'], 22 | 'maxAction':data[0,0][0][0][0]['maxAction'], 23 | 'steps':steps, } 24 | 25 | 26 | pickle_out = open(os.environ['ROOT_DIR'] + '/data/data.pkl', "wb") 27 | pickle.dump(data_py, pickle_out) 28 | pickle_out.close() -------------------------------------------------------------------------------- /demos/demo_promp.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | # from aml_lfd.promp.discrete_promp import DiscretePROMP 4 | 5 | from aml_lfd.promp.promp_trial import DiscretePROMP 6 | from aml_lfd.promp.promp_ctrl import PROMPCtrl 7 | 8 | 9 | np.random.seed(0) 10 | 11 | def demo_generate_traj(): 12 | 13 | plt.figure("DiscretePROMP") 14 | 15 | # Generate and plot trajectory Data 16 | x = np.arange(0,1.01,0.01) # time points for trajectories 17 | nrTraj=30 # number of trajectoreis for training 18 | sigmaNoise=0.02 # noise on training trajectories 19 | A = np.array([.2, .2, .01, -.05]) 20 | X = np.vstack( (np.sin(5*x), x**2, x, np.ones((1,len(x))) )) 21 | Y = np.zeros( (nrTraj,len(x)) ) 22 | 23 | demos_list = [] 24 | for traj in range(0, nrTraj): 25 | sample = np.dot(A + sigmaNoise * np.random.randn(1,4), X)[0] 26 | plt.plot(sample, 'k') 27 | demos_list.append(sample) 28 | 29 | 30 | #create a promb object by passing the data 31 | d_promp = DiscretePROMP(data=demos_list) 32 | d_promp.train() 33 | 34 | #add a via point 35 | # d_promp.add_viapoint(0.7, 5) 36 | # plt.scatter(0.7, 5, marker='*', s=100) 37 | 38 | #set the start and goal, the spatial scaling 39 | d_promp.set_start(0.2) 40 | d_promp.set_goal(-0.1) 41 | 42 | for _ in range(100): 43 | 44 | plt.plot(d_promp.generate_trajectory(phase_speed=0.5, randomness=1e-3), 'r') 45 | plt.plot(d_promp.generate_trajectory(phase_speed=1., randomness=1e-3), 'g') 46 | plt.plot(d_promp.generate_trajectory(phase_speed=2., randomness=1e-3), 'b') 47 | 48 | plt.legend() 49 | plt.show() 50 | 51 | 52 | 53 | def demo_ctrl_traj(): 54 | 55 | A = np.array([.2, .2, .01, -.05]) 56 | B = np.random.randn(4,2) 57 | 58 | x0 = np.zeros(4) 59 | 60 | demos_list = [] 61 | T = 100 62 | n_demos = 30 63 | sigma_noise = 0.01 64 | 65 | u = np.random.randn(2, T) 66 | 67 | for _ in range(n_demos): 68 | traj = np.zeros([4,T]) 69 | 70 | for k in range(T-1): 71 | 72 | traj[:, k+1] = np.dot( (A + sigma_noise * np.random.randn(4)), traj[:, k]) + np.dot(B, u[:, k]) 73 | 74 | demos_list.append(traj[0, :].copy()) 75 | 76 | 77 | #create a promb object by passing the data 78 | d_promp = DiscretePROMP(data=demos_list) 79 | d_promp.train() 80 | 81 | c_promp = PROMPCtrl(promp_obj=d_promp, A=A, B=B) 82 | 83 | u_computed = np.zeros(T) 84 | 85 | for t in range(T): 86 | u_computed[t] = c_promp.compute_gains(t, add_noise=False) 87 | 88 | 89 | print "Initial u \n", u 90 | print "Computed u \n", u_computed 91 | 92 | 93 | 94 | def main(): 95 | 96 | demo_ctrl_traj() 97 | # demo_generate_traj() 98 | 99 | 100 | if __name__ == '__main__': 101 | main() 102 | 103 | 104 | -------------------------------------------------------------------------------- /demos/demo_with_matlab_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | from promp.discrete_promp import DiscretePROMP 6 | from promp.linear_sys_dyn import LinearSysDyn 7 | from promp.promp_ctrl import PROMPCtrl 8 | 9 | 10 | pickle_in = open(os.environ['ROOT_DIR'] + '/data/data.pkl',"rb") 11 | data = pickle.load(pickle_in) 12 | 13 | 14 | demos_list = [data['steps']['states'][k][0][:,0] for k in range(100)] 15 | Ddemos_list = [data['steps']['states'][k][0][:,1] for k in range(100)] 16 | 17 | #create a promb object by passing the data 18 | d_promp = DiscretePROMP(data=demos_list) 19 | d_promp.train() 20 | 21 | def plot_mean_and_sigma(mean, lower_bound, upper_bound, color_mean=None, color_shading=None): 22 | # plot the shaded range of the confidence intervals 23 | plt.fill_between(range(mean.shape[0]), lower_bound, upper_bound, color=color_shading, alpha=.5) 24 | # plot the mean on top 25 | plt.plot(mean, color_mean) 26 | 27 | 28 | def demo_generate_traj(): 29 | 30 | #add a via point 31 | # d_promp.add_viapoint(0.7, 5) 32 | # plt.scatter(0.7, 5, marker='*', s=100) 33 | 34 | #set the start and goal, the spatial scaling 35 | d_promp.set_start(demos_list[0][0]) 36 | d_promp.set_goal(demos_list[0][-1]) 37 | 38 | #add a via point 39 | # d_promp.add_viapoint(0.3, 2.25) 40 | # d_promp.add_viapoint(0.6, 2.25) 41 | # plt.scatter(0.7, 5, marker='*', s=100) 42 | 43 | for traj, traj_vel in zip(demos_list, Ddemos_list): 44 | plt.figure("ProMP-Pos") 45 | plt.plot(traj, 'k', alpha=0.2) 46 | plt.figure("ProMP-Vel") 47 | plt.plot(traj_vel, 'k', alpha=0.2) 48 | 49 | for _ in range(1): 50 | 51 | pos_1, vel_1, acc_1 = d_promp.generate_trajectory(phase_speed=0.8, randomness=1e-1) 52 | pos_2, vel_2, acc_2 = d_promp.generate_trajectory(phase_speed=1., randomness=1e-1) 53 | pos_3, vel_3, acc_3 = d_promp.generate_trajectory(phase_speed=1.33, randomness=1e-1) 54 | 55 | plt.figure("ProMP-Pos") 56 | plt.plot(pos_1, 'r') 57 | plt.plot(pos_2, 'g') 58 | plt.plot(pos_3, 'b') 59 | 60 | 61 | plt.figure("ProMP-Vel") 62 | plt.plot(vel_1, 'r') 63 | plt.plot(vel_2, 'g') 64 | plt.plot(vel_3, 'b') 65 | 66 | 67 | def create_demo_traj(): 68 | """ 69 | This funciton shows how to compute 70 | closed form control distribution from the trajectory distribution 71 | """ 72 | 73 | #this is just used for demo purposes 74 | lsd = LinearSysDyn() 75 | 76 | state = data['steps']['states'][0][0] 77 | action = data['steps']['actions'][0][0] 78 | 79 | promp_ctl = PROMPCtrl(promp_obj=d_promp) 80 | promp_ctl.update_system_matrices(A=lsd._A, B=lsd._B) 81 | 82 | ctrl_cmds_mean, ctrl_cmds_sigma = promp_ctl.compute_ctrl_traj(state_list=state) 83 | 84 | plt.figure("Ctrl cmds") 85 | 86 | for k in range(lsd._action_dim): 87 | 88 | mean = ctrl_cmds_mean[:, k] 89 | lower_bound = mean - 3.*ctrl_cmds_sigma[:, k, k] 90 | upper_bound = mean + 3*ctrl_cmds_sigma[:, k, k] 91 | 92 | plot_mean_and_sigma(mean=mean, lower_bound=lower_bound, upper_bound=upper_bound, color_mean='g', color_shading='g') 93 | 94 | plt.plot(action, 'r') 95 | 96 | 97 | def main(): 98 | 99 | demo_generate_traj() 100 | create_demo_traj() 101 | plt.show() 102 | 103 | 104 | if __name__ == '__main__': 105 | main() -------------------------------------------------------------------------------- /promp/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjm522/promps_python/60259f9ed5201b61894859bfe268849b564c274a/promp/__init__.py -------------------------------------------------------------------------------- /promp/discrete_promp.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.matlib as npm 3 | from scipy.interpolate import interp1d 4 | 5 | class Phase(): 6 | """ 7 | Utility funciton to generate phase of a 8 | trajectory 9 | """ 10 | 11 | def __init__(self, dt=0.01, phase_speed=1., time_steps=200): 12 | 13 | self._dt = dt 14 | self._phase_start = -dt 15 | self._time_steps = time_steps 16 | self._phase_end = self._phase_start + self._time_steps*dt 17 | 18 | #derivative of phase variable 19 | Dz = np.ones(time_steps)*phase_speed 20 | 21 | z = np.cumsum(Dz)*dt 22 | 23 | #phase variable 24 | # z = np.arange(self._phase_start, self._phase_end, dt) 25 | 26 | # Dz = np.diff(z)/dt 27 | # Dz = np.hstack([Dz, Dz[-1]]) 28 | #second derivative of phase variable 29 | DDz = np.diff(Dz)/dt 30 | DDz = np.hstack([DDz, DDz[-1]]) 31 | 32 | self._z = z 33 | self._Dz = Dz 34 | self._DDz = DDz 35 | 36 | def get_phase_from_time(self, time_steps): 37 | return time_steps/self._phase_end 38 | 39 | def __call__(self): 40 | return self 41 | 42 | class DiscretePROMP(object): 43 | """ 44 | Discrete PROMP 45 | """ 46 | 47 | def __init__(self, data, num_bfs=35, bfs_sigma=0.0286, num_centers_outside_range=2.): 48 | 49 | """ 50 | Constructor of the class 51 | Args: 52 | data: a list of demonstrated trajectories 53 | num_bfs = number of basis functions 54 | bfs_sigma = width of the basis function 55 | """ 56 | 57 | #list to store all demo trajectories 58 | self._demo_trajs = data 59 | 60 | #number of demos available 61 | self._num_demos = len(self._demo_trajs) 62 | #lenght of each demonstrations 63 | 64 | self._num_centers_out_range = num_centers_outside_range 65 | 66 | self._traj_len = len(self._demo_trajs[0]) 67 | #time step 68 | self._dt = 0.005 69 | 70 | #list to store all demo traj velocities 71 | self._Ddemo_trajs = self.compute_velocities() 72 | 73 | #number of basis function 74 | self._n_bfs = num_bfs 75 | #variance of the basis function 76 | self._bfs_sigma = bfs_sigma 77 | #centers of the basis function 78 | self._bfs_centres = np.linspace(0, 1, self._n_bfs) 79 | 80 | #list that stores all the weights 81 | self._W = [] 82 | 83 | #phase variable 84 | self._phase = self.compute_phase(dt=self._dt, phase_speed=1.) 85 | 86 | #mean and sigma of the weights 87 | self._mean_W = None 88 | self._sigma_W = None 89 | 90 | #compute the basis functions 91 | self._Phi, self._PhiD, self._PhiDD = self.generate_basis_function(phase_z=self._phase._z, phase_zd=self._phase._Dz, phase_zdd=self._phase._DDz) 92 | 93 | #via points 94 | self._viapoints = [] 95 | 96 | 97 | def generate_basis_function(self, phase_z, phase_zd, phase_zdd): 98 | 99 | # basis functions 100 | phase_minus_centre = np.array(map(lambda x: x - self._bfs_centres, np.tile(phase_z, (self._n_bfs, 1)).T)).T 101 | 102 | #basis function 103 | Phi = np.exp(-0.5 *np.power(phase_minus_centre/self._bfs_sigma, 2)) / (np.sqrt(2.*np.pi)*self._bfs_sigma) 104 | 105 | #first derivative of basis function 106 | PhiD = np.multiply(Phi, -phase_minus_centre/(self._bfs_sigma ** 2)) 107 | 108 | #second derivative of basis function 109 | PhiDD = Phi/(-self._bfs_sigma ** 2) + np.multiply(-phase_minus_centre/(self._bfs_sigma ** 2), PhiD) 110 | 111 | #for normalization purposes 112 | sum_bfs = np.sum(Phi, axis=0) 113 | sum_bfsD = np.sum(PhiD, axis=0) 114 | sum_bfsDD = np.sum(PhiDD, axis=0) 115 | 116 | # normalize 117 | PhiD_normalized = ( (PhiD * sum_bfs - Phi * sum_bfsD) * 1./np.power(sum_bfs, 2) ) 118 | 119 | Phi_normalized = Phi/sum_bfs[None, :] 120 | 121 | tmp1 = Phi * (2 * np.power(sum_bfsD, 2) - np.multiply(sum_bfs, sum_bfsDD)) 122 | 123 | tmp2 = tmp1 + PhiDD * np.power(sum_bfs, 2) - 2 * PhiD * sum_bfs * sum_bfsD 124 | 125 | #normalize acceleration 126 | PhiDD_normalized = tmp2 * (1./(np.power(sum_bfs, 3))) 127 | 128 | #adding phase dependency 129 | PhiDD_normalized = PhiDD_normalized * np.power(phase_zd, 2) + PhiD_normalized * phase_zdd 130 | PhiD_normalized = PhiD_normalized * phase_zd 131 | 132 | return Phi_normalized, PhiD_normalized, PhiDD_normalized 133 | 134 | 135 | def compute_velocities(self): 136 | """ 137 | function to add new demo to the list 138 | param: traj : a uni dimentional numpy array 139 | """ 140 | Ddemo_trajs = [] 141 | 142 | for demo_traj in self._demo_trajs: 143 | d_traj = np.diff(demo_traj, axis=0)/self._dt 144 | #append last element to adjust the length 145 | d_traj = np.hstack([d_traj, d_traj[-1]]) 146 | #add it to the list 147 | Ddemo_trajs.append(d_traj) 148 | 149 | 150 | def compute_phase(self, dt, phase_speed): 151 | """ 152 | This function is for adding the temporal scalability for 153 | the basis function 154 | """ 155 | num_time_steps = int(self._traj_len / phase_speed) 156 | 157 | phase = Phase(dt=self._dt, phase_speed=phase_speed, time_steps=num_time_steps) 158 | 159 | return phase 160 | 161 | 162 | def train(self): 163 | """ 164 | This function finds the weights of the 165 | given demonstration by first interpolating them 166 | to 0-1 range and then finding the kernel weights 167 | corresponding to each trajectory 168 | 169 | import matplotlib.pyplot as plt 170 | for k in range(35): 171 | plt.plot(self._Phi[k, :]) 172 | plt.show() 173 | """ 174 | 175 | for demo_traj in self._demo_trajs: 176 | 177 | interpolate = interp1d(self._phase._z, demo_traj, kind='cubic') 178 | 179 | #strech the trajectory to fit 0 to 1 180 | stretched_demo = interpolate(self._phase._z)[None,:] 181 | 182 | #compute the weights of the trajectory using the basis function 183 | w_demo_traj = np.dot(np.linalg.inv(np.dot(self._Phi, self._Phi.T) + 1e-12*np.eye(self._n_bfs) ), np.dot(self._Phi, stretched_demo.T)).T # weights for each trajectory 184 | 185 | #append the weights to the list 186 | self._W.append(w_demo_traj.copy()) 187 | 188 | self._W = np.asarray(self._W).squeeze() 189 | 190 | # mean of weights 191 | self._mean_W = np.mean(self._W, axis=0) 192 | 193 | # covariance of weights 194 | # w1 = np.array(map(lambda x: x - self._mean_W.T, self._W)) 195 | # self._sigma_W = np.dot(w1.T, w1)/self._W.shape[0] 196 | 197 | self._sigma_W = np.cov(self._W.T) 198 | 199 | 200 | def clear_viapoints(self): 201 | """ 202 | delete the already stored via points 203 | """ 204 | del self._viapoints[:] 205 | 206 | def add_viapoint(self, t, traj_point, traj_point_sigma=1e-6): 207 | """ 208 | Add a viapoint to the trajectory 209 | Observations and corresponding basis activations 210 | :param t: timestamp of viapoint 211 | :param traj_point: observed value at time t 212 | :param sigmay: observation variance (constraint strength) 213 | :return: 214 | """ 215 | self._viapoints.append({"t": t, "traj_point": traj_point, "traj_point_sigma": traj_point_sigma}) 216 | 217 | def set_goal(self, traj_point, traj_point_sigma=1e-6): 218 | """ 219 | this function is used to set the goal point of the 220 | discrete promp. The last value at time step 1 221 | """ 222 | self.add_viapoint(1., traj_point, traj_point_sigma) 223 | 224 | def set_start(self, traj_point, traj_point_sigma=1e-6): 225 | """ 226 | this function is used to set the start point of the 227 | discrete promp. The last value at time step 0 228 | """ 229 | self.add_viapoint(0., traj_point, traj_point_sigma) 230 | 231 | 232 | def get_mean(self, t_index): 233 | """ 234 | function to compute mean of a point at a 235 | particular time instant 236 | """ 237 | mean = np.dot(self._Phi.T, self._mean_W) 238 | return mean[t_index] 239 | 240 | def get_basis(self, t_index): 241 | """ 242 | returns the basis at a particular instant 243 | """ 244 | return self._Phi[:, t_index], self._PhiD[:, t_index] 245 | 246 | 247 | def get_traj_cov(self): 248 | """ 249 | return the covariance of a trajectory 250 | """ 251 | return np.dot(self._Phi.T, np.dot(self._sigma_W, self._Phi)) 252 | 253 | 254 | def get_std(self): 255 | """ 256 | standard deviation of a trajectory 257 | """ 258 | std = 2 * np.sqrt(np.diag(np.dot(self._Phi.T, np.dot(self._sigma_W, self._Phi)))) 259 | return std 260 | 261 | def get_bounds(self, t_index): 262 | """ 263 | compute bounds of a value at a specific time index 264 | """ 265 | mean = self.get_mean(t_index) 266 | std = self.get_std() 267 | return mean - std, mean + std 268 | 269 | 270 | def generate_trajectory(self, phase_speed=1., randomness=1e-4): 271 | """ 272 | Outputs a trajectory 273 | :param randomness: float between 0. (output will be the mean of gaussians) and 1. (fully randomized inside the variance) 274 | :return: a 1-D vector of the generated points 275 | """ 276 | new_mean_W = self._mean_W 277 | new_sigma_W = self._sigma_W 278 | 279 | phase = self.compute_phase(dt=self._dt, phase_speed=phase_speed) 280 | 281 | #create new basis functions 282 | Phi, PhiD, PhiDD = self.generate_basis_function(phase_z=phase._z, phase_zd=phase._Dz, phase_zdd=phase._DDz) 283 | 284 | #loop for all viapoints, to transform 285 | #the mean and covariance 286 | for viapoint in self._viapoints: 287 | # basis functions at observed time points 288 | 289 | PhiT, _, _ = self.generate_basis_function(phase_z=phase.get_phase_from_time(viapoint['t']), phase_zd=phase_speed, phase_zdd=0.) 290 | 291 | # Conditioning 292 | aux = viapoint['traj_point_sigma'] + np.dot(np.dot(PhiT.T, new_sigma_W), PhiT) 293 | new_mean_W = new_mean_W + np.dot(np.dot(new_sigma_W, PhiT) * 1 / aux, (viapoint['traj_point'] - np.dot(PhiT.T, new_mean_W))) # new weight mean conditioned on observations 294 | new_sigma_W = new_sigma_W - np.dot(np.dot(new_sigma_W, PhiT) * 1 / aux, np.dot(PhiT.T, new_sigma_W)) 295 | 296 | #get a weight sample from the weight distribution 297 | sample_W = np.random.multivariate_normal(new_mean_W, randomness*new_sigma_W, 1).T 298 | 299 | return np.dot(Phi.T, sample_W), np.dot(PhiD.T, sample_W), np.dot(PhiDD.T, sample_W) 300 | 301 | -------------------------------------------------------------------------------- /promp/linear_sys_dyn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class LinearSysDyn(): 5 | """ 6 | A simple linear system dynamics 7 | a system of mass 1kg and 1D state. 8 | This class is a general linear system 9 | if a different system matrix (A) and different control matrix (B) is passed 10 | """ 11 | 12 | def __init__(self, A=None, B=None, c=None, dt=0.005, mass=1.): 13 | """ 14 | Constructor of the class 15 | Args: 16 | A = System matrix = > shape: [state_dim x state_dim] 17 | B = Control matrix = > shape: [state_dim x action_dim] 18 | c = Sysem drift matrix => shape: [state_dim x action_dim] 19 | dt = time step 20 | mass = mass of the body 21 | """ 22 | 23 | #system matrix 24 | if A is None: 25 | self._A = np.array([ [0.,1.], [0., 0.] ]) 26 | else: 27 | self._A = A 28 | 29 | #control matrix 30 | if B is None: 31 | self._B = np.array([ [0.], [1.] ]) 32 | else: 33 | self._B = B 34 | 35 | #drift vector 36 | if c is None: 37 | self._c = np.array([ [0.], [0.] ]) 38 | else: 39 | self._c = c 40 | 41 | self._dt = dt 42 | 43 | self._mass = mass 44 | 45 | self._state_dim, self._action_dim = self._B.shape #position + velocity 46 | 47 | 48 | def compute_next_states(self, state_list, action_list): 49 | """ 50 | A simple one step integration 51 | for computing next state, the operations are done for 52 | the entire statelist and action list 53 | Args: 54 | state_list = array of states shape => [time_steps, state_dim] 55 | action_list = array of states shape => [time_steps, action_dim] 56 | """ 57 | 58 | #assuming action are forces 59 | acc = (action_list*1./self._mass).squeeze() 60 | 61 | d_state_list = np.zeros_like(state_list) 62 | 63 | #assign the velocity 64 | d_state_list[:, 0:self._state_dim/2] = state_list[:, 0:self._state_dim/2] 65 | #assign the acceleration 66 | d_state_list[:, self._state_dim/2:] = acc 67 | 68 | #euler integration position 69 | state_new_list = state_list + self._dt * d_state_list 70 | 71 | #euler integration velocity 72 | state_new_list[:, self._state_dim/2:] += 0.5*acc*self._dt**2 73 | 74 | return state_new_list 75 | 76 | -------------------------------------------------------------------------------- /promp/promp_ctrl.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class PROMPCtrl(object): 5 | """ 6 | This class computes the feedback and feedforward gains of the a PROMP in 7 | closed loop. For more reference, see: https://link.springer.com/article/10.1007/s10514-017-9648-7 8 | """ 9 | 10 | def __init__(self, promp_obj, dt=0.005): 11 | 12 | """ 13 | Constructor of the class: 14 | Args: 15 | A = linearised system dynamics matrix 16 | B = linearised system control matrix 17 | D_mu = 18 | D_cov= 19 | promb_obj = Instatiation of the discrete promp class 20 | """ 21 | 22 | #promp object 23 | self._promp = promp_obj 24 | 25 | #system matrix 26 | self._A = None 27 | 28 | #control matrix 29 | self._B = None 30 | 31 | #time step 32 | self._dt = dt 33 | 34 | self._Phi = self._promp._Phi 35 | self._PhiD = self._promp._PhiD 36 | self._PhiDD = self._promp._PhiDD 37 | 38 | self._sigma_W = self._promp._sigma_W 39 | self._mean_W = self._promp._mean_W 40 | 41 | #time steps 42 | self._time_steps = self._Phi.shape[1] 43 | 44 | 45 | def update_system_matrices(self, A, B): 46 | """ 47 | Update the system matrices 48 | this is for the purpose of adding time varying 49 | system matrices 50 | """ 51 | self._A = A 52 | self._B = B 53 | 54 | 55 | def get_basis(self, t): 56 | """ 57 | This function creates a basis and Dbasis 58 | basis = [Phi; PhiD] 59 | Dbasis = [PhiD; PhiDD] 60 | """ 61 | return np.vstack([self._Phi[:, t], self._PhiD[:, t]]), np.vstack([self._PhiD[:, t], self._PhiDD[:, t]]) 62 | 63 | 64 | 65 | def compute_gains(self, t, add_noise=True): 66 | """ 67 | the control command is assumed to be of type 68 | u = Kx + k + eps 69 | """ 70 | #get the basis funtion at a time step 71 | basis, Dbasis = self.get_basis(t) 72 | 73 | if t < self._time_steps-1: 74 | basis_t_dt, _ = self.get_basis(t+1) 75 | else: 76 | basis_t_dt = np.zeros_like(basis) 77 | 78 | 79 | #part 1 equation 46 80 | B_pseudo = np.linalg.pinv(self._B) 81 | 82 | #equation 12 for t 83 | Sigma_t = np.dot(np.dot(basis, self._sigma_W), basis.T) 84 | 85 | #equation 12 for t+dt 86 | Sigma_t_dt = np.dot(np.dot(basis_t_dt, self._sigma_W), basis_t_dt.T) 87 | 88 | #Cross correlation between t, t+dt, Equation 49 89 | Ct = np.dot(np.dot(basis, self._sigma_W), basis_t_dt.T) 90 | 91 | #System noise Equation 51 92 | Sigma_s = (1./self._dt)* ( Sigma_t_dt - np.dot( np.dot( Ct.T, np.linalg.inv(Sigma_t) ), Ct) ) 93 | 94 | #control noise Equation 52 95 | Sigma_u = np.dot(np.dot(B_pseudo, Sigma_s), B_pseudo.T) 96 | 97 | #part 2 equation 46 98 | tmp1 = np.dot(np.dot(Dbasis, self._sigma_W), basis.T) 99 | 100 | #part 3 equation 46 101 | tmp2 = np.dot(self._A, Sigma_t) + 0.5*Sigma_s 102 | 103 | #compute feedback gain; complete equation 46 104 | K = np.dot( np.dot(B_pseudo, (tmp1-tmp2) ), np.linalg.inv(Sigma_t)) 105 | 106 | #part 1 equation 48 107 | tmp3 = np.dot(Dbasis, self._mean_W) 108 | 109 | #part 2 equation 48 110 | tmp4 = np.dot( (self._A + np.dot(self._B, K)), np.dot(basis, self._mean_W) ) 111 | 112 | #compute feedforward gain; complete equation 48 113 | k = np.dot(B_pseudo, (tmp3-tmp4)) 114 | 115 | return K, k, Sigma_u 116 | 117 | 118 | def compute_gain_traj(self): 119 | """ 120 | This function is to compute the entire gain trajectory 121 | of a given state distribution 122 | """ 123 | time_steps = self._Phi.shape[1] 124 | state_dim, action_dim = self._B.shape 125 | 126 | K_traj = np.zeros([time_steps, state_dim, state_dim]) 127 | k_traj = np.zeros([time_steps, action_dim]) 128 | Sigma_u_traj = np.zeros([time_steps, action_dim, action_dim]) 129 | 130 | for t in range(time_steps): 131 | 132 | K_traj[t, :, :], k_traj[t, :], Sigma_u_traj[t, :, :] = self.compute_gains(t) 133 | 134 | return K_traj, k_traj, Sigma_u_traj 135 | 136 | 137 | def compute_control_cmd(self, t, state, sample=False): 138 | """ 139 | This function is compute the specific control 140 | command at a time step t 141 | Args: 142 | t : time step 143 | state : state for which control command needs to be computed 144 | """ 145 | 146 | K, k, Sigma_u = self.compute_gains(t) 147 | 148 | mean_u = np.dot(K, state) + k 149 | 150 | if sample: 151 | #get a weight sample from the weight distribution 152 | return np.random.multivariate_normal(mean_u, Sigma_u, 1).T, Sigma_u 153 | 154 | else: 155 | 156 | return mean_u, Sigma_u 157 | 158 | 159 | def compute_ctrl_traj(self, state_list): 160 | """ 161 | This function computes an entire 162 | control sequence for a given state list 163 | Args: 164 | state_list for which control has to be computed 165 | this assumes that len(state_list) = timesteps in the basis function 166 | """ 167 | 168 | time_steps = self._Phi.shape[1] 169 | _, action_dim = self._B.shape 170 | 171 | ctrl_cmds_mean = np.zeros([time_steps, action_dim]) 172 | ctrl_cmds_sigma = np.zeros([time_steps, action_dim, action_dim]) 173 | 174 | for t in range(time_steps): 175 | 176 | ctrl_cmds_mean[t, :], ctrl_cmds_sigma[t, :, :] = self.compute_control_cmd(t, state_list[t,:]) 177 | 178 | return ctrl_cmds_mean, ctrl_cmds_sigma 179 | 180 | 181 | 182 | -------------------------------------------------------------------------------- /set_env_bash.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ROOT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 4 | 5 | export PYTHONPATH=$ROOT_DIR:$PYTHONPATH 6 | 7 | cd $ROOT_DIR 8 | 9 | export ROOT_DIR=$ROOT_DIR 10 | 11 | echo "PYTHON PATH IS: $PYTHONPATH" --------------------------------------------------------------------------------