├── .gitignore ├── collect_samples.py ├── collect_samples_threaded.py ├── data_manipulation.py ├── docs ├── installation.md ├── notes.md ├── sample_plots │ ├── Ant_comparison.png │ ├── Cheetah_comparison.png │ ├── Hopper_comparison.png │ └── Swimmer_comparison.png └── trajfollow_videos │ ├── ant_left-1.ogv │ ├── ant_left.ogv │ ├── ant_right.ogv │ ├── ant_straight.ogv │ ├── ant_uturn.ogv │ ├── cheetah_backward.ogv │ ├── cheetah_forward.ogv │ ├── cheetah_forwardbackward.ogv │ ├── swimmer_left.ogv │ ├── swimmer_right.ogv │ └── swimmer_straight.ogv ├── dynamics_model.py ├── feedforward_network.py ├── get_true_action.py ├── helper_funcs.py ├── main.py ├── mbmf.py ├── mpc_controller.py ├── plotting ├── plot_forwardsim.ipynb ├── plot_loss.ipynb ├── plot_mbmf.py └── plot_trajfollow.ipynb ├── point_env.py ├── policy_random.py ├── readme.md ├── reward_functions.py ├── scripts ├── ant_mbmf.sh ├── cheetah_mbmf.sh ├── hopper_mbmf.sh └── swimmer_mbmf.sh ├── trajectories.py ├── trpo_run_mf.py └── yaml_files ├── ant_forward.yaml ├── ant_trajfollow.yaml ├── cheetah_forward.yaml ├── cheetah_trajfollow.yaml ├── hopper_forward.yaml ├── swimmer_forward.yaml └── swimmer_trajfollow.yaml /.gitignore: -------------------------------------------------------------------------------- 1 | run_* 2 | reacher* 3 | saved_roach_* 4 | __pycache* 5 | -------------------------------------------------------------------------------- /collect_samples.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import rllab 3 | import time 4 | import matplotlib.pyplot as plt 5 | import copy 6 | 7 | class CollectSamples(object): 8 | 9 | def __init__(self, env, policy, visualize_rollouts, which_agent, dt_steps, dt_from_xml, follow_trajectories): 10 | self.env = env 11 | self.policy = policy 12 | self.visualize_at_all = visualize_rollouts 13 | self.which_agent = which_agent 14 | 15 | self.low = self.env.observation_space.low 16 | self.high = self.env.observation_space.high 17 | self.shape = self.env.observation_space.shape 18 | 19 | self.use_low = self.low + (self.high-self.low)/3.0 20 | self.use_high = self.high - (self.high-self.low)/3.0 21 | 22 | self.dt_steps = dt_steps 23 | self.dt_from_xml = dt_from_xml 24 | 25 | self.follow_trajectories = follow_trajectories 26 | 27 | def collect_samples(self, num_rollouts, steps_per_rollout): 28 | observations_list = [] 29 | actions_list = [] 30 | starting_states_list=[] 31 | rewards_list = [] 32 | visualization_frequency = 10 33 | for rollout_number in range(num_rollouts): 34 | if(self.which_agent==2): 35 | if(self.follow_trajectories): 36 | observation, starting_state = self.env.reset(returnStartState=True, isSwimmer=True, need_diff_headings=True) 37 | else: 38 | observation, starting_state = self.env.reset(returnStartState=True, isSwimmer=True) 39 | else: 40 | observation, starting_state = self.env.reset(returnStartState=True) 41 | observations, actions, reward_for_rollout = self.perform_rollout(observation, steps_per_rollout, 42 | rollout_number, visualization_frequency) 43 | 44 | rewards_list.append(reward_for_rollout) 45 | observations= np.array(observations) 46 | actions= np.array(actions) 47 | observations_list.append(observations) 48 | actions_list.append(actions) 49 | starting_states_list.append(starting_state) 50 | 51 | #return list of length = num rollouts 52 | #each entry of that list contains one rollout 53 | #each entry is [steps_per_rollout x statespace_dim] or [steps_per_rollout x actionspace_dim] 54 | return 
observations_list, actions_list, starting_states_list, rewards_list 55 | 56 | def perform_rollout(self, observation, steps_per_rollout, rollout_number, visualization_frequency): 57 | observations = [] 58 | actions = [] 59 | visualize = False 60 | reward_for_rollout = 0 61 | if((rollout_number%visualization_frequency)==0): 62 | print("currently performing rollout #", rollout_number) 63 | if(self.visualize_at_all): 64 | all_states=[] 65 | print ("---- visualizing a rollout ----") 66 | visualize=True 67 | 68 | for step_num in range(steps_per_rollout): 69 | action, _ = self.policy.get_action(observation) 70 | 71 | observations.append(observation) 72 | actions.append(action) 73 | 74 | next_observation, reward, terminal, _ = self.env.step(action, collectingInitialData=True) 75 | reward_for_rollout+= reward 76 | 77 | observation = np.copy(next_observation) 78 | 79 | if terminal: 80 | print("Had to stop rollout because terminal state was reached.") 81 | break 82 | 83 | if(visualize): 84 | if(self.which_agent==0): 85 | curr_state = self.env.render() 86 | all_states.append(np.expand_dims(curr_state, axis=0)) 87 | else: 88 | self.env.render() 89 | time.sleep(self.dt_steps*self.dt_from_xml) 90 | 91 | if(visualize and (self.which_agent==0)): 92 | all_states= np.concatenate(all_states, axis=0) 93 | plt.plot(all_states[:,0], all_states[:,1], 'r') 94 | plt.show() 95 | return observations, actions, reward_for_rollout -------------------------------------------------------------------------------- /collect_samples_threaded.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import time 3 | import copy 4 | import matplotlib.pyplot as plt 5 | import copy 6 | import multiprocessing 7 | 8 | class CollectSamples(object): 9 | 10 | def __init__(self, env, policy, visualize_rollouts, which_agent, dt_steps, dt_from_xml, follow_trajectories): 11 | self.main_env = copy.deepcopy(env) 12 | self.policy = policy 13 | self.visualize_at_all = visualize_rollouts 14 | self.which_agent = which_agent 15 | self.list_observations=[] 16 | self.list_actions=[] 17 | self.list_starting_states=[] 18 | 19 | self.stateDim = self.main_env.observation_space.shape[0] 20 | self.actionDim = self.main_env.action_space.shape[0] 21 | 22 | self.dt_steps = dt_steps 23 | self.dt_from_xml = dt_from_xml 24 | self.follow_trajectories = follow_trajectories 25 | 26 | def collect_samples(self, num_rollouts, steps_per_rollout): 27 | 28 | #vars 29 | all_processes=[] 30 | visualization_frequency = num_rollouts/10 31 | num_workers=multiprocessing.cpu_count() #detect number of cores 32 | pool = multiprocessing.Pool(8) 33 | 34 | #multiprocessing for running rollouts (utilize multiple cores) 35 | for rollout_number in range(num_rollouts): 36 | result = pool.apply_async(self.do_rollout, 37 | args=(steps_per_rollout, rollout_number, visualization_frequency), 38 | callback=self.mycallback) 39 | 40 | pool.close() #not going to add anything else to the pool 41 | pool.join() #wait for the processes to terminate 42 | 43 | #return lists of length = num rollouts 44 | #each entry contains one rollout 45 | #each entry is [steps_per_rollout x statespace_dim] or [steps_per_rollout x actionspace_dim] 46 | return self.list_observations, self.list_actions, self.list_starting_states, [] 47 | 48 | def mycallback(self, x): #x is shape [numSteps, state + action] 49 | self.list_observations.append(x[:,0:self.stateDim]) 50 | self.list_actions.append(x[:,self.stateDim:(self.stateDim+self.actionDim)]) 51 | 
self.list_starting_states.append(x[0,(self.stateDim+self.actionDim):]) 52 | 53 | def do_rollout(self, steps_per_rollout, rollout_number, visualization_frequency): 54 | #init vars 55 | #print("START ", rollout_number) 56 | observations = [] 57 | actions = [] 58 | visualize = False 59 | 60 | env = copy.deepcopy(self.main_env) 61 | 62 | #reset env 63 | if(self.which_agent==2): 64 | if(self.follow_trajectories): 65 | observation, starting_state = env.reset(returnStartState=True, isSwimmer=True, need_diff_headings=True) 66 | else: 67 | observation, starting_state = env.reset(returnStartState=True, isSwimmer=True) 68 | else: 69 | observation, starting_state = env.reset(returnStartState=True) 70 | 71 | #visualize only sometimes 72 | if((rollout_number%visualization_frequency)==0): 73 | if(self.visualize_at_all): 74 | all_states=[] 75 | print ("---- visualizing a rollout ----") 76 | visualize=True 77 | 78 | for step_num in range(steps_per_rollout): 79 | 80 | #decide what action to take 81 | action, _ = self.policy.get_action(observation) 82 | 83 | #keep tracks of observations + actions 84 | observations.append(observation) 85 | actions.append(action) 86 | 87 | #perform the action 88 | next_observation, reward, terminal, _ = env.step(action, collectingInitialData=True) 89 | 90 | #update the observation 91 | observation = np.copy(next_observation) 92 | 93 | if terminal: 94 | #print("Had to stop rollout because terminal state was reached.") 95 | break 96 | 97 | if(visualize): 98 | if(self.which_agent==0): 99 | curr_state = env.render() 100 | all_states.append(np.expand_dims(curr_state, axis=0)) 101 | else: 102 | env.render() 103 | time.sleep(self.dt_steps*self.dt_from_xml) 104 | 105 | if(visualize and (self.which_agent==0)): 106 | all_states= np.concatenate(all_states, axis=0) 107 | plt.plot(all_states[:,0], all_states[:,1], 'r') 108 | plt.show() 109 | 110 | if((rollout_number%visualization_frequency)==0): 111 | print("Completed rollout # ", rollout_number) 112 | 113 | array_starting_state = np.tile(starting_state, (np.array(actions).shape[0],1)) 114 | return np.concatenate((np.array(observations), np.array(actions), array_starting_state), axis=1) -------------------------------------------------------------------------------- /data_manipulation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | import tensorflow as tf 4 | import time 5 | import math 6 | import matplotlib.pyplot as plt 7 | import copy 8 | 9 | def get_indices(which_agent): 10 | x_index = -7 11 | y_index = -7 12 | z_index = -7 13 | yaw_index = -7 14 | joint1_index = -7 15 | joint2_index = -7 16 | frontleg_index = -7 17 | frontshin_index = -7 18 | frontfoot_index = -7 19 | xvel_index = -7 20 | orientation_index = -7 21 | 22 | if(which_agent==0): #pointmass 23 | x_index= 0 24 | y_index= 1 25 | elif(which_agent==1): #ant 26 | x_index= 29 27 | y_index= 30 28 | z_index = 31 29 | xvel_index = 38 30 | elif(which_agent==2): #swimmer 31 | x_index= 10 32 | y_index= 11 33 | yaw_index = 2 34 | joint1_index = 3 35 | joint2_index = 4 36 | xvel_index = 13 37 | elif(which_agent==3): #reacher 38 | x_index= 6 39 | y_index= 7 40 | elif(which_agent==4): #cheetah 41 | x_index= 18 42 | y_index= 20 43 | frontleg_index = 6 44 | frontshin_index = 7 45 | frontfoot_index = 8 46 | xvel_index = 21 47 | elif(which_agent==5): #roach (not mujoco) 48 | x_index= 0 49 | y_index= 1 50 | elif(which_agent==6): #hopper 51 | x_index = 11 52 | y_index = 13 53 | z_index = 0 54 | 
xvel_index = 14 55 | orientation_index = 1 56 | elif(which_agent==7): #walker 57 | x_index = 18 58 | y_index = 20 59 | 60 | return x_index, y_index, z_index, yaw_index, joint1_index, joint2_index, frontleg_index, \ 61 | frontshin_index, frontfoot_index, xvel_index, orientation_index 62 | 63 | def generate_training_data_inputs(states0, controls0): 64 | # init vars 65 | states=np.copy(states0) 66 | controls=np.copy(controls0) 67 | new_states=[] 68 | new_controls=[] 69 | 70 | # remove the last entry in each rollout (because that entry doesn't have an associated "output") 71 | for i in range(len(states)): 72 | curr_item = states[i] 73 | length = curr_item.shape[0] 74 | new_states.append(curr_item[0:length-1,:]) 75 | 76 | curr_item = controls[i] 77 | length = curr_item.shape[0] 78 | new_controls.append(curr_item[0:length-1,:]) 79 | 80 | #turn the list of rollouts into just one large array of data 81 | dataX= np.concatenate(new_states, axis=0) 82 | dataY= np.concatenate(new_controls, axis=0) 83 | return dataX, dataY 84 | 85 | def generate_training_data_outputs(states, which_agent): 86 | #for each rollout, the output corresponding to each (s_i) is (s_i+1 - s_i) 87 | differences=[] 88 | for states_in_single_rollout in states: 89 | output = states_in_single_rollout[1:states_in_single_rollout.shape[0],:] \ 90 | -states_in_single_rollout[0:states_in_single_rollout.shape[0]-1,:] 91 | differences.append(output) 92 | output = np.concatenate(differences, axis=0) 93 | return output 94 | 95 | def from_observation_to_usablestate(states, which_agent, just_one): 96 | 97 | ####################################### 98 | ######### POINTMASS ################### 99 | ####################################### 100 | 101 | #0: x 102 | #1: y 103 | #2: vx 104 | #3: vy 105 | if(which_agent==0): 106 | return states 107 | 108 | ####################################### 109 | ######### ANT ######################### 110 | ####################################### 111 | 112 | #we use the following observation as input to NN (41 things) 113 | #0 to 14... 15 joint positions 114 | #15 to 28... 14 joint velocities 115 | #29 to 31... 3 body com pos 116 | #32 to 37... 6 cos and sin of 3 body angles (from 9 rotation mat) 117 | #38 to 40... body com vel 118 | 119 | #returned by env.step 120 | #0 to 14 = positions 121 | #j0 x position 122 | #j1 y position 123 | #j2 z position 124 | #3 ? 
125 | #4 5 body flip 126 | #6 body rotate 127 | #7 leg yaw ccw, 8 leg bend down 128 | #9, 10 129 | #11, 12 130 | #13,14 131 | #15 to 28 = velocities 132 | #29 to 37 = rotation matrix (9) 133 | #38 to 40 = com positions 134 | #41 to 43 = com velocities 135 | 136 | if(which_agent==1): 137 | if(just_one): 138 | curr_item = np.copy(states) 139 | joint_pos = curr_item[0:15] 140 | joint_vel = curr_item[15:29] 141 | body_pos = curr_item[38:41] 142 | body_rpy = to_euler(curr_item[29:38], just_one) #9 vals of rot mat --> 6 vals (cos sin of rpy) 143 | body_vel = curr_item[41:44] 144 | full_item = np.concatenate((joint_pos, joint_vel, body_pos, body_rpy, body_vel), axis=0) 145 | return full_item 146 | 147 | else: 148 | new_states=[] 149 | for i in range(len(states)): #for each rollout 150 | curr_item = np.copy(states[i]) 151 | 152 | joint_pos = curr_item[:,0:15] 153 | joint_vel = curr_item[:,15:29] 154 | body_pos = curr_item[:,38:41] 155 | body_rpy = to_euler(curr_item[:,29:38], just_one) #9 vals of rot mat --> 6 vals (cos sin of rpy) 156 | body_vel = curr_item[:,41:44] 157 | 158 | full_item = np.concatenate((joint_pos, joint_vel, body_pos, body_rpy, body_vel), axis=1) 159 | new_states.append(full_item) 160 | return new_states 161 | 162 | 163 | ####################################### 164 | ######### SWIMMER ##################### 165 | ####################################### 166 | 167 | #total = 16 168 | #0 slider x... 1 slider y.... 2 heading 169 | #3,4 the two hinge joint pos 170 | #5,6 slider x/y vel 171 | #7 heading vel 172 | #8,9 the two hinge joint vel 173 | #10,11,12 cm x and y and z pos 174 | #13,14,15 cm x and y and z vel 175 | if(which_agent==2): 176 | return states 177 | 178 | ####################################### 179 | ######### REACHER ##################### 180 | ####################################### 181 | 182 | #total = 11 183 | # 2-- cos(theta) of the 2 angles 184 | # 2-- sin(theta) of the 2 angles 185 | # 2-- goal pos -------------------(ignore this) 186 | # 2-- vel of the 2 angles 187 | # 3-- fingertip cm 188 | if(which_agent==3): 189 | if(just_one): 190 | curr_item = np.copy(states) 191 | keep_1 = curr_item[0:4] 192 | keep_2 = curr_item[6:11] 193 | full_item = np.concatenate((keep_1, keep_2), axis=0) 194 | return full_item 195 | 196 | else: 197 | new_states=[] 198 | for i in range(len(states)): #for each rollout 199 | curr_item = np.copy(states[i]) 200 | keep1 = curr_item[:,0:4] 201 | keep2 = curr_item[:,6:11] 202 | full_item = np.concatenate((keep1, keep2), axis=1) 203 | new_states.append(full_item) 204 | return new_states 205 | 206 | ####################################### 207 | ######### HALF CHEETAH ################ 208 | ####################################### 209 | 210 | #STATE when you pass in something to reset env: (33) 211 | # rootx, rootz, rooty 212 | # bthigh, bshin, bfoot 213 | # fthigh, fshin, ffoot 214 | # rootx, rootz, rooty --vel 215 | # bthigh, bshin, bfoot --vel 216 | # fthigh, fshin, ffoot --vel 217 | # self.model.data.qacc (9) 218 | # self.model.data.ctrl (6) 219 | #OBSERVATION: (24) 220 | # 0: rootx (forward/backward) 221 | # 1: rootz (up/down) 222 | # 2: rooty (angle of body) 223 | # 3: bthigh (+ is move back) 224 | # 4: bshin 225 | # 5: bfoot 226 | # 6: fthigh 227 | # 7: fshin 228 | # 8: ffoot 229 | # 9: root x vel 230 | # 10: root z vel 231 | # 11: root y vel 232 | # 12: bthigh vel 233 | # 13: bshin vel 234 | # 14: bfoot vel 235 | # 15: fthigh vel 236 | # 16: fshin vel 237 | # 17: ffoot vel 238 | #com x 239 | #com y 240 | #com z 241 | #com vx 242 | #com 
vy 243 | #com vz 244 | 245 | if(which_agent==4): 246 | return states 247 | 248 | ####################################### 249 | ######### ROACH (personal env) ######## 250 | ####################################### 251 | 252 | # x,y,z com position 253 | # orientation com 254 | # cos of 2 motor positions 255 | # sin of 2 motor positions 256 | # com velocity 257 | # orientation angular vel 258 | # 2 motor vel 259 | 260 | elif(which_agent==5): 261 | if(just_one): 262 | curr_item = np.copy(states) 263 | keep_1 = curr_item[0:6] 264 | two = np.cos(curr_item[6:8]) 265 | three = np.sin(curr_item[6:8]) 266 | keep_4 = curr_item[8:16] 267 | full_item = np.concatenate((keep_1, two, three, keep_4), axis=0) 268 | return full_item 269 | 270 | else: 271 | new_states=[] 272 | for i in range(len(states)): #for each rollout 273 | curr_item = np.copy(states[i]) 274 | keep1 = curr_item[:,0:6] 275 | two = np.cos(curr_item[:,6:8]) 276 | three = np.sin(curr_item[:,6:8]) 277 | keep4 = curr_item[:,8:16] 278 | full_item = np.concatenate((keep1, two, three, keep4), axis=1) 279 | new_states.append(full_item) 280 | return new_states 281 | 282 | ####################################### 283 | ######### HOPPER ###################### 284 | ####################################### 285 | 286 | #observation: 17 things 287 | #5 joints-- j0 (height), j2, j3, j4, j5 288 | #6 velocities 289 | #3 com pos 290 | #3 com vel 291 | #state: 21 things 292 | #6 joint pos 293 | #6 joint vel 294 | #6 qacc 295 | #3 ctrl 296 | 297 | if(which_agent==6): 298 | return states 299 | 300 | ####################################### 301 | ######### WALKER ###################### 302 | ####################################### 303 | 304 | #observation: 24 things 305 | #9 joint pos 306 | #9 velocities 307 | #3 com pos 308 | #3 com vel 309 | 310 | if(which_agent==7): 311 | return states 312 | 313 | 314 | def to_euler(rot_mat, just_one): 315 | if(just_one): 316 | r=np.arctan2(rot_mat[3], rot_mat[1]) 317 | p=np.arctan2(-rot_mat[6], np.sqrt(rot_mat[7]*rot_mat[7]+rot_mat[8]*rot_mat[8])) 318 | y=np.arctan2(rot_mat[7], rot_mat[8]) 319 | 320 | return np.array([np.cos(r), np.sin(r), np.cos(p), np.sin(p), np.cos(y), np.sin(y)]) 321 | 322 | else: 323 | r=np.arctan2(rot_mat[:,3], rot_mat[:,1]) 324 | r=np.concatenate((np.expand_dims(np.cos(r), axis=1), np.expand_dims(np.sin(r), axis=1)), axis=1) 325 | 326 | p=np.arctan2(-rot_mat[:,6], np.sqrt(rot_mat[:,7]*rot_mat[:,7]+rot_mat[:,8]*rot_mat[:,8])) 327 | p=np.concatenate((np.expand_dims(np.cos(p), axis=1), np.expand_dims(np.sin(p), axis=1)), axis=1) 328 | 329 | y=np.arctan2(rot_mat[:,7], rot_mat[:,8]) 330 | y=np.concatenate((np.expand_dims(np.cos(y), axis=1), np.expand_dims(np.sin(y), axis=1)), axis=1) 331 | 332 | return np.concatenate((r,p,y), axis=1) 333 | -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | 2 | # INSTALLING EVERYTHING 3 | 4 | ### ANACONDA (if you don't have)
5 | 6 |     Download from https://www.continuum.io/downloads (download the Python 2.7 version)<br>
7 | ``` 8 | bash Anaconda2-4.4.0-Linux-x86_64.sh 9 | vim ~/.bashrc 10 | ``` 11 | In .bashrc, type: 12 | ``` 13 | export PATH="$HOME/anaconda2/bin:$PATH" 14 | ``` 15 | Source the file: 16 | ``` 17 | source ~/.bashrc 18 | ``` 19 | 20 | ---------------------------------- 21 | 22 | ### MUJOCO
23 | 24 | Go to website: https://www.roboti.us/license.html
25 | 26 | a) mujoco files:
27 |     Under Downloads, download the mjpro131 Linux build<br>
28 |     extract/unzip it
29 | ``` 30 | mkdir ~/.mujoco 31 | cp -R mjpro131 ~/.mujoco/mjpro131 32 | ```
33 | b) license key:
34 |     i) If you don't have one: sign up for a 30-day free trial to get a license<br>
35 |     You will need to chmod (possibly with sudo) the downloaded executable so it can run, in order to obtain your computer id<br>
36 |     The license email will give you mjkey.txt + LICENSE.txt<br>
37 | ``` 38 | cp mjkey.txt ~/.mujoco/mjkey.txt 39 | ```
40 |     ii) Else, just copy your existing key into ~/.mujoco/mjkey.txt 41 | 42 | ---------------------------------- 43 | 44 | ### RLLAB 45 | 46 | ``` 47 | git clone https://github.com/nagaban2/rllab.git 48 | cd rllab 49 | ./scripts/setup_linux.sh 50 | ./scripts/setup_mujoco.sh 51 | vim ~/.bashrc 52 | ``` 53 | In .bashrc, type: 54 | ``` 55 | export PATH="$HOME/anaconda2/envs/rllab3/bin:$PATH" 56 | export PYTHONPATH="$HOME/rllab:$PYTHONPATH" 57 | ``` 58 | Source the file: 59 | ``` 60 | source ~/.bashrc 61 | source activate rllab3 62 | ``` 63 | ---------------------------------- 64 | 65 | ### CUDA (Note: assuming you already have cuda and cudnn) 66 | 67 |     Set paths: 68 | ``` 69 | vim ~/.bashrc 70 | ``` 71 | In .bashrc, type: 72 | ``` 73 | export PATH="/usr/local/cuda-8.0/bin:$PATH" 74 | export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/cuda/lib64" 75 | ``` 76 | Source the file: 77 | ``` 78 | source ~/.bashrc 79 | ``` 80 |     To see if gpu is being used while running code: 81 | ``` 82 | nvidia-smi 83 | ``` 84 | ---------------------------------- 85 | 86 | ### OTHER 87 | ``` 88 | source activate rllab3 89 | pip install gym 90 | pip install cloudpickle 91 | pip install seaborn 92 | ``` -------------------------------------------------------------------------------- /docs/notes.md: -------------------------------------------------------------------------------- 1 | # NOTES 2 | 3 | If you want to
4 | - know about the env
5 | - change the env
6 | - or make your own env
7 | 8 | You might care about the following:
9 | 10 | a) from_observation_to_usablestate in data_manipulation.py
11 | - This explains each element of the env's observation 12 | - It also allows you to edit which parts of the observations you want to feed into your NN (this is called "state" throughout the paper) 13 | 14 | b) get_indices in data_manipulation.py
15 | - Indicates which index of the state corresponds to what (e.g. x_index, y_index, etc.) 16 | - These indices are used throughout the code, e.g. for the reward functions. 17 | 18 | c) reward_functions.py<br>
19 | - A reward function should be defined for each env/task 20 | 21 | --------------------------------------------------------------- 22 | --------------------------------------------------------------- 23 | 24 | ### Variables in the yaml files: 25 | 26 | **num_rollouts_train**
27 |          number of rollouts to collect for training dataset
28 | 29 | **nEpoch**
30 |          number of epochs for training the NN dynamics model
31 | 32 | **horizon**
33 |          horizon of the MPC controller
34 | 35 | **num_control_samples**
36 |          number of random candidate action sequences generated by MPC controller
37 | 38 | **fraction_use_new**
39 |          fraction of each training batch drawn from newly aggregated data (the rest comes from the original dataset) when training the NN dynamics model<br>
40 | 41 | **num_aggregation_iters**
42 |          how many full iterations of training --> rollouts --> data aggregation to conduct<br>
43 | 44 | **num_trajectories_for_aggregation**
45 |          how many MPC rollouts to conduct during each aggregation iteration
46 | 47 | **rollouts_forTraining**
48 |          how many of the aggregated rollouts to put into the training dataset (vs. the validation dataset)<br>
49 | 50 | **num_fc_layers**
51 |          number of fully-connected hidden layers in the dynamics model<br>
52 | 53 | **depth_fc_layers**
54 |          number of units in each hidden layer of the dynamics model (i.e. the layer width)<br>
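As a quick reference for where these variables live in the config hierarchy, the sketch below mirrors how main.py reads a yaml file (see the "params from yaml file" block in main.py); the key names and nesting are taken directly from main.py, and the file name is one of the configs shipped in yaml_files/.<br>

```python
# Minimal sketch of how main.py reads the yaml variables documented above.
# Key names and nesting are copied from main.py; run from the repo root so the
# relative path to yaml_files/ resolves, as main.py assumes.
import os
import yaml

yaml_path = os.path.abspath('yaml_files/ant_forward.yaml')
with open(yaml_path, 'r') as f:
    # main.py calls yaml.load(f); newer pyyaml versions may need
    # yaml.load(f, Loader=yaml.SafeLoader)
    params = yaml.load(f)

num_rollouts_train               = params['data_collection']['num_rollouts_train']
nEpoch                           = params['dyn_model']['nEpoch']
fraction_use_new                 = params['dyn_model']['fraction_use_new']
num_fc_layers                    = params['dyn_model']['num_fc_layers']
depth_fc_layers                  = params['dyn_model']['depth_fc_layers']
horizon                          = params['controller']['horizon']
num_control_samples              = params['controller']['num_control_samples']
num_aggregation_iters            = params['aggregation']['num_aggregation_iters']
num_trajectories_for_aggregation = params['aggregation']['num_trajectories_for_aggregation']
rollouts_forTraining             = params['aggregation']['rollouts_forTraining']
```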
55 | 56 | -------------------------------------------------------------------------------- /docs/sample_plots/Ant_comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anagabandi/nn_dynamics/b76a62faed78a52cd797651395b9962e423ce013/docs/sample_plots/Ant_comparison.png -------------------------------------------------------------------------------- /docs/sample_plots/Cheetah_comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anagabandi/nn_dynamics/b76a62faed78a52cd797651395b9962e423ce013/docs/sample_plots/Cheetah_comparison.png -------------------------------------------------------------------------------- /docs/sample_plots/Hopper_comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anagabandi/nn_dynamics/b76a62faed78a52cd797651395b9962e423ce013/docs/sample_plots/Hopper_comparison.png -------------------------------------------------------------------------------- /docs/sample_plots/Swimmer_comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anagabandi/nn_dynamics/b76a62faed78a52cd797651395b9962e423ce013/docs/sample_plots/Swimmer_comparison.png -------------------------------------------------------------------------------- /docs/trajfollow_videos/ant_left-1.ogv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anagabandi/nn_dynamics/b76a62faed78a52cd797651395b9962e423ce013/docs/trajfollow_videos/ant_left-1.ogv -------------------------------------------------------------------------------- /docs/trajfollow_videos/ant_left.ogv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anagabandi/nn_dynamics/b76a62faed78a52cd797651395b9962e423ce013/docs/trajfollow_videos/ant_left.ogv -------------------------------------------------------------------------------- /docs/trajfollow_videos/ant_right.ogv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anagabandi/nn_dynamics/b76a62faed78a52cd797651395b9962e423ce013/docs/trajfollow_videos/ant_right.ogv -------------------------------------------------------------------------------- /docs/trajfollow_videos/ant_straight.ogv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anagabandi/nn_dynamics/b76a62faed78a52cd797651395b9962e423ce013/docs/trajfollow_videos/ant_straight.ogv -------------------------------------------------------------------------------- /docs/trajfollow_videos/ant_uturn.ogv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anagabandi/nn_dynamics/b76a62faed78a52cd797651395b9962e423ce013/docs/trajfollow_videos/ant_uturn.ogv -------------------------------------------------------------------------------- /docs/trajfollow_videos/cheetah_backward.ogv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anagabandi/nn_dynamics/b76a62faed78a52cd797651395b9962e423ce013/docs/trajfollow_videos/cheetah_backward.ogv -------------------------------------------------------------------------------- /docs/trajfollow_videos/cheetah_forward.ogv: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/anagabandi/nn_dynamics/b76a62faed78a52cd797651395b9962e423ce013/docs/trajfollow_videos/cheetah_forward.ogv -------------------------------------------------------------------------------- /docs/trajfollow_videos/cheetah_forwardbackward.ogv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anagabandi/nn_dynamics/b76a62faed78a52cd797651395b9962e423ce013/docs/trajfollow_videos/cheetah_forwardbackward.ogv -------------------------------------------------------------------------------- /docs/trajfollow_videos/swimmer_left.ogv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anagabandi/nn_dynamics/b76a62faed78a52cd797651395b9962e423ce013/docs/trajfollow_videos/swimmer_left.ogv -------------------------------------------------------------------------------- /docs/trajfollow_videos/swimmer_right.ogv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anagabandi/nn_dynamics/b76a62faed78a52cd797651395b9962e423ce013/docs/trajfollow_videos/swimmer_right.ogv -------------------------------------------------------------------------------- /docs/trajfollow_videos/swimmer_straight.ogv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anagabandi/nn_dynamics/b76a62faed78a52cd797651395b9962e423ce013/docs/trajfollow_videos/swimmer_straight.ogv -------------------------------------------------------------------------------- /dynamics_model.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import numpy.random as npr 4 | import tensorflow as tf 5 | import time 6 | import math 7 | 8 | from feedforward_network import feedforward_network 9 | 10 | 11 | class Dyn_Model: 12 | 13 | def __init__(self, inputSize, outputSize, sess, learning_rate, batchsize, which_agent, x_index, y_index, 14 | num_fc_layers, depth_fc_layers, mean_x, mean_y, mean_z, std_x, std_y, std_z, tf_datatype, print_minimal): 15 | 16 | #init vars 17 | self.sess = sess 18 | self.batchsize = batchsize 19 | self.which_agent = which_agent 20 | self.x_index = x_index 21 | self.y_index = y_index 22 | self.inputSize = inputSize 23 | self.outputSize = outputSize 24 | self.mean_x = mean_x 25 | self.mean_y = mean_y 26 | self.mean_z = mean_z 27 | self.std_x = std_x 28 | self.std_y = std_y 29 | self.std_z = std_z 30 | self.print_minimal = print_minimal 31 | 32 | #placeholders 33 | self.x_ = tf.placeholder(tf_datatype, shape=[None, self.inputSize], name='x') #inputs 34 | self.z_ = tf.placeholder(tf_datatype, shape=[None, self.outputSize], name='z') #labels 35 | 36 | #forward pass 37 | self.curr_nn_output = feedforward_network(self.x_, self.inputSize, self.outputSize, 38 | num_fc_layers, depth_fc_layers, tf_datatype) 39 | 40 | #loss 41 | self.mse_ = tf.reduce_mean(tf.square(self.z_ - self.curr_nn_output)) 42 | 43 | # Compute gradients and update parameters 44 | self.opt = tf.train.AdamOptimizer(learning_rate) 45 | self.theta = tf.trainable_variables() 46 | self.gv = [(g,v) for g,v in 47 | self.opt.compute_gradients(self.mse_, self.theta) 48 | if g is not None] 49 | self.train_step = self.opt.apply_gradients(self.gv) 50 | 51 | def train(self, dataX, dataZ, dataX_new, dataZ_new, nEpoch, save_dir, fraction_use_new): 52 | 53 | #init 
vars 54 | start = time.time() 55 | training_loss_list = [] 56 | range_of_indeces = np.arange(dataX.shape[0]) 57 | nData_old = dataX.shape[0] 58 | num_new_pts = dataX_new.shape[0] 59 | 60 | #how much of new data to use per batch 61 | if(num_new_pts<(self.batchsize*fraction_use_new)): 62 | batchsize_new_pts = num_new_pts #use all of the new ones 63 | else: 64 | batchsize_new_pts = int(self.batchsize*fraction_use_new) 65 | 66 | #how much of old data to use per batch 67 | batchsize_old_pts = int(self.batchsize- batchsize_new_pts) 68 | 69 | #training loop 70 | for i in range(nEpoch): 71 | 72 | #reset to 0 73 | avg_loss=0 74 | num_batches=0 75 | 76 | #randomly order indeces (equivalent to shuffling dataX and dataZ) 77 | old_indeces = npr.choice(range_of_indeces, size=(dataX.shape[0],), replace=False) 78 | #train from both old and new dataset 79 | if(batchsize_old_pts>0): 80 | 81 | #get through the full old dataset 82 | for batch in range(int(math.floor(nData_old / batchsize_old_pts))): 83 | 84 | #randomly sample points from new dataset 85 | if(num_new_pts==0): 86 | dataX_new_batch = dataX_new 87 | dataZ_new_batch = dataZ_new 88 | else: 89 | new_indeces = npr.randint(0,dataX_new.shape[0], (batchsize_new_pts,)) 90 | dataX_new_batch = dataX_new[new_indeces, :] 91 | dataZ_new_batch = dataZ_new[new_indeces, :] 92 | 93 | #walk through the randomly reordered "old data" 94 | dataX_old_batch = dataX[old_indeces[batch*batchsize_old_pts:(batch+1)*batchsize_old_pts], :] 95 | dataZ_old_batch = dataZ[old_indeces[batch*batchsize_old_pts:(batch+1)*batchsize_old_pts], :] 96 | 97 | #combine the old and new data 98 | dataX_batch = np.concatenate((dataX_old_batch, dataX_new_batch)) 99 | dataZ_batch = np.concatenate((dataZ_old_batch, dataZ_new_batch)) 100 | 101 | #one iteration of feedforward training 102 | _, loss, output, true_output = self.sess.run([self.train_step, self.mse_, self.curr_nn_output, self.z_], 103 | feed_dict={self.x_: dataX_batch, self.z_: dataZ_batch}) 104 | training_loss_list.append(loss) 105 | avg_loss+= loss 106 | num_batches+=1 107 | 108 | #train completely from new set 109 | else: 110 | for batch in range(int(math.floor(num_new_pts / batchsize_new_pts))): 111 | 112 | #walk through the shuffled new data 113 | dataX_batch = dataX_new[batch*batchsize_new_pts:(batch+1)*batchsize_new_pts, :] 114 | dataZ_batch = dataZ_new[batch*batchsize_new_pts:(batch+1)*batchsize_new_pts, :] 115 | 116 | #one iteration of feedforward training 117 | _, loss, output, true_output = self.sess.run([self.train_step, self.mse_, self.curr_nn_output, self.z_], 118 | feed_dict={self.x_: dataX_batch, self.z_: dataZ_batch}) 119 | 120 | training_loss_list.append(loss) 121 | avg_loss+= loss 122 | num_batches+=1 123 | 124 | #shuffle new dataset after an epoch (if training only on it) 125 | p = npr.permutation(dataX_new.shape[0]) 126 | dataX_new = dataX_new[p] 127 | dataZ_new = dataZ_new[p] 128 | 129 | #save losses after an epoch 130 | np.save(save_dir + '/training_losses.npy', training_loss_list) 131 | if(not(self.print_minimal)): 132 | if((i%10)==0): 133 | print("\n=== Epoch {} ===".format(i)) 134 | print ("loss: ", avg_loss/num_batches) 135 | 136 | if(not(self.print_minimal)): 137 | print ("Training set size: ", (nData_old + dataX_new.shape[0])) 138 | print("Training duration: {:0.2f} s".format(time.time()-start)) 139 | 140 | #get loss of curr model on old dataset 141 | avg_old_loss=0 142 | iters_in_batch=0 143 | for batch in range(int(math.floor(nData_old / self.batchsize))): 144 | # Batch the training data 145 | dataX_batch 
= dataX[batch*self.batchsize:(batch+1)*self.batchsize, :] 146 | dataZ_batch = dataZ[batch*self.batchsize:(batch+1)*self.batchsize, :] 147 | #one iteration of feedforward training 148 | loss, _ = self.sess.run([self.mse_, self.curr_nn_output], feed_dict={self.x_: dataX_batch, self.z_: dataZ_batch}) 149 | avg_old_loss+= loss 150 | iters_in_batch+=1 151 | old_loss = avg_old_loss/iters_in_batch 152 | 153 | #get loss of curr model on new dataset 154 | avg_new_loss=0 155 | iters_in_batch=0 156 | for batch in range(int(math.floor(dataX_new.shape[0] / self.batchsize))): 157 | # Batch the training data 158 | dataX_batch = dataX_new[batch*self.batchsize:(batch+1)*self.batchsize, :] 159 | dataZ_batch = dataZ_new[batch*self.batchsize:(batch+1)*self.batchsize, :] 160 | #one iteration of feedforward training 161 | loss, _ = self.sess.run([self.mse_, self.curr_nn_output], feed_dict={self.x_: dataX_batch, self.z_: dataZ_batch}) 162 | avg_new_loss+= loss 163 | iters_in_batch+=1 164 | if(iters_in_batch==0): 165 | new_loss=0 166 | else: 167 | new_loss = avg_new_loss/iters_in_batch 168 | 169 | #done 170 | return (avg_loss/num_batches), old_loss, new_loss 171 | 172 | def run_validation(self, inputs, outputs): 173 | 174 | #init vars 175 | nData = inputs.shape[0] 176 | avg_loss=0 177 | iters_in_batch=0 178 | 179 | for batch in range(int(math.floor(nData / self.batchsize))): 180 | # Batch the training data 181 | dataX_batch = inputs[batch*self.batchsize:(batch+1)*self.batchsize, :] 182 | dataZ_batch = outputs[batch*self.batchsize:(batch+1)*self.batchsize, :] 183 | 184 | #one iteration of feedforward training 185 | z_predictions, loss = self.sess.run([self.curr_nn_output, self.mse_], feed_dict={self.x_: dataX_batch, self.z_: dataZ_batch}) 186 | 187 | avg_loss+= loss 188 | iters_in_batch+=1 189 | 190 | #avg loss + all predictions 191 | print ("Validation set size: ", nData) 192 | print ("Validation set's total loss: ", avg_loss/iters_in_batch) 193 | 194 | return (avg_loss/iters_in_batch) 195 | 196 | #multistep prediction using the learned dynamics model at each step 197 | def do_forward_sim(self, forwardsim_x_true, forwardsim_y, many_in_parallel, env_inp, which_agent): 198 | 199 | #init vars 200 | state_list = [] 201 | 202 | if(many_in_parallel): 203 | #init vars 204 | N= forwardsim_y.shape[0] 205 | horizon = forwardsim_y.shape[1] 206 | array_stdz = np.tile(np.expand_dims(self.std_z, axis=0),(N,1)) 207 | array_meanz = np.tile(np.expand_dims(self.mean_z, axis=0),(N,1)) 208 | array_stdy = np.tile(np.expand_dims(self.std_y, axis=0),(N,1)) 209 | array_meany = np.tile(np.expand_dims(self.mean_y, axis=0),(N,1)) 210 | array_stdx = np.tile(np.expand_dims(self.std_x, axis=0),(N,1)) 211 | array_meanx = np.tile(np.expand_dims(self.mean_x, axis=0),(N,1)) 212 | 213 | if(len(forwardsim_x_true)==2): 214 | #N starting states, one for each of the simultaneous sims 215 | curr_states=np.tile(forwardsim_x_true[0], (N,1)) 216 | else: 217 | curr_states=np.copy(forwardsim_x_true) 218 | 219 | #advance all N sims, one timestep at a time 220 | for timestep in range(horizon): 221 | 222 | #keep track of states for all N sims 223 | state_list.append(np.copy(curr_states)) 224 | 225 | #make [N x (state,action)] array to pass into NN 226 | states_preprocessed = np.nan_to_num(np.divide((curr_states-array_meanx), array_stdx)) 227 | actions_preprocessed = np.nan_to_num(np.divide((forwardsim_y[:,timestep,:]-array_meany), array_stdy)) 228 | inputs_list= np.concatenate((states_preprocessed, actions_preprocessed), axis=1) 229 | 230 | #run the N sims all 
at once 231 | model_output = self.sess.run([self.curr_nn_output], feed_dict={self.x_: inputs_list}) 232 | state_differences = np.multiply(model_output[0],array_stdz)+array_meanz 233 | 234 | #update the state info 235 | curr_states = curr_states + state_differences 236 | 237 | #return a list of length = horizon+1... each one has N entries, where each entry is (13,) 238 | state_list.append(np.copy(curr_states)) 239 | else: 240 | curr_state = np.copy(forwardsim_x_true[0]) #curr state is of dim NN input 241 | 242 | for curr_control in forwardsim_y: 243 | 244 | state_list.append(np.copy(curr_state)) 245 | curr_control = np.expand_dims(curr_control, axis=0) 246 | 247 | #subtract mean and divide by standard deviation 248 | curr_state_preprocessed = curr_state - self.mean_x 249 | curr_state_preprocessed = np.nan_to_num(curr_state_preprocessed/self.std_x) 250 | curr_control_preprocessed = curr_control - self.mean_y 251 | curr_control_preprocessed = np.nan_to_num(curr_control_preprocessed/self.std_y) 252 | inputs_preprocessed = np.expand_dims(np.append(curr_state_preprocessed, curr_control_preprocessed), axis=0) 253 | 254 | #run through NN to get prediction 255 | model_output = self.sess.run([self.curr_nn_output], feed_dict={self.x_: inputs_preprocessed}) 256 | 257 | #multiply by std and add mean back in 258 | state_differences= (model_output[0][0]*self.std_z)+self.mean_z 259 | 260 | #update the state info 261 | next_state = curr_state + state_differences 262 | 263 | #copy the state info 264 | curr_state= np.copy(next_state) 265 | 266 | state_list.append(np.copy(curr_state)) 267 | 268 | return state_list -------------------------------------------------------------------------------- /feedforward_network.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import tensorflow as tf 4 | 5 | def feedforward_network(inputState, inputSize, outputSize, num_fc_layers, depth_fc_layers, tf_datatype): 6 | 7 | #vars 8 | intermediate_size=depth_fc_layers 9 | reuse= False 10 | initializer = tf.contrib.layers.xavier_initializer(uniform=False, seed=None, dtype=tf_datatype) 11 | fc = tf.contrib.layers.fully_connected 12 | 13 | # make hidden layers 14 | for i in range(num_fc_layers): 15 | if(i==0): 16 | fc_i = fc(inputState, num_outputs=intermediate_size, activation_fn=None, 17 | weights_initializer=initializer, biases_initializer=initializer, reuse=reuse, trainable=True) 18 | else: 19 | fc_i = fc(h_i, num_outputs=intermediate_size, activation_fn=None, 20 | weights_initializer=initializer, biases_initializer=initializer, reuse=reuse, trainable=True) 21 | h_i = tf.nn.relu(fc_i) 22 | 23 | # make output layer 24 | z=fc(h_i, num_outputs=outputSize, activation_fn=None, weights_initializer=initializer, 25 | biases_initializer=initializer, reuse=reuse, trainable=True) 26 | return z -------------------------------------------------------------------------------- /get_true_action.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | import tensorflow as tf 4 | import time 5 | import math 6 | import matplotlib.pyplot as plt 7 | import copy 8 | from six.moves import cPickle 9 | from rllab.misc import tensor_utils 10 | from rllab.envs.normalized_env import normalize 11 | from feedforward_network import feedforward_network 12 | import os 13 | from data_manipulation import from_observation_to_usablestate 14 | from dynamics_model import Dyn_Model 15 | from data_manipulation import 
get_indices 16 | from mpc_controller import MPCController 17 | from trajectories import make_trajectory 18 | 19 | class GetTrueAction: 20 | 21 | def make_model(self, sess, env_inp, rundir, tf_datatype, num_fc_layers, depth_fc_layers, which_agent, 22 | lr, batchsize, N, horizon, steps_per_episode, dt_steps, print_minimal): 23 | 24 | #vars 25 | self.sess = sess 26 | self.env = copy.deepcopy(env_inp) 27 | self.N = N 28 | self.horizon = horizon 29 | self.which_agent = which_agent 30 | self.steps_per_episode = steps_per_episode 31 | self.dt_steps = dt_steps 32 | self.print_minimal = print_minimal 33 | 34 | #get sizes 35 | dataX= np.load(rundir + '/training_data/dataX.npy') 36 | dataY= np.load(rundir + '/training_data/dataY.npy') 37 | dataZ= np.load(rundir + '/training_data/dataZ.npy') 38 | inputs = np.concatenate((dataX, dataY), axis=1) 39 | assert inputs.shape[0] == dataZ.shape[0] 40 | inputSize = inputs.shape[1] 41 | outputSize = dataZ.shape[1] 42 | 43 | #calculate the means and stds 44 | self.mean_x = np.mean(dataX, axis = 0) 45 | dataX = dataX - self.mean_x 46 | self.std_x = np.std(dataX, axis = 0) 47 | dataX = np.nan_to_num(dataX/self.std_x) 48 | self.mean_y = np.mean(dataY, axis = 0) 49 | dataY = dataY - self.mean_y 50 | self.std_y = np.std(dataY, axis = 0) 51 | dataY = np.nan_to_num(dataY/self.std_y) 52 | self.mean_z = np.mean(dataZ, axis = 0) 53 | dataZ = dataZ - self.mean_z 54 | self.std_z = np.std(dataZ, axis = 0) 55 | dataZ = np.nan_to_num(dataZ/self.std_z) 56 | 57 | #get x and y index 58 | x_index, y_index, z_index, yaw_index, joint1_index, joint2_index, frontleg_index, frontshin_index, frontfoot_index, xvel_index, orientation_index = get_indices(which_agent) 59 | 60 | #make dyn model and randomly initialize weights 61 | self.dyn_model = Dyn_Model(inputSize, outputSize, self.sess, lr, batchsize, which_agent, x_index, y_index, num_fc_layers, 62 | depth_fc_layers, self.mean_x, self.mean_y, self.mean_z, self.std_x, self.std_y, self.std_z, 63 | tf_datatype, self.print_minimal) 64 | self.sess.run(tf.global_variables_initializer()) 65 | 66 | #load in weights from desired trained dynamics model 67 | pathname = rundir + '/models/finalModel.ckpt' 68 | saver = tf.train.Saver(max_to_keep=0) 69 | saver.restore(self.sess, pathname) 70 | print("\n\nRestored dynamics model with variables from ", pathname,"\n\n") 71 | 72 | #make controller, to use for querying optimal action 73 | self.mpc_controller = MPCController(self.env, self.dyn_model, self.horizon, self.which_agent, self.steps_per_episode, 74 | self.dt_steps, self.N, self.mean_x, self.mean_y, self.mean_z, self.std_x, self.std_y, 75 | self.std_z, 'nc', self.print_minimal, x_index, y_index, z_index, yaw_index, joint1_index, 76 | joint2_index, frontleg_index, frontshin_index, frontfoot_index, xvel_index, orientation_index) 77 | self.mpc_controller.desired_states = make_trajectory('straight', np.zeros((100,)), x_index, y_index, which_agent) #junk, just a placeholder 78 | 79 | #select task or reward func 80 | self.reward_func = self.mpc_controller.reward_functions.get_reward_func(False, 0, 0, 0, 0) 81 | 82 | def get_action(self, curr_obs): 83 | 84 | curr_nn_state= from_observation_to_usablestate(curr_obs, self.which_agent, True) 85 | best_action, _, _, _ = self.mpc_controller.get_action(curr_nn_state, 0, self.reward_func) 86 | 87 | return best_action -------------------------------------------------------------------------------- /helper_funcs.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 
import time 3 | import tensorflow as tf 4 | import numpy as np 5 | 6 | #import rllab envs 7 | from rllab.envs.normalized_env import normalize 8 | from rllab.envs.mujoco.swimmer_env import SwimmerEnv 9 | from rllab.envs.mujoco.half_cheetah_env import HalfCheetahEnv 10 | from rllab.envs.mujoco.hopper_env import HopperEnv 11 | from rllab.envs.mujoco.walker2d_env import Walker2DEnv 12 | from point_env import PointEnv 13 | from rllab.envs.mujoco.ant_env import AntEnv 14 | 15 | #import gym envs 16 | import gym 17 | from gym import wrappers 18 | from gym.envs.mujoco.reacher import ReacherEnv 19 | from rllab.envs.gym_env import GymEnv 20 | 21 | 22 | def add_noise(data_inp, noiseToSignal): 23 | data= copy.deepcopy(data_inp) 24 | mean_data = np.mean(data, axis = 0) 25 | std_of_noise = mean_data*noiseToSignal 26 | for j in range(mean_data.shape[0]): 27 | if(std_of_noise[j]>0): 28 | data[:,j] = np.copy(data[:,j]+np.random.normal(0, np.absolute(std_of_noise[j]), (data.shape[0],))) 29 | return data 30 | 31 | def perform_rollouts(policy, num_rollouts, steps_per_rollout, visualize_rollouts, CollectSamples, 32 | env, which_agent, dt_steps, dt_from_xml, follow_trajectories): 33 | #collect training data by performing rollouts 34 | print("Beginning to do ", num_rollouts, " rollouts.") 35 | c = CollectSamples(env, policy, visualize_rollouts, which_agent, dt_steps, dt_from_xml, follow_trajectories) 36 | states, controls, starting_states, rewards_list = c.collect_samples(num_rollouts, steps_per_rollout) 37 | 38 | print("Performed ", len(states), " rollouts, each with ", states[0].shape[0], " steps.") 39 | return states, controls, starting_states, rewards_list 40 | 41 | 42 | def create_env(which_agent): 43 | 44 | # setup environment 45 | if(which_agent==0): 46 | env = normalize(PointEnv()) 47 | elif(which_agent==1): 48 | env = normalize(AntEnv()) 49 | elif(which_agent==2): 50 | env = normalize(SwimmerEnv()) #dt 0.001 and frameskip=150 51 | elif(which_agent==3): 52 | env = ReacherEnv() 53 | elif(which_agent==4): 54 | env = normalize(HalfCheetahEnv()) 55 | elif(which_agent==5): 56 | env = RoachEnv() #this is a personal vrep env 57 | elif(which_agent==6): 58 | env=normalize(HopperEnv()) 59 | elif(which_agent==7): 60 | env=normalize(Walker2DEnv()) 61 | 62 | #get dt value from env 63 | if(which_agent==5): 64 | dt_from_xml = env.VREP_DT 65 | else: 66 | dt_from_xml = env.model.opt.timestep 67 | print("\n\n the dt is: ", dt_from_xml, "\n\n") 68 | 69 | #set vars 70 | tf.set_random_seed(2) 71 | gym.logger.setLevel(gym.logging.WARNING) 72 | dimO = env.observation_space.shape 73 | dimA = env.action_space.shape 74 | print ('--------------------------------- \nState space dimension: ', dimO) 75 | print ('Action space dimension: ', dimA, "\n -----------------------------------") 76 | 77 | return env, dt_from_xml 78 | 79 | 80 | def visualize_rendering(starting_state, list_of_actions, env_inp, dt_steps, dt_from_xml, which_agent): 81 | env=copy.deepcopy(env_inp) 82 | 83 | if(which_agent==5): 84 | env.reset() 85 | else: 86 | env.reset(starting_state) 87 | 88 | for action in list_of_actions: 89 | 90 | if(action.shape[0]==1): 91 | env.step(action[0], collectingInitialData=False) 92 | else: 93 | env.step(action, collectingInitialData=False) 94 | 95 | if(which_agent==5): 96 | junk=1 97 | else: 98 | env.render() 99 | time.sleep(dt_steps*dt_from_xml) 100 | 101 | print("Done rendering.") 102 | return -------------------------------------------------------------------------------- /main.py: 
-------------------------------------------------------------------------------- 1 | 2 | #imports 3 | import numpy as np 4 | import numpy.random as npr 5 | import tensorflow as tf 6 | import time 7 | import matplotlib.pyplot as plt 8 | import pickle 9 | import copy 10 | import os 11 | import sys 12 | from six.moves import cPickle 13 | from rllab.envs.normalized_env import normalize 14 | import yaml 15 | import argparse 16 | import json 17 | 18 | #my imports 19 | from policy_random import Policy_Random 20 | from trajectories import make_trajectory 21 | from trajectories import get_trajfollow_params 22 | from data_manipulation import generate_training_data_inputs 23 | from data_manipulation import generate_training_data_outputs 24 | from data_manipulation import from_observation_to_usablestate 25 | from data_manipulation import get_indices 26 | from helper_funcs import perform_rollouts 27 | from helper_funcs import create_env 28 | from helper_funcs import visualize_rendering 29 | from helper_funcs import add_noise 30 | from dynamics_model import Dyn_Model 31 | from mpc_controller import MPCController 32 | 33 | def main(): 34 | 35 | ################################################# 36 | ############ commandline arguments ############## 37 | ################################################# 38 | 39 | parser = argparse.ArgumentParser() 40 | parser.add_argument('--yaml_file', type=str, default='ant_forward') 41 | parser.add_argument('--seed', type=int, default=0) 42 | parser.add_argument('--run_num', type=int, default=0) 43 | parser.add_argument('--use_existing_training_data', action="store_true", dest='use_existing_training_data', default=False) 44 | parser.add_argument('--use_existing_dynamics_model', action="store_true", dest='use_existing_dynamics_model', default=False) 45 | 46 | parser.add_argument('--desired_traj_type', type=str, default='straight') #straight, left_turn, right_turn, u_turn, backward, forward_backward 47 | parser.add_argument('--num_rollouts_save_for_mf', type=int, default=60) 48 | 49 | parser.add_argument('--might_render', action="store_true", dest='might_render', default=False) 50 | parser.add_argument('--visualize_MPC_rollout', action="store_true", dest='visualize_MPC_rollout', default=False) 51 | parser.add_argument('--perform_forwardsim_for_vis', action="store_true", dest='perform_forwardsim_for_vis', default=False) 52 | parser.add_argument('--print_minimal', action="store_true", dest='print_minimal', default=False) 53 | args = parser.parse_args() 54 | 55 | 56 | ######################################## 57 | ######### params from yaml file ######## 58 | ######################################## 59 | 60 | #load in parameters from specified file 61 | 62 | yaml_path = os.path.abspath('yaml_files/'+args.yaml_file+'.yaml') 63 | assert(os.path.exists(yaml_path)) 64 | with open(yaml_path, 'r') as f: 65 | params = yaml.load(f) 66 | 67 | #save params from specified file 68 | which_agent = params['which_agent'] 69 | follow_trajectories = params['follow_trajectories'] 70 | #data collection 71 | use_threading = params['data_collection']['use_threading'] 72 | num_rollouts_train = params['data_collection']['num_rollouts_train'] 73 | num_rollouts_val = params['data_collection']['num_rollouts_val'] 74 | #dynamics model 75 | num_fc_layers = params['dyn_model']['num_fc_layers'] 76 | depth_fc_layers = params['dyn_model']['depth_fc_layers'] 77 | batchsize = params['dyn_model']['batchsize'] 78 | lr = params['dyn_model']['lr'] 79 | nEpoch = params['dyn_model']['nEpoch'] 80 | fraction_use_new 
= params['dyn_model']['fraction_use_new'] 81 | #controller 82 | horizon = params['controller']['horizon'] 83 | num_control_samples = params['controller']['num_control_samples'] 84 | if(which_agent==1): 85 | if(args.desired_traj_type=='straight'): 86 | num_control_samples=3000 87 | #aggregation 88 | num_aggregation_iters = params['aggregation']['num_aggregation_iters'] 89 | num_trajectories_for_aggregation = params['aggregation']['num_trajectories_for_aggregation'] 90 | rollouts_forTraining = params['aggregation']['rollouts_forTraining'] 91 | #noise 92 | make_aggregated_dataset_noisy = params['noise']['make_aggregated_dataset_noisy'] 93 | make_training_dataset_noisy = params['noise']['make_training_dataset_noisy'] 94 | noise_actions_during_MPC_rollouts = params['noise']['noise_actions_during_MPC_rollouts'] 95 | #steps 96 | dt_steps = params['steps']['dt_steps'] 97 | steps_per_episode = params['steps']['steps_per_episode'] 98 | steps_per_rollout_train = params['steps']['steps_per_rollout_train'] 99 | steps_per_rollout_val = params['steps']['steps_per_rollout_val'] 100 | #saving 101 | min_rew_for_saving = params['saving']['min_rew_for_saving'] 102 | #generic 103 | visualize_True = params['generic']['visualize_True'] 104 | visualize_False = params['generic']['visualize_False'] 105 | #from args 106 | print_minimal= args.print_minimal 107 | 108 | 109 | ######################################## 110 | ### make directories for saving data ### 111 | ######################################## 112 | 113 | save_dir = 'run_'+ str(args.run_num) 114 | if not os.path.exists(save_dir): 115 | os.makedirs(save_dir) 116 | os.makedirs(save_dir+'/losses') 117 | os.makedirs(save_dir+'/models') 118 | os.makedirs(save_dir+'/saved_forwardsim') 119 | os.makedirs(save_dir+'/saved_trajfollow') 120 | os.makedirs(save_dir+'/training_data') 121 | 122 | ######################################## 123 | ############## set vars ################ 124 | ######################################## 125 | 126 | #set seeds 127 | npr.seed(args.seed) 128 | tf.set_random_seed(args.seed) 129 | 130 | #data collection, either with or without multi-threading 131 | if(use_threading): 132 | from collect_samples_threaded import CollectSamples 133 | else: 134 | from collect_samples import CollectSamples 135 | 136 | #more vars 137 | x_index, y_index, z_index, yaw_index, joint1_index, joint2_index, frontleg_index, frontshin_index, frontfoot_index, xvel_index, orientation_index = get_indices(which_agent) 138 | tf_datatype = tf.float64 139 | noiseToSignal = 0.01 140 | 141 | # n is noisy, c is clean... 
1st letter is what action's executed and 2nd letter is what action's aggregated 142 | actions_ag='nc' 143 | 144 | ################################################# 145 | ######## save param values to a file ############ 146 | ################################################# 147 | 148 | param_dict={} 149 | param_dict['which_agent']= which_agent 150 | param_dict['use_existing_training_data']= str(args.use_existing_training_data) 151 | param_dict['desired_traj_type']= args.desired_traj_type 152 | param_dict['visualize_MPC_rollout']= str(args.visualize_MPC_rollout) 153 | param_dict['num_rollouts_save_for_mf']= args.num_rollouts_save_for_mf 154 | param_dict['seed']= args.seed 155 | param_dict['follow_trajectories']= str(follow_trajectories) 156 | param_dict['use_threading']= str(use_threading) 157 | param_dict['num_rollouts_train']= num_rollouts_train 158 | param_dict['num_fc_layers']= num_fc_layers 159 | param_dict['depth_fc_layers']= depth_fc_layers 160 | param_dict['batchsize']= batchsize 161 | param_dict['lr']= lr 162 | param_dict['nEpoch']= nEpoch 163 | param_dict['fraction_use_new']= fraction_use_new 164 | param_dict['horizon']= horizon 165 | param_dict['num_control_samples']= num_control_samples 166 | param_dict['num_aggregation_iters']= num_aggregation_iters 167 | param_dict['num_trajectories_for_aggregation']= num_trajectories_for_aggregation 168 | param_dict['rollouts_forTraining']= rollouts_forTraining 169 | param_dict['make_aggregated_dataset_noisy']= str(make_aggregated_dataset_noisy) 170 | param_dict['make_training_dataset_noisy']= str(make_training_dataset_noisy) 171 | param_dict['noise_actions_during_MPC_rollouts']= str(noise_actions_during_MPC_rollouts) 172 | param_dict['dt_steps']= dt_steps 173 | param_dict['steps_per_episode']= steps_per_episode 174 | param_dict['steps_per_rollout_train']= steps_per_rollout_train 175 | param_dict['steps_per_rollout_val']= steps_per_rollout_val 176 | param_dict['min_rew_for_saving']= min_rew_for_saving 177 | param_dict['x_index']= x_index 178 | param_dict['y_index']= y_index 179 | param_dict['tf_datatype']= str(tf_datatype) 180 | param_dict['noiseToSignal']= noiseToSignal 181 | 182 | with open(save_dir+'/params.pkl', 'wb') as f: 183 | pickle.dump(param_dict, f, pickle.HIGHEST_PROTOCOL) 184 | with open(save_dir+'/params.txt', 'w') as f: 185 | f.write(json.dumps(param_dict)) 186 | 187 | ################################################# 188 | ### initialize the experiment 189 | ################################################# 190 | 191 | if(not(print_minimal)): 192 | print("\n#####################################") 193 | print("Initializing environment") 194 | print("#####################################\n") 195 | 196 | #create env 197 | env, dt_from_xml= create_env(which_agent) 198 | 199 | #create random policy for data collection 200 | random_policy = Policy_Random(env) 201 | 202 | ################################################# 203 | ### set GPU options for TF 204 | ################################################# 205 | 206 | gpu_device = 0 207 | gpu_frac = 0.3 208 | os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_device) 209 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_frac) 210 | config = tf.ConfigProto(gpu_options=gpu_options, 211 | log_device_placement=False, 212 | allow_soft_placement=True, 213 | inter_op_parallelism_threads=1, 214 | intra_op_parallelism_threads=1) 215 | 216 | with tf.Session(config=config) as sess: 217 | 218 | ################################################# 219 | ### deal with data 220 | 
################################################# 221 | 222 | if(args.use_existing_training_data): 223 | if(not(print_minimal)): 224 | print("\n#####################################") 225 | print("Retrieving training data & policy from saved files") 226 | print("#####################################\n") 227 | 228 | dataX= np.load(save_dir + '/training_data/dataX.npy') # input1: state 229 | dataY= np.load(save_dir + '/training_data/dataY.npy') # input2: control 230 | dataZ= np.load(save_dir + '/training_data/dataZ.npy') # output: nextstate-state 231 | states_val= np.load(save_dir + '/training_data/states_val.npy') 232 | controls_val= np.load(save_dir + '/training_data/controls_val.npy') 233 | forwardsim_x_true= np.load(save_dir + '/training_data/forwardsim_x_true.npy') 234 | forwardsim_y= np.load(save_dir + '/training_data/forwardsim_y.npy') 235 | 236 | else: 237 | 238 | if(not(print_minimal)): 239 | print("\n#####################################") 240 | print("Performing rollouts to collect training data") 241 | print("#####################################\n") 242 | 243 | #perform rollouts 244 | states, controls, _, _ = perform_rollouts(random_policy, num_rollouts_train, steps_per_rollout_train, visualize_False, 245 | CollectSamples, env, which_agent, dt_steps, dt_from_xml, follow_trajectories) 246 | 247 | if(not(print_minimal)): 248 | print("\n#####################################") 249 | print("Performing rollouts to collect validation data") 250 | print("#####################################\n") 251 | 252 | start_validation_rollouts = time.time() 253 | states_val, controls_val, _, _ = perform_rollouts(random_policy, num_rollouts_val, steps_per_rollout_val, visualize_False, 254 | CollectSamples, env, which_agent, dt_steps, dt_from_xml, follow_trajectories) 255 | 256 | if(not(print_minimal)): 257 | print("\n#####################################") 258 | print("Convert from env observations to NN 'states' ") 259 | print("#####################################\n") 260 | 261 | #training 262 | states = from_observation_to_usablestate(states, which_agent, False) 263 | #validation 264 | states_val = from_observation_to_usablestate(states_val, which_agent, False) 265 | states_val = np.array(states_val) 266 | 267 | if(not(print_minimal)): 268 | print("\n#####################################") 269 | print("Data formatting: create inputs and labels for NN ") 270 | print("#####################################\n") 271 | 272 | dataX , dataY = generate_training_data_inputs(states, controls) 273 | dataZ = generate_training_data_outputs(states, which_agent) 274 | 275 | if(not(print_minimal)): 276 | print("\n#####################################") 277 | print("Add noise") 278 | print("#####################################\n") 279 | 280 | #add a little dynamics noise (next state is not perfectly accurate, given correct state and action) 281 | if(make_training_dataset_noisy): 282 | dataX = add_noise(dataX, noiseToSignal) 283 | dataZ = add_noise(dataZ, noiseToSignal) 284 | 285 | if(not(print_minimal)): 286 | print("\n#####################################") 287 | print("Perform rollout & save for forward sim") 288 | print("#####################################\n") 289 | 290 | states_forwardsim_orig, controls_forwardsim, _,_ = perform_rollouts(random_policy, 1, 100, 291 | visualize_False, CollectSamples, 292 | env, which_agent, dt_steps, 293 | dt_from_xml, follow_trajectories) 294 | states_forwardsim = np.copy(from_observation_to_usablestate(states_forwardsim_orig, which_agent, False)) 295 | forwardsim_x_true, 
forwardsim_y = generate_training_data_inputs(states_forwardsim, controls_forwardsim) 296 | 297 | if(not(print_minimal)): 298 | print("\n#####################################") 299 | print("Saving data") 300 | print("#####################################\n") 301 | 302 | np.save(save_dir + '/training_data/dataX.npy', dataX) 303 | np.save(save_dir + '/training_data/dataY.npy', dataY) 304 | np.save(save_dir + '/training_data/dataZ.npy', dataZ) 305 | np.save(save_dir + '/training_data/states_val.npy', states_val) 306 | np.save(save_dir + '/training_data/controls_val.npy', controls_val) 307 | np.save(save_dir + '/training_data/forwardsim_x_true.npy', forwardsim_x_true) 308 | np.save(save_dir + '/training_data/forwardsim_y.npy', forwardsim_y) 309 | 310 | if(not(print_minimal)): 311 | print("Done getting data.") 312 | print("dataX dim: ", dataX.shape) 313 | 314 | ################################################# 315 | ### init vars 316 | ################################################# 317 | 318 | counter_agg_iters=0 319 | training_loss_list=[] 320 | forwardsim_score_list=[] 321 | old_loss_list=[] 322 | new_loss_list=[] 323 | errors_1_per_agg=[] 324 | errors_5_per_agg=[] 325 | errors_10_per_agg=[] 326 | errors_50_per_agg=[] 327 | errors_100_per_agg=[] 328 | list_avg_rew=[] 329 | list_num_datapoints=[] 330 | dataX_new = np.zeros((0,dataX.shape[1])) 331 | dataY_new = np.zeros((0,dataY.shape[1])) 332 | dataZ_new = np.zeros((0,dataZ.shape[1])) 333 | 334 | ################################################# 335 | ### preprocess the old training dataset 336 | ################################################# 337 | 338 | if(not(print_minimal)): 339 | print("\n#####################################") 340 | print("Preprocessing 'old' training data") 341 | print("#####################################\n") 342 | 343 | #every component (i.e. 
x position) should become mean 0, std 1 344 | mean_x = np.mean(dataX, axis = 0) 345 | dataX = dataX - mean_x 346 | std_x = np.std(dataX, axis = 0) 347 | dataX = np.nan_to_num(dataX/std_x) 348 | 349 | mean_y = np.mean(dataY, axis = 0) 350 | dataY = dataY - mean_y 351 | std_y = np.std(dataY, axis = 0) 352 | dataY = np.nan_to_num(dataY/std_y) 353 | 354 | mean_z = np.mean(dataZ, axis = 0) 355 | dataZ = dataZ - mean_z 356 | std_z = np.std(dataZ, axis = 0) 357 | dataZ = np.nan_to_num(dataZ/std_z) 358 | 359 | ## concatenate state and action, to be used for training dynamics 360 | inputs = np.concatenate((dataX, dataY), axis=1) 361 | outputs = np.copy(dataZ) 362 | 363 | #doing a render here somehow allows it to not produce an error later 364 | might_render= False 365 | if(args.visualize_MPC_rollout or args.might_render): 366 | might_render=True 367 | if(might_render): 368 | new_env, _ = create_env(which_agent) 369 | new_env.render() 370 | 371 | ############################################## 372 | ########## THE AGGREGATION LOOP ############## 373 | ############################################## 374 | 375 | #dimensions 376 | assert inputs.shape[0] == outputs.shape[0] 377 | inputSize = inputs.shape[1] 378 | outputSize = outputs.shape[1] 379 | 380 | #initialize dynamics model 381 | dyn_model = Dyn_Model(inputSize, outputSize, sess, lr, batchsize, which_agent, x_index, y_index, num_fc_layers, 382 | depth_fc_layers, mean_x, mean_y, mean_z, std_x, std_y, std_z, tf_datatype, print_minimal) 383 | 384 | #create mpc controller 385 | mpc_controller = MPCController(env, dyn_model, horizon, which_agent, steps_per_episode, dt_steps, num_control_samples, 386 | mean_x, mean_y, mean_z, std_x, std_y, std_z, actions_ag, print_minimal, x_index, y_index, 387 | z_index, yaw_index, joint1_index, joint2_index, frontleg_index, frontshin_index, 388 | frontfoot_index, xvel_index, orientation_index) 389 | 390 | #randomly initialize all vars 391 | sess.run(tf.global_variables_initializer()) 392 | 393 | while(counter_agg_iters<num_aggregation_iters): 487 | if(length_curr_rollout>100): 488 | 489 | ######################### 490 | #### STATE INPUTS TO NN 491 | ######################### 492 | 493 | ## take all except the last 100 pts from each rollout 494 | validation_inputs_states.append(states_val[i][0:length_curr_rollout-100]) 495 | 496 | ######################### 497 | #### CONTROL INPUTS TO NN 498 | ######################### 499 | 500 | #100 step controls 501 | list_100 = [] 502 | for j in range(100): 503 | list_100.append(controls_val[i][0+j:length_curr_rollout-100+j]) 504 | ##for states 0:x, first apply acs 0:x, then apply acs 1:x+1, then apply acs 2:x+2, etc... 
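## the loop above builds a sliding window of future actions for each validation state:
##   list_100[j][t] == controls_val[i][t+j]
## so after the np.array + swapaxes below, row t holds the 100 consecutive actions a_t ... a_t+99
## that the dynamics model applies open-loop when it starts its multistep prediction from state s_t
## (the k-step labels gathered further down are the true states reached after applying k of those actions).
## a roughly equivalent vectorized sketch, hypothetical and not used by this script, shown only for clarity:
##   idx = np.arange(length_curr_rollout - 100)[:, None] + np.arange(100)[None, :]
##   windows = controls_val[i][idx]   # already shaped [steps x 100 x action_dim], no swapaxes needed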
505 | list_100=np.array(list_100) #100xstepsx2 506 | list_100= np.swapaxes(list_100,0,1) #stepsx100x2 507 | controls_100step.append(list_100) 508 | 509 | ######################### 510 | #### STATE LABELS- compare these to the outputs of NN (forward sim) 511 | ######################### 512 | labels_1step.append(states_val[i][0+1:length_curr_rollout-100+1]) 513 | labels_5step.append(states_val[i][0+5:length_curr_rollout-100+5]) 514 | labels_10step.append(states_val[i][0+10:length_curr_rollout-100+10]) 515 | labels_50step.append(states_val[i][0+50:length_curr_rollout-100+50]) 516 | labels_100step.append(states_val[i][0+100:length_curr_rollout-100+100]) 517 | 518 | validation_inputs_states = np.concatenate(validation_inputs_states) 519 | controls_100step = np.concatenate(controls_100step) 520 | labels_1step = np.concatenate(labels_1step) 521 | labels_5step = np.concatenate(labels_5step) 522 | labels_10step = np.concatenate(labels_10step) 523 | labels_50step = np.concatenate(labels_50step) 524 | labels_100step = np.concatenate(labels_100step) 525 | 526 | ##################################### 527 | ## pass into forward sim, to make predictions 528 | ##################################### 529 | 530 | many_in_parallel = True 531 | predicted_100step = dyn_model.do_forward_sim(validation_inputs_states, controls_100step, 532 | many_in_parallel, env, which_agent) 533 | 534 | ##################################### 535 | ## Calculate validation metrics (mse loss between predicted and true) 536 | ##################################### 537 | 538 | array_meanx = np.tile(np.expand_dims(mean_x, axis=0),(labels_1step.shape[0],1)) 539 | array_stdx = np.tile(np.expand_dims(std_x, axis=0),(labels_1step.shape[0],1)) 540 | 541 | error_1step = np.mean(np.square(np.nan_to_num(np.divide(predicted_100step[1]-array_meanx,array_stdx)) 542 | -np.nan_to_num(np.divide(labels_1step-array_meanx,array_stdx)))) 543 | error_5step = np.mean(np.square(np.nan_to_num(np.divide(predicted_100step[5]-array_meanx,array_stdx)) 544 | -np.nan_to_num(np.divide(labels_5step-array_meanx,array_stdx)))) 545 | error_10step = np.mean(np.square(np.nan_to_num(np.divide(predicted_100step[10]-array_meanx,array_stdx)) 546 | -np.nan_to_num(np.divide(labels_10step-array_meanx,array_stdx)))) 547 | error_50step = np.mean(np.square(np.nan_to_num(np.divide(predicted_100step[50]-array_meanx,array_stdx)) 548 | -np.nan_to_num(np.divide(labels_50step-array_meanx,array_stdx)))) 549 | error_100step = np.mean(np.square(np.nan_to_num(np.divide(predicted_100step[100]-array_meanx,array_stdx)) 550 | -np.nan_to_num(np.divide(labels_100step-array_meanx,array_stdx)))) 551 | print("Multistep error values: ", error_1step, error_5step, error_10step, error_50step, error_100step,"\n") 552 | 553 | errors_1_per_agg.append(error_1step) 554 | errors_5_per_agg.append(error_5step) 555 | errors_10_per_agg.append(error_10step) 556 | errors_50_per_agg.append(error_50step) 557 | errors_100_per_agg.append(error_100step) 558 | 559 | ##################################### 560 | ## Perform 1 forward simulation, for visualization purposes (compare predicted traj vs true traj) 561 | ##################################### 562 | 563 | if(args.perform_forwardsim_for_vis): 564 | if(not(print_minimal)): 565 | print("\n#####################################") 566 | print("Performing a forward sim of the learned model. using pre-saved dataset. just for visualization") 567 | print("#####################################\n") 568 | 569 | #for a given set of controls, 570 | #compare sim traj vs. 
learned model's traj 571 | #(dont expect this to be good cuz error accum) 572 | many_in_parallel = False 573 | forwardsim_x_pred = dyn_model.do_forward_sim(forwardsim_x_true, forwardsim_y, many_in_parallel, env, which_agent) 574 | forwardsim_x_pred = np.array(forwardsim_x_pred) 575 | 576 | # save results of forward sim 577 | np.save(save_dir + '/saved_forwardsim/forwardsim_states_true_'+str(counter_agg_iters)+'.npy', forwardsim_x_true) 578 | np.save(save_dir + '/saved_forwardsim/forwardsim_states_pred_'+str(counter_agg_iters)+'.npy', forwardsim_x_pred) 579 | 580 | ##################################### 581 | ######## EXECUTE CONTROLLER ######### 582 | ##################################### 583 | 584 | if(not(print_minimal)): 585 | print("##############################################") 586 | print("#### Execute the controller to follow desired trajectories") 587 | print("##############################################\n") 588 | 589 | ################################################################### 590 | ### Try to follow trajectory... collect rollouts 591 | ################################################################### 592 | 593 | #init vars 594 | list_rewards=[] 595 | starting_states=[] 596 | selected_multiple_u = [] 597 | resulting_multiple_x = [] 598 | 599 | #get parameters for trajectory following 600 | horiz_penalty_factor, forward_encouragement_factor, heading_penalty_factor, desired_snake_headingInit = get_trajfollow_params(which_agent, args.desired_traj_type) 601 | if(follow_trajectories==False): 602 | desired_snake_headingInit=0 603 | 604 | for rollout_num in range(num_trajectories_for_aggregation): 605 | 606 | if(not(print_minimal)): 607 | print("\nPerforming MPC rollout #", rollout_num) 608 | 609 | #reset env and set the desired traj 610 | if(which_agent==2): 611 | starting_observation, starting_state = env.reset(evaluating=True, returnStartState=True, isSwimmer=True) 612 | else: 613 | starting_observation, starting_state = env.reset(evaluating=True, returnStartState=True) 614 | #start swimmer heading in correct direction 615 | if(which_agent==2): 616 | starting_state[2] = desired_snake_headingInit 617 | starting_observation, starting_state = env.reset(starting_state, returnStartState=True) 618 | 619 | #desired trajectory to follow 620 | starting_observation_NNinput = from_observation_to_usablestate(starting_observation, which_agent, True) 621 | desired_x = make_trajectory(args.desired_traj_type, starting_observation_NNinput, x_index, y_index, which_agent) 622 | 623 | #perform 1 MPC rollout 624 | #depending on follow_trajectories, either move forward or follow desired_traj_type 625 | if(noise_actions_during_MPC_rollouts): 626 | curr_noise_amount = 0.005 627 | else: 628 | curr_noise_amount=0 629 | resulting_x, selected_u, ep_rew, _ = mpc_controller.perform_rollout(starting_state, starting_observation, 630 | starting_observation_NNinput, desired_x, 631 | follow_trajectories, horiz_penalty_factor, 632 | forward_encouragement_factor, heading_penalty_factor, 633 | noise_actions_during_MPC_rollouts, curr_noise_amount) 634 | 635 | #save info from MPC rollout 636 | list_rewards.append(ep_rew) 637 | selected_multiple_u.append(selected_u) 638 | resulting_multiple_x.append(resulting_x) 639 | starting_states.append(starting_state) 640 | 641 | if(args.visualize_MPC_rollout): 642 | input("\n\nPAUSE BEFORE VISUALIZATION... 
Press Enter to continue...") 643 | for vis_index in range(num_trajectories_for_aggregation): 644 | visualize_rendering(starting_states[vis_index], selected_multiple_u[vis_index], env, dt_steps, dt_from_xml, which_agent) 645 | 646 | #bookkeeping 647 | avg_rew = np.mean(np.array(list_rewards)) 648 | std_rew = np.std(np.array(list_rewards)) 649 | print("############# Avg reward for ", num_trajectories_for_aggregation, " MPC rollouts: ", avg_rew) 650 | print("############# Std reward for ", num_trajectories_for_aggregation, " MPC rollouts: ", std_rew) 651 | print("############# Rewards for the ", num_trajectories_for_aggregation, " MPC rollouts: ", list_rewards) 652 | 653 | #save pts_used_so_far + performance achieved by those points 654 | list_num_datapoints.append(dataX.shape[0]+dataX_new.shape[0]) 655 | list_avg_rew.append(avg_rew) 656 | 657 | ############################## 658 | ### Aggregate data 659 | ############################## 660 | 661 | full_states_list = [] 662 | full_controls_list = [] 663 | if(counter_agg_iters<(num_aggregation_iters-1)): 664 | 665 | ############################## 666 | ### aggregate some rollouts into training set 667 | ############################## 668 | 669 | x_array = np.array(resulting_multiple_x)[0:(rollouts_forTraining+1)] 670 | if(which_agent==6 or which_agent==1): 671 | u_array = np.array(selected_multiple_u)[0:(rollouts_forTraining+1)] 672 | else: 673 | u_array = np.squeeze(np.array(selected_multiple_u), axis=2)[0:(rollouts_forTraining+1)] 674 | 675 | for i in range(rollouts_forTraining): 676 | 677 | if(which_agent==6 or which_agent==1): 678 | x= np.array(x_array[i]) 679 | u= np.squeeze(u_array[i], axis=1) 680 | else: 681 | x= x_array[i] #[N+1, NN_inp] 682 | u= u_array[i] #[N, actionSize] 683 | 684 | newDataX= np.copy(x[0:-1, :]) 685 | newDataY= np.copy(u) 686 | newDataZ= np.copy(x[1:, :]-x[0:-1, :]) 687 | 688 | # make this new data a bit noisy before adding it into the dataset 689 | if(make_aggregated_dataset_noisy): 690 | newDataX = add_noise(newDataX, noiseToSignal) 691 | newDataZ = add_noise(newDataZ, noiseToSignal) 692 | 693 | # the actual aggregation 694 | dataX_new = np.concatenate((dataX_new, newDataX)) 695 | dataY_new = np.concatenate((dataY_new, newDataY)) 696 | dataZ_new = np.concatenate((dataZ_new, newDataZ)) 697 | 698 | ############################## 699 | ### aggregate the rest of the rollouts into validation set 700 | ############################## 701 | 702 | x_array = np.array(resulting_multiple_x)[rollouts_forTraining:len(resulting_multiple_x)] 703 | # ^ dim: [rollouts_forValidation x stepsPerEpisode+1 x stateSize] 704 | if(which_agent==6 or which_agent==1): 705 | u_array = np.array(selected_multiple_u)[rollouts_forTraining:len(resulting_multiple_x)] 706 | else: 707 | u_array = np.squeeze(np.array(selected_multiple_u), axis=2)[rollouts_forTraining:len(resulting_multiple_x)] 708 | # rollouts_forValidation x stepsPerEpisode x acSize 709 | 710 | full_states_list = [] 711 | full_controls_list = [] 712 | for i in range(states_val.shape[0]): 713 | full_states_list.append(states_val[i]) 714 | full_controls_list.append(controls_val[i]) 715 | for i in range(x_array.shape[0]): 716 | x = np.array(x_array[i]) 717 | full_states_list.append(x[0:-1,:]) 718 | full_controls_list.append(np.squeeze(u_array[i])) 719 | states_val = np.array(full_states_list) 720 | controls_val = np.array(full_controls_list) 721 | 722 | #save trajectory following stuff (aka trajectory taken) for plotting 723 | np.save(save_dir + '/saved_trajfollow/startingstate_iter' + 
str(counter_agg_iters) +'.npy', starting_state) 724 | np.save(save_dir + '/saved_trajfollow/control_iter' + str(counter_agg_iters) +'.npy', selected_u) 725 | np.save(save_dir + '/saved_trajfollow/true_iter' + str(counter_agg_iters) +'.npy', desired_x) 726 | np.save(save_dir + '/saved_trajfollow/pred_iter' + str(counter_agg_iters) +'.npy', np.array(resulting_multiple_x)) 727 | 728 | #bookkeeping 729 | if(not(print_minimal)): 730 | print("\n\nDONE WITH BIG LOOP ITERATION ", counter_agg_iters ,"\n\n") 731 | print("training dataset size: ", dataX.shape[0] + dataX_new.shape[0]) 732 | if(len(full_states_list)>0): 733 | print("validation dataset size: ", np.concatenate(full_states_list).shape[0]) 734 | print("Time taken: {:0.2f} s\n\n".format(time.time()-starting_big_loop)) 735 | counter_agg_iters= counter_agg_iters+1 736 | 737 | #save things after every agg iteration 738 | np.save(save_dir + '/errors_1_per_agg.npy', errors_1_per_agg) 739 | np.save(save_dir + '/errors_5_per_agg.npy', errors_5_per_agg) 740 | np.save(save_dir + '/errors_10_per_agg.npy', errors_10_per_agg) 741 | np.save(save_dir + '/errors_50_per_agg.npy', errors_50_per_agg) 742 | np.save(save_dir + '/errors_100_per_agg.npy', errors_100_per_agg) 743 | np.save(save_dir + '/avg_rollout_rewards_per_agg.npy', list_avg_rew) 744 | np.save(save_dir + '/losses/list_training_loss.npy', training_loss_list) 745 | np.save(save_dir + '/losses/list_old_loss.npy', old_loss_list) 746 | np.save(save_dir + '/losses/list_new_loss.npy', new_loss_list) 747 | 748 | ############################## 749 | ### perform a bunch of MPC rollouts to save for later mbmf TRPO usage 750 | ############################## 751 | 752 | all_rollouts_to_save = [] 753 | if(args.num_rollouts_save_for_mf>0): 754 | print("##############################################") 755 | print("#### Performing MPC rollouts to save for later mbmf TRPO usage") 756 | print("##############################################\n") 757 | 758 | #init vars 759 | list_rewards=[] 760 | starting_states=[] 761 | num_saved = 0 762 | rollout_num = 0 763 | while(num_saved < args.num_rollouts_save_for_mf): 764 | if(not(print_minimal)): 765 | print("\nSo far, saved ", num_saved, " rollouts") 766 | print("Currently, on rollout #", rollout_num) 767 | 768 | #reset env before performing rollout 769 | if(which_agent==2): 770 | starting_observation, starting_state = env.reset(evaluating=True, returnStartState=True, isSwimmer=True) 771 | else: 772 | starting_observation, starting_state = env.reset(evaluating=True, returnStartState=True) 773 | if(which_agent==2): 774 | starting_state[2] = desired_snake_headingInit 775 | starting_observation, starting_state = env.reset(starting_state, returnStartState=True) 776 | starting_observation_NNinput = from_observation_to_usablestate(starting_observation, which_agent, True) 777 | 778 | #perform 1 MPC rollout 779 | startrollout = time.time() 780 | curr_noise_amount=0 781 | _, _, ep_rew, rollout_saved = mpc_controller.perform_rollout(starting_state, starting_observation, 782 | starting_observation_NNinput, desired_x, 783 | follow_trajectories, horiz_penalty_factor, 784 | forward_encouragement_factor, heading_penalty_factor, 785 | noise_actions_during_MPC_rollouts, curr_noise_amount) 786 | 787 | if(not(print_minimal)): 788 | print("Time taken for a single rollout: {:0.2f} s\n\n".format(time.time()-startrollout)) 789 | 790 | #save rollouts 791 | rollout_num += 1 792 | if(ep_rew>min_rew_for_saving): 793 | list_rewards.append(ep_rew) 794 | all_rollouts_to_save.append(rollout_saved) 
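# note: only rollouts whose return exceeds min_rew_for_saving are kept; rollout_num counts every attempt,
# while num_saved counts only the kept ones, and each kept rollout's starting state is stored (next line)
# so the rollout can later be re-rendered from the same initial conditions.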
795 | starting_states.append(starting_state) 796 | num_saved += 1 797 | 798 | #bookkeeping 799 | if(len(list_rewards)>0): 800 | 801 | #get avg rew 802 | avg_rew = np.mean(np.array(list_rewards)) 803 | print("############# Avg over all selected runs: ", avg_rew) 804 | print("############# Rewards of all selected runs: ", list_rewards) 805 | 806 | #save the rollouts for later MBMF usage 807 | pathname_savedMPCrollouts = save_dir + '/savedRollouts_avg'+ str(int(avg_rew)) +'.save' 808 | pathname2_savedMPCrollouts = save_dir + '/savedRollouts.save' 809 | f = open(pathname_savedMPCrollouts, 'wb') 810 | cPickle.dump(all_rollouts_to_save, f, protocol=cPickle.HIGHEST_PROTOCOL) 811 | f.close() 812 | f = open(pathname2_savedMPCrollouts, 'wb') 813 | cPickle.dump(all_rollouts_to_save, f, protocol=cPickle.HIGHEST_PROTOCOL) 814 | f.close() 815 | 816 | #save the starting states of these rollouts, in case want to visualize them later 817 | f = open(save_dir + '/savedRollouts_startingStates.save', 'wb') 818 | cPickle.dump(starting_states, f, protocol=cPickle.HIGHEST_PROTOCOL) 819 | f.close() 820 | 821 | print("Saved MPC rollouts for later mbmf TRPO usage.") 822 | 823 | np.save(save_dir + '/datapoints_MB.npy', list_num_datapoints) 824 | np.save(save_dir + '/performance_MB.npy', list_avg_rew) 825 | 826 | print("ALL DONE.") 827 | 828 | return 829 | 830 | if __name__ == '__main__': 831 | main() 832 | -------------------------------------------------------------------------------- /mbmf.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import math 4 | npr = np.random 5 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy 6 | import tensorflow as tf 7 | from six.moves import cPickle 8 | from collect_samples import CollectSamples 9 | from get_true_action import GetTrueAction 10 | import os 11 | import copy 12 | from helper_funcs import create_env 13 | from helper_funcs import perform_rollouts 14 | from helper_funcs import add_noise 15 | from feedforward_network import feedforward_network 16 | from helper_funcs import visualize_rendering 17 | import argparse 18 | 19 | #TRPO things 20 | from rllab.envs.normalized_env import normalize 21 | from rllab.algos.trpo import TRPO 22 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 23 | from rllab.optimizers.conjugate_gradient_optimizer import ConjugateGradientOptimizer 24 | from rllab.optimizers.conjugate_gradient_optimizer import FiniteDifferenceHvp 25 | from rllab.misc.instrument import run_experiment_lite 26 | 27 | def nn_policy(inputState, junk1, outputSize, junk2, junk3, junk4): 28 | #init vars 29 | x = inputState 30 | initializer = tf.contrib.layers.xavier_initializer(uniform=False, seed=None, dtype=tf.float64) 31 | fc = tf.contrib.layers.fully_connected 32 | weights_reg = tf.contrib.layers.l2_regularizer(scale=0.001) 33 | #hidden layer 1 34 | fc1 = fc(x, num_outputs= 64, activation_fn=None, trainable=True, reuse=False, weights_initializer=initializer, 35 | biases_initializer=initializer, weights_regularizer=weights_reg) 36 | h1 = tf.tanh(fc1) 37 | #hidden layer 2 38 | fc2 = fc(h1, num_outputs= 64, activation_fn=None, trainable=True, reuse=False, weights_initializer=initializer, 39 | biases_initializer=initializer, weights_regularizer=weights_reg) 40 | h2 = tf.tanh(fc2) 41 | # output layer 42 | output = fc(h2, num_outputs=outputSize, activation_fn=None, trainable=True, reuse=False, 43 | weights_initializer=initializer, 
biases_initializer=initializer) 44 | return output 45 | 46 | def run_task(v): 47 | 48 | which_agent=v["which_agent"] 49 | env,_ = create_env(which_agent) 50 | baseline = LinearFeatureBaseline(env_spec=env.spec) 51 | optimizer_params = dict(base_eps=1e-5) 52 | 53 | #how many iters 54 | num_trpo_iters = 2500 55 | if(which_agent==1): 56 | num_trpo_iters = 2500 57 | if(which_agent==2): 58 | steps_per_rollout=333 59 | num_trpo_iters = 200 60 | if(which_agent==4): 61 | num_trpo_iters= 2000 62 | if(which_agent==6): 63 | num_trpo_iters= 2000 64 | 65 | #recreate the policy 66 | policy = GaussianMLPPolicy(env_spec=env.spec, hidden_sizes=(v["depth_fc_layers"], v["depth_fc_layers"]), init_std=v["std_on_mlp_policy"]) 67 | all_params = np.concatenate((v["policy_values"], policy._l_log_std.get_params()[0].get_value())) 68 | policy.set_param_values(all_params) 69 | 70 | 71 | algo = TRPO( 72 | env=env, 73 | policy=policy, 74 | baseline=baseline, 75 | batch_size=v["trpo_batchsize"], 76 | max_path_length=v["steps_per_rollout"], 77 | n_itr=num_trpo_iters, 78 | discount=0.995, 79 | optimizer=v["ConjugateGradientOptimizer"](hvp_approach=v["FiniteDifferenceHvp"](**optimizer_params)), 80 | step_size=0.05, 81 | plot_true=True) 82 | 83 | #train the policy 84 | algo.train() 85 | 86 | ########################################## 87 | ########################################## 88 | 89 | #ARGUMENTS TO SPECIFY 90 | parser = argparse.ArgumentParser() 91 | parser.add_argument('--save_trpo_run_num', type=int, default='1') 92 | parser.add_argument('--run_num', type=int, default=1) 93 | parser.add_argument('--which_agent', type=int, default=1) 94 | parser.add_argument('--std_on_mlp_policy', type=float, default=0.5) 95 | parser.add_argument('--num_workers_trpo', type=int, default=2) 96 | parser.add_argument('--might_render', action="store_true", dest='might_render', default=False) 97 | parser.add_argument('--visualize_mlp_policy', action="store_true", dest='visualize_mlp_policy', default=False) 98 | parser.add_argument('--visualize_on_policy_rollouts', action="store_true", dest='visualize_on_policy_rollouts', default=False) 99 | parser.add_argument('--print_minimal', action="store_true", dest='print_minimal', default=False) 100 | parser.add_argument('--use_existing_pretrained_policy', action="store_true", dest='use_existing_pretrained_policy', default=False) 101 | args = parser.parse_args() 102 | 103 | ########################################## 104 | ########################################## 105 | 106 | #save args 107 | save_trpo_run_num= args.save_trpo_run_num 108 | run_num = args.run_num 109 | which_agent = args.which_agent 110 | visualize_mlp_policy = args.visualize_mlp_policy 111 | visualize_on_policy_rollouts = args.visualize_on_policy_rollouts 112 | print_minimal = args.print_minimal 113 | std_on_mlp_policy = args.std_on_mlp_policy 114 | 115 | #swimmer 116 | trpo_batchsize = 50000 117 | if(which_agent==2): 118 | #training vars for new policy 119 | batchsize = 512 120 | nEpoch = 70 121 | learning_rate = 0.001 122 | #aggregation for training of new policy 123 | num_agg_iters = 3 124 | num_rollouts_to_agg= 5 125 | num_rollouts_testperformance = 2 126 | start_using_noised_actions = 0 127 | #other 128 | do_trpo = True 129 | #cheetah 130 | if(which_agent==4): 131 | #training vars for new policy 132 | batchsize = 512 133 | nEpoch = 300 134 | learning_rate = 0.001 135 | #aggregation for training of new policy 136 | num_agg_iters = 3 137 | num_rollouts_to_agg= 2 138 | num_rollouts_testperformance = 2 139 | 
start_using_noised_actions = 10 140 | #other 141 | do_trpo = True 142 | #hopper 143 | if(which_agent==6): 144 | #training vars for new policy 145 | batchsize = 512 146 | nEpoch = 200 #70 147 | learning_rate = 0.001 148 | #aggregation for training of new policy 149 | num_agg_iters = 5 #10 150 | num_rollouts_to_agg= 5 ###10 151 | num_rollouts_testperformance = 3 152 | start_using_noised_actions = 50 153 | #other 154 | do_trpo = True 155 | trpo_batchsize = 25000 156 | #ant 157 | if(which_agent==1): 158 | #training vars for new policy 159 | batchsize = 512 160 | nEpoch = 200 161 | learning_rate = 0.001 162 | #aggregation for training of new policy 163 | num_agg_iters = 5 164 | num_rollouts_to_agg= 5 165 | num_rollouts_testperformance = 3 166 | start_using_noised_actions = 50 167 | #other 168 | do_trpo = True 169 | 170 | ########################################## 171 | ########################################## 172 | 173 | #get vars from saved MB run 174 | param_dict = np.load('run_'+ str(run_num) + '/params.pkl') 175 | N = param_dict['num_control_samples'] 176 | horizon = param_dict['horizon'] 177 | num_fc_layers_old = param_dict['num_fc_layers'] 178 | depth_fc_layers_old = param_dict['depth_fc_layers'] 179 | lr_olddynmodel = param_dict['lr'] 180 | batchsize_olddynmodel = param_dict['batchsize'] 181 | dt_steps = param_dict['dt_steps'] 182 | steps_per_rollout = param_dict['steps_per_episode'] 183 | tf_datatype = param_dict['tf_datatype'] 184 | seed = param_dict['seed'] 185 | if(tf_datatype==""): 186 | tf_datatype = tf.float64 187 | else: 188 | tf_datatype = tf.float32 189 | 190 | #load the saved MPC rollouts 191 | f = open('run_'+ str(run_num)+'/savedRollouts.save', 'rb') 192 | allData = cPickle.load(f) 193 | f.close() 194 | 195 | ########################################## 196 | ########################################## 197 | 198 | #create env 199 | env, dt_from_xml = create_env(which_agent) 200 | 201 | # set tf seed 202 | npr.seed(seed) 203 | tf.set_random_seed(seed) 204 | 205 | #init vars 206 | noise_onpol_rollouts=0.005 207 | plot=False 208 | print_frequency = 20 209 | validation_frequency = 50 210 | num_fc_layers=2 211 | depth_fc_layers=64 212 | save_dir = 'run_'+ str(run_num)+'/mbmf' 213 | if not os.path.exists(save_dir): 214 | os.makedirs(save_dir) 215 | 216 | #convert saved rollouts into array 217 | allDataArray=[] 218 | allControlsArray=[] 219 | for i in range(len(allData)): 220 | allDataArray.append(allData[i]['observations']) 221 | allControlsArray.append(allData[i]['actions']) 222 | training_data=np.concatenate(allDataArray) 223 | labels=np.concatenate(allControlsArray) 224 | 225 | if(len(labels.shape)==3): 226 | labels=np.squeeze(labels) 227 | print("\n(total) Data size ", training_data.shape[0],"\n\n") 228 | 229 | ################################################################################## 230 | 231 | # set aside some of the training data for validation 232 | validnum = 10000 233 | if((which_agent==6)or(which_agent==2)or(which_agent==1)): 234 | validnum=700 235 | num = training_data.shape[0]-validnum 236 | validation_x = training_data[num:num+validnum,:] 237 | training_data=training_data[0:num,:] 238 | validation_z = labels[num:num+validnum,:] 239 | labels=labels[0:num,:] 240 | print("\nTraining data size ", training_data.shape[0]) 241 | print("Validation data size ", validation_x.shape[0],"\n") 242 | 243 | if(args.might_render or args.visualize_mlp_policy or args.visualize_on_policy_rollouts): 244 | might_render=True 245 | else: 246 | might_render=False 247 | #this somehow 
prevents a seg fault from happening in the later visualization 248 | if(might_render): 249 | new_env = copy.deepcopy(env) 250 | new_env.render() 251 | 252 | #gpu options for tensorflow 253 | gpu_device = 0 254 | gpu_frac = 0.3 255 | os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_device) 256 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_frac) 257 | config = tf.ConfigProto(gpu_options=gpu_options, 258 | log_device_placement=False, 259 | allow_soft_placement=True, 260 | inter_op_parallelism_threads=1, 261 | intra_op_parallelism_threads=1) 262 | 263 | #add SL noise to training data inputs and outputs 264 | '''TO DO''' 265 | 266 | #keep track of sample complexity 267 | datapoints_used_forMB = np.load('run_'+ str(run_num) + '/datapoints_MB.npy')[-1] 268 | datapoints_used_to_init_imit = training_data.shape[0] 269 | total_datapoints = datapoints_used_forMB + datapoints_used_to_init_imit #points used thus far 270 | imit_list_num_datapoints = [] 271 | imit_list_avg_rew = [] 272 | 273 | with tf.Session(config=config) as sess: 274 | 275 | if(not(args.use_existing_pretrained_policy)): 276 | 277 | #init vars 278 | g=GetTrueAction() 279 | g.make_model(sess, env, 'run_'+ str(run_num), tf_datatype, num_fc_layers_old, depth_fc_layers_old, which_agent, 280 | lr_olddynmodel, batchsize_olddynmodel, N, horizon, steps_per_rollout, dt_steps, print_minimal) 281 | nData=training_data.shape[0] 282 | inputSize = training_data.shape[1] 283 | outputSize = labels.shape[1] 284 | 285 | #placeholders 286 | inputs_placeholder = tf.placeholder(tf_datatype, shape=[None, inputSize], name='inputs') 287 | labels_placeholder = tf.placeholder(tf_datatype, shape=[None, outputSize], name='outputs') 288 | 289 | #output of nn 290 | curr_output = nn_policy(inputs_placeholder, inputSize, outputSize, num_fc_layers, depth_fc_layers, tf_datatype) 291 | 292 | #define training 293 | theta = tf.trainable_variables() 294 | loss = tf.reduce_mean(tf.square(curr_output - labels_placeholder)) 295 | opt = tf.train.AdamOptimizer(learning_rate) 296 | gv = [(g,v) for g,v in opt.compute_gradients(loss, theta) if g is not None] 297 | train_step = opt.apply_gradients(gv) 298 | 299 | #get all the uninitialized variables (ie right now all of them) 300 | list_vars=[] 301 | for var in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES): 302 | if(not(tf.is_variable_initialized(var).eval())): 303 | list_vars.append(var) 304 | sess.run(tf.variables_initializer(list_vars)) 305 | 306 | #aggregation iterations 307 | for agg_iter in range(num_agg_iters): 308 | 309 | print("ON AGGREGATION ITERATION ", agg_iter) 310 | rewards_for_this_iter=[] 311 | plot_trainingloss_x=[] 312 | plot_trainingloss_y=[] 313 | plot_validloss_x=[] 314 | plot_validloss_y=[] 315 | 316 | for i in range(nEpoch): 317 | 318 | ################################ 319 | ############ TRAIN ############# 320 | ################################ 321 | 322 | avg_loss=0 323 | iters_in_batch=0 324 | range_of_indeces = np.arange(training_data.shape[0]) 325 | indeces = npr.choice(range_of_indeces, size=(training_data.shape[0],), replace=False) 326 | 327 | for batch in range(int(math.floor(nData / batchsize))): 328 | # Batch the training data 329 | inputs = training_data[indeces[batch*batchsize:(batch+1)*batchsize], :] 330 | outputs = labels[indeces[batch*batchsize:(batch+1)*batchsize], :] 331 | 332 | #one iteration of feedforward training 333 | _, my_loss = sess.run([train_step, loss], 334 | feed_dict={inputs_placeholder: inputs, labels_placeholder: outputs}) 335 | 336 | #loss 337 | avg_loss+= 
np.sqrt(my_loss) 338 | iters_in_batch+=1 339 | 340 | ################################ 341 | ###### SAVE TRAIN LOSSES ####### 342 | ################################ 343 | 344 | if(iters_in_batch==0): 345 | iters_in_batch=1 346 | 347 | current_loss = avg_loss/iters_in_batch 348 | 349 | #save training losses 350 | if(not(print_minimal)): 351 | if(i%print_frequency==0): 352 | print("training loss: ", current_loss, ", nEpoch: ", i) 353 | plot_trainingloss_x.append(i) 354 | plot_trainingloss_y.append(current_loss) 355 | np.save(save_dir + '/plot_trainingloss_x.npy', plot_trainingloss_x) 356 | np.save(save_dir + '/plot_trainingloss_y.npy', plot_trainingloss_y) 357 | 358 | ################################ 359 | ########## VALIDATION ########## 360 | ################################ 361 | 362 | if((i%validation_frequency)==0): 363 | avg_valid_loss=0 364 | iters_in_valid=0 365 | 366 | range_of_indeces = np.arange(validation_x.shape[0]) 367 | indeces = npr.choice(range_of_indeces, size=(validation_x.shape[0],), replace=False) 368 | 369 | for batch in range(int(math.floor(validation_x.shape[0] / batchsize))): 370 | # Batch the training data 371 | inputs = validation_x[indeces[batch*batchsize:(batch+1)*batchsize], :] 372 | outputs = validation_z[indeces[batch*batchsize:(batch+1)*batchsize], :] 373 | 374 | #one iteration of feedforward training 375 | my_loss, _ = sess.run([loss, curr_output], 376 | feed_dict={inputs_placeholder: inputs, labels_placeholder: outputs}) 377 | 378 | #loss 379 | avg_valid_loss+= np.sqrt(my_loss) 380 | iters_in_valid+=1 381 | 382 | curr_valid_loss = avg_valid_loss/iters_in_valid 383 | 384 | #save validation losses 385 | plot_validloss_x.append(i) 386 | plot_validloss_y.append(curr_valid_loss) 387 | if(not(print_minimal)): 388 | print("validation loss: ", curr_valid_loss, ", nEpoch: ", i, "\n") 389 | np.save(save_dir + '/plot_validloss_x.npy', plot_validloss_x) 390 | np.save(save_dir + '/plot_validloss_y.npy', plot_validloss_y) 391 | 392 | print("DONE TRAINING.") 393 | print("final training loss: ", current_loss, ", nEpoch: ", i) 394 | print("final validation loss: ", curr_valid_loss, ", nEpoch: ", i) 395 | 396 | ################## 397 | ##### PLOT ####### 398 | ################## 399 | if(plot): 400 | plt.plot(plot_validloss_x, plot_validloss_y, 'r') 401 | plt.plot(plot_trainingloss_x, plot_trainingloss_y, 'g') 402 | plt.show() 403 | 404 | ################################################## 405 | ##### RUN ON-POLICY ROLLOUTS --- DAGGER ########## 406 | ################################################## 407 | 408 | print("\n\nCollecting on-policy rollouts...\n\n") 409 | starting_states = [] 410 | observations = [] 411 | actions=[] 412 | true_actions=[] 413 | 414 | for rollout in range(num_rollouts_to_agg): 415 | if(not(print_minimal)): 416 | print("\nOn rollout #", rollout) 417 | total_rew = 0 418 | 419 | starting_observation, starting_state = env.reset(returnStartState=True) 420 | curr_ob=np.copy(starting_observation) 421 | 422 | observations_for_rollout = [] 423 | actions_for_rollout = [] 424 | true_actions_for_rollout=[] 425 | for step in range(steps_per_rollout): 426 | 427 | #get action 428 | action = sess.run([curr_output], feed_dict={inputs_placeholder: np.expand_dims(curr_ob, axis=0)}) 429 | action=np.copy(action[0][0]) #1x8 430 | 431 | #### add exploration noise to the action 432 | if(agg_iter>start_using_noised_actions): 433 | action = action + noise_onpol_rollouts*npr.normal(size=action.shape) 434 | 435 | #save obs and ac 436 | observations_for_rollout.append(curr_ob) 
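# the action stored here is the (possibly noise-perturbed) action the learned policy actually executes;
# the supervised label used for aggregation is the MPC expert's action, queried via g.get_action below (DAgger-style).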
437 | actions_for_rollout.append(action) 438 | 439 | ##################################### 440 | ##### GET LABEL OF TRUE ACTION ###### 441 | ##################################### 442 | 443 | true_action = g.get_action(curr_ob) 444 | true_actions_for_rollout.append(true_action) 445 | 446 | #take step 447 | next_ob, rew, done, _ = env.step(action, collectingInitialData=False) 448 | total_rew+= rew 449 | curr_ob= np.copy(next_ob) 450 | 451 | if(done): 452 | break 453 | 454 | if((step%100)==0): 455 | print(" Done with step #: ", step) 456 | 457 | total_datapoints+= step 458 | print("rollout ", rollout," .... reward = ", total_rew) 459 | if(not(print_minimal)): 460 | print("number of steps: ", step) 461 | print("number of steps so far: ", total_datapoints) 462 | 463 | if(visualize_on_policy_rollouts): 464 | input("\n\nPAUSE BEFORE VISUALIZATION... Press Enter to continue...") 465 | visualize_rendering(starting_state, actions_for_rollout, env, dt_steps, dt_from_xml, which_agent) 466 | 467 | starting_states.append(starting_state) 468 | observations.append(observations_for_rollout) 469 | actions.append(actions_for_rollout) 470 | true_actions.append(true_actions_for_rollout) 471 | 472 | rewards_for_this_iter.append(total_rew) 473 | 474 | print("Avg reward for this iter: ", np.mean(rewards_for_this_iter), "\n\n") 475 | 476 | ################################################## 477 | ##### RUN CLEAN ROLLOUTS TO SEE PERFORMANCE ###### 478 | ################################################## 479 | 480 | print("\n\nTEST DAGGER PERFORMANCE (clean rollouts)...") 481 | rewards_for_this_iter2=[] 482 | for rollout in range(num_rollouts_testperformance): 483 | total_rew = 0 484 | starting_observation, starting_state = env.reset(returnStartState=True) 485 | curr_ob=np.copy(starting_observation) 486 | 487 | for step in range(steps_per_rollout): 488 | 489 | #get action 490 | action = sess.run([curr_output], feed_dict={inputs_placeholder: np.expand_dims(curr_ob, axis=0)}) 491 | action=np.copy(action[0][0]) #1x8 492 | 493 | #take step 494 | next_ob, rew, done, _ = env.step(action, collectingInitialData=False) 495 | total_rew+= rew 496 | curr_ob= np.copy(next_ob) 497 | 498 | if(done): 499 | break 500 | if(not(print_minimal)): 501 | print("reward = ", total_rew) 502 | rewards_for_this_iter2.append(total_rew) 503 | print("Avg DAGGER performance at this iter: ", np.mean(rewards_for_this_iter2), "\n\n") 504 | 505 | ###### SAVE datapoints vs performance 506 | imit_list_num_datapoints.append(total_datapoints) 507 | imit_list_avg_rew.append(total_rew) 508 | 509 | ########################### 510 | ##### AGGREGATE DATA ###### 511 | ########################### 512 | if(not(print_minimal)): 513 | print("\nAggregating Data...\n") 514 | training_data = np.concatenate([training_data, np.concatenate(observations)], axis=0) 515 | labels = np.concatenate([labels, np.concatenate(true_actions)], axis=0) 516 | 517 | #save the datapoints vs performance 518 | np.save('run_'+ str(run_num) + '/datapoints_IMIT.npy', imit_list_num_datapoints) 519 | np.save('run_'+ str(run_num) + '/performance_IMIT.npy', imit_list_avg_rew) 520 | 521 | if(not(print_minimal)): 522 | print("Done training the TF policy") 523 | 524 | ###################### 525 | ### SAVE NN PARAMS ### 526 | ###################### 527 | 528 | #prepare the params for saving 529 | values = [] 530 | for t in list_vars[0:6]: 531 | if(t.eval().shape==()): 532 | junk=1 533 | else: 534 | values.append(np.ndarray.flatten(t.eval())) 535 | values = np.concatenate(values) 536 | 537 | #save the 
TF policy params 538 | if(not(print_minimal)): 539 | print("Saving learned TF nn model parameters.") 540 | f = open(save_dir + '/policy_tf_values.save', 'wb') 541 | cPickle.dump(values, f, protocol=cPickle.HIGHEST_PROTOCOL) 542 | f.close() 543 | 544 | else: #use_existing_pretrained_policy is True 545 | 546 | f = open(save_dir + '/policy_tf_values.save', 'rb') 547 | values = cPickle.load(f) 548 | f.close() 549 | 550 | ####################### 551 | ### INIT MLP POLICY ### 552 | ####################### 553 | 554 | policy = GaussianMLPPolicy(env_spec=env.spec, hidden_sizes=(depth_fc_layers, depth_fc_layers), init_std=std_on_mlp_policy) 555 | 556 | #copy params over to the MLP policy 557 | all_params = np.concatenate((values, policy._l_log_std.get_params()[0].get_value())) 558 | policy.set_param_values(all_params) 559 | 560 | #save the MLP policy 561 | f = open(save_dir + '/policy_mlp.save', 'wb') 562 | cPickle.dump(policy, f, protocol=cPickle.HIGHEST_PROTOCOL) 563 | f.close() 564 | if(not(print_minimal)): 565 | print("Done initializing MLP policy with a pre-trained policy.") 566 | 567 | ##see what this initialized MLP policy looks like 568 | if(visualize_mlp_policy): 569 | input("\n\nPAUSE BEFORE VISUALIZATION... Press Enter to continue...") 570 | states, controls, starting_states, rewards = perform_rollouts(policy, 1, steps_per_rollout, visualize_mlp_policy, 571 | CollectSamples, env, which_agent, dt_steps, dt_from_xml, False) 572 | print("Std of the MLP policy: ", std_on_mlp_policy) 573 | print("Reward of the MLP policy: ", rewards) 574 | 575 | ################################ 576 | ### TRAIN MLP POLICY W/ TRPO ### 577 | ################################ 578 | 579 | if(do_trpo): 580 | run_experiment_lite(run_task, plot=True, snapshot_mode="all", use_cloudpickle=True, n_parallel=str(args.num_workers_trpo), 581 | exp_name='run_' + str(run_num)+'_std' + str(std_on_mlp_policy)+ '_run'+ str(save_trpo_run_num), 582 | variant=dict(policy_values=values.tolist(), which_agent=which_agent, 583 | trpo_batchsize=trpo_batchsize, steps_per_rollout=steps_per_rollout, 584 | FiniteDifferenceHvp=FiniteDifferenceHvp, ConjugateGradientOptimizer=ConjugateGradientOptimizer, 585 | depth_fc_layers=depth_fc_layers, std_on_mlp_policy=std_on_mlp_policy)) -------------------------------------------------------------------------------- /mpc_controller.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | import tensorflow as tf 4 | import time 5 | import math 6 | import matplotlib.pyplot as plt 7 | import copy 8 | from six.moves import cPickle 9 | from rllab.misc import tensor_utils 10 | from data_manipulation import from_observation_to_usablestate 11 | from reward_functions import RewardFunctions 12 | 13 | class MPCController: 14 | 15 | def __init__(self, env_inp, dyn_model, horizon, which_agent, steps_per_episode, dt_steps, num_control_samples, 16 | mean_x, mean_y, mean_z, std_x, std_y, std_z, actions_ag, print_minimal, x_index, y_index, z_index, yaw_index, 17 | joint1_index, joint2_index, frontleg_index, frontshin_index, frontfoot_index, xvel_index, orientation_index): 18 | 19 | #init vars 20 | self.env=copy.deepcopy(env_inp) 21 | self.N = num_control_samples 22 | self.which_agent = which_agent 23 | self.horizon = horizon 24 | self.dyn_model = dyn_model 25 | self.steps_per_episode = steps_per_episode 26 | self.mean_x = mean_x 27 | self.mean_y = mean_y 28 | self.mean_z = mean_z 29 | self.std_x = std_x 30 | self.std_y = std_y 31 | 
self.std_z = std_z 32 | self.x_index = x_index 33 | self.y_index = y_index 34 | self.z_index = z_index 35 | self.yaw_index = yaw_index 36 | self.joint1_index = joint1_index 37 | self.joint2_index = joint2_index 38 | self.frontleg_index = frontleg_index 39 | self.frontshin_index = frontshin_index 40 | self.frontfoot_index = frontfoot_index 41 | self.xvel_index = xvel_index 42 | self.orientation_index = orientation_index 43 | self.actions_ag = actions_ag 44 | self.print_minimal = print_minimal 45 | self.reward_functions = RewardFunctions(self.which_agent, self.x_index, self.y_index, self.z_index, self.yaw_index, 46 | self.joint1_index, self.joint2_index, self.frontleg_index, self.frontshin_index, 47 | self.frontfoot_index, self.xvel_index, self.orientation_index) 48 | 49 | def perform_rollout(self, starting_fullenvstate, starting_observation, starting_observation_NNinput, desired_states, follow_trajectories, 50 | horiz_penalty_factor, forward_encouragement_factor, heading_penalty_factor, noise_actions, noise_amount): 51 | 52 | #lists for saving info 53 | traj_taken=[] #list of states that go into NN 54 | actions_taken=[] 55 | observations = [] #list of observations (direct output of the env) 56 | rewards = [] 57 | agent_infos = [] 58 | env_infos = [] 59 | 60 | #init vars 61 | stop_taking_steps = False 62 | total_reward_for_episode = 0 63 | step=0 64 | curr_line_segment = 0 65 | self.horiz_penalty_factor = horiz_penalty_factor 66 | self.forward_encouragement_factor = forward_encouragement_factor 67 | self.heading_penalty_factor = heading_penalty_factor 68 | 69 | #extend the list of desired states so you don't run out 70 | temp = np.tile(np.expand_dims(desired_states[-1], axis=0), (10,1)) 71 | self.desired_states = np.concatenate((desired_states, temp)) 72 | 73 | #reset env to the given full env state 74 | if(self.which_agent==5): 75 | self.env.reset() 76 | else: 77 | self.env.reset(starting_fullenvstate) 78 | 79 | #current observation 80 | obs = np.copy(starting_observation) 81 | #current observation in the right format for NN 82 | curr_state = np.copy(starting_observation_NNinput) 83 | traj_taken.append(curr_state) 84 | 85 | #select task or reward func 86 | reward_func = self.reward_functions.get_reward_func(follow_trajectories, self.desired_states, horiz_penalty_factor, 87 | forward_encouragement_factor, heading_penalty_factor) 88 | 89 | #take steps according to the chosen task/reward function 90 | while(stop_taking_steps==False): 91 | 92 | #get optimal action 93 | best_action, best_sim_number, best_sequence, moved_to_next = self.get_action(curr_state, curr_line_segment, reward_func) 94 | 95 | #advance which line segment we are on 96 | if(follow_trajectories): 97 | if(moved_to_next[best_sim_number]==1): 98 | curr_line_segment+=1 99 | print("MOVED ON TO LINE SEGMENT ", curr_line_segment) 100 | 101 | #noise the action 102 | action_to_take= np.copy(best_action) 103 | 104 | #whether to execute noisy or clean actions 105 | if(self.actions_ag=='nn'): 106 | noise_actions=True 107 | if(self.actions_ag=='nc'): 108 | noise_actions=True 109 | if(self.actions_ag=='cc'): 110 | noise_actions=False 111 | 112 | clean_action = np.copy(action_to_take) 113 | if(noise_actions): 114 | noise = noise_amount * npr.normal(size=action_to_take.shape)# 115 | action_to_take = action_to_take + noise 116 | action_to_take=np.clip(action_to_take, -1,1) 117 | 118 | #execute the action 119 | next_state, rew, done, env_info = self.env.step(action_to_take, collectingInitialData=False) 120 | 121 | #check if done 122 | if(done): 
123 | stop_taking_steps=True 124 | else: 125 | #save things 126 | observations.append(obs) 127 | rewards.append(rew) 128 | env_infos.append(env_info) 129 | total_reward_for_episode += rew 130 | 131 | #whether to save clean or noisy actions 132 | if(self.actions_ag=='nn'): 133 | actions_taken.append(np.array([action_to_take])) 134 | if(self.actions_ag=='nc'): 135 | actions_taken.append(np.array([clean_action])) 136 | if(self.actions_ag=='cc'): 137 | actions_taken.append(np.array([clean_action])) 138 | 139 | #this is the observation returned by taking a step in the env 140 | obs=np.copy(next_state) 141 | 142 | #get the next state (usable by NN) 143 | just_one=True 144 | next_state = from_observation_to_usablestate(next_state, self.which_agent, just_one) 145 | curr_state=np.copy(next_state) 146 | traj_taken.append(curr_state) 147 | 148 | #bookkeeping 149 | if(not(self.print_minimal)): 150 | if(step%100==0): 151 | print("done step ", step, ", rew: ", total_reward_for_episode) 152 | step+=1 153 | 154 | #when to stop 155 | if(follow_trajectories): 156 | if((step>=self.steps_per_episode) or (curr_line_segment>5)): 157 | stop_taking_steps = True 158 | else: 159 | if(step>=self.steps_per_episode): 160 | stop_taking_steps = True 161 | 162 | if(not(self.print_minimal)): 163 | print("DONE TAKING ", step, " STEPS.") 164 | print("Reward: ", total_reward_for_episode) 165 | 166 | mydict = dict( 167 | observations=tensor_utils.stack_tensor_list(observations), 168 | actions=tensor_utils.stack_tensor_list(actions_taken), 169 | rewards=tensor_utils.stack_tensor_list(rewards), 170 | agent_infos=agent_infos, 171 | env_infos=tensor_utils.stack_tensor_dict_list(env_infos)) 172 | 173 | return traj_taken, actions_taken, total_reward_for_episode, mydict 174 | 175 | def get_action(self, curr_nn_state, curr_line_segment, reward_func): 176 | #randomly sample N candidate action sequences 177 | all_samples = npr.uniform(self.env.action_space.low, self.env.action_space.high, (self.N, self.horizon, self.env.action_space.shape[0])) 178 | 179 | #forward simulate the action sequences (in parallel) to get resulting (predicted) trajectories 180 | many_in_parallel = True 181 | resulting_states = self.dyn_model.do_forward_sim([curr_nn_state,0], np.copy(all_samples), many_in_parallel, self.env, self.which_agent) 182 | resulting_states = np.array(resulting_states) #this is [horizon+1, N, statesize] 183 | 184 | #init vars to evaluate the trajectories 185 | scores=np.zeros((self.N,)) 186 | done_forever=np.zeros((self.N,)) 187 | move_to_next=np.zeros((self.N,)) 188 | curr_seg = np.tile(curr_line_segment,(self.N,)) 189 | curr_seg = curr_seg.astype(int) 190 | prev_forward = np.zeros((self.N,)) 191 | moved_to_next = np.zeros((self.N,)) 192 | prev_pt = resulting_states[0] 193 | 194 | #accumulate reward over each timestep 195 | for pt_number in range(resulting_states.shape[0]): 196 | 197 | #array of "the point"... 
for each sim 198 | pt = resulting_states[pt_number] # N x state 199 | 200 | #how far is the point from the desired trajectory 201 | #how far along the desired traj have you moved since the last point 202 | min_perp_dist, curr_forward, curr_seg, moved_to_next = self.calculate_geometric_trajfollow_quantities(pt, curr_seg, moved_to_next) 203 | 204 | #update reward score 205 | scores, done_forever = reward_func(pt, prev_pt, scores, min_perp_dist, curr_forward, prev_forward, curr_seg, 206 | moved_to_next, done_forever, all_samples, pt_number) 207 | 208 | #update vars 209 | prev_forward = np.copy(curr_forward) 210 | prev_pt = np.copy(pt) 211 | 212 | #pick best action sequence 213 | best_score = np.min(scores) 214 | best_sim_number = np.argmin(scores) 215 | best_sequence = all_samples[best_sim_number] 216 | best_action = np.copy(best_sequence[0]) 217 | 218 | 219 | 220 | return best_action, best_sim_number, best_sequence, moved_to_next 221 | 222 | def calculate_geometric_trajfollow_quantities(self, pt, curr_seg, moved_to_next): 223 | 224 | #arrays of line segment points... for each sim 225 | curr_start = self.desired_states[curr_seg] 226 | curr_end = self.desired_states[curr_seg+1] 227 | next_start = self.desired_states[curr_seg+1] 228 | next_end = self.desired_states[curr_seg+2] 229 | 230 | #initialize 231 | min_perp_dist = np.ones((self.N, ))*5000 232 | 233 | ####################################### closest distance from point to current line segment 234 | 235 | #vars 236 | a = pt[:,self.x_index]- curr_start[:,0] 237 | b = pt[:,self.y_index]- curr_start[:,1] 238 | c = curr_end[:,0]- curr_start[:,0] 239 | d = curr_end[:,1]- curr_start[:,1] 240 | 241 | #project point onto line segment 242 | which_line_section = np.divide((np.multiply(a,c) + np.multiply(b,d)), (np.multiply(c,c) + np.multiply(d,d))) 243 | 244 | #point on line segment that's closest to the pt 245 | closest_pt_x = np.copy(which_line_section) 246 | closest_pt_y = np.copy(which_line_section) 247 | closest_pt_x[which_line_section<0] = curr_start[:,0][which_line_section<0] 248 | closest_pt_y[which_line_section<0] = curr_start[:,1][which_line_section<0] 249 | closest_pt_x[which_line_section>1] = curr_end[:,0][which_line_section>1] 250 | closest_pt_y[which_line_section>1] = curr_end[:,1][which_line_section>1] 251 | closest_pt_x[np.logical_and(which_line_section<=1, which_line_section>=0)] = (curr_start[:,0] + 252 | np.multiply(which_line_section,c))[np.logical_and(which_line_section<=1, which_line_section>=0)] 253 | closest_pt_y[np.logical_and(which_line_section<=1, which_line_section>=0)] = (curr_start[:,1] + 254 | np.multiply(which_line_section,d))[np.logical_and(which_line_section<=1, which_line_section>=0)] 255 | 256 | #min dist from pt to that closest point (ie closes dist from pt to line segment) 257 | min_perp_dist = np.sqrt((pt[:,self.x_index]-closest_pt_x)*(pt[:,self.x_index]-closest_pt_x) + 258 | (pt[:,self.y_index]-closest_pt_y)*(pt[:,self.y_index]-closest_pt_y)) 259 | 260 | ####################################### "forward-ness" of the pt... 
for each sim 261 | curr_forward = which_line_section 262 | 263 | ###################################### closest distance from point to next line segment 264 | 265 | #vars 266 | a = pt[:,self.x_index]- next_start[:,0] 267 | b = pt[:,self.y_index]- next_start[:,1] 268 | c = next_end[:,0]- next_start[:,0] 269 | d = next_end[:,1]- next_start[:,1] 270 | 271 | #project point onto line segment 272 | which_line_section = np.divide((np.multiply(a,c) + np.multiply(b,d)), 273 | (np.multiply(c,c) + np.multiply(d,d))) 274 | 275 | #point on line segment that's closest to the pt 276 | closest_pt_x = np.copy(which_line_section) 277 | closest_pt_y = np.copy(which_line_section) 278 | closest_pt_x[which_line_section<0] = next_start[:,0][which_line_section<0] 279 | closest_pt_y[which_line_section<0] = next_start[:,1][which_line_section<0] 280 | closest_pt_x[which_line_section>1] = next_end[:,0][which_line_section>1] 281 | closest_pt_y[which_line_section>1] = next_end[:,1][which_line_section>1] 282 | closest_pt_x[np.logical_and(which_line_section<=1, which_line_section>=0)] = (next_start[:,0] + 283 | np.multiply(which_line_section,c))[np.logical_and(which_line_section<=1, which_line_section>=0)] 284 | closest_pt_y[np.logical_and(which_line_section<=1, which_line_section>=0)] = (next_start[:,1] + 285 | np.multiply(which_line_section,d))[np.logical_and(which_line_section<=1, which_line_section>=0)] 286 | 287 | #min dist from pt to that closest point (ie closes dist from pt to line segment) 288 | dist = np.sqrt((pt[:,self.x_index]-closest_pt_x)*(pt[:,self.x_index]-closest_pt_x) + 289 | (pt[:,self.y_index]-closest_pt_y)*(pt[:,self.y_index]-closest_pt_y)) 290 | 291 | ############################################ 292 | 293 | #pick which line segment it's closest to, and update vars accordingly 294 | curr_seg[dist<=min_perp_dist] += 1 295 | moved_to_next[dist<=min_perp_dist] = 1 296 | curr_forward[dist<=min_perp_dist] = which_line_section[dist<=min_perp_dist] 297 | min_perp_dist = np.min([min_perp_dist, dist], axis=0) 298 | 299 | return min_perp_dist, curr_forward, curr_seg, moved_to_next -------------------------------------------------------------------------------- /plotting/plot_loss.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false, 8 | "scrolled": false 9 | }, 10 | "outputs": [], 11 | "source": [ 12 | "#####################################\n", 13 | "########### TO SPECIFY ##############\n", 14 | "#####################################\n", 15 | "\n", 16 | "save_dir = '../run_1001'\n", 17 | "\n", 18 | "#####################################\n", 19 | "#####################################\n", 20 | "\n", 21 | "\n", 22 | "import numpy as np\n", 23 | "import matplotlib.pyplot as plt\n", 24 | "%matplotlib inline\n", 25 | "\n", 26 | "training_loss = np.load(save_dir + '/losses/list_training_loss.npy')\n", 27 | "onestep_val_loss = np.load(save_dir + '/errors_1_per_agg.npy')\n", 28 | "fivestep_val_loss = np.load(save_dir + '/errors_5_per_agg.npy')\n", 29 | "tenstep_val_loss = np.load(save_dir + '/errors_10_per_agg.npy')\n", 30 | "fiftystep_val_loss = np.load(save_dir + '/errors_50_per_agg.npy')\n", 31 | "hundredstep_val_loss = np.load(save_dir + '/errors_100_per_agg.npy')\n", 32 | "\n", 33 | "plt.figure()\n", 34 | "plt.ylabel(\"Loss\")\n", 35 | "plt.title(\"Training Loss\")\n", 36 | "plt.xlabel(\"Aggregation Iteration\")\n", 37 | "plt.plot(training_loss)\n", 38 | 
"\n", 39 | "plt.figure()\n", 40 | "plt.ylabel(\"Loss\")\n", 41 | "plt.title(\"1-step Validation Loss\")\n", 42 | "plt.xlabel(\"Aggregation Iteration\")\n", 43 | "plt.plot(onestep_val_loss)\n", 44 | "\n", 45 | "plt.figure()\n", 46 | "plt.ylabel(\"Loss\")\n", 47 | "plt.title(\"5-step Validation Loss\")\n", 48 | "plt.xlabel(\"Aggregation Iteration\")\n", 49 | "plt.plot(fivestep_val_loss)\n", 50 | "\n", 51 | "plt.figure()\n", 52 | "plt.ylabel(\"Loss\")\n", 53 | "plt.title(\"10-step Validation Loss\")\n", 54 | "plt.xlabel(\"Aggregation Iteration\")\n", 55 | "plt.plot(tenstep_val_loss)\n", 56 | "\n", 57 | "plt.figure()\n", 58 | "plt.ylabel(\"Loss\")\n", 59 | "plt.title(\"50-step Validation Loss\")\n", 60 | "plt.xlabel(\"Aggregation Iteration\")\n", 61 | "plt.plot(fiftystep_val_loss)\n", 62 | "\n", 63 | "plt.figure()\n", 64 | "plt.ylabel(\"Loss\")\n", 65 | "plt.title(\"100-step Validation Loss\")\n", 66 | "plt.xlabel(\"Aggregation Iteration\")\n", 67 | "plt.plot(hundredstep_val_loss)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": { 74 | "collapsed": true 75 | }, 76 | "outputs": [], 77 | "source": [] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": { 83 | "collapsed": true 84 | }, 85 | "outputs": [], 86 | "source": [] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": { 92 | "collapsed": true 93 | }, 94 | "outputs": [], 95 | "source": [] 96 | } 97 | ], 98 | "metadata": { 99 | "anaconda-cloud": {}, 100 | "celltoolbar": "Raw Cell Format", 101 | "kernelspec": { 102 | "display_name": "Python 3", 103 | "language": "python", 104 | "name": "python3" 105 | }, 106 | "language_info": { 107 | "codemirror_mode": { 108 | "name": "ipython", 109 | "version": 3 110 | }, 111 | "file_extension": ".py", 112 | "mimetype": "text/x-python", 113 | "name": "python", 114 | "nbconvert_exporter": "python", 115 | "pygments_lexer": "ipython3", 116 | "version": "3.5.2" 117 | }, 118 | "widgets": { 119 | "state": {}, 120 | "version": "1.1.2" 121 | } 122 | }, 123 | "nbformat": 4, 124 | "nbformat_minor": 1 125 | } 126 | -------------------------------------------------------------------------------- /plotting/plot_mbmf.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import seaborn as sns 3 | import numpy as np 4 | from numpy import genfromtxt 5 | import joblib 6 | import pandas 7 | import argparse 8 | 9 | 10 | ###################### 11 | ## ARGUMENTS TO SPECIFY 12 | ###################### 13 | 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--run_nums', type=int, nargs='+', default=-5) 16 | parser.add_argument('--seeds', type=int, nargs='+', default=-5) 17 | parser.add_argument('--which_agent', type=int, default=1) 18 | parser.add_argument('--std_on_mlp_policy', type=float, default=0.5) 19 | parser.add_argument('--batchsize_TRPO_mf', type=int, default=50000) 20 | parser.add_argument('--batchsize_TRPO_mbmf', type=int, default=50000) 21 | parser.add_argument('--dont_include_mbmfTRPO', action="store_true", dest='dont_include_mbmfTRPO', default=False) 22 | parser.add_argument('--trpo_dir', type=str, default='/home/anagabandi/rllab/data/local/experiment/') 23 | args = parser.parse_args() 24 | 25 | ###################### 26 | ## vars 27 | ###################### 28 | 29 | #save args 30 | which_agent = args.which_agent 31 | std_on_mlp_policy = args.std_on_mlp_policy 32 | batchsize_TRPO_mf = args.batchsize_TRPO_mf 33 | 
batchsize_TRPO_mbmf = args.batchsize_TRPO_mbmf 34 | 35 | #agent name 36 | if(which_agent==2): 37 | agent_name='Swimmer' 38 | if(which_agent==4): 39 | agent_name='Cheetah' 40 | if(which_agent==6): 41 | agent_name='Hopper' 42 | batchsize_TRPO_mbmf= 25000 43 | if(which_agent==1): 44 | agent_name='Ant' 45 | 46 | #plotting vars 47 | plt.rc('text', usetex=True) 48 | plt.rc('font', family='serif', size=10) 49 | sns.set(font_scale=1) 50 | format = 'png' 51 | dpi=200 52 | 53 | ############################ 54 | ## vars that depend on number of runs 55 | ############################ 56 | 57 | #seeds 58 | how_many_seeds= len(args.seeds) 59 | 60 | #run numbers for MB and imitation learning data 61 | run_num1 = args.run_nums[0] 62 | run_num2 = args.run_nums[0] 63 | run_num3 = args.run_nums[0] 64 | if(how_many_seeds==2): 65 | run_num1 = args.run_nums[0] 66 | run_num2 = args.run_nums[1] 67 | run_num3 = args.run_nums[1] 68 | if(how_many_seeds==3): 69 | run_num1 = args.run_nums[0] 70 | run_num2 = args.run_nums[1] 71 | run_num3 = args.run_nums[2] 72 | 73 | #filenames for MBMF TRPO 74 | mbmf_filename_numbers = [1,1,1] 75 | if(how_many_seeds==2): 76 | mbmf_filename_numbers = [1,2,2] 77 | if(how_many_seeds==3): 78 | mbmf_filename_numbers = [1,2,3] 79 | 80 | #filenames for MF TRPO 81 | mf_filename_numbers = ['_seed_'+str(args.seeds[0])+'_mf_run1','_seed_'+str(args.seeds[0])+'_mf_run1','_seed_'+str(args.seeds[0])+'_mf_run1'] 82 | if(how_many_seeds==2): 83 | mf_filename_numbers = ['_seed_'+str(args.seeds[0])+'_mf_run1','_seed_'+str(args.seeds[1])+'_mf_run2','_seed_'+str(args.seeds[1])+'_mf_run2'] 84 | if(how_many_seeds==3): 85 | mf_filename_numbers = ['_seed_'+str(args.seeds[0])+'_mf_run1','_seed_'+str(args.seeds[1])+'_mf_run2','_seed_'+str(args.seeds[2])+'_mf_run3'] 86 | 87 | ###################### 88 | ## load in data 89 | ###################### 90 | 91 | #TRPO filenames to load in 92 | pathname_mbmf1 = trpo_dir + 'run_'+ str(run_num1)+'_std'+str(std_on_mlp_policy) + '_run' +str(mbmf_filename_numbers[0]) 93 | pathname_mbmf2 = trpo_dir + 'run_'+ str(run_num2)+'_std'+str(std_on_mlp_policy) + '_run' +str(mbmf_filename_numbers[1]) 94 | pathname_mbmf3 = trpo_dir + 'run_'+ str(run_num3)+'_std'+str(std_on_mlp_policy) + '_run' +str(mbmf_filename_numbers[2]) 95 | 96 | #mf trpo runs 97 | pathname_mf1 = trpo_dir + 'agent_'+str(which_agent)+ mf_filename_numbers[0] 98 | pathname_mf2 = trpo_dir + 'agent_'+str(which_agent)+ mf_filename_numbers[1] 99 | pathname_mf3 = trpo_dir + 'agent_'+str(which_agent)+ mf_filename_numbers[2] 100 | 101 | #load in MB 102 | MB_list_num_datapoints_run1 = np.load('../run_'+ str(run_num1) + '/datapoints_MB.npy') 103 | MB_list_avg_rew_run1 = np.load('../run_'+ str(run_num1) + '/performance_MB.npy') 104 | MB_list_num_datapoints_run2 = np.load('../run_'+ str(run_num2) + '/datapoints_MB.npy') 105 | MB_list_avg_rew_run2 = np.load('../run_'+ str(run_num2) + '/performance_MB.npy') 106 | MB_list_num_datapoints_run3 = np.load('../run_'+ str(run_num3) + '/datapoints_MB.npy') 107 | MB_list_avg_rew_run3 = np.load('../run_'+ str(run_num3) + '/performance_MB.npy') 108 | 109 | #load in imitation 110 | imit_list_num_datapoints_run1 = np.load('../run_'+ str(run_num1) + '/datapoints_IMIT.npy') 111 | imit_list_avg_rew_run1 = np.load('../run_'+ str(run_num1) + '/performance_IMIT.npy') 112 | imit_list_num_datapoints_run2 = np.load('../run_'+ str(run_num2) + '/datapoints_IMIT.npy') 113 | imit_list_avg_rew_run2 = np.load('../run_'+ str(run_num2) + '/performance_IMIT.npy') 114 | imit_list_num_datapoints_run3 = 
np.load('../run_'+ str(run_num3) + '/datapoints_IMIT.npy') 115 | imit_list_avg_rew_run3 = np.load('../run_'+ str(run_num3) + '/performance_IMIT.npy') 116 | 117 | ###################### 118 | ## MB 119 | ###################### 120 | 121 | #performance 122 | mb_run1= MB_list_avg_rew_run1[:6] 123 | mb_run2= MB_list_avg_rew_run2[:6] 124 | mb_run3= MB_list_avg_rew_run3[:6] 125 | 126 | #datapoints 127 | mb_num_data = MB_list_num_datapoints_run1[:6] 128 | 129 | #mean and std of performance 130 | mb_y = np.array([mb_run1, mb_run2, mb_run3]) 131 | mb_mean = mb_y.mean(axis=0) 132 | mb_std = mb_y.std(axis=0) 133 | 134 | 135 | ###################### 136 | ## MBMF 137 | ###################### 138 | 139 | if(args.dont_include_mbmfTRPO): 140 | #performance 141 | mbmf_run1 = np.concatenate([mb_run1, imit_list_avg_rew_run1]) 142 | mbmf_run2 = np.concatenate([mb_run2, imit_list_avg_rew_run2]) 143 | mbmf_run3 = np.concatenate([mb_run3, imit_list_avg_rew_run3]) 144 | 145 | #datapoints 146 | mbmf_num_data = np.concatenate([mb_num_data, imit_list_num_datapoints_run1]) 147 | 148 | #mean and std of performance 149 | mbmf_y = np.array([mbmf_run1, mbmf_run2, mbmf_run3]) 150 | mbmf_mean = mbmf_y.mean(axis=0) 151 | mbmf_std = mbmf_y.std(axis=0) 152 | else: 153 | #performance 154 | mbmf_run1_orig = np.array(pandas.read_csv(pathname_mbmf1+'/progress.csv')['AverageReturn']) 155 | mbmf_run2_orig = np.array(pandas.read_csv(pathname_mbmf2+'/progress.csv')['AverageReturn']) 156 | mbmf_run3_orig = np.array(pandas.read_csv(pathname_mbmf3+'/progress.csv')['AverageReturn']) 157 | 158 | mbmf_cutoff= np.min([mbmf_run1_orig.shape, mbmf_run2_orig.shape, mbmf_run3_orig.shape]) #make them all the same (min) length 159 | mbmf_run1_orig = mbmf_run1_orig[:mbmf_cutoff] 160 | mbmf_run2_orig = mbmf_run2_orig[:mbmf_cutoff] 161 | mbmf_run3_orig = mbmf_run3_orig[:mbmf_cutoff] 162 | 163 | mbmf_run1 = np.concatenate([mb_run1, imit_list_avg_rew_run1, mbmf_run1_orig]) 164 | mbmf_run2 = np.concatenate([mb_run2, imit_list_avg_rew_run2, mbmf_run2_orig]) 165 | mbmf_run3 = np.concatenate([mb_run3, imit_list_avg_rew_run3, mbmf_run3_orig]) 166 | 167 | #datapoints 168 | datapoints_used_thus_far = imit_list_num_datapoints_run1[-1] 169 | mbmf_num_data_orig = batchsize_TRPO_mbmf*np.arange(mbmf_run1_orig.shape[0]+1)[1:] + datapoints_used_thus_far 170 | mbmf_num_data = np.concatenate([mb_num_data, imit_list_num_datapoints_run1, mbmf_num_data_orig]) 171 | 172 | #mean and std of performance 173 | mbmf_y = np.array([mbmf_run1, mbmf_run2, mbmf_run3]) 174 | mbmf_mean = mbmf_y.mean(axis=0) 175 | mbmf_std = mbmf_y.std(axis=0) 176 | 177 | print("MB datapoints: ", mb_num_data) 178 | print("MBMF datapoints: ", imit_list_num_datapoints_run1) 179 | 180 | ###################### 181 | ## MF 182 | ###################### 183 | 184 | #performance 185 | 186 | mf_run1 = pandas.read_csv(pathname_mf1+'/progress.csv')['AverageReturn'] 187 | mf_run2 = pandas.read_csv(pathname_mf2+'/progress.csv')['AverageReturn'] 188 | mf_run3 = pandas.read_csv(pathname_mf3+'/progress.csv')['AverageReturn'] 189 | 190 | mf_cutoff = np.min([mf_run1.shape, mf_run2.shape, mf_run3.shape]) #make them all the same (min) length 191 | mf_run1=mf_run1[:mf_cutoff] 192 | mf_run2=mf_run2[:mf_cutoff] 193 | mf_run3=mf_run3[:mf_cutoff] 194 | 195 | #datapoints 196 | mf_num_data = batchsize_TRPO_mf*np.arange(mf_run1.shape[0]+1)[1:] 197 | 198 | #mean and std of performance 199 | mf_y = np.array([mf_run1, mf_run2, mf_run3]) 200 | mf_mean = mf_y.mean(axis=0) 201 | mf_std = mf_y.std(axis=0) 202 | 203 | 
###################### 204 | ## PLOT 205 | ###################### 206 | 207 | fig, ax = plt.subplots(figsize=(7,3)) 208 | 209 | if(mb_num_data.shape[0]==1): 210 | ax.plot([mb_num_data[0],mb_num_data[0]], [0, mb_mean[0]], linewidth=2, color='g', label='Mb') 211 | else: 212 | ax.plot(mb_num_data, mb_mean, color='g', label='Mb') 213 | ax.fill_between(mb_num_data, mb_mean - mb_std, mb_mean + mb_std, color='g', alpha=0.25) 214 | 215 | ax.plot(mf_num_data, mf_mean, color='b', label='Mf') 216 | ax.fill_between(mf_num_data, mf_mean - mf_std, mf_mean + mf_std, color='b', alpha=0.25) 217 | 218 | ax.plot(mbmf_num_data, mbmf_mean, color='r', label='Mb-Mf (ours)', linewidth=0.5) 219 | ax.fill_between(mbmf_num_data, mbmf_mean - mbmf_std, mbmf_mean + mbmf_std, color='r', alpha=0.25) 220 | 221 | ax.hlines(mf_mean.max(), np.min([mb_num_data[0],mf_num_data[0]]), mf_num_data[-1], color='k', linestyle='--') 222 | 223 | ax.semilogx() 224 | ax.grid(True,which="both",ls="-") 225 | ax.set_xlabel('Steps') 226 | ax.set_ylabel('Cumulative Reward') 227 | ax.set_title(agent_name) 228 | 229 | ax.legend(loc='lower right') 230 | fig.savefig(agent_name+'_comparison.png', dpi=200, bbox_inches='tight') 231 | plt.close(fig) -------------------------------------------------------------------------------- /plotting/plot_trajfollow.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 11, 6 | "metadata": { 7 | "collapsed": false, 8 | "scrolled": false 9 | }, 10 | "outputs": [ 11 | { 12 | "name": "stdout", 13 | "output_type": "stream", 14 | "text": [ 15 | "(40, 2)\n", 16 | "(1, 1701, 16)\n" 17 | ] 18 | }, 19 | { 20 | "data": { 21 | "text/plain": [ 22 | "[]" 23 | ] 24 | }, 25 | "execution_count": 11, 26 | "metadata": {}, 27 | "output_type": "execute_result" 28 | }, 29 | { 30 | "data": { 31 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAhcAAAFyCAYAAABGCPg8AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAIABJREFUeJzt3Xd8VFX6x/HPQ5WigKIg9gqsgpAoiiii2HXtirHXtYux\n/9aKvWNB1o64aNbeCy6isisgkNhAUVGsNAENgiAkOb8/nskmhPTcOyV836/XvGbmzrlzn1yG5Jlz\nz3mOhRAQERERiUqTVAcgIiIijYuSCxEREYmUkgsRERGJlJILERERiZSSCxEREYmUkgsRERGJlJIL\nERERiZSSCxEREYmUkgsRERGJlJILERERiZSSCxEREYmUkgsRERGJlJILERERiZSSCxGpNzNbw8y+\nSNxaltvewcxmm9l/zewUMysxs+0q2f/vZlZkZusnN3IRiZOSCxGptxDCMuBEYEvgxnIvDQfWTLz2\nDLAUOLaStzgGGBtCmB1zqCKSREouRKRBQgiTgNuAwWbWz8yOAAYBl4cQvgkhLAZeAnLK72dmvYG/\nAP9MdswiEi8LIaQ6BhHJcGbWHJiM91a0BaaGEAaWe30f4A1gzxDCu4ltdwBnAp1CCEuSH7WIxEXJ\nhYhEwsyy8QRjKfCXEML35V5rAvwIvBlCOM3MLPH8vRDCcSkJWERio8siIhKVfRP3awBblX8hhFAC\nPAUcbmYtgD2ALsCopEYoIkmhngsRaTAz6wlMwpOFXkBHoEcI4fdybXoAHwNHAfsDBwBdEomHiDQi\nSi5EpEHMrBmeWKwFbAdsjl8eGRVCOK1C24+BOcBOwIgQQm6SwxWRJNBlERFpqKuAnsApIYQlIYTP\ngOuAU8xsvwptnwD2xgd+PpncMEUkWdRzISL1lphOOhEYXr4XIjGAcwI+rmKbEMKixPZOwE/A1yGE\nv6QgZBFJgmapDkBEMlcI4SOgZSXbS4AdK9mlCAhoIKdIoxb7ZREzO8fMZprZUjObaGY71HK/fma2\nwswK4o5RRJLmZPz3jpILkUYs1uTCzAYBdwLXAL2BT4DRZtaxhv3aASOBMXHGJyLJYWa7m9m5wN+B\nF0MIP6Q6JhGJT6xjLsxsIvBhCGFw4nlp4Zx7Qwi3VbNfHvAVUAIcHELIii1IEYmdmb0L9AX+Cxyv\ntUREGrfYei4S5YCzgXdKtwXPZMbgv2Sq2u9kYDNgSFyxiUhyhRB2DyGsEULYU4mFSOMX54DOjkBT\nYG6F7XOBrpXtYGZbATcBu4QQSryjo3pmtg6wD/AdsKwB8YqIiKxu1gA2BUaHEBZE9aZpM1skMXXt\nSeCaEMI3pZtrses+aL68iIhIQxyLl+iPRJzJxXygGOhUYXsnvEJfRWsC2wO9zOz+xLYm+FCN5cDe\nIYT3KtnvO4BRo0bRvXv3CMKW2sjNzWXo0KGpDmO1onOefDrnyadznlxffPEFxx13HCT+lkYltuQi\nhLDCzPKBgcAr8L8BnQOBeyvZZRGwbYVt5wC7A4dT9Q++DKB79+5kZWncZ7K0a9dO5zvJdM6TT+c8\n+XTOUybSYQVxXxa5C3g8kWRMAnKB1sDjAGZ2M75w0YmJwZ6fl9/ZzOYBy0IIX8Qcp4iIiEQk1uQi\nhPBMoqbFdfjlkI+BfUIIvySadAY2ijMGERERSa7YB3SGEIYDw6t47eQa9h2CpqSKiIhkFK2KKvWS\nk5OT6hBWOzrnyadznnw6541Dxq+KamZZQH5+fr4GAYmIiNRBQUEB2dnZANkhhMjW8lLPhYiIiERK\nyYWIiIhESsmFiIiIRErJhYiIiERKyYWIiIhESsmFiIiIRErJhYiIiERKyYWIiIhESsmFiIiIRErJ\nhYiISBwyvAJ2Qyi5EBERido330DfvjBjRqojSYnYV0UVERFZrSxdCkccAYsXw7rrpjqalFByISIi\nEqXBg2H6dJg4Edq1S3U0KaHkQkREJCojR8LDD8Ojj8J226U6mpTRmAsREZEofPYZnHUWnHwynHJK\nqqNJKSUXIiIiUfjpJ8jOhmHDUh1Jyim5EBERicJ++8G4cdC6daojSTklFyIiIlExS3UEaUHJhYiI\niERKyYWIiIhESsmFiIiIRErJhYiIiERKyYWIiEhdffstPPssFBenOpK0pORCRESkrkaMgL/9DZYv\nT3UkaUnJhYiISF09/zwcdBC0apXqSNKSkgsREZG6+OILvx1+eKojSVtKLkREROrihRegbVvYe+9U\nR5K2lFyIiIjUxYQJsMsusMYaqY4kbSm5EBERqYtPP12tl1OvDSUXIiIitfXbb/Djj9CzZ6ojSWtK\nLkRERGrr119hjz2gd+9UR5LWYk8uzOwcM5tpZkvNbKKZ7VBN235m9l8zm29mf5jZF2Z2QdwxioiI\n1Mpmm8E770D37qmOJK01i/PNzWwQcCfwN2ASkAuMNrOtQwjzK9llCXAf8Gni8S7AQ2a2OITwSJyx\nioiIJMXvv8OUKfDJJ355ZY89Uh1R5GJNLvBk4sEQwhMAZnYmcABwCnBbxcYhhI+Bj8ttesrMDgd2\nBZRciIhI5lmwAD74AN5/H8aNg4ICKCmBpk1h1KhURxeL2JILM2sOZAM3lW4LIQQzGwP0reV79E60\nvSKWIEVEROJ0xBFezRNgo41gt928bPjOO0PXrp5gNEJx9lx0BJoCcytsnwt0rW5HM/sRWDex/7Uh\nhBGxRCgiIhKnY46BQw7xuhibbAJmqY4oKeK+LFJfuwBtgZ2AW81sRgjh6RTHJCIiUjeHHZbqCFIi\nzuRiPlAMdKqwvRMwp7odQwjfJx5OM7POwLVAtclFbm4u7dq1W2lbTk4OOTk5dQhZRESkccrLyyMv\nL2+lbYWFhbEcy0IIsbwxgJlNBD4MIQxOPDfgB+DeEMLttXyPq4GTQgibV/F6FpCfn59PVlZWRJGL\niIhUUFTklzUa0TiJgoICsrOzAbJDCAVRvW/cdS7uAk43sxPMrBvwANAaeBzAzG42s5Gljc3sbDM7\n0My2TNxOBS4C/hlznCIiItV78UVo3tyrdJZavhxGj677e61YAddc49U+G6FYk4sQwjPAxcB1wEdA\nT2CfEMIviSadgY0qxHNzou1k4CzgkhDCNXHGKSIiUqMWLSAEyMuDP/+EOXPgyCPh4IPhp5/q9l7v\nvAPXXQcLF8YTa4rFPqAzhDAcGF7FaydXeD4MGBZ3TCIiInWWnQ0dO8LZZ8PgwVBcDO3bw3PPwYYb\n1u29nnkGtt660a5Rkq6zRURERNLLhhvCL7/A55/D2LHQqpXPBunQoW7v8/vvnpDk5jbaqalKLkRE\nROriL3/xW32NGgVLlsBpp0UXU5rRqqgiIiLJEgIMG+aFtTbaqOb2GUrJhYiISLK8955fVjn33FRH\nEislFyIiIskybJhfUhkwINWRxEpjLkRERJLl4oth8eJGO5CzlJILERGRZOlbq0XBM54ui4iIiEik\nlFyIiIhIpJRciIiISKSUXIiIiEiklFyIiIhIpJRciIiI
xGnFilRHkHRKLkREROKyfLmvpPrUU6mO\nJKmUXIiIiMSloAAWLYKttkp1JEml5EJERCQur74KbdpAr16pjiSplFyIiIhEraQEbr0V7rwTzjoL\nmjdPdURJpeRCREQkao89BpdfDscfD9dck+pokk5ri4iIiETtnnvgsMPg4YdTHUlKqOdCREQkSrNn\nw9SpMGhQqiNJGfVciIiIRKl9ex/IuZqsgFoZJRciIiJRatUKDjww1VGklC6LiIiISKSUXIiIiMSl\npARmzEh1FEmn5EJERCQu114L/fp5GfDViJILERGRuBx+OMybB++9l+pIkkrJhYiISFx69oRNN4WX\nXkp1JEml5EJERCQuZnDIIfDKKz7+YjWh5EJERKQhQoATT4QnnoDzzoPJk1d+/eCD4eefIT8/NfGl\ngJILERGRhrj+ek8sZs2CYcNg+PCVX99lF1+47JZbPBFZDSi5EBERqa9nn/WFya6/HoqLfduECWWP\nAZo1g7XXhhdegGXLUhNnkqlCp4iISH1MmeKXQ3Jy4MILoU0b3/7669C06cptCwq8JHirVsmPMwWU\nXIiIiNTVrFk+lqJHD3j0Ubj6at9+zDGwxRartu/SBc44I7kxppAui4iIiNRFSQkcf7w/fuklWLwY\n7rjDn991V+riSiOxJxdmdo6ZzTSzpWY20cx2qKbtoWb2tpnNM7NCMxtvZnvHHaOIiEitffCBF8Ua\nORLWXx9GjPDt3btDp04pDS1dxJpcmNkg4E7gGqA38Akw2sw6VrFLf+BtYD8gC3gXeNXMtoszThER\nkVrbdVf46ivYc09//sQTfj9oUOpiSjNx91zkAg+GEJ4IIUwHzgT+AE6prHEIITeEcEcIIT+E8E0I\n4Qrga+CvMccpIiJSe+XHVZx3nt8fckhqYklDsQ3oNLPmQDZwU+m2EEIwszFA31q+hwFrAgtjCVJE\nRKQ+QvDqm+AzRrp0ge3UyV4qzp6LjkBTYG6F7XOBzrV8j0uANsAzEcYlIiJSfyUlvl7IqFH+fI01\n4K/qYC8vbaeimtkxwFXAQSGE+TW1z83NpV27ditty8nJIScnJ6YIRURktTR1KvzwA2y0UaojqZO8\nvDzy8vJW2lZYWBjLseJMLuYDxUDFobOdgDnV7WhmRwMPAUeEEN6tzcGGDh1KVlZWfeIUERGpvXHj\noEUL6NMn1ZHUSWVfuAsKCsjOzo78WLFdFgkhrADygYGl2xJjKAYC46vaz8xygEeBo0MIb8UVn4iI\nSL2MG+eJxWpSbbM+4p4tchdwupmdYGbdgAeA1sDjAGZ2s5mNLG2cuBQyErgImGxmnRK3tWKOU0RE\npGpjx8K0aT6Qc9w4vyTSrRt89FGqI0tLsSYXIYRngIuB64CPgJ7APiGEXxJNOgPlL1qdjg8CvR+Y\nVe52d5xxioiIVOu887wK55w5MHeuT0X98stUR5W2Yh/QGUIYDgyv4rWTKzzfPe54RERE6uTnn+Hz\nz2HIEB/ICWWXRNZbL3VxpTGtLSIiIlKd0ksfO+wACxb446Iir3Ohct+VUnIhIiJSnYkTvYdi440h\nOxtefNGTi3XXhWZpW9EhpZRciIiIVGfCBOjbt6yn4pBDYP58X7RMKqXkQkREpCrFxTBpEuy008rb\nZ8/2kt9SKSUXIiIiVZk2DRYv9p6L8mbNUs9FNXSxSEREpCrz5sHmm8P226+8/eyzNZizGkouRERE\nqrLnnvDNN6tuP/HE5MeSQXRZREREpDZ+/tmLaEmNlFyIiIjUxp13+jiLf/871ZGkPSUXIiIitTF0\nqN8fdVRq48gASi5ERERqUv5yyG+/pS6ODKHkQkREpCZjx5Y97tUrdXFkCCUXIiIiFU2YAK+/Xvb8\njTfKHnfunPx4MoySCxERkYqGDIGLLvLHl10GTz4Ja6zhzydMgBBSF1sGUHIhIiJS0ejR8OWXsGIF\n3Habbxs3zu8LC32dEamSimiJiIiUt2yZ37doAe++W7Y9OxuaN4ett05NXBlEPRciIiLlPfSQ33fo\nAE8/DVtsAUuXQpMmMHUqfPhhauPLAEouREREyrvgAr8/6SR44QUYNKhsvMXWW0ObNikLLVMouRAR\nESn18MNlgzULC72mxaBBqY0pAym5EBERKfW3v5U9/vhj6NYNevRIXTwZSsmFiIhIZb780nstNDOk\nzjRbREREBOCHH1Z+npcHffqkJpYMp+RCREQEVi7xPWIE7LNP6mLJcLosIiIiAtCzZ9njQw5JXRyN\ngHouREREALKy4Ndfvbx3+/apjiajqedCRERWL88/D3feWfn6IO3bw377JT+mRkbJhYiIrD6WLYOz\nzvLeCYmNkgsREVl9PP00/PIL3HyzppjGSMmFiIhkvuXLoaio+jYhwH33wb77wlZbJSeu1ZSSCxER\nyXwtW8Iuu1Tf5q23ID8fBg9OTkyrMc0WERGRzLRokScLpauUrrVW5e2mTYM33oAHH4T+/VW/IgmU\nXIiISOZZuBC6d4d588q2Pfts5W2//BIuvRQ23xyGDdNYiyRQciEiIpln7bXh1luhpAROPRWOPhra\ntau87WGH+ZiMZs2UWCRJ7GMuzOwcM5tpZkvNbKKZ7VBN285m9qSZfWlmxWZ2V9zxiYhIhjrpJJ/1\nAXD99dW3bd5ciUUSxZpcmNkg4E7gGqA38Akw2sw6VrFLS2AecD3wcZyxiYhIhgsBZszwx1tumdpY\nZCVx91zkAg+GEJ4IIUwHzgT+AE6prHEI4fsQQm4IYRSwKObYREQkE/36KxQUwHvv+fP9909pOLKq\n2JILM2sOZAPvlG4LIQRgDNA3ruOKiEgjN2QIDBgA77/vlzrOOAO22AIKC70347ffYOnSVEe5Wouz\n56Ij0BSYW2H7XKBzjMcVEZHG6tNPfcbHFVfAtdfC/Pk+BfXbb+GUU7w4VocO0LYt3H13qqNdbTWa\n2SK5ubm0qzBSOCcnh5ycnBRFJCIikfj2W3juOe+luOUW2HRTuOACf23ttWHddf3xCy/AoEGw667e\ng3HJJXDmmbDGGikLPZ3k5eWRl5e30rbCwsJYjhVncjEfKAY6VdjeCZgT9cGGDh1KVlZW1G8rIiLJ\nMm0aXHYZzJoFe+8N554LG24Ixx8P48eXtevTB5o2LXu+4YZ+f/fdMHmyJyKPPAIjR8LPP/slE6n0\nC3dBQQHZ2dmRHyu2yyIhhBVAPjCwdJuZWeL5+Kr2ExGR1VBxsScRn38OPXrAP/4Bm23mvRWlicX6\n60ObNr594429LXh9i2239d6Mf/0LHnoIttnG961sWXWJXdyzRe4CTjezE8ysG/AA0Bp4HMDMbjaz\nkeV3MLPtzKwX0BZYN/G8e8xxiohIKi1eDHvt5cnByJHw449wwgkrt5k928dcTJ3qj7fd1i+TAAwf\nDrm5PoMkJ8dfe/VVTVFNEQsxZ3VmdjZwKX455GPgvBDClMRrI4BNQgh7lGtfAlQM6vsQwuZVvH8W\nkJ+fn6/LIiI
ijcXnn3vvA8A668CCBZW323ln+M9/oEkT+OMP79m45x44//zkxZrByl0WyQ4hFET1\nvrFX6AwhDA8hbBpCaBVC6FuaWCReO7l8YpHY1iSE0LTCrdLEQkREGqHp02GPxJ+Gpk3hzz9hgw1W\nbvPSS35/3XWeWAB88IHfb7xxcuKUKmnJdRERSR8zZsDuu/sMkFdegY4dvTdi2rSyolkDBsCcOZ54\n7LRT2b5vvQVdusDBB6cicimn0UxFFRGRDLd4MRx0kNeteOcdaN3aa1ZceqkP2lyUKNzcrJkP8uzV\nyxOPUq++CgceqDVE0oCSCxERSQ+tW8Nxx8Hhh8N66/m2Tz8tm3a6ZIknDi+/DD17rlz2+8sv4euv\n4S6td5kOdFlERETSQ5Mm8Pe/Q9euZduaNy8bU3H00b7E+u+/wzffQL9+Ze1eeQVatYKBA5HUU3Ih\nIiKZ5cMP/b5vuWWqXn0V9tzTEwxJOSUXIiKS3kpKysZbgK+Iuu66sNFG/ryoyBcr++tfUxOfrELJ\nhYiIpLenn/ZiWPPm+fPWrX3gZ+nAzWbN4KOPVi26JSmjAZ0iIpK+QoA77oDs7LJBnpdeumq7O+7w\nFVEPOyy58Uml1HMhIiLpIy/PVzYtrR49aZJfBjn33LI2N9zgy66X+v57Hwj6yy/JjVWqpJ4LERFJ\nH8cc4/ctWsD8+V77ols32HffsjZjxnjFztKE44EHoG1bOPbY5McrlVJyISIi6aGwsOzxqFF+v+aa\n8PzzKy+xvnChr5wKnoDcfz+ccYYnGJIWlFyIiEh6KC3vfeaZsOOOXsa7WTNPMEqVlHjp73XW8ceX\nXOKXUCobhyEpo+RCRETSQ+nCY+efD927V97mhRd8bEWfPnDSSd7D8dhjPjVV0oaSCxERSa4Q4LXX\n4JNP4JBDYNttffvgwb7wWLdule+3ZAkceaQ/PuAAX1dk1KiycRqSNpRciIhIcl13HVx7rScHG2xQ\nllxssAFccEHl+/z0U1nRrMGDfYDn1lvD5psnJWSpGyUXIiKSPB984InFkCFwxRU+bqImX3wBe+xR\n9vyuu8rWG5G0pORCRESSo6QEcnO9INaVV3qCUH4WSGWKirzyZrt2MG6cT1FVYpH2lFyIiEhyvPQS\nTJ7ss0JqmyDceacX0Ro/3itwSkZQciEiIsmRlwfbbw+77VZz2yVLYNo0GD7cezt23DH++CQySi5E\nRCR+IfhS6aefXnPb33/32SDffguffQYtW8Yfn0RKyYWIiMTPDGbOhGXLqm83fTocfzx89RW89RZ0\n6JCc+CRSGhUjIiLJ0bSpTz8t788/veLmihVw223Qq5eXAR87Fvr2TU2c0mDquRARkdR56y049FBf\nF2TJErjwQq+D0apVqiOTBlByISIiqdO/PzzyCMye7dU6t9km1RFJBJRciIhI6nToAKeckuooJGIa\ncyEiIiKRUnIhIiLp4euvYc894ccfUx2JNJCSCxERSQ+5uTBjBnTsmOpIpIE05kJEROpvyhR4+WVY\nbz3o2RN69IC1167dviF4SfC334YHHvBtDz+smSKNgJILERGpv4ICn+2xcCEsX+7bNtgAttvO61Qc\nckjZkurlFRfDaafB44+vvGz6SSclI2qJmS6LiIhI/Z12mk8jXbwYpk719UNOOMF7JW6/3WeChLDy\nPr/95vs98YTfLrjAtxcUQDN9520M9K8oIiL1V7q6afPmXqNim23g6KN92/Ll8OuvXvq71Gef+eUT\ngJEjvWdjq628x6J376SGLvFRciEiIvFo0QI6dVp52x9/lD0+4QS4/HIv933ttUkNTeIV+2URMzvH\nzGaa2VIzm2hmO9TQfoCZ5ZvZMjP7ysxOjDtGERFJgjlzvLw3+IyQFSu8V+PKK2GTTVIbm0Qq1uTC\nzAYBdwLXAL2BT4DRZlbpPCMz2xR4DXgH2A64B3jEzPaKM04REYnZlCmwww6+Muqjj8KCBX5/881w\nxRWpjk4iFnfPRS7wYAjhiRDCdOBM4A+gqlqvZwHfhhAuDSF8GUK4H3gu8T4iIpKJnngCdtkFunSB\nyZN9kOd++8FZZ8GLL6Y6OolBbMmFmTUHsvFeCABCCAEYA1S1ju5OidfLG11NexERSbZly1adAVKZ\noiIvjHXiiXDMMfD++z5NFeDee/3+lVfii1NSJs4BnR2BpsDcCtvnAl2r2KdzFe3XMrOWIYQ/ow1R\nRETq7JhjfCbIa69V3WbBAjjqKE8o7rsPzjln5VkjW2wB8+b5oE9pdDRbRERE6qZpUx+MWZ0Q4Pff\n4c03Ya8qhs2tu270sUlaiDO5mA8UAxXmIdEJmFPFPnOqaL+opl6L3Nxc2rVrt9K2nJwccnJyah2w\niIjUQtu2MGtW9W06dvTiWKeeCs8+68WxNtxw1ampkjR5eXnk5eWttK2wsDCWY8WWXIQQVphZPjAQ\neAXAzCzx/N4qdpsA7Fdh296J7dUaOnQoWVlZ9Q9YRERqp21b75Wozrx5nljsvjv88INfItl9dxg7\nNjkxyioq+8JdUFBAdnZ25MeK+7LIXcDjiSRjEj7rozXwOICZ3Qx0CSGU1rJ4ADjHzG4FHsMTkSOA\n/WOOU0REamvNNWHRourbvPuuD/x89FGfJQI+7VRWC7FORQ0hPANcDFwHfAT0BPYJIfySaNIZ2Khc\n+++AA4A9gY/xZOTUEELFGSQiIpIqW2wB338PY8b4AmSV+c9/YOutfR2RUjtUW0NRGpHYB3SGEIYD\nw6t47eRKto3Dp7CKiEg6GjDA1xLZay947jk4/PBV24wbB/37l10+2XPPsnVIpNHTbBEREambLbbw\nBci++86ThoqWLYNp0+D882H77eHGG+GII5IX39SpsNZasPHGyTumrETJhYiI1F3Xrn6rzPTpUFLi\nK6Q2aQJ//3vy4lqxAnr08MezZsH66yfv2PI/6qMSEZFo/fYbbLYZdO+e/GN/9FHZ4zffTP7xBVBy\nISIiURswAL79Ftq3r9/+EyfCeefVrsR4RVOm+P3558NJJ9Xv+NJguiwiIiLpY9Ei6JtYTurAA2H8\neLjmmtoNBl20yAeYAlx1lQaQppCSCxERSR9PPln2eN99/X7QIPjLX2re96GHvOfixRe9QqikjNI6\nERFJH6ee6tNYH3gAevXyQaEdOtS8XwheObR1a5g0yRdWk5RRciEiIumjRQvYdVc44wwfnDl1as0z\nPpYv92XdzzoLttsO7rzTy43XtLiaxEbJhYiIZK7CQth/f3j6aXjqKRg92i+LbLaZL5YmKaEzLyIi\n0VuwAH76yXsS4lJYCLvt5qXI337bH4MnG/trSapUUs+FiIhEa9YsHyvRqxe8/3607/3xx/DHH/64\nRQvYZRdfx2SXXeCmm7z3QlJOyYWIiETnootggw2gaVMvv33RRau2mTfPp4p+8knd3nvaNK+hcdVV\n/rxVKxg2DLbdFoYOhSuugOuvb/CPIA2nyyIiIhKdddf1+zfe8MGYxx3n
l0jWWaesTcuWcNttXsPi\nnXdq974//AD77QebbOJ1L8qbPh2uvNIf12bKqsROPRciIhKdiy6CX37xsRa77lp5Iat27SAnBxYv\n9ufff+9Ltz/4YNkqquV9+insvLP3hrz+ui9KVt6WW/riaK1bey+GpJySCxERiU7z5mUFrDbe2Hsn\nyvdalCoo8ASkpAT22cdndpx55srrgcyfD7fcAjvu6D0i48fDhhuu+l7NmsFf/+pjMXbeOZ6fS+pE\nyYWIiMRnxx1X3bZ4sY+f6NMH7r4bvvzSt192mdenAJ9S2rmzXwI59VRPLNZf35OS0hLf5Y0Z40nG\nTjvF97NIrWnMhYiIJFdBgfdYvPBCWU/FEUf4bI9SO+wA998Phx1WNo7jjz9g773hkktWfc+TTvKe\nkHbtYg9faqbkQkREkqe4GE45xR+XJhY77ghPPLHy+Iy11/YqneU99JAv517au1Fe69bQr188MUud\n6bKIiIgkR1ER9O4N33xTtq1rV3jlFZ9WWp2FC+HCC+HYY736pqS1RtVz8ccfPiNJRETSQ9NFv7LR\nLecw65wbWd5lUzbr1I2SjbZjnTdGAfDpsHEU/bQe/FT5/t26eacEBx/si5O1aJG84KXeGlVy8fPI\nMbx89gc5/ofvAAAeG0lEQVRcxzU1NxYRkdhdzCPcwPMMHD2UeRjwDGuyiEWM4t/syd57rVvt/vn5\nkPX4+fDf//qGq6+OP2hpsEaVXGwyazxXrvMAB7+t5EJEJOWKitjm4GEs3j6HN4d0+t/mLvfeRMm/\nWtH5xRHkd7JV91uxgjZfTKHtlHfp/LeXIH+yb58922eQSNprVMlFC1ZAmxZkZaU6EhER4fmXYc4P\ntLxuMOv0TmybORPyhsLll9Njv0pqVhQVwbrr+cDNNdcsK6o1Y4YnFsXFcN11Xvlzq62S9qNI3TSq\n5ILly72Ai4iIpN5ll3mVzt69y7ZdfrkX1br00sr3adYMbr8devSA7GzfVlAAW2zhRbVycmDsWB/U\nqeQibTW+5EKDfUREUu/ZZ31WSN++Zds++ACeeQZGjIA2bare97TTVn7eo4fPKDnvPB+5//bbMHBg\nPHFLJBrXVNQVK5RciDRWIfhNMsPkxDiJ++7z+5ISn0qalQUnnFC79/jwQzjxRO/pOPhg6NLFR3gq\nsUh7jSu50GURkcxQVOQFlJ56qnbtX37Z16nYfHNfbVNJRvq74QYoLIT27f35nDn+BfCuuypfzKyi\nEDyhePllnyHy+edeAnzjjeONWyLRuC6LqOdCJL2FAC+95N9gv/vO/3gcc0zV7QsL4fTTvUz0fvvB\nsmVwwAFeGvrUU+GQQ6BTp6r3l9Rp0WLl38elvQ5//lm7/c2896NNG6/WKRmlcfVcbLKJltsVSVez\nZ/vKlYcdBt27w6RJ8OKL1e8zb54nIVdc4W3ffhveestnEZx9ts8gkMwxZw706gULFlT++kUXwd/+\nVvZ8o42UWGQoCxnevWhmWUB+fn4+WZqDKo1ZSQnceivceacvzjR0qP+xtkrqBKSbF1/0HohmzeCB\nB7zHoqFxL14MLVtWfyn0wQdhgw3gwAMbdixpuBB8afXPPoNPPy1bjKzUokVli45l+N+lTFJQUEC2\nz8rJDiEURPW+javnQqSxWrTILwFccYVPxdt6a/8Dvc02cM89/nqcXnnFr3nXx5Ah3lvRvz9Mneo/\nRxQJUdu21ScWIXhPx1//6tf5JbVeeAH+/W+fKVIxsQD/bJRaujR5cUkslFyIpNrChf5Lt7pvawsX\n+h/3117z0fdvvAFjxvhlwIsv9kFvdfHnn15O+YYbYK+9fHpfVULwdj16+GqW5f8I1MaAAb509vPP\nQ8eOddu3Icx8OuTll3t3++23J+/YsrKiIk+M993Xb5UpLCx7PGdOcuKS2DSuAZ0imeS777zI0Esv\n+Uyn+fN9yl1lNt3UV+Vrlvgva+bT8QYOhJ9/hvXXr/5Ykyf7ctVTp3qy8PHHnmCstZb/8V+wILE6\nVCXM4D//gX/8w/9AjxjhtQv228+7ubOzoWnTqo+9225+S4UmTeCmm/y8XXqp/+xVFW+S+Dz+OHz5\nJeTlVd2mTRsfj3HuuVr1tBGIbcyFmXUAhgEHAiXA88DgEMKSavY5FDgTyAbWBnqFED6t4TgacyGZ\n58kn4cwzoUMHuOACOOggn2ZZmyl6dfXrrz7uYL31PJEoKfFaA7vuCtttV5aw1MaKFfDcc94j8M47\nfjlm7bXhq6+qTozSQQhw7bVeNvrGG+H//q92l2a++QYmTPBzduSRNS8LLqtautQrae66a/XJhaRE\nXGMu4uy5eAroBAwEWgCPAw8Cx1WzTxvgP8DTwMMxxiaSGiHAsGFw/vm+NsL993vvQZw6dPD6AD16\nVN/DUBvNm/uYj5wcTzQmTfIko0OHaGKNi5mP/WjWzLvnFyzwgbFVefNNH8syenTZtqFDYdw4n6ki\ntXfhhd67dv31qY5EkiiW5MLMugH74JnQR4lt5wGvm9nFIYRKL6iFEEYl2m4CZMAQeMkoTz3lvQO9\ne/ssg2T7+muv6TBlCgwe7H+skjXTo1ev6N+zeXPo189vmeKqq3wwYU29NRMn+jiXf/7TB4R+951/\n8+7fH44/3v9gSu107w633AJbbll9u6KiuvWiSVqL5bKImZ0M3BFCWKfctqbAMuCIEEK1o88SycVM\ndFlEotS8uf8Ca9nSLwvstJPPuthkEx/TsPnm1Scdzz8P//qXtxswwP/YtG1b++PPnevTMc8/H/bc\ns6E/jSTbpEl+WeWHH+CTTzJjCnCmmDHDk9RTT/UxMpI0mXZZpDMwr/yGEEKxmS1MvBaP3Xf3X9pX\nXBHbISSD/fYbTJvm30onTPAZFt9/70s4A7z3XvUDD4uL/T1GjYLbbvNvWX36eILSqpUX/Pm//6t6\n/06dfEqnZKY+fXy2jkTvttu8YNott8All6T/ZTapUZ2SCzO7GbismiYB6N6giBrixx+9sI6sfkLw\nQXfVjSlo08b/QPTp470H4D0Zs2Z5t3dNlw6OOspvIfgAxrFjPSGZPh2WLKl6ip2IVG3hQk/YL7sM\nzjhDiUUjUdeeizuAETW0+RaYA6xXfmPissjaidcil5ubS7uff/au62nTAMjJySEnJyeOw0m6CAFe\nfdVnApxxht/qolkzXwipLoshmUHXrn4766y6HU8at5kz/TO5+eapjiT9zZjhS7CPH++9grm5Wicm\nZnl5eeRVmLFTWL6+SITqlFyEEBYAVRSFL2NmE4D2Zta7dEAnPmvEgA9re7i6xDZ06FCyDjjAB1td\ndVVddpVMFIJ3UV9zDXz0kV/O6NEj1VHJ6u7mm+HRR73+yK67+m3HHTWFtTI33eQ1U5o0gTvuUGKR\nBJV94S435iJSsVToDCFMB0YDD5vZDmbWD7gPyCs/U8TMppvZweWedzCz7YBt8ESkm5ltZ2a1+9QV\nFzd8qp2kvx9/9BH8Bx3k0zj
ffdcvT+y8c6ojk9Xd3Xf7VOPmzX2q6+67+3oZhx7q01u1ZkaZ++/3\nKcGzZnmvhTQqcZb/PgaYDowBXgPGARX7rLcC2pV7fhDwEfAq3nORBxRUsl/lmjTx6+7SOJWU+KJX\n22zjvRUvveRJxYABqY5MxLVu7ZfKXn/dxxJ88okPVpw5E/bf33s1xLVq5QXY1GPRKMU2qTiE8BvV\nF8wihNC0wvORwMh6H1TJReO2bJmXnz76aL9v167mfURSpUkT6NnTb4MHw9NPa3VWWW00roolSi4a\nt9atfU0MVUiUTGPmSbHIaqJxrYp61VW+wqM0XkosRBouBB982rMn/PRTqqORRqhxJRdnnJFZpYhF\nROI2e7aPbSi/lsrrr8Pf/+5F5C6/PLnx/PKLz+x64YXkHleSqnElF5LZlizx1TZ1aUskOq1awQ47\nwA03eC/FLbf4KrkffOAr8r75Zrz/56ZO9aniP//sM/qOOMK3/fFHfMeUlFNyIalTVOSj6R9+GE4+\nGdZfHwYN8sqXIhKN9u3hvvugsLCsRP2hh/o07v79fVbLZ5/Fd/yrr/bp4+uu64/HjYMNN4Rddonv\nmJJySi4kuYqLvZpmv37+y61XLzjzTMjP929R336rRb1k9VFcDA8+6DOh4hKC194oX2Pj++/90sTG\nG/tife++G8+xx4+HF1/0//OPP+6Fs9Zay3tQdtlFyzU0YkouJLmaNoX33/dvLjfc4N9iFi2CTz/1\nFSc33TTVEYokzzff+Do3cS62eNNN3nNxwAGrvrblltC3bzzJRUmJT8Ht3dvX9TnrLDjnHP+/Dn6Z\n5Lbboj+upIXGNRVV6mbqVHjmGb9fsMDrRuy+e/XV8ubP90WGpk3zgVnNmnk1wmbNym5DhkCXLlW/\nx9ixWq5aBHxF3Ztvhosu8iJbAwdGf4yTToLNNvP/36+/7tsOOMAff/+9z7B7//3oj3v//TBlCgwd\nCscdB4cfDvfc418wBg6Ed97xMVbXXRf9sSXllFysjsaPhyuv9G8r7dv7KqEbbAC//+7jIKrzxx8+\nuvwvf/ExEsuW+T4rVvh9URH8+Wf176HEQqTMBRf4H/oTT/SxD1GvCrrBBnDkkd5DeMklvo5H167+\nf7ZLFx+D8fe/R3e86dOhe2Jx7MGDPamYORNuvbVseYZbbvFei++/j+64klYaV3Ixf75/C1c56KqN\nGuWLu/XsCc8952t0tGhR+/032shndWgNF5FoNGkCI0f6GIizzoK8vGgT8FdfhdNP99+PN97oPRlP\nPAFff123//u1EULZFNObb/ZkpmlT77H46SdYbz0/5vbbe6+pNFqNa8zF6697t76mOFXt4IPhqad8\nbY7DD6/7LxczJRYiUdtwQ/jHP7xE+FNPRfe+//ynL/DXp48PqtxiCx/rdOmlsMYa0R2n1FtvlY0f\nmT7dk4sQ4LLL/IvJjTdGf0xJS42r52Kzzfz+u++8215WteaaUGHJXRFJA0cfDa+9Bqed5jM4jjii\nYe/33//6FO9TT/Xp3uV7Qy65pGHvXZU33vD7bbbxXuRDDoEJE8oGbm63Xe3f65dfvHfj9NOj72GR\n2DWunovS5GLmzNTGISJSHw8/DH/7G2y7bcPep6jIL7Fsv72vJBzVZZaiIq+EXJk5c+CRR7yHZOpU\nH8x55ZVerAs8hoMPrv2xbrnFx4IsWtTgsCX5Gldy0aWLz1xYnZOLoiKYOzfVUYhIfbRq5eMTunVr\n2PuMHOkzuu6/32dwReXjj+Ghh/ySR0UtWvhMs8GDy7Z9+61fggGvc1HVJdXly+HLLz1BAf8ddtdd\nfpm7Y8fo4pekaVzJRdOm3nvx9depjiRev//ugzLHjSub3bFkiXch9ugBxx6b2vhEJF5HHOGXO954\no/IZXk2aeA9Idna0x5061e8rq0fToYNPp23f3p9/8YWP8QC/PNKvn4+HKyryGWflvwReeKEnVOuv\n71NjO3f27YcfHm38kjSNK7kAyMry7rhM9fvvPtiyOt99B5Mne73+ddaBnXf27H7QIJ92dscdSQlV\nRFIgBB87NX6816vYaisfCFq+AufJJ/vlkKgtWeK9wxUHg4bg9Sp23dWr7c6YsfK4t5de8oJdbdr4\n/rfeunJ9i/79yx6PGVP2WF+UMlbjSy522MH/OC9fnupI6mbSJDjqKF+9cP/9V/5FUVGPHvD5534t\n85JLyqpdfvml/8fs1St5cYtIcpnBiBH+OyA/36eVH3203+bPr/8iZFOn+u+T6urUlJT48f/5z7Lk\npbDQB6Bee63PBnn3Xa+jAXD99f77+JNPvGjWP/9Z9l577eX3xcX+xaiiESO8B0YyUwgho29AFhDy\n8/NDCCGEqVNDWGONED78MKS9FStCePHFEHbdNQQIYcstQxg6NIQZM1IdmYhkkmefDaFNG/89cvzx\n9XuPJ58MoUWLEPbYI4TCwsrbvPyyHwNCOOUU/z3bqlXZthBCmDgxhPvvD+GPP1bdf/Zsb3/99WXb\n/vGPsv3nzQvhpptCaNYshD//rN/PIXWSn58fgABkhQj/Nluo7htyBjCzLCA/Pz+frKws37hokS+O\nk66WL/epWQ8+6IVl+vb1bwwHHaQaEiJSP3PmwL//7b0IRx5Zvxki48b576HNNvPxHOuv79sffdSn\ntk6fDhMnVr3/iBFepKsqF1zgAztnziyrRDp3ro+xGD7cZ7hIUhUUFJDtY3OyQwgFUb1v46pzUaqm\nxGL4cPjqK8+Vi4tXvhUV+Wp9p58eX3zNm8PLL8M++/h/pqgHXYnI6qdzZx/o3RD9+8N//gP77Qc7\n7eRfgPbe2+8nT/bBmldcUVYMq0MH+PVXaN3aB2tOmlR1cvHTT34p5YorVi5x3qnTyr+DFy705dkl\nozXO5KIm06eXLZ7VtOmqt9LrhVVZsQJ+/NGz+4rfDlas8LEP1c1TN4MPP9T1RBFJPz16+GDRk0/2\nJKNly7JxGG+84T2tv//uY8T69fMprw8+6CuvbrVV1e/78MOeSJSfqlqqSRP/stejh88y+eUXTUHN\ncKtncnHvvQ3b/9NPvTjNmmv6lKx11vHeiJ9/9iWU//yz5v8cSixEJF1tvLEPDh871gd6rrUWHHaY\nr6wKXouj1Dbb+O/U66/334lVWXNNv7RS2rO8YIH/7ix1++2eWID3YFT05pt+/J13btjPJkmxeiYX\nDbX11r6OybRpPi30t988oejWzeeWb799eo/5EBGpiZkvjV7bZeBLE4+K/vzTL7WUlHhxr3PPheef\nh/PP95kkW2/tYzCGDPHfoW3brnpZ5LPPvKfkwAOVXGQIJRf1seaaPl10//1THYmISPq6/nq4+uqy\n52uvDbNn+7i3M8+ETTbx7bm53tM7ebInF+XNnetJxZZbenlxyQjqmxcRkXiMHbvy80WLPLEYMsTv\nW7b0+hcvv+yXRSomFsuWwaGH+gy7V17xIlyVGT/ex8FJ2lByISIi8Xj3XV+O4a9/9edF
Rb4g2dVX\nlw2G32ILX+CstJBWUZGvibJkic88mTDBx7ZttFHVx3noIdhzz/oXEJPIKbkQEZH4bLmlzzxZc03v\nnbjsspVf33RTv3xSmmx89pmPy2jbFl580betvXb1xxg0yMsLzJgRefhSP0ouREQkXoce6pdELr64\n5ra9e/sgT/CxFmusAQMGVL9P6cqvLVo0KEyJjpILERFJLx9/DOedBy+84OMualod9ZtvPMEorSgq\nKafkQkRE0kurVnDXXWXPV6yovv2UKV64sGXLeOOSWlNyISIiyfXooz7W4qGHqm5TfmXrbt3gtNOq\nbjtlitcXkrSh5EJERJLn3//2YoNLlniti3/9q/J2o0f7/ZgxPtukqksjS5d6QUOt0ZRWVERLRESS\no6TEV0bdYw8v533qqXDssb6uSE7Oym2fe87XGqmpSugPP/hCaOq5SCtKLkREJDmaNPHFz5o29QGY\njz3mU1CPPdbvjz66rO0113h1zpp07Vq7dpJUsSUXZtYBGAYcCJQAzwODQwhLqmjfDLgR2A/YHCgE\nxgCXhxBmxxWniIgkUWnJb/Ak47HHfLn43r1Xbrf11mVTUmtScXVqSbk4x1w8BXQHBgIHAP2BB6tp\n3xroBQwBegOHAl2Bl2OMUUREUqlJE6/a2bVrqiORCMWSXJhZN2Af4NQQwpQQwnjgPOBoM+tc2T4h\nhEUhhH1CCM+HEL4OIUwCzgWyzWzDOOIUEZEUefbZskGb5d17r6+SKhktrp6LvsCvIYSPym0bAwRg\nxzq8T/vEPr9FGJuIiKTaUUfBvvt6OfCiIt/2/vsweLDP/pCMFldy0RmYV35DCKEYWJh4rUZm1hK4\nBXgqhLA48ghFRCQ1FiwoW6jsttt8LRHwXotu3aqf+aHFyTJCnQZ0mtnNwGXVNAn4OIsGSQzufDbx\nfmfXZp/c3FzatWu30racnBxyKk5vEhGR1HnoIbjqKl+g7IknYORIr3vRrZsvvV5cDMcd5/UtygsB\nhg717a+8UraeiNRaXl4eeXl5K20rLCyM5VgWQqh9Y7N1gHVqaPYtcDxwRwjhf23NrCmwDDgihFDl\nIM1yicWmwB4hhF9riCkLyM/PzycrK6tWP4eIiCTJ3Lnw448+G+S557yexTnneC+FmScNgwb5GIzy\nfv/dV0YF76045xx44AG/jHLTTT4QVBqsoKCAbC9Alh1CKIjqfev0rxNCWBBC+KqGWxEwAWhvZuXn\nFg0EDPiwqvcvl1hsDgysKbEQEZE09/rr0KePL59+3HFe0+Kee8qmj5pVXgb8vff8PgS45BJ48EF4\n5BGfWaLEIu3F8i8UQpgOjAYeNrMdzKwfcB+QF0KYU9rOzKab2cGJx83wWhhZwHFAczPrlLg1jyNO\nERGJ2Zw5niAceSTsv7/XtaiYHLRv75dHLr8cFi+GzTcvm0ly++2+iNk993hFT8kIcV60OgYvojUG\nL6L1HDC4QputgNKBEhvgBbcAPk7cGz7uYndgXIyxiohIHA4/HAoK4MADveeiqrESJ5zg9ytWwMKF\nvgbJY4/5ZZCrrvIl2CVjxJZchBB+w3sgqmvTtNzj74Gm1TQXEZFM07Wrj7WoraVL4bff/Hb66XDG\nGTBkSHzxSSx04UpERNLHWmvBoYfCeut5b8X996u8dwbSXB4REUkvzz/v90oqMpaSCxERSS9KKjKe\nLouIiIhIpJRciIiISKSUXIiIiEiklFyIiIhIpJRciIiISKSUXIiIiEiklFyIiIhIpJRciIiISKSU\nXIiIiEiklFyIiIhIpJRciIiISKSUXIiIiEiklFyIiIhIpJRciIiISKSUXIiIiEiklFyIiIhIpJRc\niIiISKSUXIiIiEiklFyIiIhIpJRciIiISKSUXIiIiEiklFyIiIhIpJRciIiISKSUXIiIiEiklFyI\niIhIpJRciIiISKSUXIiIiEiklFyIiIhIpJRciIiISKSUXEi95OXlpTqE1Y7OefLpnCefznnjEFty\nYWYdzOxJMys0s1/N7BEza1PDPteY2RdmttjMFprZv82sT1wxSv3pF0Dy6Zwnn8558umcNw5x9lw8\nBXQHBgIHAP2BB2vY50vgHGBboB/wHfC2ma0TX5giIiISpViSCzPrBuwDnBpCmBJCGA+cBxxtZp2r\n2i+E8K8QwtgQwnchhC+AC4G1gJ5xxCkiIiLRi6vnoi/wawjho3LbxgAB2LE2b2BmzYEzgN+ATyKP\nUERERGLRLKb37QzMK78hhFBsZgsTr1XJzA4A/gW0BmYBe4UQFlazyxoAX3zxRYMClropLCykoKAg\n1WGsVnTOk0/nPPl0zpOr3N/ONaJ8Xwsh1L6x2c3AZdU0Cfg4i8OBE0II3SvsPxe4OoRQ5dgLM2sF\nrA90BE7Hx2z0CSHMr6L9McCTtf4hREREpKJjQwhPRfVmdU0u1gFqGlz5LXA8cEcI4X9tzawpsAw4\nIoTwch2O+RXwaAjh1mpi2gcf/Lmstu8rIiIirAFsCowOISyI6k3rdFkkceAaD25mE4D2Zta73LiL\ngYABH9YxxiZAyxpiiizbEhERWc2Mj/oNYxnQGUKYDowGHjazHcysH3AfkBdCmFPazsymm9nBicet\nzexGM9vRzDY2sywzewzoAjwbR5wiIiISvbgGdAIcAwzDZ4mUAM8Bgyu02Qpol3hcDHQDTsDHWywA\nJgO7JKalioiISAao05gLERERkZpobRERERGJlJILERERiVRGJBdmdo6ZzTSzpWY20cx2qKH9ADPL\nN7NlZvaVmZ2YrFgbi7qcczPbzcxKKtyKzWy9ZMacycxsVzN7xcx+Tpy/g2qxjz7nDVDXc67PecOY\n2f+Z2SQzW2Rmc83sRTPbuhb76XNeT/U551F9ztM+uTCzQcCdwDVAb7wU+Ggz61hF+02B14B3gO2A\ne4BHzGyvZMTbGNT1nCcEfIBu58Rt/RDCvGray8raAB8DZ+Pnslr6nEeiTuc8QZ/z+tsVnzW4I7An\n0BxfmLJVVTvoc95gdT7nCQ3+nKf9gE4zmwh8GEIYnHhuwI/AvSGE2yppfyuwXwihZ7lteUC7EML+\nSQo7o9XjnO8GjAU6hBAWJTXYRsjMSoBDQgivVNNGn/MI1fKc63MeocSXlXlA/xDCf6too895hGp5\nziP5nKd1z0Vi8bJsPGsFIHg2NAZfHK0yOyVeL290Ne2lnHqec/ACaR+b2Swze9vMdo430tWePuep\noc95dNrj35CrWztKn/No1eacQwSf87ROLvB6F02BuRW2z6XqBdA6V9F+LTOrstKn/E99zvlsfAXb\nw4HD8F6O98ysV1xBij7nKaDPeUQSvaF3A/8NIXxeTVN9ziNSh3Meyec8ziJaspoIIXwFfFVu00Qz\n2wLIBTT4ShoFfc4jNRz4C9Av1YGsRmp1zqP6nKd7z8V8vHJnpwrbOwFzVm0Oie2VtV8UQvgz2vAa\npfqc88pMAraMKihZhT7n6UGf8zoys2HA/sC
AEMLsGprrcx6BOp7zytT5c57WyUUIYQWQjy96Bvyv\na2cgVS+0MqF8+4S9E9ulBvU855XphXevSTz0OU8P+pzXQeKP3MHA7iGEH2qxiz7nDVSPc16ZOn/O\nM+GyyF3A42aWj2dPuUBr4HEAM7sZ6BJCKO2ueQA4JzHK+DH8g3kEnrVJ7dTpnJvZYGAmMA1fvvd0\nYHdA08Vqycza4N8MLLFpczPbDlgYQvhRn/Po1fWc63PeMGY2HMgBDgKWmFlpj0RhCGFZos1NwAb6\nnEejPuc8ss95CCHtb/g89O+ApXjGun2510YAYyu0749/+14KfA0cn+qfIdNudTnnwCWJ87wE+AWf\nadI/1T9DJt2A3fAF/oor3B6r7JwntulznsRzrs95g893Zee6GDihXBt9zlN8zqP6nKd9nQsRERHJ\nLGk95kJEREQyj5ILERERiZSSCxEREYmUkgsRERGJlJILERERiZSSCxEREYmUkgsRERGJlJILERER\niZSSCxEREYmUkgsRERGJlJILERERidT/A0u0Vy+buKaqAAAAAElFTkSuQmCC\n", 32 | "text/plain": [ 33 | "" 34 | ] 35 | }, 36 | "metadata": {}, 37 | "output_type": "display_data" 38 | } 39 | ], 40 | "source": [ 41 | "#####################################\n", 42 | "########### TO SPECIFY ##############\n", 43 | "#####################################\n", 44 | "\n", 45 | "save_dir = '../run_0'\n", 46 | "which_agent = 2\n", 47 | "time_lim = -1 #1000\n", 48 | "num_des_points = 3\n", 49 | "\n", 50 | "#####################################\n", 51 | "#####################################\n", 52 | "\n", 53 | "\n", 54 | "#imports\n", 55 | "import numpy as np\n", 56 | "import matplotlib.pyplot as plt\n", 57 | "%matplotlib inline\n", 58 | "\n", 59 | "\n", 60 | "#read in trajectory following data\n", 61 | "states_des = np.load(save_dir+'/saved_trajfollow/true_iter0.npy')\n", 62 | "states_exec = np.load(save_dir+'/saved_trajfollow/pred_iter0.npy')\n", 63 | "print (states_des.shape)\n", 64 | "print (states_exec.shape)\n", 65 | " \n", 66 | " \n", 67 | "### ANT\n", 68 | "if(which_agent==1):\n", 69 | " x=29\n", 70 | " y=30\n", 71 | "### SWIMMER\n", 72 | "if(which_agent==2):\n", 73 | " x=10\n", 74 | " y=11\n", 75 | "### CHEETAH\n", 76 | "if(which_agent==4):\n", 77 | " x=0\n", 78 | " y=1\n", 79 | " \n", 80 | "#look at right indeces of executed trajectory\n", 81 | "states_exec=states_exec[0][:,[x,y]]\n", 82 | "\n", 83 | "#plot desired vs executed trajectories\n", 84 | "plt.figure()\n", 85 | "plt.title('xy')\n", 86 | "plt.plot(states_des[0:num_des_points, 0], states_des[0:num_des_points, 1])\n", 87 | "plt.plot(states_exec[0:time_lim, 0], states_exec[0:time_lim, 1], 'r--')\n", 88 | "\n" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": { 95 | "collapsed": true 96 | }, 97 | "outputs": [], 98 | "source": [] 99 | } 100 | ], 101 | "metadata": { 102 | "anaconda-cloud": {}, 103 | "celltoolbar": "Raw Cell Format", 104 | "kernelspec": { 105 | "display_name": "Python 3", 106 | "language": "python", 107 | "name": "python3" 108 | }, 109 | "language_info": { 110 | "codemirror_mode": { 111 | "name": "ipython", 112 | "version": 3 113 | }, 114 | "file_extension": ".py", 115 | "mimetype": "text/x-python", 116 | "name": "python", 117 | "nbconvert_exporter": "python", 118 | "pygments_lexer": "ipython3", 119 | "version": "3.5.2" 120 | }, 121 | "widgets": { 122 | "state": {}, 123 | "version": "1.1.2" 124 | } 125 | }, 126 | "nbformat": 4, 127 | "nbformat_minor": 1 128 | } 129 | -------------------------------------------------------------------------------- /point_env.py: -------------------------------------------------------------------------------- 1 | from rllab.envs.base import Env 2 | from rllab.spaces import Box 3 | from rllab.envs.base import Step 4 | import numpy as np 5 | 6 | class PointEnv(Env): 7 | @property 8 | def observation_space(self): 9 | return Box(low=-np.inf, high=np.inf, shape=(4,)) #state space = [x, y, vx, vy] 10 | 11 | @property 12 | def action_space(self): 13 | return 
Box(low=-5, high=5, shape=(2,)) #controls are the forces applied to pointmass 14 | 15 | def reset(self, init_state=None): 16 | if(init_state==None): 17 | np.random.seed() 18 | self._state=np.zeros((4,)) 19 | self._state[0]= np.random.uniform(-10, 10) 20 | self._state[1]= np.random.uniform(-10, 10) 21 | else: 22 | self._state = init_state 23 | 24 | observation = np.copy(self._state) 25 | return observation 26 | 27 | def step(self, action): 28 | #next state = what happens after taking "action" 29 | temp_state=np.copy(self._state) 30 | dt=0.1 31 | temp_state[0] = self._state[0] + self._state[2]*dt + 0.5*action[0]*dt*dt 32 | temp_state[1] = self._state[1] + self._state[3]*dt + 0.5*action[1]*dt*dt 33 | temp_state[2] = self._state[2] + action[0]*dt 34 | temp_state[3] = self._state[3] + action[1]*dt 35 | self._state = np.copy(temp_state) 36 | 37 | #make the reward what you care about 38 | x, y, vx, vy = self._state 39 | reward = vx - np.sqrt(abs(y-0)) #we care about moving in the forward x direction... and keeping our y value close to 0... (aka "going straight") 40 | done = 0#x>500 #when do you consider this to be "done" (rollout stops... "terminal") 41 | next_observation = np.copy(self._state) 42 | return Step(observation=next_observation, reward=reward, done=done) 43 | 44 | def render(self): 45 | return self._state -------------------------------------------------------------------------------- /policy_random.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class Policy_Random(object): 4 | 5 | def __init__(self, env): 6 | 7 | #vars 8 | self.env = env 9 | self.low_val = self.env.action_space.low 10 | self.high_val = self.env.action_space.high 11 | self.shape = self.env.action_space.shape 12 | print("Created a random policy, where actions are selected between ", self.low_val, ", and ", self.high_val) 13 | 14 | def get_action(self, observation): 15 | return np.random.uniform(self.low_val, self.high_val, self.shape), 0 -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Neural Network Dynamics for Model-Based Deep Reinforcement Learning with Model-Free Fine-Tuning 2 | 3 | [Arxiv Link](https://arxiv.org/abs/1708.02596) 4 | 5 | **Abstract**: Model-free deep reinforcement learning algorithms have been shown to be capable of learning a wide range of robotic skills, but typically require a very large number of samples to achieve good performance. Model-based algorithms, in principle, can provide for much more efficient learning, but have proven difficult to extend to expressive, high-capacity models such as deep neural networks. In this work, we demonstrate that medium-sized neural network models can in fact be combined with model predictive control (MPC) to achieve excellent sample complexity in a model-based reinforcement learning algorithm, producing stable and plausible gaits to accomplish various complex locomotion tasks. We also propose using deep neural network dynamics models to initialize a model-free learner, in order to combine the sample efficiency of model-based approaches with the high task-specific performance of model-free methods. 
We empirically demonstrate on MuJoCo locomotion tasks that our pure model-based approach trained on just minutes of random action data can follow arbitrary trajectories, and that our hybrid algorithm can accelerate model-free learning on high-speed benchmark tasks, achieving sample efficiency gains of 3-5x on swimmer, cheetah, hopper, and ant agents. 6 | 9 | 10 | - For installation guide, go to [installation.md](https://github.com/nagaban2/learn_dynamics/blob/release/docs/installation.md) 11 | - For notes on how to use your own environment, how to edit envs, etc. go to [notes.md](https://github.com/nagaban2/learn_dynamics/blob/release/docs/notes.md) 12 | 13 | --------------------------------------------------------------- 14 | 15 | ### How to run everything 16 | 17 | ``` 18 | cd scripts 19 | ./swimmer_mbmf.sh 20 | ./cheetah_mbmf.sh 21 | ./hopper_mbmf.sh 22 | ./ant_mbmf.sh 23 | ``` 24 | 25 | Each of those scripts does something similar to the following (but for multiple seeds): 26 | 27 | ``` 28 | python main.py --seed=0 --run_num=1 --yaml_file='swimmer_forward' 29 | python mbmf.py --run_num=1 --which_agent=2 30 | python trpo_run_mf.py --seed=0 --save_trpo_run_num=1 --which_agent=2 --num_workers_trpo=2 --std_on_mlp_policy=0.5 31 | python plot_mbmf.py --trpo_dir=[trpo_dir] --std_on_mlp_policy=0.5 --which_agent=2 --run_nums 1 --seeds 0 32 | ``` 33 | Note that [trpo_dir] above corresponds to where the TRPO runs are saved. Probably somewhere in ~/rllab/data/...
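For reference, the shell scripts in `scripts/` point TRPO at the default rllab experiment folder, so on a typical setup the plotting call looks roughly like the following (the exact path is an assumption and depends on where your rllab install writes its runs):

```
python plot_mbmf.py --trpo_dir=/home/<username>/rllab/data/local/experiment/ --std_on_mlp_policy=0.5 --which_agent=2 --run_nums 1 --seeds 0
```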
34 | Each of these steps is further explained in the following sections. 35 | 36 | --------------------------------------------------------------- 37 | 38 | ### How to run MB 39 | 40 | Need to specify:
41 | 42 |     --**yaml_file** Specify the corresponding yaml file
43 |     --**seed** Set the random seed for numpy and tensorflow
44 |     --**run_num** Specify what directory to save files under
45 |     --**use_existing_training_data** To use the data that already exists in the directory run_num instead of recollecting
46 |     --**desired_traj_type** What type of trajectory to follow (if you want to follow a trajectory)
47 |     --**num_rollouts_save_for_mf** Number of on-policy rollouts to save after last aggregation iteration, to be used later
48 |     --**might_render** If you might want to visualize anything during the run
49 |     --**visualize_MPC_rollout** To set a breakpoint and visualize the on-policy rollouts after each aggregation iteration
50 |     --**perform_forwardsim_for_vis** To visualize an open-loop prediction made by the learned dynamics model
51 |     --**print_minimal** To not print messages regarding progress/notes/etc.
52 | 53 | ##### Examples: 54 | ``` 55 | python main.py --seed=0 --run_num=0 --yaml_file='cheetah_forward' 56 | python main.py --seed=0 --run_num=1 --yaml_file='swimmer_forward' 57 | python main.py --seed=0 --run_num=2 --yaml_file='ant_forward' 58 | python main.py --seed=0 --run_num=3 --yaml_file='hopper_forward' 59 | ``` 60 | ``` 61 | python main.py --seed=0 --run_num=4 --yaml_file='cheetah_trajfollow' --desired_traj_type='straight' --visualize_MPC_rollout 62 | python main.py --seed=0 --run_num=4 --yaml_file='cheetah_trajfollow' --desired_traj_type='backward' --visualize_MPC_rollout --use_existing_training_data --use_existing_dynamics_model 63 | python main.py --seed=0 --run_num=4 --yaml_file='cheetah_trajfollow' --desired_traj_type='forwardbackward' --visualize_MPC_rollout --use_existing_training_data --use_existing_dynamics_model 64 | ``` 65 | ``` 66 | python main.py --seed=0 --run_num=5 --yaml_file='swimmer_trajfollow' --desired_traj_type='straight' --visualize_MPC_rollout 67 | python main.py --seed=0 --run_num=5 --yaml_file='swimmer_trajfollow' --desired_traj_type='left_turn' --visualize_MPC_rollout --use_existing_training_data --use_existing_dynamics_model 68 | python main.py --seed=0 --run_num=5 --yaml_file='swimmer_trajfollow' --desired_traj_type='right_turn' --visualize_MPC_rollout --use_existing_training_data --use_existing_dynamics_model 69 | ``` 70 | ``` 71 | python main.py --seed=0 --run_num=6 --yaml_file='ant_trajfollow' --desired_traj_type='straight' --visualize_MPC_rollout 72 | python main.py --seed=0 --run_num=6 --yaml_file='ant_trajfollow' --desired_traj_type='left_turn' --visualize_MPC_rollout --use_existing_training_data --use_existing_dynamics_model 73 | python main.py --seed=0 --run_num=6 --yaml_file='ant_trajfollow' --desired_traj_type='right_turn' --visualize_MPC_rollout --use_existing_training_data --use_existing_dynamics_model 74 | python main.py --seed=0 --run_num=6 --yaml_file='ant_trajfollow' --desired_traj_type='u_turn' --visualize_MPC_rollout --use_existing_training_data --use_existing_dynamics_model 75 | ``` 76 | --------------------------------------------------------------- 77 | 78 | ### How to run MBMF 79 | 80 | Need to specify:
81 | 82 |     --**save_trpo_run_num** Number used as part of the directory name for saving the MBMF TRPO run (you can use 1, 2, 3, etc. to differentiate your different seeds)
83 |     --**run_num** Specify what directory to get relevant MB data from & to save new MBMF files in
84 |     --**which_agent** Specify which agent (1 ant, 2 swimmer, 4 cheetah, 6 hopper)
85 |     --**std_on_mlp_policy** Initial std you want to set on your pre-initialization policy for TRPO to use
86 |     --**num_workers_trpo** How many worker threads (cpu) for TRPO to use
87 |     --**might_render** If you might want to visualize anything during the run
88 |     --**visualize_mlp_policy** To visualize the rollout performed by the trained policy (that will serve as pre-initialization for TRPO)
89 |     --**visualize_on_policy_rollouts** To set a breakpoint and visualize the on-policy rollouts after each aggregation iteration of DAgger
90 |     --**print_minimal** To not print messages regarding progress/notes/etc.
91 |     --**use_existing_pretrained_policy** To run only the TRPO part (if you've already done the imitation learning part in the same directory)
92 | 93 | *Note that the finished TRPO run saves to ~/rllab/data/local/experiments/* 94 | 95 | ##### Examples: 96 | ``` 97 | python mbmf.py --run_num=1 --which_agent=2 --std_on_mlp_policy=1.0 98 | python mbmf.py --run_num=0 --which_agent=4 --std_on_mlp_policy=0.5 99 | python mbmf.py --run_num=3 --which_agent=6 --std_on_mlp_policy=1.0 100 | python mbmf.py --run_num=2 --which_agent=1 --std_on_mlp_policy=0.5 101 | ``` 102 | 103 | --------------------------------------------------------------- 104 | 105 | ### How to run MF 106 | 107 | Run pure TRPO, for comparisons.

108 | 109 | Need to specify command-line args as desired
110 |     --**seed** Set the random seed for numpy and tensorflow
111 |     --**steps_per_rollout** Length of each rollout that TRPO should collect
112 |     --**save_trpo_run_num** Number used as part of the directory name for saving the TRPO run (you can use 1, 2, 3, etc. to differentiate your different seeds)
113 |     --**which_agent** Specify which agent (1 ant, 2 swimmer, 4 cheetah, 6 hopper)
114 |     --**num_workers_trpo** How many worker threads (cpu) for TRPO to use
115 |     --**num_trpo_iters** Total number of TRPO iterations to run before stopping
116 | 117 | *Note that the finished TRPO run saves to ~/rllab/data/local/experiments/* 118 | 119 | 120 | ##### Examples: 121 | ``` 122 | python trpo_run_mf.py --seed=0 --save_trpo_run_num=1 --which_agent=4 --num_workers_trpo=4 123 | python trpo_run_mf.py --seed=0 --save_trpo_run_num=1 --which_agent=2 --num_workers_trpo=4 124 | python trpo_run_mf.py --seed=0 --save_trpo_run_num=1 --which_agent=1 --num_workers_trpo=4 125 | python trpo_run_mf.py --seed=0 --save_trpo_run_num=1 --which_agent=6 --num_workers_trpo=4 126 | 127 | python trpo_run_mf.py --seed=50 --save_trpo_run_num=2 --which_agent=4 --num_workers_trpo=4 128 | python trpo_run_mf.py --seed=50 --save_trpo_run_num=2 --which_agent=2 --num_workers_trpo=4 129 | python trpo_run_mf.py --seed=50 --save_trpo_run_num=2 --which_agent=1 --num_workers_trpo=4 130 | python trpo_run_mf.py --seed=50 --save_trpo_run_num=2 --which_agent=6 --num_workers_trpo=4 131 | ``` 132 | --------------------------------------------------------------- 133 | 134 | ### How to plot 135 | 136 | 1) MBMF
137 |     -Need to specify the command-line arguments as desired (in plot_mbmf.py)
138 |     -Examples of running the plotting script:
139 | ``` 140 | cd plotting 141 | python plot_mbmf.py --trpo_dir=[trpo_dir] --std_on_mlp_policy=1.0 --which_agent=2 --run_nums 1 --seeds 0 142 | python plot_mbmf.py --trpo_dir=[trpo_dir] --std_on_mlp_policy=1.0 --which_agent=2 --run_nums 1 2 3 --seeds 0 70 100 143 | ``` 144 | Note that [trpo_dir] above corresponds to where the TRPO runs are saved. Probably somewhere in ~/rllab/data/... 145 | 146 | 2) Dynamics model training and validation losses per aggregation iteration
147 | IPython notebook: plotting/plot_loss.ipynb
148 | Example plots: docs/sample_plots/...
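To regenerate these curves, open the notebook with Jupyter (assumed to be installed) and set `save_dir` in its first cell to the run directory you want to inspect, e.g.:

```
cd plotting
jupyter notebook plot_loss.ipynb
```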
149 | 150 | 3) Visualize a forward simulation (an open-loop multi-step prediction of the elements of the state space)
151 | IPython notebook: plotting/plot_forwardsim.ipynb
152 | Example plots: docs/sample_plots/...
153 | 154 | 4) Visualize the trajectories (on-policy rollouts) per aggregation iteration
155 | IPython notebook: plotting/plot_trajfollow.ipynb
156 | Example plots: docs/sample_plots/...
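For a quick look without launching Jupyter, the core of plot_trajfollow.ipynb can also be run as a short standalone script. The sketch below assumes a swimmer run saved under `run_0` and the iteration-0 file names used in the example notebook; adjust `save_dir` and the xy state indices for your own agent and run:

```
import numpy as np
import matplotlib.pyplot as plt

save_dir = '../run_0'   # run directory to inspect (assumed)
x, y = 10, 11           # swimmer xy state indices (ant: 29/30, cheetah: 0/1)

# desired vs. executed xy trajectories written during the MB trajectory-following run
states_des = np.load(save_dir + '/saved_trajfollow/true_iter0.npy')
states_exec = np.load(save_dir + '/saved_trajfollow/pred_iter0.npy')[0][:, [x, y]]

plt.plot(states_des[:, 0], states_des[:, 1], label='desired')
plt.plot(states_exec[:, 0], states_exec[:, 1], 'r--', label='executed')
plt.legend()
plt.show()
```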
157 | 158 | -------------------------------------------------------------------------------- /reward_functions.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class RewardFunctions: 4 | 5 | def __init__(self, which_agent, x_index, y_index, z_index, yaw_index, joint1_index, joint2_index, 6 | frontleg_index, frontshin_index, frontfoot_index, xvel_index, orientation_index): 7 | self.which_agent = which_agent 8 | self.x_index = x_index 9 | self.y_index = y_index 10 | self.z_index = z_index 11 | self.yaw_index = yaw_index 12 | self.joint1_index = joint1_index 13 | self.joint2_index = joint2_index 14 | self.frontleg_index = frontleg_index 15 | self.frontshin_index = frontshin_index 16 | self.frontfoot_index = frontfoot_index 17 | self.xvel_index = xvel_index 18 | self.orientation_index = orientation_index 19 | 20 | def get_reward_func(self, follow_trajectories, desired_states, horiz_penalty_factor, 21 | forward_encouragement_factor, heading_penalty_factor): 22 | 23 | #init vars 24 | self.desired_states= desired_states 25 | self.horiz_penalty_factor = horiz_penalty_factor 26 | self.forward_encouragement_factor = forward_encouragement_factor 27 | self.heading_penalty_factor = heading_penalty_factor 28 | 29 | if(follow_trajectories): 30 | if(self.which_agent==1): 31 | reward_func= self.ant_follow_traj 32 | if(self.which_agent==2): 33 | reward_func= self.swimmer_follow_traj 34 | if(self.which_agent==4): 35 | reward_func= self.cheetah_follow_traj 36 | else: 37 | if(self.which_agent==1): 38 | reward_func= self.ant_forward 39 | if(self.which_agent==2): 40 | reward_func= self.swimmer_forward 41 | if(self.which_agent==4): 42 | reward_func= self.cheetah_forward 43 | if(self.which_agent==6): 44 | reward_func= self.hopper_forward 45 | return reward_func 46 | 47 | ###################################################################################################################### 48 | def ant_follow_traj(self, pt, prev_pt, scores, min_perp_dist, curr_forward, prev_forward, 49 | curr_seg, moved_to_next, done_forever, all_samples, pt_number): 50 | 51 | #penalize horiz dist away from trajectory 52 | scores[min_perp_dist<1] += (min_perp_dist*self.horiz_penalty_factor)[min_perp_dist<1] 53 | scores[min_perp_dist>=1] += (min_perp_dist*10*self.horiz_penalty_factor)[min_perp_dist>=1] 54 | 55 | #encourage moving-forward 56 | scores[moved_to_next==0] -= self.forward_encouragement_factor*(curr_forward - prev_forward)[moved_to_next==0] 57 | scores[moved_to_next==1] -= self.forward_encouragement_factor*(curr_forward)[moved_to_next==1] 58 | 59 | #prevent height from going too high or too low 60 | scores[pt[:,self.z_index]>0.67] += (self.heading_penalty_factor*40 + 0*pt[:,self.z_index])[pt[:,self.z_index]>0.67] 61 | scores[pt[:,self.z_index]<0.3] += (self.heading_penalty_factor*40 + 0*pt[:,self.z_index])[pt[:,self.z_index]<0.3] 62 | 63 | return scores, done_forever 64 | 65 | def swimmer_follow_traj(self, pt, prev_pt, scores, min_perp_dist, curr_forward, prev_forward, 66 | curr_seg, moved_to_next, done_forever, all_samples, pt_number): 67 | 68 | #penalize horiz dist away from trajectory 69 | scores += min_perp_dist*self.horiz_penalty_factor 70 | 71 | #encourage moving-forward and penalize not-moving-forward 72 | scores[moved_to_next==0] -= self.forward_encouragement_factor*(curr_forward - prev_forward)[moved_to_next==0] 73 | scores[moved_to_next==1] -= self.forward_encouragement_factor*(curr_forward)[moved_to_next==1] 74 | 75 | #angle that (desired traj) 
line segment makes WRT horizontal 76 | curr_line_start = self.desired_states[curr_seg] 77 | curr_line_end = self.desired_states[curr_seg+1] 78 | angle = np.arctan2(curr_line_end[:,1]-curr_line_start[:,1], curr_line_end[:,0]-curr_line_start[:,0]) 79 | # ^ -pi to pi 80 | 81 | #penalize heading away from that angle 82 | diff = np.abs(pt[:,self.yaw_index]-angle) 83 | diff[diff>np.pi]=(2*np.pi-diff)[diff>np.pi] 84 | #^ if the calculation takes you the long way around the circle, 85 | #take the shorter value instead as the difference 86 | my_range = np.pi/3.0 87 | scores[diff=my_range] += 20 89 | 90 | #dont bend in too much 91 | first_joint = np.abs(pt[:,self.joint1_index]) 92 | second_joint = np.abs(pt[:,self.joint2_index]) 93 | limit = np.pi/3 94 | scores[limit=my_range] += self.heading_penalty_factor 113 | 114 | front_shin = pt[:,self.frontshin_index] 115 | my_range = 0 116 | scores[front_shin>=my_range] += self.heading_penalty_factor 117 | 118 | front_foot = pt[:,self.frontfoot_index] 119 | my_range = 0 120 | scores[front_foot>=my_range] += self.heading_penalty_factor 121 | 122 | return scores, done_forever 123 | 124 | ###################################################################################################################### 125 | def ant_forward(self, pt, prev_pt, scores, min_perp_dist, curr_forward, prev_forward, 126 | curr_seg, moved_to_next, done_forever, all_samples, pt_number): 127 | 128 | #watch the height 129 | done_forever[pt[:,self.z_index] > 1] = 1 130 | done_forever[pt[:,self.z_index] < 0.3] = 1 131 | 132 | #action 133 | scaling= 150.0 134 | if(pt_number==all_samples.shape[1]): 135 | scores[done_forever==0] += 0.005*np.sum(np.square(all_samples[:,pt_number-1,:][done_forever==0]/scaling), axis=1) 136 | else: 137 | scores[done_forever==0] += 0.005*np.sum(np.square(all_samples[:,pt_number,:][done_forever==0]/scaling), axis=1) 138 | 139 | #velocity 140 | scores[done_forever==0] -= pt[:,self.xvel_index][done_forever==0] 141 | 142 | #survival 143 | scores[done_forever==0] -= 0.5 # used to be 0.05 144 | 145 | return scores, done_forever 146 | 147 | def swimmer_forward(self, pt, prev_pt, scores, min_perp_dist, curr_forward, prev_forward, 148 | curr_seg, moved_to_next, done_forever, all_samples, pt_number): 149 | 150 | ########### GYM 151 | 152 | '''if(pt_number==all_samples.shape[1]): 153 | reward_ctrl = 0.0001 * np.sum(np.square(all_samples[:,pt_number-1,:]), axis=1) 154 | else: 155 | reward_ctrl = 0.0001 * np.sum(np.square(all_samples[:,pt_number,:]), axis=1) 156 | reward_fwd = (pt[:,self.x_index]-prev_pt[:,self.x_index]) / 0.01''' 157 | 158 | ########### RLLAB 159 | 160 | scaling=50.0 161 | if(pt_number==all_samples.shape[1]): 162 | reward_ctrl = 0.5 * np.sum(np.square(all_samples[:,pt_number-1,:]/scaling), axis=1) 163 | else: 164 | reward_ctrl = 0.5 * np.sum(np.square(all_samples[:,pt_number,:]/scaling), axis=1) 165 | reward_fwd = pt[:,self.xvel_index] 166 | 167 | ######################### 168 | 169 | scores += -reward_fwd + reward_ctrl 170 | return scores, done_forever 171 | 172 | def cheetah_forward(self, pt, prev_pt, scores, min_perp_dist, curr_forward, prev_forward, 173 | curr_seg, moved_to_next, done_forever, all_samples, pt_number): 174 | 175 | ########### GYM 176 | 177 | '''#action 178 | if(pt_number==all_samples.shape[1]): 179 | scores += 0.1*np.sum(np.square(all_samples[:,pt_number-1,:]), axis=1) 180 | else: 181 | scores += 0.1*np.sum(np.square(all_samples[:,pt_number,:]), axis=1) 182 | 183 | #velocity 184 | scores -= (pt[:,self.x_index]-prev_pt[:,self.x_index]) / 
0.01''' 185 | 186 | ########### RLLAB 187 | 188 | #action 189 | if(pt_number==all_samples.shape[1]): 190 | scores += 0.05*np.sum(np.square(all_samples[:,pt_number-1,:]), axis=1) 191 | else: 192 | scores += 0.05*np.sum(np.square(all_samples[:,pt_number,:]), axis=1) 193 | 194 | #velocity 195 | scores -= pt[:,self.xvel_index] 196 | 197 | return scores, done_forever 198 | 199 | def hopper_forward(self, pt, prev_pt, scores, min_perp_dist, curr_forward, prev_forward, 200 | curr_seg, moved_to_next, done_forever, all_samples, pt_number): 201 | 202 | scaling=200.0 203 | 204 | #dont tilt orientation out of range 205 | orientation = pt[:,self.orientation_index] 206 | done_forever[np.abs(orientation)>= 0.3] = 1 207 | 208 | #dont fall to ground 209 | done_forever[pt[:,self.z_index] <= 0.7] = 1 210 | 211 | #action 212 | if(pt_number==all_samples.shape[1]): 213 | scores[done_forever==0] += 0.005*np.sum(np.square(all_samples[:,pt_number-1,:][done_forever==0]/scaling), axis=1) 214 | else: 215 | scores[done_forever==0] += 0.005*np.sum(np.square(all_samples[:,pt_number,:][done_forever==0])/scaling, axis=1) 216 | 217 | #velocity 218 | scores[done_forever==0] -= pt[:,self.xvel_index][done_forever==0] 219 | 220 | #survival 221 | scores[done_forever==0] -= 1 222 | 223 | return scores, done_forever -------------------------------------------------------------------------------- /scripts/ant_mbmf.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ##################################### 4 | ## SET VARS 5 | ##################################### 6 | 7 | #location of all saved trpo runs 8 | trpo_dir='/home/anagabandi/rllab/data/local/experiment/' 9 | 10 | #specific to the run 11 | how_many_seeds=1 12 | seeds=(0 70 100) 13 | num_workers_trpo=2 #how many cores to use 14 | 15 | #specific to the agent 16 | which_agent=1 17 | std_on_mlp_policy=0.5 18 | base_run_num=31 #used for filenames for saving 19 | 20 | ##################################### 21 | ## DO THE RUNS 22 | ##################################### 23 | 24 | cd .. 
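# Each loop iteration below runs the full pipeline for one seed: main.py trains the
# model-based controller with the ant_forward yaml, mbmf.py initializes an MLP policy
# from that saved run (std_on_mlp_policy sets the policy's Gaussian std), and
# trpo_run_mf.py fine-tunes the policy model-free with TRPO. One run per seed,
# numbered from base_run_num (31 here); these numbers must match the --run_nums
# passed to plot_mbmf.py in the plotting section further down.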
25 | echo 'run numbers:' 26 | iter_num=0 27 | while [ $iter_num -lt $how_many_seeds ] 28 | do 29 | seed=${seeds[$iter_num]} 30 | run_num=$(( $base_run_num + $iter_num )) 31 | echo $run_num 32 | save_trpo_run_num=$(( 1 + $iter_num )) 33 | 34 | python main.py --seed=$seed --run_num=$run_num --yaml_file='ant_forward' 35 | python mbmf.py --run_num=$run_num --which_agent=$which_agent --std_on_mlp_policy=$std_on_mlp_policy 36 | python trpo_run_mf.py --seed=$seed --save_trpo_run_num=$save_trpo_run_num --which_agent=$which_agent --num_workers_trpo=$num_workers_trpo 37 | 38 | iter_num=$(( $iter_num + 1)) 39 | done 40 | 41 | ##################################### 42 | ## PLOTTING 43 | ##################################### 44 | 45 | cd plotting 46 | 47 | if [ $how_many_seeds -eq 3 ] 48 | then 49 | python plot_mbmf.py --trpo_dir=$trpo_dir --std_on_mlp_policy=$std_on_mlp_policy --which_agent=$which_agent --run_nums 31 32 33 --seeds ${seeds[0]} ${seeds[1]} ${seeds[2]} 50 | fi 51 | 52 | if [ $how_many_seeds -eq 2 ] 53 | then 54 | python plot_mbmf.py --trpo_dir=$trpo_dir --std_on_mlp_policy=$std_on_mlp_policy --which_agent=$which_agent --run_nums 31 32 --seeds ${seeds[0]} ${seeds[1]} 55 | fi 56 | 57 | if [ $how_many_seeds -eq 1 ] 58 | then 59 | python plot_mbmf.py --trpo_dir=$trpo_dir --std_on_mlp_policy=$std_on_mlp_policy --which_agent=$which_agent --run_nums 31 --seeds ${seeds[0]} 60 | fi 61 | -------------------------------------------------------------------------------- /scripts/cheetah_mbmf.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ##################################### 4 | ## SET VARS 5 | ##################################### 6 | 7 | #location of all saved trpo runs 8 | trpo_dir='/home/anagabandi/rllab/data/local/experiment/' 9 | 10 | #specific to the run 11 | how_many_seeds=3 12 | seeds=(0 70 100) 13 | num_workers_trpo=2 #how many cores to use 14 | 15 | #specific to the agent 16 | which_agent=4 17 | std_on_mlp_policy=0.5 18 | base_run_num=11 #used for filenames for saving 19 | 20 | ##################################### 21 | ## DO THE RUNS 22 | ##################################### 23 | 24 | cd .. 
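# Same per-seed pipeline as ant_mbmf.sh, but for the cheetah (which_agent=4) and the
# cheetah_forward yaml; runs are numbered 11, 12, 13 (base_run_num + iter_num),
# matching the --run_nums used by plot_mbmf.py below.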
25 | echo 'run numbers:' 26 | iter_num=0 27 | while [ $iter_num -lt $how_many_seeds ] 28 | do 29 | seed=${seeds[$iter_num]} 30 | run_num=$(( $base_run_num + $iter_num )) 31 | echo $run_num 32 | save_trpo_run_num=$(( 1 + $iter_num )) 33 | 34 | python main.py --seed=$seed --run_num=$run_num --yaml_file='cheetah_forward' 35 | python mbmf.py --run_num=$run_num --which_agent=$which_agent --std_on_mlp_policy=$std_on_mlp_policy 36 | python trpo_run_mf.py --seed=$seed --save_trpo_run_num=$save_trpo_run_num --which_agent=$which_agent --num_workers_trpo=$num_workers_trpo 37 | 38 | iter_num=$(( $iter_num + 1)) 39 | done 40 | 41 | ##################################### 42 | ## PLOTTING 43 | ##################################### 44 | 45 | cd plotting 46 | 47 | if [ $how_many_seeds -eq 3 ] 48 | then 49 | python plot_mbmf.py --trpo_dir=$trpo_dir --std_on_mlp_policy=$std_on_mlp_policy --which_agent=$which_agent --run_nums 11 12 13 --seeds ${seeds[0]} ${seeds[1]} ${seeds[2]} 50 | fi 51 | 52 | if [ $how_many_seeds -eq 2 ] 53 | then 54 | python plot_mbmf.py --trpo_dir=$trpo_dir --std_on_mlp_policy=$std_on_mlp_policy --which_agent=$which_agent --run_nums 11 12 --seeds ${seeds[0]} ${seeds[1]} 55 | fi 56 | 57 | if [ $how_many_seeds -eq 1 ] 58 | then 59 | python plot_mbmf.py --trpo_dir=$trpo_dir --std_on_mlp_policy=$std_on_mlp_policy --which_agent=$which_agent --run_nums 11 --seeds ${seeds[0]} 60 | fi 61 | -------------------------------------------------------------------------------- /scripts/hopper_mbmf.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ##################################### 4 | ## SET VARS 5 | ##################################### 6 | 7 | #location of all saved trpo runs 8 | trpo_dir='/home/anagabandi/rllab/data/local/experiment/' 9 | 10 | #specific to the run 11 | how_many_seeds=3 12 | seeds=(0 70 100) 13 | num_workers_trpo=2 #how many cores to use 14 | 15 | #specific to the agent 16 | which_agent=6 17 | std_on_mlp_policy=1.0 18 | base_run_num=21 #used for filenames for saving 19 | 20 | ##################################### 21 | ## DO THE RUNS 22 | ##################################### 23 | 24 | cd .. 
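# Same per-seed pipeline, for the hopper (which_agent=6) with a wider policy std (1.0);
# runs are numbered 21, 22, 23 to match the plotting calls below.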
25 | echo 'run numbers:' 26 | iter_num=0 27 | while [ $iter_num -lt $how_many_seeds ] 28 | do 29 | seed=${seeds[$iter_num]} 30 | run_num=$(( $base_run_num + $iter_num )) 31 | echo $run_num 32 | save_trpo_run_num=$(( 1 + $iter_num )) 33 | 34 | python main.py --seed=$seed --run_num=$run_num --yaml_file='hopper_forward' 35 | python mbmf.py --run_num=$run_num --which_agent=$which_agent --std_on_mlp_policy=$std_on_mlp_policy 36 | python trpo_run_mf.py --seed=$seed --save_trpo_run_num=$save_trpo_run_num --which_agent=$which_agent --num_workers_trpo=$num_workers_trpo 37 | 38 | iter_num=$(( $iter_num + 1)) 39 | done 40 | 41 | ##################################### 42 | ## PLOTTING 43 | ##################################### 44 | 45 | cd plotting 46 | 47 | if [ $how_many_seeds -eq 3 ] 48 | then 49 | python plot_mbmf.py --trpo_dir=$trpo_dir --std_on_mlp_policy=$std_on_mlp_policy --which_agent=$which_agent --run_nums 21 22 23 --seeds ${seeds[0]} ${seeds[1]} ${seeds[2]} 50 | fi 51 | 52 | if [ $how_many_seeds -eq 2 ] 53 | then 54 | python plot_mbmf.py --trpo_dir=$trpo_dir --std_on_mlp_policy=$std_on_mlp_policy --which_agent=$which_agent --run_nums 21 22 --seeds ${seeds[0]} ${seeds[1]} 55 | fi 56 | 57 | if [ $how_many_seeds -eq 1 ] 58 | then 59 | python plot_mbmf.py --trpo_dir=$trpo_dir --std_on_mlp_policy=$std_on_mlp_policy --which_agent=$which_agent --run_nums 21 --seeds ${seeds[0]} 60 | fi 61 | -------------------------------------------------------------------------------- /scripts/swimmer_mbmf.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ##################################### 4 | ## SET VARS 5 | ##################################### 6 | 7 | #location of all saved trpo runs 8 | trpo_dir='/home/anagabandi/rllab/data/local/experiment/' 9 | 10 | #specific to the run 11 | how_many_seeds=3 12 | seeds=(0 70 100) 13 | num_workers_trpo=2 #how many cores to use 14 | 15 | #specific to the agent 16 | which_agent=2 17 | std_on_mlp_policy=1.0 18 | base_run_num=1 #used for filenames for saving 19 | 20 | ##################################### 21 | ## DO THE RUNS 22 | ##################################### 23 | 24 | cd .. 
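# Same per-seed pipeline, for the swimmer (which_agent=2); runs are numbered 1, 2, 3
# to match the plotting calls below.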
25 | echo 'run numbers:' 26 | iter_num=0 27 | while [ $iter_num -lt $how_many_seeds ] 28 | do 29 | seed=${seeds[$iter_num]} 30 | run_num=$(( $base_run_num + $iter_num )) 31 | echo $run_num 32 | save_trpo_run_num=$(( 1 + $iter_num )) 33 | 34 | python main.py --seed=$seed --run_num=$run_num --yaml_file='swimmer_forward' 35 | python mbmf.py --run_num=$run_num --which_agent=$which_agent --std_on_mlp_policy=$std_on_mlp_policy 36 | python trpo_run_mf.py --seed=$seed --save_trpo_run_num=$save_trpo_run_num --which_agent=$which_agent --num_workers_trpo=$num_workers_trpo 37 | 38 | iter_num=$(( $iter_num + 1)) 39 | done 40 | 41 | ##################################### 42 | ## PLOTTING 43 | ##################################### 44 | 45 | cd plotting 46 | 47 | if [ $how_many_seeds -eq 3 ] 48 | then 49 | python plot_mbmf.py --trpo_dir=$trpo_dir --std_on_mlp_policy=$std_on_mlp_policy --which_agent=$which_agent --run_nums 1 2 3 --seeds ${seeds[0]} ${seeds[1]} ${seeds[2]} 50 | fi 51 | 52 | if [ $how_many_seeds -eq 2 ] 53 | then 54 | python plot_mbmf.py --trpo_dir=$trpo_dir --std_on_mlp_policy=$std_on_mlp_policy --which_agent=$which_agent --run_nums 1 2 --seeds ${seeds[0]} ${seeds[1]} 55 | fi 56 | 57 | if [ $how_many_seeds -eq 1 ] 58 | then 59 | python plot_mbmf.py --trpo_dir=$trpo_dir --std_on_mlp_policy=$std_on_mlp_policy --which_agent=$which_agent --run_nums 1 --seeds ${seeds[0]} 60 | fi -------------------------------------------------------------------------------- /trajectories.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def make_trajectory(shape, starting_state_NN, x_index, y_index, which_agent): 4 | 5 | curr_x = np.copy(starting_state_NN[x_index]) 6 | curr_y = np.copy(starting_state_NN[y_index]) 7 | 8 | my_list = [] 9 | 10 | if(shape=="left_turn"): 11 | if(which_agent==1): 12 | my_list.append(np.array([curr_x, curr_y])) 13 | my_list.append(np.array([curr_x+2, curr_y])) 14 | my_list.append(np.array([curr_x+4, curr_y])) 15 | my_list.append(np.array([curr_x+6, curr_y])) 16 | my_list.append(np.array([curr_x+6, curr_y+2])) 17 | my_list.append(np.array([curr_x+6, curr_y+3])) 18 | my_list.append(np.array([curr_x+6, curr_y+4])) 19 | my_list.append(np.array([curr_x+6, curr_y+5])) 20 | my_list.append(np.array([curr_x+6, curr_y+6])) 21 | my_list.append(np.array([curr_x+6, curr_y+7])) 22 | else: 23 | my_list.append(np.array([curr_x, curr_y])) 24 | my_list.append(np.array([curr_x+1, curr_y])) 25 | my_list.append(np.array([curr_x+2, curr_y])) 26 | my_list.append(np.array([curr_x+3, curr_y])) 27 | my_list.append(np.array([curr_x+4, curr_y+1])) 28 | my_list.append(np.array([curr_x+4, curr_y+2])) 29 | my_list.append(np.array([curr_x+4, curr_y+3])) 30 | my_list.append(np.array([curr_x+4, curr_y+4])) 31 | 32 | if(shape=="right_turn"): 33 | if(which_agent==1): 34 | my_list.append(np.array([curr_x, curr_y])) 35 | my_list.append(np.array([curr_x, curr_y+1])) 36 | my_list.append(np.array([curr_x, curr_y+2])) 37 | my_list.append(np.array([curr_x, curr_y+3])) 38 | my_list.append(np.array([curr_x, curr_y+4])) 39 | my_list.append(np.array([curr_x+2, curr_y+4])) 40 | my_list.append(np.array([curr_x+3, curr_y+4])) 41 | my_list.append(np.array([curr_x+4, curr_y+4])) 42 | my_list.append(np.array([curr_x+6, curr_y+4])) 43 | my_list.append(np.array([curr_x+7, curr_y+4])) 44 | else: 45 | my_list.append(np.array([curr_x, curr_y])) 46 | my_list.append(np.array([curr_x, curr_y+1])) 47 | my_list.append(np.array([curr_x, curr_y+2])) 48 | 
my_list.append(np.array([curr_x+2, curr_y+3])) 49 | my_list.append(np.array([curr_x+3, curr_y+3])) 50 | my_list.append(np.array([curr_x+4, curr_y+3])) 51 | my_list.append(np.array([curr_x+5, curr_y+3])) 52 | my_list.append(np.array([curr_x+6, curr_y+3])) 53 | my_list.append(np.array([curr_x+7, curr_y+3])) 54 | my_list.append(np.array([curr_x+8, curr_y+3])) 55 | 56 | if(shape=="u_turn"): 57 | my_list.append(np.array([curr_x, curr_y])) 58 | my_list.append(np.array([curr_x+2, curr_y])) 59 | my_list.append(np.array([curr_x+4, curr_y])) 60 | my_list.append(np.array([curr_x+4, curr_y+1])) 61 | my_list.append(np.array([curr_x+4, curr_y+2])) 62 | my_list.append(np.array([curr_x+2, curr_y+2])) 63 | my_list.append(np.array([curr_x+1, curr_y+2])) 64 | my_list.append(np.array([curr_x, curr_y+2])) 65 | 66 | if(shape=="straight"): 67 | i=0 68 | num_pts = 40 69 | while(i < num_pts): 70 | my_list.append(np.array([curr_x+i, curr_y])) 71 | i+=0.7 72 | 73 | if(shape=="backward"): 74 | i=0 75 | num_pts = 40 76 | while(i < num_pts): 77 | my_list.append(np.array([curr_x-i, curr_y])) 78 | i+=0.5 79 | 80 | if(shape=="forward_backward"): 81 | my_list.append(np.array([curr_x, curr_y])) 82 | my_list.append(np.array([curr_x+1, curr_y])) 83 | my_list.append(np.array([curr_x+2, curr_y])) 84 | my_list.append(np.array([curr_x+3, curr_y])) 85 | my_list.append(np.array([curr_x+2, curr_y])) 86 | my_list.append(np.array([curr_x+1, curr_y])) 87 | my_list.append(np.array([curr_x+0, curr_y])) 88 | my_list.append(np.array([curr_x-1, curr_y])) 89 | my_list.append(np.array([curr_x-2, curr_y])) 90 | 91 | if(shape=="circle"): 92 | num_pts = 10 93 | radius=2.0 94 | speed=-np.pi/8.0 95 | for i in range(num_pts): 96 | curr_x= radius*np.cos(speed*i)-radius 97 | curr_y= radius*np.sin(speed*i) 98 | my_list.append(np.array([curr_x, curr_y])) 99 | 100 | return np.array(my_list) 101 | 102 | def get_trajfollow_params(which_agent, desired_traj_type): 103 | 104 | desired_snake_headingInit= 0 105 | horiz_penalty_factor= 0 106 | forward_encouragement_factor= 0 107 | heading_penalty_factor= 0 108 | 109 | if(which_agent==1): 110 | if(desired_traj_type=="right_turn"): 111 | horiz_penalty_factor= 3 112 | forward_encouragement_factor= 50 113 | heading_penalty_factor= 100 114 | if(desired_traj_type=="left_turn"): 115 | horiz_penalty_factor= 4 116 | forward_encouragement_factor= 85 117 | heading_penalty_factor= 100 118 | if(desired_traj_type=="straight"): 119 | horiz_penalty_factor= 3.5 120 | forward_encouragement_factor= 85 121 | heading_penalty_factor= 100 122 | if(desired_traj_type=="u_turn"): 123 | horiz_penalty_factor= 3 124 | forward_encouragement_factor= 50 125 | heading_penalty_factor= 100 126 | 127 | if(which_agent==2): 128 | if(desired_traj_type=="right_turn"): 129 | desired_snake_headingInit= np.pi/2.0 130 | horiz_penalty_factor= 0.1 131 | forward_encouragement_factor= 250 132 | heading_penalty_factor= 0.9 133 | if(desired_traj_type=="left_turn"): 134 | horiz_penalty_factor= 0.7 135 | forward_encouragement_factor= 200 136 | heading_penalty_factor= 0.9 137 | if(desired_traj_type=="straight"): 138 | horiz_penalty_factor= 4 139 | forward_encouragement_factor= 500 140 | heading_penalty_factor= 2 141 | 142 | if(which_agent==4): 143 | if(desired_traj_type=="backward"): 144 | horiz_penalty_factor= 0 145 | forward_encouragement_factor= 20 146 | heading_penalty_factor= 10 147 | if(desired_traj_type=="forward_backward"): 148 | horiz_penalty_factor= 0 149 | forward_encouragement_factor= 20 150 | heading_penalty_factor= 10 151 | 
if(desired_traj_type=="straight"): 152 | horiz_penalty_factor= 0 153 | forward_encouragement_factor= 40 154 | heading_penalty_factor= 10 155 | 156 | return horiz_penalty_factor, forward_encouragement_factor, heading_penalty_factor, desired_snake_headingInit -------------------------------------------------------------------------------- /trpo_run_mf.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import math 4 | npr = np.random 5 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy 6 | import tensorflow as tf 7 | from six.moves import cPickle 8 | from collect_samples import CollectSamples 9 | from get_true_action import GetTrueAction 10 | import os 11 | import copy 12 | from helper_funcs import create_env 13 | from helper_funcs import perform_rollouts 14 | from helper_funcs import add_noise 15 | from feedforward_network import feedforward_network 16 | from helper_funcs import visualize_rendering 17 | import argparse 18 | 19 | #TRPO things 20 | from rllab.algos.trpo import TRPO 21 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 22 | from rllab.optimizers.conjugate_gradient_optimizer import ConjugateGradientOptimizer 23 | from rllab.optimizers.conjugate_gradient_optimizer import FiniteDifferenceHvp 24 | from rllab.misc.instrument import run_experiment_lite 25 | 26 | 27 | def run_task(v): 28 | 29 | env, _ = create_env(v["which_agent"]) 30 | policy = GaussianMLPPolicy(env_spec=env.spec, hidden_sizes=(64, 64)) 31 | baseline = LinearFeatureBaseline(env_spec=env.spec) 32 | optimizer_params = dict(base_eps=1e-5) 33 | 34 | algo = TRPO( 35 | env=env, 36 | policy=policy, 37 | baseline=baseline, 38 | batch_size=v["batch_size"], 39 | max_path_length=v["steps_per_rollout"], 40 | n_itr=v["num_trpo_iters"], 41 | discount=0.995, 42 | optimizer=v["ConjugateGradientOptimizer"](hvp_approach=v["FiniteDifferenceHvp"](**optimizer_params)), 43 | step_size=0.05, 44 | plot_true=True) 45 | 46 | #train the policy 47 | algo.train() 48 | 49 | ########################################## 50 | ########################################## 51 | 52 | #ARGUMENTS TO SPECIFY 53 | parser = argparse.ArgumentParser() 54 | parser.add_argument('--seed', type=int, default='0') 55 | parser.add_argument('--steps_per_rollout', type=int, default='1000') 56 | parser.add_argument('--save_trpo_run_num', type=int, default='1') 57 | parser.add_argument('--which_agent', type=int, default= 2) 58 | parser.add_argument('--num_workers_trpo', type=int, default=2) 59 | args = parser.parse_args() 60 | 61 | batch_size = 50000 62 | 63 | steps_per_rollout = args.steps_per_rollout 64 | num_trpo_iters = 2500 65 | if(args.which_agent==1): 66 | num_trpo_iters = 2500 67 | if(args.which_agent==2): 68 | steps_per_rollout=333 69 | num_trpo_iters = 500 70 | if(args.which_agent==4): 71 | num_trpo_iters= 2500 72 | if(args.which_agent==6): 73 | num_trpo_iters= 2000 74 | 75 | ########################################## 76 | ########################################## 77 | 78 | # set tf seed 79 | npr.seed(args.seed) 80 | tf.set_random_seed(args.seed) 81 | 82 | run_experiment_lite(run_task, plot=True, snapshot_mode="all", use_cloudpickle=True, 83 | n_parallel=str(args.num_workers_trpo), 84 | exp_name='agent_'+ str(args.which_agent)+'_seed_'+str(args.seed)+'_mf'+ '_run'+ str(args.save_trpo_run_num), 85 | variant=dict(batch_size=batch_size, which_agent=args.which_agent, 86 | steps_per_rollout=steps_per_rollout, 
num_trpo_iters=num_trpo_iters, 87 | FiniteDifferenceHvp=FiniteDifferenceHvp, ConjugateGradientOptimizer=ConjugateGradientOptimizer)) 88 | -------------------------------------------------------------------------------- /yaml_files/ant_forward.yaml: -------------------------------------------------------------------------------- 1 | which_agent: 1 2 | follow_trajectories: False 3 | 4 | data_collection: 5 | use_threading: True 6 | num_rollouts_train: 700 7 | num_rollouts_val: 20 8 | 9 | dyn_model: 10 | num_fc_layers: 1 11 | depth_fc_layers: 500 12 | batchsize: 512 13 | lr: 0.001 14 | nEpoch: 20 15 | fraction_use_new: 0.5 16 | 17 | controller: 18 | horizon: 5 19 | num_control_samples: 15000 20 | 21 | aggregation: 22 | num_aggregation_iters: 1 23 | num_trajectories_for_aggregation: 2 24 | rollouts_forTraining: 1 25 | 26 | noise: 27 | make_aggregated_dataset_noisy: True 28 | make_training_dataset_noisy: True 29 | noise_actions_during_MPC_rollouts: False 30 | 31 | steps: 32 | dt_steps: 1 33 | steps_per_episode: 1000 34 | steps_per_rollout_train: 1000 35 | steps_per_rollout_val: 200 36 | 37 | saving: 38 | min_rew_for_saving: 0 39 | 40 | generic: 41 | visualize_True: True 42 | visualize_False: False -------------------------------------------------------------------------------- /yaml_files/ant_trajfollow.yaml: -------------------------------------------------------------------------------- 1 | which_agent: 1 2 | follow_trajectories: True 3 | 4 | data_collection: 5 | use_threading: True 6 | num_rollouts_train: 700 7 | num_rollouts_val: 20 8 | 9 | dyn_model: 10 | num_fc_layers: 1 11 | depth_fc_layers: 500 12 | batchsize: 512 13 | lr: 0.001 14 | nEpoch: 60 15 | fraction_use_new: 0 16 | 17 | controller: 18 | horizon: 15 19 | num_control_samples: 7000 20 | 21 | aggregation: 22 | num_aggregation_iters: 1 23 | num_trajectories_for_aggregation: 1 24 | rollouts_forTraining: 1 25 | 26 | noise: 27 | make_aggregated_dataset_noisy: True 28 | make_training_dataset_noisy: True 29 | noise_actions_during_MPC_rollouts: False 30 | 31 | steps: 32 | dt_steps: 1 33 | steps_per_episode: 1000 34 | steps_per_rollout_train: 1000 35 | steps_per_rollout_val: 1000 36 | 37 | saving: 38 | min_rew_for_saving: -1000 39 | 40 | generic: 41 | visualize_True: True 42 | visualize_False: False -------------------------------------------------------------------------------- /yaml_files/cheetah_forward.yaml: -------------------------------------------------------------------------------- 1 | which_agent: 4 2 | follow_trajectories: False 3 | 4 | data_collection: 5 | use_threading: True 6 | num_rollouts_train: 10 7 | num_rollouts_val: 20 8 | 9 | dyn_model: 10 | num_fc_layers: 1 11 | depth_fc_layers: 500 12 | batchsize: 512 13 | lr: 0.001 14 | nEpoch: 60 15 | fraction_use_new: 0.9 16 | 17 | controller: 18 | horizon: 20 19 | num_control_samples: 1000 20 | 21 | aggregation: 22 | num_aggregation_iters: 7 23 | num_trajectories_for_aggregation: 10 24 | rollouts_forTraining: 9 25 | 26 | noise: 27 | make_aggregated_dataset_noisy: True 28 | make_training_dataset_noisy: True 29 | noise_actions_during_MPC_rollouts: True 30 | 31 | steps: 32 | dt_steps: 1 33 | steps_per_episode: 1000 34 | steps_per_rollout_train: 1000 35 | steps_per_rollout_val: 1000 36 | 37 | saving: 38 | min_rew_for_saving: 0 39 | 40 | generic: 41 | visualize_True: True 42 | visualize_False: False -------------------------------------------------------------------------------- /yaml_files/cheetah_trajfollow.yaml: 
-------------------------------------------------------------------------------- 1 | which_agent: 4 2 | follow_trajectories: True 3 | 4 | data_collection: 5 | use_threading: True 6 | num_rollouts_train: 200 7 | num_rollouts_val: 20 8 | 9 | dyn_model: 10 | num_fc_layers: 1 11 | depth_fc_layers: 500 12 | batchsize: 512 13 | lr: 0.001 14 | nEpoch: 40 15 | fraction_use_new: 0 16 | 17 | controller: 18 | horizon: 10 19 | num_control_samples: 1000 20 | 21 | aggregation: 22 | num_aggregation_iters: 1 23 | num_trajectories_for_aggregation: 1 24 | rollouts_forTraining: 1 25 | 26 | noise: 27 | make_aggregated_dataset_noisy: True 28 | make_training_dataset_noisy: True 29 | noise_actions_during_MPC_rollouts: True 30 | 31 | steps: 32 | dt_steps: 1 33 | steps_per_episode: 1000 34 | steps_per_rollout_train: 1000 35 | steps_per_rollout_val: 1000 36 | 37 | saving: 38 | min_rew_for_saving: -1000 39 | 40 | generic: 41 | visualize_True: True 42 | visualize_False: False -------------------------------------------------------------------------------- /yaml_files/hopper_forward.yaml: -------------------------------------------------------------------------------- 1 | which_agent: 6 2 | follow_trajectories: False 3 | 4 | data_collection: 5 | use_threading: True 6 | num_rollouts_train: 20 7 | num_rollouts_val: 20 8 | 9 | dyn_model: 10 | num_fc_layers: 1 11 | depth_fc_layers: 500 12 | batchsize: 512 13 | lr: 0.001 14 | nEpoch: 40 15 | fraction_use_new: 0.9 16 | 17 | controller: 18 | horizon: 40 19 | num_control_samples: 1000 20 | 21 | aggregation: 22 | num_aggregation_iters: 5 23 | num_trajectories_for_aggregation: 11 24 | rollouts_forTraining: 10 25 | 26 | noise: 27 | make_aggregated_dataset_noisy: True 28 | make_training_dataset_noisy: True 29 | noise_actions_during_MPC_rollouts: False 30 | 31 | steps: 32 | dt_steps: 1 33 | steps_per_episode: 1000 34 | steps_per_rollout_train: 200 35 | steps_per_rollout_val: 200 36 | 37 | saving: 38 | min_rew_for_saving: 0 39 | 40 | generic: 41 | visualize_True: True 42 | visualize_False: False -------------------------------------------------------------------------------- /yaml_files/swimmer_forward.yaml: -------------------------------------------------------------------------------- 1 | which_agent: 2 2 | follow_trajectories: False 3 | 4 | data_collection: 5 | use_threading: True 6 | num_rollouts_train: 25 7 | num_rollouts_val: 20 8 | 9 | dyn_model: 10 | num_fc_layers: 1 11 | depth_fc_layers: 500 12 | batchsize: 512 13 | lr: 0.001 14 | nEpoch: 30 15 | fraction_use_new: 0.9 16 | 17 | controller: 18 | horizon: 20 19 | num_control_samples: 5000 20 | 21 | aggregation: 22 | num_aggregation_iters: 6 23 | num_trajectories_for_aggregation: 10 24 | rollouts_forTraining: 9 25 | 26 | noise: 27 | make_aggregated_dataset_noisy: True 28 | make_training_dataset_noisy: True 29 | noise_actions_during_MPC_rollouts: True 30 | 31 | steps: 32 | dt_steps: 3 #dt_steps: frameskip normally 50, but changed it to 150 33 | steps_per_episode: 333 34 | steps_per_rollout_train: 333 35 | steps_per_rollout_val: 333 36 | 37 | saving: 38 | min_rew_for_saving: 0 39 | 40 | generic: 41 | visualize_True: True 42 | visualize_False: False -------------------------------------------------------------------------------- /yaml_files/swimmer_trajfollow.yaml: -------------------------------------------------------------------------------- 1 | which_agent: 2 2 | follow_trajectories: True 3 | 4 | data_collection: 5 | use_threading: True 6 | num_rollouts_train: 200 7 | num_rollouts_val: 20 8 | 9 | dyn_model: 10 | 
num_fc_layers: 1 11 | depth_fc_layers: 500 12 | batchsize: 512 13 | lr: 0.001 14 | nEpoch: 70 15 | fraction_use_new: 0 16 | 17 | controller: 18 | horizon: 5 19 | num_control_samples: 5000 20 | 21 | aggregation: 22 | num_aggregation_iters: 1 23 | num_trajectories_for_aggregation: 1 24 | rollouts_forTraining: 1 25 | 26 | noise: 27 | make_aggregated_dataset_noisy: True 28 | make_training_dataset_noisy: True 29 | noise_actions_during_MPC_rollouts: False 30 | 31 | steps: 32 | dt_steps: 3 #dt_steps: frameskip normally 50, but changed it to 150 33 | steps_per_episode: 1000 34 | steps_per_rollout_train: 500 35 | steps_per_rollout_val: 200 36 | 37 | saving: 38 | min_rew_for_saving: -1000 39 | 40 | generic: 41 | visualize_True: True 42 | visualize_False: False --------------------------------------------------------------------------------
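The scoring functions in reward_functions.py above are written to be vectorized over whole batches of candidate action sequences (note the scores, done_forever, all_samples, and pt_number arguments), and the controller section of each yaml file (horizon, num_control_samples) sets how many sequences are scored and for how long. The sketch below is a minimal, illustrative random-shooting MPC loop written against a simplified scoring signature: dyn_model_predict and reward_fn are placeholder names, the trajectory-following arguments (min_perp_dist, curr_seg, and so on) are omitted, and this is not the actual mpc_controller.py implementation.

import numpy as np

def choose_action(curr_state, dyn_model_predict, reward_fn,
                  action_low, action_high, horizon, num_control_samples):
    # sample random candidate action sequences: [num_control_samples, horizon, action_dim]
    action_dim = action_low.shape[0]
    all_samples = np.random.uniform(action_low, action_high,
                                    size=(num_control_samples, horizon, action_dim))

    # roll every candidate forward in parallel under the learned dynamics model
    pt = np.tile(curr_state, (num_control_samples, 1))
    scores = np.zeros(num_control_samples)
    done_forever = np.zeros(num_control_samples)
    for pt_number in range(horizon):
        prev_pt = pt
        pt = dyn_model_predict(pt, all_samples[:, pt_number, :])
        # accumulate cost in the style of ant_forward / cheetah_forward:
        # control penalty plus negated forward velocity, so lower is better
        scores, done_forever = reward_fn(pt, prev_pt, scores, done_forever,
                                         all_samples, pt_number)

    # MPC: execute only the first action of the best-scoring sequence,
    # then replan from the next observed state
    best = np.argmin(scores)
    return all_samples[best, 0, :]

With the cheetah_forward settings above, for example, such a loop would score 1000 candidate sequences over a 20-step horizon at every control step and replan after executing a single action.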