├── .gitignore
├── collect_samples.py
├── collect_samples_threaded.py
├── data_manipulation.py
├── docs
│   ├── installation.md
│   ├── notes.md
│   ├── sample_plots
│   │   ├── Ant_comparison.png
│   │   ├── Cheetah_comparison.png
│   │   ├── Hopper_comparison.png
│   │   └── Swimmer_comparison.png
│   └── trajfollow_videos
│       ├── ant_left-1.ogv
│       ├── ant_left.ogv
│       ├── ant_right.ogv
│       ├── ant_straight.ogv
│       ├── ant_uturn.ogv
│       ├── cheetah_backward.ogv
│       ├── cheetah_forward.ogv
│       ├── cheetah_forwardbackward.ogv
│       ├── swimmer_left.ogv
│       ├── swimmer_right.ogv
│       └── swimmer_straight.ogv
├── dynamics_model.py
├── feedforward_network.py
├── get_true_action.py
├── helper_funcs.py
├── main.py
├── mbmf.py
├── mpc_controller.py
├── plotting
│   ├── plot_forwardsim.ipynb
│   ├── plot_loss.ipynb
│   ├── plot_mbmf.py
│   └── plot_trajfollow.ipynb
├── point_env.py
├── policy_random.py
├── readme.md
├── reward_functions.py
├── scripts
│   ├── ant_mbmf.sh
│   ├── cheetah_mbmf.sh
│   ├── hopper_mbmf.sh
│   └── swimmer_mbmf.sh
├── trajectories.py
├── trpo_run_mf.py
└── yaml_files
    ├── ant_forward.yaml
    ├── ant_trajfollow.yaml
    ├── cheetah_forward.yaml
    ├── cheetah_trajfollow.yaml
    ├── hopper_forward.yaml
    ├── swimmer_forward.yaml
    └── swimmer_trajfollow.yaml
/.gitignore:
--------------------------------------------------------------------------------
1 | run_*
2 | reacher*
3 | saved_roach_*
4 | __pycache*
5 |
--------------------------------------------------------------------------------
/collect_samples.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import rllab
3 | import time
4 | import matplotlib.pyplot as plt
5 | import copy
6 |
7 | class CollectSamples(object):
8 |
9 | def __init__(self, env, policy, visualize_rollouts, which_agent, dt_steps, dt_from_xml, follow_trajectories):
10 | self.env = env
11 | self.policy = policy
12 | self.visualize_at_all = visualize_rollouts
13 | self.which_agent = which_agent
14 |
15 | self.low = self.env.observation_space.low
16 | self.high = self.env.observation_space.high
17 | self.shape = self.env.observation_space.shape
18 |
19 | self.use_low = self.low + (self.high-self.low)/3.0
20 | self.use_high = self.high - (self.high-self.low)/3.0
21 |
22 | self.dt_steps = dt_steps
23 | self.dt_from_xml = dt_from_xml
24 |
25 | self.follow_trajectories = follow_trajectories
26 |
27 | def collect_samples(self, num_rollouts, steps_per_rollout):
28 | observations_list = []
29 | actions_list = []
30 | starting_states_list=[]
31 | rewards_list = []
32 | visualization_frequency = 10
33 | for rollout_number in range(num_rollouts):
34 | if(self.which_agent==2):
35 | if(self.follow_trajectories):
36 | observation, starting_state = self.env.reset(returnStartState=True, isSwimmer=True, need_diff_headings=True)
37 | else:
38 | observation, starting_state = self.env.reset(returnStartState=True, isSwimmer=True)
39 | else:
40 | observation, starting_state = self.env.reset(returnStartState=True)
41 | observations, actions, reward_for_rollout = self.perform_rollout(observation, steps_per_rollout,
42 | rollout_number, visualization_frequency)
43 |
44 | rewards_list.append(reward_for_rollout)
45 | observations= np.array(observations)
46 | actions= np.array(actions)
47 | observations_list.append(observations)
48 | actions_list.append(actions)
49 | starting_states_list.append(starting_state)
50 |
51 | #return list of length = num rollouts
52 | #each entry of that list contains one rollout
53 | #each entry is [steps_per_rollout x statespace_dim] or [steps_per_rollout x actionspace_dim]
54 | return observations_list, actions_list, starting_states_list, rewards_list
55 |
56 | def perform_rollout(self, observation, steps_per_rollout, rollout_number, visualization_frequency):
57 | observations = []
58 | actions = []
59 | visualize = False
60 | reward_for_rollout = 0
61 | if((rollout_number%visualization_frequency)==0):
62 | print("currently performing rollout #", rollout_number)
63 | if(self.visualize_at_all):
64 | all_states=[]
65 | print ("---- visualizing a rollout ----")
66 | visualize=True
67 |
68 | for step_num in range(steps_per_rollout):
69 | action, _ = self.policy.get_action(observation)
70 |
71 | observations.append(observation)
72 | actions.append(action)
73 |
74 | next_observation, reward, terminal, _ = self.env.step(action, collectingInitialData=True)
75 | reward_for_rollout+= reward
76 |
77 | observation = np.copy(next_observation)
78 |
79 | if terminal:
80 | print("Had to stop rollout because terminal state was reached.")
81 | break
82 |
83 | if(visualize):
84 | if(self.which_agent==0):
85 | curr_state = self.env.render()
86 | all_states.append(np.expand_dims(curr_state, axis=0))
87 | else:
88 | self.env.render()
89 | time.sleep(self.dt_steps*self.dt_from_xml)
90 |
91 | if(visualize and (self.which_agent==0)):
92 | all_states= np.concatenate(all_states, axis=0)
93 | plt.plot(all_states[:,0], all_states[:,1], 'r')
94 | plt.show()
95 | return observations, actions, reward_for_rollout
--------------------------------------------------------------------------------
/collect_samples_threaded.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import time
3 | import copy
4 | import matplotlib.pyplot as plt
5 | import copy
6 | import multiprocessing
7 |
8 | class CollectSamples(object):
9 |
10 | def __init__(self, env, policy, visualize_rollouts, which_agent, dt_steps, dt_from_xml, follow_trajectories):
11 | self.main_env = copy.deepcopy(env)
12 | self.policy = policy
13 | self.visualize_at_all = visualize_rollouts
14 | self.which_agent = which_agent
15 | self.list_observations=[]
16 | self.list_actions=[]
17 | self.list_starting_states=[]
18 |
19 | self.stateDim = self.main_env.observation_space.shape[0]
20 | self.actionDim = self.main_env.action_space.shape[0]
21 |
22 | self.dt_steps = dt_steps
23 | self.dt_from_xml = dt_from_xml
24 | self.follow_trajectories = follow_trajectories
25 |
26 | def collect_samples(self, num_rollouts, steps_per_rollout):
27 |
28 | #vars
29 | all_processes=[]
30 | visualization_frequency = num_rollouts/10
31 | num_workers=multiprocessing.cpu_count() #detect number of cores
32 | pool = multiprocessing.Pool(num_workers) #one worker per detected core
33 |
34 | #multiprocessing for running rollouts (utilize multiple cores)
35 | for rollout_number in range(num_rollouts):
36 | result = pool.apply_async(self.do_rollout,
37 | args=(steps_per_rollout, rollout_number, visualization_frequency),
38 | callback=self.mycallback)
39 |
40 | pool.close() #not going to add anything else to the pool
41 | pool.join() #wait for the processes to terminate
42 |
43 | #return lists of length = num rollouts
44 | #each entry contains one rollout
45 | #each entry is [steps_per_rollout x statespace_dim] or [steps_per_rollout x actionspace_dim]
46 | return self.list_observations, self.list_actions, self.list_starting_states, []
47 |
48 | def mycallback(self, x): #x is shape [numSteps, state + action]
49 | self.list_observations.append(x[:,0:self.stateDim])
50 | self.list_actions.append(x[:,self.stateDim:(self.stateDim+self.actionDim)])
51 | self.list_starting_states.append(x[0,(self.stateDim+self.actionDim):])
52 |
53 | def do_rollout(self, steps_per_rollout, rollout_number, visualization_frequency):
54 | #init vars
55 | #print("START ", rollout_number)
56 | observations = []
57 | actions = []
58 | visualize = False
59 |
60 | env = copy.deepcopy(self.main_env)
61 |
62 | #reset env
63 | if(self.which_agent==2):
64 | if(self.follow_trajectories):
65 | observation, starting_state = env.reset(returnStartState=True, isSwimmer=True, need_diff_headings=True)
66 | else:
67 | observation, starting_state = env.reset(returnStartState=True, isSwimmer=True)
68 | else:
69 | observation, starting_state = env.reset(returnStartState=True)
70 |
71 | #visualize only sometimes
72 | if((rollout_number%visualization_frequency)==0):
73 | if(self.visualize_at_all):
74 | all_states=[]
75 | print ("---- visualizing a rollout ----")
76 | visualize=True
77 |
78 | for step_num in range(steps_per_rollout):
79 |
80 | #decide what action to take
81 | action, _ = self.policy.get_action(observation)
82 |
83 | #keep track of observations + actions
84 | observations.append(observation)
85 | actions.append(action)
86 |
87 | #perform the action
88 | next_observation, reward, terminal, _ = env.step(action, collectingInitialData=True)
89 |
90 | #update the observation
91 | observation = np.copy(next_observation)
92 |
93 | if terminal:
94 | #print("Had to stop rollout because terminal state was reached.")
95 | break
96 |
97 | if(visualize):
98 | if(self.which_agent==0):
99 | curr_state = env.render()
100 | all_states.append(np.expand_dims(curr_state, axis=0))
101 | else:
102 | env.render()
103 | time.sleep(self.dt_steps*self.dt_from_xml)
104 |
105 | if(visualize and (self.which_agent==0)):
106 | all_states= np.concatenate(all_states, axis=0)
107 | plt.plot(all_states[:,0], all_states[:,1], 'r')
108 | plt.show()
109 |
110 | if((rollout_number%visualization_frequency)==0):
111 | print("Completed rollout # ", rollout_number)
112 |
113 | array_starting_state = np.tile(starting_state, (np.array(actions).shape[0],1))
114 | return np.concatenate((np.array(observations), np.array(actions), array_starting_state), axis=1)
--------------------------------------------------------------------------------
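
The per-rollout array returned by do_rollout() in collect_samples_threaded.py above packs [observation | action | starting_state] into one row per step, and mycallback() recovers the pieces by column slicing. A standalone numpy sketch of that packing/unpacking, with toy dimensions (not tied to any particular agent):

```python
import numpy as np

# toy dimensions -- stateDim/actionDim come from the env in the real code
stateDim, actionDim, n_steps = 4, 2, 5
obs = np.random.randn(n_steps, stateDim)
acts = np.random.randn(n_steps, actionDim)
starting_state = np.random.randn(7)  # its dimension can differ from the observation's

# packing, as at the end of do_rollout()
packed = np.concatenate((obs, acts, np.tile(starting_state, (n_steps, 1))), axis=1)

# unpacking, as in mycallback()
obs_back = packed[:, 0:stateDim]
acts_back = packed[:, stateDim:(stateDim + actionDim)]
start_back = packed[0, (stateDim + actionDim):]

assert np.allclose(obs_back, obs)
assert np.allclose(acts_back, acts)
assert np.allclose(start_back, starting_state)
```
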
/data_manipulation.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import numpy.random as npr
3 | import tensorflow as tf
4 | import time
5 | import math
6 | import matplotlib.pyplot as plt
7 | import copy
8 |
9 | def get_indices(which_agent):
10 | x_index = -7
11 | y_index = -7
12 | z_index = -7
13 | yaw_index = -7
14 | joint1_index = -7
15 | joint2_index = -7
16 | frontleg_index = -7
17 | frontshin_index = -7
18 | frontfoot_index = -7
19 | xvel_index = -7
20 | orientation_index = -7
21 |
22 | if(which_agent==0): #pointmass
23 | x_index= 0
24 | y_index= 1
25 | elif(which_agent==1): #ant
26 | x_index= 29
27 | y_index= 30
28 | z_index = 31
29 | xvel_index = 38
30 | elif(which_agent==2): #swimmer
31 | x_index= 10
32 | y_index= 11
33 | yaw_index = 2
34 | joint1_index = 3
35 | joint2_index = 4
36 | xvel_index = 13
37 | elif(which_agent==3): #reacher
38 | x_index= 6
39 | y_index= 7
40 | elif(which_agent==4): #cheetah
41 | x_index= 18
42 | y_index= 20
43 | frontleg_index = 6
44 | frontshin_index = 7
45 | frontfoot_index = 8
46 | xvel_index = 21
47 | elif(which_agent==5): #roach (not mujoco)
48 | x_index= 0
49 | y_index= 1
50 | elif(which_agent==6): #hopper
51 | x_index = 11
52 | y_index = 13
53 | z_index = 0
54 | xvel_index = 14
55 | orientation_index = 1
56 | elif(which_agent==7): #walker
57 | x_index = 18
58 | y_index = 20
59 |
60 | return x_index, y_index, z_index, yaw_index, joint1_index, joint2_index, frontleg_index, \
61 | frontshin_index, frontfoot_index, xvel_index, orientation_index
62 |
63 | def generate_training_data_inputs(states0, controls0):
64 | # init vars
65 | states=np.copy(states0)
66 | controls=np.copy(controls0)
67 | new_states=[]
68 | new_controls=[]
69 |
70 | # remove the last entry in each rollout (because that entry doesn't have an associated "output")
71 | for i in range(len(states)):
72 | curr_item = states[i]
73 | length = curr_item.shape[0]
74 | new_states.append(curr_item[0:length-1,:])
75 |
76 | curr_item = controls[i]
77 | length = curr_item.shape[0]
78 | new_controls.append(curr_item[0:length-1,:])
79 |
80 | #turn the list of rollouts into just one large array of data
81 | dataX= np.concatenate(new_states, axis=0)
82 | dataY= np.concatenate(new_controls, axis=0)
83 | return dataX, dataY
84 |
85 | def generate_training_data_outputs(states, which_agent):
86 | #for each rollout, the output corresponding to each (s_i) is (s_i+1 - s_i)
87 | differences=[]
88 | for states_in_single_rollout in states:
89 | output = states_in_single_rollout[1:states_in_single_rollout.shape[0],:] \
90 | -states_in_single_rollout[0:states_in_single_rollout.shape[0]-1,:]
91 | differences.append(output)
92 | output = np.concatenate(differences, axis=0)
93 | return output
94 |
95 | def from_observation_to_usablestate(states, which_agent, just_one):
96 |
97 | #######################################
98 | ######### POINTMASS ###################
99 | #######################################
100 |
101 | #0: x
102 | #1: y
103 | #2: vx
104 | #3: vy
105 | if(which_agent==0):
106 | return states
107 |
108 | #######################################
109 | ######### ANT #########################
110 | #######################################
111 |
112 | #we use the following observation as input to NN (41 things)
113 | #0 to 14... 15 joint positions
114 | #15 to 28... 14 joint velocities
115 | #29 to 31... 3 body com pos
116 | #32 to 37... 6 cos and sin of 3 body angles (from 9 rotation mat)
117 | #38 to 40... body com vel
118 |
119 | #returned by env.step
120 | #0 to 14 = positions
121 | #j0 x position
122 | #j1 y position
123 | #j2 z position
124 | #3 ?
125 | #4 5 body flip
126 | #6 body rotate
127 | #7 leg yaw ccw, 8 leg bend down
128 | #9, 10
129 | #11, 12
130 | #13,14
131 | #15 to 28 = velocities
132 | #29 to 37 = rotation matrix (9)
133 | #38 to 40 = com positions
134 | #41 to 43 = com velocities
135 |
136 | if(which_agent==1):
137 | if(just_one):
138 | curr_item = np.copy(states)
139 | joint_pos = curr_item[0:15]
140 | joint_vel = curr_item[15:29]
141 | body_pos = curr_item[38:41]
142 | body_rpy = to_euler(curr_item[29:38], just_one) #9 vals of rot mat --> 6 vals (cos sin of rpy)
143 | body_vel = curr_item[41:44]
144 | full_item = np.concatenate((joint_pos, joint_vel, body_pos, body_rpy, body_vel), axis=0)
145 | return full_item
146 |
147 | else:
148 | new_states=[]
149 | for i in range(len(states)): #for each rollout
150 | curr_item = np.copy(states[i])
151 |
152 | joint_pos = curr_item[:,0:15]
153 | joint_vel = curr_item[:,15:29]
154 | body_pos = curr_item[:,38:41]
155 | body_rpy = to_euler(curr_item[:,29:38], just_one) #9 vals of rot mat --> 6 vals (cos sin of rpy)
156 | body_vel = curr_item[:,41:44]
157 |
158 | full_item = np.concatenate((joint_pos, joint_vel, body_pos, body_rpy, body_vel), axis=1)
159 | new_states.append(full_item)
160 | return new_states
161 |
162 |
163 | #######################################
164 | ######### SWIMMER #####################
165 | #######################################
166 |
167 | #total = 16
168 | #0 slider x... 1 slider y.... 2 heading
169 | #3,4 the two hinge joint pos
170 | #5,6 slider x/y vel
171 | #7 heading vel
172 | #8,9 the two hinge joint vel
173 | #10,11,12 cm x and y and z pos
174 | #13,14,15 cm x and y and z vel
175 | if(which_agent==2):
176 | return states
177 |
178 | #######################################
179 | ######### REACHER #####################
180 | #######################################
181 |
182 | #total = 11
183 | # 2-- cos(theta) of the 2 angles
184 | # 2-- sin(theta) of the 2 angles
185 | # 2-- goal pos -------------------(ignore this)
186 | # 2-- vel of the 2 angles
187 | # 3-- fingertip cm
188 | if(which_agent==3):
189 | if(just_one):
190 | curr_item = np.copy(states)
191 | keep_1 = curr_item[0:4]
192 | keep_2 = curr_item[6:11]
193 | full_item = np.concatenate((keep_1, keep_2), axis=0)
194 | return full_item
195 |
196 | else:
197 | new_states=[]
198 | for i in range(len(states)): #for each rollout
199 | curr_item = np.copy(states[i])
200 | keep1 = curr_item[:,0:4]
201 | keep2 = curr_item[:,6:11]
202 | full_item = np.concatenate((keep1, keep2), axis=1)
203 | new_states.append(full_item)
204 | return new_states
205 |
206 | #######################################
207 | ######### HALF CHEETAH ################
208 | #######################################
209 |
210 | #STATE when you pass in something to reset env: (33)
211 | # rootx, rootz, rooty
212 | # bthigh, bshin, bfoot
213 | # fthigh, fshin, ffoot
214 | # rootx, rootz, rooty --vel
215 | # bthigh, bshin, bfoot --vel
216 | # fthigh, fshin, ffoot --vel
217 | # self.model.data.qacc (9)
218 | # self.model.data.ctrl (6)
219 | #OBSERVATION: (24)
220 | # 0: rootx (forward/backward)
221 | # 1: rootz (up/down)
222 | # 2: rooty (angle of body)
223 | # 3: bthigh (+ is move back)
224 | # 4: bshin
225 | # 5: bfoot
226 | # 6: fthigh
227 | # 7: fshin
228 | # 8: ffoot
229 | # 9: root x vel
230 | # 10: root z vel
231 | # 11: root y vel
232 | # 12: bthigh vel
233 | # 13: bshin vel
234 | # 14: bfoot vel
235 | # 15: fthigh vel
236 | # 16: fshin vel
237 | # 17: ffoot vel
238 | #com x
239 | #com y
240 | #com z
241 | #com vx
242 | #com vy
243 | #com vz
244 |
245 | if(which_agent==4):
246 | return states
247 |
248 | #######################################
249 | ######### ROACH (personal env) ########
250 | #######################################
251 |
252 | # x,y,z com position
253 | # orientation com
254 | # cos of 2 motor positions
255 | # sin of 2 motor positions
256 | # com velocity
257 | # orientation angular vel
258 | # 2 motor vel
259 |
260 | elif(which_agent==5):
261 | if(just_one):
262 | curr_item = np.copy(states)
263 | keep_1 = curr_item[0:6]
264 | two = np.cos(curr_item[6:8])
265 | three = np.sin(curr_item[6:8])
266 | keep_4 = curr_item[8:16]
267 | full_item = np.concatenate((keep_1, two, three, keep_4), axis=0)
268 | return full_item
269 |
270 | else:
271 | new_states=[]
272 | for i in range(len(states)): #for each rollout
273 | curr_item = np.copy(states[i])
274 | keep1 = curr_item[:,0:6]
275 | two = np.cos(curr_item[:,6:8])
276 | three = np.sin(curr_item[:,6:8])
277 | keep4 = curr_item[:,8:16]
278 | full_item = np.concatenate((keep1, two, three, keep4), axis=1)
279 | new_states.append(full_item)
280 | return new_states
281 |
282 | #######################################
283 | ######### HOPPER ######################
284 | #######################################
285 |
286 | #observation: 17 things
287 | #5 joints-- j0 (height), j2, j3, j4, j5
288 | #6 velocities
289 | #3 com pos
290 | #3 com vel
291 | #state: 21 things
292 | #6 joint pos
293 | #6 joint vel
294 | #6 qacc
295 | #3 ctrl
296 |
297 | if(which_agent==6):
298 | return states
299 |
300 | #######################################
301 | ######### WALKER ######################
302 | #######################################
303 |
304 | #observation: 24 things
305 | #9 joint pos
306 | #9 velocities
307 | #3 com pos
308 | #3 com vel
309 |
310 | if(which_agent==7):
311 | return states
312 |
313 |
314 | def to_euler(rot_mat, just_one):
315 | if(just_one):
316 | r=np.arctan2(rot_mat[3], rot_mat[1])
317 | p=np.arctan2(-rot_mat[6], np.sqrt(rot_mat[7]*rot_mat[7]+rot_mat[8]*rot_mat[8]))
318 | y=np.arctan2(rot_mat[7], rot_mat[8])
319 |
320 | return np.array([np.cos(r), np.sin(r), np.cos(p), np.sin(p), np.cos(y), np.sin(y)])
321 |
322 | else:
323 | r=np.arctan2(rot_mat[:,3], rot_mat[:,1])
324 | r=np.concatenate((np.expand_dims(np.cos(r), axis=1), np.expand_dims(np.sin(r), axis=1)), axis=1)
325 |
326 | p=np.arctan2(-rot_mat[:,6], np.sqrt(rot_mat[:,7]*rot_mat[:,7]+rot_mat[:,8]*rot_mat[:,8]))
327 | p=np.concatenate((np.expand_dims(np.cos(p), axis=1), np.expand_dims(np.sin(p), axis=1)), axis=1)
328 |
329 | y=np.arctan2(rot_mat[:,7], rot_mat[:,8])
330 | y=np.concatenate((np.expand_dims(np.cos(y), axis=1), np.expand_dims(np.sin(y), axis=1)), axis=1)
331 |
332 | return np.concatenate((r,p,y), axis=1)
333 |
--------------------------------------------------------------------------------
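
generate_training_data_inputs and generate_training_data_outputs in data_manipulation.py above pair each (state, action) with the next-state difference. A toy standalone numpy check of that pairing (it mirrors the slicing in those functions instead of importing the module, since data_manipulation.py also pulls in tensorflow and matplotlib):

```python
import numpy as np

# one toy rollout: 5 steps of a 2-dim state and a 1-dim action
states = [np.arange(10.0).reshape(5, 2)]
controls = [np.ones((5, 1))]

# inputs: drop the last step of each rollout (it has no "next state")
dataX = np.concatenate([s[:-1] for s in states], axis=0)
dataY = np.concatenate([c[:-1] for c in controls], axis=0)

# outputs: s_{i+1} - s_i within each rollout
dataZ = np.concatenate([s[1:] - s[:-1] for s in states], axis=0)

assert dataX.shape == (4, 2) and dataY.shape == (4, 1) and dataZ.shape == (4, 2)
assert np.allclose(dataZ, 2.0)  # each toy state increases by 2 per step
```
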
/docs/installation.md:
--------------------------------------------------------------------------------
1 |
2 | # INSTALLING EVERYTHING
3 |
4 | ### ANACONDA (if you don't have it)
5 |
6 | Download from https://www.continuum.io/downloads (download the python 2.7 version)
7 | ```
8 | bash Anaconda2-4.4.0-Linux-x86_64.sh
9 | vim ~/.bashrc
10 | ```
11 | In .bashrc, type:
12 | ```
13 | export PATH="$HOME/anaconda2/bin:$PATH"
14 | ```
15 | Source the file:
16 | ```
17 | source ~/.bashrc
18 | ```
19 |
20 | ----------------------------------
21 |
22 | ### MUJOCO
23 |
24 | Go to website: https://www.roboti.us/license.html
25 |
26 | a) mujoco files:
27 | Under Downloads, download mjpro131 linux
28 | extract/unzip it
29 | ```
30 | mkdir ~/.mujoco
31 | cp -R mjpro131 ~/.mujoco/mjpro131
32 | ```
33 | b) license key:
34 | i) If you don't have one: sign up for the 30-day free trial to get a license
35 | You will need to chmod +x (possibly with sudo) the downloaded executable used to get your computer id
36 | The license email will give you mjkey.txt + LICENSE.txt
37 | ```
38 | cp mjkey.txt ~/.mujoco/mjkey.txt
39 | ```
40 | ii) Else, just copy your existing key into ~/.mujoco/mjkey.txt
41 |
42 | ----------------------------------
43 |
44 | ### RLLAB
45 |
46 | ```
47 | git clone https://github.com/nagaban2/rllab.git
48 | cd rllab
49 | ./scripts/setup_linux.sh
50 | ./scripts/setup_mujoco.sh
51 | vim ~/.bashrc
52 | ```
53 | In .bashrc, type:
54 | ```
55 | export PATH="$HOME/anaconda2/envs/rllab3/bin:$PATH"
56 | export PYTHONPATH="$HOME/rllab:$PYTHONPATH"
57 | ```
58 | Source the file:
59 | ```
60 | source ~/.bashrc
61 | source activate rllab3
62 | ```
63 | ----------------------------------
64 |
65 | ### CUDA (Note: assuming you already have cuda and cudnn)
66 |
67 | Set paths:
68 | ```
69 | vim ~/.bashrc
70 | ```
71 | In .bashrc, type:
72 | ```
73 | export PATH="/usr/local/cuda-8.0/bin:$PATH"
74 | export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/cuda/lib64"
75 | ```
76 | Source the file:
77 | ```
78 | source ~/.bashrc
79 | ```
80 | To see if gpu is being used while running code:
81 | ```
82 | nvidia-smi
83 | ```
84 | ----------------------------------
85 |
86 | ### OTHER
87 | ```
88 | source activate rllab3
89 | pip install gym
90 | pip install cloudpickle
91 | pip install seaborn
92 | ```
--------------------------------------------------------------------------------
/docs/notes.md:
--------------------------------------------------------------------------------
1 | # NOTES
2 |
3 | If you want to
4 | - know about the env
5 | - change the env
6 | - or make your own env
7 |
8 | You might care about the following:
9 |
10 | a) from_observation_to_usablestate in data_manipulation.py
11 | - This explains each element of the env's observation
12 | - It also allows you to edit which parts of the observations you want to feed into your NN (this is called "state" throughout the paper)
13 |
14 | b) get_indices in data_manipulation.py
15 | - Indicates which index of the state corresponds to what (ex. xindex, yindex, etc.)
16 | - These indeces are used throughout the code, for reward functions/etc.
17 |
18 | c) reward_functions.py
19 | - A reward function should be defined for each env/task
20 |
21 | ---------------------------------------------------------------
22 | ---------------------------------------------------------------
23 |
24 | ### Variables in the yaml files:
25 |
26 | **num_rollouts_train**
27 | number of rollouts to collect for training dataset
28 |
29 | **nEpoch**
30 | number of epochs for training the NN dynamics model
31 |
32 | **horizon**
33 | horizon of the MPC controller
34 |
35 | **num_control_samples**
36 | number of random candidate action sequences generated by MPC controller
37 |
38 | **fraction_use_new**
39 | fraction of each training batch that is drawn from newly aggregated data (the rest of the batch comes from the old dataset)
40 |
41 | **num_aggregation_iters**
42 | how many full iterations of train model --> perform MPC rollouts --> aggregate data to conduct
43 |
44 | **num_trajectories_for_aggregation**
45 | how many MPC rollouts to conduct during each aggregation iteration
46 |
47 | **rollouts_forTraining**
48 | how many of the aggregated rollouts to put into the training dataset (the rest go into the validation dataset)
49 |
50 | **num_fc_layers**
51 | number of hidden layers in dynamics model
52 |
53 | **depth_fc_layers**
54 | dimension of each hidden layer in dynamics model
55 |
56 |
--------------------------------------------------------------------------------
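
The variables described in notes.md above live in nested sections of each yaml file; below is a minimal sketch of how main.py looks them up (the key nesting is copied from main.py, the file name is just one of the configs listed in the tree, and it assumes you run it from the repo root; yaml.load without a Loader argument matches the older PyYAML this repo targets):

```python
import os
import yaml

yaml_path = os.path.abspath('yaml_files/cheetah_forward.yaml')
with open(yaml_path, 'r') as f:
    params = yaml.load(f)  # use yaml.safe_load(f) on newer PyYAML versions

print(params['data_collection']['num_rollouts_train'])
print(params['dyn_model']['nEpoch'], params['dyn_model']['fraction_use_new'])
print(params['dyn_model']['num_fc_layers'], params['dyn_model']['depth_fc_layers'])
print(params['controller']['horizon'], params['controller']['num_control_samples'])
print(params['aggregation']['num_aggregation_iters'])
print(params['aggregation']['num_trajectories_for_aggregation'])
print(params['aggregation']['rollouts_forTraining'])
```
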
/docs/sample_plots/Ant_comparison.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anagabandi/nn_dynamics/b76a62faed78a52cd797651395b9962e423ce013/docs/sample_plots/Ant_comparison.png
--------------------------------------------------------------------------------
/docs/sample_plots/Cheetah_comparison.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anagabandi/nn_dynamics/b76a62faed78a52cd797651395b9962e423ce013/docs/sample_plots/Cheetah_comparison.png
--------------------------------------------------------------------------------
/docs/sample_plots/Hopper_comparison.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anagabandi/nn_dynamics/b76a62faed78a52cd797651395b9962e423ce013/docs/sample_plots/Hopper_comparison.png
--------------------------------------------------------------------------------
/docs/sample_plots/Swimmer_comparison.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anagabandi/nn_dynamics/b76a62faed78a52cd797651395b9962e423ce013/docs/sample_plots/Swimmer_comparison.png
--------------------------------------------------------------------------------
/docs/trajfollow_videos/ant_left-1.ogv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anagabandi/nn_dynamics/b76a62faed78a52cd797651395b9962e423ce013/docs/trajfollow_videos/ant_left-1.ogv
--------------------------------------------------------------------------------
/docs/trajfollow_videos/ant_left.ogv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anagabandi/nn_dynamics/b76a62faed78a52cd797651395b9962e423ce013/docs/trajfollow_videos/ant_left.ogv
--------------------------------------------------------------------------------
/docs/trajfollow_videos/ant_right.ogv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anagabandi/nn_dynamics/b76a62faed78a52cd797651395b9962e423ce013/docs/trajfollow_videos/ant_right.ogv
--------------------------------------------------------------------------------
/docs/trajfollow_videos/ant_straight.ogv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anagabandi/nn_dynamics/b76a62faed78a52cd797651395b9962e423ce013/docs/trajfollow_videos/ant_straight.ogv
--------------------------------------------------------------------------------
/docs/trajfollow_videos/ant_uturn.ogv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anagabandi/nn_dynamics/b76a62faed78a52cd797651395b9962e423ce013/docs/trajfollow_videos/ant_uturn.ogv
--------------------------------------------------------------------------------
/docs/trajfollow_videos/cheetah_backward.ogv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anagabandi/nn_dynamics/b76a62faed78a52cd797651395b9962e423ce013/docs/trajfollow_videos/cheetah_backward.ogv
--------------------------------------------------------------------------------
/docs/trajfollow_videos/cheetah_forward.ogv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anagabandi/nn_dynamics/b76a62faed78a52cd797651395b9962e423ce013/docs/trajfollow_videos/cheetah_forward.ogv
--------------------------------------------------------------------------------
/docs/trajfollow_videos/cheetah_forwardbackward.ogv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anagabandi/nn_dynamics/b76a62faed78a52cd797651395b9962e423ce013/docs/trajfollow_videos/cheetah_forwardbackward.ogv
--------------------------------------------------------------------------------
/docs/trajfollow_videos/swimmer_left.ogv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anagabandi/nn_dynamics/b76a62faed78a52cd797651395b9962e423ce013/docs/trajfollow_videos/swimmer_left.ogv
--------------------------------------------------------------------------------
/docs/trajfollow_videos/swimmer_right.ogv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anagabandi/nn_dynamics/b76a62faed78a52cd797651395b9962e423ce013/docs/trajfollow_videos/swimmer_right.ogv
--------------------------------------------------------------------------------
/docs/trajfollow_videos/swimmer_straight.ogv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anagabandi/nn_dynamics/b76a62faed78a52cd797651395b9962e423ce013/docs/trajfollow_videos/swimmer_straight.ogv
--------------------------------------------------------------------------------
/dynamics_model.py:
--------------------------------------------------------------------------------
1 |
2 | import numpy as np
3 | import numpy.random as npr
4 | import tensorflow as tf
5 | import time
6 | import math
7 |
8 | from feedforward_network import feedforward_network
9 |
10 |
11 | class Dyn_Model:
12 |
13 | def __init__(self, inputSize, outputSize, sess, learning_rate, batchsize, which_agent, x_index, y_index,
14 | num_fc_layers, depth_fc_layers, mean_x, mean_y, mean_z, std_x, std_y, std_z, tf_datatype, print_minimal):
15 |
16 | #init vars
17 | self.sess = sess
18 | self.batchsize = batchsize
19 | self.which_agent = which_agent
20 | self.x_index = x_index
21 | self.y_index = y_index
22 | self.inputSize = inputSize
23 | self.outputSize = outputSize
24 | self.mean_x = mean_x
25 | self.mean_y = mean_y
26 | self.mean_z = mean_z
27 | self.std_x = std_x
28 | self.std_y = std_y
29 | self.std_z = std_z
30 | self.print_minimal = print_minimal
31 |
32 | #placeholders
33 | self.x_ = tf.placeholder(tf_datatype, shape=[None, self.inputSize], name='x') #inputs
34 | self.z_ = tf.placeholder(tf_datatype, shape=[None, self.outputSize], name='z') #labels
35 |
36 | #forward pass
37 | self.curr_nn_output = feedforward_network(self.x_, self.inputSize, self.outputSize,
38 | num_fc_layers, depth_fc_layers, tf_datatype)
39 |
40 | #loss
41 | self.mse_ = tf.reduce_mean(tf.square(self.z_ - self.curr_nn_output))
42 |
43 | # Compute gradients and update parameters
44 | self.opt = tf.train.AdamOptimizer(learning_rate)
45 | self.theta = tf.trainable_variables()
46 | self.gv = [(g,v) for g,v in
47 | self.opt.compute_gradients(self.mse_, self.theta)
48 | if g is not None]
49 | self.train_step = self.opt.apply_gradients(self.gv)
50 |
51 | def train(self, dataX, dataZ, dataX_new, dataZ_new, nEpoch, save_dir, fraction_use_new):
52 |
53 | #init vars
54 | start = time.time()
55 | training_loss_list = []
56 | range_of_indeces = np.arange(dataX.shape[0])
57 | nData_old = dataX.shape[0]
58 | num_new_pts = dataX_new.shape[0]
59 |
60 | #how much of new data to use per batch
61 | if(num_new_pts<(self.batchsize*fraction_use_new)):
62 | batchsize_new_pts = num_new_pts #use all of the new ones
63 | else:
64 | batchsize_new_pts = int(self.batchsize*fraction_use_new)
65 |
66 | #how much of old data to use per batch
67 | batchsize_old_pts = int(self.batchsize- batchsize_new_pts)
68 |
69 | #training loop
70 | for i in range(nEpoch):
71 |
72 | #reset to 0
73 | avg_loss=0
74 | num_batches=0
75 |
76 | #randomly order indices (equivalent to shuffling dataX and dataZ)
77 | old_indeces = npr.choice(range_of_indeces, size=(dataX.shape[0],), replace=False)
78 | #train from both old and new dataset
79 | if(batchsize_old_pts>0):
80 |
81 | #get through the full old dataset
82 | for batch in range(int(math.floor(nData_old / batchsize_old_pts))):
83 |
84 | #randomly sample points from new dataset
85 | if(num_new_pts==0):
86 | dataX_new_batch = dataX_new
87 | dataZ_new_batch = dataZ_new
88 | else:
89 | new_indeces = npr.randint(0,dataX_new.shape[0], (batchsize_new_pts,))
90 | dataX_new_batch = dataX_new[new_indeces, :]
91 | dataZ_new_batch = dataZ_new[new_indeces, :]
92 |
93 | #walk through the randomly reordered "old data"
94 | dataX_old_batch = dataX[old_indeces[batch*batchsize_old_pts:(batch+1)*batchsize_old_pts], :]
95 | dataZ_old_batch = dataZ[old_indeces[batch*batchsize_old_pts:(batch+1)*batchsize_old_pts], :]
96 |
97 | #combine the old and new data
98 | dataX_batch = np.concatenate((dataX_old_batch, dataX_new_batch))
99 | dataZ_batch = np.concatenate((dataZ_old_batch, dataZ_new_batch))
100 |
101 | #one iteration of feedforward training
102 | _, loss, output, true_output = self.sess.run([self.train_step, self.mse_, self.curr_nn_output, self.z_],
103 | feed_dict={self.x_: dataX_batch, self.z_: dataZ_batch})
104 | training_loss_list.append(loss)
105 | avg_loss+= loss
106 | num_batches+=1
107 |
108 | #train completely from new set
109 | else:
110 | for batch in range(int(math.floor(num_new_pts / batchsize_new_pts))):
111 |
112 | #walk through the shuffled new data
113 | dataX_batch = dataX_new[batch*batchsize_new_pts:(batch+1)*batchsize_new_pts, :]
114 | dataZ_batch = dataZ_new[batch*batchsize_new_pts:(batch+1)*batchsize_new_pts, :]
115 |
116 | #one iteration of feedforward training
117 | _, loss, output, true_output = self.sess.run([self.train_step, self.mse_, self.curr_nn_output, self.z_],
118 | feed_dict={self.x_: dataX_batch, self.z_: dataZ_batch})
119 |
120 | training_loss_list.append(loss)
121 | avg_loss+= loss
122 | num_batches+=1
123 |
124 | #shuffle new dataset after an epoch (if training only on it)
125 | p = npr.permutation(dataX_new.shape[0])
126 | dataX_new = dataX_new[p]
127 | dataZ_new = dataZ_new[p]
128 |
129 | #save losses after an epoch
130 | np.save(save_dir + '/training_losses.npy', training_loss_list)
131 | if(not(self.print_minimal)):
132 | if((i%10)==0):
133 | print("\n=== Epoch {} ===".format(i))
134 | print ("loss: ", avg_loss/num_batches)
135 |
136 | if(not(self.print_minimal)):
137 | print ("Training set size: ", (nData_old + dataX_new.shape[0]))
138 | print("Training duration: {:0.2f} s".format(time.time()-start))
139 |
140 | #get loss of curr model on old dataset
141 | avg_old_loss=0
142 | iters_in_batch=0
143 | for batch in range(int(math.floor(nData_old / self.batchsize))):
144 | # Batch the training data
145 | dataX_batch = dataX[batch*self.batchsize:(batch+1)*self.batchsize, :]
146 | dataZ_batch = dataZ[batch*self.batchsize:(batch+1)*self.batchsize, :]
147 | #one forward pass to evaluate the loss (no training step)
148 | loss, _ = self.sess.run([self.mse_, self.curr_nn_output], feed_dict={self.x_: dataX_batch, self.z_: dataZ_batch})
149 | avg_old_loss+= loss
150 | iters_in_batch+=1
151 | old_loss = avg_old_loss/iters_in_batch
152 |
153 | #get loss of curr model on new dataset
154 | avg_new_loss=0
155 | iters_in_batch=0
156 | for batch in range(int(math.floor(dataX_new.shape[0] / self.batchsize))):
157 | # Batch the training data
158 | dataX_batch = dataX_new[batch*self.batchsize:(batch+1)*self.batchsize, :]
159 | dataZ_batch = dataZ_new[batch*self.batchsize:(batch+1)*self.batchsize, :]
160 | #one forward pass to evaluate the loss (no training step)
161 | loss, _ = self.sess.run([self.mse_, self.curr_nn_output], feed_dict={self.x_: dataX_batch, self.z_: dataZ_batch})
162 | avg_new_loss+= loss
163 | iters_in_batch+=1
164 | if(iters_in_batch==0):
165 | new_loss=0
166 | else:
167 | new_loss = avg_new_loss/iters_in_batch
168 |
169 | #done
170 | return (avg_loss/num_batches), old_loss, new_loss
171 |
172 | def run_validation(self, inputs, outputs):
173 |
174 | #init vars
175 | nData = inputs.shape[0]
176 | avg_loss=0
177 | iters_in_batch=0
178 |
179 | for batch in range(int(math.floor(nData / self.batchsize))):
180 | # Batch the validation data
181 | dataX_batch = inputs[batch*self.batchsize:(batch+1)*self.batchsize, :]
182 | dataZ_batch = outputs[batch*self.batchsize:(batch+1)*self.batchsize, :]
183 |
184 | #one forward pass to evaluate the loss (no parameter update)
185 | z_predictions, loss = self.sess.run([self.curr_nn_output, self.mse_], feed_dict={self.x_: dataX_batch, self.z_: dataZ_batch})
186 |
187 | avg_loss+= loss
188 | iters_in_batch+=1
189 |
190 | #avg loss + all predictions
191 | print ("Validation set size: ", nData)
192 | print ("Validation set's average loss: ", avg_loss/iters_in_batch)
193 |
194 | return (avg_loss/iters_in_batch)
195 |
196 | #multistep prediction using the learned dynamics model at each step
197 | def do_forward_sim(self, forwardsim_x_true, forwardsim_y, many_in_parallel, env_inp, which_agent):
198 |
199 | #init vars
200 | state_list = []
201 |
202 | if(many_in_parallel):
203 | #init vars
204 | N= forwardsim_y.shape[0]
205 | horizon = forwardsim_y.shape[1]
206 | array_stdz = np.tile(np.expand_dims(self.std_z, axis=0),(N,1))
207 | array_meanz = np.tile(np.expand_dims(self.mean_z, axis=0),(N,1))
208 | array_stdy = np.tile(np.expand_dims(self.std_y, axis=0),(N,1))
209 | array_meany = np.tile(np.expand_dims(self.mean_y, axis=0),(N,1))
210 | array_stdx = np.tile(np.expand_dims(self.std_x, axis=0),(N,1))
211 | array_meanx = np.tile(np.expand_dims(self.mean_x, axis=0),(N,1))
212 |
213 | if(len(forwardsim_x_true)==2):
214 | #N starting states, one for each of the simultaneous sims
215 | curr_states=np.tile(forwardsim_x_true[0], (N,1))
216 | else:
217 | curr_states=np.copy(forwardsim_x_true)
218 |
219 | #advance all N sims, one timestep at a time
220 | for timestep in range(horizon):
221 |
222 | #keep track of states for all N sims
223 | state_list.append(np.copy(curr_states))
224 |
225 | #make [N x (state,action)] array to pass into NN
226 | states_preprocessed = np.nan_to_num(np.divide((curr_states-array_meanx), array_stdx))
227 | actions_preprocessed = np.nan_to_num(np.divide((forwardsim_y[:,timestep,:]-array_meany), array_stdy))
228 | inputs_list= np.concatenate((states_preprocessed, actions_preprocessed), axis=1)
229 |
230 | #run the N sims all at once
231 | model_output = self.sess.run([self.curr_nn_output], feed_dict={self.x_: inputs_list})
232 | state_differences = np.multiply(model_output[0],array_stdz)+array_meanz
233 |
234 | #update the state info
235 | curr_states = curr_states + state_differences
236 |
237 | #return a list of length = horizon+1... each element has N rows (one per parallel sim), each of state dimension
238 | state_list.append(np.copy(curr_states))
239 | else:
240 | curr_state = np.copy(forwardsim_x_true[0]) #curr state is of dim NN input
241 |
242 | for curr_control in forwardsim_y:
243 |
244 | state_list.append(np.copy(curr_state))
245 | curr_control = np.expand_dims(curr_control, axis=0)
246 |
247 | #subtract mean and divide by standard deviation
248 | curr_state_preprocessed = curr_state - self.mean_x
249 | curr_state_preprocessed = np.nan_to_num(curr_state_preprocessed/self.std_x)
250 | curr_control_preprocessed = curr_control - self.mean_y
251 | curr_control_preprocessed = np.nan_to_num(curr_control_preprocessed/self.std_y)
252 | inputs_preprocessed = np.expand_dims(np.append(curr_state_preprocessed, curr_control_preprocessed), axis=0)
253 |
254 | #run through NN to get prediction
255 | model_output = self.sess.run([self.curr_nn_output], feed_dict={self.x_: inputs_preprocessed})
256 |
257 | #multiply by std and add mean back in
258 | state_differences= (model_output[0][0]*self.std_z)+self.mean_z
259 |
260 | #update the state info
261 | next_state = curr_state + state_differences
262 |
263 | #copy the state info
264 | curr_state= np.copy(next_state)
265 |
266 | state_list.append(np.copy(curr_state))
267 |
268 | return state_list
--------------------------------------------------------------------------------
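
Dyn_Model in dynamics_model.py above expects its inputs and labels to already be zero-mean/unit-std, and its NN input to be the [state, action] concatenation; that preprocessing is done by the callers (see main.py and get_true_action.py). A standalone numpy sketch of it, with toy arrays standing in for real rollout data:

```python
import numpy as np

def normalize(data):
    # same pattern as in main.py / get_true_action.py: subtract the mean,
    # divide by the std, and let nan_to_num guard zero-variance dimensions
    mean = np.mean(data, axis=0)
    centered = data - mean
    std = np.std(centered, axis=0)
    return np.nan_to_num(centered / std), mean, std

dataX = np.random.randn(100, 4)  # states
dataY = np.random.randn(100, 2)  # actions
dataZ = np.random.randn(100, 4)  # labels: next state - state

dataX, mean_x, std_x = normalize(dataX)
dataY, mean_y, std_y = normalize(dataY)
dataZ, mean_z, std_z = normalize(dataZ)

# the NN input is the normalized [state, action] concatenation, so
# inputSize = dataX.shape[1] + dataY.shape[1] and outputSize = dataZ.shape[1]
inputs = np.concatenate((dataX, dataY), axis=1)
assert inputs.shape == (100, 6)
```
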
/feedforward_network.py:
--------------------------------------------------------------------------------
1 |
2 | import numpy as np
3 | import tensorflow as tf
4 |
5 | def feedforward_network(inputState, inputSize, outputSize, num_fc_layers, depth_fc_layers, tf_datatype):
6 |
7 | #vars
8 | intermediate_size=depth_fc_layers
9 | reuse= False
10 | initializer = tf.contrib.layers.xavier_initializer(uniform=False, seed=None, dtype=tf_datatype)
11 | fc = tf.contrib.layers.fully_connected
12 |
13 | # make hidden layers
14 | for i in range(num_fc_layers):
15 | if(i==0):
16 | fc_i = fc(inputState, num_outputs=intermediate_size, activation_fn=None,
17 | weights_initializer=initializer, biases_initializer=initializer, reuse=reuse, trainable=True)
18 | else:
19 | fc_i = fc(h_i, num_outputs=intermediate_size, activation_fn=None,
20 | weights_initializer=initializer, biases_initializer=initializer, reuse=reuse, trainable=True)
21 | h_i = tf.nn.relu(fc_i)
22 |
23 | # make output layer
24 | z=fc(h_i, num_outputs=outputSize, activation_fn=None, weights_initializer=initializer,
25 | biases_initializer=initializer, reuse=reuse, trainable=True)
26 | return z
--------------------------------------------------------------------------------
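
The layer pattern in feedforward_network.py above is num_fc_layers fully connected hidden layers of width depth_fc_layers, each followed by a relu, plus a linear output layer. A framework-free numpy sketch of the same forward pass, purely to document the architecture (random weights; this is not the repo's initializer or TensorFlow graph):

```python
import numpy as np

def mlp_forward(x, output_size, num_fc_layers, depth_fc_layers):
    h = x
    in_dim = x.shape[1]
    for _ in range(num_fc_layers):
        W = np.random.randn(in_dim, depth_fc_layers) * 0.01
        b = np.zeros(depth_fc_layers)
        h = np.maximum(np.dot(h, W) + b, 0.0)  # fully connected layer + relu
        in_dim = depth_fc_layers
    W_out = np.random.randn(in_dim, output_size) * 0.01
    return np.dot(h, W_out)                    # linear output layer (no activation)

out = mlp_forward(np.random.randn(2, 5), output_size=3, num_fc_layers=2, depth_fc_layers=64)
assert out.shape == (2, 3)
```
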
/get_true_action.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import numpy.random as npr
3 | import tensorflow as tf
4 | import time
5 | import math
6 | import matplotlib.pyplot as plt
7 | import copy
8 | from six.moves import cPickle
9 | from rllab.misc import tensor_utils
10 | from rllab.envs.normalized_env import normalize
11 | from feedforward_network import feedforward_network
12 | import os
13 | from data_manipulation import from_observation_to_usablestate
14 | from dynamics_model import Dyn_Model
15 | from data_manipulation import get_indices
16 | from mpc_controller import MPCController
17 | from trajectories import make_trajectory
18 |
19 | class GetTrueAction:
20 |
21 | def make_model(self, sess, env_inp, rundir, tf_datatype, num_fc_layers, depth_fc_layers, which_agent,
22 | lr, batchsize, N, horizon, steps_per_episode, dt_steps, print_minimal):
23 |
24 | #vars
25 | self.sess = sess
26 | self.env = copy.deepcopy(env_inp)
27 | self.N = N
28 | self.horizon = horizon
29 | self.which_agent = which_agent
30 | self.steps_per_episode = steps_per_episode
31 | self.dt_steps = dt_steps
32 | self.print_minimal = print_minimal
33 |
34 | #get sizes
35 | dataX= np.load(rundir + '/training_data/dataX.npy')
36 | dataY= np.load(rundir + '/training_data/dataY.npy')
37 | dataZ= np.load(rundir + '/training_data/dataZ.npy')
38 | inputs = np.concatenate((dataX, dataY), axis=1)
39 | assert inputs.shape[0] == dataZ.shape[0]
40 | inputSize = inputs.shape[1]
41 | outputSize = dataZ.shape[1]
42 |
43 | #calculate the means and stds
44 | self.mean_x = np.mean(dataX, axis = 0)
45 | dataX = dataX - self.mean_x
46 | self.std_x = np.std(dataX, axis = 0)
47 | dataX = np.nan_to_num(dataX/self.std_x)
48 | self.mean_y = np.mean(dataY, axis = 0)
49 | dataY = dataY - self.mean_y
50 | self.std_y = np.std(dataY, axis = 0)
51 | dataY = np.nan_to_num(dataY/self.std_y)
52 | self.mean_z = np.mean(dataZ, axis = 0)
53 | dataZ = dataZ - self.mean_z
54 | self.std_z = np.std(dataZ, axis = 0)
55 | dataZ = np.nan_to_num(dataZ/self.std_z)
56 |
57 | #get x and y index
58 | x_index, y_index, z_index, yaw_index, joint1_index, joint2_index, frontleg_index, frontshin_index, frontfoot_index, xvel_index, orientation_index = get_indices(which_agent)
59 |
60 | #make dyn model and randomly initialize weights
61 | self.dyn_model = Dyn_Model(inputSize, outputSize, self.sess, lr, batchsize, which_agent, x_index, y_index, num_fc_layers,
62 | depth_fc_layers, self.mean_x, self.mean_y, self.mean_z, self.std_x, self.std_y, self.std_z,
63 | tf_datatype, self.print_minimal)
64 | self.sess.run(tf.global_variables_initializer())
65 |
66 | #load in weights from desired trained dynamics model
67 | pathname = rundir + '/models/finalModel.ckpt'
68 | saver = tf.train.Saver(max_to_keep=0)
69 | saver.restore(self.sess, pathname)
70 | print("\n\nRestored dynamics model with variables from ", pathname,"\n\n")
71 |
72 | #make controller, to use for querying optimal action
73 | self.mpc_controller = MPCController(self.env, self.dyn_model, self.horizon, self.which_agent, self.steps_per_episode,
74 | self.dt_steps, self.N, self.mean_x, self.mean_y, self.mean_z, self.std_x, self.std_y,
75 | self.std_z, 'nc', self.print_minimal, x_index, y_index, z_index, yaw_index, joint1_index,
76 | joint2_index, frontleg_index, frontshin_index, frontfoot_index, xvel_index, orientation_index)
77 | self.mpc_controller.desired_states = make_trajectory('straight', np.zeros((100,)), x_index, y_index, which_agent) #junk, just a placeholder
78 |
79 | #select task or reward func
80 | self.reward_func = self.mpc_controller.reward_functions.get_reward_func(False, 0, 0, 0, 0)
81 |
82 | def get_action(self, curr_obs):
83 |
84 | curr_nn_state= from_observation_to_usablestate(curr_obs, self.which_agent, True)
85 | best_action, _, _, _ = self.mpc_controller.get_action(curr_nn_state, 0, self.reward_func)
86 |
87 | return best_action
--------------------------------------------------------------------------------
/helper_funcs.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import time
3 | import tensorflow as tf
4 | import numpy as np
5 |
6 | #import rllab envs
7 | from rllab.envs.normalized_env import normalize
8 | from rllab.envs.mujoco.swimmer_env import SwimmerEnv
9 | from rllab.envs.mujoco.half_cheetah_env import HalfCheetahEnv
10 | from rllab.envs.mujoco.hopper_env import HopperEnv
11 | from rllab.envs.mujoco.walker2d_env import Walker2DEnv
12 | from point_env import PointEnv
13 | from rllab.envs.mujoco.ant_env import AntEnv
14 |
15 | #import gym envs
16 | import gym
17 | from gym import wrappers
18 | from gym.envs.mujoco.reacher import ReacherEnv
19 | from rllab.envs.gym_env import GymEnv
20 |
21 |
22 | def add_noise(data_inp, noiseToSignal):
23 | data= copy.deepcopy(data_inp)
24 | mean_data = np.mean(data, axis = 0)
25 | std_of_noise = mean_data*noiseToSignal
26 | for j in range(mean_data.shape[0]):
27 | if(std_of_noise[j]>0):
28 | data[:,j] = np.copy(data[:,j]+np.random.normal(0, np.absolute(std_of_noise[j]), (data.shape[0],)))
29 | return data
30 |
31 | def perform_rollouts(policy, num_rollouts, steps_per_rollout, visualize_rollouts, CollectSamples,
32 | env, which_agent, dt_steps, dt_from_xml, follow_trajectories):
33 | #collect training data by performing rollouts
34 | print("Beginning to do ", num_rollouts, " rollouts.")
35 | c = CollectSamples(env, policy, visualize_rollouts, which_agent, dt_steps, dt_from_xml, follow_trajectories)
36 | states, controls, starting_states, rewards_list = c.collect_samples(num_rollouts, steps_per_rollout)
37 |
38 | print("Performed ", len(states), " rollouts, each with ", states[0].shape[0], " steps.")
39 | return states, controls, starting_states, rewards_list
40 |
41 |
42 | def create_env(which_agent):
43 |
44 | # setup environment
45 | if(which_agent==0):
46 | env = normalize(PointEnv())
47 | elif(which_agent==1):
48 | env = normalize(AntEnv())
49 | elif(which_agent==2):
50 | env = normalize(SwimmerEnv()) #dt 0.001 and frameskip=150
51 | elif(which_agent==3):
52 | env = ReacherEnv()
53 | elif(which_agent==4):
54 | env = normalize(HalfCheetahEnv())
55 | elif(which_agent==5):
56 | env = RoachEnv() #this is a personal vrep env
57 | elif(which_agent==6):
58 | env=normalize(HopperEnv())
59 | elif(which_agent==7):
60 | env=normalize(Walker2DEnv())
61 |
62 | #get dt value from env
63 | if(which_agent==5):
64 | dt_from_xml = env.VREP_DT
65 | else:
66 | dt_from_xml = env.model.opt.timestep
67 | print("\n\n the dt is: ", dt_from_xml, "\n\n")
68 |
69 | #set vars
70 | tf.set_random_seed(2)
71 | gym.logger.setLevel(gym.logging.WARNING)
72 | dimO = env.observation_space.shape
73 | dimA = env.action_space.shape
74 | print ('--------------------------------- \nState space dimension: ', dimO)
75 | print ('Action space dimension: ', dimA, "\n -----------------------------------")
76 |
77 | return env, dt_from_xml
78 |
79 |
80 | def visualize_rendering(starting_state, list_of_actions, env_inp, dt_steps, dt_from_xml, which_agent):
81 | env=copy.deepcopy(env_inp)
82 |
83 | if(which_agent==5):
84 | env.reset()
85 | else:
86 | env.reset(starting_state)
87 |
88 | for action in list_of_actions:
89 |
90 | if(action.shape[0]==1):
91 | env.step(action[0], collectingInitialData=False)
92 | else:
93 | env.step(action, collectingInitialData=False)
94 |
95 | if(which_agent==5):
96 | junk=1
97 | else:
98 | env.render()
99 | time.sleep(dt_steps*dt_from_xml)
100 |
101 | print("Done rendering.")
102 | return
--------------------------------------------------------------------------------
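
add_noise() in helper_funcs.py above scales the noise for each state dimension by that dimension's mean times noiseToSignal (main.py sets noiseToSignal = 0.01). A tiny standalone illustration of that scaling, slightly simplified (the original only perturbs dimensions whose computed scale is positive):

```python
import numpy as np

noiseToSignal = 0.01
data = np.random.randn(1000, 3) + np.array([5.0, 0.5, -2.0])

mean_data = np.mean(data, axis=0)
std_of_noise = np.abs(mean_data) * noiseToSignal  # per-dimension noise std

noisy_data = data + np.random.normal(0.0, std_of_noise, data.shape)
assert noisy_data.shape == data.shape
```
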
/main.py:
--------------------------------------------------------------------------------
1 |
2 | #imports
3 | import numpy as np
4 | import numpy.random as npr
5 | import tensorflow as tf
6 | import time
7 | import matplotlib.pyplot as plt
8 | import pickle
9 | import copy
10 | import os
11 | import sys
12 | from six.moves import cPickle
13 | from rllab.envs.normalized_env import normalize
14 | import yaml
15 | import argparse
16 | import json
17 |
18 | #my imports
19 | from policy_random import Policy_Random
20 | from trajectories import make_trajectory
21 | from trajectories import get_trajfollow_params
22 | from data_manipulation import generate_training_data_inputs
23 | from data_manipulation import generate_training_data_outputs
24 | from data_manipulation import from_observation_to_usablestate
25 | from data_manipulation import get_indices
26 | from helper_funcs import perform_rollouts
27 | from helper_funcs import create_env
28 | from helper_funcs import visualize_rendering
29 | from helper_funcs import add_noise
30 | from dynamics_model import Dyn_Model
31 | from mpc_controller import MPCController
32 |
33 | def main():
34 |
35 | #################################################
36 | ############ commandline arguments ##############
37 | #################################################
38 |
39 | parser = argparse.ArgumentParser()
40 | parser.add_argument('--yaml_file', type=str, default='ant_forward')
41 | parser.add_argument('--seed', type=int, default=0)
42 | parser.add_argument('--run_num', type=int, default=0)
43 | parser.add_argument('--use_existing_training_data', action="store_true", dest='use_existing_training_data', default=False)
44 | parser.add_argument('--use_existing_dynamics_model', action="store_true", dest='use_existing_dynamics_model', default=False)
45 |
46 | parser.add_argument('--desired_traj_type', type=str, default='straight') #straight, left_turn, right_turn, u_turn, backward, forward_backward
47 | parser.add_argument('--num_rollouts_save_for_mf', type=int, default=60)
48 |
49 | parser.add_argument('--might_render', action="store_true", dest='might_render', default=False)
50 | parser.add_argument('--visualize_MPC_rollout', action="store_true", dest='visualize_MPC_rollout', default=False)
51 | parser.add_argument('--perform_forwardsim_for_vis', action="store_true", dest='perform_forwardsim_for_vis', default=False)
52 | parser.add_argument('--print_minimal', action="store_true", dest='print_minimal', default=False)
53 | args = parser.parse_args()
54 |
55 |
56 | ########################################
57 | ######### params from yaml file ########
58 | ########################################
59 |
60 | #load in parameters from specified file
61 |
62 | yaml_path = os.path.abspath('yaml_files/'+args.yaml_file+'.yaml')
63 | assert(os.path.exists(yaml_path))
64 | with open(yaml_path, 'r') as f:
65 | params = yaml.load(f)
66 |
67 | #save params from specified file
68 | which_agent = params['which_agent']
69 | follow_trajectories = params['follow_trajectories']
70 | #data collection
71 | use_threading = params['data_collection']['use_threading']
72 | num_rollouts_train = params['data_collection']['num_rollouts_train']
73 | num_rollouts_val = params['data_collection']['num_rollouts_val']
74 | #dynamics model
75 | num_fc_layers = params['dyn_model']['num_fc_layers']
76 | depth_fc_layers = params['dyn_model']['depth_fc_layers']
77 | batchsize = params['dyn_model']['batchsize']
78 | lr = params['dyn_model']['lr']
79 | nEpoch = params['dyn_model']['nEpoch']
80 | fraction_use_new = params['dyn_model']['fraction_use_new']
81 | #controller
82 | horizon = params['controller']['horizon']
83 | num_control_samples = params['controller']['num_control_samples']
84 | if(which_agent==1):
85 | if(args.desired_traj_type=='straight'):
86 | num_control_samples=3000
87 | #aggregation
88 | num_aggregation_iters = params['aggregation']['num_aggregation_iters']
89 | num_trajectories_for_aggregation = params['aggregation']['num_trajectories_for_aggregation']
90 | rollouts_forTraining = params['aggregation']['rollouts_forTraining']
91 | #noise
92 | make_aggregated_dataset_noisy = params['noise']['make_aggregated_dataset_noisy']
93 | make_training_dataset_noisy = params['noise']['make_training_dataset_noisy']
94 | noise_actions_during_MPC_rollouts = params['noise']['noise_actions_during_MPC_rollouts']
95 | #steps
96 | dt_steps = params['steps']['dt_steps']
97 | steps_per_episode = params['steps']['steps_per_episode']
98 | steps_per_rollout_train = params['steps']['steps_per_rollout_train']
99 | steps_per_rollout_val = params['steps']['steps_per_rollout_val']
100 | #saving
101 | min_rew_for_saving = params['saving']['min_rew_for_saving']
102 | #generic
103 | visualize_True = params['generic']['visualize_True']
104 | visualize_False = params['generic']['visualize_False']
105 | #from args
106 | print_minimal= args.print_minimal
107 |
108 |
109 | ########################################
110 | ### make directories for saving data ###
111 | ########################################
112 |
113 | save_dir = 'run_'+ str(args.run_num)
114 | if not os.path.exists(save_dir):
115 | os.makedirs(save_dir)
116 | os.makedirs(save_dir+'/losses')
117 | os.makedirs(save_dir+'/models')
118 | os.makedirs(save_dir+'/saved_forwardsim')
119 | os.makedirs(save_dir+'/saved_trajfollow')
120 | os.makedirs(save_dir+'/training_data')
121 |
122 | ########################################
123 | ############## set vars ################
124 | ########################################
125 |
126 | #set seeds
127 | npr.seed(args.seed)
128 | tf.set_random_seed(args.seed)
129 |
130 | #data collection, either with or without multi-threading
131 | if(use_threading):
132 | from collect_samples_threaded import CollectSamples
133 | else:
134 | from collect_samples import CollectSamples
135 |
136 | #more vars
137 | x_index, y_index, z_index, yaw_index, joint1_index, joint2_index, frontleg_index, frontshin_index, frontfoot_index, xvel_index, orientation_index = get_indices(which_agent)
138 | tf_datatype = tf.float64
139 | noiseToSignal = 0.01
140 |
141 | # n is noisy, c is clean... 1st letter is what action's executed and 2nd letter is what action's aggregated
142 | actions_ag='nc'
143 |
144 | #################################################
145 | ######## save param values to a file ############
146 | #################################################
147 |
148 | param_dict={}
149 | param_dict['which_agent']= which_agent
150 | param_dict['use_existing_training_data']= str(args.use_existing_training_data)
151 | param_dict['desired_traj_type']= args.desired_traj_type
152 | param_dict['visualize_MPC_rollout']= str(args.visualize_MPC_rollout)
153 | param_dict['num_rollouts_save_for_mf']= args.num_rollouts_save_for_mf
154 | param_dict['seed']= args.seed
155 | param_dict['follow_trajectories']= str(follow_trajectories)
156 | param_dict['use_threading']= str(use_threading)
157 | param_dict['num_rollouts_train']= num_rollouts_train
158 | param_dict['num_fc_layers']= num_fc_layers
159 | param_dict['depth_fc_layers']= depth_fc_layers
160 | param_dict['batchsize']= batchsize
161 | param_dict['lr']= lr
162 | param_dict['nEpoch']= nEpoch
163 | param_dict['fraction_use_new']= fraction_use_new
164 | param_dict['horizon']= horizon
165 | param_dict['num_control_samples']= num_control_samples
166 | param_dict['num_aggregation_iters']= num_aggregation_iters
167 | param_dict['num_trajectories_for_aggregation']= num_trajectories_for_aggregation
168 | param_dict['rollouts_forTraining']= rollouts_forTraining
169 | param_dict['make_aggregated_dataset_noisy']= str(make_aggregated_dataset_noisy)
170 | param_dict['make_training_dataset_noisy']= str(make_training_dataset_noisy)
171 | param_dict['noise_actions_during_MPC_rollouts']= str(noise_actions_during_MPC_rollouts)
172 | param_dict['dt_steps']= dt_steps
173 | param_dict['steps_per_episode']= steps_per_episode
174 | param_dict['steps_per_rollout_train']= steps_per_rollout_train
175 | param_dict['steps_per_rollout_val']= steps_per_rollout_val
176 | param_dict['min_rew_for_saving']= min_rew_for_saving
177 | param_dict['x_index']= x_index
178 | param_dict['y_index']= y_index
179 | param_dict['tf_datatype']= str(tf_datatype)
180 | param_dict['noiseToSignal']= noiseToSignal
181 |
182 | with open(save_dir+'/params.pkl', 'wb') as f:
183 | pickle.dump(param_dict, f, pickle.HIGHEST_PROTOCOL)
184 | with open(save_dir+'/params.txt', 'w') as f:
185 | f.write(json.dumps(param_dict))
186 |
187 | #################################################
188 | ### initialize the experiment
189 | #################################################
190 |
191 | if(not(print_minimal)):
192 | print("\n#####################################")
193 | print("Initializing environment")
194 | print("#####################################\n")
195 |
196 | #create env
197 | env, dt_from_xml= create_env(which_agent)
198 |
199 | #create random policy for data collection
200 | random_policy = Policy_Random(env)
201 |
202 | #################################################
203 | ### set GPU options for TF
204 | #################################################
205 |
206 | gpu_device = 0
207 | gpu_frac = 0.3
208 | os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_device)
209 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_frac)
210 | config = tf.ConfigProto(gpu_options=gpu_options,
211 | log_device_placement=False,
212 | allow_soft_placement=True,
213 | inter_op_parallelism_threads=1,
214 | intra_op_parallelism_threads=1)
215 |
216 | with tf.Session(config=config) as sess:
217 |
218 | #################################################
219 | ### deal with data
220 | #################################################
221 |
222 | if(args.use_existing_training_data):
223 | if(not(print_minimal)):
224 | print("\n#####################################")
225 | print("Retrieving training data & policy from saved files")
226 | print("#####################################\n")
227 |
228 | dataX= np.load(save_dir + '/training_data/dataX.npy') # input1: state
229 | dataY= np.load(save_dir + '/training_data/dataY.npy') # input2: control
230 | dataZ= np.load(save_dir + '/training_data/dataZ.npy') # output: nextstate-state
231 | states_val= np.load(save_dir + '/training_data/states_val.npy')
232 | controls_val= np.load(save_dir + '/training_data/controls_val.npy')
233 | forwardsim_x_true= np.load(save_dir + '/training_data/forwardsim_x_true.npy')
234 | forwardsim_y= np.load(save_dir + '/training_data/forwardsim_y.npy')
235 |
236 | else:
237 |
238 | if(not(print_minimal)):
239 | print("\n#####################################")
240 | print("Performing rollouts to collect training data")
241 | print("#####################################\n")
242 |
243 | #perform rollouts
244 | states, controls, _, _ = perform_rollouts(random_policy, num_rollouts_train, steps_per_rollout_train, visualize_False,
245 | CollectSamples, env, which_agent, dt_steps, dt_from_xml, follow_trajectories)
246 |
247 | if(not(print_minimal)):
248 | print("\n#####################################")
249 | print("Performing rollouts to collect validation data")
250 | print("#####################################\n")
251 |
252 | start_validation_rollouts = time.time()
253 | states_val, controls_val, _, _ = perform_rollouts(random_policy, num_rollouts_val, steps_per_rollout_val, visualize_False,
254 | CollectSamples, env, which_agent, dt_steps, dt_from_xml, follow_trajectories)
255 |
256 | if(not(print_minimal)):
257 | print("\n#####################################")
258 | print("Convert from env observations to NN 'states' ")
259 | print("#####################################\n")
260 |
261 | #training
262 | states = from_observation_to_usablestate(states, which_agent, False)
263 | #validation
264 | states_val = from_observation_to_usablestate(states_val, which_agent, False)
265 | states_val = np.array(states_val)
266 |
267 | if(not(print_minimal)):
268 | print("\n#####################################")
269 | print("Data formatting: create inputs and labels for NN ")
270 | print("#####################################\n")
271 |
272 | dataX , dataY = generate_training_data_inputs(states, controls)
273 | dataZ = generate_training_data_outputs(states, which_agent)
274 |
275 | if(not(print_minimal)):
276 | print("\n#####################################")
277 | print("Add noise")
278 | print("#####################################\n")
279 |
280 | #add a little dynamics noise (next state is not perfectly accurate, given correct state and action)
281 | if(make_training_dataset_noisy):
282 | dataX = add_noise(dataX, noiseToSignal)
283 | dataZ = add_noise(dataZ, noiseToSignal)
284 |
285 | if(not(print_minimal)):
286 | print("\n#####################################")
287 | print("Perform rollout & save for forward sim")
288 | print("#####################################\n")
289 |
290 | states_forwardsim_orig, controls_forwardsim, _,_ = perform_rollouts(random_policy, 1, 100,
291 | visualize_False, CollectSamples,
292 | env, which_agent, dt_steps,
293 | dt_from_xml, follow_trajectories)
294 | states_forwardsim = np.copy(from_observation_to_usablestate(states_forwardsim_orig, which_agent, False))
295 | forwardsim_x_true, forwardsim_y = generate_training_data_inputs(states_forwardsim, controls_forwardsim)
296 |
297 | if(not(print_minimal)):
298 | print("\n#####################################")
299 | print("Saving data")
300 | print("#####################################\n")
301 |
302 | np.save(save_dir + '/training_data/dataX.npy', dataX)
303 | np.save(save_dir + '/training_data/dataY.npy', dataY)
304 | np.save(save_dir + '/training_data/dataZ.npy', dataZ)
305 | np.save(save_dir + '/training_data/states_val.npy', states_val)
306 | np.save(save_dir + '/training_data/controls_val.npy', controls_val)
307 | np.save(save_dir + '/training_data/forwardsim_x_true.npy', forwardsim_x_true)
308 | np.save(save_dir + '/training_data/forwardsim_y.npy', forwardsim_y)
309 |
310 | if(not(print_minimal)):
311 | print("Done getting data.")
312 | print("dataX dim: ", dataX.shape)
313 |
314 | #################################################
315 | ### init vars
316 | #################################################
317 |
318 | counter_agg_iters=0
319 | training_loss_list=[]
320 | forwardsim_score_list=[]
321 | old_loss_list=[]
322 | new_loss_list=[]
323 | errors_1_per_agg=[]
324 | errors_5_per_agg=[]
325 | errors_10_per_agg=[]
326 | errors_50_per_agg=[]
327 | errors_100_per_agg=[]
328 | list_avg_rew=[]
329 | list_num_datapoints=[]
330 | dataX_new = np.zeros((0,dataX.shape[1]))
331 | dataY_new = np.zeros((0,dataY.shape[1]))
332 | dataZ_new = np.zeros((0,dataZ.shape[1]))
333 |
334 | #################################################
335 | ### preprocess the old training dataset
336 | #################################################
337 |
338 | if(not(print_minimal)):
339 | print("\n#####################################")
340 | print("Preprocessing 'old' training data")
341 | print("#####################################\n")
342 |
343 | #every component (i.e. x position) should become mean 0, std 1
344 | mean_x = np.mean(dataX, axis = 0)
345 | dataX = dataX - mean_x
346 | std_x = np.std(dataX, axis = 0)
347 | dataX = np.nan_to_num(dataX/std_x)
348 |
349 | mean_y = np.mean(dataY, axis = 0)
350 | dataY = dataY - mean_y
351 | std_y = np.std(dataY, axis = 0)
352 | dataY = np.nan_to_num(dataY/std_y)
353 |
354 | mean_z = np.mean(dataZ, axis = 0)
355 | dataZ = dataZ - mean_z
356 | std_z = np.std(dataZ, axis = 0)
357 | dataZ = np.nan_to_num(dataZ/std_z)
358 |
359 | ## concatenate state and action, to be used for training dynamics
360 | inputs = np.concatenate((dataX, dataY), axis=1)
361 | outputs = np.copy(dataZ)
362 |
363 | #doing a render here somehow allows it to not produce an error later
364 | might_render= False
365 | if(args.visualize_MPC_rollout or args.might_render):
366 | might_render=True
367 | if(might_render):
368 | new_env, _ = create_env(which_agent)
369 | new_env.render()
370 |
371 | ##############################################
372 | ########## THE AGGREGATION LOOP ##############
373 | ##############################################
374 |
375 | #dimensions
376 | assert inputs.shape[0] == outputs.shape[0]
377 | inputSize = inputs.shape[1]
378 | outputSize = outputs.shape[1]
379 |
380 | #initialize dynamics model
381 | dyn_model = Dyn_Model(inputSize, outputSize, sess, lr, batchsize, which_agent, x_index, y_index, num_fc_layers,
382 | depth_fc_layers, mean_x, mean_y, mean_z, std_x, std_y, std_z, tf_datatype, print_minimal)
383 |
384 | #create mpc controller
385 | mpc_controller = MPCController(env, dyn_model, horizon, which_agent, steps_per_episode, dt_steps, num_control_samples,
386 | mean_x, mean_y, mean_z, std_x, std_y, std_z, actions_ag, print_minimal, x_index, y_index,
387 | z_index, yaw_index, joint1_index, joint2_index, frontleg_index, frontshin_index,
388 | frontfoot_index, xvel_index, orientation_index)
389 |
390 | #randomly initialize all vars
391 | sess.run(tf.global_variables_initializer())
392 |
393 | while(counter_agg_iters<num_aggregation_iters):
488 |
489 | #########################
490 | #### STATE INPUTS TO NN
491 | #########################
492 |
493 | ## take all except the last 100 pts from each rollout
494 | validation_inputs_states.append(states_val[i][0:length_curr_rollout-100])
495 |
496 | #########################
497 | #### CONTROL INPUTS TO NN
498 | #########################
499 |
500 | #100 step controls
501 | list_100 = []
502 | for j in range(100):
503 | list_100.append(controls_val[i][0+j:length_curr_rollout-100+j])
504 | ##for states 0:x, first apply acs 0:x, then apply acs 1:x+1, then apply acs 2:x+2, etc...
505 | list_100=np.array(list_100) #100xstepsx2
506 | list_100= np.swapaxes(list_100,0,1) #stepsx100x2
507 | controls_100step.append(list_100)
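  | # after the swapaxes, row s of list_100 holds the next 100 actions taken from validation state s,
  | # so the model can be rolled 100 steps forward from every start state at once and
  | # predicted_100step[k] below lines up with labels_kstep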
508 |
509 | #########################
510 | #### STATE LABELS- compare these to the outputs of NN (forward sim)
511 | #########################
512 | labels_1step.append(states_val[i][0+1:length_curr_rollout-100+1])
513 | labels_5step.append(states_val[i][0+5:length_curr_rollout-100+5])
514 | labels_10step.append(states_val[i][0+10:length_curr_rollout-100+10])
515 | labels_50step.append(states_val[i][0+50:length_curr_rollout-100+50])
516 | labels_100step.append(states_val[i][0+100:length_curr_rollout-100+100])
517 |
518 | validation_inputs_states = np.concatenate(validation_inputs_states)
519 | controls_100step = np.concatenate(controls_100step)
520 | labels_1step = np.concatenate(labels_1step)
521 | labels_5step = np.concatenate(labels_5step)
522 | labels_10step = np.concatenate(labels_10step)
523 | labels_50step = np.concatenate(labels_50step)
524 | labels_100step = np.concatenate(labels_100step)
525 |
526 | #####################################
527 | ## pass into forward sim, to make predictions
528 | #####################################
529 |
530 | many_in_parallel = True
531 | predicted_100step = dyn_model.do_forward_sim(validation_inputs_states, controls_100step,
532 | many_in_parallel, env, which_agent)
533 |
534 | #####################################
535 | ## Calculate validation metrics (mse loss between predicted and true)
536 | #####################################
537 |
538 | array_meanx = np.tile(np.expand_dims(mean_x, axis=0),(labels_1step.shape[0],1))
539 | array_stdx = np.tile(np.expand_dims(std_x, axis=0),(labels_1step.shape[0],1))
540 |
541 | error_1step = np.mean(np.square(np.nan_to_num(np.divide(predicted_100step[1]-array_meanx,array_stdx))
542 | -np.nan_to_num(np.divide(labels_1step-array_meanx,array_stdx))))
543 | error_5step = np.mean(np.square(np.nan_to_num(np.divide(predicted_100step[5]-array_meanx,array_stdx))
544 | -np.nan_to_num(np.divide(labels_5step-array_meanx,array_stdx))))
545 | error_10step = np.mean(np.square(np.nan_to_num(np.divide(predicted_100step[10]-array_meanx,array_stdx))
546 | -np.nan_to_num(np.divide(labels_10step-array_meanx,array_stdx))))
547 | error_50step = np.mean(np.square(np.nan_to_num(np.divide(predicted_100step[50]-array_meanx,array_stdx))
548 | -np.nan_to_num(np.divide(labels_50step-array_meanx,array_stdx))))
549 | error_100step = np.mean(np.square(np.nan_to_num(np.divide(predicted_100step[100]-array_meanx,array_stdx))
550 | -np.nan_to_num(np.divide(labels_100step-array_meanx,array_stdx))))
551 | print("Multistep error values: ", error_1step, error_5step, error_10step, error_50step, error_100step,"\n")
552 |
553 | errors_1_per_agg.append(error_1step)
554 | errors_5_per_agg.append(error_5step)
555 | errors_10_per_agg.append(error_10step)
556 | errors_50_per_agg.append(error_50step)
557 | errors_100_per_agg.append(error_100step)
558 |
559 | #####################################
560 | ## Perform 1 forward simulation, for visualization purposes (compare predicted traj vs true traj)
561 | #####################################
562 |
563 | if(args.perform_forwardsim_for_vis):
564 | if(not(print_minimal)):
565 | print("\n#####################################")
566 | print("Performing a forward sim of the learned model. using pre-saved dataset. just for visualization")
567 | print("#####################################\n")
568 |
569 | #for a given set of controls,
570 | #compare sim traj vs. learned model's traj
571 | #(don't expect this to match closely, since model errors accumulate over the horizon)
572 | many_in_parallel = False
573 | forwardsim_x_pred = dyn_model.do_forward_sim(forwardsim_x_true, forwardsim_y, many_in_parallel, env, which_agent)
574 | forwardsim_x_pred = np.array(forwardsim_x_pred)
575 |
576 | # save results of forward sim
577 | np.save(save_dir + '/saved_forwardsim/forwardsim_states_true_'+str(counter_agg_iters)+'.npy', forwardsim_x_true)
578 | np.save(save_dir + '/saved_forwardsim/forwardsim_states_pred_'+str(counter_agg_iters)+'.npy', forwardsim_x_pred)
579 |
580 | #####################################
581 | ######## EXECUTE CONTROLLER #########
582 | #####################################
583 |
584 | if(not(print_minimal)):
585 | print("##############################################")
586 | print("#### Execute the controller to follow desired trajectories")
587 | print("##############################################\n")
588 |
589 | ###################################################################
590 | ### Try to follow trajectory... collect rollouts
591 | ###################################################################
592 |
593 | #init vars
594 | list_rewards=[]
595 | starting_states=[]
596 | selected_multiple_u = []
597 | resulting_multiple_x = []
598 |
599 | #get parameters for trajectory following
600 | horiz_penalty_factor, forward_encouragement_factor, heading_penalty_factor, desired_snake_headingInit = get_trajfollow_params(which_agent, args.desired_traj_type)
601 | if(follow_trajectories==False):
602 | desired_snake_headingInit=0
603 |
604 | for rollout_num in range(num_trajectories_for_aggregation):
605 |
606 | if(not(print_minimal)):
607 | print("\nPerforming MPC rollout #", rollout_num)
608 |
609 | #reset env and set the desired traj
610 | if(which_agent==2):
611 | starting_observation, starting_state = env.reset(evaluating=True, returnStartState=True, isSwimmer=True)
612 | else:
613 | starting_observation, starting_state = env.reset(evaluating=True, returnStartState=True)
614 | #start swimmer heading in correct direction
615 | if(which_agent==2):
616 | starting_state[2] = desired_snake_headingInit
617 | starting_observation, starting_state = env.reset(starting_state, returnStartState=True)
618 |
619 | #desired trajectory to follow
620 | starting_observation_NNinput = from_observation_to_usablestate(starting_observation, which_agent, True)
621 | desired_x = make_trajectory(args.desired_traj_type, starting_observation_NNinput, x_index, y_index, which_agent)
622 |
623 | #perform 1 MPC rollout
624 | #depending on follow_trajectories, either move forward or follow desired_traj_type
625 | if(noise_actions_during_MPC_rollouts):
626 | curr_noise_amount = 0.005
627 | else:
628 | curr_noise_amount=0
629 | resulting_x, selected_u, ep_rew, _ = mpc_controller.perform_rollout(starting_state, starting_observation,
630 | starting_observation_NNinput, desired_x,
631 | follow_trajectories, horiz_penalty_factor,
632 | forward_encouragement_factor, heading_penalty_factor,
633 | noise_actions_during_MPC_rollouts, curr_noise_amount)
634 |
635 | #save info from MPC rollout
636 | list_rewards.append(ep_rew)
637 | selected_multiple_u.append(selected_u)
638 | resulting_multiple_x.append(resulting_x)
639 | starting_states.append(starting_state)
640 |
641 | if(args.visualize_MPC_rollout):
642 | input("\n\nPAUSE BEFORE VISUALIZATION... Press Enter to continue...")
643 | for vis_index in range(num_trajectories_for_aggregation):
644 | visualize_rendering(starting_states[vis_index], selected_multiple_u[vis_index], env, dt_steps, dt_from_xml, which_agent)
645 |
646 | #bookkeeping
647 | avg_rew = np.mean(np.array(list_rewards))
648 | std_rew = np.std(np.array(list_rewards))
649 | print("############# Avg reward for ", num_trajectories_for_aggregation, " MPC rollouts: ", avg_rew)
650 | print("############# Std reward for ", num_trajectories_for_aggregation, " MPC rollouts: ", std_rew)
651 | print("############# Rewards for the ", num_trajectories_for_aggregation, " MPC rollouts: ", list_rewards)
652 |
653 | #save pts_used_so_far + performance achieved by those points
654 | list_num_datapoints.append(dataX.shape[0]+dataX_new.shape[0])
655 | list_avg_rew.append(avg_rew)
656 |
657 | ##############################
658 | ### Aggregate data
659 | ##############################
660 |
661 | full_states_list = []
662 | full_controls_list = []
663 | if(counter_agg_iters<(num_aggregation_iters-1)):
664 |
665 | ##############################
666 | ### aggregate some rollouts into training set
667 | ##############################
668 |
669 | x_array = np.array(resulting_multiple_x)[0:(rollouts_forTraining+1)]
670 | if(which_agent==6 or which_agent==1):
671 | u_array = np.array(selected_multiple_u)[0:(rollouts_forTraining+1)]
672 | else:
673 | u_array = np.squeeze(np.array(selected_multiple_u), axis=2)[0:(rollouts_forTraining+1)]
674 |
675 | for i in range(rollouts_forTraining):
676 |
677 | if(which_agent==6 or which_agent==1):
678 | x= np.array(x_array[i])
679 | u= np.squeeze(u_array[i], axis=1)
680 | else:
681 | x= x_array[i] #[N+1, NN_inp]
682 | u= u_array[i] #[N, actionSize]
683 |
684 | newDataX= np.copy(x[0:-1, :])
685 | newDataY= np.copy(u)
686 | newDataZ= np.copy(x[1:, :]-x[0:-1, :])
687 |
688 | # make this new data a bit noisy before adding it into the dataset
689 | if(make_aggregated_dataset_noisy):
690 | newDataX = add_noise(newDataX, noiseToSignal)
691 | newDataZ = add_noise(newDataZ, noiseToSignal)
692 |
693 | # the actual aggregation
694 | dataX_new = np.concatenate((dataX_new, newDataX))
695 | dataY_new = np.concatenate((dataY_new, newDataY))
696 | dataZ_new = np.concatenate((dataZ_new, newDataZ))
697 |
698 | ##############################
699 | ### aggregate the rest of the rollouts into validation set
700 | ##############################
701 |
702 | x_array = np.array(resulting_multiple_x)[rollouts_forTraining:len(resulting_multiple_x)]
703 | # ^ dim: [rollouts_forValidation x stepsPerEpisode+1 x stateSize]
704 | if(which_agent==6 or which_agent==1):
705 | u_array = np.array(selected_multiple_u)[rollouts_forTraining:len(resulting_multiple_x)]
706 | else:
707 | u_array = np.squeeze(np.array(selected_multiple_u), axis=2)[rollouts_forTraining:len(resulting_multiple_x)]
708 | # rollouts_forValidation x stepsPerEpisode x acSize
709 |
710 | full_states_list = []
711 | full_controls_list = []
712 | for i in range(states_val.shape[0]):
713 | full_states_list.append(states_val[i])
714 | full_controls_list.append(controls_val[i])
715 | for i in range(x_array.shape[0]):
716 | x = np.array(x_array[i])
717 | full_states_list.append(x[0:-1,:])
718 | full_controls_list.append(np.squeeze(u_array[i]))
719 | states_val = np.array(full_states_list)
720 | controls_val = np.array(full_controls_list)
721 |
722 | #save trajectory following stuff (aka trajectory taken) for plotting
723 | np.save(save_dir + '/saved_trajfollow/startingstate_iter' + str(counter_agg_iters) +'.npy', starting_state)
724 | np.save(save_dir + '/saved_trajfollow/control_iter' + str(counter_agg_iters) +'.npy', selected_u)
725 | np.save(save_dir + '/saved_trajfollow/true_iter' + str(counter_agg_iters) +'.npy', desired_x)
726 | np.save(save_dir + '/saved_trajfollow/pred_iter' + str(counter_agg_iters) +'.npy', np.array(resulting_multiple_x))
727 |
728 | #bookkeeping
729 | if(not(print_minimal)):
730 | print("\n\nDONE WITH BIG LOOP ITERATION ", counter_agg_iters ,"\n\n")
731 | print("training dataset size: ", dataX.shape[0] + dataX_new.shape[0])
732 | if(len(full_states_list)>0):
733 | print("validation dataset size: ", np.concatenate(full_states_list).shape[0])
734 | print("Time taken: {:0.2f} s\n\n".format(time.time()-starting_big_loop))
735 | counter_agg_iters= counter_agg_iters+1
736 |
737 | #save things after every agg iteration
738 | np.save(save_dir + '/errors_1_per_agg.npy', errors_1_per_agg)
739 | np.save(save_dir + '/errors_5_per_agg.npy', errors_5_per_agg)
740 | np.save(save_dir + '/errors_10_per_agg.npy', errors_10_per_agg)
741 | np.save(save_dir + '/errors_50_per_agg.npy', errors_50_per_agg)
742 | np.save(save_dir + '/errors_100_per_agg.npy', errors_100_per_agg)
743 | np.save(save_dir + '/avg_rollout_rewards_per_agg.npy', list_avg_rew)
744 | np.save(save_dir + '/losses/list_training_loss.npy', training_loss_list)
745 | np.save(save_dir + '/losses/list_old_loss.npy', old_loss_list)
746 | np.save(save_dir + '/losses/list_new_loss.npy', new_loss_list)
747 |
748 | ##############################
749 | ### perform a bunch of MPC rollouts to save for later mbmf TRPO usage
750 | ##############################
751 |
752 | all_rollouts_to_save = []
753 | if(args.num_rollouts_save_for_mf>0):
754 | print("##############################################")
755 | print("#### Performing MPC rollouts to save for later mbmf TRPO usage")
756 | print("##############################################\n")
757 |
758 | #init vars
759 | list_rewards=[]
760 | starting_states=[]
761 | num_saved = 0
762 | rollout_num = 0
763 | while(num_saved < args.num_rollouts_save_for_mf):
764 | if(not(print_minimal)):
765 | print("\nSo far, saved ", num_saved, " rollouts")
766 | print("Currently, on rollout #", rollout_num)
767 |
768 | #reset env before performing rollout
769 | if(which_agent==2):
770 | starting_observation, starting_state = env.reset(evaluating=True, returnStartState=True, isSwimmer=True)
771 | else:
772 | starting_observation, starting_state = env.reset(evaluating=True, returnStartState=True)
773 | if(which_agent==2):
774 | starting_state[2] = desired_snake_headingInit
775 | starting_observation, starting_state = env.reset(starting_state, returnStartState=True)
776 | starting_observation_NNinput = from_observation_to_usablestate(starting_observation, which_agent, True)
777 |
778 | #perform 1 MPC rollout
779 | startrollout = time.time()
780 | curr_noise_amount=0
781 | _, _, ep_rew, rollout_saved = mpc_controller.perform_rollout(starting_state, starting_observation,
782 | starting_observation_NNinput, desired_x,
783 | follow_trajectories, horiz_penalty_factor,
784 | forward_encouragement_factor, heading_penalty_factor,
785 | noise_actions_during_MPC_rollouts, curr_noise_amount)
786 |
787 | if(not(print_minimal)):
788 | print("Time taken for a single rollout: {:0.2f} s\n\n".format(time.time()-startrollout))
789 |
790 | #save rollouts
791 | rollout_num += 1
792 | if(ep_rew>min_rew_for_saving):
793 | list_rewards.append(ep_rew)
794 | all_rollouts_to_save.append(rollout_saved)
795 | starting_states.append(starting_state)
796 | num_saved += 1
797 |
798 | #bookkeeping
799 | if(len(list_rewards)>0):
800 |
801 | #get avg rew
802 | avg_rew = np.mean(np.array(list_rewards))
803 | print("############# Avg over all selected runs: ", avg_rew)
804 | print("############# Rewards of all selected runs: ", list_rewards)
805 |
806 | #save the rollouts for later MBMF usage
807 | pathname_savedMPCrollouts = save_dir + '/savedRollouts_avg'+ str(int(avg_rew)) +'.save'
808 | pathname2_savedMPCrollouts = save_dir + '/savedRollouts.save'
809 | f = open(pathname_savedMPCrollouts, 'wb')
810 | cPickle.dump(all_rollouts_to_save, f, protocol=cPickle.HIGHEST_PROTOCOL)
811 | f.close()
812 | f = open(pathname2_savedMPCrollouts, 'wb')
813 | cPickle.dump(all_rollouts_to_save, f, protocol=cPickle.HIGHEST_PROTOCOL)
814 | f.close()
815 |
816 | #save the starting states of these rollouts, in case want to visualize them later
817 | f = open(save_dir + '/savedRollouts_startingStates.save', 'wb')
818 | cPickle.dump(starting_states, f, protocol=cPickle.HIGHEST_PROTOCOL)
819 | f.close()
820 |
821 | print("Saved MPC rollouts for later mbmf TRPO usage.")
822 |
823 | np.save(save_dir + '/datapoints_MB.npy', list_num_datapoints)
824 | np.save(save_dir + '/performance_MB.npy', list_avg_rew)
825 |
826 | print("ALL DONE.")
827 |
828 | return
829 |
830 | if __name__ == '__main__':
831 | main()
832 |
--------------------------------------------------------------------------------
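
A note on the data conventions in main.py above: the dynamics model is trained on normalized (state, action) inputs and normalized state-difference labels (dataZ = nextstate - state), and the same mean/std statistics are later passed to Dyn_Model and MPCController so predictions can be un-normalized. Below is a minimal sketch of that preprocessing, assuming float arrays that are mutated in place as main.py does; the function name and the stats dict layout are illustrative only, not part of the repo.

    import numpy as np

    def normalize_for_dynamics_training(dataX, dataY, dataZ):
        #dataX: states, dataY: actions, dataZ: next_state - state (the labels)
        stats = {}
        for name, arr in (('x', dataX), ('y', dataY), ('z', dataZ)):
            stats['mean_' + name] = np.mean(arr, axis=0)
            arr -= stats['mean_' + name]
            stats['std_' + name] = np.std(arr, axis=0)
            #nan_to_num guards against zero-variance components, as in main.py
            arr[:] = np.nan_to_num(arr / stats['std_' + name])
        inputs = np.concatenate((dataX, dataY), axis=1)  #[N, state_dim + action_dim]
        outputs = np.copy(dataZ)                         #[N, state_dim]
        return inputs, outputs, stats
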
/mbmf.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 | import math
4 | npr = np.random
5 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy
6 | import tensorflow as tf
7 | from six.moves import cPickle
8 | from collect_samples import CollectSamples
9 | from get_true_action import GetTrueAction
10 | import os
11 | import copy
12 | from helper_funcs import create_env
13 | from helper_funcs import perform_rollouts
14 | from helper_funcs import add_noise
15 | from feedforward_network import feedforward_network
16 | from helper_funcs import visualize_rendering
17 | import argparse
18 |
19 | #TRPO things
20 | from rllab.envs.normalized_env import normalize
21 | from rllab.algos.trpo import TRPO
22 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
23 | from rllab.optimizers.conjugate_gradient_optimizer import ConjugateGradientOptimizer
24 | from rllab.optimizers.conjugate_gradient_optimizer import FiniteDifferenceHvp
25 | from rllab.misc.instrument import run_experiment_lite
26 |
27 | def nn_policy(inputState, junk1, outputSize, junk2, junk3, junk4):
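  | # note: the junk1/junk2/junk3/junk4 arguments appear to be unused placeholders that keep this
  | # signature compatible with how the dynamics-net builder is called elsewhere; only inputState and
  | # outputSize are used below (the architecture is hard-coded to 2 hidden layers of 64 tanh units)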
28 | #init vars
29 | x = inputState
30 | initializer = tf.contrib.layers.xavier_initializer(uniform=False, seed=None, dtype=tf.float64)
31 | fc = tf.contrib.layers.fully_connected
32 | weights_reg = tf.contrib.layers.l2_regularizer(scale=0.001)
33 | #hidden layer 1
34 | fc1 = fc(x, num_outputs= 64, activation_fn=None, trainable=True, reuse=False, weights_initializer=initializer,
35 | biases_initializer=initializer, weights_regularizer=weights_reg)
36 | h1 = tf.tanh(fc1)
37 | #hidden layer 2
38 | fc2 = fc(h1, num_outputs= 64, activation_fn=None, trainable=True, reuse=False, weights_initializer=initializer,
39 | biases_initializer=initializer, weights_regularizer=weights_reg)
40 | h2 = tf.tanh(fc2)
41 | # output layer
42 | output = fc(h2, num_outputs=outputSize, activation_fn=None, trainable=True, reuse=False,
43 | weights_initializer=initializer, biases_initializer=initializer)
44 | return output
45 |
46 | def run_task(v):
47 |
48 | which_agent=v["which_agent"]
49 | env,_ = create_env(which_agent)
50 | baseline = LinearFeatureBaseline(env_spec=env.spec)
51 | optimizer_params = dict(base_eps=1e-5)
52 |
53 | #how many iters
54 | num_trpo_iters = 2500
55 | if(which_agent==1):
56 | num_trpo_iters = 2500
57 | if(which_agent==2):
58 | steps_per_rollout=333
59 | num_trpo_iters = 200
60 | if(which_agent==4):
61 | num_trpo_iters= 2000
62 | if(which_agent==6):
63 | num_trpo_iters= 2000
64 |
65 | #recreate the policy
66 | policy = GaussianMLPPolicy(env_spec=env.spec, hidden_sizes=(v["depth_fc_layers"], v["depth_fc_layers"]), init_std=v["std_on_mlp_policy"])
67 | all_params = np.concatenate((v["policy_values"], policy._l_log_std.get_params()[0].get_value()))
68 | policy.set_param_values(all_params)
69 |
70 |
71 | algo = TRPO(
72 | env=env,
73 | policy=policy,
74 | baseline=baseline,
75 | batch_size=v["trpo_batchsize"],
76 | max_path_length=v["steps_per_rollout"],
77 | n_itr=num_trpo_iters,
78 | discount=0.995,
79 | optimizer=v["ConjugateGradientOptimizer"](hvp_approach=v["FiniteDifferenceHvp"](**optimizer_params)),
80 | step_size=0.05,
81 | plot_true=True)
82 |
83 | #train the policy
84 | algo.train()
85 |
86 | ##########################################
87 | ##########################################
88 |
89 | #ARGUMENTS TO SPECIFY
90 | parser = argparse.ArgumentParser()
91 | parser.add_argument('--save_trpo_run_num', type=int, default=1)
92 | parser.add_argument('--run_num', type=int, default=1)
93 | parser.add_argument('--which_agent', type=int, default=1)
94 | parser.add_argument('--std_on_mlp_policy', type=float, default=0.5)
95 | parser.add_argument('--num_workers_trpo', type=int, default=2)
96 | parser.add_argument('--might_render', action="store_true", dest='might_render', default=False)
97 | parser.add_argument('--visualize_mlp_policy', action="store_true", dest='visualize_mlp_policy', default=False)
98 | parser.add_argument('--visualize_on_policy_rollouts', action="store_true", dest='visualize_on_policy_rollouts', default=False)
99 | parser.add_argument('--print_minimal', action="store_true", dest='print_minimal', default=False)
100 | parser.add_argument('--use_existing_pretrained_policy', action="store_true", dest='use_existing_pretrained_policy', default=False)
101 | args = parser.parse_args()
102 |
103 | ##########################################
104 | ##########################################
105 |
106 | #save args
107 | save_trpo_run_num= args.save_trpo_run_num
108 | run_num = args.run_num
109 | which_agent = args.which_agent
110 | visualize_mlp_policy = args.visualize_mlp_policy
111 | visualize_on_policy_rollouts = args.visualize_on_policy_rollouts
112 | print_minimal = args.print_minimal
113 | std_on_mlp_policy = args.std_on_mlp_policy
114 |
115 | #swimmer
116 | trpo_batchsize = 50000
117 | if(which_agent==2):
118 | #training vars for new policy
119 | batchsize = 512
120 | nEpoch = 70
121 | learning_rate = 0.001
122 | #aggregation for training of new policy
123 | num_agg_iters = 3
124 | num_rollouts_to_agg= 5
125 | num_rollouts_testperformance = 2
126 | start_using_noised_actions = 0
127 | #other
128 | do_trpo = True
129 | #cheetah
130 | if(which_agent==4):
131 | #training vars for new policy
132 | batchsize = 512
133 | nEpoch = 300
134 | learning_rate = 0.001
135 | #aggregation for training of new policy
136 | num_agg_iters = 3
137 | num_rollouts_to_agg= 2
138 | num_rollouts_testperformance = 2
139 | start_using_noised_actions = 10
140 | #other
141 | do_trpo = True
142 | #hopper
143 | if(which_agent==6):
144 | #training vars for new policy
145 | batchsize = 512
146 | nEpoch = 200 #70
147 | learning_rate = 0.001
148 | #aggregation for training of new policy
149 | num_agg_iters = 5 #10
150 | num_rollouts_to_agg= 5 ###10
151 | num_rollouts_testperformance = 3
152 | start_using_noised_actions = 50
153 | #other
154 | do_trpo = True
155 | trpo_batchsize = 25000
156 | #ant
157 | if(which_agent==1):
158 | #training vars for new policy
159 | batchsize = 512
160 | nEpoch = 200
161 | learning_rate = 0.001
162 | #aggregation for training of new policy
163 | num_agg_iters = 5
164 | num_rollouts_to_agg= 5
165 | num_rollouts_testperformance = 3
166 | start_using_noised_actions = 50
167 | #other
168 | do_trpo = True
169 |
170 | ##########################################
171 | ##########################################
172 |
173 | #get vars from saved MB run
174 | param_dict = np.load('run_'+ str(run_num) + '/params.pkl')
175 | N = param_dict['num_control_samples']
176 | horizon = param_dict['horizon']
177 | num_fc_layers_old = param_dict['num_fc_layers']
178 | depth_fc_layers_old = param_dict['depth_fc_layers']
179 | lr_olddynmodel = param_dict['lr']
180 | batchsize_olddynmodel = param_dict['batchsize']
181 | dt_steps = param_dict['dt_steps']
182 | steps_per_rollout = param_dict['steps_per_episode']
183 | tf_datatype = param_dict['tf_datatype']
184 | seed = param_dict['seed']
185 | if(tf_datatype=="<dtype: 'float64'>"):
186 | tf_datatype = tf.float64
187 | else:
188 | tf_datatype = tf.float32
189 |
190 | #load the saved MPC rollouts
191 | f = open('run_'+ str(run_num)+'/savedRollouts.save', 'rb')
192 | allData = cPickle.load(f)
193 | f.close()
194 |
195 | ##########################################
196 | ##########################################
197 |
198 | #create env
199 | env, dt_from_xml = create_env(which_agent)
200 |
201 | # set tf seed
202 | npr.seed(seed)
203 | tf.set_random_seed(seed)
204 |
205 | #init vars
206 | noise_onpol_rollouts=0.005
207 | plot=False
208 | print_frequency = 20
209 | validation_frequency = 50
210 | num_fc_layers=2
211 | depth_fc_layers=64
212 | save_dir = 'run_'+ str(run_num)+'/mbmf'
213 | if not os.path.exists(save_dir):
214 | os.makedirs(save_dir)
215 |
216 | #convert saved rollouts into array
217 | allDataArray=[]
218 | allControlsArray=[]
219 | for i in range(len(allData)):
220 | allDataArray.append(allData[i]['observations'])
221 | allControlsArray.append(allData[i]['actions'])
222 | training_data=np.concatenate(allDataArray)
223 | labels=np.concatenate(allControlsArray)
224 |
225 | if(len(labels.shape)==3):
226 | labels=np.squeeze(labels)
227 | print("\n(total) Data size ", training_data.shape[0],"\n\n")
228 |
229 | ##################################################################################
230 |
231 | # set aside some of the training data for validation
232 | validnum = 10000
233 | if((which_agent==6)or(which_agent==2)or(which_agent==1)):
234 | validnum=700
235 | num = training_data.shape[0]-validnum
236 | validation_x = training_data[num:num+validnum,:]
237 | training_data=training_data[0:num,:]
238 | validation_z = labels[num:num+validnum,:]
239 | labels=labels[0:num,:]
240 | print("\nTraining data size ", training_data.shape[0])
241 | print("Validation data size ", validation_x.shape[0],"\n")
242 |
243 | if(args.might_render or args.visualize_mlp_policy or args.visualize_on_policy_rollouts):
244 | might_render=True
245 | else:
246 | might_render=False
247 | #this somehow prevents a seg fault from happening in the later visualization
248 | if(might_render):
249 | new_env = copy.deepcopy(env)
250 | new_env.render()
251 |
252 | #gpu options for tensorflow
253 | gpu_device = 0
254 | gpu_frac = 0.3
255 | os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_device)
256 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_frac)
257 | config = tf.ConfigProto(gpu_options=gpu_options,
258 | log_device_placement=False,
259 | allow_soft_placement=True,
260 | inter_op_parallelism_threads=1,
261 | intra_op_parallelism_threads=1)
262 |
263 | #add SL noise to training data inputs and outputs
264 | '''TO DO'''
265 |
266 | #keep track of sample complexity
267 | datapoints_used_forMB = np.load('run_'+ str(run_num) + '/datapoints_MB.npy')[-1]
268 | datapoints_used_to_init_imit = training_data.shape[0]
269 | total_datapoints = datapoints_used_forMB + datapoints_used_to_init_imit #points used thus far
270 | imit_list_num_datapoints = []
271 | imit_list_avg_rew = []
272 |
273 | with tf.Session(config=config) as sess:
274 |
275 | if(not(args.use_existing_pretrained_policy)):
276 |
277 | #init vars
278 | g=GetTrueAction()
279 | g.make_model(sess, env, 'run_'+ str(run_num), tf_datatype, num_fc_layers_old, depth_fc_layers_old, which_agent,
280 | lr_olddynmodel, batchsize_olddynmodel, N, horizon, steps_per_rollout, dt_steps, print_minimal)
281 | nData=training_data.shape[0]
282 | inputSize = training_data.shape[1]
283 | outputSize = labels.shape[1]
284 |
285 | #placeholders
286 | inputs_placeholder = tf.placeholder(tf_datatype, shape=[None, inputSize], name='inputs')
287 | labels_placeholder = tf.placeholder(tf_datatype, shape=[None, outputSize], name='outputs')
288 |
289 | #output of nn
290 | curr_output = nn_policy(inputs_placeholder, inputSize, outputSize, num_fc_layers, depth_fc_layers, tf_datatype)
291 |
292 | #define training
293 | theta = tf.trainable_variables()
294 | loss = tf.reduce_mean(tf.square(curr_output - labels_placeholder))
295 | opt = tf.train.AdamOptimizer(learning_rate)
296 | gv = [(g,v) for g,v in opt.compute_gradients(loss, theta) if g is not None]
297 | train_step = opt.apply_gradients(gv)
298 |
299 | #get all the uninitialized variables (ie right now all of them)
300 | list_vars=[]
301 | for var in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES):
302 | if(not(tf.is_variable_initialized(var).eval())):
303 | list_vars.append(var)
304 | sess.run(tf.variables_initializer(list_vars))
305 |
306 | #aggregation iterations
307 | for agg_iter in range(num_agg_iters):
308 |
309 | print("ON AGGREGATION ITERATION ", agg_iter)
310 | rewards_for_this_iter=[]
311 | plot_trainingloss_x=[]
312 | plot_trainingloss_y=[]
313 | plot_validloss_x=[]
314 | plot_validloss_y=[]
315 |
316 | for i in range(nEpoch):
317 |
318 | ################################
319 | ############ TRAIN #############
320 | ################################
321 |
322 | avg_loss=0
323 | iters_in_batch=0
324 | range_of_indeces = np.arange(training_data.shape[0])
325 | indeces = npr.choice(range_of_indeces, size=(training_data.shape[0],), replace=False)
326 |
327 | for batch in range(int(math.floor(nData / batchsize))):
328 | # Batch the training data
329 | inputs = training_data[indeces[batch*batchsize:(batch+1)*batchsize], :]
330 | outputs = labels[indeces[batch*batchsize:(batch+1)*batchsize], :]
331 |
332 | #one iteration of feedforward training
333 | _, my_loss = sess.run([train_step, loss],
334 | feed_dict={inputs_placeholder: inputs, labels_placeholder: outputs})
335 |
336 | #loss
337 | avg_loss+= np.sqrt(my_loss)
338 | iters_in_batch+=1
339 |
340 | ################################
341 | ###### SAVE TRAIN LOSSES #######
342 | ################################
343 |
344 | if(iters_in_batch==0):
345 | iters_in_batch=1
346 |
347 | current_loss = avg_loss/iters_in_batch
348 |
349 | #save training losses
350 | if(not(print_minimal)):
351 | if(i%print_frequency==0):
352 | print("training loss: ", current_loss, ", nEpoch: ", i)
353 | plot_trainingloss_x.append(i)
354 | plot_trainingloss_y.append(current_loss)
355 | np.save(save_dir + '/plot_trainingloss_x.npy', plot_trainingloss_x)
356 | np.save(save_dir + '/plot_trainingloss_y.npy', plot_trainingloss_y)
357 |
358 | ################################
359 | ########## VALIDATION ##########
360 | ################################
361 |
362 | if((i%validation_frequency)==0):
363 | avg_valid_loss=0
364 | iters_in_valid=0
365 |
366 | range_of_indeces = np.arange(validation_x.shape[0])
367 | indeces = npr.choice(range_of_indeces, size=(validation_x.shape[0],), replace=False)
368 |
369 | for batch in range(int(math.floor(validation_x.shape[0] / batchsize))):
370 | # Batch the validation data
371 | inputs = validation_x[indeces[batch*batchsize:(batch+1)*batchsize], :]
372 | outputs = validation_z[indeces[batch*batchsize:(batch+1)*batchsize], :]
373 |
374 | #one forward pass, to compute the validation loss (no train_step here)
375 | my_loss, _ = sess.run([loss, curr_output],
376 | feed_dict={inputs_placeholder: inputs, labels_placeholder: outputs})
377 |
378 | #loss
379 | avg_valid_loss+= np.sqrt(my_loss)
380 | iters_in_valid+=1
381 |
382 | curr_valid_loss = avg_valid_loss/iters_in_valid
383 |
384 | #save validation losses
385 | plot_validloss_x.append(i)
386 | plot_validloss_y.append(curr_valid_loss)
387 | if(not(print_minimal)):
388 | print("validation loss: ", curr_valid_loss, ", nEpoch: ", i, "\n")
389 | np.save(save_dir + '/plot_validloss_x.npy', plot_validloss_x)
390 | np.save(save_dir + '/plot_validloss_y.npy', plot_validloss_y)
391 |
392 | print("DONE TRAINING.")
393 | print("final training loss: ", current_loss, ", nEpoch: ", i)
394 | print("final validation loss: ", curr_valid_loss, ", nEpoch: ", i)
395 |
396 | ##################
397 | ##### PLOT #######
398 | ##################
399 | if(plot):
400 | plt.plot(plot_validloss_x, plot_validloss_y, 'r')
401 | plt.plot(plot_trainingloss_x, plot_trainingloss_y, 'g')
402 | plt.show()
403 |
404 | ##################################################
405 | ##### RUN ON-POLICY ROLLOUTS --- DAGGER ##########
406 | ##################################################
407 |
408 | print("\n\nCollecting on-policy rollouts...\n\n")
409 | starting_states = []
410 | observations = []
411 | actions=[]
412 | true_actions=[]
413 |
414 | for rollout in range(num_rollouts_to_agg):
415 | if(not(print_minimal)):
416 | print("\nOn rollout #", rollout)
417 | total_rew = 0
418 |
419 | starting_observation, starting_state = env.reset(returnStartState=True)
420 | curr_ob=np.copy(starting_observation)
421 |
422 | observations_for_rollout = []
423 | actions_for_rollout = []
424 | true_actions_for_rollout=[]
425 | for step in range(steps_per_rollout):
426 |
427 | #get action
428 | action = sess.run([curr_output], feed_dict={inputs_placeholder: np.expand_dims(curr_ob, axis=0)})
429 | action=np.copy(action[0][0]) #1x8
430 |
431 | #### add exploration noise to the action
432 | if(agg_iter>start_using_noised_actions):
433 | action = action + noise_onpol_rollouts*npr.normal(size=action.shape)
434 |
435 | #save obs and ac
436 | observations_for_rollout.append(curr_ob)
437 | actions_for_rollout.append(action)
438 |
439 | #####################################
440 | ##### GET LABEL OF TRUE ACTION ######
441 | #####################################
442 |
443 | true_action = g.get_action(curr_ob)
444 | true_actions_for_rollout.append(true_action)
445 |
446 | #take step
447 | next_ob, rew, done, _ = env.step(action, collectingInitialData=False)
448 | total_rew+= rew
449 | curr_ob= np.copy(next_ob)
450 |
451 | if(done):
452 | break
453 |
454 | if((step%100)==0):
455 | print(" Done with step #: ", step)
456 |
457 | total_datapoints+= step
458 | print("rollout ", rollout," .... reward = ", total_rew)
459 | if(not(print_minimal)):
460 | print("number of steps: ", step)
461 | print("number of steps so far: ", total_datapoints)
462 |
463 | if(visualize_on_policy_rollouts):
464 | input("\n\nPAUSE BEFORE VISUALIZATION... Press Enter to continue...")
465 | visualize_rendering(starting_state, actions_for_rollout, env, dt_steps, dt_from_xml, which_agent)
466 |
467 | starting_states.append(starting_state)
468 | observations.append(observations_for_rollout)
469 | actions.append(actions_for_rollout)
470 | true_actions.append(true_actions_for_rollout)
471 |
472 | rewards_for_this_iter.append(total_rew)
473 |
474 | print("Avg reward for this iter: ", np.mean(rewards_for_this_iter), "\n\n")
475 |
476 | ##################################################
477 | ##### RUN CLEAN ROLLOUTS TO SEE PERFORMANCE ######
478 | ##################################################
479 |
480 | print("\n\nTEST DAGGER PERFORMANCE (clean rollouts)...")
481 | rewards_for_this_iter2=[]
482 | for rollout in range(num_rollouts_testperformance):
483 | total_rew = 0
484 | starting_observation, starting_state = env.reset(returnStartState=True)
485 | curr_ob=np.copy(starting_observation)
486 |
487 | for step in range(steps_per_rollout):
488 |
489 | #get action
490 | action = sess.run([curr_output], feed_dict={inputs_placeholder: np.expand_dims(curr_ob, axis=0)})
491 | action=np.copy(action[0][0]) #1x8
492 |
493 | #take step
494 | next_ob, rew, done, _ = env.step(action, collectingInitialData=False)
495 | total_rew+= rew
496 | curr_ob= np.copy(next_ob)
497 |
498 | if(done):
499 | break
500 | if(not(print_minimal)):
501 | print("reward = ", total_rew)
502 | rewards_for_this_iter2.append(total_rew)
503 | print("Avg DAGGER performance at this iter: ", np.mean(rewards_for_this_iter2), "\n\n")
504 |
505 | ###### SAVE datapoints vs performance
506 | imit_list_num_datapoints.append(total_datapoints)
507 | imit_list_avg_rew.append(np.mean(rewards_for_this_iter2))
508 |
509 | ###########################
510 | ##### AGGREGATE DATA ######
511 | ###########################
512 | if(not(print_minimal)):
513 | print("\nAggregating Data...\n")
514 | training_data = np.concatenate([training_data, np.concatenate(observations)], axis=0)
515 | labels = np.concatenate([labels, np.concatenate(true_actions)], axis=0)
516 |
517 | #save the datapoints vs performance
518 | np.save('run_'+ str(run_num) + '/datapoints_IMIT.npy', imit_list_num_datapoints)
519 | np.save('run_'+ str(run_num) + '/performance_IMIT.npy', imit_list_avg_rew)
520 |
521 | if(not(print_minimal)):
522 | print("Done training the TF policy")
523 |
524 | ######################
525 | ### SAVE NN PARAMS ###
526 | ######################
527 |
528 | #prepare the params for saving
529 | values = []
530 | for t in list_vars[0:6]:
531 | if(t.eval().shape==()):
532 | pass #scalar-shaped variables have nothing to flatten; skip them
533 | else:
534 | values.append(np.ndarray.flatten(t.eval()))
535 | values = np.concatenate(values)
536 |
537 | #save the TF policy params
538 | if(not(print_minimal)):
539 | print("Saving learned TF nn model parameters.")
540 | f = open(save_dir + '/policy_tf_values.save', 'wb')
541 | cPickle.dump(values, f, protocol=cPickle.HIGHEST_PROTOCOL)
542 | f.close()
543 |
544 | else: #use_existing_pretrained_policy is True
545 |
546 | f = open(save_dir + '/policy_tf_values.save', 'rb')
547 | values = cPickle.load(f)
548 | f.close()
549 |
550 | #######################
551 | ### INIT MLP POLICY ###
552 | #######################
553 |
554 | policy = GaussianMLPPolicy(env_spec=env.spec, hidden_sizes=(depth_fc_layers, depth_fc_layers), init_std=std_on_mlp_policy)
555 |
556 | #copy params over to the MLP policy
557 | all_params = np.concatenate((values, policy._l_log_std.get_params()[0].get_value()))
558 | policy.set_param_values(all_params)
559 |
560 | #save the MLP policy
561 | f = open(save_dir + '/policy_mlp.save', 'wb')
562 | cPickle.dump(policy, f, protocol=cPickle.HIGHEST_PROTOCOL)
563 | f.close()
564 | if(not(print_minimal)):
565 | print("Done initializing MLP policy with a pre-trained policy.")
566 |
567 | ##see what this initialized MLP policy looks like
568 | if(visualize_mlp_policy):
569 | input("\n\nPAUSE BEFORE VISUALIZATION... Press Enter to continue...")
570 | states, controls, starting_states, rewards = perform_rollouts(policy, 1, steps_per_rollout, visualize_mlp_policy,
571 | CollectSamples, env, which_agent, dt_steps, dt_from_xml, False)
572 | print("Std of the MLP policy: ", std_on_mlp_policy)
573 | print("Reward of the MLP policy: ", rewards)
574 |
575 | ################################
576 | ### TRAIN MLP POLICY W/ TRPO ###
577 | ################################
578 |
579 | if(do_trpo):
580 | run_experiment_lite(run_task, plot=True, snapshot_mode="all", use_cloudpickle=True, n_parallel=str(args.num_workers_trpo),
581 | exp_name='run_' + str(run_num)+'_std' + str(std_on_mlp_policy)+ '_run'+ str(save_trpo_run_num),
582 | variant=dict(policy_values=values.tolist(), which_agent=which_agent,
583 | trpo_batchsize=trpo_batchsize, steps_per_rollout=steps_per_rollout,
584 | FiniteDifferenceHvp=FiniteDifferenceHvp, ConjugateGradientOptimizer=ConjugateGradientOptimizer,
585 | depth_fc_layers=depth_fc_layers, std_on_mlp_policy=std_on_mlp_policy))
--------------------------------------------------------------------------------
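
The aggregation loop in mbmf.py above is a DAgger-style imitation step: roll out the current TF policy, query the MPC expert (GetTrueAction) for the action it would have taken at each visited state, and append those (observation, expert action) pairs to the supervised dataset before retraining. Below is a minimal sketch of one collection pass, with hypothetical callables policy_act, expert_act, env_reset, and env_step standing in for the TF policy, GetTrueAction.get_action, and the rllab env; the real loop also adds exploration noise once agg_iter exceeds start_using_noised_actions.

    import numpy as np

    def dagger_collect_and_aggregate(policy_act, expert_act, env_reset, env_step,
                                     training_X, training_Y, steps_per_rollout):
        obs = env_reset()
        new_obs, new_labels = [], []
        for _ in range(steps_per_rollout):
            action = policy_act(obs)            #act with the learner's policy...
            new_obs.append(obs)
            new_labels.append(expert_act(obs))  #...but label the state with the expert's action
            obs, done = env_step(action)
            if done:
                break
        #the learner's visited states, paired with expert labels, join the training set
        training_X = np.concatenate([training_X, np.array(new_obs)], axis=0)
        training_Y = np.concatenate([training_Y, np.array(new_labels)], axis=0)
        return training_X, training_Y
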
/mpc_controller.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import numpy.random as npr
3 | import tensorflow as tf
4 | import time
5 | import math
6 | import matplotlib.pyplot as plt
7 | import copy
8 | from six.moves import cPickle
9 | from rllab.misc import tensor_utils
10 | from data_manipulation import from_observation_to_usablestate
11 | from reward_functions import RewardFunctions
12 |
13 | class MPCController:
14 |
15 | def __init__(self, env_inp, dyn_model, horizon, which_agent, steps_per_episode, dt_steps, num_control_samples,
16 | mean_x, mean_y, mean_z, std_x, std_y, std_z, actions_ag, print_minimal, x_index, y_index, z_index, yaw_index,
17 | joint1_index, joint2_index, frontleg_index, frontshin_index, frontfoot_index, xvel_index, orientation_index):
18 |
19 | #init vars
20 | self.env=copy.deepcopy(env_inp)
21 | self.N = num_control_samples
22 | self.which_agent = which_agent
23 | self.horizon = horizon
24 | self.dyn_model = dyn_model
25 | self.steps_per_episode = steps_per_episode
26 | self.mean_x = mean_x
27 | self.mean_y = mean_y
28 | self.mean_z = mean_z
29 | self.std_x = std_x
30 | self.std_y = std_y
31 | self.std_z = std_z
32 | self.x_index = x_index
33 | self.y_index = y_index
34 | self.z_index = z_index
35 | self.yaw_index = yaw_index
36 | self.joint1_index = joint1_index
37 | self.joint2_index = joint2_index
38 | self.frontleg_index = frontleg_index
39 | self.frontshin_index = frontshin_index
40 | self.frontfoot_index = frontfoot_index
41 | self.xvel_index = xvel_index
42 | self.orientation_index = orientation_index
43 | self.actions_ag = actions_ag
44 | self.print_minimal = print_minimal
45 | self.reward_functions = RewardFunctions(self.which_agent, self.x_index, self.y_index, self.z_index, self.yaw_index,
46 | self.joint1_index, self.joint2_index, self.frontleg_index, self.frontshin_index,
47 | self.frontfoot_index, self.xvel_index, self.orientation_index)
48 |
49 | def perform_rollout(self, starting_fullenvstate, starting_observation, starting_observation_NNinput, desired_states, follow_trajectories,
50 | horiz_penalty_factor, forward_encouragement_factor, heading_penalty_factor, noise_actions, noise_amount):
51 |
52 | #lists for saving info
53 | traj_taken=[] #list of states that go into NN
54 | actions_taken=[]
55 | observations = [] #list of observations (direct output of the env)
56 | rewards = []
57 | agent_infos = []
58 | env_infos = []
59 |
60 | #init vars
61 | stop_taking_steps = False
62 | total_reward_for_episode = 0
63 | step=0
64 | curr_line_segment = 0
65 | self.horiz_penalty_factor = horiz_penalty_factor
66 | self.forward_encouragement_factor = forward_encouragement_factor
67 | self.heading_penalty_factor = heading_penalty_factor
68 |
69 | #extend the list of desired states so you don't run out
70 | temp = np.tile(np.expand_dims(desired_states[-1], axis=0), (10,1))
71 | self.desired_states = np.concatenate((desired_states, temp))
72 |
73 | #reset env to the given full env state
74 | if(self.which_agent==5):
75 | self.env.reset()
76 | else:
77 | self.env.reset(starting_fullenvstate)
78 |
79 | #current observation
80 | obs = np.copy(starting_observation)
81 | #current observation in the right format for NN
82 | curr_state = np.copy(starting_observation_NNinput)
83 | traj_taken.append(curr_state)
84 |
85 | #select task or reward func
86 | reward_func = self.reward_functions.get_reward_func(follow_trajectories, self.desired_states, horiz_penalty_factor,
87 | forward_encouragement_factor, heading_penalty_factor)
88 |
89 | #take steps according to the chosen task/reward function
90 | while(stop_taking_steps==False):
91 |
92 | #get optimal action
93 | best_action, best_sim_number, best_sequence, moved_to_next = self.get_action(curr_state, curr_line_segment, reward_func)
94 |
95 | #advance which line segment we are on
96 | if(follow_trajectories):
97 | if(moved_to_next[best_sim_number]==1):
98 | curr_line_segment+=1
99 | print("MOVED ON TO LINE SEGMENT ", curr_line_segment)
100 |
101 | #noise the action
102 | action_to_take= np.copy(best_action)
103 |
104 | #whether to execute noisy or clean actions
105 | if(self.actions_ag=='nn'):
106 | noise_actions=True
107 | if(self.actions_ag=='nc'):
108 | noise_actions=True
109 | if(self.actions_ag=='cc'):
110 | noise_actions=False
111 |
112 | clean_action = np.copy(action_to_take)
113 | if(noise_actions):
114 | noise = noise_amount * npr.normal(size=action_to_take.shape)#
115 | action_to_take = action_to_take + noise
116 | action_to_take=np.clip(action_to_take, -1,1)
117 |
118 | #execute the action
119 | next_state, rew, done, env_info = self.env.step(action_to_take, collectingInitialData=False)
120 |
121 | #check if done
122 | if(done):
123 | stop_taking_steps=True
124 | else:
125 | #save things
126 | observations.append(obs)
127 | rewards.append(rew)
128 | env_infos.append(env_info)
129 | total_reward_for_episode += rew
130 |
131 | #whether to save clean or noisy actions
132 | if(self.actions_ag=='nn'):
133 | actions_taken.append(np.array([action_to_take]))
134 | if(self.actions_ag=='nc'):
135 | actions_taken.append(np.array([clean_action]))
136 | if(self.actions_ag=='cc'):
137 | actions_taken.append(np.array([clean_action]))
138 |
139 | #this is the observation returned by taking a step in the env
140 | obs=np.copy(next_state)
141 |
142 | #get the next state (usable by NN)
143 | just_one=True
144 | next_state = from_observation_to_usablestate(next_state, self.which_agent, just_one)
145 | curr_state=np.copy(next_state)
146 | traj_taken.append(curr_state)
147 |
148 | #bookkeeping
149 | if(not(self.print_minimal)):
150 | if(step%100==0):
151 | print("done step ", step, ", rew: ", total_reward_for_episode)
152 | step+=1
153 |
154 | #when to stop
155 | if(follow_trajectories):
156 | if((step>=self.steps_per_episode) or (curr_line_segment>5)):
157 | stop_taking_steps = True
158 | else:
159 | if(step>=self.steps_per_episode):
160 | stop_taking_steps = True
161 |
162 | if(not(self.print_minimal)):
163 | print("DONE TAKING ", step, " STEPS.")
164 | print("Reward: ", total_reward_for_episode)
165 |
166 | mydict = dict(
167 | observations=tensor_utils.stack_tensor_list(observations),
168 | actions=tensor_utils.stack_tensor_list(actions_taken),
169 | rewards=tensor_utils.stack_tensor_list(rewards),
170 | agent_infos=agent_infos,
171 | env_infos=tensor_utils.stack_tensor_dict_list(env_infos))
172 |
173 | return traj_taken, actions_taken, total_reward_for_episode, mydict
174 |
175 | def get_action(self, curr_nn_state, curr_line_segment, reward_func):
176 | #randomly sample N candidate action sequences
177 | all_samples = npr.uniform(self.env.action_space.low, self.env.action_space.high, (self.N, self.horizon, self.env.action_space.shape[0]))
178 |
179 | #forward simulate the action sequences (in parallel) to get resulting (predicted) trajectories
180 | many_in_parallel = True
181 | resulting_states = self.dyn_model.do_forward_sim([curr_nn_state,0], np.copy(all_samples), many_in_parallel, self.env, self.which_agent)
182 | resulting_states = np.array(resulting_states) #this is [horizon+1, N, statesize]
183 |
184 | #init vars to evaluate the trajectories
185 | scores=np.zeros((self.N,))
186 | done_forever=np.zeros((self.N,))
187 | move_to_next=np.zeros((self.N,))
188 | curr_seg = np.tile(curr_line_segment,(self.N,))
189 | curr_seg = curr_seg.astype(int)
190 | prev_forward = np.zeros((self.N,))
191 | moved_to_next = np.zeros((self.N,))
192 | prev_pt = resulting_states[0]
193 |
194 | #accumulate reward over each timestep
195 | for pt_number in range(resulting_states.shape[0]):
196 |
197 | #array of "the point"... for each sim
198 | pt = resulting_states[pt_number] # N x state
199 |
200 | #how far is the point from the desired trajectory
201 | #how far along the desired traj have you moved since the last point
202 | min_perp_dist, curr_forward, curr_seg, moved_to_next = self.calculate_geometric_trajfollow_quantities(pt, curr_seg, moved_to_next)
203 |
204 | #update reward score
205 | scores, done_forever = reward_func(pt, prev_pt, scores, min_perp_dist, curr_forward, prev_forward, curr_seg,
206 | moved_to_next, done_forever, all_samples, pt_number)
207 |
208 | #update vars
209 | prev_forward = np.copy(curr_forward)
210 | prev_pt = np.copy(pt)
211 |
212 | #pick best action sequence
213 | best_score = np.min(scores)
214 | best_sim_number = np.argmin(scores)
215 | best_sequence = all_samples[best_sim_number]
216 | best_action = np.copy(best_sequence[0])
217 |
218 |
219 |
220 | return best_action, best_sim_number, best_sequence, moved_to_next
221 |
222 | def calculate_geometric_trajfollow_quantities(self, pt, curr_seg, moved_to_next):
223 |
224 | #arrays of line segment points... for each sim
225 | curr_start = self.desired_states[curr_seg]
226 | curr_end = self.desired_states[curr_seg+1]
227 | next_start = self.desired_states[curr_seg+1]
228 | next_end = self.desired_states[curr_seg+2]
229 |
230 | #initialize
231 | min_perp_dist = np.ones((self.N, ))*5000
232 |
233 | ####################################### closest distance from point to current line segment
234 |
235 | #vars
236 | a = pt[:,self.x_index]- curr_start[:,0]
237 | b = pt[:,self.y_index]- curr_start[:,1]
238 | c = curr_end[:,0]- curr_start[:,0]
239 | d = curr_end[:,1]- curr_start[:,1]
240 |
241 | #project point onto line segment
242 | which_line_section = np.divide((np.multiply(a,c) + np.multiply(b,d)), (np.multiply(c,c) + np.multiply(d,d)))
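  | # which_line_section is the parametric projection t = dot(pt - start, end - start) / |end - start|^2
  | # of each sampled point onto its segment: t < 0 falls before the segment start, t > 1 falls past
  | # the segment end, and 0 <= t <= 1 means the closest point lies on the segment itself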
243 |
244 | #point on line segment that's closest to the pt
245 | closest_pt_x = np.copy(which_line_section)
246 | closest_pt_y = np.copy(which_line_section)
247 | closest_pt_x[which_line_section<0] = curr_start[:,0][which_line_section<0]
248 | closest_pt_y[which_line_section<0] = curr_start[:,1][which_line_section<0]
249 | closest_pt_x[which_line_section>1] = curr_end[:,0][which_line_section>1]
250 | closest_pt_y[which_line_section>1] = curr_end[:,1][which_line_section>1]
251 | closest_pt_x[np.logical_and(which_line_section<=1, which_line_section>=0)] = (curr_start[:,0] +
252 | np.multiply(which_line_section,c))[np.logical_and(which_line_section<=1, which_line_section>=0)]
253 | closest_pt_y[np.logical_and(which_line_section<=1, which_line_section>=0)] = (curr_start[:,1] +
254 | np.multiply(which_line_section,d))[np.logical_and(which_line_section<=1, which_line_section>=0)]
255 |
256 | #min dist from pt to that closest point (i.e. closest dist from pt to line segment)
257 | min_perp_dist = np.sqrt((pt[:,self.x_index]-closest_pt_x)*(pt[:,self.x_index]-closest_pt_x) +
258 | (pt[:,self.y_index]-closest_pt_y)*(pt[:,self.y_index]-closest_pt_y))
259 |
260 | ####################################### "forward-ness" of the pt... for each sim
261 | curr_forward = which_line_section
262 |
263 | ###################################### closest distance from point to next line segment
264 |
265 | #vars
266 | a = pt[:,self.x_index]- next_start[:,0]
267 | b = pt[:,self.y_index]- next_start[:,1]
268 | c = next_end[:,0]- next_start[:,0]
269 | d = next_end[:,1]- next_start[:,1]
270 |
271 | #project point onto line segment
272 | which_line_section = np.divide((np.multiply(a,c) + np.multiply(b,d)),
273 | (np.multiply(c,c) + np.multiply(d,d)))
274 |
275 | #point on line segment that's closest to the pt
276 | closest_pt_x = np.copy(which_line_section)
277 | closest_pt_y = np.copy(which_line_section)
278 | closest_pt_x[which_line_section<0] = next_start[:,0][which_line_section<0]
279 | closest_pt_y[which_line_section<0] = next_start[:,1][which_line_section<0]
280 | closest_pt_x[which_line_section>1] = next_end[:,0][which_line_section>1]
281 | closest_pt_y[which_line_section>1] = next_end[:,1][which_line_section>1]
282 | closest_pt_x[np.logical_and(which_line_section<=1, which_line_section>=0)] = (next_start[:,0] +
283 | np.multiply(which_line_section,c))[np.logical_and(which_line_section<=1, which_line_section>=0)]
284 | closest_pt_y[np.logical_and(which_line_section<=1, which_line_section>=0)] = (next_start[:,1] +
285 | np.multiply(which_line_section,d))[np.logical_and(which_line_section<=1, which_line_section>=0)]
286 |
287 | #min dist from pt to that closest point (i.e. closest dist from pt to line segment)
288 | dist = np.sqrt((pt[:,self.x_index]-closest_pt_x)*(pt[:,self.x_index]-closest_pt_x) +
289 | (pt[:,self.y_index]-closest_pt_y)*(pt[:,self.y_index]-closest_pt_y))
290 |
291 | ############################################
292 |
293 | #pick which line segment it's closest to, and update vars accordingly
294 | curr_seg[dist<=min_perp_dist] += 1
295 | moved_to_next[dist<=min_perp_dist] = 1
296 | curr_forward[dist<=min_perp_dist] = which_line_section[dist<=min_perp_dist]
297 | min_perp_dist = np.min([min_perp_dist, dist], axis=0)
298 |
299 | return min_perp_dist, curr_forward, curr_seg, moved_to_next
--------------------------------------------------------------------------------
/plotting/plot_loss.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {
7 | "collapsed": false,
8 | "scrolled": false
9 | },
10 | "outputs": [],
11 | "source": [
12 | "#####################################\n",
13 | "########### TO SPECIFY ##############\n",
14 | "#####################################\n",
15 | "\n",
16 | "save_dir = '../run_1001'\n",
17 | "\n",
18 | "#####################################\n",
19 | "#####################################\n",
20 | "\n",
21 | "\n",
22 | "import numpy as np\n",
23 | "import matplotlib.pyplot as plt\n",
24 | "%matplotlib inline\n",
25 | "\n",
26 | "training_loss = np.load(save_dir + '/losses/list_training_loss.npy')\n",
27 | "onestep_val_loss = np.load(save_dir + '/errors_1_per_agg.npy')\n",
28 | "fivestep_val_loss = np.load(save_dir + '/errors_5_per_agg.npy')\n",
29 | "tenstep_val_loss = np.load(save_dir + '/errors_10_per_agg.npy')\n",
30 | "fiftystep_val_loss = np.load(save_dir + '/errors_50_per_agg.npy')\n",
31 | "hundredstep_val_loss = np.load(save_dir + '/errors_100_per_agg.npy')\n",
32 | "\n",
33 | "plt.figure()\n",
34 | "plt.ylabel(\"Loss\")\n",
35 | "plt.title(\"Training Loss\")\n",
36 | "plt.xlabel(\"Aggregation Iteration\")\n",
37 | "plt.plot(training_loss)\n",
38 | "\n",
39 | "plt.figure()\n",
40 | "plt.ylabel(\"Loss\")\n",
41 | "plt.title(\"1-step Validation Loss\")\n",
42 | "plt.xlabel(\"Aggregation Iteration\")\n",
43 | "plt.plot(onestep_val_loss)\n",
44 | "\n",
45 | "plt.figure()\n",
46 | "plt.ylabel(\"Loss\")\n",
47 | "plt.title(\"5-step Validation Loss\")\n",
48 | "plt.xlabel(\"Aggregation Iteration\")\n",
49 | "plt.plot(fivestep_val_loss)\n",
50 | "\n",
51 | "plt.figure()\n",
52 | "plt.ylabel(\"Loss\")\n",
53 | "plt.title(\"10-step Validation Loss\")\n",
54 | "plt.xlabel(\"Aggregation Iteration\")\n",
55 | "plt.plot(tenstep_val_loss)\n",
56 | "\n",
57 | "plt.figure()\n",
58 | "plt.ylabel(\"Loss\")\n",
59 | "plt.title(\"50-step Validation Loss\")\n",
60 | "plt.xlabel(\"Aggregation Iteration\")\n",
61 | "plt.plot(fiftystep_val_loss)\n",
62 | "\n",
63 | "plt.figure()\n",
64 | "plt.ylabel(\"Loss\")\n",
65 | "plt.title(\"100-step Validation Loss\")\n",
66 | "plt.xlabel(\"Aggregation Iteration\")\n",
67 | "plt.plot(hundredstep_val_loss)"
68 | ]
69 | },
70 | {
71 | "cell_type": "code",
72 | "execution_count": null,
73 | "metadata": {
74 | "collapsed": true
75 | },
76 | "outputs": [],
77 | "source": []
78 | },
79 | {
80 | "cell_type": "code",
81 | "execution_count": null,
82 | "metadata": {
83 | "collapsed": true
84 | },
85 | "outputs": [],
86 | "source": []
87 | },
88 | {
89 | "cell_type": "code",
90 | "execution_count": null,
91 | "metadata": {
92 | "collapsed": true
93 | },
94 | "outputs": [],
95 | "source": []
96 | }
97 | ],
98 | "metadata": {
99 | "anaconda-cloud": {},
100 | "celltoolbar": "Raw Cell Format",
101 | "kernelspec": {
102 | "display_name": "Python 3",
103 | "language": "python",
104 | "name": "python3"
105 | },
106 | "language_info": {
107 | "codemirror_mode": {
108 | "name": "ipython",
109 | "version": 3
110 | },
111 | "file_extension": ".py",
112 | "mimetype": "text/x-python",
113 | "name": "python",
114 | "nbconvert_exporter": "python",
115 | "pygments_lexer": "ipython3",
116 | "version": "3.5.2"
117 | },
118 | "widgets": {
119 | "state": {},
120 | "version": "1.1.2"
121 | }
122 | },
123 | "nbformat": 4,
124 | "nbformat_minor": 1
125 | }
126 |
--------------------------------------------------------------------------------
/plotting/plot_mbmf.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import seaborn as sns
3 | import numpy as np
4 | from numpy import genfromtxt
5 | import joblib
6 | import pandas
7 | import argparse
8 |
9 |
10 | ######################
11 | ## ARGUMENTS TO SPECIFY
12 | ######################
13 |
14 | parser = argparse.ArgumentParser()
15 | parser.add_argument('--run_nums', type=int, nargs='+', default=-5)
16 | parser.add_argument('--seeds', type=int, nargs='+', default=-5)
17 | parser.add_argument('--which_agent', type=int, default=1)
18 | parser.add_argument('--std_on_mlp_policy', type=float, default=0.5)
19 | parser.add_argument('--batchsize_TRPO_mf', type=int, default=50000)
20 | parser.add_argument('--batchsize_TRPO_mbmf', type=int, default=50000)
21 | parser.add_argument('--dont_include_mbmfTRPO', action="store_true", dest='dont_include_mbmfTRPO', default=False)
22 | parser.add_argument('--trpo_dir', type=str, default='/home/anagabandi/rllab/data/local/experiment/')
23 | args = parser.parse_args()
24 |
25 | ######################
26 | ## vars
27 | ######################
28 |
29 | #save args
30 | which_agent = args.which_agent
31 | std_on_mlp_policy = args.std_on_mlp_policy
32 | batchsize_TRPO_mf = args.batchsize_TRPO_mf
33 | batchsize_TRPO_mbmf = args.batchsize_TRPO_mbmf
34 | trpo_dir = args.trpo_dir
35 | #agent name
36 | if(which_agent==2):
37 | agent_name='Swimmer'
38 | if(which_agent==4):
39 | agent_name='Cheetah'
40 | if(which_agent==6):
41 | agent_name='Hopper'
42 | batchsize_TRPO_mbmf= 25000
43 | if(which_agent==1):
44 | agent_name='Ant'
45 |
46 | #plotting vars
47 | plt.rc('text', usetex=True)
48 | plt.rc('font', family='serif', size=10)
49 | sns.set(font_scale=1)
50 | format = 'png'
51 | dpi=200
52 |
53 | ############################
54 | ## vars that depend on number of runs
55 | ############################
56 |
57 | #seeds
58 | how_many_seeds= len(args.seeds)
59 |
60 | #run numbers for MB and imitation learning data
61 | run_num1 = args.run_nums[0]
62 | run_num2 = args.run_nums[0]
63 | run_num3 = args.run_nums[0]
64 | if(how_many_seeds==2):
65 | run_num1 = args.run_nums[0]
66 | run_num2 = args.run_nums[1]
67 | run_num3 = args.run_nums[1]
68 | if(how_many_seeds==3):
69 | run_num1 = args.run_nums[0]
70 | run_num2 = args.run_nums[1]
71 | run_num3 = args.run_nums[2]
72 |
73 | #filenames for MBMF TRPO
74 | mbmf_filename_numbers = [1,1,1]
75 | if(how_many_seeds==2):
76 | mbmf_filename_numbers = [1,2,2]
77 | if(how_many_seeds==3):
78 | mbmf_filename_numbers = [1,2,3]
79 |
80 | #filenames for MF TRPO
81 | mf_filename_numbers = ['_seed_'+str(args.seeds[0])+'_mf_run1','_seed_'+str(args.seeds[0])+'_mf_run1','_seed_'+str(args.seeds[0])+'_mf_run1']
82 | if(how_many_seeds==2):
83 | mf_filename_numbers = ['_seed_'+str(args.seeds[0])+'_mf_run1','_seed_'+str(args.seeds[1])+'_mf_run2','_seed_'+str(args.seeds[1])+'_mf_run2']
84 | if(how_many_seeds==3):
85 | mf_filename_numbers = ['_seed_'+str(args.seeds[0])+'_mf_run1','_seed_'+str(args.seeds[1])+'_mf_run2','_seed_'+str(args.seeds[2])+'_mf_run3']
86 |
87 | ######################
88 | ## load in data
89 | ######################
90 |
91 | #TRPO filenames to load in
92 | pathname_mbmf1 = trpo_dir + 'run_'+ str(run_num1)+'_std'+str(std_on_mlp_policy) + '_run' +str(mbmf_filename_numbers[0])
93 | pathname_mbmf2 = trpo_dir + 'run_'+ str(run_num2)+'_std'+str(std_on_mlp_policy) + '_run' +str(mbmf_filename_numbers[1])
94 | pathname_mbmf3 = trpo_dir + 'run_'+ str(run_num3)+'_std'+str(std_on_mlp_policy) + '_run' +str(mbmf_filename_numbers[2])
95 |
96 | #mf trpo runs
97 | pathname_mf1 = trpo_dir + 'agent_'+str(which_agent)+ mf_filename_numbers[0]
98 | pathname_mf2 = trpo_dir + 'agent_'+str(which_agent)+ mf_filename_numbers[1]
99 | pathname_mf3 = trpo_dir + 'agent_'+str(which_agent)+ mf_filename_numbers[2]
100 |
101 | #load in MB
102 | MB_list_num_datapoints_run1 = np.load('../run_'+ str(run_num1) + '/datapoints_MB.npy')
103 | MB_list_avg_rew_run1 = np.load('../run_'+ str(run_num1) + '/performance_MB.npy')
104 | MB_list_num_datapoints_run2 = np.load('../run_'+ str(run_num2) + '/datapoints_MB.npy')
105 | MB_list_avg_rew_run2 = np.load('../run_'+ str(run_num2) + '/performance_MB.npy')
106 | MB_list_num_datapoints_run3 = np.load('../run_'+ str(run_num3) + '/datapoints_MB.npy')
107 | MB_list_avg_rew_run3 = np.load('../run_'+ str(run_num3) + '/performance_MB.npy')
108 |
109 | #load in imitation
110 | imit_list_num_datapoints_run1 = np.load('../run_'+ str(run_num1) + '/datapoints_IMIT.npy')
111 | imit_list_avg_rew_run1 = np.load('../run_'+ str(run_num1) + '/performance_IMIT.npy')
112 | imit_list_num_datapoints_run2 = np.load('../run_'+ str(run_num2) + '/datapoints_IMIT.npy')
113 | imit_list_avg_rew_run2 = np.load('../run_'+ str(run_num2) + '/performance_IMIT.npy')
114 | imit_list_num_datapoints_run3 = np.load('../run_'+ str(run_num3) + '/datapoints_IMIT.npy')
115 | imit_list_avg_rew_run3 = np.load('../run_'+ str(run_num3) + '/performance_IMIT.npy')
116 |
117 | ######################
118 | ## MB
119 | ######################
120 |
121 | #performance
122 | mb_run1= MB_list_avg_rew_run1[:6]
123 | mb_run2= MB_list_avg_rew_run2[:6]
124 | mb_run3= MB_list_avg_rew_run3[:6]
125 |
126 | #datapoints
127 | mb_num_data = MB_list_num_datapoints_run1[:6]
128 |
129 | #mean and std of performance
130 | mb_y = np.array([mb_run1, mb_run2, mb_run3])
131 | mb_mean = mb_y.mean(axis=0)
132 | mb_std = mb_y.std(axis=0)
133 |
134 |
135 | ######################
136 | ## MBMF
137 | ######################
138 |
139 | if(args.dont_include_mbmfTRPO):
140 | #performance
141 | mbmf_run1 = np.concatenate([mb_run1, imit_list_avg_rew_run1])
142 | mbmf_run2 = np.concatenate([mb_run2, imit_list_avg_rew_run2])
143 | mbmf_run3 = np.concatenate([mb_run3, imit_list_avg_rew_run3])
144 |
145 | #datapoints
146 | mbmf_num_data = np.concatenate([mb_num_data, imit_list_num_datapoints_run1])
147 |
148 | #mean and std of performance
149 | mbmf_y = np.array([mbmf_run1, mbmf_run2, mbmf_run3])
150 | mbmf_mean = mbmf_y.mean(axis=0)
151 | mbmf_std = mbmf_y.std(axis=0)
152 | else:
153 | #performance
154 | mbmf_run1_orig = np.array(pandas.read_csv(pathname_mbmf1+'/progress.csv')['AverageReturn'])
155 | mbmf_run2_orig = np.array(pandas.read_csv(pathname_mbmf2+'/progress.csv')['AverageReturn'])
156 | mbmf_run3_orig = np.array(pandas.read_csv(pathname_mbmf3+'/progress.csv')['AverageReturn'])
157 |
158 | mbmf_cutoff= np.min([mbmf_run1_orig.shape, mbmf_run2_orig.shape, mbmf_run3_orig.shape]) #make them all the same (min) length
159 | mbmf_run1_orig = mbmf_run1_orig[:mbmf_cutoff]
160 | mbmf_run2_orig = mbmf_run2_orig[:mbmf_cutoff]
161 | mbmf_run3_orig = mbmf_run3_orig[:mbmf_cutoff]
162 |
163 | mbmf_run1 = np.concatenate([mb_run1, imit_list_avg_rew_run1, mbmf_run1_orig])
164 | mbmf_run2 = np.concatenate([mb_run2, imit_list_avg_rew_run2, mbmf_run2_orig])
165 | mbmf_run3 = np.concatenate([mb_run3, imit_list_avg_rew_run3, mbmf_run3_orig])
166 |
167 | #datapoints
168 | datapoints_used_thus_far = imit_list_num_datapoints_run1[-1]
169 | mbmf_num_data_orig = batchsize_TRPO_mbmf*np.arange(mbmf_run1_orig.shape[0]+1)[1:] + datapoints_used_thus_far
170 | mbmf_num_data = np.concatenate([mb_num_data, imit_list_num_datapoints_run1, mbmf_num_data_orig])
171 |
172 | #mean and std of performance
173 | mbmf_y = np.array([mbmf_run1, mbmf_run2, mbmf_run3])
174 | mbmf_mean = mbmf_y.mean(axis=0)
175 | mbmf_std = mbmf_y.std(axis=0)
176 |
177 | print("MB datapoints: ", mb_num_data)
178 | print("MBMF datapoints: ", imit_list_num_datapoints_run1)
179 |
180 | ######################
181 | ## MF
182 | ######################
183 |
184 | #performance
185 |
186 | mf_run1 = pandas.read_csv(pathname_mf1+'/progress.csv')['AverageReturn']
187 | mf_run2 = pandas.read_csv(pathname_mf2+'/progress.csv')['AverageReturn']
188 | mf_run3 = pandas.read_csv(pathname_mf3+'/progress.csv')['AverageReturn']
189 |
190 | mf_cutoff = np.min([mf_run1.shape, mf_run2.shape, mf_run3.shape]) #make them all the same (min) length
191 | mf_run1=mf_run1[:mf_cutoff]
192 | mf_run2=mf_run2[:mf_cutoff]
193 | mf_run3=mf_run3[:mf_cutoff]
194 |
195 | #datapoints
196 | mf_num_data = batchsize_TRPO_mf*np.arange(mf_run1.shape[0]+1)[1:]
197 |
198 | #mean and std of performance
199 | mf_y = np.array([mf_run1, mf_run2, mf_run3])
200 | mf_mean = mf_y.mean(axis=0)
201 | mf_std = mf_y.std(axis=0)
202 |
203 | ######################
204 | ## PLOT
205 | ######################
206 |
207 | fig, ax = plt.subplots(figsize=(7,3))
208 |
209 | if(mb_num_data.shape[0]==1):
210 | ax.plot([mb_num_data[0],mb_num_data[0]], [0, mb_mean[0]], linewidth=2, color='g', label='Mb')
211 | else:
212 | ax.plot(mb_num_data, mb_mean, color='g', label='Mb')
213 | ax.fill_between(mb_num_data, mb_mean - mb_std, mb_mean + mb_std, color='g', alpha=0.25)
214 |
215 | ax.plot(mf_num_data, mf_mean, color='b', label='Mf')
216 | ax.fill_between(mf_num_data, mf_mean - mf_std, mf_mean + mf_std, color='b', alpha=0.25)
217 |
218 | ax.plot(mbmf_num_data, mbmf_mean, color='r', label='Mb-Mf (ours)', linewidth=0.5)
219 | ax.fill_between(mbmf_num_data, mbmf_mean - mbmf_std, mbmf_mean + mbmf_std, color='r', alpha=0.25)
220 |
221 | ax.hlines(mf_mean.max(), np.min([mb_num_data[0],mf_num_data[0]]), mf_num_data[-1], color='k', linestyle='--')
222 |
223 | ax.semilogx()
224 | ax.grid(True,which="both",ls="-")
225 | ax.set_xlabel('Steps')
226 | ax.set_ylabel('Cumulative Reward')
227 | ax.set_title(agent_name)
228 |
229 | ax.legend(loc='lower right')
230 | fig.savefig(agent_name+'_comparison.png', dpi=200, bbox_inches='tight')
231 | plt.close(fig)
--------------------------------------------------------------------------------
/plotting/plot_trajfollow.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 11,
6 | "metadata": {
7 | "collapsed": false,
8 | "scrolled": false
9 | },
10 | "outputs": [
11 | {
12 | "name": "stdout",
13 | "output_type": "stream",
14 | "text": [
15 | "(40, 2)\n",
16 | "(1, 1701, 16)\n"
17 | ]
18 | },
19 | {
20 | "data": {
21 | "text/plain": [
22 | "[]"
23 | ]
24 | },
25 | "execution_count": 11,
26 | "metadata": {},
27 | "output_type": "execute_result"
28 | },
29 | {
30 | "data": {
31 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAhcAAAFyCAYAAABGCPg8AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAIABJREFUeJzt3Xd8VFX6x/HPQ5WigKIg9gqsgpAoiiii2HXtirHXtYux\n/9aKvWNB1o64aNbeCy6isisgkNhAUVGsNAENgiAkOb8/nskmhPTcOyV836/XvGbmzrlzn1yG5Jlz\nz3mOhRAQERERiUqTVAcgIiIijYuSCxEREYmUkgsRERGJlJILERERiZSSCxEREYmUkgsRERGJlJIL\nERERiZSSCxEREYmUkgsRERGJlJILERERiZSSCxEREYmUkgsRERGJlJILERERiZSSCxGpNzNbw8y+\nSNxaltvewcxmm9l/zewUMysxs+0q2f/vZlZkZusnN3IRiZOSCxGptxDCMuBEYEvgxnIvDQfWTLz2\nDLAUOLaStzgGGBtCmB1zqCKSREouRKRBQgiTgNuAwWbWz8yOAAYBl4cQvgkhLAZeAnLK72dmvYG/\nAP9MdswiEi8LIaQ6BhHJcGbWHJiM91a0BaaGEAaWe30f4A1gzxDCu4ltdwBnAp1CCEuSH7WIxEXJ\nhYhEwsyy8QRjKfCXEML35V5rAvwIvBlCOM3MLPH8vRDCcSkJWERio8siIhKVfRP3awBblX8hhFAC\nPAUcbmYtgD2ALsCopEYoIkmhngsRaTAz6wlMwpOFXkBHoEcI4fdybXoAHwNHAfsDBwBdEomHiDQi\nSi5EpEHMrBmeWKwFbAdsjl8eGRVCOK1C24+BOcBOwIgQQm6SwxWRJNBlERFpqKuAnsApIYQlIYTP\ngOuAU8xsvwptnwD2xgd+PpncMEUkWdRzISL1lphOOhEYXr4XIjGAcwI+rmKbEMKixPZOwE/A1yGE\nv6QgZBFJgmapDkBEMlcI4SOgZSXbS4AdK9mlCAhoIKdIoxb7ZREzO8fMZprZUjObaGY71HK/fma2\nwswK4o5RRJLmZPz3jpILkUYs1uTCzAYBdwLXAL2BT4DRZtaxhv3aASOBMXHGJyLJYWa7m9m5wN+B\nF0MIP6Q6JhGJT6xjLsxsIvBhCGFw4nlp4Zx7Qwi3VbNfHvAVUAIcHELIii1IEYmdmb0L9AX+Cxyv\ntUREGrfYei4S5YCzgXdKtwXPZMbgv2Sq2u9kYDNgSFyxiUhyhRB2DyGsEULYU4mFSOMX54DOjkBT\nYG6F7XOBrpXtYGZbATcBu4QQSryjo3pmtg6wD/AdsKwB8YqIiKxu1gA2BUaHEBZE9aZpM1skMXXt\nSeCaEMI3pZtrses+aL68iIhIQxyLl+iPRJzJxXygGOhUYXsnvEJfRWsC2wO9zOz+xLYm+FCN5cDe\nIYT3KtnvO4BRo0bRvXv3CMKW2sjNzWXo0KGpDmO1onOefDrnyadznlxffPEFxx13HCT+lkYltuQi\nhLDCzPKBgcAr8L8BnQOBeyvZZRGwbYVt5wC7A4dT9Q++DKB79+5kZWncZ7K0a9dO5zvJdM6TT+c8\n+XTOUybSYQVxXxa5C3g8kWRMAnKB1sDjAGZ2M75w0YmJwZ6fl9/ZzOYBy0IIX8Qcp4iIiEQk1uQi\nhPBMoqbFdfjlkI+BfUIIvySadAY2ijMGERERSa7YB3SGEIYDw6t47eQa9h2CpqSKiIhkFK2KKvWS\nk5OT6hBWOzrnyadznnw6541Dxq+KamZZQH5+fr4GAYmIiNRBQUEB2dnZANkhhMjW8lLPhYiIiERK\nyYWIiIhESsmFiIiIRErJhYiIiERKyYWIiIhESsmFiIiIRErJhYiIiERKyYWIiIhESsmFiIiIRErJ\nhYiISBwyvAJ2Qyi5EBERido330DfvjBjRqojSYnYV0UVERFZrSxdCkccAYsXw7rrpjqalFByISIi\nEqXBg2H6dJg4Edq1S3U0KaHkQkREJCojR8LDD8Ojj8J226U6mpTRmAsREZEofPYZnHUWnHwynHJK\nqqNJKSUXIiIiUfjpJ8jOhmHDUh1Jyim5EBERicJ++8G4cdC6daojSTklFyIiIlExS3UEaUHJhYiI\niERKyYWIiIhESsmFiIiIRErJhYiIiERKyYWIiEhdffstPPssFBenOpK0pORCRESkrkaMgL/9DZYv\nT3UkaUnJhYiISF09/zwcdBC0apXqSNKSkgsREZG6+OILvx1+eKojSVtKLkREROrihRegbVvYe+9U\nR5K2lFyIiIjUxYQJsMsusMYaqY4kbSm5EBERqYtPP12tl1OvDSUXIiIitfXbb/Djj9CzZ6ojSWtK\nLkRERGrr119hjz2gd+9UR5LWYk8uzOwcM5tpZkvNbKKZ7VBN235m9l8zm29mf5jZF2Z2QdwxioiI\n1Mpmm8E770D37qmOJK01i/PNzWwQcCfwN2ASkAuMNrOtQwjzK9llCXAf8Gni8S7AQ2a2OITwSJyx\nioiIJMXvv8OUKfDJJ355ZY89Uh1R5GJNLvBk4sEQwhMAZnYmcABwCnBbxcYhhI+Bj8ttesrMDgd2\nBZRciIhI5lmwAD74AN5/H8aNg4ICKCmBpk1h1KhURxeL2JILM2sOZAM3lW4LIQQzGwP0reV79E60\nvSKWIEVEROJ0xBFezRNgo41gt928bPjOO0PXrp5gNEJx9lx0BJoCcytsnwt0rW5HM/sRWDex/7Uh\nhBGxRCgiIhKnY46BQw7xuhibbAJmqY4oKeK+LFJfuwBtgZ2AW81sRgjh6RTHJCIiUjeHHZbqCFIi\nzuRiPlAMdKqwvRMwp7odQwjfJx5OM7POwLVAtclFbm4u7dq1W2lbTk4OOTk5dQhZRESkccrLyyMv\nL2+lbYWFhbEcy0IIsbwxgJlNBD4MIQxOPDfgB+DeEMLttXyPq4GTQgibV/F6FpCfn59PVlZWRJGL\niIhUUFTklzUa0TiJgoICsrOzAbJDCAVRvW/cdS7uAk43sxPMrBvwANAaeBzAzG42s5Gljc3sbDM7\n0My2TNxOBS4C/hlznCIiItV78UVo3tyrdJZavhxGj677e61YAddc49U+G6FYk4sQwjPAxcB1wEdA\nT2CfEMIviSadgY0qxHNzou1k4CzgkhDCNXHGKSIiUqMWLSAEyMuDP/+EOXPgyCPh4IPhp5/q9l7v\nvAPXXQcLF8YTa4rFPqAzhDAcGF7FaydXeD4MGBZ3TCIiInWWnQ0dO8LZZ8PgwVBcDO3bw3PPwYYb\n1u29nnkGtt660a5Rkq6zRURERNLLhhvCL7/A55/D2LHQqpXPBunQoW7v8/vvnpDk5jbaqalKLkRE\nROriL3/xW32NGgVLlsBpp0UXU5rRqqgiIiLJEgIMG+aFtTbaqOb2GUrJhYiISLK8955fVjn33FRH\nEislFyIiIskybJhfUhkwINWRxEpjLkRERJLl4oth8eJGO5CzlJILERGRZOlbq0XBM54ui4iIiEik\nlFyIiIhIpJRciIiISKSUXIiIiE
iklFyIiIhIpJRciIiIxGnFilRHkHRKLkREROKyfLmvpPrUU6mO\nJKmUXIiIiMSloAAWLYKttkp1JEml5EJERCQur74KbdpAr16pjiSplFyIiIhEraQEbr0V7rwTzjoL\nmjdPdURJpeRCREQkao89BpdfDscfD9dck+pokk5ri4iIiETtnnvgsMPg4YdTHUlKqOdCREQkSrNn\nw9SpMGhQqiNJGfVciIiIRKl9ex/IuZqsgFoZJRciIiJRatUKDjww1VGklC6LiIiISKSUXIiIiMSl\npARmzEh1FEmn5EJERCQu114L/fp5GfDViJILERGRuBx+OMybB++9l+pIkkrJhYiISFx69oRNN4WX\nXkp1JEml5EJERCQuZnDIIfDKKz7+YjWh5EJERKQhQoATT4QnnoDzzoPJk1d+/eCD4eefIT8/NfGl\ngJILERGRhrj+ek8sZs2CYcNg+PCVX99lF1+47JZbPBFZDSi5EBERqa9nn/WFya6/HoqLfduECWWP\nAZo1g7XXhhdegGXLUhNnkqlCp4iISH1MmeKXQ3Jy4MILoU0b3/7669C06cptCwq8JHirVsmPMwWU\nXIiIiNTVrFk+lqJHD3j0Ubj6at9+zDGwxRartu/SBc44I7kxppAui4iIiNRFSQkcf7w/fuklWLwY\n7rjDn991V+riSiOxJxdmdo6ZzTSzpWY20cx2qKbtoWb2tpnNM7NCMxtvZnvHHaOIiEitffCBF8Ua\nORLWXx9GjPDt3btDp04pDS1dxJpcmNkg4E7gGqA38Akw2sw6VrFLf+BtYD8gC3gXeNXMtoszThER\nkVrbdVf46ivYc09//sQTfj9oUOpiSjNx91zkAg+GEJ4IIUwHzgT+AE6prHEIITeEcEcIIT+E8E0I\n4Qrga+CvMccpIiJSe+XHVZx3nt8fckhqYklDsQ3oNLPmQDZwU+m2EEIwszFA31q+hwFrAgtjCVJE\nRKQ+QvDqm+AzRrp0ge3UyV4qzp6LjkBTYG6F7XOBzrV8j0uANsAzEcYlIiJSfyUlvl7IqFH+fI01\n4K/qYC8vbaeimtkxwFXAQSGE+TW1z83NpV27ditty8nJIScnJ6YIRURktTR1KvzwA2y0UaojqZO8\nvDzy8vJW2lZYWBjLseJMLuYDxUDFobOdgDnV7WhmRwMPAUeEEN6tzcGGDh1KVlZWfeIUERGpvXHj\noEUL6NMn1ZHUSWVfuAsKCsjOzo78WLFdFgkhrADygYGl2xJjKAYC46vaz8xygEeBo0MIb8UVn4iI\nSL2MG+eJxWpSbbM+4p4tchdwupmdYGbdgAeA1sDjAGZ2s5mNLG2cuBQyErgImGxmnRK3tWKOU0RE\npGpjx8K0aT6Qc9w4vyTSrRt89FGqI0tLsSYXIYRngIuB64CPgJ7APiGEXxJNOgPlL1qdjg8CvR+Y\nVe52d5xxioiIVOu887wK55w5MHeuT0X98stUR5W2Yh/QGUIYDgyv4rWTKzzfPe54RERE6uTnn+Hz\nz2HIEB/ICWWXRNZbL3VxpTGtLSIiIlKd0ksfO+wACxb446Iir3Ohct+VUnIhIiJSnYkTvYdi440h\nOxtefNGTi3XXhWZpW9EhpZRciIiIVGfCBOjbt6yn4pBDYP58X7RMKqXkQkREpCrFxTBpEuy008rb\nZ8/2kt9SKSUXIiIiVZk2DRYv9p6L8mbNUs9FNXSxSEREpCrz5sHmm8P226+8/eyzNZizGkouRERE\nqrLnnvDNN6tuP/HE5MeSQXRZREREpDZ+/tmLaEmNlFyIiIjUxp13+jiLf/871ZGkPSUXIiIitTF0\nqN8fdVRq48gASi5ERERqUv5yyG+/pS6ODKHkQkREpCZjx5Y97tUrdXFkCCUXIiIiFU2YAK+/Xvb8\njTfKHnfunPx4MoySCxERkYqGDIGLLvLHl10GTz4Ja6zhzydMgBBSF1sGUHIhIiJS0ejR8OWXsGIF\n3Habbxs3zu8LC32dEamSimiJiIiUt2yZ37doAe++W7Y9OxuaN4ett05NXBlEPRciIiLlPfSQ33fo\nAE8/DVtsAUuXQpMmMHUqfPhhauPLAEouREREyrvgAr8/6SR44QUYNKhsvMXWW0ObNikLLVMouRAR\nESn18MNlgzULC72mxaBBqY0pAym5EBERKfW3v5U9/vhj6NYNevRIXTwZSsmFiIhIZb780nstNDOk\nzjRbREREBOCHH1Z+npcHffqkJpYMp+RCREQEVi7xPWIE7LNP6mLJcLosIiIiAtCzZ9njQw5JXRyN\ngHouREREALKy4Ndfvbx3+/apjiajqedCRERWL88/D3feWfn6IO3bw377JT+mRkbJhYiIrD6WLYOz\nzvLeCYmNkgsREVl9PP00/PIL3HyzppjGSMmFiIhkvuXLoaio+jYhwH33wb77wlZbJSeu1ZSSCxER\nyXwtW8Iuu1Tf5q23ID8fBg9OTkyrMc0WERGRzLRokScLpauUrrVW5e2mTYM33oAHH4T+/VW/IgmU\nXIiISOZZuBC6d4d588q2Pfts5W2//BIuvRQ23xyGDdNYiyRQciEiIpln7bXh1luhpAROPRWOPhra\ntau87WGH+ZiMZs2UWCRJ7GMuzOwcM5tpZkvNbKKZ7VBN285m9qSZfWlmxWZ2V9zxiYhIhjrpJJ/1\nAXD99dW3bd5ciUUSxZpcmNkg4E7gGqA38Akw2sw6VrFLS2AecD3wcZyxiYhIhgsBZszwx1tumdpY\nZCVx91zkAg+GEJ4IIUwHzgT+AE6prHEI4fsQQm4IYRSwKObYREQkE/36KxQUwHvv+fP9909pOLKq\n2JILM2sOZAPvlG4LIQRgDNA3ruOKiEgjN2QIDBgA77/vlzrOOAO22AIKC70347ffYOnSVEe5Wouz\n56Ij0BSYW2H7XKBzjMcVEZHG6tNPfcbHFVfAtdfC/Pk+BfXbb+GUU7w4VocO0LYt3H13qqNdbTWa\n2SK5ubm0qzBSOCcnh5ycnBRFJCIikfj2W3juOe+luOUW2HRTuOACf23ttWHddf3xCy/AoEGw667e\ng3HJJXDmmbDGGikLPZ3k5eWRl5e30rbCwsJYjhVncjEfKAY6VdjeCZgT9cGGDh1KVlZW1G8rIiLJ\nMm0aXHYZzJoFe+8N554LG24Ixx8P48eXtevTB5o2LXu+4YZ+f/fdMHmyJyKPPAIjR8LPP/slE6n0\nC3dBQQHZ2dmRHyu2yyIhhBVAPjCwdJuZWeL5+Kr2ExGR1VBxsScRn38OPXrAP/4Bm23mvRWlicX6\n60ObNr594429LXh9i2239d6Mf/0LHnoIttnG961sWXWJXdyzRe4CTjezE8ysG/AA0Bp4HMDMbjaz\nkeV3MLPtzKwX0BZYN/G8e8xxiohIKi1eDHvt5cnByJHw449wwgkrt5k928dcTJ3qj7fd1i+TAAwf\nDrm5PoMkJ8dfe/VVTVFNEQsxZ3VmdjZwKX455GPgvBDClMRrI4BNQgh7lGtfAlQM6vsQwuZ
VvH8W\nkJ+fn6/LIiIijcXnn3vvA8A668CCBZW323ln+M9/oEkT+OMP79m45x44//zkxZrByl0WyQ4hFET1\nvrFX6AwhDA8hbBpCaBVC6FuaWCReO7l8YpHY1iSE0LTCrdLEQkREGqHp02GPxJ+Gpk3hzz9hgw1W\nbvPSS35/3XWeWAB88IHfb7xxcuKUKmnJdRERSR8zZsDuu/sMkFdegY4dvTdi2rSyolkDBsCcOZ54\n7LRT2b5vvQVdusDBB6cicimn0UxFFRGRDLd4MRx0kNeteOcdaN3aa1ZceqkP2lyUKNzcrJkP8uzV\nyxOPUq++CgceqDVE0oCSCxERSQ+tW8Nxx8Hhh8N66/m2Tz8tm3a6ZIknDi+/DD17rlz2+8sv4euv\n4S6td5kOdFlERETSQ5Mm8Pe/Q9euZduaNy8bU3H00b7E+u+/wzffQL9+Ze1eeQVatYKBA5HUU3Ih\nIiKZ5cMP/b5vuWWqXn0V9tzTEwxJOSUXIiKS3kpKysZbgK+Iuu66sNFG/ryoyBcr++tfUxOfrELJ\nhYiIpLenn/ZiWPPm+fPWrX3gZ+nAzWbN4KOPVi26JSmjAZ0iIpK+QoA77oDs7LJBnpdeumq7O+7w\nFVEPOyy58Uml1HMhIiLpIy/PVzYtrR49aZJfBjn33LI2N9zgy66X+v57Hwj6yy/JjVWqpJ4LERFJ\nH8cc4/ctWsD8+V77ols32HffsjZjxnjFztKE44EHoG1bOPbY5McrlVJyISIi6aGwsOzxqFF+v+aa\n8PzzKy+xvnChr5wKnoDcfz+ccYYnGJIWlFyIiEh6KC3vfeaZsOOOXsa7WTNPMEqVlHjp73XW8ceX\nXOKXUCobhyEpo+RCRETSQ+nCY+efD927V97mhRd8bEWfPnDSSd7D8dhjPjVV0oaSCxERSa4Q4LXX\n4JNP4JBDYNttffvgwb7wWLdule+3ZAkceaQ/PuAAX1dk1KiycRqSNpRciIhIcl13HVx7rScHG2xQ\nllxssAFccEHl+/z0U1nRrMGDfYDn1lvD5psnJWSpGyUXIiKSPB984InFkCFwxRU+bqImX3wBe+xR\n9vyuu8rWG5G0pORCRESSo6QEcnO9INaVV3qCUH4WSGWKirzyZrt2MG6cT1FVYpH2lFyIiEhyvPQS\nTJ7ss0JqmyDceacX0Ro/3itwSkZQciEiIsmRlwfbbw+77VZz2yVLYNo0GD7cezt23DH++CQySi5E\nRCR+IfhS6aefXnPb33/32SDffguffQYtW8Yfn0RKyYWIiMTPDGbOhGXLqm83fTocfzx89RW89RZ0\n6JCc+CRSGhUjIiLJ0bSpTz8t788/veLmihVw223Qq5eXAR87Fvr2TU2c0mDquRARkdR56y049FBf\nF2TJErjwQq+D0apVqiOTBlByISIiqdO/PzzyCMye7dU6t9km1RFJBJRciIhI6nToAKeckuooJGIa\ncyEiIiKRUnIhIiLp4euvYc894ccfUx2JNJCSCxERSQ+5uTBjBnTsmOpIpIE05kJEROpvyhR4+WVY\nbz3o2RN69IC1167dviF4SfC334YHHvBtDz+smSKNgJILERGpv4ICn+2xcCEsX+7bNtgAttvO61Qc\nckjZkurlFRfDaafB44+vvGz6SSclI2qJmS6LiIhI/Z12mk8jXbwYpk719UNOOMF7JW6/3WeChLDy\nPr/95vs98YTfLrjAtxcUQDN9520M9K8oIiL1V7q6afPmXqNim23g6KN92/Ll8OuvXvq71Gef+eUT\ngJEjvWdjq628x6J376SGLvFRciEiIvFo0QI6dVp52x9/lD0+4QS4/HIv933ttUkNTeIV+2URMzvH\nzGaa2VIzm2hmO9TQfoCZ5ZvZMjP7ysxOjDtGERFJgjlzvLw3+IyQFSu8V+PKK2GTTVIbm0Qq1uTC\nzAYBdwLXAL2BT4DRZlbpPCMz2xR4DXgH2A64B3jEzPaKM04REYnZlCmwww6+Muqjj8KCBX5/881w\nxRWpjk4iFnfPRS7wYAjhiRDCdOBM4A+gqlqvZwHfhhAuDSF8GUK4H3gu8T4iIpKJnngCdtkFunSB\nyZN9kOd++8FZZ8GLL6Y6OolBbMmFmTUHsvFeCABCCAEYA1S1ju5OidfLG11NexERSbZly1adAVKZ\noiIvjHXiiXDMMfD++z5NFeDee/3+lVfii1NSJs4BnR2BpsDcCtvnAl2r2KdzFe3XMrOWIYQ/ow1R\nRETq7JhjfCbIa69V3WbBAjjqKE8o7rsPzjln5VkjW2wB8+b5oE9pdDRbRERE6qZpUx+MWZ0Q4Pff\n4c03Ya8qhs2tu270sUlaiDO5mA8UAxXmIdEJmFPFPnOqaL+opl6L3Nxc2rVrt9K2nJwccnJyah2w\niIjUQtu2MGtW9W06dvTiWKeeCs8+68WxNtxw1ampkjR5eXnk5eWttK2wsDCWY8WWXIQQVphZPjAQ\neAXAzCzx/N4qdpsA7Fdh296J7dUaOnQoWVlZ9Q9YRERqp21b75Wozrx5nljsvjv88INfItl9dxg7\nNjkxyioq+8JdUFBAdnZ25MeK+7LIXcDjiSRjEj7rozXwOICZ3Qx0CSGU1rJ4ADjHzG4FHsMTkSOA\n/WOOU0REamvNNWHRourbvPuuD/x89FGfJQI+7VRWC7FORQ0hPANcDFwHfAT0BPYJIfySaNIZ2Khc\n+++AA4A9gY/xZOTUEELFGSQiIpIqW2wB338PY8b4AmSV+c9/YOutfR2RUjtUW0NRGpHYB3SGEIYD\nw6t47eRKto3Dp7CKiEg6GjDA1xLZay947jk4/PBV24wbB/37l10+2XPPsnVIpNHTbBEREambLbbw\nBci++86ThoqWLYNp0+D882H77eHGG+GII5IX39SpsNZasPHGyTumrETJhYiI1F3Xrn6rzPTpUFLi\nK6Q2aQJ//3vy4lqxAnr08MezZsH66yfv2PI/6qMSEZFo/fYbbLYZdO+e/GN/9FHZ4zffTP7xBVBy\nISIiURswAL79Ftq3r9/+EyfCeefVrsR4RVOm+P3558NJJ9Xv+NJguiwiIiLpY9Ei6JtYTurAA2H8\neLjmmtoNBl20yAeYAlx1lQaQppCSCxERSR9PPln2eN99/X7QIPjLX2re96GHvOfixRe9QqikjNI6\nERFJH6ee6tNYH3gAevXyQaEdOtS8XwheObR1a5g0yRdWk5RRciEiIumjRQvYdVc44wwfnDl1as0z\nPpYv92XdzzoLttsO7rzTy43XtLiaxEbJhYiIZK7CQth/f3j6aXjqKRg92i+LbLaZL5YmKaEzLyIi\n0VuwAH76yXsS4lJYCLvt5qXI337bH4MnG/trSapUUs+FiIhEa9YsHyvRqxe8/3607/3xx/DHH/64\nRQvYZRdfx2SXXeCmm7z3QlJOyYWIiETnootggw2gaVMvv33RRau2mTfPp4p+8knd3nvaNK+hcdVV\n/rxVKxg2DLbdFoYOhSuugOuvb/CPIA2nyyIiIh
Kdddf1+zfe8MGYxx3nl0jWWaesTcuWcNttXsPi\nnXdq974//AD77QebbOJ1L8qbPh2uvNIf12bKqsROPRciIhKdiy6CX37xsRa77lp5Iat27SAnBxYv\n9ufff+9Ltz/4YNkqquV9+insvLP3hrz+ui9KVt6WW/riaK1bey+GpJySCxERiU7z5mUFrDbe2Hsn\nyvdalCoo8ASkpAT22cdndpx55srrgcyfD7fcAjvu6D0i48fDhhuu+l7NmsFf/+pjMXbeOZ6fS+pE\nyYWIiMRnxx1X3bZ4sY+f6NMH7r4bvvzSt192mdenAJ9S2rmzXwI59VRPLNZf35OS0hLf5Y0Z40nG\nTjvF97NIrWnMhYiIJFdBgfdYvPBCWU/FEUf4bI9SO+wA998Phx1WNo7jjz9g773hkktWfc+TTvKe\nkHbtYg9faqbkQkREkqe4GE45xR+XJhY77ghPPLHy+Iy11/YqneU99JAv517au1Fe69bQr188MUud\n6bKIiIgkR1ER9O4N33xTtq1rV3jlFZ9WWp2FC+HCC+HYY736pqS1RtVz8ccfPiNJRETSQ9NFv7LR\nLecw65wbWd5lUzbr1I2SjbZjnTdGAfDpsHEU/bQe/FT5/t26eacEBx/si5O1aJG84KXeGlVy8fPI\nMbx89gc5/ofvAAAeG0lEQVRcxzU1NxYRkdhdzCPcwPMMHD2UeRjwDGuyiEWM4t/syd57rVvt/vn5\nkPX4+fDf//qGq6+OP2hpsEaVXGwyazxXrvMAB7+t5EJEJOWKitjm4GEs3j6HN4d0+t/mLvfeRMm/\nWtH5xRHkd7JV91uxgjZfTKHtlHfp/LeXIH+yb58922eQSNprVMlFC1ZAmxZkZaU6EhER4fmXYc4P\ntLxuMOv0TmybORPyhsLll9Njv0pqVhQVwbrr+cDNNdcsK6o1Y4YnFsXFcN11Xvlzq62S9qNI3TSq\n5ILly72Ai4iIpN5ll3mVzt69y7ZdfrkX1br00sr3adYMbr8devSA7GzfVlAAW2zhRbVycmDsWB/U\nqeQibTW+5EKDfUREUu/ZZ31WSN++Zds++ACeeQZGjIA2bare97TTVn7eo4fPKDnvPB+5//bbMHBg\nPHFLJBrXVNQVK5RciDRWIfhNMsPkxDiJ++7z+5ISn0qalQUnnFC79/jwQzjxRO/pOPhg6NLFR3gq\nsUh7jSu50GURkcxQVOQFlJ56qnbtX37Z16nYfHNfbVNJRvq74QYoLIT27f35nDn+BfCuuypfzKyi\nEDyhePllnyHy+edeAnzjjeONWyLRuC6LqOdCJL2FAC+95N9gv/vO/3gcc0zV7QsL4fTTvUz0fvvB\nsmVwwAFeGvrUU+GQQ6BTp6r3l9Rp0WLl38elvQ5//lm7/c2896NNG6/WKRmlcfVcbLKJltsVSVez\nZ/vKlYcdBt27w6RJ8OKL1e8zb54nIVdc4W3ffhveestnEZx9ts8gkMwxZw706gULFlT++kUXwd/+\nVvZ8o42UWGQoCxnevWhmWUB+fn4+WZqDKo1ZSQnceivceacvzjR0qP+xtkrqBKSbF1/0HohmzeCB\nB7zHoqFxL14MLVtWfyn0wQdhgw3gwAMbdixpuBB8afXPPoNPPy1bjKzUokVli45l+N+lTFJQUEC2\nz8rJDiEURPW+javnQqSxWrTILwFccYVPxdt6a/8Dvc02cM89/nqcXnnFr3nXx5Ah3lvRvz9Mneo/\nRxQJUdu21ScWIXhPx1//6tf5JbVeeAH+/W+fKVIxsQD/bJRaujR5cUkslFyIpNrChf5Lt7pvawsX\n+h/3117z0fdvvAFjxvhlwIsv9kFvdfHnn15O+YYbYK+9fHpfVULwdj16+GqW5f8I1MaAAb509vPP\nQ8eOddu3Icx8OuTll3t3++23J+/YsrKiIk+M993Xb5UpLCx7PGdOcuKS2DSuAZ0imeS777zI0Esv\n+Uyn+fN9yl1lNt3UV+Vrlvgva+bT8QYOhJ9/hvXXr/5Ykyf7ctVTp3qy8PHHnmCstZb/8V+wILE6\nVCXM4D//gX/8w/9AjxjhtQv228+7ubOzoWnTqo+9225+S4UmTeCmm/y8XXqp/+xVFW+S+Dz+OHz5\nJeTlVd2mTRsfj3HuuVr1tBGIbcyFmXUAhgEHAiXA88DgEMKSavY5FDgTyAbWBnqFED6t4TgacyGZ\n58kn4cwzoUMHuOACOOggn2ZZmyl6dfXrrz7uYL31PJEoKfFaA7vuCtttV5aw1MaKFfDcc94j8M47\nfjlm7bXhq6+qTozSQQhw7bVeNvrGG+H//q92l2a++QYmTPBzduSRNS8LLqtautQrae66a/XJhaRE\nXGMu4uy5eAroBAwEWgCPAw8Cx1WzTxvgP8DTwMMxxiaSGiHAsGFw/vm+NsL993vvQZw6dPD6AD16\nVN/DUBvNm/uYj5wcTzQmTfIko0OHaGKNi5mP/WjWzLvnFyzwgbFVefNNH8syenTZtqFDYdw4n6ki\ntXfhhd67dv31qY5EkiiW5MLMugH74JnQR4lt5wGvm9nFIYRKL6iFEEYl2m4CZMAQeMkoTz3lvQO9\ne/ssg2T7+muv6TBlCgwe7H+skjXTo1ev6N+zeXPo189vmeKqq3wwYU29NRMn+jiXf/7TB4R+951/\n8+7fH44/3v9gSu107w633AJbbll9u6KiuvWiSVqL5bKImZ0M3BFCWKfctqbAMuCIEEK1o88SycVM\ndFlEotS8uf8Ca9nSLwvstJPPuthkEx/TsPnm1Scdzz8P//qXtxswwP/YtG1b++PPnevTMc8/H/bc\ns6E/jSTbpEl+WeWHH+CTTzJjCnCmmDHDk9RTT/UxMpI0mXZZpDMwr/yGEEKxmS1MvBaP3Xf3X9pX\nXBHbISSD/fYbTJvm30onTPAZFt9/70s4A7z3XvUDD4uL/T1GjYLbbvNvWX36eILSqpUX/Pm//6t6\n/06dfEqnZKY+fXy2jkTvttu8YNott8All6T/ZTapUZ2SCzO7GbismiYB6N6giBrixx+9sI6sfkLw\nQXfVjSlo08b/QPTp470H4D0Zs2Z5t3dNlw6OOspvIfgAxrFjPSGZPh2WLKl6ip2IVG3hQk/YL7sM\nzjhDiUUjUdeeizuAETW0+RaYA6xXfmPissjaidcil5ubS7uff/au62nTAMjJySEnJyeOw0m6CAFe\nfdVnApxxht/qolkzXwipLoshmUHXrn4766y6HU8at5kz/TO5+eapjiT9zZjhS7CPH++9grm5Wicm\nZnl5eeRVmLFTWL6+SITqlFyEEBYAVRSFL2NmE4D2Zta7dEAnPmvEgA9re7i6xDZ06FCyDjjAB1td\ndVVddpVMFIJ3UV9zDXz0kV/O6NEj1VHJ6u7mm+HRR73+yK67+m3HHTWFtTI33eQ1U5o0gTvuUGKR\nBJV94S435iJSsVToDCFMB0YDD5vZDmbWD7gPyCs/U8TMppvZweWedzCz7YBt8ESkm5ltZ2a1+9QV\nFzd8q
p2kvx9/9BH8Bx3k0zjffdcvT+y8c6ojk9Xd3Xf7VOPmzX2q6+67+3oZhx7q01u1ZkaZ++/3\nKcGzZnmvhTQqcZb/PgaYDowBXgPGARX7rLcC2pV7fhDwEfAq3nORBxRUsl/lmjTx6+7SOJWU+KJX\n22zjvRUvveRJxYABqY5MxLVu7ZfKXn/dxxJ88okPVpw5E/bf33s1xLVq5QXY1GPRKMU2qTiE8BvV\nF8wihNC0wvORwMh6H1TJReO2bJmXnz76aL9v167mfURSpUkT6NnTb4MHw9NPa3VWWW00roolSi4a\nt9atfU0MVUiUTGPmSbHIaqJxrYp61VW+wqM0XkosRBouBB982rMn/PRTqqORRqhxJRdnnJFZpYhF\nROI2e7aPbSi/lsrrr8Pf/+5F5C6/PLnx/PKLz+x64YXkHleSqnElF5LZlizx1TZ1aUskOq1awQ47\nwA03eC/FLbf4KrkffOAr8r75Zrz/56ZO9aniP//sM/qOOMK3/fFHfMeUlFNyIalTVOSj6R9+GE4+\nGdZfHwYN8sqXIhKN9u3hvvugsLCsRP2hh/o07v79fVbLZ5/Fd/yrr/bp4+uu64/HjYMNN4Rddonv\nmJJySi4kuYqLvZpmv37+y61XLzjzTMjP929R336rRb1k9VFcDA8+6DOh4hKC194oX2Pj++/90sTG\nG/tife++G8+xx4+HF1/0//OPP+6Fs9Zay3tQdtlFyzU0YkouJLmaNoX33/dvLjfc4N9iFi2CTz/1\nFSc33TTVEYokzzff+Do3cS62eNNN3nNxwAGrvrblltC3bzzJRUmJT8Ht3dvX9TnrLDjnHP+/Dn6Z\n5Lbboj+upIXGNRVV6mbqVHjmGb9fsMDrRuy+e/XV8ubP90WGpk3zgVnNmnk1wmbNym5DhkCXLlW/\nx9ixWq5aBHxF3Ztvhosu8iJbAwdGf4yTToLNNvP/36+/7tsOOMAff/+9z7B7//3oj3v//TBlCgwd\nCscdB4cfDvfc418wBg6Ed97xMVbXXRf9sSXllFysjsaPhyuv9G8r7dv7KqEbbAC//+7jIKrzxx8+\nuvwvf/ExEsuW+T4rVvh9URH8+Wf176HEQqTMBRf4H/oTT/SxD1GvCrrBBnDkkd5DeMklvo5H167+\nf7ZLFx+D8fe/R3e86dOhe2Jx7MGDPamYORNuvbVseYZbbvFei++/j+64klYaV3Ixf75/C1c56KqN\nGuWLu/XsCc8952t0tGhR+/032shndWgNF5FoNGkCI0f6GIizzoK8vGgT8FdfhdNP99+PN97oPRlP\nPAFff123//u1EULZFNObb/ZkpmlT77H46SdYbz0/5vbbe6+pNFqNa8zF6697t76mOFXt4IPhqad8\nbY7DD6/7LxczJRYiUdtwQ/jHP7xE+FNPRfe+//ynL/DXp48PqtxiCx/rdOmlsMYa0R2n1FtvlY0f\nmT7dk4sQ4LLL/IvJjTdGf0xJS42r52Kzzfz+u++8215WteaaUGHJXRFJA0cfDa+9Bqed5jM4jjii\nYe/33//6FO9TT/Xp3uV7Qy65pGHvXZU33vD7bbbxXuRDDoEJE8oGbm63Xe3f65dfvHfj9NOj72GR\n2DWunovS5GLmzNTGISJSHw8/DH/7G2y7bcPep6jIL7Fsv72vJBzVZZaiIq+EXJk5c+CRR7yHZOpU\nH8x55ZVerAs8hoMPrv2xbrnFx4IsWtTgsCX5Gldy0aWLz1xYnZOLoiKYOzfVUYhIfbRq5eMTunVr\n2PuMHOkzuu6/32dwReXjj+Ghh/ySR0UtWvhMs8GDy7Z9+61fggGvc1HVJdXly+HLLz1BAf8ddtdd\nfpm7Y8fo4pekaVzJRdOm3nvx9depjiRev//ugzLHjSub3bFkiXch9ugBxx6b2vhEJF5HHOGXO954\no/IZXk2aeA9Idna0x5061e8rq0fToYNPp23f3p9/8YWP8QC/PNKvn4+HKyryGWflvwReeKEnVOuv\n71NjO3f27YcfHm38kjSNK7kAyMry7rhM9fvvPtiyOt99B5Mne73+ddaBnXf27H7QIJ92dscdSQlV\nRFIgBB87NX6816vYaisfCFq+AufJJ/vlkKgtWeK9wxUHg4bg9Sp23dWr7c6YsfK4t5de8oJdbdr4\n/rfeunJ9i/79yx6PGVP2WF+UMlbjSy522MH/OC9fnupI6mbSJDjqKF+9cP/9V/5FUVGPHvD5534t\n85JLyqpdfvml/8fs1St5cYtIcpnBiBH+OyA/36eVH3203+bPr/8iZFOn+u+T6urUlJT48f/5z7Lk\npbDQB6Bee63PBnn3Xa+jAXD99f77+JNPvGjWP/9Z9l577eX3xcX+xaiiESO8B0YyUwgho29AFhDy\n8/NDCCGEqVNDWGONED78MKS9FStCePHFEHbdNQQIYcstQxg6NIQZM1IdmYhkkmefDaFNG/89cvzx\n9XuPJ58MoUWLEPbYI4TCwsrbvPyyHwNCOOUU/z3bqlXZthBCmDgxhPvvD+GPP1bdf/Zsb3/99WXb\n/vGPsv3nzQvhpptCaNYshD//rN/PIXWSn58fgABkhQj/Nluo7htyBjCzLCA/Pz+frKws37hokS+O\nk66WL/epWQ8+6IVl+vb1bwwHHaQaEiJSP3PmwL//7b0IRx5Zvxki48b576HNNvPxHOuv79sffdSn\ntk6fDhMnVr3/iBFepKsqF1zgAztnziyrRDp3ro+xGD7cZ7hIUhUUFJDtY3OyQwgFUb1v46pzUaqm\nxGL4cPjqK8+Vi4tXvhUV+Wp9p58eX3zNm8PLL8M++/h/pqgHXYnI6qdzZx/o3RD9+8N//gP77Qc7\n7eRfgPbe2+8nT/bBmldcUVYMq0MH+PVXaN3aB2tOmlR1cvHTT34p5YorVi5x3qnTyr+DFy705dkl\nozXO5KIm06eXLZ7VtOmqt9LrhVVZsQJ+/NGz+4rfDlas8LEP1c1TN4MPP9T1RBFJPz16+GDRk0/2\nJKNly7JxGG+84T2tv//uY8T69fMprw8+6CuvbrVV1e/78MOeSJSfqlqqSRP/stejh88y+eUXTUHN\ncKtncnHvvQ3b/9NPvTjNmmv6lKx11vHeiJ9/9iWU//yz5v8cSixEJF1tvLEPDh871gd6rrUWHHaY\nr6wKXouj1Dbb+O/U66/334lVWXNNv7RS2rO8YIH/7ix1++2eWID3YFT05pt+/J13btjPJkmxeiYX\nDbX11r6OybRpPi30t988oejWzeeWb799eo/5EBGpiZkvjV7bZeBLE4+K/vzTL7WUlHhxr3PPheef\nh/PP95kkW2/tYzCGDPHfoW3brnpZ5LPPvKfkwAOVXGQIJRf1seaaPl10//1THYmISPq6/nq4+uqy\n52uvDbNn+7i3M8+ETTbx7bm53tM7ebInF+XNnetJxZZbenlxyQjqmxcRkXiMHbvy80WLPLEYMsTv\nW7b0+hcvv+yXRSomFsuWwaGH+gy7V17xIlyVGT/ex8FJ2lByIS
Ii8Xj3XV+O4a9/9edFRb4g2dVX\nlw2G32ILX+CstJBWUZGvibJkic88mTDBx7ZttFHVx3noIdhzz/oXEJPIKbkQEZH4bLmlzzxZc03v\nnbjsspVf33RTv3xSmmx89pmPy2jbFl580betvXb1xxg0yMsLzJgRefhSP0ouREQkXoce6pdELr64\n5ra9e/sgT/CxFmusAQMGVL9P6cqvLVo0KEyJjpILERFJLx9/DOedBy+84OMualod9ZtvPMEorSgq\nKafkQkRE0kurVnDXXWXPV6yovv2UKV64sGXLeOOSWlNyISIiyfXooz7W4qGHqm5TfmXrbt3gtNOq\nbjtlitcXkrSh5EJERJLn3//2YoNLlniti3/9q/J2o0f7/ZgxPtukqksjS5d6QUOt0ZRWVERLRESS\no6TEV0bdYw8v533qqXDssb6uSE7Oym2fe87XGqmpSugPP/hCaOq5SCtKLkREJDmaNPHFz5o29QGY\njz3mU1CPPdbvjz66rO0113h1zpp07Vq7dpJUsSUXZtYBGAYcCJQAzwODQwhLqmjfDLgR2A/YHCgE\nxgCXhxBmxxWniIgkUWnJb/Ak47HHfLn43r1Xbrf11mVTUmtScXVqSbk4x1w8BXQHBgIHAP2BB6tp\n3xroBQwBegOHAl2Bl2OMUUREUqlJE6/a2bVrqiORCMWSXJhZN2Af4NQQwpQQwnjgPOBoM+tc2T4h\nhEUhhH1CCM+HEL4OIUwCzgWyzWzDOOIUEZEUefbZskGb5d17r6+SKhktrp6LvsCvIYSPym0bAwRg\nxzq8T/vEPr9FGJuIiKTaUUfBvvt6OfCiIt/2/vsweLDP/pCMFldy0RmYV35DCKEYWJh4rUZm1hK4\nBXgqhLA48ghFRCQ1FiwoW6jsttt8LRHwXotu3aqf+aHFyTJCnQZ0mtnNwGXVNAn4OIsGSQzufDbx\nfmfXZp/c3FzatWu30racnBxyKk5vEhGR1HnoIbjqKl+g7IknYORIr3vRrZsvvV5cDMcd5/UtygsB\nhg717a+8UraeiNRaXl4eeXl5K20rLCyM5VgWQqh9Y7N1gHVqaPYtcDxwRwjhf23NrCmwDDgihFDl\nIM1yicWmwB4hhF9riCkLyM/PzycrK6tWP4eIiCTJ3Lnw448+G+S557yexTnneC+FmScNgwb5GIzy\nfv/dV0YF76045xx44AG/jHLTTT4QVBqsoKCAbC9Alh1CKIjqfev0rxNCWBBC+KqGWxEwAWhvZuXn\nFg0EDPiwqvcvl1hsDgysKbEQEZE09/rr0KePL59+3HFe0+Kee8qmj5pVXgb8vff8PgS45BJ48EF4\n5BGfWaLEIu3F8i8UQpgOjAYeNrMdzKwfcB+QF0KYU9rOzKab2cGJx83wWhhZwHFAczPrlLg1jyNO\nERGJ2Zw5niAceSTsv7/XtaiYHLRv75dHLr8cFi+GzTcvm0ly++2+iNk993hFT8kIcV60OgYvojUG\nL6L1HDC4QputgNKBEhvgBbcAPk7cGz7uYndgXIyxiohIHA4/HAoK4MADveeiqrESJ5zg9ytWwMKF\nvgbJY4/5ZZCrrvIl2CVjxJZchBB+w3sgqmvTtNzj74Gm1TQXEZFM07Wrj7WoraVL4bff/Hb66XDG\nGTBkSHzxSSx04UpERNLHWmvBoYfCeut5b8X996u8dwbSXB4REUkvzz/v90oqMpaSCxERSS9KKjKe\nLouIiIhIpJRciIiISKSUXIiIiEiklFyIiIhIpJRciIiISKSUXIiIiEiklFyIiIhIpJRciIiISKSU\nXIiIiEiklFyIiIhIpJRciIiISKSUXIiIiEiklFyIiIhIpJRciIiISKSUXIiIiEiklFyIiIhIpJRc\niIiISKSUXIiIiEiklFyIiIhIpJRciIiISKSUXIiIiEiklFyIiIhIpJRciIiISKSUXIiIiEiklFyI\niIhIpJRciIiISKSUXIiIiEiklFyIiIhIpJRciIiISKSUXEi95OXlpTqE1Y7OefLpnCefznnjEFty\nYWYdzOxJMys0s1/N7BEza1PDPteY2RdmttjMFprZv82sT1wxSv3pF0Dy6Zwnn8558umcNw5x9lw8\nBXQHBgIHAP2BB2vY50vgHGBboB/wHfC2ma0TX5giIiISpViSCzPrBuwDnBpCmBJCGA+cBxxtZp2r\n2i+E8K8QwtgQwnchhC+AC4G1gJ5xxCkiIiLRi6vnoi/wawjho3LbxgAB2LE2b2BmzYEzgN+ATyKP\nUERERGLRLKb37QzMK78hhFBsZgsTr1XJzA4A/gW0BmYBe4UQFlazyxoAX3zxRYMClropLCykoKAg\n1WGsVnTOk0/nPPl0zpOr3N/ONaJ8Xwsh1L6x2c3AZdU0Cfg4i8OBE0II3SvsPxe4OoRQ5dgLM2sF\nrA90BE7Hx2z0CSHMr6L9McCTtf4hREREpKJjQwhPRfVmdU0u1gFqGlz5LXA8cEcI4X9tzawpsAw4\nIoTwch2O+RXwaAjh1mpi2gcf/Lmstu8rIiIirAFsCowOISyI6k3rdFkkceAaD25mE4D2Zta73LiL\ngYABH9YxxiZAyxpiiizbEhERWc2Mj/oNYxnQGUKYDowGHjazHcysH3AfkBdCmFPazsymm9nBicet\nzexGM9vRzDY2sywzewzoAjwbR5wiIiISvbgGdAIcAwzDZ4mUAM8Bgyu02Qpol3hcDHQDTsDHWywA\nJgO7JKalioiISAao05gLERERkZpobRERERGJlJILERERiVRGJBdmdo6ZzTSzpWY20cx2qKH9ADPL\nN7NlZvaVmZ2YrFgbi7qcczPbzcxKKtyKzWy9ZMacycxsVzN7xcx+Tpy/g2qxjz7nDVDXc67PecOY\n2f+Z2SQzW2Rmc83sRTPbuhb76XNeT/U551F9ztM+uTCzQcCdwDVAb7wU+Ggz61hF+02B14B3gO2A\ne4BHzGyvZMTbGNT1nCcEfIBu58Rt/RDCvGray8raAB8DZ+Pnslr6nEeiTuc8QZ/z+tsVnzW4I7An\n0BxfmLJVVTvoc95gdT7nCQ3+nKf9gE4zmwh8GEIYnHhuwI/AvSGE2yppfyuwXwihZ7lteUC7EML+\nSQo7o9XjnO8GjAU6hBAWJTXYRsjMSoBDQgivVNNGn/MI1fKc63MeocSXlXlA/xDCf6too895hGp5\nziP5nKd1z0Vi8bJsPGsFIHg2NAZfHK0yOyVeL290Ne2lnHqec/ACaR+b2Swze9vMdo430tWePuep\noc95dNrj35CrWztKn/No1eacQwSf87ROLvB6F02BuRW2z6XqBdA6V9F+LTOrstKn/E99zvlsfAXb\nw4HD8F6O98ysV1xBij7nKaDPeUQSvaF3A/8NIXxeTVN9ziNSh3Meyec8ziJaspoIIXwFfFVu00Qz\n2wLIBTT4ShoFfc4jNRz4C9Av1YGsRmp1zqP6nKd7z8V8vHJnpwrbOwFzVm0Oie2VtV8UQvgz2vAa\npfqc88pMAraMKihZh
T7n6UGf8zoys2HA/sCAEMLsGprrcx6BOp7zytT5c57WyUUIYQWQjy96Bvyv\na2cgVS+0MqF8+4S9E9ulBvU855XphXevSTz0OU8P+pzXQeKP3MHA7iGEH2qxiz7nDVSPc16ZOn/O\nM+GyyF3A42aWj2dPuUBr4HEAM7sZ6BJCKO2ueQA4JzHK+DH8g3kEnrVJ7dTpnJvZYGAmMA1fvvd0\nYHdA08Vqycza4N8MLLFpczPbDlgYQvhRn/Po1fWc63PeMGY2HMgBDgKWmFlpj0RhCGFZos1NwAb6\nnEejPuc8ss95CCHtb/g89O+ApXjGun2510YAYyu0749/+14KfA0cn+qfIdNudTnnwCWJ87wE+AWf\nadI/1T9DJt2A3fAF/oor3B6r7JwntulznsRzrs95g893Zee6GDihXBt9zlN8zqP6nKd9nQsRERHJ\nLGk95kJEREQyj5ILERERiZSSCxEREYmUkgsRERGJlJILERERiZSSCxEREYmUkgsRERGJlJILERER\niZSSCxEREYmUkgsRERGJlJILERERidT/A0u0Vy+buKaqAAAAAElFTkSuQmCC\n",
32 | "text/plain": [
33 | ""
34 | ]
35 | },
36 | "metadata": {},
37 | "output_type": "display_data"
38 | }
39 | ],
40 | "source": [
41 | "#####################################\n",
42 | "########### TO SPECIFY ##############\n",
43 | "#####################################\n",
44 | "\n",
45 | "save_dir = '../run_0'\n",
46 | "which_agent = 2\n",
47 | "time_lim = -1 #1000\n",
48 | "num_des_points = 3\n",
49 | "\n",
50 | "#####################################\n",
51 | "#####################################\n",
52 | "\n",
53 | "\n",
54 | "#imports\n",
55 | "import numpy as np\n",
56 | "import matplotlib.pyplot as plt\n",
57 | "%matplotlib inline\n",
58 | "\n",
59 | "\n",
60 | "#read in trajectory following data\n",
61 | "states_des = np.load(save_dir+'/saved_trajfollow/true_iter0.npy')\n",
62 | "states_exec = np.load(save_dir+'/saved_trajfollow/pred_iter0.npy')\n",
63 | "print (states_des.shape)\n",
64 | "print (states_exec.shape)\n",
65 | " \n",
66 | " \n",
67 | "### ANT\n",
68 | "if(which_agent==1):\n",
69 | " x=29\n",
70 | " y=30\n",
71 | "### SWIMMER\n",
72 | "if(which_agent==2):\n",
73 | " x=10\n",
74 | " y=11\n",
75 | "### CHEETAH\n",
76 | "if(which_agent==4):\n",
77 | " x=0\n",
78 | " y=1\n",
79 | " \n",
80 | "#look at right indeces of executed trajectory\n",
81 | "states_exec=states_exec[0][:,[x,y]]\n",
82 | "\n",
83 | "#plot desired vs executed trajectories\n",
84 | "plt.figure()\n",
85 | "plt.title('xy')\n",
86 | "plt.plot(states_des[0:num_des_points, 0], states_des[0:num_des_points, 1])\n",
87 | "plt.plot(states_exec[0:time_lim, 0], states_exec[0:time_lim, 1], 'r--')\n",
88 | "\n"
89 | ]
90 | },
91 | {
92 | "cell_type": "code",
93 | "execution_count": null,
94 | "metadata": {
95 | "collapsed": true
96 | },
97 | "outputs": [],
98 | "source": []
99 | }
100 | ],
101 | "metadata": {
102 | "anaconda-cloud": {},
103 | "celltoolbar": "Raw Cell Format",
104 | "kernelspec": {
105 | "display_name": "Python 3",
106 | "language": "python",
107 | "name": "python3"
108 | },
109 | "language_info": {
110 | "codemirror_mode": {
111 | "name": "ipython",
112 | "version": 3
113 | },
114 | "file_extension": ".py",
115 | "mimetype": "text/x-python",
116 | "name": "python",
117 | "nbconvert_exporter": "python",
118 | "pygments_lexer": "ipython3",
119 | "version": "3.5.2"
120 | },
121 | "widgets": {
122 | "state": {},
123 | "version": "1.1.2"
124 | }
125 | },
126 | "nbformat": 4,
127 | "nbformat_minor": 1
128 | }
129 |
--------------------------------------------------------------------------------
/point_env.py:
--------------------------------------------------------------------------------
1 | from rllab.envs.base import Env
2 | from rllab.spaces import Box
3 | from rllab.envs.base import Step
4 | import numpy as np
5 |
6 | class PointEnv(Env):
7 | @property
8 | def observation_space(self):
9 | return Box(low=-np.inf, high=np.inf, shape=(4,)) #state space = [x, y, vx, vy]
10 |
11 | @property
12 | def action_space(self):
13 | return Box(low=-5, high=5, shape=(2,)) #controls are the forces applied to pointmass
14 |
15 | def reset(self, init_state=None):
16 | if(init_state is None):
17 | np.random.seed()
18 | self._state=np.zeros((4,))
19 | self._state[0]= np.random.uniform(-10, 10)
20 | self._state[1]= np.random.uniform(-10, 10)
21 | else:
22 | self._state = init_state
23 |
24 | observation = np.copy(self._state)
25 | return observation
26 |
27 | def step(self, action):
28 | #next state = what happens after taking "action"
29 | temp_state=np.copy(self._state)
30 | dt=0.1
31 | temp_state[0] = self._state[0] + self._state[2]*dt + 0.5*action[0]*dt*dt
32 | temp_state[1] = self._state[1] + self._state[3]*dt + 0.5*action[1]*dt*dt
33 | temp_state[2] = self._state[2] + action[0]*dt
34 | temp_state[3] = self._state[3] + action[1]*dt
35 | self._state = np.copy(temp_state)
36 |
37 | #make the reward what you care about
38 | x, y, vx, vy = self._state
39 | reward = vx - np.sqrt(abs(y-0)) #we care about moving in the forward x direction... and keeping our y value close to 0... (aka "going straight")
40 | done = 0#x>500 #when do you consider this to be "done" (rollout stops... "terminal")
41 | next_observation = np.copy(self._state)
42 | return Step(observation=next_observation, reward=reward, done=done)
43 |
44 | def render(self):
45 | return self._state
--------------------------------------------------------------------------------
/policy_random.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | class Policy_Random(object):
4 |
5 | def __init__(self, env):
6 |
7 | #vars
8 | self.env = env
9 | self.low_val = self.env.action_space.low
10 | self.high_val = self.env.action_space.high
11 | self.shape = self.env.action_space.shape
12 | print("Created a random policy, where actions are selected between ", self.low_val, ", and ", self.high_val)
13 |
14 | def get_action(self, observation):
15 | return np.random.uniform(self.low_val, self.high_val, self.shape), 0
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # Neural Network Dynamics for Model-Based Deep Reinforcement Learning with Model-Free Fine-Tuning
2 |
3 | [Arxiv Link](https://arxiv.org/abs/1708.02596)
4 |
5 | **Abstract**: Model-free deep reinforcement learning algorithms have been shown to be capable of learning a wide range of robotic skills, but typically require a very large number of samples to achieve good performance. Model-based algorithms, in principle, can provide for much more efficient learning, but have proven difficult to extend to expressive, high-capacity models such as deep neural networks. In this work, we demonstrate that medium-sized neural network models can in fact be combined with model predictive control (MPC) to achieve excellent sample complexity in a model-based reinforcement learning algorithm, producing stable and plausible gaits to accomplish various complex locomotion tasks. We also propose using deep neural network dynamics models to initialize a model-free learner, in order to combine the sample efficiency of model-based approaches with the high task-specific performance of model-free methods. We empirically demonstrate on MuJoCo locomotion tasks that our pure model-based approach trained on just minutes of random action data can follow arbitrary trajectories, and that our hybrid algorithm can accelerate model-free learning on high-speed benchmark tasks, achieving sample efficiency gains of 3-5x on swimmer, cheetah, hopper, and ant agents.
6 |
9 |
10 | - For installation guide, go to [installation.md](https://github.com/nagaban2/learn_dynamics/blob/release/docs/installation.md)
11 | - For notes on how to use your own environment, how to edit envs, etc. go to [notes.md](https://github.com/nagaban2/learn_dynamics/blob/release/docs/notes.md)
12 |
13 | ---------------------------------------------------------------
14 |
15 | ### How to run everything
16 |
17 | ```
18 | cd scripts
19 | ./swimmer_mbmf.sh
20 | ./cheetah_mbmf.sh
21 | ./hopper_mbmf.sh
22 | ./ant_mbmf.sh
23 | ```
24 |
25 | Each of those scripts does something similar to the following (but for multiple seeds):
26 |
27 | ```
28 | python main.py --seed=0 --run_num=1 --yaml_file='swimmer_forward'
29 | python mbmf.py --run_num=1 --which_agent=2
30 | python trpo_run_mf.py --seed=0 --save_trpo_run_num=1 --which_agent=2 --num_workers_trpo=2 --std_on_mlp_policy=0.5
31 | python plot_mbmf.py --trpo_dir=[trpo_dir] --std_on_mlp_policy=0.5 --which_agent=2 --run_nums 1 --seeds 0
32 | ```
33 | Note that [trpo_dir] above corresponds to where the TRPO runs are saved. Probably somewhere in ~/rllab/data/...
34 | Each of these steps is further explained in the following sections.
35 |
36 | ---------------------------------------------------------------
37 |
38 | ### How to run MB
39 |
40 | Need to specify:
41 |
42 | --**yaml_file** Specify the corresponding yaml file
43 | --**seed** Set random seed to set for numpy and tensorflow
44 | --**run_num** Specify what directory to save files under
45 | --**use_existing_training_data** To use the data that already exists in the directory run_num instead of recollecting
46 | --**desired_traj_type** What type of trajectory to follow (if you want to follow a trajectory)
47 | --**num_rollouts_save_for_mf** Number of on-policy rollouts to save after last aggregation iteration, to be used later
48 | --**might_render** If you might want to visualize anything during the run
49 | --**visualize_MPC_rollout** To set a breakpoint and visualize the on-policy rollouts after each agg iteration
50 | --**perform_forwardsim_for_vis** To visualize an open-loop prediction made by the learned dynamics model
51 | --**print_minimal** To not print messages regarding progress/notes/etc.
52 |
53 | ##### Examples:
54 | ```
55 | python main.py --seed=0 --run_num=0 --yaml_file='cheetah_forward'
56 | python main.py --seed=0 --run_num=1 --yaml_file='swimmer_forward'
57 | python main.py --seed=0 --run_num=2 --yaml_file='ant_forward'
58 | python main.py --seed=0 --run_num=3 --yaml_file='hopper_forward'
59 | ```
60 | ```
61 | python main.py --seed=0 --run_num=4 --yaml_file='cheetah_trajfollow' --desired_traj_type='straight' --visualize_MPC_rollout
62 | python main.py --seed=0 --run_num=4 --yaml_file='cheetah_trajfollow' --desired_traj_type='backward' --visualize_MPC_rollout --use_existing_training_data --use_existing_dynamics_model
63 | python main.py --seed=0 --run_num=4 --yaml_file='cheetah_trajfollow' --desired_traj_type='forwardbackward' --visualize_MPC_rollout --use_existing_training_data --use_existing_dynamics_model
64 | ```
65 | ```
66 | python main.py --seed=0 --run_num=5 --yaml_file='swimmer_trajfollow' --desired_traj_type='straight' --visualize_MPC_rollout
67 | python main.py --seed=0 --run_num=5 --yaml_file='swimmer_trajfollow' --desired_traj_type='left_turn' --visualize_MPC_rollout --use_existing_training_data --use_existing_dynamics_model
68 | python main.py --seed=0 --run_num=5 --yaml_file='swimmer_trajfollow' --desired_traj_type='right_turn' --visualize_MPC_rollout --use_existing_training_data --use_existing_dynamics_model
69 | ```
70 | ```
71 | python main.py --seed=0 --run_num=6 --yaml_file='ant_trajfollow' --desired_traj_type='straight' --visualize_MPC_rollout
72 | python main.py --seed=0 --run_num=6 --yaml_file='ant_trajfollow' --desired_traj_type='left_turn' --visualize_MPC_rollout --use_existing_training_data --use_existing_dynamics_model
73 | python main.py --seed=0 --run_num=6 --yaml_file='ant_trajfollow' --desired_traj_type='right_turn' --visualize_MPC_rollout --use_existing_training_data --use_existing_dynamics_model
74 | python main.py --seed=0 --run_num=6 --yaml_file='ant_trajfollow' --desired_traj_type='u_turn' --visualize_MPC_rollout --use_existing_training_data --use_existing_dynamics_model
75 | ```
76 | ---------------------------------------------------------------
77 |
78 | ### How to run MBMF
79 |
80 | Need to specify:
81 |
82 | --**save_trpo_run_num** Number used as part of directory name for saving mbmf TRPO run (you can use 1,2,3,etc to differentiate your different seeds)
83 | --**run_num** Specify what directory to get relevant MB data from & to save new MBMF files in
84 | --**which_agent** Specify which agent (1 ant, 2 swimmer, 4 cheetah, 6 hopper)
85 | --**std_on_mlp_policy** Initial std you want to set on your pre-initialization policy for TRPO to use
86 | --**num_workers_trpo** How many worker threads (cpu) for TRPO to use
87 | --**might_render** If you might want to visualize anything during the run
88 | --**visualize_mlp_policy** To visualize the rollout performed by trained policy (that will serve as pre-initialization for TRPO)
89 | --**visualize_on_policy_rollouts** To set a breakpoint and visualize the on-policy rollouts after each agg iteration of dagger
90 | --**print_minimal** To not print messages regarding progress/notes/etc.
91 | --**use_existing_pretrained_policy** To run only the TRPO part (if you've already done the imitation learning part in the same directory)
92 |
93 | *Note that the finished TRPO run saves to ~/rllab/data/local/experiments/*
94 |
95 | ##### Examples:
96 | ```
97 | python mbmf.py --run_num=1 --which_agent=2 --std_on_mlp_policy=1.0
98 | python mbmf.py --run_num=0 --which_agent=4 --std_on_mlp_policy=0.5
99 | python mbmf.py --run_num=3 --which_agent=6 --std_on_mlp_policy=1.0
100 | python mbmf.py --run_num=2 --which_agent=1 --std_on_mlp_policy=0.5
101 | ```
102 |
103 | ---------------------------------------------------------------
104 |
105 | ### How to run MF
106 |
107 | Run pure TRPO, for comparisons.
108 |
109 | Need to specify command line args as desired
110 | --**seed** Set random seed to set for numpy and tensorflow
111 | --**steps_per_rollout** Length of each rollout that TRPO should collect
112 | --**save_trpo_run_num** Number used as part of directory name for saving TRPO run (you can use 1,2,3,etc to differentiate your different seeds)
113 | --**which_agent** Specify which agent (1 ant, 2 swimmer, 4 cheetah, 6 hopper)
114 | --**num_workers_trpo** How many worker threads (cpu) for TRPO to use
115 | --**num_trpo_iters** Total number of TRPO iterations to run before stopping
116 |
117 | *Note that the finished TRPO run saves to ~/rllab/data/local/experiments/*
118 |
119 |
120 | ##### Examples:
121 | ```
122 | python trpo_run_mf.py --seed=0 --save_trpo_run_num=1 --which_agent=4 --num_workers_trpo=4
123 | python trpo_run_mf.py --seed=0 --save_trpo_run_num=1 --which_agent=2 --num_workers_trpo=4
124 | python trpo_run_mf.py --seed=0 --save_trpo_run_num=1 --which_agent=1 --num_workers_trpo=4
125 | python trpo_run_mf.py --seed=0 --save_trpo_run_num=1 --which_agent=6 --num_workers_trpo=4
126 |
127 | python trpo_run_mf.py --seed=50 --save_trpo_run_num=2 --which_agent=4 --num_workers_trpo=4
128 | python trpo_run_mf.py --seed=50 --save_trpo_run_num=2 --which_agent=2 --num_workers_trpo=4
129 | python trpo_run_mf.py --seed=50 --save_trpo_run_num=2 --which_agent=1 --num_workers_trpo=4
130 | python trpo_run_mf.py --seed=50 --save_trpo_run_num=2 --which_agent=6 --num_workers_trpo=4
131 | ```
132 | ---------------------------------------------------------------
133 |
134 | ### How to plot
135 |
136 | 1) MBMF
137 | - Need to specify the command-line arguments as desired (in plot_mbmf.py)
138 | - Examples of running the plotting script:
139 | ```
140 | cd plotting
141 | python plot_mbmf.py --trpo_dir=[trpo_dir] --std_on_mlp_policy=1.0 --which_agent=2 --run_nums 1 --seeds 0
142 | python plot_mbmf.py --trpo_dir=[trpo_dir] --std_on_mlp_policy=1.0 --which_agent=2 --run_nums 1 2 3 --seeds 0 70 100
143 | ```
144 | Note that [trpo_dir] above corresponds to where the TRPO runs are saved. Probably somewhere in ~/rllab/data/...
145 |
146 | 2) Dynamics model training and validation losses per aggregation iteration
147 | IPython notebook: plotting/plot_loss.ipynb (a standalone script sketch is also included at the end of this readme)
148 | Example plots: docs/sample_plots/...
149 |
150 | 3) Visualize a forward simulation (an open-loop multi-step prediction of the elements of the state space)
151 | IPython notebook: plotting/plot_forwardsim.ipynb
152 | Example plots: docs/sample_plots/...
153 |
154 | 4) Visualize the trajectories (on policy rollouts) per aggregation iteration
155 | IPython notebook: plotting/plot_trajfollow.ipynb (a standalone script sketch is also included at the end of this readme)
156 | Example plots: docs/sample_plots/...
157 |
158 |
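159 | The notebooks above can also be replaced by plain scripts. The two sketches below are minimal standalone versions of plot_loss.ipynb and plot_trajfollow.ipynb (sketches only, not used anywhere in the pipeline): they load the same .npy files from a run directory. The run directory names, agent choice, and number of desired points are placeholders; point them at your own run_* output (paths are written as if running from the plotting/ folder).
160 |
161 | ```
162 | # loss curves (same files as plotting/plot_loss.ipynb reads)
163 | import numpy as np
164 | import matplotlib.pyplot as plt
165 |
166 | save_dir = '../run_1001'  # example run directory
167 |
168 | training_loss = np.load(save_dir + '/losses/list_training_loss.npy')
169 | onestep_val_loss = np.load(save_dir + '/errors_1_per_agg.npy')
170 |
171 | plt.figure()
172 | plt.plot(training_loss)
173 | plt.xlabel('Aggregation Iteration')
174 | plt.ylabel('Loss')
175 | plt.title('Training Loss')
176 |
177 | plt.figure()
178 | plt.plot(onestep_val_loss)
179 | plt.xlabel('Aggregation Iteration')
180 | plt.ylabel('Loss')
181 | plt.title('1-step Validation Loss')
182 | plt.show()
183 | ```
184 |
185 | ```
186 | # desired vs executed xy trajectory (same files as plotting/plot_trajfollow.ipynb reads)
187 | import numpy as np
188 | import matplotlib.pyplot as plt
189 |
190 | save_dir = '../run_0'  # example run directory
191 | which_agent = 2        # 1 ant, 2 swimmer, 4 cheetah
192 | num_des_points = 3
193 |
194 | states_des = np.load(save_dir + '/saved_trajfollow/true_iter0.npy')
195 | states_exec = np.load(save_dir + '/saved_trajfollow/pred_iter0.npy')
196 |
197 | # xy indices of the state space, per agent (same mapping as the notebook)
198 | x, y = {1: (29, 30), 2: (10, 11), 4: (0, 1)}[which_agent]
199 | states_exec = states_exec[0][:, [x, y]]
200 |
201 | plt.figure()
202 | plt.title('desired vs executed xy trajectory')
203 | plt.plot(states_des[0:num_des_points, 0], states_des[0:num_des_points, 1])
204 | plt.plot(states_exec[:, 0], states_exec[:, 1], 'r--')
205 | plt.show()
206 | ```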
--------------------------------------------------------------------------------
/reward_functions.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | class RewardFunctions:
4 |
5 | def __init__(self, which_agent, x_index, y_index, z_index, yaw_index, joint1_index, joint2_index,
6 | frontleg_index, frontshin_index, frontfoot_index, xvel_index, orientation_index):
7 | self.which_agent = which_agent
8 | self.x_index = x_index
9 | self.y_index = y_index
10 | self.z_index = z_index
11 | self.yaw_index = yaw_index
12 | self.joint1_index = joint1_index
13 | self.joint2_index = joint2_index
14 | self.frontleg_index = frontleg_index
15 | self.frontshin_index = frontshin_index
16 | self.frontfoot_index = frontfoot_index
17 | self.xvel_index = xvel_index
18 | self.orientation_index = orientation_index
19 |
20 | def get_reward_func(self, follow_trajectories, desired_states, horiz_penalty_factor,
21 | forward_encouragement_factor, heading_penalty_factor):
22 |
23 | #init vars
24 | self.desired_states= desired_states
25 | self.horiz_penalty_factor = horiz_penalty_factor
26 | self.forward_encouragement_factor = forward_encouragement_factor
27 | self.heading_penalty_factor = heading_penalty_factor
28 |
29 | if(follow_trajectories):
30 | if(self.which_agent==1):
31 | reward_func= self.ant_follow_traj
32 | if(self.which_agent==2):
33 | reward_func= self.swimmer_follow_traj
34 | if(self.which_agent==4):
35 | reward_func= self.cheetah_follow_traj
36 | else:
37 | if(self.which_agent==1):
38 | reward_func= self.ant_forward
39 | if(self.which_agent==2):
40 | reward_func= self.swimmer_forward
41 | if(self.which_agent==4):
42 | reward_func= self.cheetah_forward
43 | if(self.which_agent==6):
44 | reward_func= self.hopper_forward
45 | return reward_func
46 |
47 | ######################################################################################################################
48 | def ant_follow_traj(self, pt, prev_pt, scores, min_perp_dist, curr_forward, prev_forward,
49 | curr_seg, moved_to_next, done_forever, all_samples, pt_number):
50 |
51 | #penalize horiz dist away from trajectory
52 | scores[min_perp_dist<1] += (min_perp_dist*self.horiz_penalty_factor)[min_perp_dist<1]
53 | scores[min_perp_dist>=1] += (min_perp_dist*10*self.horiz_penalty_factor)[min_perp_dist>=1]
54 |
55 | #encourage moving-forward
56 | scores[moved_to_next==0] -= self.forward_encouragement_factor*(curr_forward - prev_forward)[moved_to_next==0]
57 | scores[moved_to_next==1] -= self.forward_encouragement_factor*(curr_forward)[moved_to_next==1]
58 |
59 | #prevent height from going too high or too low (the 0*pt term below just broadcasts the scalar penalty to the right shape)
60 | scores[pt[:,self.z_index]>0.67] += (self.heading_penalty_factor*40 + 0*pt[:,self.z_index])[pt[:,self.z_index]>0.67]
61 | scores[pt[:,self.z_index]<0.3] += (self.heading_penalty_factor*40 + 0*pt[:,self.z_index])[pt[:,self.z_index]<0.3]
62 |
63 | return scores, done_forever
64 |
65 | def swimmer_follow_traj(self, pt, prev_pt, scores, min_perp_dist, curr_forward, prev_forward,
66 | curr_seg, moved_to_next, done_forever, all_samples, pt_number):
67 |
68 | #penalize horiz dist away from trajectory
69 | scores += min_perp_dist*self.horiz_penalty_factor
70 |
71 | #encourage moving-forward and penalize not-moving-forward
72 | scores[moved_to_next==0] -= self.forward_encouragement_factor*(curr_forward - prev_forward)[moved_to_next==0]
73 | scores[moved_to_next==1] -= self.forward_encouragement_factor*(curr_forward)[moved_to_next==1]
74 |
75 | #angle that (desired traj) line segment makes WRT horizontal
76 | curr_line_start = self.desired_states[curr_seg]
77 | curr_line_end = self.desired_states[curr_seg+1]
78 | angle = np.arctan2(curr_line_end[:,1]-curr_line_start[:,1], curr_line_end[:,0]-curr_line_start[:,0])
79 | # ^ -pi to pi
80 |
81 | #penalize heading away from that angle
82 | diff = np.abs(pt[:,self.yaw_index]-angle)
83 | diff[diff>np.pi]=(2*np.pi-diff)[diff>np.pi]
84 | #^ if the calculation takes you the long way around the circle,
85 | #take the shorter value instead as the difference
86 | my_range = np.pi/3.0
87 | scores[diff<my_range] += (self.heading_penalty_factor*diff)[diff<my_range]
88 | scores[diff>=my_range] += 20
89 |
90 | #dont bend in too much
91 | first_joint = np.abs(pt[:,self.joint1_index])
92 | second_joint = np.abs(pt[:,self.joint2_index])
93 | limit = np.pi/3
94 | scores[limit<first_joint] += self.heading_penalty_factor
95 | scores[limit<second_joint] += self.heading_penalty_factor
96 |
97 | return scores, done_forever
98 |
99 | def cheetah_follow_traj(self, pt, prev_pt, scores, min_perp_dist, curr_forward, prev_forward,
100 | curr_seg, moved_to_next, done_forever, all_samples, pt_number):
101 |
102 | #penalize horiz dist away from trajectory
103 | scores += min_perp_dist*self.horiz_penalty_factor
104 |
105 | #encourage moving-forward and penalize not-moving-forward
106 | scores[moved_to_next==0] -= self.forward_encouragement_factor*(curr_forward - prev_forward)[moved_to_next==0]
107 | scores[moved_to_next==1] -= self.forward_encouragement_factor*(curr_forward)[moved_to_next==1]
108 |
109 | #dont tilt the front leg/shin/foot too far
110 | front_leg = pt[:,self.frontleg_index]
111 | my_range = 0
112 | scores[front_leg>=my_range] += self.heading_penalty_factor
113 |
114 | front_shin = pt[:,self.frontshin_index]
115 | my_range = 0
116 | scores[front_shin>=my_range] += self.heading_penalty_factor
117 |
118 | front_foot = pt[:,self.frontfoot_index]
119 | my_range = 0
120 | scores[front_foot>=my_range] += self.heading_penalty_factor
121 |
122 | return scores, done_forever
123 |
124 | ######################################################################################################################
125 | def ant_forward(self, pt, prev_pt, scores, min_perp_dist, curr_forward, prev_forward,
126 | curr_seg, moved_to_next, done_forever, all_samples, pt_number):
127 |
128 | #watch the height
129 | done_forever[pt[:,self.z_index] > 1] = 1
130 | done_forever[pt[:,self.z_index] < 0.3] = 1
131 |
132 | #action
133 | scaling= 150.0
134 | if(pt_number==all_samples.shape[1]):
135 | scores[done_forever==0] += 0.005*np.sum(np.square(all_samples[:,pt_number-1,:][done_forever==0]/scaling), axis=1)
136 | else:
137 | scores[done_forever==0] += 0.005*np.sum(np.square(all_samples[:,pt_number,:][done_forever==0]/scaling), axis=1)
138 |
139 | #velocity
140 | scores[done_forever==0] -= pt[:,self.xvel_index][done_forever==0]
141 |
142 | #survival
143 | scores[done_forever==0] -= 0.5 # used to be 0.05
144 |
145 | return scores, done_forever
146 |
147 | def swimmer_forward(self, pt, prev_pt, scores, min_perp_dist, curr_forward, prev_forward,
148 | curr_seg, moved_to_next, done_forever, all_samples, pt_number):
149 |
150 | ########### GYM
151 |
152 | '''if(pt_number==all_samples.shape[1]):
153 | reward_ctrl = 0.0001 * np.sum(np.square(all_samples[:,pt_number-1,:]), axis=1)
154 | else:
155 | reward_ctrl = 0.0001 * np.sum(np.square(all_samples[:,pt_number,:]), axis=1)
156 | reward_fwd = (pt[:,self.x_index]-prev_pt[:,self.x_index]) / 0.01'''
157 |
158 | ########### RLLAB
159 |
160 | scaling=50.0
161 | if(pt_number==all_samples.shape[1]):
162 | reward_ctrl = 0.5 * np.sum(np.square(all_samples[:,pt_number-1,:]/scaling), axis=1)
163 | else:
164 | reward_ctrl = 0.5 * np.sum(np.square(all_samples[:,pt_number,:]/scaling), axis=1)
165 | reward_fwd = pt[:,self.xvel_index]
166 |
167 | #########################
168 |
169 | scores += -reward_fwd + reward_ctrl
170 | return scores, done_forever
171 |
172 | def cheetah_forward(self, pt, prev_pt, scores, min_perp_dist, curr_forward, prev_forward,
173 | curr_seg, moved_to_next, done_forever, all_samples, pt_number):
174 |
175 | ########### GYM
176 |
177 | '''#action
178 | if(pt_number==all_samples.shape[1]):
179 | scores += 0.1*np.sum(np.square(all_samples[:,pt_number-1,:]), axis=1)
180 | else:
181 | scores += 0.1*np.sum(np.square(all_samples[:,pt_number,:]), axis=1)
182 |
183 | #velocity
184 | scores -= (pt[:,self.x_index]-prev_pt[:,self.x_index]) / 0.01'''
185 |
186 | ########### RLLAB
187 |
188 | #action
189 | if(pt_number==all_samples.shape[1]):
190 | scores += 0.05*np.sum(np.square(all_samples[:,pt_number-1,:]), axis=1)
191 | else:
192 | scores += 0.05*np.sum(np.square(all_samples[:,pt_number,:]), axis=1)
193 |
194 | #velocity
195 | scores -= pt[:,self.xvel_index]
196 |
197 | return scores, done_forever
198 |
199 | def hopper_forward(self, pt, prev_pt, scores, min_perp_dist, curr_forward, prev_forward,
200 | curr_seg, moved_to_next, done_forever, all_samples, pt_number):
201 |
202 | scaling=200.0
203 |
204 | #dont tilt orientation out of range
205 | orientation = pt[:,self.orientation_index]
206 | done_forever[np.abs(orientation)>= 0.3] = 1
207 |
208 | #dont fall to ground
209 | done_forever[pt[:,self.z_index] <= 0.7] = 1
210 |
211 | #action
212 | if(pt_number==all_samples.shape[1]):
213 | scores[done_forever==0] += 0.005*np.sum(np.square(all_samples[:,pt_number-1,:][done_forever==0]/scaling), axis=1)
214 | else:
215 | scores[done_forever==0] += 0.005*np.sum(np.square(all_samples[:,pt_number,:][done_forever==0]/scaling), axis=1)
216 |
217 | #velocity
218 | scores[done_forever==0] -= pt[:,self.xvel_index][done_forever==0]
219 |
220 | #survival
221 | scores[done_forever==0] -= 1
222 |
223 | return scores, done_forever
--------------------------------------------------------------------------------
/scripts/ant_mbmf.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | #####################################
4 | ## SET VARS
5 | #####################################
6 |
7 | #location of all saved trpo runs
8 | trpo_dir='/home/anagabandi/rllab/data/local/experiment/'
9 |
10 | #specific to the run
11 | how_many_seeds=1
12 | seeds=(0 70 100)
13 | num_workers_trpo=2 #how many cores to use
14 |
15 | #specific to the agent
16 | which_agent=1
17 | std_on_mlp_policy=0.5
18 | base_run_num=31 #used for filenames for saving
19 |
20 | #####################################
21 | ## DO THE RUNS
22 | #####################################
23 |
24 | cd ..
25 | echo 'run numbers:'
26 | iter_num=0
27 | while [ $iter_num -lt $how_many_seeds ]
28 | do
29 | seed=${seeds[$iter_num]}
30 | run_num=$(( $base_run_num + $iter_num ))
31 | echo $run_num
32 | save_trpo_run_num=$(( 1 + $iter_num ))
33 |
34 | python main.py --seed=$seed --run_num=$run_num --yaml_file='ant_forward'
35 | python mbmf.py --run_num=$run_num --which_agent=$which_agent --std_on_mlp_policy=$std_on_mlp_policy
36 | python trpo_run_mf.py --seed=$seed --save_trpo_run_num=$save_trpo_run_num --which_agent=$which_agent --num_workers_trpo=$num_workers_trpo
37 |
38 | iter_num=$(( $iter_num + 1))
39 | done
40 |
41 | #####################################
42 | ## PLOTTING
43 | #####################################
44 |
45 | cd plotting
46 |
47 | if [ $how_many_seeds -eq 3 ]
48 | then
49 | python plot_mbmf.py --trpo_dir=$trpo_dir --std_on_mlp_policy=$std_on_mlp_policy --which_agent=$which_agent --run_nums 31 32 33 --seeds ${seeds[0]} ${seeds[1]} ${seeds[2]}
50 | fi
51 |
52 | if [ $how_many_seeds -eq 2 ]
53 | then
54 | python plot_mbmf.py --trpo_dir=$trpo_dir --std_on_mlp_policy=$std_on_mlp_policy --which_agent=$which_agent --run_nums 31 32 --seeds ${seeds[0]} ${seeds[1]}
55 | fi
56 |
57 | if [ $how_many_seeds -eq 1 ]
58 | then
59 | python plot_mbmf.py --trpo_dir=$trpo_dir --std_on_mlp_policy=$std_on_mlp_policy --which_agent=$which_agent --run_nums 31 --seeds ${seeds[0]}
60 | fi
61 |
--------------------------------------------------------------------------------
/scripts/cheetah_mbmf.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | #####################################
4 | ## SET VARS
5 | #####################################
6 |
7 | #location of all saved trpo runs
8 | trpo_dir='/home/anagabandi/rllab/data/local/experiment/'
9 |
10 | #specific to the run
11 | how_many_seeds=3
12 | seeds=(0 70 100)
13 | num_workers_trpo=2 #how many cores to use
14 |
15 | #specific to the agent
16 | which_agent=4
17 | std_on_mlp_policy=0.5
18 | base_run_num=11 #used for filenames for saving
19 |
20 | #####################################
21 | ## DO THE RUNS
22 | #####################################
23 |
24 | cd ..
25 | echo 'run numbers:'
26 | iter_num=0
27 | while [ $iter_num -lt $how_many_seeds ]
28 | do
29 | seed=${seeds[$iter_num]}
30 | run_num=$(( $base_run_num + $iter_num ))
31 | echo $run_num
32 | save_trpo_run_num=$(( 1 + $iter_num ))
33 |
34 | python main.py --seed=$seed --run_num=$run_num --yaml_file='cheetah_forward'
35 | python mbmf.py --run_num=$run_num --which_agent=$which_agent --std_on_mlp_policy=$std_on_mlp_policy
36 | python trpo_run_mf.py --seed=$seed --save_trpo_run_num=$save_trpo_run_num --which_agent=$which_agent --num_workers_trpo=$num_workers_trpo
37 |
38 | iter_num=$(( $iter_num + 1))
39 | done
40 |
41 | #####################################
42 | ## PLOTTING
43 | #####################################
44 |
45 | cd plotting
46 |
47 | if [ $how_many_seeds -eq 3 ]
48 | then
49 | python plot_mbmf.py --trpo_dir=$trpo_dir --std_on_mlp_policy=$std_on_mlp_policy --which_agent=$which_agent --run_nums 11 12 13 --seeds ${seeds[0]} ${seeds[1]} ${seeds[2]}
50 | fi
51 |
52 | if [ $how_many_seeds -eq 2 ]
53 | then
54 | python plot_mbmf.py --trpo_dir=$trpo_dir --std_on_mlp_policy=$std_on_mlp_policy --which_agent=$which_agent --run_nums 11 12 --seeds ${seeds[0]} ${seeds[1]}
55 | fi
56 |
57 | if [ $how_many_seeds -eq 1 ]
58 | then
59 | python plot_mbmf.py --trpo_dir=$trpo_dir --std_on_mlp_policy=$std_on_mlp_policy --which_agent=$which_agent --run_nums 11 --seeds ${seeds[0]}
60 | fi
61 |
--------------------------------------------------------------------------------
/scripts/hopper_mbmf.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | #####################################
4 | ## SET VARS
5 | #####################################
6 |
7 | #location of all saved trpo runs
8 | trpo_dir='/home/anagabandi/rllab/data/local/experiment/'
9 |
10 | #specific to the run
11 | how_many_seeds=3
12 | seeds=(0 70 100)
13 | num_workers_trpo=2 #how many cores to use
14 |
15 | #specific to the agent
16 | which_agent=6
17 | std_on_mlp_policy=1.0
18 | base_run_num=21 #used for filenames for saving
19 |
20 | #####################################
21 | ## DO THE RUNS
22 | #####################################
23 |
24 | cd ..
25 | echo 'run numbers:'
26 | iter_num=0
27 | while [ $iter_num -lt $how_many_seeds ]
28 | do
29 | seed=${seeds[$iter_num]}
30 | run_num=$(( $base_run_num + $iter_num ))
31 | echo $run_num
32 | save_trpo_run_num=$(( 1 + $iter_num ))
33 |
34 | python main.py --seed=$seed --run_num=$run_num --yaml_file='hopper_forward'
35 | python mbmf.py --run_num=$run_num --which_agent=$which_agent --std_on_mlp_policy=$std_on_mlp_policy
36 | python trpo_run_mf.py --seed=$seed --save_trpo_run_num=$save_trpo_run_num --which_agent=$which_agent --num_workers_trpo=$num_workers_trpo
37 |
38 | iter_num=$(( $iter_num + 1))
39 | done
40 |
41 | #####################################
42 | ## PLOTTING
43 | #####################################
44 |
45 | cd plotting
46 |
47 | if [ $how_many_seeds -eq 3 ]
48 | then
49 | python plot_mbmf.py --trpo_dir=$trpo_dir --std_on_mlp_policy=$std_on_mlp_policy --which_agent=$which_agent --run_nums 21 22 23 --seeds ${seeds[0]} ${seeds[1]} ${seeds[2]}
50 | fi
51 |
52 | if [ $how_many_seeds -eq 2 ]
53 | then
54 | python plot_mbmf.py --trpo_dir=$trpo_dir --std_on_mlp_policy=$std_on_mlp_policy --which_agent=$which_agent --run_nums 21 22 --seeds ${seeds[0]} ${seeds[1]}
55 | fi
56 |
57 | if [ $how_many_seeds -eq 1 ]
58 | then
59 | python plot_mbmf.py --trpo_dir=$trpo_dir --std_on_mlp_policy=$std_on_mlp_policy --which_agent=$which_agent --run_nums 21 --seeds ${seeds[0]}
60 | fi
61 |
--------------------------------------------------------------------------------
/scripts/swimmer_mbmf.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | #####################################
4 | ## SET VARS
5 | #####################################
6 |
7 | #location of all saved trpo runs
8 | trpo_dir='/home/anagabandi/rllab/data/local/experiment/'
9 |
10 | #specific to the run
11 | how_many_seeds=3
12 | seeds=(0 70 100)
13 | num_workers_trpo=2 #how many cores to use
14 |
15 | #specific to the agent
16 | which_agent=2
17 | std_on_mlp_policy=1.0
18 | base_run_num=1 #used for filenames for saving
19 |
20 | #####################################
21 | ## DO THE RUNS
22 | #####################################
23 |
24 | cd ..
25 | echo 'run numbers:'
26 | iter_num=0
27 | while [ $iter_num -lt $how_many_seeds ]
28 | do
29 | seed=${seeds[$iter_num]}
30 | run_num=$(( $base_run_num + $iter_num ))
31 | echo $run_num
32 | save_trpo_run_num=$(( 1 + $iter_num ))
33 |
34 | python main.py --seed=$seed --run_num=$run_num --yaml_file='swimmer_forward'
35 | python mbmf.py --run_num=$run_num --which_agent=$which_agent --std_on_mlp_policy=$std_on_mlp_policy
36 | python trpo_run_mf.py --seed=$seed --save_trpo_run_num=$save_trpo_run_num --which_agent=$which_agent --num_workers_trpo=$num_workers_trpo
37 |
38 | iter_num=$(( $iter_num + 1))
39 | done
40 |
41 | #####################################
42 | ## PLOTTING
43 | #####################################
44 |
45 | cd plotting
46 |
47 | if [ $how_many_seeds -eq 3 ]
48 | then
49 | python plot_mbmf.py --trpo_dir=$trpo_dir --std_on_mlp_policy=$std_on_mlp_policy --which_agent=$which_agent --run_nums 1 2 3 --seeds ${seeds[0]} ${seeds[1]} ${seeds[2]}
50 | fi
51 |
52 | if [ $how_many_seeds -eq 2 ]
53 | then
54 | python plot_mbmf.py --trpo_dir=$trpo_dir --std_on_mlp_policy=$std_on_mlp_policy --which_agent=$which_agent --run_nums 1 2 --seeds ${seeds[0]} ${seeds[1]}
55 | fi
56 |
57 | if [ $how_many_seeds -eq 1 ]
58 | then
59 | python plot_mbmf.py --trpo_dir=$trpo_dir --std_on_mlp_policy=$std_on_mlp_policy --which_agent=$which_agent --run_nums 1 --seeds ${seeds[0]}
60 | fi
--------------------------------------------------------------------------------
/trajectories.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | def make_trajectory(shape, starting_state_NN, x_index, y_index, which_agent):
4 |
5 | curr_x = np.copy(starting_state_NN[x_index])
6 | curr_y = np.copy(starting_state_NN[y_index])
7 |
8 | my_list = []
9 |
10 | if(shape=="left_turn"):
11 | if(which_agent==1):
12 | my_list.append(np.array([curr_x, curr_y]))
13 | my_list.append(np.array([curr_x+2, curr_y]))
14 | my_list.append(np.array([curr_x+4, curr_y]))
15 | my_list.append(np.array([curr_x+6, curr_y]))
16 | my_list.append(np.array([curr_x+6, curr_y+2]))
17 | my_list.append(np.array([curr_x+6, curr_y+3]))
18 | my_list.append(np.array([curr_x+6, curr_y+4]))
19 | my_list.append(np.array([curr_x+6, curr_y+5]))
20 | my_list.append(np.array([curr_x+6, curr_y+6]))
21 | my_list.append(np.array([curr_x+6, curr_y+7]))
22 | else:
23 | my_list.append(np.array([curr_x, curr_y]))
24 | my_list.append(np.array([curr_x+1, curr_y]))
25 | my_list.append(np.array([curr_x+2, curr_y]))
26 | my_list.append(np.array([curr_x+3, curr_y]))
27 | my_list.append(np.array([curr_x+4, curr_y+1]))
28 | my_list.append(np.array([curr_x+4, curr_y+2]))
29 | my_list.append(np.array([curr_x+4, curr_y+3]))
30 | my_list.append(np.array([curr_x+4, curr_y+4]))
31 |
32 | if(shape=="right_turn"):
33 | if(which_agent==1):
34 | my_list.append(np.array([curr_x, curr_y]))
35 | my_list.append(np.array([curr_x, curr_y+1]))
36 | my_list.append(np.array([curr_x, curr_y+2]))
37 | my_list.append(np.array([curr_x, curr_y+3]))
38 | my_list.append(np.array([curr_x, curr_y+4]))
39 | my_list.append(np.array([curr_x+2, curr_y+4]))
40 | my_list.append(np.array([curr_x+3, curr_y+4]))
41 | my_list.append(np.array([curr_x+4, curr_y+4]))
42 | my_list.append(np.array([curr_x+6, curr_y+4]))
43 | my_list.append(np.array([curr_x+7, curr_y+4]))
44 | else:
45 | my_list.append(np.array([curr_x, curr_y]))
46 | my_list.append(np.array([curr_x, curr_y+1]))
47 | my_list.append(np.array([curr_x, curr_y+2]))
48 | my_list.append(np.array([curr_x+2, curr_y+3]))
49 | my_list.append(np.array([curr_x+3, curr_y+3]))
50 | my_list.append(np.array([curr_x+4, curr_y+3]))
51 | my_list.append(np.array([curr_x+5, curr_y+3]))
52 | my_list.append(np.array([curr_x+6, curr_y+3]))
53 | my_list.append(np.array([curr_x+7, curr_y+3]))
54 | my_list.append(np.array([curr_x+8, curr_y+3]))
55 |
56 | if(shape=="u_turn"):
57 | my_list.append(np.array([curr_x, curr_y]))
58 | my_list.append(np.array([curr_x+2, curr_y]))
59 | my_list.append(np.array([curr_x+4, curr_y]))
60 | my_list.append(np.array([curr_x+4, curr_y+1]))
61 | my_list.append(np.array([curr_x+4, curr_y+2]))
62 | my_list.append(np.array([curr_x+2, curr_y+2]))
63 | my_list.append(np.array([curr_x+1, curr_y+2]))
64 | my_list.append(np.array([curr_x, curr_y+2]))
65 |
66 | if(shape=="straight"):
67 | i=0
68 | num_pts = 40
69 | while(i < num_pts):
70 | my_list.append(np.array([curr_x+i, curr_y]))
71 | i+=0.7
72 |
73 | if(shape=="backward"):
74 | i=0
75 | num_pts = 40
76 | while(i < num_pts):
77 | my_list.append(np.array([curr_x-i, curr_y]))
78 | i+=0.5
79 |
80 | if(shape=="forward_backward"):
81 | my_list.append(np.array([curr_x, curr_y]))
82 | my_list.append(np.array([curr_x+1, curr_y]))
83 | my_list.append(np.array([curr_x+2, curr_y]))
84 | my_list.append(np.array([curr_x+3, curr_y]))
85 | my_list.append(np.array([curr_x+2, curr_y]))
86 | my_list.append(np.array([curr_x+1, curr_y]))
87 | my_list.append(np.array([curr_x+0, curr_y]))
88 | my_list.append(np.array([curr_x-1, curr_y]))
89 | my_list.append(np.array([curr_x-2, curr_y]))
90 |
91 | if(shape=="circle"):
92 | num_pts = 10
93 | radius=2.0
94 | speed=-np.pi/8.0
95 | for i in range(num_pts):
96 | curr_x= radius*np.cos(speed*i)-radius
97 | curr_y= radius*np.sin(speed*i)
98 | my_list.append(np.array([curr_x, curr_y]))
99 |
100 | return np.array(my_list)
101 |
102 | def get_trajfollow_params(which_agent, desired_traj_type):
103 |
104 | desired_snake_headingInit= 0
105 | horiz_penalty_factor= 0
106 | forward_encouragement_factor= 0
107 | heading_penalty_factor= 0
108 |
109 | if(which_agent==1):
110 | if(desired_traj_type=="right_turn"):
111 | horiz_penalty_factor= 3
112 | forward_encouragement_factor= 50
113 | heading_penalty_factor= 100
114 | if(desired_traj_type=="left_turn"):
115 | horiz_penalty_factor= 4
116 | forward_encouragement_factor= 85
117 | heading_penalty_factor= 100
118 | if(desired_traj_type=="straight"):
119 | horiz_penalty_factor= 3.5
120 | forward_encouragement_factor= 85
121 | heading_penalty_factor= 100
122 | if(desired_traj_type=="u_turn"):
123 | horiz_penalty_factor= 3
124 | forward_encouragement_factor= 50
125 | heading_penalty_factor= 100
126 |
127 | if(which_agent==2):
128 | if(desired_traj_type=="right_turn"):
129 | desired_snake_headingInit= np.pi/2.0
130 | horiz_penalty_factor= 0.1
131 | forward_encouragement_factor= 250
132 | heading_penalty_factor= 0.9
133 | if(desired_traj_type=="left_turn"):
134 | horiz_penalty_factor= 0.7
135 | forward_encouragement_factor= 200
136 | heading_penalty_factor= 0.9
137 | if(desired_traj_type=="straight"):
138 | horiz_penalty_factor= 4
139 | forward_encouragement_factor= 500
140 | heading_penalty_factor= 2
141 |
142 | if(which_agent==4):
143 | if(desired_traj_type=="backward"):
144 | horiz_penalty_factor= 0
145 | forward_encouragement_factor= 20
146 | heading_penalty_factor= 10
147 | if(desired_traj_type=="forward_backward"):
148 | horiz_penalty_factor= 0
149 | forward_encouragement_factor= 20
150 | heading_penalty_factor= 10
151 | if(desired_traj_type=="straight"):
152 | horiz_penalty_factor= 0
153 | forward_encouragement_factor= 40
154 | heading_penalty_factor= 10
155 |
156 | return horiz_penalty_factor, forward_encouragement_factor, heading_penalty_factor, desired_snake_headingInit
--------------------------------------------------------------------------------
/trpo_run_mf.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 | import math
4 | npr = np.random
5 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy
6 | import tensorflow as tf
7 | from six.moves import cPickle
8 | from collect_samples import CollectSamples
9 | from get_true_action import GetTrueAction
10 | import os
11 | import copy
12 | from helper_funcs import create_env
13 | from helper_funcs import perform_rollouts
14 | from helper_funcs import add_noise
15 | from feedforward_network import feedforward_network
16 | from helper_funcs import visualize_rendering
17 | import argparse
18 |
19 | #TRPO things
20 | from rllab.algos.trpo import TRPO
21 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
22 | from rllab.optimizers.conjugate_gradient_optimizer import ConjugateGradientOptimizer
23 | from rllab.optimizers.conjugate_gradient_optimizer import FiniteDifferenceHvp
24 | from rllab.misc.instrument import run_experiment_lite
25 |
26 |
27 | def run_task(v):
28 |
29 | env, _ = create_env(v["which_agent"])
30 | policy = GaussianMLPPolicy(env_spec=env.spec, hidden_sizes=(64, 64))
31 | baseline = LinearFeatureBaseline(env_spec=env.spec)
32 | optimizer_params = dict(base_eps=1e-5)
33 |
34 | algo = TRPO(
35 | env=env,
36 | policy=policy,
37 | baseline=baseline,
38 | batch_size=v["batch_size"],
39 | max_path_length=v["steps_per_rollout"],
40 | n_itr=v["num_trpo_iters"],
41 | discount=0.995,
42 | optimizer=v["ConjugateGradientOptimizer"](hvp_approach=v["FiniteDifferenceHvp"](**optimizer_params)),
43 | step_size=0.05,
44 | plot_true=True)
45 |
46 | #train the policy
47 | algo.train()
48 |
49 | ##########################################
50 | ##########################################
51 |
52 | #ARGUMENTS TO SPECIFY
53 | parser = argparse.ArgumentParser()
54 | parser.add_argument('--seed', type=int, default=0)
55 | parser.add_argument('--steps_per_rollout', type=int, default=1000)
56 | parser.add_argument('--save_trpo_run_num', type=int, default=1)
57 | parser.add_argument('--which_agent', type=int, default=2)
58 | parser.add_argument('--num_workers_trpo', type=int, default=2)
59 | args = parser.parse_args()
60 |
61 | batch_size = 50000
62 |
63 | steps_per_rollout = args.steps_per_rollout
64 | num_trpo_iters = 2500
65 | if(args.which_agent==1):
66 | num_trpo_iters = 2500
67 | if(args.which_agent==2):
68 | steps_per_rollout=333
69 | num_trpo_iters = 500
70 | if(args.which_agent==4):
71 | num_trpo_iters= 2500
72 | if(args.which_agent==6):
73 | num_trpo_iters= 2000
74 |
75 | ##########################################
76 | ##########################################
77 |
78 | # seed numpy and tensorflow
79 | npr.seed(args.seed)
80 | tf.set_random_seed(args.seed)
81 |
82 | run_experiment_lite(run_task, plot=True, snapshot_mode="all", use_cloudpickle=True,
83 | n_parallel=str(args.num_workers_trpo),
84 | exp_name='agent_'+ str(args.which_agent)+'_seed_'+str(args.seed)+'_mf'+ '_run'+ str(args.save_trpo_run_num),
85 | variant=dict(batch_size=batch_size, which_agent=args.which_agent,
86 | steps_per_rollout=steps_per_rollout, num_trpo_iters=num_trpo_iters,
87 | FiniteDifferenceHvp=FiniteDifferenceHvp, ConjugateGradientOptimizer=ConjugateGradientOptimizer))
88 |
--------------------------------------------------------------------------------
/yaml_files/ant_forward.yaml:
--------------------------------------------------------------------------------
1 | which_agent: 1
2 | follow_trajectories: False
3 |
4 | data_collection:
5 | use_threading: True
6 | num_rollouts_train: 700
7 | num_rollouts_val: 20
8 |
9 | dyn_model:
10 | num_fc_layers: 1
11 | depth_fc_layers: 500
12 | batchsize: 512
13 | lr: 0.001
14 | nEpoch: 20
15 | fraction_use_new: 0.5
16 |
17 | controller:
18 | horizon: 5
19 | num_control_samples: 15000
20 |
21 | aggregation:
22 | num_aggregation_iters: 1
23 | num_trajectories_for_aggregation: 2
24 | rollouts_forTraining: 1
25 |
26 | noise:
27 | make_aggregated_dataset_noisy: True
28 | make_training_dataset_noisy: True
29 | noise_actions_during_MPC_rollouts: False
30 |
31 | steps:
32 | dt_steps: 1
33 | steps_per_episode: 1000
34 | steps_per_rollout_train: 1000
35 | steps_per_rollout_val: 200
36 |
37 | saving:
38 | min_rew_for_saving: 0
39 |
40 | generic:
41 | visualize_True: True
42 | visualize_False: False
--------------------------------------------------------------------------------
/yaml_files/ant_trajfollow.yaml:
--------------------------------------------------------------------------------
1 | which_agent: 1
2 | follow_trajectories: True
3 |
4 | data_collection:
5 | use_threading: True
6 | num_rollouts_train: 700
7 | num_rollouts_val: 20
8 |
9 | dyn_model:
10 | num_fc_layers: 1
11 | depth_fc_layers: 500
12 | batchsize: 512
13 | lr: 0.001
14 | nEpoch: 60
15 | fraction_use_new: 0
16 |
17 | controller:
18 | horizon: 15
19 | num_control_samples: 7000
20 |
21 | aggregation:
22 | num_aggregation_iters: 1
23 | num_trajectories_for_aggregation: 1
24 | rollouts_forTraining: 1
25 |
26 | noise:
27 | make_aggregated_dataset_noisy: True
28 | make_training_dataset_noisy: True
29 | noise_actions_during_MPC_rollouts: False
30 |
31 | steps:
32 | dt_steps: 1
33 | steps_per_episode: 1000
34 | steps_per_rollout_train: 1000
35 | steps_per_rollout_val: 1000
36 |
37 | saving:
38 | min_rew_for_saving: -1000
39 |
40 | generic:
41 | visualize_True: True
42 | visualize_False: False
--------------------------------------------------------------------------------
/yaml_files/cheetah_forward.yaml:
--------------------------------------------------------------------------------
1 | which_agent: 4
2 | follow_trajectories: False
3 |
4 | data_collection:
5 | use_threading: True
6 | num_rollouts_train: 10
7 | num_rollouts_val: 20
8 |
9 | dyn_model:
10 | num_fc_layers: 1
11 | depth_fc_layers: 500
12 | batchsize: 512
13 | lr: 0.001
14 | nEpoch: 60
15 | fraction_use_new: 0.9
16 |
17 | controller:
18 | horizon: 20
19 | num_control_samples: 1000
20 |
21 | aggregation:
22 | num_aggregation_iters: 7
23 | num_trajectories_for_aggregation: 10
24 | rollouts_forTraining: 9
25 |
26 | noise:
27 | make_aggregated_dataset_noisy: True
28 | make_training_dataset_noisy: True
29 | noise_actions_during_MPC_rollouts: True
30 |
31 | steps:
32 | dt_steps: 1
33 | steps_per_episode: 1000
34 | steps_per_rollout_train: 1000
35 | steps_per_rollout_val: 1000
36 |
37 | saving:
38 | min_rew_for_saving: 0
39 |
40 | generic:
41 | visualize_True: True
42 | visualize_False: False
--------------------------------------------------------------------------------
/yaml_files/cheetah_trajfollow.yaml:
--------------------------------------------------------------------------------
1 | which_agent: 4
2 | follow_trajectories: True
3 |
4 | data_collection:
5 | use_threading: True
6 | num_rollouts_train: 200
7 | num_rollouts_val: 20
8 |
9 | dyn_model:
10 | num_fc_layers: 1
11 | depth_fc_layers: 500
12 | batchsize: 512
13 | lr: 0.001
14 | nEpoch: 40
15 | fraction_use_new: 0
16 |
17 | controller:
18 | horizon: 10
19 | num_control_samples: 1000
20 |
21 | aggregation:
22 | num_aggregation_iters: 1
23 | num_trajectories_for_aggregation: 1
24 | rollouts_forTraining: 1
25 |
26 | noise:
27 | make_aggregated_dataset_noisy: True
28 | make_training_dataset_noisy: True
29 | noise_actions_during_MPC_rollouts: True
30 |
31 | steps:
32 | dt_steps: 1
33 | steps_per_episode: 1000
34 | steps_per_rollout_train: 1000
35 | steps_per_rollout_val: 1000
36 |
37 | saving:
38 | min_rew_for_saving: -1000
39 |
40 | generic:
41 | visualize_True: True
42 | visualize_False: False
--------------------------------------------------------------------------------
/yaml_files/hopper_forward.yaml:
--------------------------------------------------------------------------------
1 | which_agent: 6
2 | follow_trajectories: False
3 |
4 | data_collection:
5 | use_threading: True
6 | num_rollouts_train: 20
7 | num_rollouts_val: 20
8 |
9 | dyn_model:
10 | num_fc_layers: 1
11 | depth_fc_layers: 500
12 | batchsize: 512
13 | lr: 0.001
14 | nEpoch: 40
15 | fraction_use_new: 0.9
16 |
17 | controller:
18 | horizon: 40
19 | num_control_samples: 1000
20 |
21 | aggregation:
22 | num_aggregation_iters: 5
23 | num_trajectories_for_aggregation: 11
24 | rollouts_forTraining: 10
25 |
26 | noise:
27 | make_aggregated_dataset_noisy: True
28 | make_training_dataset_noisy: True
29 | noise_actions_during_MPC_rollouts: False
30 |
31 | steps:
32 | dt_steps: 1
33 | steps_per_episode: 1000
34 | steps_per_rollout_train: 200
35 | steps_per_rollout_val: 200
36 |
37 | saving:
38 | min_rew_for_saving: 0
39 |
40 | generic:
41 | visualize_True: True
42 | visualize_False: False
--------------------------------------------------------------------------------
/yaml_files/swimmer_forward.yaml:
--------------------------------------------------------------------------------
1 | which_agent: 2
2 | follow_trajectories: False
3 |
4 | data_collection:
5 | use_threading: True
6 | num_rollouts_train: 25
7 | num_rollouts_val: 20
8 |
9 | dyn_model:
10 | num_fc_layers: 1
11 | depth_fc_layers: 500
12 | batchsize: 512
13 | lr: 0.001
14 | nEpoch: 30
15 | fraction_use_new: 0.9
16 |
17 | controller:
18 | horizon: 20
19 | num_control_samples: 5000
20 |
21 | aggregation:
22 | num_aggregation_iters: 6
23 | num_trajectories_for_aggregation: 10
24 | rollouts_forTraining: 9
25 |
26 | noise:
27 | make_aggregated_dataset_noisy: True
28 | make_training_dataset_noisy: True
29 | noise_actions_during_MPC_rollouts: True
30 |
31 | steps:
32 | dt_steps: 3 #frameskip is normally 50; dt_steps=3 makes it effectively 150
33 | steps_per_episode: 333
34 | steps_per_rollout_train: 333
35 | steps_per_rollout_val: 333
36 |
37 | saving:
38 | min_rew_for_saving: 0
39 |
40 | generic:
41 | visualize_True: True
42 | visualize_False: False
--------------------------------------------------------------------------------
/yaml_files/swimmer_trajfollow.yaml:
--------------------------------------------------------------------------------
1 | which_agent: 2
2 | follow_trajectories: True
3 |
4 | data_collection:
5 | use_threading: True
6 | num_rollouts_train: 200
7 | num_rollouts_val: 20
8 |
9 | dyn_model:
10 | num_fc_layers: 1
11 | depth_fc_layers: 500
12 | batchsize: 512
13 | lr: 0.001
14 | nEpoch: 70
15 | fraction_use_new: 0
16 |
17 | controller:
18 | horizon: 5
19 | num_control_samples: 5000
20 |
21 | aggregation:
22 | num_aggregation_iters: 1
23 | num_trajectories_for_aggregation: 1
24 | rollouts_forTraining: 1
25 |
26 | noise:
27 | make_aggregated_dataset_noisy: True
28 | make_training_dataset_noisy: True
29 | noise_actions_during_MPC_rollouts: False
30 |
31 | steps:
32 | dt_steps: 3 #frameskip is normally 50; dt_steps=3 makes it effectively 150
33 | steps_per_episode: 1000
34 | steps_per_rollout_train: 500
35 | steps_per_rollout_val: 200
36 |
37 | saving:
38 | min_rew_for_saving: -1000
39 |
40 | generic:
41 | visualize_True: True
42 | visualize_False: False
--------------------------------------------------------------------------------