├── README.md └── train_joint10Policy.py /README.md: -------------------------------------------------------------------------------- 1 | Code for training policies based on the paper Coordinated Multi-Agent Imitation Learning. 2 | 3 | Corresponding author: Hoang M. Le, California Institute of Technology 4 | The data will soon be available for download here: https://www.stats.com/data-science/ 5 | 6 | Email me if you have questions: hmle at caltech dot edu 7 | 8 | Note: the missing libraries will be uploaded soon. 9 | -------------------------------------------------------------------------------- /train_joint10Policy.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | #from __future__ import division 3 | 4 | import time 5 | 6 | import numpy as np 7 | from math import sqrt 8 | import random 9 | import sys 10 | import subprocess 11 | from collections import Counter 12 | from keras.models import Sequential 13 | from keras.layers import Dense, Activation, Dropout, TimeDistributed, BatchNormalization 14 | from keras.layers import LSTM, GRU 15 | from keras.optimizers import RMSprop, Adagrad, Adam, SGD 16 | #from keras.models import load_model 17 | import keras.backend as K 18 | 19 | import matplotlib.pyplot as plt 20 | import matplotlib.animation as animation 21 | import matplotlib.lines as lines 22 | import matplotlib.patches as patches 23 | 24 | from multiprocessing import Pool 25 | import multiprocessing 26 | 27 | path = '/home/leh/Preprocessing/Progressive_Training/' 28 | 29 | #activeRole = 'lcm' 30 | activeRole = ['lcm','lcb', 'lb' , 'lw', 'lf', 'rcm', 'rcb', 'rb', 'rw', 'rf'] 31 | #activeRole = ['lcm','lcb'] 32 | #activeRole = ['lcb', 'lcm'] 33 | model_list = [path+'weights_progressive_Double_FullBatch_rollsteps_10_'+role+'.h5' for role in activeRole] 34 | 35 | roleOrderDefense = ['gk','rb','rcb','lcb','lb','rw','rcm','lcm','lw','rf','lf'] 36 | roleOrderAttack = ['gk','rb','rcb','lcb','lb','rw','rcm','lcm','lw','rf','lf'] 37 | 38 | global roleOrderList 39 | roleOrderList = [roleOrderDefense.index(role) for role in activeRole] 40 | 41 | #global roleOrder 42 | #roleOrder = roleOrderDefense.index(activeRole) 43 | 44 | if 'darwin' in sys.platform: 45 | print('Running \'caffeinate\' on macOS to prevent the system from sleeping') 46 | subprocess.Popen('caffeinate') 47 | 48 | def chunks(X, length): 49 | return [X[0+i:length+i] for i in range(0, X.shape[0], length)] 50 | # roll_out rebuilds one sequence's next-step feature vector from the joint position predictions; each of the 30 player blocks (22 ordered players plus the duplicated nearest teammates and opponents) holds 13 features: distance and unit vector to the active role, position, velocity, distance and unit vector to the goal, distance and unit vector to the ball 51 | def roll_out(params): 52 | goalPosition = [1.0, 0] 53 | 54 | prev_feature_vector, legacy_feature_vector, pos_prediction, roleOrder = params 55 | prev_feature_vector = np.concatenate((prev_feature_vector[:roleOrder*13], np.zeros(3),prev_feature_vector[roleOrder*13:] )) 56 | legacy_feature_vector = np.concatenate((legacy_feature_vector[:roleOrder*13], np.zeros(3),legacy_feature_vector[roleOrder*13:] )) 57 | 58 | legacy_current = legacy_feature_vector[0:390] # 390 = 30*13 59 | ball_current = legacy_feature_vector[390:399] # 399 = 30*13 + 9 60 | 61 | legacy = legacy_current.reshape(30,13) 62 | ball = ball_current[0:2] 63 | new_matrix = np.zeros((22,13)) 64 | 65 | role_long = legacy[roleOrder] 66 | teammateList = range(11) 67 | teammateList.remove(roleOrder) 68 | 69 | # fix role vector 70 | mainRoleIndex = roleOrderList.index(roleOrder) 71 | role_long[0:3] = np.zeros(3) 72 | role_long[3:5] = pos_prediction[2*mainRoleIndex:(2*mainRoleIndex+2)] 73 | role_long[5:7] = role_long[3:5] - prev_feature_vector[roleOrder*13+3:(roleOrder*13+5)] # velocity = current
pos - prev pos 74 | 75 | role = role_long[3:5] 76 | role_long[7] = sqrt((role[0]-goalPosition[0])**2+(role[1]-goalPosition[1])**2 ) 77 | if role_long[7] !=0: 78 | role_long[8] = (role[0]-goalPosition[0]) / role_long[7] 79 | role_long[9] = (role[1]-goalPosition[1]) / role_long[7] 80 | else: 81 | role_long[8] = 0.0 82 | role_long[9] = 0.0 83 | 84 | role_long[10] = sqrt((role[0]-ball[0])**2+(role[1]-ball[1])**2 ) 85 | if role_long[10] != 0: 86 | role_long[11] = (role[0]-ball[0]) / role_long[10] 87 | role_long[12] = (role[1]-ball[1]) / role_long[10] 88 | else: 89 | role_long[11] = 0.0 90 | role_long[12] = 0.0 91 | new_matrix[roleOrder] = role_long 92 | 93 | # fix all teammates vector 94 | for teammate in teammateList: 95 | player = legacy[teammate] 96 | if teammate in roleOrderList: # if the teammate is one of the active players 97 | teammateRoleIndex = roleOrderList.index(teammate) 98 | player[3:5] = pos_prediction[2*teammateRoleIndex:(2*teammateRoleIndex+2)] 99 | currentPos = player[3:5] 100 | player[5:7] = currentPos - prev_feature_vector[teammate*13+3:(teammate*13+5)] # velocity = current pos - prev pos 101 | 102 | player[7] = sqrt((currentPos[0]-goalPosition[0])**2+(currentPos[1]-goalPosition[1])**2 ) 103 | if player[7] !=0: 104 | player[8] = (currentPos[0]-goalPosition[0]) / player[7] 105 | player[9] = (currentPos[1]-goalPosition[1]) / player[7] 106 | else: 107 | player[8] = 0.0 108 | player[9] = 0.0 109 | 110 | player[10] = sqrt((currentPos[0]-ball[0])**2+(currentPos[1]-ball[1])**2 ) 111 | if player[10] != 0: 112 | player[11] = (currentPos[0]-ball[0]) / player[10] 113 | player[12] = (currentPos[1]-ball[1]) / player[10] 114 | else: 115 | player[11] = 0.0 116 | player[12] = 0.0 117 | 118 | currentPos = player[3:5] 119 | 120 | player[0] = sqrt((currentPos[0]-role[0])**2+(currentPos[1]-role[1])**2 ) 121 | if player[0] != 0: 122 | player[1] = (currentPos[0]-role[0]) / player[0] 123 | player[2] = (currentPos[1]-role[1]) / player[0] 124 | else: 125 | player[1] = prev_feature_vector[teammate*13+1] 126 | player[2] = prev_feature_vector[teammate*13+2] 127 | 128 | new_matrix[teammate] = player 129 | 130 | for opponent in range(11,22): 131 | player = legacy[opponent] 132 | currentPos = player[3:5] 133 | player[0] = sqrt((currentPos[0]-role[0])**2+(currentPos[1]-role[1])**2 ) 134 | if player[0] != 0: 135 | player[1] = (currentPos[0]-role[0]) / player[0] 136 | player[2] = (currentPos[1]-role[1]) / player[0] 137 | else: 138 | player[1] = prev_feature_vector[opponent*13+1] 139 | player[2] = prev_feature_vector[opponent*13+2] 140 | new_matrix[opponent] = player 141 | 142 | teammates_distance = new_matrix[:11,0].copy() 143 | opponents_distance = new_matrix[11:22,0].copy() 144 | k = 4 145 | k_nearest_teammate = teammates_distance.argsort()[0:(k+1)] 146 | k_nearest_opponent = 11+opponents_distance.argsort()[0:k] # add 11 for offset 147 | # remove the role itself out of the nearest teammate list 148 | k_nearest_teammate = k_nearest_teammate[np.nonzero(k_nearest_teammate-roleOrder)] 149 | # Now look for the closest 3 teammates and duplicate the vector 150 | new_matrix = np.vstack((new_matrix, new_matrix[k_nearest_teammate], new_matrix[k_nearest_opponent])) ## Combine all ordered player_state with the nearest teammates and nearest opponents 151 | 152 | new_feature_vector = np.concatenate((new_matrix.flatten(), ball_current )) 153 | 154 | ## delete the 3 zeros 155 | new_feature_vector = np.concatenate((new_feature_vector[:roleOrder*13], new_feature_vector[roleOrder*13+3:] )) 156 | 157 | return 
new_feature_vector 158 | 159 | if __name__ == '__main__': 160 | 161 | 162 | ##################################### 163 | #### LOAD THE NORMAL DATA 164 | 165 | data = [np.load(path+role+'_data_CurrWithVel_training.npy') for role in activeRole] 166 | 167 | ## holder for the raw invert data 168 | data_invert = [np.load(path+role+'_data_CurrWithVel_training_invert.npy') for role in activeRole] 169 | 170 | totalTimeSteps = 50 171 | 172 | endOfSequenceMarker = np.nonzero(data[0][:,0])[0] 173 | numSequence = endOfSequenceMarker.shape[0] 174 | beginOfSequenceMarker = np.zeros(numSequence).astype(np.int64) 175 | for index in range(numSequence-1): 176 | beginOfSequenceMarker[index+1] = endOfSequenceMarker[index]+1 177 | 178 | sequenceLength = endOfSequenceMarker - beginOfSequenceMarker +1 179 | 180 | subsequenceLength = totalTimeSteps+1 # this will be the number of time steps later in the lstm model + 1 181 | overlapWindow = 26 182 | 183 | def chunking_window(subsequenceLength, overlapWindow): 184 | startSequence = [] 185 | endSequence = [] 186 | for index in range(numSequence): 187 | startSubSequence = [] 188 | endSubSequence = [] 189 | start = beginOfSequenceMarker[index] 190 | end = endOfSequenceMarker[index] 191 | while end >= (start+subsequenceLength-1): 192 | endSubSequence.append(end) 193 | startSubSequence.append(end-subsequenceLength+1) 194 | end = end - overlapWindow 195 | startSubSequence.reverse() 196 | endSubSequence.reverse() 197 | startSequence = startSequence + startSubSequence 198 | endSequence = endSequence + endSubSequence 199 | 200 | sequenceMarker = zip(startSequence, endSequence) 201 | 202 | return sequenceMarker 203 | 204 | sequenceMarker = chunking_window(subsequenceLength, overlapWindow) 205 | 206 | fullColumnIndex = np.arange(401) # 401 = 2+ 30*13+9 columns in the original training data file 207 | #excludedColumns = np.concatenate((np.array([0,1]),np.arange(2+roleOrder*13,2+roleOrder*13+3) )) # first two columns are time stamps 208 | #retainedColumns = np.delete(fullColumnIndex, excludedColumns) 209 | 210 | 211 | X_train_all = [] 212 | Y_train_all = [] 213 | 214 | for roleIndex in range(len(activeRole)): 215 | 216 | X_subset_data = [] 217 | Y_subset_data = [] 218 | excludedColumns = np.concatenate((np.array([0,1]),np.arange(2+roleOrderList[roleIndex]*13,2+roleOrderList[roleIndex]*13+3) )) # first two columns are time stamps 219 | retainedColumns = np.delete(fullColumnIndex, excludedColumns) 220 | 221 | for index in sequenceMarker: 222 | dataSegment = data[roleIndex][index[0]:index[1]+1,:].copy() 223 | x_segment = dataSegment[:-1,retainedColumns] 224 | y_segment = dataSegment[1:,(2+roleOrderList[roleIndex]*13+3):(2+roleOrderList[roleIndex]*13+5) ] # the role's position located in column 3 and 4, offset by the role order* number of feature per player 225 | 226 | X_subset_data.append(x_segment) 227 | Y_subset_data.append(y_segment) 228 | 229 | ## Load the invert data 230 | 231 | for index in sequenceMarker: 232 | dataSegment = data_invert[roleIndex][index[0]:index[1]+1,:].copy() 233 | x_segment = dataSegment[:-1,retainedColumns] 234 | y_segment = dataSegment[1:,(2+roleOrderList[roleIndex]*13+3):(2+roleOrderList[roleIndex]*13+5) ] # the role's position located in column 3 and 4, offset by the role order* number of feature per player 235 | 236 | X_subset_data.append(x_segment) 237 | Y_subset_data.append(y_segment) 238 | 239 | ######################################################################## 240 | #### COMBINE THE TWO BATCHES OF DATA 241 | 242 | X_train = 
np.vstack(X_subset_data) 243 | Y_train = np.vstack(Y_subset_data) 244 | 245 | X_train_all.append(X_train) 246 | Y_train_all.append(Y_train) 247 | 248 | featurelen = retainedColumns.shape[0] 249 | outputlen = 2 250 | numOfPrevSteps = 1 # We are only looking at the most recent character each time. 251 | ######################################################################### 252 | print('Formatting Data') 253 | 254 | print('total training frame is ', X_train.shape[0]) 255 | batches = chunks(X_train, totalTimeSteps) 256 | batchSize = 2200 257 | #batchSize = len(batches) 258 | batches = batches[0:len(batches)/batchSize*batchSize] 259 | offSet = len(batches) / batchSize 260 | ######################################################################### 261 | ## Further processing. Clipping the maximum velocity norm here 262 | 263 | #velNorm = sqrt(Y[:,0]**2+Y[:,1]**2) 264 | #velLimit = np.percentile(velNorm, 95) ## Clip the limit of velocity to 95 percentile of all velocities, to get rid of outliers # artificially remove the limit 265 | 266 | #X = np.zeros([batchSize, totalTimeSteps , featurelen]) 267 | X_all = [] 268 | Y_all = [] 269 | for X_train in X_train_all: 270 | X = np.zeros([offSet*batchSize, totalTimeSteps , featurelen]) 271 | for b in range(len(batches)): 272 | for r in range(totalTimeSteps): 273 | currentFeature = X_train[r + b*totalTimeSteps] 274 | X[b][r][:] = currentFeature 275 | X_all.append(X) 276 | 277 | for Y_train in Y_train_all: 278 | Y = np.zeros([offSet*batchSize, totalTimeSteps , outputlen]) 279 | for b in range(len(batches)): 280 | for r in range(totalTimeSteps): 281 | currentPrediction = Y_train[r + b*totalTimeSteps] 282 | Y[b][r][:] = currentPrediction 283 | Y_all.append(Y) 284 | 285 | 286 | X_original = [X_all[index].copy() for index in range(len(activeRole))] 287 | 288 | ################################## 289 | #### Load the test data##### 290 | #data = np.load(activeRole+'_data_CurrWithVel_test1.npy') 291 | data = [np.load(path+role+'_data_CurrWithVel_test1.npy') for role in activeRole] 292 | 293 | endOfSequenceMarker = np.nonzero(data[0][:,0])[0] 294 | numSequence = endOfSequenceMarker.shape[0] 295 | beginOfSequenceMarker = np.zeros(numSequence).astype(np.int64) 296 | for index in range(numSequence-1): 297 | beginOfSequenceMarker[index+1] = endOfSequenceMarker[index]+1 298 | 299 | sequenceLength = endOfSequenceMarker - beginOfSequenceMarker +1 300 | 301 | includedSequence = np.where(sequenceLength>=50)[0] ## expect all sequences to have length of at least 50, since this is how the test set was formed 302 | 303 | maxlen = sequenceLength.max() 304 | totalTimeSteps_test = maxlen 305 | 306 | X_test_all = [] 307 | for roleIndex in range(len(activeRole)): 308 | X_test_data = [] 309 | Y_test_data = [] 310 | 311 | excludedColumns = np.concatenate((np.array([0,1]),np.arange(2+roleOrderList[roleIndex]*13,2+roleOrderList[roleIndex]*13+3) )) # first two columns are time stamps 312 | retainedColumns = np.delete(fullColumnIndex, excludedColumns) 313 | 314 | for index in includedSequence: 315 | dataSegment = data[roleIndex][beginOfSequenceMarker[index]:endOfSequenceMarker[index]+1,:].copy() 316 | x_segment = np.zeros((maxlen, retainedColumns.shape[0])) 317 | x_segment[:dataSegment.shape[0]] = dataSegment[:,retainedColumns] 318 | 319 | X_test_data.append(x_segment) 320 | 321 | X_test = np.vstack(X_test_data) 322 | X_test_all.append(X_test) 323 | 324 | batches_test = chunks(X_test, totalTimeSteps_test) 325 | batchSize_test = 38 326 | 327 | X_test_test_all = [] 328 | for X_test in 
X_test_all: 329 | X_test_test = np.zeros([batchSize_test, totalTimeSteps_test , featurelen]) 330 | for b in range(len(batches_test)): 331 | for r in range(totalTimeSteps_test): 332 | currentFeature = X_test[r + b*totalTimeSteps_test] 333 | X_test_test[b][r][:] = currentFeature 334 | X_test_test_all.append(X_test_test) 335 | 336 | X_original_test = [X_test_test_all[index].copy() for index in range(len(activeRole))] 337 | 338 | ######################################################################## 339 | #### FINISH LOADING THE DATA #### 340 | ######################################################################## 341 | 342 | 343 | ############# 344 | 345 | """ 346 | init_model = Sequential() 347 | #init_model.add(BatchNormalization(batch_input_shape=(batchSize, numOfPrevSteps , featurelen) )) 348 | #init_model.add(LSTM(512 , batch_input_shape=(batchSize, numOfPrevSteps , featurelen), return_sequences=True, stateful=True)) 349 | init_model.add(LSTM(512 , batch_input_shape=(batchSize, numOfPrevSteps, featurelen), return_sequences=True, stateful=True)) 350 | init_model.add(LSTM(512 , return_sequences=False,stateful=True)) 351 | init_model.add(Dense (2)) 352 | init_model.add(Activation('linear')) 353 | init_model.compile(loss='mse', optimizer='rmsprop') 354 | init_model.reset_states() 355 | 356 | print('starting initializing') 357 | num_epochs = 5 358 | for e in range(num_epochs): 359 | print('epoch - ',e+1) 360 | #p = sampling_prob[e] 361 | 362 | startTime = time.time() 363 | training_loss = [] 364 | #loss = init_model.fit(X,Y,nb_epoch = 5, batch_size = batchSize) 365 | 366 | for j in range(offSet): 367 | #for i in range(0,totalTimeSteps-1): 368 | for i in range(0,totalTimeSteps): 369 | #loss = init_model.train_on_batch(X[batchSize*j:batchSize*(j+1), numOfPrevSteps*i:(i+1)*numOfPrevSteps, :], np.reshape(Y[batchSize*j:batchSize*(j+1), (i+1)*numOfPrevSteps, :], (batchSize, outputlen)) ) 370 | loss = init_model.train_on_batch(X[batchSize*j:batchSize*(j+1), numOfPrevSteps*i:(i+1)*numOfPrevSteps, :], Y[batchSize*j:batchSize*(j+1), i, :] ) 371 | training_loss.append(loss) 372 | 373 | init_model.reset_states() 374 | print('Initializing loss: ',sum(training_loss)/len(training_loss)) 375 | 376 | #print('Initializing loss: ',loss) 377 | totalTime = time.time() - startTime 378 | print('Completed epoch in ',totalTime,' seconds') 379 | print() 380 | print('Initializing complete') 381 | 382 | init_model.save_weights('init_weights_minibatch1024_10epochs.h5',overwrite = True) 383 | 384 | model = Sequential() 385 | #model.add(BatchNormalization(batch_input_shape=(batchSize, numOfPrevSteps , featurelen) ) ) 386 | model.add(LSTM(512 ,batch_input_shape=(batchSize, numOfPrevSteps , featurelen), return_sequences=True, stateful=True)) 387 | model.add(LSTM(512 , return_sequences=False,stateful=True)) 388 | model.add(Dense (2)) 389 | model.add(Activation('linear')) 390 | adagrad = Adagrad(lr=0.005, epsilon=1e-08) 391 | model.compile(loss='mse', optimizer='adagrad') 392 | model.load_weights('init_weights_minibatch1024_10epochs.h5') # Load the pretrained model 393 | model.reset_states() 394 | """ 395 | 396 | adagradOpt = Adagrad(lr=0.005, epsilon=1e-08) 397 | 398 | print('Load models...') 399 | policy = [] 400 | #### Load the model 401 | for model_name in model_list: 402 | model = Sequential() 403 | model.add(LSTM(512 ,return_sequences=True, batch_input_shape=(batchSize, numOfPrevSteps , featurelen), stateful=True)) 404 | model.add(LSTM(512 , return_sequences=False,stateful=True)) 405 | model.add(Dense (2)) 406 | 
model.add(Activation('linear')) 407 | model.compile(loss='mse', optimizer='adagrad') 408 | #model.compile(loss='mse', optimizer=adagradOpt) 409 | model.load_weights(model_name) 410 | #model.load_weights('init_weights_minibatch1024_10epochs.h5') # Load the pretrained model 411 | model.reset_states() 412 | policy.append(model) 413 | 414 | val_policy = [] 415 | #### Load the model 416 | for index in range(len(activeRole)): 417 | val_model = Sequential() 418 | val_model.add(LSTM(512 ,batch_input_shape=(batchSize_test, numOfPrevSteps , featurelen) ,return_sequences=True, stateful=True)) 419 | val_model.add(LSTM(512 , return_sequences=False,stateful=True)) 420 | val_model.add(Dense (2)) 421 | val_model.add(Activation('linear')) 422 | val_model.compile(loss='mse', optimizer='adagrad') 423 | #val_model.compile(loss='mse', optimizer=adagradOpt) 424 | val_model.reset_states() 425 | val_policy.append(val_model) 426 | 427 | print('starting training') 428 | rollout_horizon = [10] 429 | num_epochs = 100 430 | 431 | #sampling_prob = np.linspace(0,1,num_epochs)[::-1] 432 | lr_schedule = np.zeros(num_epochs) 433 | 434 | # Set up parallel processing 435 | numProcess = multiprocessing.cpu_count() 436 | print('Number of processes ', numProcess) 437 | pool = Pool(processes = 16) 438 | 439 | for horizon in rollout_horizon: 440 | best_loss = [10 for index in range(len(activeRole))] 441 | bestValLoss = 10 442 | for e in range(num_epochs): 443 | text_file = open("Output_10Policy_double_2200batch_roll10_overlapWindow25.txt", "a") 444 | #lr_schedule[e] = model.optimizer.lr.get_value() 445 | #p = sampling_prob[e] 446 | print('epoch - ',e+1) 447 | text_file.write('epoch - %s \n' %(e+1) ) 448 | print('training joint policies - Double rollout horizon 10 - Predict then train - adagrad- batch 2200 - Overlapping window') 449 | 450 | #print('learning rate before training ', lr_schedule[e]) 451 | startTime = time.time() 452 | 453 | training_loss = [[] for index in range(len(activeRole))] # initialize empty list of list to store training loss 454 | for j in range(offSet): 455 | #for i in range(0,totalTimeSteps-1): 456 | for i in range(0,totalTimeSteps+1-horizon,horizon): 457 | 458 | # roll out horizon times 459 | for k in range(horizon): 460 | if i+k+1<50: 461 | next_prediction_all = [] 462 | ## Roll out all next step predictions and gather them into one place 463 | for index in range(len(activeRole)): 464 | next_prediction = policy[index].predict_on_batch(X_all[index][batchSize*j:batchSize*(j+1), (i+k):(i+k+1), :]) 465 | next_prediction_all.append(next_prediction) 466 | ## and then update all the feature vector for the next step, for each active role 467 | next_prediction_all = np.hstack(next_prediction_all) 468 | for index in range(len(activeRole)): 469 | prev_feature = X_all[index][batchSize*j:batchSize*(j+1),i+k,:] 470 | legacy_feature = X_all[index][batchSize*j:batchSize*(j+1),i+k+1,:] 471 | order = np.empty(batchSize).astype(int) 472 | order.fill(roleOrderList[index]) 473 | params = zip(prev_feature, legacy_feature, next_prediction_all, order) 474 | result = pool.map(roll_out, params) 475 | #result = map(roll_out, params) 476 | X_all[index][batchSize*j:batchSize*(j+1),i+k+1,:] = np.array(result) 477 | 478 | for index in range(len(activeRole)): 479 | ## train the model for the horizon steps 480 | for k in range(horizon): 481 | loss = policy[index].train_on_batch(X_all[index][batchSize*j:batchSize*(j+1), (i+k):(i+k+1), :], Y_all[index][batchSize*j:batchSize*(j+1), i+k, :] ) 482 | training_loss[index].append(loss) 483 | 
484 | 485 | for index in range(len(activeRole)): 486 | policy[index].reset_states() 487 | for index in range(len(activeRole)): 488 | print('training loss for role '+activeRole[index]+': ',sum(training_loss[index])/len(training_loss[index])) 489 | rolledOutLoss = ((X_all[index][:,:,(roleOrderList[index]*13):(roleOrderList[index]*13+2)] - X_original[index][:,:,(roleOrderList[index]*13):(roleOrderList[index]*13+2)])**2).mean() 490 | print('rolled out loss for role '+ activeRole[index] +': ', rolledOutLoss) 491 | 492 | 493 | 494 | #### True roll out #### 495 | for index in range(len(activeRole)): 496 | for i in range(len(policy[index].layers)): 497 | val_policy[index].layers[i].set_weights(policy[index].layers[i].get_weights()) 498 | val_policy[index].reset_states() 499 | 500 | 501 | for i in range(0,totalTimeSteps_test-1): 502 | next_prediction_all = [] 503 | ## Roll out all next step predictions and gather them into one place 504 | for index in range(len(activeRole)): 505 | next_prediction = val_policy[index].predict_on_batch(X_test_test_all[index][0:batchSize_test, numOfPrevSteps*i:(i+1)*numOfPrevSteps, :]) 506 | next_prediction_all.append(next_prediction) 507 | ## and then update all the feature vector for the next step, for each active role 508 | next_prediction_all = np.hstack(next_prediction_all) 509 | for index in range(len(activeRole)): 510 | prev_feature = X_test_test_all[index][0:batchSize_test,i,:] 511 | legacy_feature = X_test_test_all[index][0:batchSize_test,i+1,:] 512 | order = np.empty(batchSize_test).astype(int) 513 | order.fill(roleOrderList[index]) 514 | params = zip(prev_feature, legacy_feature, next_prediction_all, order) 515 | result = pool.map(roll_out, params[0:len(batches_test)]) 516 | X_test_test_all[index][0:len(batches_test),i+1,:] = np.array(result) 517 | 518 | #model.reset_states() 519 | 520 | valLoss = 0 521 | for index in range(len(activeRole)): 522 | predPosition = [] 523 | truePosition = [] 524 | 525 | for i in includedSequence: 526 | predPosition.append(X_test_test_all[index][i,:sequenceLength[i],(roleOrderList[index]*13):(roleOrderList[index]*13+2)]) 527 | truePosition.append(X_original_test[index][i,:sequenceLength[i],(roleOrderList[index]*13):(roleOrderList[index]*13+2)]) 528 | #rolledOutLoss = ((X_test_test_all[index][:,:,(roleOrderList[index]*13):(roleOrderList[index]*13+2)] - X_original_test[index][:,:,(roleOrderList[index]*13):(roleOrderList[index]*13+2)])**2).mean() 529 | rolledOutLoss = ((np.vstack(predPosition) - np.vstack(truePosition))**2).mean() 530 | valLoss = valLoss + rolledOutLoss 531 | print('True validation loss for role '+ activeRole[index]+':', rolledOutLoss) 532 | text_file.write('True validation loss for role %s : %s \n' %(activeRole[index], rolledOutLoss)) 533 | if rolledOutLoss < best_loss[index]: 534 | best_loss[index] = rolledOutLoss 535 | policy[index].save_weights('weights_joint10Policy_DoubleOW25_batch2200_adagrad_rollsteps_'+str(horizon)+'_'+activeRole[index]+'_epoch'+str(e+1)+'.h5', overwrite = True) 536 | 537 | print('best validation loss so far with rollout '+str(horizon)+' for role '+ activeRole[index] + ': ', best_loss[index]) 538 | text_file.write('best validation loss so far with rollout %s for role %s : %s \n' %(str(horizon), activeRole[index],best_loss[index] ) ) 539 | print() 540 | print('Total validation loss this round: ', valLoss) 541 | text_file.write('Total validation loss this round: %s \n' %(valLoss)) 542 | if valLoss < bestValLoss: 543 | bestValLoss = valLoss 544 | print('best total validation loss up to this 
round: ', bestValLoss) 545 | text_file.write('best total validation loss up to this round: %s \n' %bestValLoss) 546 | 547 | ### End of true roll out #### 548 | 549 | print() 550 | 551 | totalTime = time.time() - startTime 552 | print('Completed epoch in ',totalTime,' seconds') 553 | print() 554 | text_file.close() 555 | 556 | print('training complete') --------------------------------------------------------------------------------
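As a point of reference, here is a small self-contained sketch of the overlapping-window logic that chunking_window in the script uses to carve training subsequences out of each possession sequence. The helper name overlapping_windows and the 120-frame example are illustrative only; the settings (subsequenceLength = 51, overlapWindow = 26) match the values set in the script.

def overlapping_windows(begin, end, subsequenceLength, overlapWindow):
    # Take windows of length subsequenceLength from the end of the sequence,
    # stepping back by overlapWindow frames each time, then return them in order.
    starts, ends = [], []
    while end >= begin + subsequenceLength - 1:
        ends.append(end)
        starts.append(end - subsequenceLength + 1)
        end = end - overlapWindow
    starts.reverse()
    ends.reverse()
    return list(zip(starts, ends))

# Example: a 120-frame sequence occupying rows 0..119 yields the windows
# (17, 67), (43, 93) and (69, 119), each of length 51 (50 input steps + 1 target step).
print(overlapping_windows(0, 119, 51, 26))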
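Similarly, the following is a minimal sketch, under the same Keras 1.x API used in this script, of how one of the per-role policies saved by the training loop could be reloaded for step-by-step prediction on a single sequence. The weights file name (in particular the epoch number), the featurelen value, and the zero-filled x_sequence are placeholders that depend on the actual run and data; this is not the authors' evaluation code.

import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM

featurelen = 396   # placeholder: 401 raw columns minus the 2 timestamps and the 3 zeroed entries of the active role
weights_file = 'weights_joint10Policy_DoubleOW25_batch2200_adagrad_rollsteps_10_lcm_epoch1.h5'   # placeholder epoch number

# Same stateful architecture as the training policies, rebuilt with batch size 1;
# the layer weights do not depend on the batch size, so they can be reloaded here.
model = Sequential()
model.add(LSTM(512, batch_input_shape=(1, 1, featurelen), return_sequences=True, stateful=True))
model.add(LSTM(512, return_sequences=False, stateful=True))
model.add(Dense(2))
model.add(Activation('linear'))
model.compile(loss='mse', optimizer='adagrad')
model.load_weights(weights_file)
model.reset_states()

# x_sequence: one test sequence of per-frame feature vectors, shape (T, featurelen).
x_sequence = np.zeros((50, featurelen))
predicted_positions = []
for t in range(x_sequence.shape[0]):
    # predict_on_batch returns the predicted (x, y) position for the next frame, shape (1, 2).
    pred = model.predict_on_batch(x_sequence[t].reshape(1, 1, featurelen))
    predicted_positions.append(pred[0])
model.reset_states()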