├── README.md └── train_joint10Policy.py /README.md: -------------------------------------------------------------------------------- 1 | Code for training policies based on the paper Coordinated Multi-Agent Imitation Learning. 2 | 3 | Corresponding author: Hoang M. Le, California Institute of Technology 4 | The data will soon be available for download here: https://www.stats.com/data-science/ 5 | 6 | Email me if you have questions: hmle at caltech dot edu 7 | 8 | Note: the missing libraries will be uploaded soon. 9 | -------------------------------------------------------------------------------- /train_joint10Policy.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | #from __future__ import division 3 | 4 | import time 5 | 6 | import numpy as np 7 | from math import sqrt 8 | import random 9 | import sys 10 | import subprocess 11 | from collections import Counter 12 | from keras.models import Sequential 13 | from keras.layers import Dense, Activation, Dropout, TimeDistributed, BatchNormalization 14 | from keras.layers import LSTM, GRU 15 | from keras.optimizers import RMSprop, Adagrad, Adam, SGD 16 | #from keras.models import load_model 17 | import keras.backend as K 18 | 19 | import matplotlib.pyplot as plt 20 | import matplotlib.animation as animation 21 | import matplotlib.lines as lines 22 | import matplotlib.patches as patches 23 | 24 | from multiprocessing import Pool 25 | import multiprocessing 26 | 27 | path = '/home/leh/Preprocessing/Progressive_Training/' 28 | 29 | #activeRole = 'lcm' 30 | activeRole = ['lcm','lcb', 'lb' , 'lw', 'lf', 'rcm', 'rcb', 'rb', 'rw', 'rf'] 31 | #activeRole = ['lcm','lcb'] 32 | #activeRole = ['lcb', 'lcm'] 33 | model_list = [path+'weights_progressive_Double_FullBatch_rollsteps_10_'+role+'.h5' for role in activeRole] 34 | 35 | roleOrderDefense = ['gk','rb','rcb','lcb','lb','rw','rcm','lcm','lw','rf','lf'] 36 | roleOrderAttack = ['gk','rb','rcb','lcb','lb','rw','rcm','lcm','lw','rf','lf'] 37 | 38 | global roleOrderList 39 | roleOrderList = [roleOrderDefense.index(role) for role in activeRole] 40 | 41 | #global roleOrder 42 | #roleOrder = roleOrderDefense.index(activeRole) 43 | 44 | if 'darwin' in sys.platform: 45 | print('Running \'caffeinate\' on macOS to prevent the system from sleeping') 46 | subprocess.Popen('caffeinate') 47 | 48 | def chunks(X, length): 49 | return [X[0+i:length+i] for i in range(0, X.shape[0], length)] 50 | # roll_out rebuilds one sequence's next-step feature vector from the joint position predictions; each of the 30 player blocks (22 ordered players plus the duplicated nearest teammates and opponents) holds 13 features: distance and unit vector to the active role, position, velocity, distance and unit vector to the goal, distance and unit vector to the ball 51 | def roll_out(params): 52 | goalPosition = [1.0, 0] 53 | 54 | prev_feature_vector, legacy_feature_vector, pos_prediction, roleOrder = params 55 | prev_feature_vector = np.concatenate((prev_feature_vector[:roleOrder*13], np.zeros(3),prev_feature_vector[roleOrder*13:] )) 56 | legacy_feature_vector = np.concatenate((legacy_feature_vector[:roleOrder*13], np.zeros(3),legacy_feature_vector[roleOrder*13:] )) 57 | 58 | legacy_current = legacy_feature_vector[0:390] # 390 = 30*13 59 | ball_current = legacy_feature_vector[390:399] # 399 = 30*13 + 9 60 | 61 | legacy = legacy_current.reshape(30,13) 62 | ball = ball_current[0:2] 63 | new_matrix = np.zeros((22,13)) 64 | 65 | role_long = legacy[roleOrder] 66 | teammateList = range(11) 67 | teammateList.remove(roleOrder) 68 | 69 | # fix role vector 70 | mainRoleIndex = roleOrderList.index(roleOrder) 71 | role_long[0:3] = np.zeros(3) 72 | role_long[3:5] = pos_prediction[2*mainRoleIndex:(2*mainRoleIndex+2)] 73 | role_long[5:7] = role_long[3:5] - prev_feature_vector[roleOrder*13+3:(roleOrder*13+5)] # velocity = current
pos - prev pos 74 | 75 | role = role_long[3:5] 76 | role_long[7] = sqrt((role[0]-goalPosition[0])**2+(role[1]-goalPosition[1])**2 ) 77 | if role_long[7] !=0: 78 | role_long[8] = (role[0]-goalPosition[0]) / role_long[7] 79 | role_long[9] = (role[1]-goalPosition[1]) / role_long[7] 80 | else: 81 | role_long[8] = 0.0 82 | role_long[9] = 0.0 83 | 84 | role_long[10] = sqrt((role[0]-ball[0])**2+(role[1]-ball[1])**2 ) 85 | if role_long[10] != 0: 86 | role_long[11] = (role[0]-ball[0]) / role_long[10] 87 | role_long[12] = (role[1]-ball[1]) / role_long[10] 88 | else: 89 | role_long[11] = 0.0 90 | role_long[12] = 0.0 91 | new_matrix[roleOrder] = role_long 92 | 93 | # fix all teammates vector 94 | for teammate in teammateList: 95 | player = legacy[teammate] 96 | if teammate in roleOrderList: # if the teammate is one of the active players 97 | teammateRoleIndex = roleOrderList.index(teammate) 98 | player[3:5] = pos_prediction[2*teammateRoleIndex:(2*teammateRoleIndex+2)] 99 | currentPos = player[3:5] 100 | player[5:7] = currentPos - prev_feature_vector[teammate*13+3:(teammate*13+5)] # velocity = current pos - prev pos 101 | 102 | player[7] = sqrt((currentPos[0]-goalPosition[0])**2+(currentPos[1]-goalPosition[1])**2 ) 103 | if player[7] !=0: 104 | player[8] = (currentPos[0]-goalPosition[0]) / player[7] 105 | player[9] = (currentPos[1]-goalPosition[1]) / player[7] 106 | else: 107 | player[8] = 0.0 108 | player[9] = 0.0 109 | 110 | player[10] = sqrt((currentPos[0]-ball[0])**2+(currentPos[1]-ball[1])**2 ) 111 | if player[10] != 0: 112 | player[11] = (currentPos[0]-ball[0]) / player[10] 113 | player[12] = (currentPos[1]-ball[1]) / player[10] 114 | else: 115 | player[11] = 0.0 116 | player[12] = 0.0 117 | 118 | currentPos = player[3:5] 119 | 120 | player[0] = sqrt((currentPos[0]-role[0])**2+(currentPos[1]-role[1])**2 ) 121 | if player[0] != 0: 122 | player[1] = (currentPos[0]-role[0]) / player[0] 123 | player[2] = (currentPos[1]-role[1]) / player[0] 124 | else: 125 | player[1] = prev_feature_vector[teammate*13+1] 126 | player[2] = prev_feature_vector[teammate*13+2] 127 | 128 | new_matrix[teammate] = player 129 | 130 | for opponent in range(11,22): 131 | player = legacy[opponent] 132 | currentPos = player[3:5] 133 | player[0] = sqrt((currentPos[0]-role[0])**2+(currentPos[1]-role[1])**2 ) 134 | if player[0] != 0: 135 | player[1] = (currentPos[0]-role[0]) / player[0] 136 | player[2] = (currentPos[1]-role[1]) / player[0] 137 | else: 138 | player[1] = prev_feature_vector[opponent*13+1] 139 | player[2] = prev_feature_vector[opponent*13+2] 140 | new_matrix[opponent] = player 141 | 142 | teammates_distance = new_matrix[:11,0].copy() 143 | opponents_distance = new_matrix[11:22,0].copy() 144 | k = 4 145 | k_nearest_teammate = teammates_distance.argsort()[0:(k+1)] 146 | k_nearest_opponent = 11+opponents_distance.argsort()[0:k] # add 11 for offset 147 | # remove the role itself out of the nearest teammate list 148 | k_nearest_teammate = k_nearest_teammate[np.nonzero(k_nearest_teammate-roleOrder)] 149 | # Now look for the closest 3 teammates and duplicate the vector 150 | new_matrix = np.vstack((new_matrix, new_matrix[k_nearest_teammate], new_matrix[k_nearest_opponent])) ## Combine all ordered player_state with the nearest teammates and nearest opponents 151 | 152 | new_feature_vector = np.concatenate((new_matrix.flatten(), ball_current )) 153 | 154 | ## delete the 3 zeros 155 | new_feature_vector = np.concatenate((new_feature_vector[:roleOrder*13], new_feature_vector[roleOrder*13+3:] )) 156 | 157 | return 
new_feature_vector 158 | 159 | if __name__ == '__main__': 160 | 161 | 162 | ##################################### 163 | #### LOAD THE NORMAL DATA 164 | 165 | data = [np.load(path+role+'_data_CurrWithVel_training.npy') for role in activeRole] 166 | 167 | ## holder for the raw invert data 168 | data_invert = [np.load(path+role+'_data_CurrWithVel_training_invert.npy') for role in activeRole] 169 | 170 | totalTimeSteps = 50 171 | 172 | endOfSequenceMarker = np.nonzero(data[0][:,0])[0] 173 | numSequence = endOfSequenceMarker.shape[0] 174 | beginOfSequenceMarker = np.zeros(numSequence).astype(np.int64) 175 | for index in range(numSequence-1): 176 | beginOfSequenceMarker[index+1] = endOfSequenceMarker[index]+1 177 | 178 | sequenceLength = endOfSequenceMarker - beginOfSequenceMarker +1 179 | 180 | subsequenceLength = totalTimeSteps+1 # this will be the number of time steps later in the lstm model + 1 181 | overlapWindow = 26 182 | 183 | def chunking_window(subsequenceLength, overlapWindow): 184 | startSequence = [] 185 | endSequence = [] 186 | for index in range(numSequence): 187 | startSubSequence = [] 188 | endSubSequence = [] 189 | start = beginOfSequenceMarker[index] 190 | end = endOfSequenceMarker[index] 191 | while end >= (start+subsequenceLength-1): 192 | endSubSequence.append(end) 193 | startSubSequence.append(end-subsequenceLength+1) 194 | end = end - overlapWindow 195 | startSubSequence.reverse() 196 | endSubSequence.reverse() 197 | startSequence = startSequence + startSubSequence 198 | endSequence = endSequence + endSubSequence 199 | 200 | sequenceMarker = zip(startSequence, endSequence) 201 | 202 | return sequenceMarker 203 | 204 | sequenceMarker = chunking_window(subsequenceLength, overlapWindow) 205 | 206 | fullColumnIndex = np.arange(401) # 401 = 2+ 30*13+9 columns in the original training data file 207 | #excludedColumns = np.concatenate((np.array([0,1]),np.arange(2+roleOrder*13,2+roleOrder*13+3) )) # first two columns are time stamps 208 | #retainedColumns = np.delete(fullColumnIndex, excludedColumns) 209 | 210 | 211 | X_train_all = [] 212 | Y_train_all = [] 213 | 214 | for roleIndex in range(len(activeRole)): 215 | 216 | X_subset_data = [] 217 | Y_subset_data = [] 218 | excludedColumns = np.concatenate((np.array([0,1]),np.arange(2+roleOrderList[roleIndex]*13,2+roleOrderList[roleIndex]*13+3) )) # first two columns are time stamps 219 | retainedColumns = np.delete(fullColumnIndex, excludedColumns) 220 | 221 | for index in sequenceMarker: 222 | dataSegment = data[roleIndex][index[0]:index[1]+1,:].copy() 223 | x_segment = dataSegment[:-1,retainedColumns] 224 | y_segment = dataSegment[1:,(2+roleOrderList[roleIndex]*13+3):(2+roleOrderList[roleIndex]*13+5) ] # the role's position located in column 3 and 4, offset by the role order* number of feature per player 225 | 226 | X_subset_data.append(x_segment) 227 | Y_subset_data.append(y_segment) 228 | 229 | ## Load the invert data 230 | 231 | for index in sequenceMarker: 232 | dataSegment = data_invert[roleIndex][index[0]:index[1]+1,:].copy() 233 | x_segment = dataSegment[:-1,retainedColumns] 234 | y_segment = dataSegment[1:,(2+roleOrderList[roleIndex]*13+3):(2+roleOrderList[roleIndex]*13+5) ] # the role's position located in column 3 and 4, offset by the role order* number of feature per player 235 | 236 | X_subset_data.append(x_segment) 237 | Y_subset_data.append(y_segment) 238 | 239 | ######################################################################## 240 | #### COMBINE THE TWO BATCHES OF DATA 241 | 242 | X_train = 
np.vstack(X_subset_data) 243 | Y_train = np.vstack(Y_subset_data) 244 | 245 | X_train_all.append(X_train) 246 | Y_train_all.append(Y_train) 247 | 248 | featurelen = retainedColumns.shape[0] 249 | outputlen = 2 250 | numOfPrevSteps = 1 # We are only looking at the most recent character each time. 251 | ######################################################################### 252 | print('Formatting Data') 253 | 254 | print('total training frame is ', X_train.shape[0]) 255 | batches = chunks(X_train, totalTimeSteps) 256 | batchSize = 2200 257 | #batchSize = len(batches) 258 | batches = batches[0:len(batches)/batchSize*batchSize] 259 | offSet = len(batches) / batchSize 260 | ######################################################################### 261 | ## Further processing. Clipping the maximum velocity norm here 262 | 263 | #velNorm = sqrt(Y[:,0]**2+Y[:,1]**2) 264 | #velLimit = np.percentile(velNorm, 95) ## Clip the limit of velocity to 95 percentile of all velocities, to get rid of outliers # artificially remove the limit 265 | 266 | #X = np.zeros([batchSize, totalTimeSteps , featurelen]) 267 | X_all = [] 268 | Y_all = [] 269 | for X_train in X_train_all: 270 | X = np.zeros([offSet*batchSize, totalTimeSteps , featurelen]) 271 | for b in range(len(batches)): 272 | for r in range(totalTimeSteps): 273 | currentFeature = X_train[r + b*totalTimeSteps] 274 | X[b][r][:] = currentFeature 275 | X_all.append(X) 276 | 277 | for Y_train in Y_train_all: 278 | Y = np.zeros([offSet*batchSize, totalTimeSteps , outputlen]) 279 | for b in range(len(batches)): 280 | for r in range(totalTimeSteps): 281 | currentPrediction = Y_train[r + b*totalTimeSteps] 282 | Y[b][r][:] = currentPrediction 283 | Y_all.append(Y) 284 | 285 | 286 | X_original = [X_all[index].copy() for index in range(len(activeRole))] 287 | 288 | ################################## 289 | #### Load the test data##### 290 | #data = np.load(activeRole+'_data_CurrWithVel_test1.npy') 291 | data = [np.load(path+role+'_data_CurrWithVel_test1.npy') for role in activeRole] 292 | 293 | endOfSequenceMarker = np.nonzero(data[0][:,0])[0] 294 | numSequence = endOfSequenceMarker.shape[0] 295 | beginOfSequenceMarker = np.zeros(numSequence).astype(np.int64) 296 | for index in range(numSequence-1): 297 | beginOfSequenceMarker[index+1] = endOfSequenceMarker[index]+1 298 | 299 | sequenceLength = endOfSequenceMarker - beginOfSequenceMarker +1 300 | 301 | includedSequence = np.where(sequenceLength>=50)[0] ## expect all sequences to have length of at least 50, since this is how the test set was formed 302 | 303 | maxlen = sequenceLength.max() 304 | totalTimeSteps_test = maxlen 305 | 306 | X_test_all = [] 307 | for roleIndex in range(len(activeRole)): 308 | X_test_data = [] 309 | Y_test_data = [] 310 | 311 | excludedColumns = np.concatenate((np.array([0,1]),np.arange(2+roleOrderList[roleIndex]*13,2+roleOrderList[roleIndex]*13+3) )) # first two columns are time stamps 312 | retainedColumns = np.delete(fullColumnIndex, excludedColumns) 313 | 314 | for index in includedSequence: 315 | dataSegment = data[roleIndex][beginOfSequenceMarker[index]:endOfSequenceMarker[index]+1,:].copy() 316 | x_segment = np.zeros((maxlen, retainedColumns.shape[0])) 317 | x_segment[:dataSegment.shape[0]] = dataSegment[:,retainedColumns] 318 | 319 | X_test_data.append(x_segment) 320 | 321 | X_test = np.vstack(X_test_data) 322 | X_test_all.append(X_test) 323 | 324 | batches_test = chunks(X_test, totalTimeSteps_test) 325 | batchSize_test = 38 326 | 327 | X_test_test_all = [] 328 | for X_test in 
X_test_all: 329 | X_test_test = np.zeros([batchSize_test, totalTimeSteps_test , featurelen]) 330 | for b in range(len(batches_test)): 331 | for r in range(totalTimeSteps_test): 332 | currentFeature = X_test[r + b*totalTimeSteps_test] 333 | X_test_test[b][r][:] = currentFeature 334 | X_test_test_all.append(X_test_test) 335 | 336 | X_original_test = [X_test_test_all[index].copy() for index in range(len(activeRole))] 337 | 338 | ######################################################################## 339 | #### FINISH LOADING THE DATA #### 340 | ######################################################################## 341 | 342 | 343 | ############# 344 | 345 | """ 346 | init_model = Sequential() 347 | #init_model.add(BatchNormalization(batch_input_shape=(batchSize, numOfPrevSteps , featurelen) )) 348 | #init_model.add(LSTM(512 , batch_input_shape=(batchSize, numOfPrevSteps , featurelen), return_sequences=True, stateful=True)) 349 | init_model.add(LSTM(512 , batch_input_shape=(batchSize, numOfPrevSteps, featurelen), return_sequences=True, stateful=True)) 350 | init_model.add(LSTM(512 , return_sequences=False,stateful=True)) 351 | init_model.add(Dense (2)) 352 | init_model.add(Activation('linear')) 353 | init_model.compile(loss='mse', optimizer='rmsprop') 354 | init_model.reset_states() 355 | 356 | print('starting initializing') 357 | num_epochs = 5 358 | for e in range(num_epochs): 359 | print('epoch - ',e+1) 360 | #p = sampling_prob[e] 361 | 362 | startTime = time.time() 363 | training_loss = [] 364 | #loss = init_model.fit(X,Y,nb_epoch = 5, batch_size = batchSize) 365 | 366 | for j in range(offSet): 367 | #for i in range(0,totalTimeSteps-1): 368 | for i in range(0,totalTimeSteps): 369 | #loss = init_model.train_on_batch(X[batchSize*j:batchSize*(j+1), numOfPrevSteps*i:(i+1)*numOfPrevSteps, :], np.reshape(Y[batchSize*j:batchSize*(j+1), (i+1)*numOfPrevSteps, :], (batchSize, outputlen)) ) 370 | loss = init_model.train_on_batch(X[batchSize*j:batchSize*(j+1), numOfPrevSteps*i:(i+1)*numOfPrevSteps, :], Y[batchSize*j:batchSize*(j+1), i, :] ) 371 | training_loss.append(loss) 372 | 373 | init_model.reset_states() 374 | print('Initializing loss: ',sum(training_loss)/len(training_loss)) 375 | 376 | #print('Initializing loss: ',loss) 377 | totalTime = time.time() - startTime 378 | print('Completed epoch in ',totalTime,' seconds') 379 | print() 380 | print('Initializing complete') 381 | 382 | init_model.save_weights('init_weights_minibatch1024_10epochs.h5',overwrite = True) 383 | 384 | model = Sequential() 385 | #model.add(BatchNormalization(batch_input_shape=(batchSize, numOfPrevSteps , featurelen) ) ) 386 | model.add(LSTM(512 ,batch_input_shape=(batchSize, numOfPrevSteps , featurelen), return_sequences=True, stateful=True)) 387 | model.add(LSTM(512 , return_sequences=False,stateful=True)) 388 | model.add(Dense (2)) 389 | model.add(Activation('linear')) 390 | adagrad = Adagrad(lr=0.005, epsilon=1e-08) 391 | model.compile(loss='mse', optimizer='adagrad') 392 | model.load_weights('init_weights_minibatch1024_10epochs.h5') # Load the pretrained model 393 | model.reset_states() 394 | """ 395 | 396 | adagradOpt = Adagrad(lr=0.005, epsilon=1e-08) 397 | 398 | print('Load models...') 399 | policy = [] 400 | #### Load the model 401 | for model_name in model_list: 402 | model = Sequential() 403 | model.add(LSTM(512 ,return_sequences=True, batch_input_shape=(batchSize, numOfPrevSteps , featurelen), stateful=True)) 404 | model.add(LSTM(512 , return_sequences=False,stateful=True)) 405 | model.add(Dense (2)) 406 | 
model.add(Activation('linear')) 407 | model.compile(loss='mse', optimizer='adagrad') 408 | #model.compile(loss='mse', optimizer=adagradOpt) 409 | model.load_weights(model_name) 410 | #model.load_weights('init_weights_minibatch1024_10epochs.h5') # Load the pretrained model 411 | model.reset_states() 412 | policy.append(model) 413 | 414 | val_policy = [] 415 | #### Load the model 416 | for index in range(len(activeRole)): 417 | val_model = Sequential() 418 | val_model.add(LSTM(512 ,batch_input_shape=(batchSize_test, numOfPrevSteps , featurelen) ,return_sequences=True, stateful=True)) 419 | val_model.add(LSTM(512 , return_sequences=False,stateful=True)) 420 | val_model.add(Dense (2)) 421 | val_model.add(Activation('linear')) 422 | val_model.compile(loss='mse', optimizer='adagrad') 423 | #val_model.compile(loss='mse', optimizer=adagradOpt) 424 | val_model.reset_states() 425 | val_policy.append(val_model) 426 | 427 | print('starting training') 428 | rollout_horizon = [10] 429 | num_epochs = 100 430 | 431 | #sampling_prob = np.linspace(0,1,num_epochs)[::-1] 432 | lr_schedule = np.zeros(num_epochs) 433 | 434 | # Set up parallel processing 435 | numProcess = multiprocessing.cpu_count() 436 | print('Number of processes ', numProcess) 437 | pool = Pool(processes = 16) 438 | 439 | for horizon in rollout_horizon: 440 | best_loss = [10 for index in range(len(activeRole))] 441 | bestValLoss = 10 442 | for e in range(num_epochs): 443 | text_file = open("Output_10Policy_double_2200batch_roll10_overlapWindow25.txt", "a") 444 | #lr_schedule[e] = model.optimizer.lr.get_value() 445 | #p = sampling_prob[e] 446 | print('epoch - ',e+1) 447 | text_file.write('epoch - %s \n' %(e+1) ) 448 | print('training joint policies - Double rollout horizon 10 - Predict then train - adagrad- batch 2200 - Overlapping window') 449 | 450 | #print('learning rate before training ', lr_schedule[e]) 451 | startTime = time.time() 452 | 453 | training_loss = [[] for index in range(len(activeRole))] # initialize empty list of list to store training loss 454 | for j in range(offSet): 455 | #for i in range(0,totalTimeSteps-1): 456 | for i in range(0,totalTimeSteps+1-horizon,horizon): 457 | 458 | # roll out horizon times 459 | for k in range(horizon): 460 | if i+k+1<50: 461 | next_prediction_all = [] 462 | ## Roll out all next step predictions and gather them into one place 463 | for index in range(len(activeRole)): 464 | next_prediction = policy[index].predict_on_batch(X_all[index][batchSize*j:batchSize*(j+1), (i+k):(i+k+1), :]) 465 | next_prediction_all.append(next_prediction) 466 | ## and then update all the feature vector for the next step, for each active role 467 | next_prediction_all = np.hstack(next_prediction_all) 468 | for index in range(len(activeRole)): 469 | prev_feature = X_all[index][batchSize*j:batchSize*(j+1),i+k,:] 470 | legacy_feature = X_all[index][batchSize*j:batchSize*(j+1),i+k+1,:] 471 | order = np.empty(batchSize).astype(int) 472 | order.fill(roleOrderList[index]) 473 | params = zip(prev_feature, legacy_feature, next_prediction_all, order) 474 | result = pool.map(roll_out, params) 475 | #result = map(roll_out, params) 476 | X_all[index][batchSize*j:batchSize*(j+1),i+k+1,:] = np.array(result) 477 | 478 | for index in range(len(activeRole)): 479 | ## train the model for the horizon steps 480 | for k in range(horizon): 481 | loss = policy[index].train_on_batch(X_all[index][batchSize*j:batchSize*(j+1), (i+k):(i+k+1), :], Y_all[index][batchSize*j:batchSize*(j+1), i+k, :] ) 482 | training_loss[index].append(loss) 483 | 
484 | 485 | for index in range(len(activeRole)): 486 | policy[index].reset_states() 487 | for index in range(len(activeRole)): 488 | print('training loss for role '+activeRole[index]+': ',sum(training_loss[index])/len(training_loss[index])) 489 | rolledOutLoss = ((X_all[index][:,:,(roleOrderList[index]*13):(roleOrderList[index]*13+2)] - X_original[index][:,:,(roleOrderList[index]*13):(roleOrderList[index]*13+2)])**2).mean() 490 | print('rolled out loss for role '+ activeRole[index] +': ', rolledOutLoss) 491 | 492 | 493 | 494 | #### True roll out #### 495 | for index in range(len(activeRole)): 496 | for i in range(len(policy[index].layers)): 497 | val_policy[index].layers[i].set_weights(policy[index].layers[i].get_weights()) 498 | val_policy[index].reset_states() 499 | 500 | 501 | for i in range(0,totalTimeSteps_test-1): 502 | next_prediction_all = [] 503 | ## Roll out all next step predictions and gather them into one place 504 | for index in range(len(activeRole)): 505 | next_prediction = val_policy[index].predict_on_batch(X_test_test_all[index][0:batchSize_test, numOfPrevSteps*i:(i+1)*numOfPrevSteps, :]) 506 | next_prediction_all.append(next_prediction) 507 | ## and then update all the feature vector for the next step, for each active role 508 | next_prediction_all = np.hstack(next_prediction_all) 509 | for index in range(len(activeRole)): 510 | prev_feature = X_test_test_all[index][0:batchSize_test,i,:] 511 | legacy_feature = X_test_test_all[index][0:batchSize_test,i+1,:] 512 | order = np.empty(batchSize_test).astype(int) 513 | order.fill(roleOrderList[index]) 514 | params = zip(prev_feature, legacy_feature, next_prediction_all, order) 515 | result = pool.map(roll_out, params[0:len(batches_test)]) 516 | X_test_test_all[index][0:len(batches_test),i+1,:] = np.array(result) 517 | 518 | #model.reset_states() 519 | 520 | valLoss = 0 521 | for index in range(len(activeRole)): 522 | predPosition = [] 523 | truePosition = [] 524 | 525 | for i in includedSequence: 526 | predPosition.append(X_test_test_all[index][i,:sequenceLength[i],(roleOrderList[index]*13):(roleOrderList[index]*13+2)]) 527 | truePosition.append(X_original_test[index][i,:sequenceLength[i],(roleOrderList[index]*13):(roleOrderList[index]*13+2)]) 528 | #rolledOutLoss = ((X_test_test_all[index][:,:,(roleOrderList[index]*13):(roleOrderList[index]*13+2)] - X_original_test[index][:,:,(roleOrderList[index]*13):(roleOrderList[index]*13+2)])**2).mean() 529 | rolledOutLoss = ((np.vstack(predPosition) - np.vstack(truePosition))**2).mean() 530 | valLoss = valLoss + rolledOutLoss 531 | print('True validation loss for role '+ activeRole[index]+':', rolledOutLoss) 532 | text_file.write('True validation loss for role %s : %s \n' %(activeRole[index], rolledOutLoss)) 533 | if rolledOutLoss < best_loss[index]: 534 | best_loss[index] = rolledOutLoss 535 | policy[index].save_weights('weights_joint10Policy_DoubleOW25_batch2200_adagrad_rollsteps_'+str(horizon)+'_'+activeRole[index]+'_epoch'+str(e+1)+'.h5', overwrite = True) 536 | 537 | print('best validation loss so far with rollout '+str(horizon)+' for role '+ activeRole[index] + ': ', best_loss[index]) 538 | text_file.write('best validation loss so far with rollout %s for role %s : %s \n' %(str(horizon), activeRole[index],best_loss[index] ) ) 539 | print() 540 | print('Total validation loss this round: ', valLoss) 541 | text_file.write('Total validation loss this round: %s \n' %(valLoss)) 542 | if valLoss < bestValLoss: 543 | bestValLoss = valLoss 544 | print('best total validation loss up to this 
round: ', bestValLoss) 545 | text_file.write('best total validation loss up to this round: %s \n' %bestValLoss) 546 | 547 | ### End of true roll out #### 548 | 549 | print() 550 | 551 | totalTime = time.time() - startTime 552 | print('Completed epoch in ',totalTime,' seconds') 553 | print() 554 | text_file.close() 555 | 556 | print('training complete') --------------------------------------------------------------------------------
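As a point of reference, here is a small self-contained sketch of the overlapping-window logic that chunking_window in the script uses to carve training subsequences out of each possession sequence. The helper name overlapping_windows and the 120-frame example are illustrative only; the settings (subsequenceLength = 51, overlapWindow = 26) match the values set in the script.

def overlapping_windows(begin, end, subsequenceLength, overlapWindow):
    # Take windows of length subsequenceLength from the end of the sequence,
    # stepping back by overlapWindow frames each time, then return them in order.
    starts, ends = [], []
    while end >= begin + subsequenceLength - 1:
        ends.append(end)
        starts.append(end - subsequenceLength + 1)
        end = end - overlapWindow
    starts.reverse()
    ends.reverse()
    return list(zip(starts, ends))

# Example: a 120-frame sequence occupying rows 0..119 yields the windows
# (17, 67), (43, 93) and (69, 119), each of length 51 (50 input steps + 1 target step).
print(overlapping_windows(0, 119, 51, 26))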
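Similarly, the following is a minimal sketch, under the same Keras 1.x API used in this script, of how one of the per-role policies saved by the training loop could be reloaded for step-by-step prediction on a single sequence. The weights file name (in particular the epoch number), the featurelen value, and the zero-filled x_sequence are placeholders that depend on the actual run and data; this is not the authors' evaluation code.

import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM

featurelen = 396   # placeholder: 401 raw columns minus the 2 timestamps and the 3 zeroed entries of the active role
weights_file = 'weights_joint10Policy_DoubleOW25_batch2200_adagrad_rollsteps_10_lcm_epoch1.h5'   # placeholder epoch number

# Same stateful architecture as the training policies, rebuilt with batch size 1;
# the layer weights do not depend on the batch size, so they can be reloaded here.
model = Sequential()
model.add(LSTM(512, batch_input_shape=(1, 1, featurelen), return_sequences=True, stateful=True))
model.add(LSTM(512, return_sequences=False, stateful=True))
model.add(Dense(2))
model.add(Activation('linear'))
model.compile(loss='mse', optimizer='adagrad')
model.load_weights(weights_file)
model.reset_states()

# x_sequence: one test sequence of per-frame feature vectors, shape (T, featurelen).
x_sequence = np.zeros((50, featurelen))
predicted_positions = []
for t in range(x_sequence.shape[0]):
    # predict_on_batch returns the predicted (x, y) position for the next frame, shape (1, 2).
    pred = model.predict_on_batch(x_sequence[t].reshape(1, 1, featurelen))
    predicted_positions.append(pred[0])
model.reset_states()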