├── .gitignore ├── .idea └── vcs.xml ├── CNN.py ├── FC.py ├── FCN.py ├── LICENSE.md ├── MDPs ├── @Gridworld_Graph8 │ ├── GetOptTraj.m │ ├── Gridworld_Graph8.m │ ├── OptimalActionsOnPath.m │ └── SampleGraphTraj.m ├── Finite_MDP_class.m └── MDP_class.m ├── NN_run_training.py ├── NNobj.py ├── NNpredict.m ├── ObstacleGenerators └── @obstacle_gen │ └── obstacle_gen.m ├── README.md ├── addpaths.m ├── data ├── gridworld_16.mat ├── gridworld_16_test.mat ├── gridworld_28.mat ├── gridworld_28_test.mat ├── gridworld_8.mat └── gridworld_8_test.mat ├── extract_action.m ├── script_make_data.m ├── script_viz_policy.m ├── scripts ├── make_data_gridworld_nips.m ├── nips_gridworld_experiments_CNN.sh ├── nips_gridworld_experiments_FCN.sh ├── nips_gridworld_experiments_VIN.sh ├── nips_gridworld_experiments_VIN_untied.sh └── nips_gridworld_experiments_VIN_untied_data_fraction.sh ├── test_network.m ├── theano_utils.py ├── util ├── SP.m ├── python_ndarray_to_matrix.m ├── rand_choose.m └── set_var.m ├── vin.py └── vin_untied.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.sh~ 3 | .idea/* 4 | position_paper/* 5 | data/mmpCode/* 6 | matlab_bgl/* 7 | misc/* 8 | *.png 9 | *.mat 10 | *.py~ 11 | figures/* 12 | .gitignore~ 13 | obsolete/* 14 | icml16results/* 15 | nips16results/* 16 | *.sh# 17 | 18 | 19 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /CNN.py: -------------------------------------------------------------------------------- 1 | # VI network using THEANO, takes batches of state input 2 | from NNobj import * 3 | from theano_utils import * 4 | 5 | 6 | class cnn(NNobj): 7 | "Class for a convolutional neural network, inthe style of LeNet/Alexnet" 8 | def __init__(self, model="CNN", im_size=[28, 28], dropout=False, devtype="cpu", grad_check=False, reg=0, 9 | batchsize=128): 10 | self.im_size = im_size # input image size 11 | self.model = model 12 | self.reg = reg # regularization (currently not implemented) 13 | self.batchsize = batchsize # batch size for training 14 | np.random.seed(0) 15 | print(model) 16 | # theano.config.blas.ldflags = "-L/usr/local/lib -lopenblas" 17 | 18 | # X input : l=3 stacked images: obstacle map, goal map, current state map 19 | self.X = T.ftensor4(name="X") 20 | self.y = T.bvector("y") # output action 21 | 22 | l = 3 23 | filter_sizes = [[50, 3, 3], 24 | [50, 3, 3], 25 | [100, 3, 3], 26 | [100, 3, 3], 27 | [100, 3, 3]] 28 | poolings = [2, 1, 2, 1, 1] 29 | 30 | self.cnn_net = CNN(in_x=self.X, in_x_channels=l, imsize=self.im_size, 31 | batchsize=self.batchsize, filter_sizes=filter_sizes, 32 | poolings=poolings) 33 | self.p_of_y = self.cnn_net.output 34 | self.params = self.cnn_net.params 35 | # Total 1910 parameters 36 | 37 | self.cost = -T.mean(T.log(self.p_of_y)[T.arange(self.y.shape[0]), 38 | self.y], dtype=theano.config.floatX) 39 | self.y_pred = T.argmax(self.p_of_y, axis=1) 40 | self.err = T.mean(T.neq(self.y_pred, self.y.flatten()), dtype=theano.config.floatX) 41 | 42 | self.computeloss = theano.function(inputs=[self.X, self.y], 43 | outputs=[self.err, self.cost]) 44 | self.y_out = theano.function(inputs=[self.X], outputs=[self.y_pred]) 45 | self.updates = [] 46 | self.train = [] 47 | 48 | def run_training(self, input, stepsize=0.01, epochs=10, output='None', 
batch_size=128, grad_check=True, 49 | profile=False, data_fraction=1): 50 | # run training from input matlab data file, and save test data prediction in output file 51 | # load data from Matlab file, including 52 | # im_data: flattened images 53 | # value_data: flattened reward image 54 | # state_data: flattened state images 55 | # label_data: one-hot vector for action (state difference) 56 | matlab_data = sio.loadmat(input) 57 | im_data = matlab_data["im_data"] 58 | im_data = (im_data - 1)/255 # obstacles = 1, free zone = 0 59 | value_data = matlab_data["value_data"] 60 | state1_data = matlab_data["state_x_data"] 61 | state2_data = matlab_data["state_y_data"] 62 | label_data = matlab_data["label_data"] 63 | y_data = label_data.astype('int8') 64 | x_im_data = im_data.astype(theano.config.floatX) 65 | x_im_data = x_im_data.reshape(-1, 1, self.im_size[0], self.im_size[1]) 66 | x_val_data = value_data.astype(theano.config.floatX) 67 | x_val_data = x_val_data.reshape(-1, 1, self.im_size[0], self.im_size[1]) 68 | x_state_data = np.zeros_like(x_im_data) 69 | for i in x_state_data.shape[0]: 70 | pos1 = state1_data[i] 71 | pos2 = state2_data[i] 72 | x_state_data[i, 0, pos1, pos2] = 1 73 | x_data = np.append(x_im_data, x_val_data, axis=1) 74 | x_data = np.append(x_data, x_state_data, axis=1) 75 | 76 | all_training_samples = int(6/7.0*x_data.shape[0]) 77 | training_samples = int(data_fraction * all_training_samples) 78 | x_train = x_data[0:training_samples] 79 | y_train = y_data[0:training_samples] 80 | 81 | x_test = x_data[all_training_samples:] 82 | y_test = y_data[all_training_samples:] 83 | y_test = y_test.flatten() 84 | 85 | sortinds = np.random.permutation(training_samples) 86 | x_train = x_train[sortinds] 87 | y_train = y_train[sortinds] 88 | y_train = y_train.flatten() 89 | 90 | self.updates = rmsprop_updates_T(self.cost, self.params, stepsize=stepsize) 91 | self.train = theano.function(inputs=[self.X, self.y], outputs=[], updates=self.updates) 92 | 93 | print fmt_row(10, ["Epoch", "Train NLL", "Train Err", "Test NLL", "Test Err", "Epoch Time"]) 94 | for i_epoch in xrange(int(epochs)): 95 | tstart = time.time() 96 | # do training 97 | for start in xrange(0, x_train.shape[0], batch_size): 98 | end = start+batch_size 99 | if end <= x_train.shape[0]: 100 | self.train(x_train[start:end], y_train[start:end]) 101 | elapsed = time.time() - tstart 102 | # compute losses 103 | trainerr = 0. 104 | trainloss = 0. 105 | testerr = 0. 106 | testloss = 0. 
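# note on the evaluation loop below: the per-epoch "Train NLL"/"Train Err" are averaged over
# full batches drawn from the first x_test.shape[0] samples of the (shuffled) training set,
# so train and test metrics are computed over the same number of batches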
107 | num = 0 108 | for start in xrange(0, x_test.shape[0], batch_size): 109 | end = start+batch_size 110 | if end <= x_test.shape[0]: 111 | num += 1 112 | trainerr_, trainloss_ = self.computeloss(x_train[start:end], y_train[start:end]) 113 | testerr_, testloss_ = self.computeloss(x_test[start:end], y_test[start:end]) 114 | trainerr += trainerr_ 115 | trainloss += trainloss_ 116 | testerr += testerr_ 117 | testloss += testloss_ 118 | print fmt_row(10, [i_epoch, trainloss/num, trainerr/num, testloss/num, testerr/num, elapsed]) 119 | 120 | def predict(self, input): 121 | # NN output for a single input, read from file 122 | matlab_data = sio.loadmat(input) 123 | im_data = matlab_data["im_data"] 124 | im_data = (im_data - 1)/255 # obstacles = 1, free zone = 0 125 | # state_data = matlab_data["state_data"] 126 | state_data = matlab_data["state_xy_data"] 127 | value_data = matlab_data["value_data"] 128 | x_im_test = im_data.astype(theano.config.floatX) 129 | x_im_test = x_im_test.reshape(-1, 1, self.im_size[0], self.im_size[1]) 130 | x_val_test = value_data.astype(theano.config.floatX) 131 | x_val_test = x_val_test.reshape(-1, 1, self.im_size[0], self.im_size[1]) 132 | x_state_test = np.zeros_like(x_im_test) 133 | x_state_test[0, 0, state_data[0, 0], state_data[0, 1]] = 1 134 | x_test = np.append(x_im_test, x_val_test, axis=1) 135 | x_test = np.append(x_test, x_state_test, axis=1) 136 | out = self.y_out(x_test) 137 | return out[0][0] 138 | 139 | def load_weights(self, infile="weight_dump.pk"): 140 | dump = pickle.load(open(infile, 'r')) 141 | [n.set_value(p) for n, p in zip(self.params, dump)] 142 | 143 | def save_weights(self, outfile="weight_dump.pk"): 144 | pickle.dump([n.get_value() for n in self.params], open(outfile, 'w')) 145 | 146 | 147 | class CNN(object): 148 | """CNN network""" 149 | def __init__(self, in_x, in_x_channels, imsize, batchsize=128, 150 | filter_sizes=[[50, 3, 3], [100, 3, 3]], poolings=[2, 2]): 151 | """ 152 | Allocate a CNN network with shared variable internal parameters. 153 | 154 | :type in_x: theano.tensor.dtensor4 155 | :param in_x: symbolic input image tensor, of shape [batchsize, in_x_channels, imsize[0], imsize[1]] 156 | Typically : first channel is image, second is the reward prior, third is the current state image. 
157 | 158 | :type in_x_channels: int32 159 | :param in_x_channels: number of input channels 160 | 161 | :type imsize: tuple or list of length 2 162 | :param imsize: (image height, image width) 163 | 164 | :type batchsize: int32 165 | :param batchsize: batch size 166 | 167 | :type filter_sizes: int32 list of int32 3-tuples 168 | :param filter_sizes: list of filter sizes for each layer, each a list of 3 integers: 169 | num_filters,filter_width,filter_height 170 | 171 | :type batchsize: int32 list 172 | :param batchsize: list of pooling ratios after each layer (assumed symmetric) 173 | """ 174 | assert len(filter_sizes) == len(poolings) 175 | n_conv_layers = len(filter_sizes) 176 | self.params = [] 177 | # first conv layer 178 | prev_layer = ConvLayer(in_x, filter_shape=[filter_sizes[0][0], in_x_channels, filter_sizes[0][1], 179 | filter_sizes[0][2]], 180 | image_shape=[batchsize, in_x_channels, imsize[0], imsize[1]], 181 | poolsize=(poolings[0], poolings[0])) 182 | self.params = self.params + prev_layer.params 183 | # then the rest of the conv layers 184 | for l in range(1, n_conv_layers): 185 | new_layer = ConvLayer(prev_layer.output, 186 | filter_shape=[filter_sizes[l][0], prev_layer.out_shape[1], filter_sizes[l][1], 187 | filter_sizes[l][2]], 188 | image_shape=prev_layer.out_shape, 189 | poolsize=(poolings[l], poolings[l])) 190 | self.params = self.params + new_layer.params 191 | prev_layer = new_layer 192 | # fully connected layer 193 | final_conv_shape = new_layer.out_shape 194 | flat_conv_out = new_layer.output.flatten(ndim=2) 195 | flat_shape = [final_conv_shape[0], final_conv_shape[1]*final_conv_shape[2]*final_conv_shape[3]] 196 | self.w_o = init_weights_T(flat_shape[1], 8) 197 | self.output = T.nnet.softmax(T.dot(flat_conv_out, self.w_o)) 198 | self.params = self.params + [self.w_o] 199 | -------------------------------------------------------------------------------- /FC.py: -------------------------------------------------------------------------------- 1 | # Based on tutorial by Alec Radford 2 | # https://github.com/Newmu/Theano-Tutorials/blob/master/4_modern_net.py 3 | 4 | import cgt 5 | from cgt import nn 6 | from cgt.distributions import categorical 7 | from NNobj import * 8 | 9 | 10 | class FC(NNobj): 11 | "Class for a multi-layer perceptron (fully connected network) object" 12 | def __init__(self, model="dense", im_size=[28, 28], dropout=True, devtype="cpu", grad_check=True, reg=0): 13 | if grad_check: cgt.set_precision("quad") 14 | self.model = model 15 | self.reg = reg 16 | np.random.seed(0) 17 | cgt.update_config(default_device=cgt.core.Device(devtype=devtype), backend="native") 18 | print(model) 19 | # MLP with 1 hidden layer 20 | if model == "dense1": 21 | self.Xsize = 2*im_size[0]*im_size[1]+im_size[0]+im_size[1] 22 | self.X = cgt.matrix("X", fixed_shape=(None, self.Xsize)) 23 | self.y = cgt.vector("y", dtype='i8') 24 | self.p_drop_input, self.p_drop_hidden = (0.2, 0.5) if dropout else (0, 0) 25 | self.w_h = init_weights(self.Xsize, 256) 26 | self.w_o = init_weights(256, 8) 27 | self.pofy_drop = dense_model1(self.X, self.w_h, self.w_o, self.p_drop_input, self.p_drop_hidden) 28 | self.pofy_nodrop = dense_model1(self.X, self.w_h, self.w_o, 0., 0.) 
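# note: each dense model is built twice -- pofy_drop (with dropout, used for the cost_drop
# training objective) and pofy_nodrop (dropout disabled, used further below for the reported
# NLL/error and for y_out predictions)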
29 | self.params = [self.w_h, self.w_o] 30 | self.l1 = cgt.abs(self.w_h).sum() + cgt.abs(self.w_o).sum() 31 | self.cost_drop = -cgt.mean(categorical.loglik(self.y, self.pofy_drop)) + self.reg*self.l1 32 | # MLP with 2 hidden layers 33 | elif model == "dense2": 34 | self.Xsize = 2*im_size[0]*im_size[1]+im_size[0]+im_size[1] 35 | self.X = cgt.matrix("X", fixed_shape=(None, self.Xsize)) 36 | self.y = cgt.vector("y", dtype='i8') 37 | self.p_drop_input, self.p_drop_hidden = (0.2, 0.5) if dropout else (0, 0) 38 | self.w_h = init_weights(self.Xsize, 256) 39 | self.w_h2 = init_weights(256, 256) 40 | self.w_o = init_weights(256, 8) 41 | self.pofy_drop = dense_model2(self.X, self.w_h, self.w_h2, self.w_o, self.p_drop_input, self.p_drop_hidden) 42 | self.pofy_nodrop = dense_model2(self.X, self.w_h, self.w_h2, self.w_o, 0., 0.) 43 | self.params = [self.w_h, self.w_h2, self.w_o] 44 | self.l1 = cgt.abs(self.w_h).sum() + cgt.abs(self.w_h2).sum() + cgt.abs(self.w_o).sum() 45 | self.cost_drop = -cgt.mean(categorical.loglik(self.y, self.pofy_drop)) + self.reg*self.l1 46 | # MLP with 3 hidden layers 47 | elif model == "dense3": 48 | self.Xsize = 2*im_size[0]*im_size[1]+im_size[0]+im_size[1] 49 | self.X = cgt.matrix("X", fixed_shape=(None, self.Xsize)) 50 | self.y = cgt.vector("y", dtype='i8') 51 | self.p_drop_input, self.p_drop_hidden = (0.0, [0.5, 0.5, 0.5]) if dropout else (0, [0, 0, 0]) 52 | self.w_h = init_weights(self.Xsize, 256) 53 | self.w_h2 = init_weights(256, 256) 54 | self.w_h3 = init_weights(256, 256) 55 | self.w_o = init_weights(256, 8) 56 | self.pofy_drop = dense_model3(self.X, self.w_h, self.w_h2, self.w_h3, self.w_o, self.p_drop_input, 57 | self.p_drop_hidden) 58 | self.pofy_nodrop = dense_model3(self.X, self.w_h, self.w_h2, self.w_h3, self.w_o, 0., [0., 0., 0.]) 59 | self.params = [self.w_h, self.w_h2, self.w_h3, self.w_o] 60 | self.l1 = cgt.abs(self.w_h).sum() + cgt.abs(self.w_h2).sum() + cgt.abs(self.w_h3).sum() + \ 61 | cgt.abs(self.w_o).sum() 62 | self.cost_drop = -cgt.mean(categorical.loglik(self.y, self.pofy_drop)) + self.reg*self.l1 63 | else: 64 | raise RuntimeError("Unknown Model") 65 | 66 | self.y_nodrop = cgt.argmax(self.pofy_nodrop, axis=1) 67 | self.cost_nodrop = -cgt.mean(categorical.loglik(self.y, self.pofy_nodrop)) 68 | self.err_nodrop = cgt.cast(cgt.not_equal(self.y_nodrop, self.y), cgt.floatX).mean() 69 | self.computeloss = cgt.function(inputs=[self.X, self.y], outputs=[self.err_nodrop,self.cost_nodrop]) 70 | self.y_out = cgt.function(inputs=[self.X], outputs=[self.y_nodrop]) 71 | self.updates = rmsprop_updates(self.cost_drop, self.params) 72 | self.train = cgt.function(inputs=[self.X, self.y], outputs=[], updates=self.updates) 73 | 74 | def run_training(self, input, stepsize=0.01, epochs=10, output='None', batch_size=128, grad_check=True, 75 | profile=False, step_decrease_rate=0.5, step_decrease_time=1000): 76 | # run NN training from input matlab data file, and save test data prediction in output file 77 | 78 | # load data from Matlab file, including 79 | # im_data: flattened images 80 | # state_data: concatenated one-hot vectors for each state variable 81 | # label_data: one-hot vector for action (state difference) 82 | if grad_check: cgt.set_precision("quad") 83 | matlab_data = sio.loadmat(input) 84 | im_data = matlab_data["im_data"] 85 | im_data = (im_data - 1)/255 # obstacles = 1, free zone = 0 86 | state_data = matlab_data["state_data"] 87 | value_data = matlab_data["value_data"] 88 | label_data = matlab_data["label_data"] 89 | Xdata = 
(np.concatenate((np.concatenate((im_data,value_data),axis=1), state_data), axis=1)).astype(cgt.floatX) 90 | ydata = label_data 91 | 92 | training_samples = int(6/7.0*Xdata.shape[0]) 93 | Xtrain = Xdata[0:training_samples] 94 | ytrain = ydata[0:training_samples] 95 | 96 | Xtest = Xdata[training_samples:] 97 | ytest = ydata[training_samples:] 98 | 99 | sortinds = np.random.permutation(training_samples) 100 | Xtrain = Xtrain[sortinds] 101 | ytrain = ytrain[sortinds] 102 | 103 | self.updates = rmsprop_updates(self.cost_drop, self.params, stepsize=stepsize) 104 | self.train = cgt.function(inputs=[self.X, self.y], outputs=[], updates=self.updates) 105 | 106 | from cgt.tests import gradcheck_model 107 | if grad_check: 108 | cost_nodrop = cgt.core.clone(self.cost_nodrop, {self.X: Xtrain[:1], self.y: ytrain[:1]}) 109 | print "doing gradient check..." 110 | print "------------------------------------" 111 | gradcheck_model(cost_nodrop, self.params[0:1]) 112 | print "success!" 113 | return 114 | 115 | if profile: cgt.profiler.start() 116 | 117 | print fmt_row(10, ["Epoch","Train NLL","Train Err","Test NLL","Test Err","Epoch Time"]) 118 | for i_epoch in xrange(int(epochs)): 119 | tstart = time.time() 120 | for start in xrange(0, Xtrain.shape[0], batch_size): 121 | end = start+batch_size 122 | self.train(Xtrain[start:end], ytrain[start:end]) 123 | elapsed = time.time() - tstart 124 | trainerr, trainloss = self.computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)]) 125 | testerr, testloss = self.computeloss(Xtest, ytest) 126 | print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed]) 127 | if (i_epoch > 0) & (i_epoch % step_decrease_time == 0): 128 | stepsize = step_decrease_rate * stepsize 129 | self.updates = rmsprop_updates(self.cost_drop, self.params, stepsize=stepsize) 130 | self.train = cgt.function(inputs=[self.X, self.y], outputs=[], updates=self.updates) 131 | print stepsize 132 | if profile: cgt.execution.profiler.print_stats() 133 | 134 | # save Matlab data 135 | if output != 'None': 136 | sio.savemat(file_name=output, mdict={'in': Xtest, 'out': self.y_out(Xtest)}) 137 | 138 | def predict(self, input): 139 | # NN output for a single input, read from file 140 | matlab_data = sio.loadmat(input) 141 | im_data = matlab_data["im_data"] 142 | im_data = (im_data - 1)/255 # obstacles = 1, free zone = 0 143 | state_data = matlab_data["state_data"] 144 | value_data = matlab_data["value_data"] 145 | x_test = (np.concatenate((np.concatenate((im_data, value_data), axis=1), state_data), axis=1)).astype(cgt.floatX) 146 | out = self.y_out(x_test) 147 | return out[0][0] 148 | 149 | 150 | def init_weights(*shape): 151 | return cgt.shared(np.random.randn(*shape) * 0.01, fixed_shape_mask='all') 152 | 153 | 154 | def rmsprop_updates(cost, params, stepsize=0.001, rho=0.9, epsilon=1e-6): 155 | grads = cgt.grad(cost, params) 156 | updates = [] 157 | for p, g in zip(params, grads): 158 | acc = cgt.shared(p.op.get_value() * 0.) 
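# acc holds an exponential moving average of the squared gradient for this parameter;
# the next lines scale the raw gradient by 1/sqrt(acc_new + epsilon) before the update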
159 | acc_new = rho * acc + (1 - rho) * cgt.square(g) 160 | gradient_scaling = cgt.sqrt(acc_new + epsilon) 161 | g = g / gradient_scaling 162 | updates.append((acc, acc_new)) 163 | updates.append((p, p - stepsize * g)) 164 | return updates 165 | 166 | 167 | def adagrad_updates(cost, params, stepsize=0.001, rho=0.9, epsilon=1e-6): 168 | grads = cgt.grad(cost, params) 169 | updates = [] 170 | for param, grad in zip(params, grads): 171 | value = param.op.get_value() 172 | accu = cgt.shared(np.zeros(value.shape, dtype=value.dtype)) 173 | delta_accu = cgt.shared(np.zeros(value.shape, dtype=value.dtype)) 174 | 175 | accu_new = rho * accu + (1 - rho) * grad ** 2 176 | updates.append((accu, accu_new)) 177 | 178 | update = (grad * cgt.sqrt(delta_accu + epsilon) / cgt.sqrt(accu_new + epsilon)) 179 | updates.append((param, param - stepsize * update)) 180 | 181 | delta_accu_new = rho * delta_accu + (1 - rho) * update ** 2 182 | updates.append((delta_accu, delta_accu_new)) 183 | return updates 184 | 185 | 186 | def dense_model1(X, w_h, w_o, p_drop_input, p_drop_hidden): 187 | X = nn.dropout(X, p_drop_input) 188 | h = nn.rectify(cgt.dot(X, w_h)) 189 | h = nn.dropout(h, p_drop_hidden) 190 | py_x = nn.softmax(cgt.dot(h, w_o)) 191 | return py_x 192 | 193 | 194 | def dense_model2(X, w_h, w_h2, w_o, p_drop_input, p_drop_hidden): 195 | X = nn.dropout(X, p_drop_input) 196 | h = nn.rectify(cgt.dot(X, w_h)) 197 | 198 | h = nn.dropout(h, p_drop_hidden) 199 | h2 = nn.rectify(cgt.dot(h, w_h2)) 200 | 201 | h2 = nn.dropout(h2, p_drop_hidden) 202 | py_x = nn.softmax(cgt.dot(h2, w_o)) 203 | return py_x 204 | 205 | 206 | def dense_model3(X, w_h, w_h2, w_h3, w_o, p_drop_input, p_drop_hidden): 207 | X = nn.dropout(X, p_drop_input) 208 | h = nn.rectify(cgt.dot(X, w_h)) 209 | 210 | h = nn.dropout(h, p_drop_hidden[0]) 211 | h2 = nn.rectify(cgt.dot(h, w_h2)) 212 | 213 | h2 = nn.dropout(h2, p_drop_hidden[1]) 214 | h3 = nn.rectify(cgt.dot(h2, w_h3)) 215 | 216 | h3 = nn.dropout(h3, p_drop_hidden[2]) 217 | py_x = nn.softmax(cgt.dot(h3, w_o)) 218 | return py_x 219 | 220 | 221 | -------------------------------------------------------------------------------- /FCN.py: -------------------------------------------------------------------------------- 1 | # VI network using THEANO, takes batches of state input 2 | from NNobj import * 3 | from theano_utils import * 4 | 5 | 6 | class fcn(NNobj): 7 | "Class for a fully connected convolutional network" 8 | def __init__(self, model="FCN", im_size=[28, 28], dropout=False, devtype="cpu", grad_check=False, reg=0, 9 | statebatchsize=10, batchsize=128): 10 | self.im_size = im_size # input image size 11 | self.model = model 12 | self.reg = reg # regularization (currently not implemented) 13 | self.batchsize = batchsize # batch size for training 14 | self.statebatchsize = statebatchsize # number of state inputs for every image input, since each image is the 15 | # same for many states in the data 16 | np.random.seed(0) 17 | print(model) 18 | theano.config.blas.ldflags = "-L/usr/local/lib -lopenblas" 19 | 20 | # X input : l=2 stacked images: obstacle map and reward function prior 21 | self.X = T.ftensor4(name="X") 22 | # S1,S2 input : state position (vertical and horizontal position) 23 | self.S1 = T.bmatrix("S1") # state first dimension * statebatchsize 24 | self.S2 = T.bmatrix("S2") # state second dimension * statebatchsize 25 | self.y = T.bvector("y") # output action * statebatchsize 26 | 27 | l = 2 28 | l_1 = 150 # channels (filters) in first conv layer 29 | l_2 = 150 30 | l_3 = 10 31 | 32 | 
self.fcn_net = FCN(in_x=self.X, in_s1=self.S1, in_s2=self.S2, in_x_channels=l, imsize=self.im_size, 33 | batchsize=self.batchsize, state_batch_size=self.statebatchsize, l_1=l_1, l_2=l_2, 34 | l_3=l_3) 35 | self.p_of_y = self.fcn_net.output 36 | self.params = self.fcn_net.params 37 | # Total 1910 parameters 38 | 39 | self.cost = -T.mean(T.log(self.p_of_y)[T.arange(self.y.shape[0]), 40 | self.y], dtype=theano.config.floatX) 41 | self.y_pred = T.argmax(self.p_of_y, axis=1) 42 | self.err = T.mean(T.neq(self.y_pred, self.y.flatten()), dtype=theano.config.floatX) 43 | 44 | self.computeloss = theano.function(inputs=[self.X, self.S1, self.S2, self.y], 45 | outputs=[self.err, self.cost]) 46 | self.y_out = theano.function(inputs=[self.X, self.S1, self.S2], outputs=[self.y_pred]) 47 | 48 | def run_training(self, input, stepsize=0.01, epochs=10, output='None', batch_size=128, grad_check=True, 49 | profile=False, data_fraction=1): 50 | # run training from input matlab data file, and save test data prediction in output file 51 | # load data from Matlab file, including 52 | # im_data: flattened images 53 | # state_data: concatenated one-hot vectors for each state variable 54 | # state_xy_data: state variable (x,y position) 55 | # label_data: one-hot vector for action (state difference) 56 | matlab_data = sio.loadmat(input) 57 | im_data = matlab_data["batch_im_data"] 58 | im_data = (im_data - 1)/255 # obstacles = 1, free zone = 0 59 | value_data = matlab_data["batch_value_data"] 60 | state1_data = matlab_data["state_x_data"] 61 | state2_data = matlab_data["state_y_data"] 62 | label_data = matlab_data["batch_label_data"] 63 | ydata = label_data.astype('int8') 64 | Xim_data = im_data.astype(theano.config.floatX) 65 | Xim_data = Xim_data.reshape(-1, 1, self.im_size[0], self.im_size[1]) 66 | Xval_data = value_data.astype(theano.config.floatX) 67 | Xval_data = Xval_data.reshape(-1, 1, self.im_size[0], self.im_size[1]) 68 | Xdata = np.append(Xim_data, Xval_data, axis=1) 69 | S1data = state1_data.astype('int8') 70 | S2data = state2_data.astype('int8') 71 | 72 | all_training_samples = int(6/7.0*Xdata.shape[0]) 73 | training_samples = int(data_fraction * all_training_samples) 74 | Xtrain = Xdata[0:training_samples] 75 | S1train = S1data[0:training_samples] 76 | S2train = S2data[0:training_samples] 77 | ytrain = ydata[0:training_samples] 78 | 79 | Xtest = Xdata[all_training_samples:] 80 | S1test = S1data[all_training_samples:] 81 | S2test = S2data[all_training_samples:] 82 | ytest = ydata[all_training_samples:] 83 | ytest = ytest.flatten() 84 | 85 | sortinds = np.random.permutation(training_samples) 86 | Xtrain = Xtrain[sortinds] 87 | S1train = S1train[sortinds] 88 | S2train = S2train[sortinds] 89 | ytrain = ytrain[sortinds] 90 | ytrain = ytrain.flatten() 91 | 92 | self.updates = rmsprop_updates_T(self.cost, self.params, stepsize=stepsize) 93 | self.train = theano.function(inputs=[self.X, self.S1, self.S2, self.y], outputs=[], updates=self.updates) 94 | 95 | print fmt_row(10, ["Epoch", "Train NLL", "Train Err", "Test NLL", "Test Err", "Epoch Time"]) 96 | for i_epoch in xrange(int(epochs)): 97 | tstart = time.time() 98 | # do training 99 | for start in xrange(0, Xtrain.shape[0], batch_size): 100 | end = start+batch_size 101 | if end <= Xtrain.shape[0]: 102 | self.train(Xtrain[start:end], S1train[start:end], S2train[start:end], 103 | ytrain[start*self.statebatchsize:end*self.statebatchsize]) 104 | elapsed = time.time() - tstart 105 | # compute losses 106 | trainerr = 0. 107 | trainloss = 0. 108 | testerr = 0. 
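# note: as in CNN.py, the per-epoch train metrics below use batches from the start of the
# shuffled training set; label slices are scaled by statebatchsize because every image
# sample carries statebatchsize (S1,S2,y) state/action entries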
109 | testloss = 0. 110 | num = 0 111 | for start in xrange(0, Xtest.shape[0], batch_size): 112 | end = start+batch_size 113 | if end <= Xtest.shape[0]: 114 | num += 1 115 | trainerr_, trainloss_ = self.computeloss(Xtrain[start:end], S1train[start:end], S2train[start:end], 116 | ytrain[start*self.statebatchsize:end*self.statebatchsize]) 117 | testerr_, testloss_ = self.computeloss(Xtest[start:end], S1test[start:end], S2test[start:end], 118 | ytest[start*self.statebatchsize:end*self.statebatchsize]) 119 | trainerr += trainerr_ 120 | trainloss += trainloss_ 121 | testerr += testerr_ 122 | testloss += testloss_ 123 | print fmt_row(10, [i_epoch, trainloss/num, trainerr/num, testloss/num, testerr/num, elapsed]) 124 | 125 | def predict(self, input): 126 | # NN output for a single input, read from file 127 | matlab_data = sio.loadmat(input) 128 | im_data = matlab_data["im_data"] 129 | im_data = (im_data - 1)/255 # obstacles = 1, free zone = 0 130 | state_data = matlab_data["state_xy_data"] 131 | value_data = matlab_data["value_data"] 132 | xim_test = im_data.astype(theano.config.floatX) 133 | xim_test = xim_test.reshape(-1, 1, self.im_size[0], self.im_size[1]) 134 | xval_test = value_data.astype(theano.config.floatX) 135 | xval_test = xval_test.reshape(-1, 1, self.im_size[0], self.im_size[1]) 136 | x_test = np.append(xim_test, xval_test, axis=1) 137 | s_test = state_data.astype('int8') 138 | s1_test = s_test[:, 0].reshape([1, 1]) 139 | s2_test = s_test[:, 1].reshape([1, 1]) 140 | out = self.y_out(x_test, s1_test, s2_test) 141 | return out[0][0] 142 | 143 | def load_weights(self, infile="weight_dump.pk"): 144 | dump = pickle.load(open(infile, 'r')) 145 | [n.set_value(p) for n, p in zip(self.params, dump)] 146 | 147 | def save_weights(self, outfile="weight_dump.pk"): 148 | pickle.dump([n.get_value() for n in self.params], open(outfile, 'w')) 149 | 150 | 151 | class FCN(object): 152 | """FCN network""" 153 | def __init__(self, in_x, in_s1, in_s2, in_x_channels, imsize, batchsize=128, 154 | state_batch_size=1, l_1=150, l_2=150, l_3=150): 155 | """ 156 | Allocate a FCN network with shared variable internal parameters. Assumes 16X16 images 157 | 158 | :type in_x: theano.tensor.dtensor4 159 | :param in_x: symbolic input image tensor, of shape [batchsize, in_x_channels, imsize[0], imsize[1]] 160 | Typically : first channel is image, second is the reward prior. 
161 | 162 | :type in_s1: theano.tensor.bmatrix 163 | :param in_s1: symbolic input batches of vertical positions, of shape [batchsize, state_batch_size] 164 | 165 | :type in_s2: theano.tensor.bmatrix 166 | :param in_s2: symbolic input batches of horizontal positions, of shape [batchsize, state_batch_size] 167 | 168 | :type in_x_channels: int32 169 | :param in_x_channels: number of input channels 170 | 171 | :type imsize: tuple or list of length 2 172 | :param imsize: (image height, image width) 173 | 174 | :type batchsize: int32 175 | :param batchsize: batch size 176 | 177 | :type state_batch_size: int32 178 | :param state_batch_size: number of state inputs for each sample 179 | 180 | :type l_1: int32 181 | :param l_1: number of filters in first conv layer 182 | 183 | :type l_2: int32 184 | :param l_2: number of filters in second conv layer 185 | 186 | :type l_3: int32 187 | :param l_3: number of filters in third conv layer 188 | 189 | """ 190 | self.b1 = theano.shared((np.random.randn(l_1) * 0.01).astype(theano.config.floatX)) 191 | self.w1 = init_weights_T(l_1, in_x_channels, imsize[0]*2-1, imsize[1]*2-1) 192 | self.h1 = T.nnet.conv2d(in_x, self.w1, input_shape=[batchsize, self.w1.shape.eval()[1], imsize[0], imsize[1]], 193 | border_mode=(imsize[0]-1, imsize[1]-1), 194 | filter_shape=[l_1, in_x_channels, imsize[0]*2-1, imsize[1]*2-1]) 195 | self.h1 = T.nnet.relu(self.h1 + self.b1.dimshuffle('x', 0, 'x', 'x')) 196 | 197 | self.w2 = init_weights_T(l_2, l_1, 1, 1) 198 | self.h2 = conv2D_keep_shape(self.h1, self.w2, image_shape=[batchsize, self.w1.shape.eval()[0], 199 | imsize[0], imsize[1]], 200 | filter_shape=[l_2, l_1, 1, 1]) 201 | self.b2 = theano.shared((np.random.randn(l_2) * 0.01).astype(theano.config.floatX)) # 150 parameters 202 | self.h2 = T.nnet.relu(self.h2 + self.b2.dimshuffle('x', 0, 'x', 'x')) 203 | 204 | self.w3 = init_weights_T(l_3, l_2, 1, 1) 205 | self.h3 = conv2D_keep_shape(self.h2, self.w3, image_shape=[batchsize, self.w2.shape.eval()[0], 206 | imsize[0], imsize[1]], 207 | filter_shape=[l_3, l_2, 1, 1]) 208 | self.b3 = theano.shared((np.random.randn(l_3) * 0.01).astype(theano.config.floatX)) # 150 parameters 209 | self.h3 = T.nnet.relu(self.h3 + self.b3.dimshuffle('x', 0, 'x', 'x')) 210 | 211 | # Select the conv-net channels at the state position (S1,S2). This is the FCN thing. 212 | self.h_out = self.h3[T.extra_ops.repeat(T.arange(self.h3.shape[0]), state_batch_size), :, in_s1.flatten(), 213 | in_s2.flatten()] 214 | 215 | # softmax output weights 216 | self.w_o = init_weights_T(l_3, 8) 217 | self.output = T.nnet.softmax(T.dot(self.h_out, self.w_o)) 218 | 219 | self.params = [self.w1, self.b1, self.w2, self.b2, self.w3, self.b3, self.w_o] 220 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | COPYRIGHT 2 | 3 | All contributions by the University of California: 4 | Copyright (c) 2015, 2016 The Regents of the University of California (Regents) 5 | All rights reserved. 6 | 7 | All other contributions: 8 | Copyright (c) 2015, 2016, the respective contributors 9 | All rights reserved. 10 | 11 | VIN uses a shared copyright model: each contributor holds copyright over 12 | their contributions to the VIN codebase. The project versioning records all such 13 | contribution and copyright details. 
If a contributor wants to further mark 14 | their specific copyright on a particular contribution, they should indicate 15 | their copyright solely in the commit message of the change when it is 16 | committed. 17 | 18 | LICENSE 19 | 20 | Redistribution and use in source and binary forms, with or without 21 | modification, are permitted provided that the following conditions are met: 22 | 23 | 1. Redistributions of source code must retain the above copyright notice, this 24 | list of conditions and the following disclaimer. 25 | 2. Redistributions in binary form must reproduce the above copyright notice, 26 | this list of conditions and the following disclaimer in the documentation 27 | and/or other materials provided with the distribution. 28 | 29 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 30 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 31 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 32 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 33 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 34 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 35 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 36 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 37 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 38 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 39 | 40 | CONTRIBUTION AGREEMENT 41 | 42 | By contributing to the VIN repository through pull-request, comment, 43 | or otherwise, the contributor releases their content to the 44 | license and copyright terms herein. 45 | -------------------------------------------------------------------------------- /MDPs/@Gridworld_Graph8/GetOptTraj.m: -------------------------------------------------------------------------------- 1 | function [states_xy, states_one_hot] = GetOptTraj(M,s0) 2 | % Optimal trajectories from initial state to goal 3 | % return states_xy: cell array of trajectories in xy-space 4 | % states_one_hot: cell array of trajectories in one-hot vectors for 5 | % x and y 6 | [G,W] = M.getGraph_inv; 7 | G_inv = G'; % transpose graph for tranposing single-node SP -> single destination SP 8 | N = size(G,1); 9 | Ns = 1; 10 | init_states = s0; 11 | goal_s = M.map_ind_to_state(M.targetRow,M.targetCol); 12 | states = cell(Ns,1); 13 | states_xy = cell(Ns,1); 14 | states_one_hot = cell(Ns,1); 15 | i = 1; 16 | options.edge_weight = W; 17 | [~, pred] = shortest_paths(G_inv,goal_s,options); % all SP from goal 18 | for n = 1:Ns 19 | [path] = SP(pred,goal_s,init_states(n)); % get SP from goal->init 20 | path = path(end:-1:1)'; % reverse path since we want init->goal 21 | states{i} = path; 22 | i = i+1; 23 | end 24 | for i = 1:length(states) 25 | L = length(states{i}); 26 | [r,c] = M.getCoords(states{i}); 27 | row_mat = zeros(L,M.Nrow); 28 | col_mat = zeros(L,M.Ncol); 29 | for j = 1:L 30 | row_mat(j,r(j)) = 1; 31 | col_mat(j,c(j)) = 1; 32 | end 33 | states_one_hot{i} = [row_mat col_mat]; 34 | states_xy{i} = [r,c]; 35 | end -------------------------------------------------------------------------------- /MDPs/@Gridworld_Graph8/Gridworld_Graph8.m: -------------------------------------------------------------------------------- 1 | classdef Gridworld_Graph8 < Finite_MDP_class 2 | % Gridworld domain with obstacles. Actions are to 3 | % {n,s,e,w,ne,nw,se,sw}. 
Transitions are deterministic 4 | properties 5 | Nrow = 1; % image rows 6 | Ncol = 1; % image columns 7 | img = []; % image 8 | obstacles = [];% indices of obtacles in image 9 | non_obstacles; % indices of obtacles in image 10 | targetRow = 1; 11 | targetCol = 1; 12 | G = []; % transition graph 13 | W = []; 14 | state_map_col; % map from states to col values 15 | state_map_row; % map from states to row values 16 | end 17 | methods (Static) 18 | function [newrow,newcol] = north(row,col,Nrow,Ncol,im) 19 | newrow = max(row-1,1); 20 | newcol = col; 21 | if im(newrow,newcol) == 0 % obstacle 22 | newrow = row; 23 | newcol = col; 24 | end 25 | end 26 | function [newrow,newcol] = northeast(row,col,Nrow,Ncol,im) 27 | newrow = max(row-1,1); 28 | newcol = min(col+1,Ncol); 29 | if im(newrow,newcol) == 0 % obstacle 30 | newrow = row; 31 | newcol = col; 32 | end 33 | end 34 | function [newrow,newcol] = northwest(row,col,Nrow,Ncol,im) 35 | newrow = max(row-1,1); 36 | newcol = max(col-1,1); 37 | if im(newrow,newcol) == 0 % obstacle 38 | newrow = row; 39 | newcol = col; 40 | end 41 | end 42 | function [newrow,newcol] = south(row,col,Nrow,Ncol,im) 43 | newrow = min(row+1,Nrow); 44 | newcol = col; 45 | if im(newrow,newcol) == 0 % obstacle 46 | newrow = row; 47 | newcol = col; 48 | end 49 | end 50 | function [newrow,newcol] = southeast(row,col,Nrow,Ncol,im) 51 | newrow = min(row+1,Nrow); 52 | newcol = min(col+1,Ncol); 53 | if im(newrow,newcol) == 0 % obstacle 54 | newrow = row; 55 | newcol = col; 56 | end 57 | end 58 | function [newrow,newcol] = southwest(row,col,Nrow,Ncol,im) 59 | newrow = min(row+1,Nrow); 60 | newcol = max(col-1,1); 61 | if im(newrow,newcol) == 0 % obstacle 62 | newrow = row; 63 | newcol = col; 64 | end 65 | end 66 | function [newrow,newcol] = east(row,col,Nrow,Ncol,im) 67 | newrow = row; 68 | newcol = min(col+1,Ncol); 69 | if im(newrow,newcol) == 0 % obstacle 70 | newrow = row; 71 | newcol = col; 72 | end 73 | end 74 | function [newrow,newcol] = west(row,col,Nrow,Ncol,im) 75 | newrow = row; 76 | newcol = max(col-1,1); 77 | if im(newrow,newcol) == 0 % obstacle 78 | newrow = row; 79 | newcol = col; 80 | end 81 | end 82 | function [rows,cols] = neighbors(row,col,Nrow,Ncol,im) 83 | [rows,cols] = Gridworld_Graph8.north(row,col,Nrow,Ncol,im); 84 | [newrow,newcol] = Gridworld_Graph8.south(row,col,Nrow,Ncol,im); 85 | rows = [rows,newrow]; cols = [cols,newcol]; 86 | [newrow,newcol] = Gridworld_Graph8.east(row,col,Nrow,Ncol,im); 87 | rows = [rows,newrow]; cols = [cols,newcol]; 88 | [newrow,newcol] = Gridworld_Graph8.west(row,col,Nrow,Ncol,im); 89 | rows = [rows,newrow]; cols = [cols,newcol]; 90 | [newrow,newcol] = Gridworld_Graph8.northeast(row,col,Nrow,Ncol,im); 91 | rows = [rows,newrow]; cols = [cols,newcol]; 92 | [newrow,newcol] = Gridworld_Graph8.northwest(row,col,Nrow,Ncol,im); 93 | rows = [rows,newrow]; cols = [cols,newcol]; 94 | [newrow,newcol] = Gridworld_Graph8.southeast(row,col,Nrow,Ncol,im); 95 | rows = [rows,newrow]; cols = [cols,newcol]; 96 | [newrow,newcol] = Gridworld_Graph8.southwest(row,col,Nrow,Ncol,im); 97 | rows = [rows,newrow]; cols = [cols,newcol]; 98 | end 99 | end 100 | methods 101 | function obj = Gridworld_Graph8(ImageFile,targetRow,targetCol) 102 | if ischar(ImageFile) 103 | % construct graph from image file 104 | im = imread(ImageFile); 105 | img = double(rgb2gray(im)); 106 | else 107 | % image is already a matrix 108 | img = ImageFile; 109 | end 110 | Nrow = size(img,1); 111 | Ncol = size(img,2); 112 | obstacles = find(img == 0); 113 | non_obstacles = find(img ~= 0); 114 
| target = sub2ind([Nrow,Ncol],targetRow,targetCol); 115 | Ns = Nrow*Ncol; 116 | Na = 8; 117 | Pn = zeros(Ns,Ns); % north 118 | Ps = zeros(Ns,Ns); % south 119 | Pe = zeros(Ns,Ns); % east 120 | Pw = zeros(Ns,Ns); % west 121 | Pne = zeros(Ns,Ns); % north east 122 | Pnw = zeros(Ns,Ns); % north west 123 | Pse = zeros(Ns,Ns); % south east 124 | Psw = zeros(Ns,Ns); % south west 125 | G = zeros(Ns,Ns); 126 | R = -1*ones(Ns,Na); 127 | R(:,5:8) = R(:,5:8)*sqrt(2); % diagonal cost 128 | R(target,:) = 0; 129 | for row = 1:Nrow 130 | for col = 1:Ncol 131 | curpos = sub2ind([Nrow,Ncol],row,col); 132 | [rows,cols] = Gridworld_Graph8.neighbors(row,col,Nrow,Ncol,img); 133 | neighbor_inds = sub2ind([Nrow,Ncol],rows,cols); 134 | Pn(curpos,neighbor_inds(1)) = Pn(curpos,neighbor_inds(1)) + 1; 135 | Ps(curpos,neighbor_inds(2)) = Ps(curpos,neighbor_inds(2)) + 1; 136 | Pe(curpos,neighbor_inds(3)) = Pe(curpos,neighbor_inds(3)) + 1; 137 | Pw(curpos,neighbor_inds(4)) = Pw(curpos,neighbor_inds(4)) + 1; 138 | Pne(curpos,neighbor_inds(5)) = Pne(curpos,neighbor_inds(5)) + 1; 139 | Pnw(curpos,neighbor_inds(6)) = Pnw(curpos,neighbor_inds(6)) + 1; 140 | Pse(curpos,neighbor_inds(7)) = Pse(curpos,neighbor_inds(7)) + 1; 141 | Psw(curpos,neighbor_inds(8)) = Psw(curpos,neighbor_inds(8)) + 1; 142 | end 143 | end 144 | G = Pn | Ps | Pe | Pw | Pne | Pnw | Pse | Psw; 145 | W = max(max(max(max(max(max(max(Pn,Ps),Pe),Pw),sqrt(2)*Pne),sqrt(2)*Pnw),sqrt(2)*Pse),sqrt(2)*Psw); 146 | Pn = Pn(non_obstacles,:); Pn = Pn(:,non_obstacles); 147 | Ps = Ps(non_obstacles,:); Ps = Ps(:,non_obstacles); 148 | Pe = Pe(non_obstacles,:); Pe = Pe(:,non_obstacles); 149 | Pw = Pw(non_obstacles,:); Pw = Pw(:,non_obstacles); 150 | Pne = Pne(non_obstacles,:); Pne = Pne(:,non_obstacles); 151 | Pnw = Pnw(non_obstacles,:); Pnw = Pnw(:,non_obstacles); 152 | Pse = Pse(non_obstacles,:); Pse = Pse(:,non_obstacles); 153 | Psw = Psw(non_obstacles,:); Psw = Psw(:,non_obstacles); 154 | G = G(non_obstacles,:); G = G(:,non_obstacles); 155 | W = W(non_obstacles,:); W = W(:,non_obstacles); 156 | R = R(non_obstacles,:); 157 | P = cat(3,Pn,Ps,Pe,Pw,Pne,Pnw,Pse,Psw); 158 | obj@Finite_MDP_class(P,R); 159 | obj.Nrow = Nrow; 160 | obj.Ncol = Ncol; 161 | obj.img = img; 162 | obj.obstacles = obstacles; 163 | obj.non_obstacles = non_obstacles; 164 | obj.targetRow = targetRow; 165 | obj.targetCol = targetCol; 166 | obj.G = G; 167 | obj.W = W; 168 | [state_map_col, state_map_row] = meshgrid(1:Ncol,1:Nrow); 169 | obj.state_map_row = state_map_row(non_obstacles); 170 | obj.state_map_col = state_map_col(non_obstacles); 171 | end 172 | function [G,W] = getGraph(obj) 173 | % return directed graph G with weights W for gridworld 174 | G = sparse(double(obj.G)); 175 | W = obj.W(obj.W~=0); 176 | end 177 | function [G,W] = getGraph_inv(obj) 178 | % return inverse directed graph G with weights W for gridworld 179 | G = sparse(double(obj.G')); 180 | W_inv = obj.W'; 181 | W = W_inv(W_inv~=0); 182 | end 183 | function [im] = val2image(obj,val) 184 | % put values (for states) on the image 185 | im = zeros(obj.Nrow,obj.Ncol); 186 | im(obj.non_obstacles) = val; 187 | end 188 | function [im] = getValuePrior(obj) 189 | % get a prior for the value function (just Euclidean distance to goal) 190 | [s_map_col, s_map_row] = meshgrid(1:obj.Ncol,1:obj.Nrow); 191 | im = sqrt((s_map_col-obj.targetCol).^2 + (s_map_row-obj.targetRow).^2); 192 | end 193 | function [im] = getRewardPrior(obj) 194 | % get a prior for the reward function (just -1 for every non-goal state) 195 | im = -1*ones(obj.Nrow,obj.Ncol); 196 | 
im(obj.targetRow,obj.targetCol) = 10; 197 | end 198 | function [im] = getStateImage(obj, row, col) 199 | % get an image for the current state (just 0 for every other state) 200 | im = zeros(obj.Nrow,obj.Ncol); 201 | im(row,col) = 1; 202 | end 203 | function [s] = map_ind_to_state(obj,row,col) 204 | % find state index for given row and col 205 | s = find(obj.state_map_row == row & obj.state_map_col == col); 206 | end 207 | function [r,c] = getCoords(obj,states) 208 | [r,c] = ind2sub([obj.Nrow,obj.Ncol],obj.non_obstacles(states)); 209 | end 210 | function [Nrow,Ncol] = getSize(obj) 211 | Nrow = obj.Nrow; 212 | Ncol = obj.Ncol; 213 | end 214 | end 215 | end -------------------------------------------------------------------------------- /MDPs/@Gridworld_Graph8/OptimalActionsOnPath.m: -------------------------------------------------------------------------------- 1 | function [states_xy, states_one_hot] = OptimalActionsOnPath(M,traj) 2 | % returns the optimal next states (shortest distance to goal) along path in 3 | % xy-space 4 | % return states_xy: cell array of trajectories in xy-space 5 | % states_one_hot: cell array of trajectories in one-hot vectors for 6 | % x and y 7 | [G,W] = M.getGraph_inv; 8 | G_inv = G'; % transpose graph for tranposing single-node SP -> single destination SP 9 | % [dist] = all_shortest_paths(G); 10 | N = size(G,1); 11 | Ns = size(traj,1); 12 | goal_s = M.map_ind_to_state(M.targetRow,M.targetCol); 13 | states = zeros(Ns,1); 14 | states_xy = zeros(Ns,2); 15 | r_one_hot = zeros(Ns,M.Nrow); 16 | c_one_hot = zeros(Ns,M.Ncol); 17 | options.edge_weight = W; 18 | [~, pred] = shortest_paths(G_inv,goal_s,options); % all SP from goal 19 | for s = 1:Ns 20 | curr_s = M.map_ind_to_state(traj(s,2),traj(s,1)); % TODO - figure out why? 21 | next_s = pred(curr_s); 22 | if next_s == 0 23 | next_s = curr_s; 24 | end 25 | [r,c] = M.getCoords(next_s); 26 | states(s) = next_s; 27 | states_xy(s,:) = [r,c]; 28 | r_one_hot(s,r) = 1; 29 | c_one_hot(s,c) = 1; 30 | end 31 | states_one_hot = [r_one_hot, c_one_hot]; -------------------------------------------------------------------------------- /MDPs/@Gridworld_Graph8/SampleGraphTraj.m: -------------------------------------------------------------------------------- 1 | function [states_xy, states_one_hot] = SampleGraphTraj(M,Ns) 2 | % sample Ns states trajectories from random nodes in graph object M to goal 3 | % return states_xy: cell array of trajectories in xy-space 4 | % states_one_hot: cell array of trajectories in one-hot vectors for 5 | % x and y 6 | [G,W] = M.getGraph_inv; 7 | G_inv = G'; % transpose graph for tranposing single-node SP -> single destination SP 8 | N = size(G,1); 9 | if N >= Ns 10 | rand_ind = randperm(N); 11 | else 12 | rand_ind = repmat(randperm(N),1,10); % hack for small domains 13 | end 14 | 15 | init_states = rand_ind(1:Ns); 16 | goal_s = M.map_ind_to_state(M.targetRow,M.targetCol); 17 | states = cell(Ns,1); 18 | states_xy = cell(Ns,1); 19 | states_one_hot = cell(Ns,1); 20 | i = 1; 21 | options.edge_weight = W; 22 | [~, pred] = shortest_paths(G_inv,goal_s,options); % all SP from goal 23 | for n = 1:Ns 24 | [path] = SP(pred,goal_s,init_states(n)); % get SP from goal->init 25 | path = path(end:-1:1)'; % reverse path since we want init->goal 26 | states{i} = path; 27 | i = i+1; 28 | end 29 | for i = 1:length(states) 30 | L = length(states{i}); 31 | [r,c] = M.getCoords(states{i}); 32 | row_mat = zeros(L,M.Nrow); 33 | col_mat = zeros(L,M.Ncol); 34 | for j = 1:L 35 | row_mat(j,r(j)) = 1; 36 | col_mat(j,c(j)) = 1; 37 | 
end 38 | states_one_hot{i} = [row_mat col_mat]; 39 | states_xy{i} = [r,c]; 40 | end -------------------------------------------------------------------------------- /MDPs/Finite_MDP_class.m: -------------------------------------------------------------------------------- 1 | classdef Finite_MDP_class < MDP_class 2 | % Finite state and action MDP 3 | properties 4 | P = []; % transition kernel 5 | R = []; % reward 6 | A = []; % possible actions at each state 7 | Ns = 0; % number of states 8 | Na = 0; % number of actions 9 | end 10 | methods 11 | function obj = Finite_MDP_class(P,R,A) 12 | % constructor: 13 | % P is Ns*Ns*Na matrix of transitions P(s'|s,a) 14 | % R is Ns*Na matrix of deterministic rewards r(s,a) 15 | % A is Ns*Na binary matrix of available actions at each state 16 | % (default - all actions are possible). 17 | obj.P = P; 18 | obj.R = R; 19 | obj.Ns = size(P,1); 20 | obj.Na = size(P,3); 21 | if nargin < 3 22 | A = ones(obj.Ns,obj.Na); 23 | end 24 | obj.A = A; 25 | end 26 | 27 | function Ns= getNumStates(obj) 28 | Ns = obj.Ns; 29 | end 30 | 31 | function Na = getNumActions(obj) 32 | Na = obj.Na; 33 | end 34 | 35 | function a = getActions(obj,s) 36 | a = find(obj.A(s,:)); 37 | end 38 | 39 | function r = getReward(obj,s,a) 40 | r = obj.R(s,a)'; 41 | end 42 | 43 | function p = nextStateProb(obj,s,a) 44 | % get next state probability for action a 45 | % if a is a scalar the function returns a row vector 46 | % if a is a vector then a matrix is returned with the 47 | % probabilities on rows 48 | if numel(a) == 1 49 | p = squeeze(obj.P(s,:,a)); 50 | else 51 | p = squeeze(obj.P(s,:,a))'; 52 | end 53 | end 54 | 55 | function snext = sampleNextState(obj,s,a) 56 | % sample a next state given s and a 57 | snext = rand_choose(obj.nextStateProb(s,a)); 58 | end 59 | end 60 | end 61 | -------------------------------------------------------------------------------- /MDPs/MDP_class.m: -------------------------------------------------------------------------------- 1 | classdef MDP_class < matlab.mixin.Copyable 2 | % Interface for MDP 3 | methods (Abstract) 4 | Ns= getNumStates(obj); % total states 5 | a = getNumActions(obj); % total possible actions 6 | a = getActions(obj,s); % actions at state s 7 | r = getReward(obj,s,a); 8 | p = nextStateProb(obj,s,a); 9 | snext = sampleNextState(obj,s,a); 10 | end 11 | end 12 | -------------------------------------------------------------------------------- /NN_run_training.py: -------------------------------------------------------------------------------- 1 | from NNobj import * 2 | from vin import vin 3 | from vin_untied import vin_untied 4 | from FCN import fcn 5 | from CNN import cnn 6 | 7 | 8 | def main(): 9 | import argparse 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument("--input") 12 | parser.add_argument("--output", default="None") 13 | parser.add_argument("--epochs", type=int, default=10) 14 | parser.add_argument("--profile", action="store_true") 15 | parser.add_argument("--dropout", action="store_true") 16 | parser.add_argument("--stepsize", type=float, default=.0002) 17 | parser.add_argument("--model", 18 | choices=["dense1", "dense2", "dense3", "conv", "valIterMultiBatch", "valIterBatch", 19 | "valIterMars", "valIterMarsSingle", "valIterBatchUntied", "fcn", "cnn"], 20 | default="dense") 21 | parser.add_argument("--unittest", action="store_true") 22 | parser.add_argument("--grad_check", action="store_true") 23 | parser.add_argument("--devtype", choices=["cpu", "gpu"], default="cpu") 24 | parser.add_argument("--warmstart", 
default="None") 25 | parser.add_argument("--reg", type=float, default=.0) 26 | parser.add_argument("--imsize", type=int, default=28) 27 | parser.add_argument("--k", type=int, default=10) 28 | parser.add_argument("--batchsize", type=int, default=128) 29 | parser.add_argument("--statebatchsize", type=int, default=1) 30 | parser.add_argument("--stepdecreaserate", type=float, default=1.0) 31 | parser.add_argument("--stepdecreasetime", type=int, default=10000) 32 | parser.add_argument("--data_fraction", type=float, default=1.0) 33 | args = parser.parse_args() 34 | 35 | if args.model == "fcn": 36 | # FCN network 37 | my_nn = fcn(model=args.model, im_size=[args.imsize, args.imsize], dropout=args.dropout, 38 | devtype=args.devtype, grad_check=args.grad_check, reg=args.reg, 39 | batchsize=args.batchsize, statebatchsize=args.statebatchsize) 40 | elif args.model == "cnn": 41 | # FCN network 42 | my_nn = cnn(model=args.model, im_size=[args.imsize, args.imsize], dropout=args.dropout, 43 | devtype=args.devtype, grad_check=args.grad_check, reg=args.reg, 44 | batchsize=args.batchsize) 45 | elif args.model == "valIterBatch": 46 | # VI network 47 | my_nn = vin(model=args.model, im_size=[args.imsize, args.imsize], dropout=args.dropout, 48 | devtype=args.devtype, grad_check=args.grad_check, reg=args.reg, k=args.k, 49 | batchsize=args.batchsize, statebatchsize=args.statebatchsize) 50 | elif args.model == "valIterBatchUntied": 51 | # VI network with untied weights 52 | my_nn = vin_untied(model=args.model, im_size=[args.imsize, args.imsize], dropout=args.dropout, 53 | devtype=args.devtype, grad_check=args.grad_check, reg=args.reg, k=args.k, 54 | batchsize=args.batchsize, statebatchsize=args.statebatchsize) 55 | else: 56 | # FC network 57 | my_nn = NNobj(model=args.model, im_size=[args.imsize, args.imsize], dropout=args.dropout, 58 | devtype=args.devtype, grad_check=args.grad_check, reg=args.reg) 59 | if args.warmstart != "None": 60 | print('warmstarting...') 61 | my_nn.load_weights(args.warmstart) 62 | my_nn.run_training(input=str(args.input), stepsize=args.stepsize, epochs=args.epochs, 63 | grad_check=args.grad_check, batch_size=args.batchsize, data_fraction=args.data_fraction) 64 | my_nn.save_weights(outfile=str(args.output)) 65 | 66 | if __name__ == "__main__": 67 | main() 68 | -------------------------------------------------------------------------------- /NNobj.py: -------------------------------------------------------------------------------- 1 | # interface for NN object 2 | 3 | import numpy as np 4 | import pickle 5 | import scipy.io as sio 6 | import time 7 | 8 | 9 | class NNobj: 10 | "Class for a multi-layer perceptron object" 11 | def __init__(self): 12 | raise RuntimeError("Not implemented") 13 | 14 | def save_weights(self, outfile="weight_dump.pk"): 15 | pickle.dump([n.op.get_value() for n in self.params], open(outfile, 'w')) 16 | 17 | def load_weights(self, infile="weight_dump.pk"): 18 | dump = pickle.load(open(infile, 'r')) 19 | [n.op.set_value(p) for n, p in zip(self.params, dump)] 20 | 21 | 22 | # helper methods to print nice table (taken from CGT code) 23 | def fmt_item(x, l): 24 | if isinstance(x, np.ndarray): 25 | assert x.ndim==0 26 | x = x.item() 27 | if isinstance(x, float): rep = "%g"%x 28 | else: rep = str(x) 29 | return " "*(l - len(rep)) + rep 30 | 31 | 32 | def fmt_row(width, row, header=False): 33 | out = " | ".join(fmt_item(x, width) for x in row) 34 | if header: out = out + "\n" + "-"*len(out) 35 | return out 36 | 
-------------------------------------------------------------------------------- /NNpredict.m: -------------------------------------------------------------------------------- 1 | function [y] = NNpredict(nn,im,value,x,y,maxX,maxY) 2 | % call python to generate prediction for nn object, with input image and 3 | % x,y state (0 0 82 | p = cube(obj, i, j, 0, 1); 83 | % p.FaceColor = 'interp'; 84 | % p.FaceLighting = 'gouraud'; 85 | end 86 | end 87 | end 88 | view(3); 89 | end 90 | function [res] = add_border(obj) 91 | im_try = insertShape(obj.dom, 'Rectangle', [1, 1, obj.domsize(1), obj.domsize(2)], ... 92 | 'LineWidth', 1,'Opacity',1,'SmoothEdges',false); 93 | if obj.check_mask(im_try) 94 | res = 1; 95 | else 96 | obj.dom = im_try; 97 | res = 0; 98 | end 99 | end 100 | function p = cube(obj, X0, Y0, Z0, C0) 101 | X1 = [0;0;1;1], Y1 = [0;1;1;0], Z1 = [0;0;0;0]; 102 | X2 = [0;0;1;1], Y2 = [0;1;1;0], Z2 = [1;1;1;1]; 103 | Y3 = [0;0;1;1], Z3 = [0;1;1;0], X3 = [0;0;0;0]; 104 | Y4 = [0;0;1;1], Z4 = [0;1;1;0], X4 = [1;1;1;1]; 105 | X5 = [0;0;1;1], Z5 = [0;1;1;0], Y5 = [0;0;0;0]; 106 | X6 = [0;0;1;1], Z6 = [0;1;1;0], Y6 = [1;1;1;1]; 107 | X = [X1,X2,X3,X4,X5,X6] + X0; 108 | Y = [Y1,Y2,Y3,Y4,Y5,Y6] + Y0; 109 | Z = [Z1,Z2,Z3,Z4,Z5,Z6] + Z0; 110 | C = C0*rand(size(X)); 111 | p = patch(X,Y,Z,C); 112 | end 113 | end 114 | end 115 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Value Iteration Networks 2 | Code for NIPS 2016 paper: 3 | 4 | Value Iteration Networks 5 | 6 | Aviv Tamar, Yi Wu, Garrett Thomas, Sergey Levine, and Pieter Abbeel 7 | 8 | UC Berkeley 9 | 10 | 11 | Requires: 12 | - Python (2.7) 13 | - Theano (0.8) 14 | 15 | For generating the gridworld data and visualizing results, also requires: 16 | - Matlab (2015 or later required for calling python objects for visualizing trajectories) 17 | - Matlab BGL: http://www.mathworks.com/matlabcentral/fileexchange/10922-matlabbgl 18 | Put it in matlab_bgl folder. 19 | 20 | To start: the scripts directory contains scripts for generating the data, 21 | and training the different models. 22 | 23 | scripts/make_data_gridworld_nips.m generates the training data (random grid worlds). 24 | Alternatively, you can use the existing data files in the data folder (instead of generating them). 25 | 26 | scripts/nips_gridworld_experiments_VIN.sh shows how to train the VIN models. 27 | 28 | After training, a weights file (e.g., /results/grid28_VIN.pk) will be created. You can then run: 29 | - script_viz_policy.m to run the trained VIN with the learned weights and view the trajectories 30 | it produces (line 17 selects the weights file). 31 | - test_network.m to numerically evaluate the learned network on a test set (needs to be generated). 
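For reference, training can also be driven directly from Python instead of the shell scripts. The following is a minimal sketch mirroring the arguments that NN_run_training.py forwards to the VIN class (the hyperparameter values are illustrative, not necessarily those used in the paper):

    from vin import vin

    my_nn = vin(model="valIterBatch", im_size=[28, 28], dropout=False, devtype="cpu",
                k=10, batchsize=128, statebatchsize=10)
    my_nn.run_training(input="data/gridworld_28.mat", stepsize=0.0002, epochs=30,
                       batch_size=128, data_fraction=1.0)
    my_nn.save_weights(outfile="results/grid28_VIN.pk")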
32 | 33 | 34 | # Related implementations: 35 | Kent Sommer's implementation of VINs (including data generation) in python + pytorch 36 | 37 | https://github.com/kentsommer/pytorch-value-iteration-networks 38 | 39 | Abhishek Kumar's implementation of VINs in Tensor Flow 40 | 41 | https://github.com/TheAbhiKumar/tensorflow-value-iteration-networks 42 | -------------------------------------------------------------------------------- /addpaths.m: -------------------------------------------------------------------------------- 1 | addpath(genpath('./matlab_bgl')); 2 | addpath('MDPs'); 3 | addpath('ObstacleGenerators'); 4 | addpath('util'); -------------------------------------------------------------------------------- /data/gridworld_16.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/avivt/VIN/fe11bb1ae8ad9bcb3a02e2cc5e21b9499ccf0db4/data/gridworld_16.mat -------------------------------------------------------------------------------- /data/gridworld_16_test.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/avivt/VIN/fe11bb1ae8ad9bcb3a02e2cc5e21b9499ccf0db4/data/gridworld_16_test.mat -------------------------------------------------------------------------------- /data/gridworld_28.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/avivt/VIN/fe11bb1ae8ad9bcb3a02e2cc5e21b9499ccf0db4/data/gridworld_28.mat -------------------------------------------------------------------------------- /data/gridworld_28_test.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/avivt/VIN/fe11bb1ae8ad9bcb3a02e2cc5e21b9499ccf0db4/data/gridworld_28_test.mat -------------------------------------------------------------------------------- /data/gridworld_8.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/avivt/VIN/fe11bb1ae8ad9bcb3a02e2cc5e21b9499ccf0db4/data/gridworld_8.mat -------------------------------------------------------------------------------- /data/gridworld_8_test.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/avivt/VIN/fe11bb1ae8ad9bcb3a02e2cc5e21b9499ccf0db4/data/gridworld_8_test.mat -------------------------------------------------------------------------------- /extract_action.m: -------------------------------------------------------------------------------- 1 | function [actions] = extract_action(traj) 2 | % extract actions from state trajectory 3 | % an action corresponds to difference in state (n,s,e,w,ne,nw,se,sw) 4 | numActions = 8; 5 | action_vecs = ([[-1,0; 1,0; 0,1; 0,-1]; 1/sqrt(2)*[-1,1; -1,-1; 1,1; 1,-1]])'; % state difference unit vectors for each action 6 | % action_vecs_unnorm = ([-1,0; 1,0; 0,1; 0,-1; -1,1; -1,-1; 1,1; 1,-1]); % un-normalized state difference vectors 7 | 8 | state_diff = diff(traj); % state difference 9 | norm_state_diff = state_diff.*repmat(1./sqrt(sum(state_diff.^2,2)),1,size(state_diff,2)); % normalized state difference 10 | prj_state_diff = norm_state_diff*action_vecs; % project state difference on action vectors 11 | actions_one_hot = abs(prj_state_diff-1)<1e-5; % action corresponds to projection==1 12 | actions = actions_one_hot * (1:numActions)'; % action labels -------------------------------------------------------------------------------- 
/script_make_data.m: -------------------------------------------------------------------------------- 1 | % script to generate training data for learning trajectories 2 | % The data is organized in batches of multiple states from the same domain. 3 | % The batch size is determined by state_batch_size. 4 | % In addition, a flattened data (non-batched) organization is maintained. 5 | 6 | addpaths; 7 | % set parameters (defaults) 8 | set_var('size_1',28); set_var('size_2',28); 9 | dom_size = [size_1,size_2]; % domain size 10 | maxTrajLen = (size_1+size_2); % this is approximate, just to preallocate memory 11 | set_var('Ndomains', 10000); % number of domains 12 | set_var('maxObs', 10); % maximum number of obstacles in a domain 13 | set_var('maxObsSize',0.0); % maximum obstacle size 14 | set_var('Ntrajs', 1); % trajectories from each domain 15 | set_var('goal', [1,1]); % goal position 16 | set_var('rand_goal', false); % random goal position 17 | set_var('state_batch_size', 1); % batchsize for states per each data sample 18 | 19 | % containers for flattened data 20 | maxSamples = Ndomains*Ntrajs*maxTrajLen/2; % this is approximate, just to preallocate memory 21 | im_data = uint8(zeros([maxSamples, size_1*size_2])); % obstacle image 22 | value_data = uint8(zeros([maxSamples, size_1*size_2])); % value function prior (e.g., a reward function) 23 | state_onehot_data = uint8(zeros([maxSamples, size_1+size_2])); % 1-hot vectors of position for each dimension (x,y) 24 | state_xy_data = uint8(zeros([maxSamples, 2])); % position (in both coordinates) 25 | label_data = uint8(zeros([maxSamples, 1])); % action 26 | 27 | % containers for batched data 28 | numSamples = 1; 29 | all_states_xy = cell(Ndomains*Ntrajs,1); 30 | all_doms = cell(Ndomains*Ntrajs,1); 31 | numTrajs = 1; 32 | maxBatches = ceil(Ndomains*Ntrajs*maxTrajLen/state_batch_size); 33 | numBatches = 1; 34 | batch_im_data = uint8(zeros([maxBatches, size_1*size_2])); % obstacle image 35 | batch_value_data = uint8(zeros([maxBatches, size_1*size_2])); % value function prior 36 | state_x_data = uint8(zeros([maxBatches, state_batch_size])); % position (in 1st coordinate) 37 | state_y_data = uint8(zeros([maxBatches, state_batch_size])); % position (in 2nd coordinate) 38 | batch_label_data = uint8(zeros([maxBatches, state_batch_size])); % action 39 | 40 | %% make data 41 | figure; 42 | dom = 1; 43 | while dom <= Ndomains 44 | % allocate buffers for batched data from this domain 45 | s1_buffer = uint8(zeros([ceil(Ntrajs*maxTrajLen/state_batch_size), 1])); 46 | s2_buffer = uint8(zeros([ceil(Ntrajs*maxTrajLen/state_batch_size), 1])); 47 | label_buffer = uint8(zeros([ceil(Ntrajs*maxTrajLen/state_batch_size), 1])); 48 | % generate random domain 49 | buffer_pos = 1; 50 | if rand_goal 51 | goal(1,1) = 1+randi(size_1-1); 52 | goal(1,2) = 1+randi(size_2-1); 53 | end 54 | % generate random obstacles 55 | obs = obstacle_gen(dom_size,goal,maxObsSize); 56 | n_obs = obs.add_N_rand_obs(randi(maxObs)); 57 | add_border_res = obs.add_border; 58 | if n_obs == 0 || add_border_res 59 | disp('no obstacles added, or problem with border, regenerating map') 60 | continue; % no obstacles added, or problem with border, skip 61 | end 62 | im = double(rgb2gray(obs.getimage)); 63 | im = max(max(im)) - im; im = im./max(max(im)); imagesc(im); drawnow; 64 | % make graph (deterministic MDP) 65 | G = Gridworld_Graph8(im,goal(1),goal(2)); 66 | value_prior = G.getRewardPrior; 67 | % sample shortest-path trajectories in graph 68 | [states_xy, states_one_hot] = SampleGraphTraj(G,Ntrajs); 69 | 
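% The loop below converts each sampled trajectory into (state, action) training pairs,
% appending them both to the flattened containers and to this domain's batch buffers.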
hold on; 70 | for i = 1:Ntrajs % loop over trajectories in domain 71 | if ~isempty(states_xy{i}) && size(states_xy{i},1)>1 72 | % calculate the actions along the trajectory 73 | actions = extract_action(states_xy{i}); 74 | ns = size(states_xy{i},1)-1; 75 | % add trajecory to dataset 76 | % we transpose - since python is row major order 77 | % we subtract 1 - since python indexing starts at zero 78 | im_data(numSamples:numSamples+ns-1,:) = repmat(reshape(im',1,[]),ns,1); 79 | value_data(numSamples:numSamples+ns-1,:) = repmat(reshape(value_prior',1,[]),ns,1); 80 | state_onehot_data(numSamples:numSamples+ns-1,:) = states_one_hot{i}(1:ns,:); 81 | state_xy_data(numSamples:numSamples+ns-1,:) = states_xy{i}(1:ns,:)-1; 82 | s1_buffer(buffer_pos:buffer_pos+ns-1,:) = states_xy{i}(1:ns,1)-1; 83 | s2_buffer(buffer_pos:buffer_pos+ns-1,:) = states_xy{i}(1:ns,2)-1; 84 | label_data(numSamples:numSamples+ns-1,:) = actions - 1; 85 | label_buffer(buffer_pos:buffer_pos+ns-1,:) = actions - 1; 86 | % update sample counters and flattened data containers 87 | numSamples = numSamples+ns; 88 | buffer_pos = buffer_pos+ns; 89 | all_states_xy{numTrajs} = states_xy{i}; 90 | all_doms{numTrajs} = uint8(im); 91 | numTrajs = numTrajs + 1; 92 | % plot 93 | plot(states_xy{i}(:,2),states_xy{i}(:,1));drawnow; 94 | end 95 | end 96 | % batch size is fixed. We replicate the last sample to fill the batch. 97 | if mod(buffer_pos-1,state_batch_size)~=0 98 | samples_to_fill = state_batch_size-mod(buffer_pos,state_batch_size); 99 | s1_buffer(buffer_pos : buffer_pos+samples_to_fill) = s1_buffer(buffer_pos-1); 100 | s2_buffer(buffer_pos : buffer_pos+samples_to_fill) = s2_buffer(buffer_pos-1); 101 | label_buffer(buffer_pos : buffer_pos+samples_to_fill) = label_buffer(buffer_pos-1); 102 | buffer_pos = buffer_pos+samples_to_fill+1; 103 | end 104 | % fill data containers with random permutation of the data 105 | s1_buffer = s1_buffer(1:buffer_pos-1); 106 | s2_buffer = s2_buffer(1:buffer_pos-1); 107 | label_buffer = label_buffer(1:buffer_pos-1); 108 | rand_ind = randperm(buffer_pos-1); 109 | s1_buffer = s1_buffer(rand_ind); 110 | s2_buffer = s2_buffer(rand_ind); 111 | label_buffer = label_buffer(rand_ind); 112 | s1_batch = reshape(s1_buffer,state_batch_size,[])'; 113 | s2_batch = reshape(s2_buffer,state_batch_size,[])'; 114 | label_batch = reshape(label_buffer,state_batch_size,[])'; 115 | cur_batch_size = size(s1_batch,1); 116 | state_x_data(numBatches:numBatches+cur_batch_size-1,:) = s1_batch; 117 | state_y_data(numBatches:numBatches+cur_batch_size-1,:) = s2_batch; 118 | batch_label_data(numBatches:numBatches+cur_batch_size-1,:) = label_batch; 119 | batch_im_data(numBatches:numBatches+cur_batch_size-1,:) = repmat(reshape(im',1,[]),cur_batch_size,1); 120 | batch_value_data(numBatches:numBatches+cur_batch_size-1,:) = repmat(reshape(value_prior',1,[]),cur_batch_size,1); 121 | numBatches = numBatches+cur_batch_size; 122 | % pause; 123 | disp([num2str(Ndomains - dom) ' remaining domains']); 124 | hold off; 125 | dom = dom + 1; 126 | end 127 | % remove empty (preallocated) space in containers 128 | im_data = im_data(1:numSamples-1,:); 129 | value_data = value_data(1:numSamples-1,:); 130 | state_onehot_data = state_onehot_data(1:numSamples-1,:); 131 | state_xy_data = state_xy_data(1:numSamples-1,:); 132 | label_data = label_data(1:numSamples-1,:); 133 | all_states_xy = all_states_xy(1:numTrajs-1); 134 | all_doms = all_doms(1:numTrajs-1); 135 | state_x_data = state_x_data(1:numBatches-1,:); 136 | state_y_data = state_y_data(1:numBatches-1,:); 137 | 
batch_label_data = batch_label_data(1:numBatches-1,:); 138 | batch_im_data = batch_im_data(1:numBatches-1,:); 139 | batch_value_data = batch_value_data(1:numBatches-1,:); 140 | %% save data 141 | disp('saving data'); 142 | set_var('data_dir', '~/Data/LearnTraj/'); 143 | set_var('data_file', 'data.mat'); % store training data variables 144 | save([data_dir data_file],'im_data','state_onehot_data','label_data','value_data',... 145 | 'state_xy_data','state_x_data','state_y_data','batch_label_data','batch_im_data','batch_value_data'); -------------------------------------------------------------------------------- /script_viz_policy.m: -------------------------------------------------------------------------------- 1 | % script to visualize trajectories from a trained NN policy 2 | 3 | tmp = py.vin.vin; clear tmp; % to load Python 4 | 5 | % set parameters and load NN 6 | size_1 = 28; size_2 = 28; 7 | k = 36; 8 | prior = 'reward'; 9 | model = 'vin'; 10 | if strcmp(model,'cnn') 11 | nn = py.cnn.cnn(pyargs('im_size',int32([size_1,size_2]),'batchsize',int32(1),'statebatchsize',int32(1))); 12 | elseif strcmp(model,'vin') 13 | nn = py.vin.vin(pyargs('im_size',int32([size_1,size_2]),'k',int32(k),'batchsize',int32(1),'statebatchsize',int32(1))); 14 | elseif strcmp(model,'fcn') 15 | nn = py.FCN.fcn(pyargs('im_size',int32([size_1,size_2]),'batchsize',int32(1),'statebatchsize',int32(1))); 16 | end 17 | weight_file = './results/grid28_VIN.pk'; 18 | nn.load_weights(pyargs('infile',weight_file)); 19 | 20 | %% Evaluate NN 21 | % Predict trajectories in closed-loop, and compare with shortest path 22 | dom_size = [size_1,size_2]; % domain size 23 | Ndomains = 100; % number of domains to evaluate 24 | maxObs = 50; % maximum number of obstacles in a domain 25 | maxObsSize = 2.0; % maximum obstacle size 26 | Ntrajs = 1; % trajectories from each domain 27 | numActions = 8; 28 | action_vecs = ([[-1,0; 1,0; 0,1; 0,-1]; 1/sqrt(2)*[-1,1; -1,-1; 1,1; 1,-1]])'; % state difference unit vectors for each action 29 | action_vecs_unnorm = ([-1,0; 1,0; 0,1; 0,-1; -1,1; -1,-1; 1,1; 1,-1]); % un-normalized state difference vectors 30 | plot_value = false; 31 | 32 | % containers for data 33 | numSamples = 1; 34 | numTrajs = 1; 35 | figure(1); 36 | for dom = 1:Ndomains 37 | % generate random domain 38 | goal(1,1) = 1+randi(size_1-1); 39 | goal(1,2) = 1+randi(size_2-1); 40 | % generate random obstacles 41 | obs = obstacle_gen(dom_size,goal,maxObsSize); 42 | n_obs = obs.add_N_rand_obs(randi(maxObs)); 43 | add_border_res = obs.add_border; 44 | if n_obs == 0 || add_border_res 45 | disp('no obstacles added, or problem with border, regenerating map') 46 | continue; % no obstacles added, or problem with border, skip 47 | end 48 | im = double(rgb2gray(obs.getimage)); 49 | im = max(max(im)) - im; im = im./max(max(im)); imagesc(im); drawnow; 50 | % make graph (deterministic MDP) 51 | G = Gridworld_Graph8(im,goal(1),goal(2)); 52 | value_prior = G.getRewardPrior; 53 | % sample shortest-path trajectories in graph 54 | [states_xy, states_one_hot] = SampleGraphTraj(G,Ntrajs); 55 | figure(1); hold on; 56 | for i = 1:Ntrajs 57 | if ~isempty(states_xy{i}) && size(states_xy{i},1)>1 58 | L = size(states_xy{i},1)*2; 59 | pred_traj = zeros(L,2); 60 | pred_traj(1,:) = states_xy{i}(1,:); 61 | for j = 2:L 62 | % creat state vector and image vector, and save to file 63 | state_xy_data = uint8([pred_traj(j-1,1)-1, pred_traj(j-1,2)-1]); 64 | im_data = uint8(reshape(im',1,[])); 65 | value_data = uint8(reshape(value_prior',1,[])); 66 | % call NN to predict 
action from input file (passing data directly from Matlab to python is difficult) 67 | save('test_input.mat','im_data','value_data','state_xy_data'); 68 | a = nn.predict(pyargs('input', 'test_input.mat'))+1; 69 | % calculate next state based on action 70 | s = G.map_ind_to_state(pred_traj(j-1,1),pred_traj(j-1,2)); 71 | ns = G.sampleNextState(s,a); 72 | [nr,nc] = G.getCoords(ns); 73 | pred_traj(j,2) = nc; 74 | pred_traj(j,1) = nr; 75 | if (nr == goal(1)) && (nc == goal(2)) 76 | pred_traj(j+1:end,2) = nc; 77 | pred_traj(j+1:end,1) = nr; 78 | break; 79 | end 80 | end 81 | % plot stuff 82 | figure(1); 83 | plot(states_xy{i}(:,2),states_xy{i}(:,1));drawnow; 84 | plot(pred_traj(:,2),pred_traj(:,1),'-X');drawnow; 85 | legend('Shortest path','Predicted path'); 86 | plot(states_xy{i}(1,2),states_xy{i}(1,1),'-o');drawnow; 87 | plot(states_xy{i}(end,2),states_xy{i}(end,1),'-s');drawnow; 88 | hold off; 89 | if plot_value 90 | figure(2); 91 | pred_val = nn.predict_value(pyargs('input', 'test_input.mat')); 92 | val_map = python_ndarray_to_matrix(pred_val(1),[size_1,size_2]); 93 | r_map = python_ndarray_to_matrix(pred_val(2),[size_1,size_2]); 94 | subplot(1,2,1); 95 | imagesc(r_map); 96 | title('Learned Reward'); 97 | subplot(1,2,2); 98 | imagesc(val_map); 99 | title('Learned Value'); 100 | drawnow; 101 | end 102 | pause;%(0.6); 103 | end 104 | end 105 | end 106 | -------------------------------------------------------------------------------- /scripts/make_data_gridworld_nips.m: -------------------------------------------------------------------------------- 1 | % script to make data for nips CNN experiments 2 | clear all; 3 | data_dir = './data'; 4 | dodraw = false; 5 | %% Generate 8x8 map data 6 | data_file = 'gridworld_8.mat'; 7 | size_1 = 8; 8 | size_2 = 8; 9 | add_border = true; 10 | maxObs = 30; 11 | maxObsSize = 0.0; 12 | Ndomains = 5000; 13 | Ntrajs = 7; 14 | prior = 'reward'; 15 | rand_goal = true; 16 | zero_min_action = true; 17 | state_batch_size = 1; 18 | script_make_data; 19 | clear all; 20 | 21 | %% Generate 16x16 map data 22 | data_dir = './data'; 23 | data_file = 'gridworld_16.mat'; 24 | size_1 = 16; 25 | size_2 = 16; 26 | add_border = true; 27 | maxObs = 40; 28 | maxObsSize = 1.0; 29 | Ndomains = 5000; 30 | Ntrajs = 7; 31 | prior = 'reward'; 32 | rand_goal = true; 33 | zero_min_action = true; 34 | state_batch_size = 1; 35 | script_make_data; 36 | clear all; 37 | 38 | %% Generate 28x28 map data 39 | data_dir = './data'; 40 | data_file = 'gridworld_28.mat'; 41 | size_1 = 28; 42 | size_2 = 28; 43 | add_border = true; 44 | maxObs = 50; 45 | maxObsSize = 2.0; 46 | Ndomains = 5000; 47 | Ntrajs = 7; 48 | prior = 'reward'; 49 | rand_goal = true; 50 | zero_min_action = true; 51 | state_batch_size = 1; 52 | script_make_data; 53 | clear all; 54 | -------------------------------------------------------------------------------- /scripts/nips_gridworld_experiments_CNN.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Script for running icml gridworld experiments with CNN networks 3 | # 8x8 map 4 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_8_state_channel.mat --output ./nips16results/gridworld/grid8_CNN.pk --epochs 20 --model cnn --stepsize 0.01 --imsize 8 --reg 0.0 --batchsize 128 | tee -a ./nips16results/gridworld/out_grid8_CNN.txt ; 5 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_8_state_channel.mat --output ./nips16results/gridworld/grid8_CNN.pk 
--epochs 20 --model cnn --stepsize 0.002 --imsize 8 --reg 0.0 --batchsize 128 --warmstart ./nips16results/gridworld/grid8_CNN.pk | tee -a ./nips16results/gridworld/out_grid8_CNN.txt ; 6 | # 16x16 map 7 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16_state_channel.mat --output ./nips16results/gridworld/grid16_CNN.pk --epochs 20 --model cnn --stepsize 0.01 --imsize 16 --reg 0.0 --batchsize 128 | tee -a ./nips16results/gridworld/out_grid16_CNN.txt ; 8 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16_state_channel.mat --output ./nips16results/gridworld/grid16_CNN.pk --epochs 20 --model cnn --stepsize 0.002 --imsize 16 --reg 0.0 --batchsize 128 --warmstart ./nips16results/gridworld/grid16_CNN.pk | tee -a ./nips16results/gridworld/out_grid16_CNN.txt ; 9 | # 28x28 map 10 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_28_state_channel.mat --output ./nips16results/gridworld/grid28_CNN.pk --epochs 20 --model cnn --stepsize 0.01 --imsize 28 --reg 0.0 --batchsize 128 | tee -a ./nips16results/gridworld/out_grid28_CNN.txt ; 11 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_28_state_channel.mat --output ./nips16results/gridworld/grid28_CNN.pk --epochs 20 --model cnn --stepsize 0.002 --imsize 28 --reg 0.0 --batchsize 128 --warmstart ./nips16results/gridworld/grid28_CNN.pk | tee -a ./nips16results/gridworld/out_grid28_CNN.txt ; 12 | -------------------------------------------------------------------------------- /scripts/nips_gridworld_experiments_FCN.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Script for running icml gridworld experiments with FCN networks 3 | # 8x8 map 4 | THEANO_FLAGS='floatX=float32,device=cpu' python NN_run_training.py --input ./data/gridworld_8.mat --output ./nips16results/gridworld/grid8_FCN.pk --epochs 30 --model fcn --stepsize 0.01 --imsize 8 --reg 0.0 --batchsize 12 --statebatchsize 10 | tee -a ./nips16results/gridworld/out_grid8_FCN.txt ; 5 | THEANO_FLAGS='floatX=float32,device=cpu' python NN_run_training.py --input ./data/gridworld_8.mat --output ./nips16results/gridworld/grid8_FCN.pk --epochs 30 --model fcn --stepsize 0.005 --imsize 8 --reg 0.0 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid8_FCN.pk | tee -a ./nips16results/gridworld/out_grid8_FCN.txt ; 6 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid8_FCN.pk --epochs 30 --model fcn --stepsize 0.002 --imsize 16 --reg 0.0 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_FCN.pk | tee -a ./nips16results/gridworld/out_grid16_FCN.txt ; 7 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid8_FCN.pk --epochs 30 --model fcn --stepsize 0.001 --imsize 16 --reg 0.0 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_FCN.pk | tee -a ./nips16results/gridworld/out_grid16_FCN.txt ; 8 | 9 | # 16x16 map 10 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_FCN.pk --epochs 30 --model fcn --stepsize 0.01 --imsize 16 --reg 0.0 --batchsize 12 --statebatchsize 10 | tee -a ./nips16results/gridworld/out_grid16_FCN.txt ; 11 | 
#THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_FCN.pk --epochs 30 --model fcn --stepsize 0.005 --imsize 16 --reg 0.0 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_FCN.pk | tee -a ./nips16results/gridworld/out_grid16_FCN.txt ; 12 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_FCN.pk --epochs 30 --model fcn --stepsize 0.002 --imsize 16 --reg 0.0 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_FCN.pk | tee -a ./nips16results/gridworld/out_grid16_FCN.txt ; 13 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_FCN.pk --epochs 30 --model fcn --stepsize 0.001 --imsize 16 --reg 0.0 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_FCN.pk | tee -a ./nips16results/gridworld/out_grid16_FCN.txt ; 14 | 15 | # 28x28 map 16 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_28.mat --output ./nips16results/gridworld/grid28_FCN.pk --epochs 30 --model fcn --stepsize 0.01 --imsize 28 --reg 0.0 --batchsize 12 --statebatchsize 10 | tee -a ./nips16results/gridworld/out_grid28_FCN.txt ; 17 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_28.mat --output ./nips16results/gridworld/grid28_FCN.pk --epochs 30 --model fcn --stepsize 0.005 --imsize 28 --reg 0.0 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid28_FCN.pk | tee -a ./nips16results/gridworld/out_grid28_FCN.txt ; 18 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_28.mat --output ./nips16results/gridworld/grid28_FCN.pk --epochs 30 --model fcn --stepsize 0.002 --imsize 28 --reg 0.0 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid28_FCN.pk | tee -a ./nips16results/gridworld/out_grid28_FCN.txt ; 19 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_28.mat --output ./nips16results/gridworld/grid28_FCN.pk --epochs 30 --model fcn --stepsize 0.001 --imsize 28 --reg 0.0 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid28_FCN.pk | tee -a ./nips16results/gridworld/out_grid28_FCN.txt ; 20 | 21 | # 16x16 map 0.5 data 22 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_FCN_05.pk --epochs 30 --model fcn --stepsize 0.01 --imsize 16 --reg 0.0 --batchsize 12 --statebatchsize 10 --data_fraction 0.5 | tee -a ./nips16results/gridworld/out_grid16_FCN_05.txt ; 23 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_FCN_05.pk --epochs 30 --model fcn --stepsize 0.005 --imsize 16 --reg 0.0 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_FCN_05.pk --data_fraction 0.5 | tee -a ./nips16results/gridworld/out_grid16_FCN_05.txt ; 24 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_FCN_05.pk --epochs 30 --model fcn --stepsize 0.002 --imsize 16 --reg 0.0 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_FCN_05.pk 
--data_fraction 0.5 | tee -a ./nips16results/gridworld/out_grid16_FCN_05.txt ; 25 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_FCN_05.pk --epochs 30 --model fcn --stepsize 0.001 --imsize 16 --reg 0.0 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_FCN_05.pk --data_fraction 0.5 | tee -a ./nips16results/gridworld/out_grid16_FCN_05.txt ; 26 | 27 | -------------------------------------------------------------------------------- /scripts/nips_gridworld_experiments_VIN.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Script for running nips gridworld experiments with VIN networks 3 | mkdir -p results 4 | # 8x8 map 5 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_8.mat --output ./results/grid8_VIN.pk --epochs 30 --model valIterBatch --stepsize 0.01 --imsize 8 --reg 0.0 --k 10 --batchsize 12 --statebatchsize 10 | tee ./results/out_grid8_VIN.txt ; 6 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_8.mat --output ./results/grid8_VIN.pk --epochs 30 --model valIterBatch --stepsize 0.005 --imsize 8 --reg 0.0 --k 10 --batchsize 12 --statebatchsize 10 --warmstart ./results/grid8_VIN.pk | tee -a ./results/out_grid8_VIN.txt ; 7 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_8.mat --output ./results/grid8_VIN.pk --epochs 30 --model valIterBatch --stepsize 0.002 --imsize 8 --reg 0.0 --k 10 --batchsize 12 --statebatchsize 10 --warmstart ./results/grid8_VIN.pk | tee -a ./results/out_grid8_VIN.txt ; 8 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_8.mat --output ./results/grid8_VIN.pk --epochs 30 --model valIterBatch --stepsize 0.001 --imsize 8 --reg 0.0 --k 10 --batchsize 12 --statebatchsize 10 --warmstart ./results/grid8_VIN.pk | tee -a ./results/out_grid8_VIN.txt ; 9 | # 16x16 map 10 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./results/grid16_VIN.pk --epochs 30 --model valIterBatch --stepsize 0.01 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 | tee ./results/out_grid16_VIN.txt ; 11 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./results/grid16_VIN.pk --epochs 30 --model valIterBatch --stepsize 0.005 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 --warmstart ./results/grid16_VIN.pk | tee -a ./results/out_grid16_VIN.txt ; 12 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./results/grid16_VIN.pk --epochs 30 --model valIterBatch --stepsize 0.002 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 --warmstart ./results/grid16_VIN.pk | tee -a ./results/out_grid16_VIN.txt ; 13 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./results/grid16_VIN.pk --epochs 30 --model valIterBatch --stepsize 0.001 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 --warmstart ./results/grid16_VIN.pk | tee -a ./results/out_grid16_VIN.txt ; 14 | # 28x28 map 15 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_28.mat --output ./results/grid28_VIN.pk --epochs 30 --model valIterBatch --stepsize 0.01 --imsize 28 --reg 
0.0 --k 36 --batchsize 12 --statebatchsize 10 | tee ./results/out_grid28_VIN.txt ; 16 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_28.mat --output ./results/grid28_VIN.pk --epochs 30 --model valIterBatch --stepsize 0.005 --imsize 28 --reg 0.0 --k 36 --batchsize 12 --statebatchsize 10 --warmstart ./results/grid28_VIN.pk | tee -a ./results/out_grid28_VIN.txt ; 17 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_28.mat --output ./results/grid28_VIN.pk --epochs 30 --model valIterBatch --stepsize 0.002 --imsize 28 --reg 0.0 --k 36 --batchsize 12 --statebatchsize 10 --warmstart ./results/grid28_VIN.pk | tee -a ./results/out_grid28_VIN.txt ; 18 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_28.mat --output ./results/grid28_VIN.pk --epochs 30 --model valIterBatch --stepsize 0.001 --imsize 28 --reg 0.0 --k 36 --batchsize 12 --statebatchsize 10 --warmstart ./results/grid28_VIN.pk | tee -a ./results/out_grid28_VIN.txt ; 19 | 20 | -------------------------------------------------------------------------------- /scripts/nips_gridworld_experiments_VIN_untied.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Script for running icml gridworld experiments with VIN -res networks 3 | # 8x8 map 4 | THEANO_FLAGS='floatX=float32,device=cpu' python NN_run_training.py --input ./data/gridworld_8.mat --output ./nips16results/gridworld/grid8_VIN_untied.pk --epochs 30 --model valIterBatchUntied --stepsize 0.01 --imsize 8 --reg 0.0 --k 10 --batchsize 12 --statebatchsize 10 | tee -a ./nips16results/gridworld/out_grid8_VIN_untied.txt ; 5 | THEANO_FLAGS='floatX=float32,device=cpu' python NN_run_training.py --input ./data/gridworld_8.mat --output ./nips16results/gridworld/grid8_VIN_untied.pk --epochs 30 --model valIterBatchUntied --stepsize 0.005 --imsize 8 --reg 0.0 --k 10 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid8_VIN_untied.pk | tee -a ./nips16results/gridworld/out_grid8_VIN_untied.txt ; 6 | THEANO_FLAGS='floatX=float32,device=cpu' python NN_run_training.py --input ./data/gridworld_8.mat --output ./nips16results/gridworld/grid8_VIN_untied.pk --epochs 30 --model valIterBatchUntied --stepsize 0.002 --imsize 8 --reg 0.0 --k 10 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid8_VIN_untied.pk | tee -a ./nips16results/gridworld/out_grid8_VIN_untied.txt ; 7 | THEANO_FLAGS='floatX=float32,device=cpu' python NN_run_training.py --input ./data/gridworld_8.mat --output ./nips16results/gridworld/grid8_VIN_untied.pk --epochs 30 --model valIterBatchUntied --stepsize 0.001 --imsize 8 --reg 0.0 --k 10 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid8_VIN_untied.pk | tee -a ./nips16results/gridworld/out_grid8_VIN_untied.txt ; 8 | # 16x16 map 9 | THEANO_FLAGS='floatX=float32,device=cpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_VIN_untied.pk --epochs 30 --model valIterBatchUntied --stepsize 0.01 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 | tee -a ./nips16results/gridworld/out_grid16_VIN_untied.txt ; 10 | THEANO_FLAGS='floatX=float32,device=cpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_VIN_untied.pk --epochs 30 --model valIterBatchUntied --stepsize 0.005 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 
--warmstart ./nips16results/gridworld/grid16_VIN_untied.pk | tee -a ./nips16results/gridworld/out_grid16_VIN_untied.txt ; 11 | THEANO_FLAGS='floatX=float32,device=cpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_VIN_untied.pk --epochs 30 --model valIterBatchUntied --stepsize 0.002 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_VIN_untied.pk | tee -a ./nips16results/gridworld/out_grid16_VIN_untied.txt ; 12 | THEANO_FLAGS='floatX=float32,device=cpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_VIN_untied.pk --epochs 30 --model valIterBatchUntied --stepsize 0.001 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_VIN_untied.pk | tee -a ./nips16results/gridworld/out_grid16_VIN_untied.txt ; 13 | # 28x28 map 14 | THEANO_FLAGS='floatX=float32,device=cpu' python NN_run_training.py --input ./data/gridworld_28.mat --output ./nips16results/gridworld/grid28_VIN_untied.pk --epochs 30 --model valIterBatchUntied --stepsize 0.01 --imsize 28 --reg 0.0 --k 36 --batchsize 12 --statebatchsize 10 | tee -a ./nips16results/gridworld/out_grid28_VIN_untied.txt ; 15 | THEANO_FLAGS='floatX=float32,device=cpu' python NN_run_training.py --input ./data/gridworld_28.mat --output ./nips16results/gridworld/grid28_VIN_untied.pk --epochs 30 --model valIterBatchUntied --stepsize 0.005 --imsize 28 --reg 0.0 --k 36 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid28_VIN_untied.pk | tee -a ./nips16results/gridworld/out_grid28_VIN_untied.txt ; 16 | THEANO_FLAGS='floatX=float32,device=cpu' python NN_run_training.py --input ./data/gridworld_28.mat --output ./nips16results/gridworld/grid28_VIN_untied.pk --epochs 30 --model valIterBatchUntied --stepsize 0.002 --imsize 28 --reg 0.0 --k 36 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid28_VIN_untied.pk | tee -a ./nips16results/gridworld/out_grid28_VIN_untied.txt ; 17 | THEANO_FLAGS='floatX=float32,device=cpu' python NN_run_training.py --input ./data/gridworld_28.mat --output ./nips16results/gridworld/grid28_VIN_untied.pk --epochs 30 --model valIterBatchUntied --stepsize 0.001 --imsize 28 --reg 0.0 --k 36 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid28_VIN_untied.pk | tee -a ./nips16results/gridworld/out_grid28_VIN_untied.txt ; 18 | 19 | -------------------------------------------------------------------------------- /scripts/nips_gridworld_experiments_VIN_untied_data_fraction.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Script for running icml gridworld experiments with VIN networks 3 | # 16x16 map 4 | 5 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_VIN_untied_02_data.pk --epochs 30 --model valIterBatchUntied --stepsize 0.01 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 --data_fraction 0.2 | tee -a ./nips16results/gridworld/out_grid16_VIN_untied_02_data.txt ; 6 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_VIN_untied_02_data.pk --epochs 30 --model valIterBatchUntied --stepsize 0.005 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 --warmstart 
./nips16results/gridworld/grid16_VIN_untied_02_data.pk --data_fraction 0.2 | tee -a ./nips16results/gridworld/out_grid16_VIN_untied_02_data.txt ; 7 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_VIN_untied_02_data.pk --epochs 30 --model valIterBatchUntied --stepsize 0.002 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_VIN_untied_02_data.pk --data_fraction 0.2 | tee -a ./nips16results/gridworld/out_grid16_VIN_untied_02_data.txt ; 8 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_VIN_untied_02_data.pk --epochs 30 --model valIterBatchUntied --stepsize 0.001 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_VIN_untied_02_data.pk --data_fraction 0.2 | tee -a ./nips16results/gridworld/out_grid16_VIN_untied_02_data.txt ; 9 | 10 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_VIN_02_data.pk --epochs 30 --model valIterBatch --stepsize 0.01 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 --data_fraction 0.2 | tee -a ./nips16results/gridworld/out_grid16_VIN_02_data.txt ; 11 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_VIN_02_data.pk --epochs 30 --model valIterBatch --stepsize 0.005 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_VIN_02_data.pk --data_fraction 0.2 | tee -a ./nips16results/gridworld/out_grid16_VIN_02_data.txt ; 12 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_VIN_02_data.pk --epochs 30 --model valIterBatch --stepsize 0.002 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_VIN_02_data.pk --data_fraction 0.2 | tee -a ./nips16results/gridworld/out_grid16_VIN_02_data.txt ; 13 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_VIN_02_data.pk --epochs 30 --model valIterBatch --stepsize 0.001 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_VIN_02_data.pk --data_fraction 0.2 | tee -a ./nips16results/gridworld/out_grid16_VIN_02_data.txt ; 14 | 15 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_VIN_untied_05_data.pk --epochs 30 --model valIterBatchUntied --stepsize 0.01 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 --data_fraction 0.5 | tee -a ./nips16results/gridworld/out_grid16_VIN_untied_05_data.txt ; 16 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_VIN_untied_05_data.pk --epochs 30 --model valIterBatchUntied --stepsize 0.005 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_VIN_untied_05_data.pk --data_fraction 0.5 | tee -a ./nips16results/gridworld/out_grid16_VIN_untied_05_data.txt ; 17 | THEANO_FLAGS='floatX=float32,device=gpu' python 
NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_VIN_untied_05_data.pk --epochs 30 --model valIterBatchUntied --stepsize 0.005 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_VIN_untied_05_data.pk --data_fraction 0.5 | tee -a ./nips16results/gridworld/out_grid16_VIN_untied_05_data.txt ; 18 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_VIN_untied_05_data.pk --epochs 30 --model valIterBatchUntied --stepsize 0.001 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_VIN_untied_05_data.pk --data_fraction 0.5 | tee -a ./nips16results/gridworld/out_grid16_VIN_untied_05_data.txt ; 19 | 20 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_VIN_05_data.pk --epochs 30 --model valIterBatch --stepsize 0.01 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 --data_fraction 0.5 | tee -a ./nips16results/gridworld/out_grid16_VIN_05_data.txt ; 21 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_VIN_05_data.pk --epochs 30 --model valIterBatch --stepsize 0.005 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_VIN_05_data.pk --data_fraction 0.5 | tee -a ./nips16results/gridworld/out_grid16_VIN_05_data.txt ; 22 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_VIN_05_data.pk --epochs 30 --model valIterBatch --stepsize 0.005 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_VIN_05_data.pk --data_fraction 0.5 | tee -a ./nips16results/gridworld/out_grid16_VIN_05_data.txt ; 23 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_VIN_05_data.pk --epochs 30 --model valIterBatch --stepsize 0.001 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_VIN_05_data.pk --data_fraction 0.5 | tee -a ./nips16results/gridworld/out_grid16_VIN_05_data.txt ; 24 | -------------------------------------------------------------------------------- /test_network.m: -------------------------------------------------------------------------------- 1 | function [optimal_lengths,pred_lengths] = test_network(model, weight_file, test_file, imsize, k) 2 | % script to evaluate success rate of network on a test-set of trajectories 3 | 4 | tmp = py.convNN.convNN; clear tmp; % to load Python 5 | size_1 = imsize(1); size_2 = imsize(2); 6 | if strcmp(model,'VIN') 7 | nn = py.convBatch.convBatch(pyargs('im_size',int32([size_1,size_2]),'k',int32(k),'batchsize',int32(1),'statebatchsize',int32(1))); 8 | elseif strcmp(model,'untiedVIN') 9 | nn = py.vin_untied.vin_untied(pyargs('im_size',int32([size_1,size_2]),'k',int32(k),'batchsize',int32(1),'statebatchsize',int32(1))); 10 | elseif strcmp(model,'FCN') 11 | nn = py.FCN.fcn(pyargs('im_size',int32([size_1,size_2]),'batchsize',int32(1),'statebatchsize',int32(1))); 12 | elseif strcmp(model,'CNN') 13 | nn = py.CNN.cnn(pyargs('im_size',int32([size_1,size_2]),'batchsize',int32(1))); 14 | end 15 | 
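% Load the trained weights into the Python object, then load the test-set file,
% which is expected to contain all_im_data, all_value_data and all_states_xy.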
nn.load_weights(pyargs('infile',weight_file)); 16 | load(test_file); 17 | %% Evaluate NN 18 | % Predict trajectories in closed-loop, and compare with shortest path 19 | Ndomains = size(all_im_data,1); % number of domains 20 | 21 | % containers for data 22 | optimal_lengths = zeros(Ndomains,1); 23 | pred_lengths = zeros(Ndomains,1); 24 | no_obs_im = ones(size_1,size_2); 25 | for dom = 1:Ndomains 26 | goal = all_states_xy{dom}(end,:); 27 | start = all_states_xy{dom}(1,:); 28 | optimal_lengths(dom) = length(all_states_xy{dom}); 29 | im = reshape(all_im_data(dom,:),size_1,size_2); 30 | G = Gridworld_Graph8(im,goal(1),goal(2)); 31 | G_no_obs = Gridworld_Graph8(no_obs_im,goal(1),goal(2)); 32 | value_prior = reshape(all_value_data(dom,:),size_1,size_2); 33 | if ~isempty(all_states_xy{dom}) && size(all_states_xy{dom},1)>1 34 | L = size(all_states_xy{dom},1)*2; 35 | pred_traj = zeros(L,2); 36 | pred_traj(1,:) = all_states_xy{dom}(1,:); 37 | for j = 2:L 38 | % creat current state vector and image vector, and save to file 39 | state_xy_data = uint8([pred_traj(j-1,1)-1, pred_traj(j-1,2)-1]); 40 | im_data = uint8(reshape(im',1,[])); 41 | value_data = uint8(reshape(value_prior',1,[])); 42 | % call NN to predict action from input file 43 | save('test_input.mat','im_data','value_data','state_xy_data'); 44 | a = nn.predict(pyargs('input', 'test_input.mat'))+1; 45 | % calculate next state based on action 46 | s = G.map_ind_to_state(pred_traj(j-1,1),pred_traj(j-1,2)); 47 | ns = G.sampleNextState(s,a); 48 | [nr,nc] = G.getCoords(ns); 49 | pred_traj(j,2) = nc; 50 | pred_traj(j,1) = nr; 51 | if (nr == goal(1)) && (nc == goal(2)) 52 | pred_traj(j+1:end,2) = nc; 53 | pred_traj(j+1:end,1) = nr; 54 | pred_lengths(dom) = j; 55 | break; 56 | end 57 | end 58 | end 59 | disp(Ndomains-dom); 60 | end 61 | end 62 | -------------------------------------------------------------------------------- /theano_utils.py: -------------------------------------------------------------------------------- 1 | # THEANO NN utils 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | 7 | def init_weights_T(*shape): 8 | return theano.shared((np.random.randn(*shape) * 0.01).astype(theano.config.floatX)) 9 | 10 | 11 | def conv2D_keep_shape(x, w, image_shape, filter_shape, subsample=(1, 1)): 12 | # crop output to same size as input 13 | fs = T.shape(w)[2] - 1 # this is the filter size minus 1 14 | ims = T.shape(x)[2] # this is the image size 15 | return theano.sandbox.cuda.dnn.dnn_conv(img=x, 16 | kerns=w, 17 | border_mode='full', 18 | subsample=subsample, 19 | )[:, :, fs/2:ims+fs/2, fs/2:ims+fs/2] 20 | 21 | 22 | def rmsprop_updates_T(cost, params, stepsize=0.001, rho=0.9, epsilon=1e-6): 23 | # rmsprop in Theano 24 | grads = T.grad(cost=cost, wrt=params) 25 | updates = [] 26 | for p, g in zip(params, grads): 27 | acc = theano.shared(p.get_value() * 0.) 
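# acc holds a running average of the squared gradient for this parameter; the next
# line decays it by rho and mixes in the current squared gradient (the RMSProp accumulator)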
28 | acc_new = rho * acc + (1 - rho) * g ** 2 29 | gradient_scaling = T.sqrt(acc_new + epsilon) 30 | g = g / gradient_scaling 31 | updates.append((acc, acc_new)) 32 | updates.append((p, p - stepsize * g)) 33 | return updates 34 | 35 | 36 | def flip_filter(w): 37 | if w.ndim == 4: 38 | t = w.copy() 39 | s = t.shape 40 | for i in range(0, s[0]): 41 | for j in range(0, s[1]): 42 | t[i][j] = np.fliplr(t[i][j]) 43 | t[i][j] = np.flipud(t[i][j]) 44 | return t 45 | else: 46 | return w 47 | 48 | 49 | class ConvLayer(object): 50 | """Pool Layer of a convolutional network, copied from Theano tutorial """ 51 | def __init__(self, input_tensor, filter_shape, image_shape, poolsize=(2, 2)): 52 | assert image_shape[1] == filter_shape[1] 53 | self.input = input_tensor 54 | fan_in = np.prod(filter_shape[1:]) 55 | fan_out = (filter_shape[0] * np.prod(filter_shape[2:]) // 56 | np.prod(poolsize)) 57 | 58 | # initialize weights with random weights 59 | W_bound = np.sqrt(6. / (fan_in + fan_out)) 60 | self.W = theano.shared( 61 | np.asarray(np.random.uniform(low=-W_bound, high=W_bound, size=filter_shape), 62 | dtype=theano.config.floatX), 63 | ) 64 | b_values = np.zeros((filter_shape[0],), dtype=theano.config.floatX) 65 | self.b = theano.shared(value=b_values, borrow=True) 66 | 67 | # convolve input feature maps with filters 68 | conv_out = conv2D_keep_shape( 69 | x=input_tensor, 70 | w=self.W, 71 | image_shape=image_shape, 72 | filter_shape=filter_shape 73 | ) 74 | 75 | # downsample each feature map individually, using maxpooling 76 | pooled_out = theano.tensor.signal.pool.pool_2d( 77 | input=conv_out, 78 | ds=poolsize, 79 | ignore_border=True 80 | ) 81 | 82 | # add the bias term. Since the bias is a vector (1D array), we first 83 | # reshape it to a tensor of shape (1, n_filters, 1, 1). 
Each bias will 84 | # thus be broadcasted across mini-batches and feature map 85 | # width & height 86 | self.output = T.nnet.relu(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x')) 87 | 88 | self.out_shape = (image_shape[0], filter_shape[0], 89 | int(np.floor(image_shape[2]/poolsize[0])), 90 | int(np.floor(image_shape[3]/poolsize[1]))) 91 | 92 | # store parameters of this layer 93 | self.params = [self.W, self.b] 94 | 95 | # keep track of model input 96 | self.input = input_tensor 97 | 98 | -------------------------------------------------------------------------------- /util/SP.m: -------------------------------------------------------------------------------- 1 | function [path] = SP(pred,s,t) 2 | % trace-back shortest path from s(ource) to t(arget), with predecessor list 3 | % pred, calculated before-hand 4 | max_len = 1e3; 5 | path = zeros(max_len,1); 6 | i = max_len; 7 | path(i) = t; 8 | while path(i)~=s && i>1 9 | try 10 | path(i-1) = pred(path(i)); 11 | i = i-1; 12 | catch 13 | warning('no path found, continuing'); 14 | path = []; 15 | return 16 | end 17 | end 18 | if i>=1 19 | path = path(i:end); 20 | else 21 | path = NaN; 22 | end 23 | end -------------------------------------------------------------------------------- /util/python_ndarray_to_matrix.m: -------------------------------------------------------------------------------- 1 | function m = python_ndarray_to_matrix(p,psize) 2 | cP = cell(p); 3 | flat = cP{1,1}.flatten(); 4 | flatlist = flat.tolist(); 5 | m = zeros(psize); 6 | ind = 1; 7 | for i = 1:psize(1) 8 | for j = 1:psize(2) 9 | m(i,j) = double(flatlist{ind}); 10 | ind = ind+1; 11 | end 12 | end -------------------------------------------------------------------------------- /util/rand_choose.m: -------------------------------------------------------------------------------- 1 | function res = rand_choose(in_vec) 2 | % sample an element from probability vector in_vec 3 | if size(in_vec,2)==1 4 | in_vec = in_vec'; 5 | end 6 | 7 | tmp = [0 cumsum(in_vec)]; 8 | q = rand; 9 | res = find(q>tmp(1:end-1) & q0)=1; 10 | if ~doesexist 11 | assignin('caller',name,val); 12 | end -------------------------------------------------------------------------------- /vin.py: -------------------------------------------------------------------------------- 1 | # VI network using THEANO, takes batches of state input 2 | from NNobj import * 3 | from theano_utils import * 4 | 5 | 6 | class vin(NNobj): 7 | "Class for a neural network that does k iterations of value iteration" 8 | def __init__(self, model="VIN", im_size=[28, 28], dropout=False, devtype="cpu", grad_check=False, reg=0, k=10, 9 | statebatchsize=10, batchsize=128): 10 | self.im_size = im_size # input image size 11 | self.model = model 12 | self.reg = reg # regularization (currently not implemented) 13 | self.k = k # number of VI iterations 14 | self.batchsize = batchsize # batch size for training 15 | self.statebatchsize = statebatchsize # number of state inputs for every image input, since each image is the 16 | # same for many states in the data 17 | np.random.seed(0) 18 | print(model) 19 | theano.config.blas.ldflags = "-L/usr/local/lib -lopenblas" 20 | 21 | # X input : l=2 stacked images: obstacle map and reward function prior 22 | self.X = T.ftensor4(name="X") 23 | # S1,S2 input : state position (vertical and horizontal position) 24 | self.S1 = T.bmatrix("S1") # state first dimension * statebatchsize 25 | self.S2 = T.bmatrix("S2") # state second dimension * statebatchsize 26 | self.y = T.bvector("y") # output action * 
statebatchsize 27 | 28 | l = 2 # channels in input layer 29 | l_h = 150 # channels in initial hidden layer 30 | l_q = 10 # channels in q layer (~actions) 31 | 32 | self.vin_net = VinBlock(in_x=self.X, in_s1=self.S1, in_s2=self.S2, in_x_channels=l, imsize=self.im_size, 33 | batchsize=self.batchsize, state_batch_size=self.statebatchsize, l_h=l_h, l_q=l_q, 34 | k=self.k) 35 | self.p_of_y = self.vin_net.output 36 | self.params = self.vin_net.params 37 | # Total 1910 parameters 38 | 39 | self.cost = -T.mean(T.log(self.p_of_y)[T.arange(self.y.shape[0]), 40 | self.y], dtype=theano.config.floatX) 41 | self.y_pred = T.argmax(self.p_of_y, axis=1) 42 | self.err = T.mean(T.neq(self.y_pred, self.y.flatten()), dtype=theano.config.floatX) 43 | 44 | self.computeloss = theano.function(inputs=[self.X, self.S1, self.S2, self.y], 45 | outputs=[self.err, self.cost]) 46 | self.y_out = theano.function(inputs=[self.X, self.S1, self.S2], outputs=[self.y_pred]) 47 | 48 | def run_training(self, input, stepsize=0.01, epochs=10, output='None', batch_size=128, grad_check=True, 49 | profile=False, data_fraction=1): 50 | # run training from input matlab data file, and save test data prediction in output file 51 | # load data from Matlab file, including 52 | # im_data: flattened images 53 | # state_data: concatenated one-hot vectors for each state variable 54 | # state_xy_data: state variable (x,y position) 55 | # label_data: one-hot vector for action (state difference) 56 | matlab_data = sio.loadmat(input) 57 | im_data = matlab_data["batch_im_data"] 58 | im_data = (im_data - 1)/255 # obstacles = 1, free zone = 0 59 | value_data = matlab_data["batch_value_data"] 60 | state1_data = matlab_data["state_x_data"] 61 | state2_data = matlab_data["state_y_data"] 62 | label_data = matlab_data["batch_label_data"] 63 | ydata = label_data.astype('int8') 64 | Xim_data = im_data.astype(theano.config.floatX) 65 | Xim_data = Xim_data.reshape(-1, 1, self.im_size[0], self.im_size[1]) 66 | Xval_data = value_data.astype(theano.config.floatX) 67 | Xval_data = Xval_data.reshape(-1, 1, self.im_size[0], self.im_size[1]) 68 | Xdata = np.append(Xim_data, Xval_data, axis=1) 69 | S1data = state1_data.astype('int8') 70 | S2data = state2_data.astype('int8') 71 | 72 | all_training_samples = int(6/7.0*Xdata.shape[0]) 73 | training_samples = int(data_fraction * all_training_samples) 74 | Xtrain = Xdata[0:training_samples] 75 | S1train = S1data[0:training_samples] 76 | S2train = S2data[0:training_samples] 77 | ytrain = ydata[0:training_samples] 78 | 79 | Xtest = Xdata[all_training_samples:] 80 | S1test = S1data[all_training_samples:] 81 | S2test = S2data[all_training_samples:] 82 | ytest = ydata[all_training_samples:] 83 | ytest = ytest.flatten() 84 | 85 | sortinds = np.random.permutation(training_samples) 86 | Xtrain = Xtrain[sortinds] 87 | S1train = S1train[sortinds] 88 | S2train = S2train[sortinds] 89 | ytrain = ytrain[sortinds] 90 | ytrain = ytrain.flatten() 91 | 92 | self.updates = rmsprop_updates_T(self.cost, self.params, stepsize=stepsize) 93 | self.train = theano.function(inputs=[self.X, self.S1, self.S2, self.y], outputs=[], updates=self.updates) 94 | 95 | print fmt_row(10, ["Epoch", "Train NLL", "Train Err", "Test NLL", "Test Err", "Epoch Time"]) 96 | for i_epoch in xrange(int(epochs)): 97 | tstart = time.time() 98 | # do training 99 | for start in xrange(0, Xtrain.shape[0], batch_size): 100 | end = start+batch_size 101 | if end <= Xtrain.shape[0]: 102 | self.train(Xtrain[start:end], S1train[start:end], S2train[start:end], 103 | 
ytrain[start*self.statebatchsize:end*self.statebatchsize]) 104 | elapsed = time.time() - tstart 105 | # compute losses 106 | trainerr = 0. 107 | trainloss = 0. 108 | testerr = 0. 109 | testloss = 0. 110 | num = 0 111 | for start in xrange(0, Xtest.shape[0], batch_size): 112 | end = start+batch_size 113 | if end <= Xtest.shape[0]: 114 | num += 1 115 | trainerr_, trainloss_ = self.computeloss(Xtrain[start:end], S1train[start:end], S2train[start:end], 116 | ytrain[start*self.statebatchsize:end*self.statebatchsize]) 117 | testerr_, testloss_ = self.computeloss(Xtest[start:end], S1test[start:end], S2test[start:end], 118 | ytest[start*self.statebatchsize:end*self.statebatchsize]) 119 | trainerr += trainerr_ 120 | trainloss += trainloss_ 121 | testerr += testerr_ 122 | testloss += testloss_ 123 | print fmt_row(10, [i_epoch, trainloss/num, trainerr/num, testloss/num, testerr/num, elapsed]) 124 | 125 | def predict(self, input): 126 | # NN output for a single input, read from file 127 | matlab_data = sio.loadmat(input) 128 | im_data = matlab_data["im_data"] 129 | im_data = (im_data - 1)/255 # obstacles = 1, free zone = 0 130 | state_data = matlab_data["state_xy_data"] 131 | value_data = matlab_data["value_data"] 132 | xim_test = im_data.astype(theano.config.floatX) 133 | xim_test = xim_test.reshape(-1, 1, self.im_size[0], self.im_size[1]) 134 | xval_test = value_data.astype(theano.config.floatX) 135 | xval_test = xval_test.reshape(-1, 1, self.im_size[0], self.im_size[1]) 136 | x_test = np.append(xim_test, xval_test, axis=1) 137 | s_test = state_data.astype('int8') 138 | s1_test = s_test[:, 0].reshape([1, 1]) 139 | s2_test = s_test[:, 1].reshape([1, 1]) 140 | out = self.y_out(x_test, s1_test, s2_test) 141 | return out[0][0] 142 | 143 | def predict_value(self, input): 144 | # Value and reward for a single input, read from file 145 | val_pred = theano.function(inputs=[self.X], outputs=[self.vin_net.v]) 146 | r_pred = theano.function(inputs=[self.X], outputs=[self.vin_net.r]) 147 | matlab_data = sio.loadmat(input) 148 | im_data = matlab_data["im_data"] 149 | im_data = (im_data - 1)/255 # obstacles = 1, free zone = 0 150 | value_data = matlab_data["value_data"] 151 | xim_test = im_data.astype(theano.config.floatX) 152 | xim_test = xim_test.reshape(-1, 1, self.im_size[0], self.im_size[1]) 153 | xval_test = value_data.astype(theano.config.floatX) 154 | xval_test = xval_test.reshape(-1, 1, self.im_size[0], self.im_size[1]) 155 | x_test = np.append(xim_test, xval_test, axis=1) 156 | out_v = val_pred(x_test) 157 | out_r = r_pred(x_test) 158 | return [out_v[0][0], out_r[0][0]] 159 | 160 | def load_weights(self, infile="weight_dump.pk"): 161 | dump = pickle.load(open(infile, 'r')) 162 | [n.set_value(p) for n, p in zip(self.params, dump)] 163 | 164 | def save_weights(self, outfile="weight_dump.pk"): 165 | pickle.dump([n.get_value() for n in self.params], open(outfile, 'w')) 166 | 167 | 168 | class VinBlock(object): 169 | """VIN block""" 170 | def __init__(self, in_x, in_s1, in_s2, in_x_channels, imsize, batchsize=128, 171 | state_batch_size=1, l_h=150, l_q=10, k=0): 172 | """ 173 | Allocate a VIN block with shared variable internal parameters. 174 | 175 | :type in_x: theano.tensor.dtensor4 176 | :param in_x: symbolic input image tensor, of shape [batchsize, in_x_channels, imsize[0], imsize[1]] 177 | Typically : first channel is image, second is the reward prior. 
178 | 179 | :type in_s1: theano.tensor.bmatrix 180 | :param in_s1: symbolic input batches of vertical positions, of shape [batchsize, state_batch_size] 181 | 182 | :type in_s2: theano.tensor.bmatrix 183 | :param in_s2: symbolic input batches of horizontal positions, of shape [batchsize, state_batch_size] 184 | 185 | :type in_x_channels: int32 186 | :param in_x_channels: number of input channels 187 | 188 | :type imsize: tuple or list of length 2 189 | :param imsize: (image height, image width) 190 | 191 | :type batchsize: int32 192 | :param batchsize: batch size 193 | 194 | :type state_batch_size: int32 195 | :param state_batch_size: number of state inputs for each sample 196 | 197 | :type l_h: int32 198 | :param l_h: number of channels in first hidden layer 199 | 200 | :type l_q: int32 201 | :param l_q: number of channels in q layer (~actions) 202 | 203 | :type k: int32 204 | :param k: number of VI iterations (actually, real number of iterations is k+1) 205 | 206 | """ 207 | self.bias = theano.shared((np.random.randn(l_h) * 0.01).astype(theano.config.floatX)) # 150 parameters 208 | self.w0 = init_weights_T(l_h, in_x_channels, 3, 3) # 1350 parameters 209 | # initial conv layer over image+reward prior 210 | self.h = conv2D_keep_shape(in_x, self.w0, image_shape=[batchsize, self.w0.shape.eval()[1], 211 | imsize[0], imsize[1]], 212 | filter_shape=self.w0.shape.eval()) 213 | self.h = self.h + self.bias.dimshuffle('x', 0, 'x', 'x') 214 | 215 | self.w1 = init_weights_T(1, l_h, 1, 1) # 150 parameters 216 | self.r = conv2D_keep_shape(self.h, self.w1, image_shape=[batchsize, self.w0.shape.eval()[0], 217 | imsize[0], imsize[1]], 218 | filter_shape=self.w1.shape.eval()) 219 | 220 | # weights from inputs to q layer (~reward in Bellman equation) 221 | self.w = init_weights_T(l_q, 1, 3, 3) # 90 parameters 222 | # feedback weights from v layer into q layer (~transition probabilities in Bellman equation) 223 | self.w_fb = init_weights_T(l_q, 1, 3, 3) # 90 parameters 224 | 225 | self.q = conv2D_keep_shape(self.r, self.w, image_shape=[batchsize, self.w1.shape.eval()[0], 226 | imsize[0], imsize[1]], 227 | filter_shape=self.w.shape.eval()) 228 | self.v = T.max(self.q, axis=1, keepdims=True) 229 | 230 | for i in range(0, k-1): 231 | self.q = conv2D_keep_shape(T.concatenate([self.r, self.v], axis=1), T.concatenate([self.w, self.w_fb], 232 | axis=1), 233 | image_shape=[batchsize, self.w1.shape.eval()[0]+1, imsize[0], imsize[1]], 234 | filter_shape=T.concatenate([self.w, self.w_fb], axis=1).shape.eval()) 235 | self.v = T.max(self.q, axis=1, keepdims=True) 236 | 237 | # do one last convolution 238 | self.q = conv2D_keep_shape(T.concatenate([self.r, self.v], axis=1), T.concatenate([self.w, self.w_fb], axis=1), 239 | image_shape=[batchsize, self.w1.shape.eval()[0]+1, imsize[0], imsize[1]], 240 | filter_shape=T.concatenate([self.w, self.w_fb], axis=1).shape.eval()) 241 | 242 | # Select the conv-net channels at the state position (S1,S2). 243 | # This intuitively corresponds to each channel representing an action, and the convnet the Q function. 
244 | # The tricky thing is we want to select the same (S1,S2) position *for each* channel and for each sample 245 | self.q_out = self.q[T.extra_ops.repeat(T.arange(self.q.shape[0]), state_batch_size), :, in_s1.flatten(), 246 | in_s2.flatten()] 247 | 248 | # softmax output weights 249 | self.w_o = init_weights_T(l_q, 8) # 80 parameters 250 | self.output = T.nnet.softmax(T.dot(self.q_out, self.w_o)) 251 | 252 | self.params = [self.w0, self.bias, self.w1, self.w, self.w_fb, self.w_o] 253 | -------------------------------------------------------------------------------- /vin_untied.py: -------------------------------------------------------------------------------- 1 | # VI network using THEANO, takes batches of state input 2 | from NNobj import * 3 | from theano_utils import * 4 | 5 | 6 | class vin_untied(NNobj): 7 | "Class for a neural network that does k iterations of value iteration" 8 | def __init__(self, model="VIN", im_size=[28, 28], dropout=False, devtype="cpu", grad_check=False, reg=0, k=10, 9 | statebatchsize=10, batchsize=128): 10 | self.im_size = im_size # input image size 11 | self.model = model 12 | self.reg = reg # regularization (currently not implemented) 13 | self.k = k # number of VI iterations 14 | self.batchsize = batchsize # batch size for training 15 | self.statebatchsize = statebatchsize # number of state inputs for every image input, since each image is the 16 | # same for many states in the data 17 | np.random.seed(0) 18 | print(model) 19 | theano.config.blas.ldflags = "-L/usr/local/lib -lopenblas" 20 | 21 | # X input : l=2 stacked images: obstacle map and reward function prior 22 | self.X = T.ftensor4(name="X") 23 | # S1,S2 input : state position (vertical and horizontal position) 24 | self.S1 = T.bmatrix("S1") # state first dimension * statebatchsize 25 | self.S2 = T.bmatrix("S2") # state second dimension * statebatchsize 26 | self.y = T.bvector("y") # output action * statebatchsize 27 | 28 | l = 2 # channels in input layer 29 | l_h = 150 # channels in initial hidden layer 30 | l_q = 10 # channels in q layer (~actions) 31 | 32 | self.vin_net = VinBlock(in_x=self.X, in_s1=self.S1, in_s2=self.S2, in_x_channels=l, imsize=self.im_size, 33 | batchsize=self.batchsize, state_batch_size=self.statebatchsize, l_h=l_h, l_q=l_q, 34 | k=self.k) 35 | self.p_of_y = self.vin_net.output 36 | self.params = self.vin_net.params 37 | 38 | self.cost = -T.mean(T.log(self.p_of_y)[T.arange(self.y.shape[0]), 39 | self.y], dtype=theano.config.floatX) 40 | self.y_pred = T.argmax(self.p_of_y, axis=1) 41 | self.err = T.mean(T.neq(self.y_pred, self.y.flatten()), dtype=theano.config.floatX) 42 | 43 | self.computeloss = theano.function(inputs=[self.X, self.S1, self.S2, self.y], 44 | outputs=[self.err, self.cost]) 45 | self.y_out = theano.function(inputs=[self.X, self.S1, self.S2], outputs=[self.y_pred]) 46 | 47 | def run_training(self, input, stepsize=0.01, epochs=10, output='None', batch_size=128, grad_check=True, 48 | profile=False, data_fraction=1): 49 | # run training from input matlab data file, and save test data prediction in output file 50 | # load data from Matlab file, including 51 | # im_data: flattened images 52 | # state_data: concatenated one-hot vectors for each state variable 53 | # state_xy_data: state variable (x,y position) 54 | # label_data: one-hot vector for action (state difference) 55 | matlab_data = sio.loadmat(input) 56 | im_data = matlab_data["batch_im_data"] 57 | im_data = (im_data - 1)/255 # obstacles = 1, free zone = 0 58 | value_data = matlab_data["batch_value_data"] 
59 | state1_data = matlab_data["state_x_data"] 60 | state2_data = matlab_data["state_y_data"] 61 | label_data = matlab_data["batch_label_data"] 62 | ydata = label_data.astype('int8') 63 | Xim_data = im_data.astype(theano.config.floatX) 64 | Xim_data = Xim_data.reshape(-1, 1, self.im_size[0], self.im_size[1]) 65 | Xval_data = value_data.astype(theano.config.floatX) 66 | Xval_data = Xval_data.reshape(-1, 1, self.im_size[0], self.im_size[1]) 67 | Xdata = np.append(Xim_data, Xval_data, axis=1) 68 | S1data = state1_data.astype('int8') 69 | S2data = state2_data.astype('int8') 70 | 71 | all_training_samples = int(6/7.0*Xdata.shape[0]) 72 | training_samples = int(data_fraction * all_training_samples) 73 | Xtrain = Xdata[0:training_samples] 74 | S1train = S1data[0:training_samples] 75 | S2train = S2data[0:training_samples] 76 | ytrain = ydata[0:training_samples] 77 | 78 | Xtest = Xdata[all_training_samples:] 79 | S1test = S1data[all_training_samples:] 80 | S2test = S2data[all_training_samples:] 81 | ytest = ydata[all_training_samples:] 82 | ytest = ytest.flatten() 83 | 84 | sortinds = np.random.permutation(training_samples) 85 | Xtrain = Xtrain[sortinds] 86 | S1train = S1train[sortinds] 87 | S2train = S2train[sortinds] 88 | ytrain = ytrain[sortinds] 89 | ytrain = ytrain.flatten() 90 | 91 | self.updates = rmsprop_updates_T(self.cost, self.params, stepsize=stepsize) 92 | self.train = theano.function(inputs=[self.X, self.S1, self.S2, self.y], outputs=[], updates=self.updates) 93 | 94 | print fmt_row(10, ["Epoch", "Train NLL", "Train Err", "Test NLL", "Test Err", "Epoch Time"]) 95 | for i_epoch in xrange(int(epochs)): 96 | tstart = time.time() 97 | # do training 98 | for start in xrange(0, Xtrain.shape[0], batch_size): 99 | end = start+batch_size 100 | if end <= Xtrain.shape[0]: 101 | self.train(Xtrain[start:end], S1train[start:end], S2train[start:end], 102 | ytrain[start*self.statebatchsize:end*self.statebatchsize]) 103 | elapsed = time.time() - tstart 104 | # compute losses 105 | trainerr = 0. 106 | trainloss = 0. 107 | testerr = 0. 108 | testloss = 0. 
109 | num = 0 110 | for start in xrange(0, Xtest.shape[0], batch_size): 111 | end = start+batch_size 112 | if end <= Xtest.shape[0]: 113 | num += 1 114 | trainerr_, trainloss_ = self.computeloss(Xtrain[start:end], S1train[start:end], S2train[start:end], 115 | ytrain[start*self.statebatchsize:end*self.statebatchsize]) 116 | testerr_, testloss_ = self.computeloss(Xtest[start:end], S1test[start:end], S2test[start:end], 117 | ytest[start*self.statebatchsize:end*self.statebatchsize]) 118 | trainerr += trainerr_ 119 | trainloss += trainloss_ 120 | testerr += testerr_ 121 | testloss += testloss_ 122 | print fmt_row(10, [i_epoch, trainloss/num, trainerr/num, testloss/num, testerr/num, elapsed]) 123 | 124 | def predict(self, input): 125 | # NN output for a single input, read from file 126 | matlab_data = sio.loadmat(input) 127 | im_data = matlab_data["im_data"] 128 | im_data = (im_data - 1)/255 # obstacles = 1, free zone = 0 129 | state_data = matlab_data["state_xy_data"] 130 | value_data = matlab_data["value_data"] 131 | xim_test = im_data.astype(theano.config.floatX) 132 | xim_test = xim_test.reshape(-1, 1, self.im_size[0], self.im_size[1]) 133 | xval_test = value_data.astype(theano.config.floatX) 134 | xval_test = xval_test.reshape(-1, 1, self.im_size[0], self.im_size[1]) 135 | x_test = np.append(xim_test, xval_test, axis=1) 136 | s_test = state_data.astype('int8') 137 | s1_test = s_test[:, 0].reshape([1, 1]) 138 | s2_test = s_test[:, 1].reshape([1, 1]) 139 | out = self.y_out(x_test, s1_test, s2_test) 140 | return out[0][0] 141 | 142 | def load_weights(self, infile="weight_dump.pk"): 143 | dump = pickle.load(open(infile, 'r')) 144 | [n.set_value(p) for n, p in zip(self.params, dump)] 145 | 146 | def save_weights(self, outfile="weight_dump.pk"): 147 | pickle.dump([n.get_value() for n in self.params], open(outfile, 'w')) 148 | 149 | 150 | class VinBlock(object): 151 | """VIN block""" 152 | def __init__(self, in_x, in_s1, in_s2, in_x_channels, imsize, batchsize=128, 153 | state_batch_size=1, l_h=150, l_q=10, k=0): 154 | """ 155 | Allocate a VIN block with shared variable internal parameters. 156 | 157 | :type in_x: theano.tensor.dtensor4 158 | :param in_x: symbolic input image tensor, of shape [batchsize, in_x_channels, imsize[0], imsize[1]] 159 | Typically : first channel is image, second is the reward prior. 
160 | 161 | :type in_s1: theano.tensor.bmatrix 162 | :param in_s1: symbolic input batches of vertical positions, of shape [batchsize, state_batch_size] 163 | 164 | :type in_s2: theano.tensor.bmatrix 165 | :param in_s2: symbolic input batches of horizontal positions, of shape [batchsize, state_batch_size] 166 | 167 | :type in_x_channels: int32 168 | :param in_x_channels: number of input channels 169 | 170 | :type imsize: tuple or list of length 2 171 | :param imsize: (image height, image width) 172 | 173 | :type batchsize: int32 174 | :param batchsize: batch size 175 | 176 | :type state_batch_size: int32 177 | :param state_batch_size: number of state inputs for each sample 178 | 179 | :type l_h: int32 180 | :param l_h: number of channels in first hidden layer 181 | 182 | :type l_q: int32 183 | :param l_q: number of channels in q layer (~actions) 184 | 185 | :type k: int32 186 | :param k: number of VI iterations (actually, real number of iterations is k+1) 187 | 188 | """ 189 | self.bias = theano.shared((np.random.randn(l_h) * 0.01).astype(theano.config.floatX)) # 150 parameters 190 | self.w0 = init_weights_T(l_h, in_x_channels, 3, 3) # 1350 parameters 191 | # initial conv layer over image+reward prior 192 | self.h = conv2D_keep_shape(in_x, self.w0, image_shape=[batchsize, self.w0.shape.eval()[1], 193 | imsize[0], imsize[1]], 194 | filter_shape=self.w0.shape.eval()) 195 | self.h = self.h + self.bias.dimshuffle('x', 0, 'x', 'x') 196 | 197 | self.w1 = init_weights_T(1, l_h, 1, 1) # 150 parameters 198 | self.r = conv2D_keep_shape(self.h, self.w1, image_shape=[batchsize, self.w0.shape.eval()[0], 199 | imsize[0], imsize[1]], 200 | filter_shape=self.w1.shape.eval()) 201 | 202 | # weights from inputs to q layer (~reward in Bellman equation) 203 | self.w_list = [init_weights_T(l_q, 1, 3, 3) for i in range(0,k+1)] # 90 parameters 204 | # feedback weights from v layer into q layer (~transition probabilities in Bellman equation) 205 | self.w_fb_list = [init_weights_T(l_q, 1, 3, 3) for i in range(0,k)] # 90 parameters 206 | 207 | self.q = conv2D_keep_shape(self.r, self.w_list[0], image_shape=[batchsize, self.w1.shape.eval()[0], 208 | imsize[0], imsize[1]], 209 | filter_shape=self.w_list[0].shape.eval()) 210 | self.v = T.max(self.q, axis=1, keepdims=True) 211 | 212 | for i in range(0, k-1): 213 | self.q = conv2D_keep_shape(T.concatenate([self.r, self.v], axis=1), T.concatenate([self.w_list[i+1], self.w_fb_list[i]], 214 | axis=1), 215 | image_shape=[batchsize, self.w1.shape.eval()[0]+1, imsize[0], imsize[1]], 216 | filter_shape=T.concatenate([self.w_list[i+1], self.w_fb_list[i]], axis=1).shape.eval()) 217 | self.v = T.max(self.q, axis=1, keepdims=True) 218 | 219 | # do one last convolution 220 | self.q = conv2D_keep_shape(T.concatenate([self.r, self.v], axis=1), T.concatenate([self.w_list[k], self.w_fb_list[k-1]], axis=1), 221 | image_shape=[batchsize, self.w1.shape.eval()[0]+1, imsize[0], imsize[1]], 222 | filter_shape=T.concatenate([self.w_list[k], self.w_fb_list[k-1]], axis=1).shape.eval()) 223 | 224 | # Select the conv-net channels at the state position (S1,S2). 225 | # This intuitively corresponds to each channel representing an action, and the convnet the Q function. 
226 | # The tricky thing is we want to select the same (S1,S2) position *for each* channel and for each sample 227 | self.q_out = self.q[T.extra_ops.repeat(T.arange(self.q.shape[0]), state_batch_size), :, in_s1.flatten(), 228 | in_s2.flatten()] 229 | 230 | # softmax output weights 231 | self.w_o = init_weights_T(l_q, 8) # 80 parameters 232 | self.output = T.nnet.softmax(T.dot(self.q_out, self.w_o)) 233 | 234 | self.params = self.w_list + self.w_fb_list + [self.w0, self.bias, self.w1, self.w_o] 235 | --------------------------------------------------------------------------------
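
The VinBlock above expresses the Bellman backup of value iteration with convolutions: each action channel of Q is a 3x3 convolution of the reward map r (weights w) plus a 3x3 convolution of the current value map v (feedback weights w_fb), and v is the channel-wise max of Q. Below is a minimal NumPy sketch of that single step for one map, written for illustration only; it uses correlation instead of Theano's flipped-kernel convolution (immaterial for learned filters), and the names conv2d_same and vi_step are ad hoc, not from the repository.

import numpy as np

def conv2d_same(x, k):
    # 'same'-padded 2-D correlation of an (H, W) map with a (3, 3) kernel
    H, W = x.shape
    xp = np.pad(x, 1, mode="constant")
    out = np.zeros_like(x)
    for i in range(H):
        for j in range(W):
            out[i, j] = np.sum(xp[i:i + 3, j:j + 3] * k)
    return out

def vi_step(r, v, w, w_fb):
    # one backup: Q[a] = conv(r, w[a]) + conv(v, w_fb[a]);  v = max_a Q[a]
    q = np.stack([conv2d_same(r, w[a]) + conv2d_same(v, w_fb[a])
                  for a in range(w.shape[0])])      # (l_q, H, W)
    return q, q.max(axis=0)                         # new Q and new value map

np.random.seed(0)
r = np.random.randn(8, 8)                           # stand-in for the reward map produced by w1
v = np.zeros((8, 8))                                # value map starts at zero
w, w_fb = np.random.randn(10, 3, 3), np.random.randn(10, 3, 3)
for _ in range(10):                                 # the block above unrolls k+1 such q-updates
    q, v = vi_step(r, v, w, w_fb)

The tied block in vin.py reuses the same w and w_fb for every backup; vin_untied.py differs only in indexing a fresh pair from w_list and w_fb_list at each unrolled step.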
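
The advanced-indexing line that builds self.q_out picks, for every sample and every one of its state_batch_size query positions, the full action-channel vector at that (S1, S2) location. A plain-NumPy sketch of the same trick, with toy shapes chosen only for illustration:

import numpy as np

np.random.seed(1)
batch, l_q, H, W, sbs = 4, 10, 8, 8, 3          # sbs = state_batch_size
q  = np.random.randn(batch, l_q, H, W)          # conv output, one channel per action
s1 = np.random.randint(0, H, size=(batch, sbs)) # vertical query positions
s2 = np.random.randint(0, W, size=(batch, sbs)) # horizontal query positions

# Repeat each sample index sbs times so the advanced indexing pairs
# sample b with every one of its sbs (s1, s2) positions.
rows  = np.repeat(np.arange(batch), sbs)        # (batch*sbs,)
q_out = q[rows, :, s1.flatten(), s2.flatten()]  # (batch*sbs, l_q)

# Equivalent explicit loop, for comparison
ref = np.stack([q[b, :, s1[b, j], s2[b, j]]
                for b in range(batch) for j in range(sbs)])
assert np.allclose(q_out, ref)

Because the integer index arrays are separated by the ':' slice, the indexed axes are moved to the front of the result, which is exactly the (samples x actions) shape the softmax layer expects.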
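
The per-layer comments in the untied block are easy to misread: "# 1350 parameters" and "# 90 parameters" describe a single filter bank over one input channel, whereas the constructor above builds w0 over in_x_channels = 2 channels and lists of k+1 (respectively k) banks. The arithmetic sketch below derives the totals from the tensor shapes, for the defaults used in this file; the totals are computed here, not taken from the in-file comments.

# Parameter count for the untied VinBlock with the defaults in this file:
# in_x_channels = 2, l_h = 150, l_q = 10, k = 10.
l_in, l_h, l_q, k = 2, 150, 10, 10

counts = {
    "w0 (l_h x l_in x 3 x 3)":                 l_h * l_in * 3 * 3,     # 2700
    "bias (l_h)":                              l_h,                    # 150
    "w1 (1 x l_h x 1 x 1)":                    l_h,                    # 150
    "w_list ((k+1) banks of l_q x 1 x 3 x 3)": (k + 1) * l_q * 3 * 3,  # 990
    "w_fb_list (k banks of l_q x 1 x 3 x 3)":  k * l_q * 3 * 3,        # 900
    "w_o (l_q x 8)":                           l_q * 8,                # 80
}
total = sum(counts.values())
assert total == 4970                           # for these settings

The tied vin.py block has the same layout but a single w/w_fb pair, so its VI filters contribute 180 parameters regardless of k.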
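
run_training slices images as Xtrain[start:end] but labels as ytrain[start*self.statebatchsize:end*self.statebatchsize] because every map in the .mat file carries statebatchsize query states, so the flattened label vector is statebatchsize times longer than the image array. A toy sketch of that alignment (all names and sizes are illustrative):

import numpy as np

n_maps, sbs, batch = 6, 3, 2                 # sbs = statebatchsize
maps   = np.arange(n_maps)                   # stand-ins for the (N, 2, H, W) image tensors
labels = np.repeat(maps, sbs)                # sbs action labels per map, in map order

for start in range(0, n_maps, batch):
    end = start + batch
    x_batch = maps[start:end]
    y_batch = labels[start * sbs:end * sbs]  # the sbs labels belonging to each map in the batch
    assert (np.repeat(x_batch, sbs) == y_batch).all()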
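
A minimal usage sketch, based only on the constructor and method signatures shown above. It assumes NNobj.py and theano_utils.py are importable from the working directory, and the .mat file names are hypothetical placeholders; nothing below is taken from the repository's own driver code.

# Hypothetical driver, not part of the repository: file names and the epoch
# count are placeholders chosen for illustration.
from vin_untied import vin_untied

net = vin_untied(model="VIN", im_size=[28, 28], k=10,
                 statebatchsize=10, batchsize=128)

# The training file must contain batch_im_data, batch_value_data,
# state_x_data, state_y_data and batch_label_data; run_training holds out
# the last 1/7 of the samples as the test split.
net.run_training(input="gridworld_train_placeholder.mat",
                 stepsize=0.01, epochs=30, batch_size=128)
net.save_weights(outfile="vin_untied_weights.pk")

# Single-query prediction reads im_data, value_data and state_xy_data and
# returns the predicted action index (0..7).
action = net.predict(input="gridworld_query_placeholder.mat")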