├── .gitignore
├── .idea
│   └── vcs.xml
├── CNN.py
├── FC.py
├── FCN.py
├── LICENSE.md
├── MDPs
│   ├── @Gridworld_Graph8
│   │   ├── GetOptTraj.m
│   │   ├── Gridworld_Graph8.m
│   │   ├── OptimalActionsOnPath.m
│   │   └── SampleGraphTraj.m
│   ├── Finite_MDP_class.m
│   └── MDP_class.m
├── NN_run_training.py
├── NNobj.py
├── NNpredict.m
├── ObstacleGenerators
│   └── @obstacle_gen
│       └── obstacle_gen.m
├── README.md
├── addpaths.m
├── data
│   ├── gridworld_16.mat
│   ├── gridworld_16_test.mat
│   ├── gridworld_28.mat
│   ├── gridworld_28_test.mat
│   ├── gridworld_8.mat
│   └── gridworld_8_test.mat
├── extract_action.m
├── script_make_data.m
├── script_viz_policy.m
├── scripts
│   ├── make_data_gridworld_nips.m
│   ├── nips_gridworld_experiments_CNN.sh
│   ├── nips_gridworld_experiments_FCN.sh
│   ├── nips_gridworld_experiments_VIN.sh
│   ├── nips_gridworld_experiments_VIN_untied.sh
│   └── nips_gridworld_experiments_VIN_untied_data_fraction.sh
├── test_network.m
├── theano_utils.py
├── util
│   ├── SP.m
│   ├── python_ndarray_to_matrix.m
│   ├── rand_choose.m
│   └── set_var.m
├── vin.py
└── vin_untied.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *.sh~
3 | .idea/*
4 | position_paper/*
5 | data/mmpCode/*
6 | matlab_bgl/*
7 | misc/*
8 | *.png
9 | *.mat
10 | *.py~
11 | figures/*
12 | .gitignore~
13 | obsolete/*
14 | icml16results/*
15 | nips16results/*
16 | *.sh#
17 |
18 |
19 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/CNN.py:
--------------------------------------------------------------------------------
1 | # VI network using THEANO, takes batches of state input
2 | from NNobj import *
3 | from theano_utils import *
4 |
5 |
6 | class cnn(NNobj):
7 | "Class for a convolutional neural network, inthe style of LeNet/Alexnet"
8 | def __init__(self, model="CNN", im_size=[28, 28], dropout=False, devtype="cpu", grad_check=False, reg=0,
9 | batchsize=128):
10 | self.im_size = im_size # input image size
11 | self.model = model
12 | self.reg = reg # regularization (currently not implemented)
13 | self.batchsize = batchsize # batch size for training
14 | np.random.seed(0)
15 | print(model)
16 | # theano.config.blas.ldflags = "-L/usr/local/lib -lopenblas"
17 |
18 | # X input : l=3 stacked images: obstacle map, goal map, current state map
19 | self.X = T.ftensor4(name="X")
20 | self.y = T.bvector("y") # output action
21 |
22 | l = 3
23 | filter_sizes = [[50, 3, 3],
24 | [50, 3, 3],
25 | [100, 3, 3],
26 | [100, 3, 3],
27 | [100, 3, 3]]
28 | poolings = [2, 1, 2, 1, 1]
29 |
30 | self.cnn_net = CNN(in_x=self.X, in_x_channels=l, imsize=self.im_size,
31 | batchsize=self.batchsize, filter_sizes=filter_sizes,
32 | poolings=poolings)
33 | self.p_of_y = self.cnn_net.output
34 | self.params = self.cnn_net.params
35 | # Total 1910 parameters
36 |
37 | self.cost = -T.mean(T.log(self.p_of_y)[T.arange(self.y.shape[0]),
38 | self.y], dtype=theano.config.floatX)
39 | self.y_pred = T.argmax(self.p_of_y, axis=1)
40 | self.err = T.mean(T.neq(self.y_pred, self.y.flatten()), dtype=theano.config.floatX)
41 |
42 | self.computeloss = theano.function(inputs=[self.X, self.y],
43 | outputs=[self.err, self.cost])
44 | self.y_out = theano.function(inputs=[self.X], outputs=[self.y_pred])
45 | self.updates = []
46 | self.train = []
47 |
48 | def run_training(self, input, stepsize=0.01, epochs=10, output='None', batch_size=128, grad_check=True,
49 | profile=False, data_fraction=1):
50 | # run training from input matlab data file, and save test data prediction in output file
51 | # load data from Matlab file, including
52 | # im_data: flattened images
53 | # value_data: flattened reward image
54 | # state_data: flattened state images
55 | # label_data: one-hot vector for action (state difference)
56 | matlab_data = sio.loadmat(input)
57 | im_data = matlab_data["im_data"]
58 | im_data = (im_data - 1)/255 # obstacles = 1, free zone = 0
59 | value_data = matlab_data["value_data"]
60 | state1_data = matlab_data["state_x_data"]
61 | state2_data = matlab_data["state_y_data"]
62 | label_data = matlab_data["label_data"]
63 | y_data = label_data.astype('int8')
64 | x_im_data = im_data.astype(theano.config.floatX)
65 | x_im_data = x_im_data.reshape(-1, 1, self.im_size[0], self.im_size[1])
66 | x_val_data = value_data.astype(theano.config.floatX)
67 | x_val_data = x_val_data.reshape(-1, 1, self.im_size[0], self.im_size[1])
68 | x_state_data = np.zeros_like(x_im_data)
69 | for i in xrange(x_state_data.shape[0]):
70 | pos1 = state1_data[i]
71 | pos2 = state2_data[i]
72 | x_state_data[i, 0, pos1, pos2] = 1
73 | x_data = np.append(x_im_data, x_val_data, axis=1)
74 | x_data = np.append(x_data, x_state_data, axis=1)
75 |
76 | all_training_samples = int(6/7.0*x_data.shape[0])
77 | training_samples = int(data_fraction * all_training_samples)
78 | x_train = x_data[0:training_samples]
79 | y_train = y_data[0:training_samples]
80 |
81 | x_test = x_data[all_training_samples:]
82 | y_test = y_data[all_training_samples:]
83 | y_test = y_test.flatten()
84 |
85 | sortinds = np.random.permutation(training_samples)
86 | x_train = x_train[sortinds]
87 | y_train = y_train[sortinds]
88 | y_train = y_train.flatten()
89 |
90 | self.updates = rmsprop_updates_T(self.cost, self.params, stepsize=stepsize)
91 | self.train = theano.function(inputs=[self.X, self.y], outputs=[], updates=self.updates)
92 |
93 | print fmt_row(10, ["Epoch", "Train NLL", "Train Err", "Test NLL", "Test Err", "Epoch Time"])
94 | for i_epoch in xrange(int(epochs)):
95 | tstart = time.time()
96 | # do training
97 | for start in xrange(0, x_train.shape[0], batch_size):
98 | end = start+batch_size
99 | if end <= x_train.shape[0]:
100 | self.train(x_train[start:end], y_train[start:end])
101 | elapsed = time.time() - tstart
102 | # compute losses
103 | trainerr = 0.
104 | trainloss = 0.
105 | testerr = 0.
106 | testloss = 0.
107 | num = 0
108 | for start in xrange(0, x_test.shape[0], batch_size):
109 | end = start+batch_size
110 | if end <= x_test.shape[0]:
111 | num += 1
112 | trainerr_, trainloss_ = self.computeloss(x_train[start:end], y_train[start:end])
113 | testerr_, testloss_ = self.computeloss(x_test[start:end], y_test[start:end])
114 | trainerr += trainerr_
115 | trainloss += trainloss_
116 | testerr += testerr_
117 | testloss += testloss_
118 | print fmt_row(10, [i_epoch, trainloss/num, trainerr/num, testloss/num, testerr/num, elapsed])
119 |
120 | def predict(self, input):
121 | # NN output for a single input, read from file
122 | matlab_data = sio.loadmat(input)
123 | im_data = matlab_data["im_data"]
124 | im_data = (im_data - 1)/255 # obstacles = 1, free zone = 0
125 | # state_data = matlab_data["state_data"]
126 | state_data = matlab_data["state_xy_data"]
127 | value_data = matlab_data["value_data"]
128 | x_im_test = im_data.astype(theano.config.floatX)
129 | x_im_test = x_im_test.reshape(-1, 1, self.im_size[0], self.im_size[1])
130 | x_val_test = value_data.astype(theano.config.floatX)
131 | x_val_test = x_val_test.reshape(-1, 1, self.im_size[0], self.im_size[1])
132 | x_state_test = np.zeros_like(x_im_test)
133 | x_state_test[0, 0, state_data[0, 0], state_data[0, 1]] = 1
134 | x_test = np.append(x_im_test, x_val_test, axis=1)
135 | x_test = np.append(x_test, x_state_test, axis=1)
136 | out = self.y_out(x_test)
137 | return out[0][0]
138 |
139 | def load_weights(self, infile="weight_dump.pk"):
140 | dump = pickle.load(open(infile, 'r'))
141 | [n.set_value(p) for n, p in zip(self.params, dump)]
142 |
143 | def save_weights(self, outfile="weight_dump.pk"):
144 | pickle.dump([n.get_value() for n in self.params], open(outfile, 'w'))
145 |
146 |
147 | class CNN(object):
148 | """CNN network"""
149 | def __init__(self, in_x, in_x_channels, imsize, batchsize=128,
150 | filter_sizes=[[50, 3, 3], [100, 3, 3]], poolings=[2, 2]):
151 | """
152 | Allocate a CNN network with shared variable internal parameters.
153 |
154 | :type in_x: theano.tensor.dtensor4
155 | :param in_x: symbolic input image tensor, of shape [batchsize, in_x_channels, imsize[0], imsize[1]]
156 | Typically : first channel is image, second is the reward prior, third is the current state image.
157 |
158 | :type in_x_channels: int32
159 | :param in_x_channels: number of input channels
160 |
161 | :type imsize: tuple or list of length 2
162 | :param imsize: (image height, image width)
163 |
164 | :type batchsize: int32
165 | :param batchsize: batch size
166 |
167 | :type filter_sizes: list of int32 3-tuples
168 | :param filter_sizes: list of filter sizes for each layer, each a list of 3 integers:
169 | num_filters,filter_width,filter_height
170 |
171 | :type poolings: int32 list
172 | :param poolings: list of pooling ratios after each layer (assumed symmetric)
173 | """
174 | assert len(filter_sizes) == len(poolings)
175 | n_conv_layers = len(filter_sizes)
176 | self.params = []
177 | # first conv layer
178 | prev_layer = ConvLayer(in_x, filter_shape=[filter_sizes[0][0], in_x_channels, filter_sizes[0][1],
179 | filter_sizes[0][2]],
180 | image_shape=[batchsize, in_x_channels, imsize[0], imsize[1]],
181 | poolsize=(poolings[0], poolings[0]))
182 | self.params = self.params + prev_layer.params
183 | # then the rest of the conv layers
184 | for l in range(1, n_conv_layers):
185 | new_layer = ConvLayer(prev_layer.output,
186 | filter_shape=[filter_sizes[l][0], prev_layer.out_shape[1], filter_sizes[l][1],
187 | filter_sizes[l][2]],
188 | image_shape=prev_layer.out_shape,
189 | poolsize=(poolings[l], poolings[l]))
190 | self.params = self.params + new_layer.params
191 | prev_layer = new_layer
192 | # fully connected layer
193 | final_conv_shape = new_layer.out_shape
194 | flat_conv_out = new_layer.output.flatten(ndim=2)
195 | flat_shape = [final_conv_shape[0], final_conv_shape[1]*final_conv_shape[2]*final_conv_shape[3]]
196 | self.w_o = init_weights_T(flat_shape[1], 8)
197 | self.output = T.nnet.softmax(T.dot(flat_conv_out, self.w_o))
198 | self.params = self.params + [self.w_o]
199 |
--------------------------------------------------------------------------------
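As a reference for the data layout above, a minimal NumPy sketch of the 3-channel input that cnn.run_training assembles (obstacle map, value/reward prior, and a one-hot current-state map stacked along the channel axis); all sizes and positions below are illustrative, not taken from the data files:

import numpy as np

N, H, W = 4, 28, 28                                       # illustrative sample count and image size
obstacle_maps = np.zeros((N, 1, H, W), dtype=np.float32)  # channel 0: obstacle image
value_priors  = np.zeros((N, 1, H, W), dtype=np.float32)  # channel 1: value/reward prior
state_maps    = np.zeros((N, 1, H, W), dtype=np.float32)  # channel 2: one-hot current-state map
rows = np.array([3, 5, 0, 27])                            # hypothetical state positions
cols = np.array([7, 2, 1, 0])
state_maps[np.arange(N), 0, rows, cols] = 1

x_data = np.concatenate([obstacle_maps, value_priors, state_maps], axis=1)
assert x_data.shape == (N, 3, H, W)                       # what self.X (ftensor4) expects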
/FC.py:
--------------------------------------------------------------------------------
1 | # Based on tutorial by Alec Radford
2 | # https://github.com/Newmu/Theano-Tutorials/blob/master/4_modern_net.py
3 |
4 | import cgt
5 | from cgt import nn
6 | from cgt.distributions import categorical
7 | from NNobj import *
8 |
9 |
10 | class FC(NNobj):
11 | "Class for a multi-layer perceptron (fully connected network) object"
12 | def __init__(self, model="dense", im_size=[28, 28], dropout=True, devtype="cpu", grad_check=True, reg=0):
13 | if grad_check: cgt.set_precision("quad")
14 | self.model = model
15 | self.reg = reg
16 | np.random.seed(0)
17 | cgt.update_config(default_device=cgt.core.Device(devtype=devtype), backend="native")
18 | print(model)
19 | # MLP with 1 hidden layer
20 | if model == "dense1":
21 | self.Xsize = 2*im_size[0]*im_size[1]+im_size[0]+im_size[1]
22 | self.X = cgt.matrix("X", fixed_shape=(None, self.Xsize))
23 | self.y = cgt.vector("y", dtype='i8')
24 | self.p_drop_input, self.p_drop_hidden = (0.2, 0.5) if dropout else (0, 0)
25 | self.w_h = init_weights(self.Xsize, 256)
26 | self.w_o = init_weights(256, 8)
27 | self.pofy_drop = dense_model1(self.X, self.w_h, self.w_o, self.p_drop_input, self.p_drop_hidden)
28 | self.pofy_nodrop = dense_model1(self.X, self.w_h, self.w_o, 0., 0.)
29 | self.params = [self.w_h, self.w_o]
30 | self.l1 = cgt.abs(self.w_h).sum() + cgt.abs(self.w_o).sum()
31 | self.cost_drop = -cgt.mean(categorical.loglik(self.y, self.pofy_drop)) + self.reg*self.l1
32 | # MLP with 2 hidden layers
33 | elif model == "dense2":
34 | self.Xsize = 2*im_size[0]*im_size[1]+im_size[0]+im_size[1]
35 | self.X = cgt.matrix("X", fixed_shape=(None, self.Xsize))
36 | self.y = cgt.vector("y", dtype='i8')
37 | self.p_drop_input, self.p_drop_hidden = (0.2, 0.5) if dropout else (0, 0)
38 | self.w_h = init_weights(self.Xsize, 256)
39 | self.w_h2 = init_weights(256, 256)
40 | self.w_o = init_weights(256, 8)
41 | self.pofy_drop = dense_model2(self.X, self.w_h, self.w_h2, self.w_o, self.p_drop_input, self.p_drop_hidden)
42 | self.pofy_nodrop = dense_model2(self.X, self.w_h, self.w_h2, self.w_o, 0., 0.)
43 | self.params = [self.w_h, self.w_h2, self.w_o]
44 | self.l1 = cgt.abs(self.w_h).sum() + cgt.abs(self.w_h2).sum() + cgt.abs(self.w_o).sum()
45 | self.cost_drop = -cgt.mean(categorical.loglik(self.y, self.pofy_drop)) + self.reg*self.l1
46 | # MLP with 3 hidden layers
47 | elif model == "dense3":
48 | self.Xsize = 2*im_size[0]*im_size[1]+im_size[0]+im_size[1]
49 | self.X = cgt.matrix("X", fixed_shape=(None, self.Xsize))
50 | self.y = cgt.vector("y", dtype='i8')
51 | self.p_drop_input, self.p_drop_hidden = (0.0, [0.5, 0.5, 0.5]) if dropout else (0, [0, 0, 0])
52 | self.w_h = init_weights(self.Xsize, 256)
53 | self.w_h2 = init_weights(256, 256)
54 | self.w_h3 = init_weights(256, 256)
55 | self.w_o = init_weights(256, 8)
56 | self.pofy_drop = dense_model3(self.X, self.w_h, self.w_h2, self.w_h3, self.w_o, self.p_drop_input,
57 | self.p_drop_hidden)
58 | self.pofy_nodrop = dense_model3(self.X, self.w_h, self.w_h2, self.w_h3, self.w_o, 0., [0., 0., 0.])
59 | self.params = [self.w_h, self.w_h2, self.w_h3, self.w_o]
60 | self.l1 = cgt.abs(self.w_h).sum() + cgt.abs(self.w_h2).sum() + cgt.abs(self.w_h3).sum() + \
61 | cgt.abs(self.w_o).sum()
62 | self.cost_drop = -cgt.mean(categorical.loglik(self.y, self.pofy_drop)) + self.reg*self.l1
63 | else:
64 | raise RuntimeError("Unknown Model")
65 |
66 | self.y_nodrop = cgt.argmax(self.pofy_nodrop, axis=1)
67 | self.cost_nodrop = -cgt.mean(categorical.loglik(self.y, self.pofy_nodrop))
68 | self.err_nodrop = cgt.cast(cgt.not_equal(self.y_nodrop, self.y), cgt.floatX).mean()
69 | self.computeloss = cgt.function(inputs=[self.X, self.y], outputs=[self.err_nodrop,self.cost_nodrop])
70 | self.y_out = cgt.function(inputs=[self.X], outputs=[self.y_nodrop])
71 | self.updates = rmsprop_updates(self.cost_drop, self.params)
72 | self.train = cgt.function(inputs=[self.X, self.y], outputs=[], updates=self.updates)
73 |
74 | def run_training(self, input, stepsize=0.01, epochs=10, output='None', batch_size=128, grad_check=True,
75 | profile=False, step_decrease_rate=0.5, step_decrease_time=1000):
76 | # run NN training from input matlab data file, and save test data prediction in output file
77 |
78 | # load data from Matlab file, including
79 | # im_data: flattened images
80 | # state_data: concatenated one-hot vectors for each state variable
81 | # label_data: one-hot vector for action (state difference)
82 | if grad_check: cgt.set_precision("quad")
83 | matlab_data = sio.loadmat(input)
84 | im_data = matlab_data["im_data"]
85 | im_data = (im_data - 1)/255 # obstacles = 1, free zone = 0
86 | state_data = matlab_data["state_data"]
87 | value_data = matlab_data["value_data"]
88 | label_data = matlab_data["label_data"]
89 | Xdata = (np.concatenate((np.concatenate((im_data,value_data),axis=1), state_data), axis=1)).astype(cgt.floatX)
90 | ydata = label_data
91 |
92 | training_samples = int(6/7.0*Xdata.shape[0])
93 | Xtrain = Xdata[0:training_samples]
94 | ytrain = ydata[0:training_samples]
95 |
96 | Xtest = Xdata[training_samples:]
97 | ytest = ydata[training_samples:]
98 |
99 | sortinds = np.random.permutation(training_samples)
100 | Xtrain = Xtrain[sortinds]
101 | ytrain = ytrain[sortinds]
102 |
103 | self.updates = rmsprop_updates(self.cost_drop, self.params, stepsize=stepsize)
104 | self.train = cgt.function(inputs=[self.X, self.y], outputs=[], updates=self.updates)
105 |
106 | from cgt.tests import gradcheck_model
107 | if grad_check:
108 | cost_nodrop = cgt.core.clone(self.cost_nodrop, {self.X: Xtrain[:1], self.y: ytrain[:1]})
109 | print "doing gradient check..."
110 | print "------------------------------------"
111 | gradcheck_model(cost_nodrop, self.params[0:1])
112 | print "success!"
113 | return
114 |
115 | if profile: cgt.profiler.start()
116 |
117 | print fmt_row(10, ["Epoch","Train NLL","Train Err","Test NLL","Test Err","Epoch Time"])
118 | for i_epoch in xrange(int(epochs)):
119 | tstart = time.time()
120 | for start in xrange(0, Xtrain.shape[0], batch_size):
121 | end = start+batch_size
122 | self.train(Xtrain[start:end], ytrain[start:end])
123 | elapsed = time.time() - tstart
124 | trainerr, trainloss = self.computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)])
125 | testerr, testloss = self.computeloss(Xtest, ytest)
126 | print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed])
127 | if (i_epoch > 0) & (i_epoch % step_decrease_time == 0):
128 | stepsize = step_decrease_rate * stepsize
129 | self.updates = rmsprop_updates(self.cost_drop, self.params, stepsize=stepsize)
130 | self.train = cgt.function(inputs=[self.X, self.y], outputs=[], updates=self.updates)
131 | print stepsize
132 | if profile: cgt.execution.profiler.print_stats()
133 |
134 | # save Matlab data
135 | if output != 'None':
136 | sio.savemat(file_name=output, mdict={'in': Xtest, 'out': self.y_out(Xtest)})
137 |
138 | def predict(self, input):
139 | # NN output for a single input, read from file
140 | matlab_data = sio.loadmat(input)
141 | im_data = matlab_data["im_data"]
142 | im_data = (im_data - 1)/255 # obstacles = 1, free zone = 0
143 | state_data = matlab_data["state_data"]
144 | value_data = matlab_data["value_data"]
145 | x_test = (np.concatenate((np.concatenate((im_data, value_data), axis=1), state_data), axis=1)).astype(cgt.floatX)
146 | out = self.y_out(x_test)
147 | return out[0][0]
148 |
149 |
150 | def init_weights(*shape):
151 | return cgt.shared(np.random.randn(*shape) * 0.01, fixed_shape_mask='all')
152 |
153 |
154 | def rmsprop_updates(cost, params, stepsize=0.001, rho=0.9, epsilon=1e-6):
155 | grads = cgt.grad(cost, params)
156 | updates = []
157 | for p, g in zip(params, grads):
158 | acc = cgt.shared(p.op.get_value() * 0.)
159 | acc_new = rho * acc + (1 - rho) * cgt.square(g)
160 | gradient_scaling = cgt.sqrt(acc_new + epsilon)
161 | g = g / gradient_scaling
162 | updates.append((acc, acc_new))
163 | updates.append((p, p - stepsize * g))
164 | return updates
165 |
166 |
167 | def adagrad_updates(cost, params, stepsize=0.001, rho=0.9, epsilon=1e-6):  # note: this is an Adadelta-style update, despite the name
168 | grads = cgt.grad(cost, params)
169 | updates = []
170 | for param, grad in zip(params, grads):
171 | value = param.op.get_value()
172 | accu = cgt.shared(np.zeros(value.shape, dtype=value.dtype))
173 | delta_accu = cgt.shared(np.zeros(value.shape, dtype=value.dtype))
174 |
175 | accu_new = rho * accu + (1 - rho) * grad ** 2
176 | updates.append((accu, accu_new))
177 |
178 | update = (grad * cgt.sqrt(delta_accu + epsilon) / cgt.sqrt(accu_new + epsilon))
179 | updates.append((param, param - stepsize * update))
180 |
181 | delta_accu_new = rho * delta_accu + (1 - rho) * update ** 2
182 | updates.append((delta_accu, delta_accu_new))
183 | return updates
184 |
185 |
186 | def dense_model1(X, w_h, w_o, p_drop_input, p_drop_hidden):
187 | X = nn.dropout(X, p_drop_input)
188 | h = nn.rectify(cgt.dot(X, w_h))
189 | h = nn.dropout(h, p_drop_hidden)
190 | py_x = nn.softmax(cgt.dot(h, w_o))
191 | return py_x
192 |
193 |
194 | def dense_model2(X, w_h, w_h2, w_o, p_drop_input, p_drop_hidden):
195 | X = nn.dropout(X, p_drop_input)
196 | h = nn.rectify(cgt.dot(X, w_h))
197 |
198 | h = nn.dropout(h, p_drop_hidden)
199 | h2 = nn.rectify(cgt.dot(h, w_h2))
200 |
201 | h2 = nn.dropout(h2, p_drop_hidden)
202 | py_x = nn.softmax(cgt.dot(h2, w_o))
203 | return py_x
204 |
205 |
206 | def dense_model3(X, w_h, w_h2, w_h3, w_o, p_drop_input, p_drop_hidden):
207 | X = nn.dropout(X, p_drop_input)
208 | h = nn.rectify(cgt.dot(X, w_h))
209 |
210 | h = nn.dropout(h, p_drop_hidden[0])
211 | h2 = nn.rectify(cgt.dot(h, w_h2))
212 |
213 | h2 = nn.dropout(h2, p_drop_hidden[1])
214 | h3 = nn.rectify(cgt.dot(h2, w_h3))
215 |
216 | h3 = nn.dropout(h3, p_drop_hidden[2])
217 | py_x = nn.softmax(cgt.dot(h3, w_o))
218 | return py_x
219 |
220 |
221 |
--------------------------------------------------------------------------------
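For reference, a small NumPy sketch of how one flattened input row for the dense models above is laid out, matching Xsize = 2*im_size[0]*im_size[1] + im_size[0] + im_size[1] in FC.__init__ (the indices below are hypothetical):

import numpy as np

H, W = 16, 16                                   # illustrative grid size
im_row    = np.zeros(H * W, dtype=np.float32)   # flattened obstacle image
value_row = np.zeros(H * W, dtype=np.float32)   # flattened value/reward prior
state_row = np.zeros(H + W, dtype=np.float32)   # one-hot row position, then one-hot column position
state_row[3] = 1                                # hypothetical row index
state_row[H + 7] = 1                            # hypothetical column index

x_row = np.concatenate([im_row, value_row, state_row])
assert x_row.size == 2 * H * W + H + W          # = Xsize for im_size = [16, 16]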
/FCN.py:
--------------------------------------------------------------------------------
1 | # VI network using THEANO, takes batches of state input
2 | from NNobj import *
3 | from theano_utils import *
4 |
5 |
6 | class fcn(NNobj):
7 | "Class for a fully connected convolutional network"
8 | def __init__(self, model="FCN", im_size=[28, 28], dropout=False, devtype="cpu", grad_check=False, reg=0,
9 | statebatchsize=10, batchsize=128):
10 | self.im_size = im_size # input image size
11 | self.model = model
12 | self.reg = reg # regularization (currently not implemented)
13 | self.batchsize = batchsize # batch size for training
14 | self.statebatchsize = statebatchsize # number of state inputs for every image input, since each image is the
15 | # same for many states in the data
16 | np.random.seed(0)
17 | print(model)
18 | theano.config.blas.ldflags = "-L/usr/local/lib -lopenblas"
19 |
20 | # X input : l=2 stacked images: obstacle map and reward function prior
21 | self.X = T.ftensor4(name="X")
22 | # S1,S2 input : state position (vertical and horizontal position)
23 | self.S1 = T.bmatrix("S1") # state first dimension * statebatchsize
24 | self.S2 = T.bmatrix("S2") # state second dimension * statebatchsize
25 | self.y = T.bvector("y") # output action * statebatchsize
26 |
27 | l = 2
28 | l_1 = 150 # channels (filters) in first conv layer
29 | l_2 = 150
30 | l_3 = 10
31 |
32 | self.fcn_net = FCN(in_x=self.X, in_s1=self.S1, in_s2=self.S2, in_x_channels=l, imsize=self.im_size,
33 | batchsize=self.batchsize, state_batch_size=self.statebatchsize, l_1=l_1, l_2=l_2,
34 | l_3=l_3)
35 | self.p_of_y = self.fcn_net.output
36 | self.params = self.fcn_net.params
37 | # Total 1910 parameters
38 |
39 | self.cost = -T.mean(T.log(self.p_of_y)[T.arange(self.y.shape[0]),
40 | self.y], dtype=theano.config.floatX)
41 | self.y_pred = T.argmax(self.p_of_y, axis=1)
42 | self.err = T.mean(T.neq(self.y_pred, self.y.flatten()), dtype=theano.config.floatX)
43 |
44 | self.computeloss = theano.function(inputs=[self.X, self.S1, self.S2, self.y],
45 | outputs=[self.err, self.cost])
46 | self.y_out = theano.function(inputs=[self.X, self.S1, self.S2], outputs=[self.y_pred])
47 |
48 | def run_training(self, input, stepsize=0.01, epochs=10, output='None', batch_size=128, grad_check=True,
49 | profile=False, data_fraction=1):
50 | # run training from input matlab data file, and save test data prediction in output file
51 | # load data from Matlab file, including
52 | # im_data: flattened images
53 | # state_data: concatenated one-hot vectors for each state variable
54 | # state_xy_data: state variable (x,y position)
55 | # label_data: one-hot vector for action (state difference)
56 | matlab_data = sio.loadmat(input)
57 | im_data = matlab_data["batch_im_data"]
58 | im_data = (im_data - 1)/255 # obstacles = 1, free zone = 0
59 | value_data = matlab_data["batch_value_data"]
60 | state1_data = matlab_data["state_x_data"]
61 | state2_data = matlab_data["state_y_data"]
62 | label_data = matlab_data["batch_label_data"]
63 | ydata = label_data.astype('int8')
64 | Xim_data = im_data.astype(theano.config.floatX)
65 | Xim_data = Xim_data.reshape(-1, 1, self.im_size[0], self.im_size[1])
66 | Xval_data = value_data.astype(theano.config.floatX)
67 | Xval_data = Xval_data.reshape(-1, 1, self.im_size[0], self.im_size[1])
68 | Xdata = np.append(Xim_data, Xval_data, axis=1)
69 | S1data = state1_data.astype('int8')
70 | S2data = state2_data.astype('int8')
71 |
72 | all_training_samples = int(6/7.0*Xdata.shape[0])
73 | training_samples = int(data_fraction * all_training_samples)
74 | Xtrain = Xdata[0:training_samples]
75 | S1train = S1data[0:training_samples]
76 | S2train = S2data[0:training_samples]
77 | ytrain = ydata[0:training_samples]
78 |
79 | Xtest = Xdata[all_training_samples:]
80 | S1test = S1data[all_training_samples:]
81 | S2test = S2data[all_training_samples:]
82 | ytest = ydata[all_training_samples:]
83 | ytest = ytest.flatten()
84 |
85 | sortinds = np.random.permutation(training_samples)
86 | Xtrain = Xtrain[sortinds]
87 | S1train = S1train[sortinds]
88 | S2train = S2train[sortinds]
89 | ytrain = ytrain[sortinds]
90 | ytrain = ytrain.flatten()
91 |
92 | self.updates = rmsprop_updates_T(self.cost, self.params, stepsize=stepsize)
93 | self.train = theano.function(inputs=[self.X, self.S1, self.S2, self.y], outputs=[], updates=self.updates)
94 |
95 | print fmt_row(10, ["Epoch", "Train NLL", "Train Err", "Test NLL", "Test Err", "Epoch Time"])
96 | for i_epoch in xrange(int(epochs)):
97 | tstart = time.time()
98 | # do training
99 | for start in xrange(0, Xtrain.shape[0], batch_size):
100 | end = start+batch_size
101 | if end <= Xtrain.shape[0]:
102 | self.train(Xtrain[start:end], S1train[start:end], S2train[start:end],
103 | ytrain[start*self.statebatchsize:end*self.statebatchsize])
104 | elapsed = time.time() - tstart
105 | # compute losses
106 | trainerr = 0.
107 | trainloss = 0.
108 | testerr = 0.
109 | testloss = 0.
110 | num = 0
111 | for start in xrange(0, Xtest.shape[0], batch_size):
112 | end = start+batch_size
113 | if end <= Xtest.shape[0]:
114 | num += 1
115 | trainerr_, trainloss_ = self.computeloss(Xtrain[start:end], S1train[start:end], S2train[start:end],
116 | ytrain[start*self.statebatchsize:end*self.statebatchsize])
117 | testerr_, testloss_ = self.computeloss(Xtest[start:end], S1test[start:end], S2test[start:end],
118 | ytest[start*self.statebatchsize:end*self.statebatchsize])
119 | trainerr += trainerr_
120 | trainloss += trainloss_
121 | testerr += testerr_
122 | testloss += testloss_
123 | print fmt_row(10, [i_epoch, trainloss/num, trainerr/num, testloss/num, testerr/num, elapsed])
124 |
125 | def predict(self, input):
126 | # NN output for a single input, read from file
127 | matlab_data = sio.loadmat(input)
128 | im_data = matlab_data["im_data"]
129 | im_data = (im_data - 1)/255 # obstacles = 1, free zone = 0
130 | state_data = matlab_data["state_xy_data"]
131 | value_data = matlab_data["value_data"]
132 | xim_test = im_data.astype(theano.config.floatX)
133 | xim_test = xim_test.reshape(-1, 1, self.im_size[0], self.im_size[1])
134 | xval_test = value_data.astype(theano.config.floatX)
135 | xval_test = xval_test.reshape(-1, 1, self.im_size[0], self.im_size[1])
136 | x_test = np.append(xim_test, xval_test, axis=1)
137 | s_test = state_data.astype('int8')
138 | s1_test = s_test[:, 0].reshape([1, 1])
139 | s2_test = s_test[:, 1].reshape([1, 1])
140 | out = self.y_out(x_test, s1_test, s2_test)
141 | return out[0][0]
142 |
143 | def load_weights(self, infile="weight_dump.pk"):
144 | dump = pickle.load(open(infile, 'r'))
145 | [n.set_value(p) for n, p in zip(self.params, dump)]
146 |
147 | def save_weights(self, outfile="weight_dump.pk"):
148 | pickle.dump([n.get_value() for n in self.params], open(outfile, 'w'))
149 |
150 |
151 | class FCN(object):
152 | """FCN network"""
153 | def __init__(self, in_x, in_s1, in_s2, in_x_channels, imsize, batchsize=128,
154 | state_batch_size=1, l_1=150, l_2=150, l_3=150):
155 | """
156 | Allocate a FCN network with shared variable internal parameters. Assumes 16X16 images
157 |
158 | :type in_x: theano.tensor.dtensor4
159 | :param in_x: symbolic input image tensor, of shape [batchsize, in_x_channels, imsize[0], imsize[1]]
160 | Typically : first channel is image, second is the reward prior.
161 |
162 | :type in_s1: theano.tensor.bmatrix
163 | :param in_s1: symbolic input batches of vertical positions, of shape [batchsize, state_batch_size]
164 |
165 | :type in_s2: theano.tensor.bmatrix
166 | :param in_s2: symbolic input batches of horizontal positions, of shape [batchsize, state_batch_size]
167 |
168 | :type in_x_channels: int32
169 | :param in_x_channels: number of input channels
170 |
171 | :type imsize: tuple or list of length 2
172 | :param imsize: (image height, image width)
173 |
174 | :type batchsize: int32
175 | :param batchsize: batch size
176 |
177 | :type state_batch_size: int32
178 | :param state_batch_size: number of state inputs for each sample
179 |
180 | :type l_1: int32
181 | :param l_1: number of filters in first conv layer
182 |
183 | :type l_2: int32
184 | :param l_2: number of filters in second conv layer
185 |
186 | :type l_3: int32
187 | :param l_3: number of filters in third conv layer
188 |
189 | """
190 | self.b1 = theano.shared((np.random.randn(l_1) * 0.01).astype(theano.config.floatX))
191 | self.w1 = init_weights_T(l_1, in_x_channels, imsize[0]*2-1, imsize[1]*2-1)
192 | self.h1 = T.nnet.conv2d(in_x, self.w1, input_shape=[batchsize, self.w1.shape.eval()[1], imsize[0], imsize[1]],
193 | border_mode=(imsize[0]-1, imsize[1]-1),
194 | filter_shape=[l_1, in_x_channels, imsize[0]*2-1, imsize[1]*2-1])
195 | self.h1 = T.nnet.relu(self.h1 + self.b1.dimshuffle('x', 0, 'x', 'x'))
196 |
197 | self.w2 = init_weights_T(l_2, l_1, 1, 1)
198 | self.h2 = conv2D_keep_shape(self.h1, self.w2, image_shape=[batchsize, self.w1.shape.eval()[0],
199 | imsize[0], imsize[1]],
200 | filter_shape=[l_2, l_1, 1, 1])
201 | self.b2 = theano.shared((np.random.randn(l_2) * 0.01).astype(theano.config.floatX)) # 150 parameters
202 | self.h2 = T.nnet.relu(self.h2 + self.b2.dimshuffle('x', 0, 'x', 'x'))
203 |
204 | self.w3 = init_weights_T(l_3, l_2, 1, 1)
205 | self.h3 = conv2D_keep_shape(self.h2, self.w3, image_shape=[batchsize, self.w2.shape.eval()[0],
206 | imsize[0], imsize[1]],
207 | filter_shape=[l_3, l_2, 1, 1])
208 | self.b3 = theano.shared((np.random.randn(l_3) * 0.01).astype(theano.config.floatX)) # 150 parameters
209 | self.h3 = T.nnet.relu(self.h3 + self.b3.dimshuffle('x', 0, 'x', 'x'))
210 |
211 | # Select the conv-net channels at the state position (S1,S2). This is the FCN thing.
212 | self.h_out = self.h3[T.extra_ops.repeat(T.arange(self.h3.shape[0]), state_batch_size), :, in_s1.flatten(),
213 | in_s2.flatten()]
214 |
215 | # softmax output weights
216 | self.w_o = init_weights_T(l_3, 8)
217 | self.output = T.nnet.softmax(T.dot(self.h_out, self.w_o))
218 |
219 | self.params = [self.w1, self.b1, self.w2, self.b2, self.w3, self.b3, self.w_o]
220 |
--------------------------------------------------------------------------------
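A shape sketch (illustrative values only) of the batched inputs the FCN wrapper above consumes; each image in a batch carries statebatchsize query states, so the label vector is statebatchsize times longer than the image batch:

import numpy as np

batchsize, statebatchsize, H, W = 128, 10, 28, 28
X  = np.zeros((batchsize, 2, H, W), dtype=np.float32)      # obstacle map + reward prior channels
S1 = np.zeros((batchsize, statebatchsize), dtype=np.int8)  # vertical state positions per image
S2 = np.zeros((batchsize, statebatchsize), dtype=np.int8)  # horizontal state positions per image
y  = np.zeros(batchsize * statebatchsize, dtype=np.int8)   # one action label per (image, state) pair

# run_training slices labels as y[start*statebatchsize : end*statebatchsize]
# to keep them aligned with the image batch X[start:end].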
/LICENSE.md:
--------------------------------------------------------------------------------
1 | COPYRIGHT
2 |
3 | All contributions by the University of California:
4 | Copyright (c) 2015, 2016 The Regents of the University of California (Regents)
5 | All rights reserved.
6 |
7 | All other contributions:
8 | Copyright (c) 2015, 2016, the respective contributors
9 | All rights reserved.
10 |
11 | VIN uses a shared copyright model: each contributor holds copyright over
12 | their contributions to the VIN codebase. The project versioning records all such
13 | contribution and copyright details. If a contributor wants to further mark
14 | their specific copyright on a particular contribution, they should indicate
15 | their copyright solely in the commit message of the change when it is
16 | committed.
17 |
18 | LICENSE
19 |
20 | Redistribution and use in source and binary forms, with or without
21 | modification, are permitted provided that the following conditions are met:
22 |
23 | 1. Redistributions of source code must retain the above copyright notice, this
24 | list of conditions and the following disclaimer.
25 | 2. Redistributions in binary form must reproduce the above copyright notice,
26 | this list of conditions and the following disclaimer in the documentation
27 | and/or other materials provided with the distribution.
28 |
29 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
30 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
31 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
32 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
33 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
34 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
35 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
36 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
38 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 |
40 | CONTRIBUTION AGREEMENT
41 |
42 | By contributing to the VIN repository through pull-request, comment,
43 | or otherwise, the contributor releases their content to the
44 | license and copyright terms herein.
45 |
--------------------------------------------------------------------------------
/MDPs/@Gridworld_Graph8/GetOptTraj.m:
--------------------------------------------------------------------------------
1 | function [states_xy, states_one_hot] = GetOptTraj(M,s0)
2 | % Optimal trajectories from initial state to goal
3 | % return states_xy: cell array of trajectories in xy-space
4 | % states_one_hot: cell array of trajectories in one-hot vectors for
5 | % x and y
6 | [G,W] = M.getGraph_inv;
7 | G_inv = G'; % transpose graph to turn single-source SP into single-destination SP
8 | N = size(G,1);
9 | Ns = 1;
10 | init_states = s0;
11 | goal_s = M.map_ind_to_state(M.targetRow,M.targetCol);
12 | states = cell(Ns,1);
13 | states_xy = cell(Ns,1);
14 | states_one_hot = cell(Ns,1);
15 | i = 1;
16 | options.edge_weight = W;
17 | [~, pred] = shortest_paths(G_inv,goal_s,options); % all SP from goal
18 | for n = 1:Ns
19 | [path] = SP(pred,goal_s,init_states(n)); % get SP from goal->init
20 | path = path(end:-1:1)'; % reverse path since we want init->goal
21 | states{i} = path;
22 | i = i+1;
23 | end
24 | for i = 1:length(states)
25 | L = length(states{i});
26 | [r,c] = M.getCoords(states{i});
27 | row_mat = zeros(L,M.Nrow);
28 | col_mat = zeros(L,M.Ncol);
29 | for j = 1:L
30 | row_mat(j,r(j)) = 1;
31 | col_mat(j,c(j)) = 1;
32 | end
33 | states_one_hot{i} = [row_mat col_mat];
34 | states_xy{i} = [r,c];
35 | end
--------------------------------------------------------------------------------
/MDPs/@Gridworld_Graph8/Gridworld_Graph8.m:
--------------------------------------------------------------------------------
1 | classdef Gridworld_Graph8 < Finite_MDP_class
2 | % Gridworld domain with obstacles. Actions are
3 | % {n,s,e,w,ne,nw,se,sw}. Transitions are deterministic.
4 | properties
5 | Nrow = 1; % image rows
6 | Ncol = 1; % image columns
7 | img = []; % image
8 | obstacles = []; % indices of obstacles in image
9 | non_obstacles; % indices of non-obstacles in image
10 | targetRow = 1;
11 | targetCol = 1;
12 | G = []; % transition graph
13 | W = [];
14 | state_map_col; % map from states to col values
15 | state_map_row; % map from states to row values
16 | end
17 | methods (Static)
18 | function [newrow,newcol] = north(row,col,Nrow,Ncol,im)
19 | newrow = max(row-1,1);
20 | newcol = col;
21 | if im(newrow,newcol) == 0 % obstacle
22 | newrow = row;
23 | newcol = col;
24 | end
25 | end
26 | function [newrow,newcol] = northeast(row,col,Nrow,Ncol,im)
27 | newrow = max(row-1,1);
28 | newcol = min(col+1,Ncol);
29 | if im(newrow,newcol) == 0 % obstacle
30 | newrow = row;
31 | newcol = col;
32 | end
33 | end
34 | function [newrow,newcol] = northwest(row,col,Nrow,Ncol,im)
35 | newrow = max(row-1,1);
36 | newcol = max(col-1,1);
37 | if im(newrow,newcol) == 0 % obstacle
38 | newrow = row;
39 | newcol = col;
40 | end
41 | end
42 | function [newrow,newcol] = south(row,col,Nrow,Ncol,im)
43 | newrow = min(row+1,Nrow);
44 | newcol = col;
45 | if im(newrow,newcol) == 0 % obstacle
46 | newrow = row;
47 | newcol = col;
48 | end
49 | end
50 | function [newrow,newcol] = southeast(row,col,Nrow,Ncol,im)
51 | newrow = min(row+1,Nrow);
52 | newcol = min(col+1,Ncol);
53 | if im(newrow,newcol) == 0 % obstacle
54 | newrow = row;
55 | newcol = col;
56 | end
57 | end
58 | function [newrow,newcol] = southwest(row,col,Nrow,Ncol,im)
59 | newrow = min(row+1,Nrow);
60 | newcol = max(col-1,1);
61 | if im(newrow,newcol) == 0 % obstacle
62 | newrow = row;
63 | newcol = col;
64 | end
65 | end
66 | function [newrow,newcol] = east(row,col,Nrow,Ncol,im)
67 | newrow = row;
68 | newcol = min(col+1,Ncol);
69 | if im(newrow,newcol) == 0 % obstacle
70 | newrow = row;
71 | newcol = col;
72 | end
73 | end
74 | function [newrow,newcol] = west(row,col,Nrow,Ncol,im)
75 | newrow = row;
76 | newcol = max(col-1,1);
77 | if im(newrow,newcol) == 0 % obstacle
78 | newrow = row;
79 | newcol = col;
80 | end
81 | end
82 | function [rows,cols] = neighbors(row,col,Nrow,Ncol,im)
83 | [rows,cols] = Gridworld_Graph8.north(row,col,Nrow,Ncol,im);
84 | [newrow,newcol] = Gridworld_Graph8.south(row,col,Nrow,Ncol,im);
85 | rows = [rows,newrow]; cols = [cols,newcol];
86 | [newrow,newcol] = Gridworld_Graph8.east(row,col,Nrow,Ncol,im);
87 | rows = [rows,newrow]; cols = [cols,newcol];
88 | [newrow,newcol] = Gridworld_Graph8.west(row,col,Nrow,Ncol,im);
89 | rows = [rows,newrow]; cols = [cols,newcol];
90 | [newrow,newcol] = Gridworld_Graph8.northeast(row,col,Nrow,Ncol,im);
91 | rows = [rows,newrow]; cols = [cols,newcol];
92 | [newrow,newcol] = Gridworld_Graph8.northwest(row,col,Nrow,Ncol,im);
93 | rows = [rows,newrow]; cols = [cols,newcol];
94 | [newrow,newcol] = Gridworld_Graph8.southeast(row,col,Nrow,Ncol,im);
95 | rows = [rows,newrow]; cols = [cols,newcol];
96 | [newrow,newcol] = Gridworld_Graph8.southwest(row,col,Nrow,Ncol,im);
97 | rows = [rows,newrow]; cols = [cols,newcol];
98 | end
99 | end
100 | methods
101 | function obj = Gridworld_Graph8(ImageFile,targetRow,targetCol)
102 | if ischar(ImageFile)
103 | % construct graph from image file
104 | im = imread(ImageFile);
105 | img = double(rgb2gray(im));
106 | else
107 | % image is already a matrix
108 | img = ImageFile;
109 | end
110 | Nrow = size(img,1);
111 | Ncol = size(img,2);
112 | obstacles = find(img == 0);
113 | non_obstacles = find(img ~= 0);
114 | target = sub2ind([Nrow,Ncol],targetRow,targetCol);
115 | Ns = Nrow*Ncol;
116 | Na = 8;
117 | Pn = zeros(Ns,Ns); % north
118 | Ps = zeros(Ns,Ns); % south
119 | Pe = zeros(Ns,Ns); % east
120 | Pw = zeros(Ns,Ns); % west
121 | Pne = zeros(Ns,Ns); % north east
122 | Pnw = zeros(Ns,Ns); % north west
123 | Pse = zeros(Ns,Ns); % south east
124 | Psw = zeros(Ns,Ns); % south west
125 | G = zeros(Ns,Ns);
126 | R = -1*ones(Ns,Na);
127 | R(:,5:8) = R(:,5:8)*sqrt(2); % diagonal cost
128 | R(target,:) = 0;
129 | for row = 1:Nrow
130 | for col = 1:Ncol
131 | curpos = sub2ind([Nrow,Ncol],row,col);
132 | [rows,cols] = Gridworld_Graph8.neighbors(row,col,Nrow,Ncol,img);
133 | neighbor_inds = sub2ind([Nrow,Ncol],rows,cols);
134 | Pn(curpos,neighbor_inds(1)) = Pn(curpos,neighbor_inds(1)) + 1;
135 | Ps(curpos,neighbor_inds(2)) = Ps(curpos,neighbor_inds(2)) + 1;
136 | Pe(curpos,neighbor_inds(3)) = Pe(curpos,neighbor_inds(3)) + 1;
137 | Pw(curpos,neighbor_inds(4)) = Pw(curpos,neighbor_inds(4)) + 1;
138 | Pne(curpos,neighbor_inds(5)) = Pne(curpos,neighbor_inds(5)) + 1;
139 | Pnw(curpos,neighbor_inds(6)) = Pnw(curpos,neighbor_inds(6)) + 1;
140 | Pse(curpos,neighbor_inds(7)) = Pse(curpos,neighbor_inds(7)) + 1;
141 | Psw(curpos,neighbor_inds(8)) = Psw(curpos,neighbor_inds(8)) + 1;
142 | end
143 | end
144 | G = Pn | Ps | Pe | Pw | Pne | Pnw | Pse | Psw;
145 | W = max(max(max(max(max(max(max(Pn,Ps),Pe),Pw),sqrt(2)*Pne),sqrt(2)*Pnw),sqrt(2)*Pse),sqrt(2)*Psw);
146 | Pn = Pn(non_obstacles,:); Pn = Pn(:,non_obstacles);
147 | Ps = Ps(non_obstacles,:); Ps = Ps(:,non_obstacles);
148 | Pe = Pe(non_obstacles,:); Pe = Pe(:,non_obstacles);
149 | Pw = Pw(non_obstacles,:); Pw = Pw(:,non_obstacles);
150 | Pne = Pne(non_obstacles,:); Pne = Pne(:,non_obstacles);
151 | Pnw = Pnw(non_obstacles,:); Pnw = Pnw(:,non_obstacles);
152 | Pse = Pse(non_obstacles,:); Pse = Pse(:,non_obstacles);
153 | Psw = Psw(non_obstacles,:); Psw = Psw(:,non_obstacles);
154 | G = G(non_obstacles,:); G = G(:,non_obstacles);
155 | W = W(non_obstacles,:); W = W(:,non_obstacles);
156 | R = R(non_obstacles,:);
157 | P = cat(3,Pn,Ps,Pe,Pw,Pne,Pnw,Pse,Psw);
158 | obj@Finite_MDP_class(P,R);
159 | obj.Nrow = Nrow;
160 | obj.Ncol = Ncol;
161 | obj.img = img;
162 | obj.obstacles = obstacles;
163 | obj.non_obstacles = non_obstacles;
164 | obj.targetRow = targetRow;
165 | obj.targetCol = targetCol;
166 | obj.G = G;
167 | obj.W = W;
168 | [state_map_col, state_map_row] = meshgrid(1:Ncol,1:Nrow);
169 | obj.state_map_row = state_map_row(non_obstacles);
170 | obj.state_map_col = state_map_col(non_obstacles);
171 | end
172 | function [G,W] = getGraph(obj)
173 | % return directed graph G with weights W for gridworld
174 | G = sparse(double(obj.G));
175 | W = obj.W(obj.W~=0);
176 | end
177 | function [G,W] = getGraph_inv(obj)
178 | % return inverse directed graph G with weights W for gridworld
179 | G = sparse(double(obj.G'));
180 | W_inv = obj.W';
181 | W = W_inv(W_inv~=0);
182 | end
183 | function [im] = val2image(obj,val)
184 | % put values (for states) on the image
185 | im = zeros(obj.Nrow,obj.Ncol);
186 | im(obj.non_obstacles) = val;
187 | end
188 | function [im] = getValuePrior(obj)
189 | % get a prior for the value function (just Euclidean distance to goal)
190 | [s_map_col, s_map_row] = meshgrid(1:obj.Ncol,1:obj.Nrow);
191 | im = sqrt((s_map_col-obj.targetCol).^2 + (s_map_row-obj.targetRow).^2);
192 | end
193 | function [im] = getRewardPrior(obj)
194 | % get a prior for the reward function (just -1 for every non-goal state)
195 | im = -1*ones(obj.Nrow,obj.Ncol);
196 | im(obj.targetRow,obj.targetCol) = 10;
197 | end
198 | function [im] = getStateImage(obj, row, col)
199 | % get an image for the current state (just 0 for every other state)
200 | im = zeros(obj.Nrow,obj.Ncol);
201 | im(row,col) = 1;
202 | end
203 | function [s] = map_ind_to_state(obj,row,col)
204 | % find state index for given row and col
205 | s = find(obj.state_map_row == row & obj.state_map_col == col);
206 | end
207 | function [r,c] = getCoords(obj,states)
208 | [r,c] = ind2sub([obj.Nrow,obj.Ncol],obj.non_obstacles(states));
209 | end
210 | function [Nrow,Ncol] = getSize(obj)
211 | Nrow = obj.Nrow;
212 | Ncol = obj.Ncol;
213 | end
214 | end
215 | end
--------------------------------------------------------------------------------
/MDPs/@Gridworld_Graph8/OptimalActionsOnPath.m:
--------------------------------------------------------------------------------
1 | function [states_xy, states_one_hot] = OptimalActionsOnPath(M,traj)
2 | % returns the optimal next states (shortest distance to goal) along path in
3 | % xy-space
4 | % return states_xy: Ns x 2 matrix of optimal next states in xy-space
5 | % states_one_hot: Ns x (Nrow+Ncol) matrix of one-hot vectors for
6 | % x and y
7 | [G,W] = M.getGraph_inv;
8 | G_inv = G'; % transpose graph to turn single-source SP into single-destination SP
9 | % [dist] = all_shortest_paths(G);
10 | N = size(G,1);
11 | Ns = size(traj,1);
12 | goal_s = M.map_ind_to_state(M.targetRow,M.targetCol);
13 | states = zeros(Ns,1);
14 | states_xy = zeros(Ns,2);
15 | r_one_hot = zeros(Ns,M.Nrow);
16 | c_one_hot = zeros(Ns,M.Ncol);
17 | options.edge_weight = W;
18 | [~, pred] = shortest_paths(G_inv,goal_s,options); % all SP from goal
19 | for s = 1:Ns
20 | curr_s = M.map_ind_to_state(traj(s,2),traj(s,1)); % TODO - figure out why?
21 | next_s = pred(curr_s);
22 | if next_s == 0
23 | next_s = curr_s;
24 | end
25 | [r,c] = M.getCoords(next_s);
26 | states(s) = next_s;
27 | states_xy(s,:) = [r,c];
28 | r_one_hot(s,r) = 1;
29 | c_one_hot(s,c) = 1;
30 | end
31 | states_one_hot = [r_one_hot, c_one_hot];
--------------------------------------------------------------------------------
/MDPs/@Gridworld_Graph8/SampleGraphTraj.m:
--------------------------------------------------------------------------------
1 | function [states_xy, states_one_hot] = SampleGraphTraj(M,Ns)
2 | % sample Ns states trajectories from random nodes in graph object M to goal
3 | % return states_xy: cell array of trajectories in xy-space
4 | % states_one_hot: cell array of trajectories in one-hot vectors for
5 | % x and y
6 | [G,W] = M.getGraph_inv;
7 | G_inv = G'; % transpose graph to turn single-source SP into single-destination SP
8 | N = size(G,1);
9 | if N >= Ns
10 | rand_ind = randperm(N);
11 | else
12 | rand_ind = repmat(randperm(N),1,10); % hack for small domains
13 | end
14 |
15 | init_states = rand_ind(1:Ns);
16 | goal_s = M.map_ind_to_state(M.targetRow,M.targetCol);
17 | states = cell(Ns,1);
18 | states_xy = cell(Ns,1);
19 | states_one_hot = cell(Ns,1);
20 | i = 1;
21 | options.edge_weight = W;
22 | [~, pred] = shortest_paths(G_inv,goal_s,options); % all SP from goal
23 | for n = 1:Ns
24 | [path] = SP(pred,goal_s,init_states(n)); % get SP from goal->init
25 | path = path(end:-1:1)'; % reverse path since we want init->goal
26 | states{i} = path;
27 | i = i+1;
28 | end
29 | for i = 1:length(states)
30 | L = length(states{i});
31 | [r,c] = M.getCoords(states{i});
32 | row_mat = zeros(L,M.Nrow);
33 | col_mat = zeros(L,M.Ncol);
34 | for j = 1:L
35 | row_mat(j,r(j)) = 1;
36 | col_mat(j,c(j)) = 1;
37 | end
38 | states_one_hot{i} = [row_mat col_mat];
39 | states_xy{i} = [r,c];
40 | end
--------------------------------------------------------------------------------
/MDPs/Finite_MDP_class.m:
--------------------------------------------------------------------------------
1 | classdef Finite_MDP_class < MDP_class
2 | % Finite state and action MDP
3 | properties
4 | P = []; % transition kernel
5 | R = []; % reward
6 | A = []; % possible actions at each state
7 | Ns = 0; % number of states
8 | Na = 0; % number of actions
9 | end
10 | methods
11 | function obj = Finite_MDP_class(P,R,A)
12 | % constructor:
13 | % P is Ns*Ns*Na matrix of transitions P(s'|s,a)
14 | % R is Ns*Na matrix of deterministic rewards r(s,a)
15 | % A is Ns*Na binary matrix of available actions at each state
16 | % (default - all actions are possible).
17 | obj.P = P;
18 | obj.R = R;
19 | obj.Ns = size(P,1);
20 | obj.Na = size(P,3);
21 | if nargin < 3
22 | A = ones(obj.Ns,obj.Na);
23 | end
24 | obj.A = A;
25 | end
26 |
27 | function Ns= getNumStates(obj)
28 | Ns = obj.Ns;
29 | end
30 |
31 | function Na = getNumActions(obj)
32 | Na = obj.Na;
33 | end
34 |
35 | function a = getActions(obj,s)
36 | a = find(obj.A(s,:));
37 | end
38 |
39 | function r = getReward(obj,s,a)
40 | r = obj.R(s,a)';
41 | end
42 |
43 | function p = nextStateProb(obj,s,a)
44 | % get next state probability for action a
45 | % if a is a scalar the function returns a row vector
46 | % if a is a vector then a matrix is returned with the
47 | % probabilities on rows
48 | if numel(a) == 1
49 | p = squeeze(obj.P(s,:,a));
50 | else
51 | p = squeeze(obj.P(s,:,a))';
52 | end
53 | end
54 |
55 | function snext = sampleNextState(obj,s,a)
56 | % sample a next state given s and a
57 | snext = rand_choose(obj.nextStateProb(s,a));
58 | end
59 | end
60 | end
61 |
--------------------------------------------------------------------------------
/MDPs/MDP_class.m:
--------------------------------------------------------------------------------
1 | classdef MDP_class < matlab.mixin.Copyable
2 | % Interface for MDP
3 | methods (Abstract)
4 | Ns= getNumStates(obj); % total states
5 | a = getNumActions(obj); % total possible actions
6 | a = getActions(obj,s); % actions at state s
7 | r = getReward(obj,s,a);
8 | p = nextStateProb(obj,s,a);
9 | snext = sampleNextState(obj,s,a);
10 | end
11 | end
12 |
--------------------------------------------------------------------------------
/NN_run_training.py:
--------------------------------------------------------------------------------
1 | from NNobj import *
2 | from vin import vin
3 | from vin_untied import vin_untied
4 | from FCN import fcn
5 | from CNN import cnn
6 |
7 |
8 | def main():
9 | import argparse
10 | parser = argparse.ArgumentParser()
11 | parser.add_argument("--input")
12 | parser.add_argument("--output", default="None")
13 | parser.add_argument("--epochs", type=int, default=10)
14 | parser.add_argument("--profile", action="store_true")
15 | parser.add_argument("--dropout", action="store_true")
16 | parser.add_argument("--stepsize", type=float, default=.0002)
17 | parser.add_argument("--model",
18 | choices=["dense1", "dense2", "dense3", "conv", "valIterMultiBatch", "valIterBatch",
19 | "valIterMars", "valIterMarsSingle", "valIterBatchUntied", "fcn", "cnn"],
20 | default="dense")
21 | parser.add_argument("--unittest", action="store_true")
22 | parser.add_argument("--grad_check", action="store_true")
23 | parser.add_argument("--devtype", choices=["cpu", "gpu"], default="cpu")
24 | parser.add_argument("--warmstart", default="None")
25 | parser.add_argument("--reg", type=float, default=.0)
26 | parser.add_argument("--imsize", type=int, default=28)
27 | parser.add_argument("--k", type=int, default=10)
28 | parser.add_argument("--batchsize", type=int, default=128)
29 | parser.add_argument("--statebatchsize", type=int, default=1)
30 | parser.add_argument("--stepdecreaserate", type=float, default=1.0)
31 | parser.add_argument("--stepdecreasetime", type=int, default=10000)
32 | parser.add_argument("--data_fraction", type=float, default=1.0)
33 | args = parser.parse_args()
34 |
35 | if args.model == "fcn":
36 | # FCN network
37 | my_nn = fcn(model=args.model, im_size=[args.imsize, args.imsize], dropout=args.dropout,
38 | devtype=args.devtype, grad_check=args.grad_check, reg=args.reg,
39 | batchsize=args.batchsize, statebatchsize=args.statebatchsize)
40 | elif args.model == "cnn":
41 | # CNN network
42 | my_nn = cnn(model=args.model, im_size=[args.imsize, args.imsize], dropout=args.dropout,
43 | devtype=args.devtype, grad_check=args.grad_check, reg=args.reg,
44 | batchsize=args.batchsize)
45 | elif args.model == "valIterBatch":
46 | # VI network
47 | my_nn = vin(model=args.model, im_size=[args.imsize, args.imsize], dropout=args.dropout,
48 | devtype=args.devtype, grad_check=args.grad_check, reg=args.reg, k=args.k,
49 | batchsize=args.batchsize, statebatchsize=args.statebatchsize)
50 | elif args.model == "valIterBatchUntied":
51 | # VI network with untied weights
52 | my_nn = vin_untied(model=args.model, im_size=[args.imsize, args.imsize], dropout=args.dropout,
53 | devtype=args.devtype, grad_check=args.grad_check, reg=args.reg, k=args.k,
54 | batchsize=args.batchsize, statebatchsize=args.statebatchsize)
55 | else:
56 | # FC network
57 | my_nn = NNobj(model=args.model, im_size=[args.imsize, args.imsize], dropout=args.dropout,
58 | devtype=args.devtype, grad_check=args.grad_check, reg=args.reg)
59 | if args.warmstart != "None":
60 | print('warmstarting...')
61 | my_nn.load_weights(args.warmstart)
62 | my_nn.run_training(input=str(args.input), stepsize=args.stepsize, epochs=args.epochs,
63 | grad_check=args.grad_check, batch_size=args.batchsize, data_fraction=args.data_fraction)
64 | my_nn.save_weights(outfile=str(args.output))
65 |
66 | if __name__ == "__main__":
67 | main()
68 |
--------------------------------------------------------------------------------
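A hypothetical invocation of this script (the flag values here are illustrative; the actual experiment settings are in the .sh files under scripts/, which are not reproduced in this section):

python NN_run_training.py --input data/gridworld_28.mat --output results/grid28_VIN.pk \
       --model valIterBatch --imsize 28 --k 36 --batchsize 128 --statebatchsize 10 --epochs 30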
/NNobj.py:
--------------------------------------------------------------------------------
1 | # interface for NN object
2 |
3 | import numpy as np
4 | import pickle
5 | import scipy.io as sio
6 | import time
7 |
8 |
9 | class NNobj:
10 | "Class for a multi-layer perceptron object"
11 | def __init__(self):
12 | raise RuntimeError("Not implemented")
13 |
14 | def save_weights(self, outfile="weight_dump.pk"):
15 | pickle.dump([n.op.get_value() for n in self.params], open(outfile, 'w'))
16 |
17 | def load_weights(self, infile="weight_dump.pk"):
18 | dump = pickle.load(open(infile, 'r'))
19 | [n.op.set_value(p) for n, p in zip(self.params, dump)]
20 |
21 |
22 | # helper methods to print nice table (taken from CGT code)
23 | def fmt_item(x, l):
24 | if isinstance(x, np.ndarray):
25 | assert x.ndim==0
26 | x = x.item()
27 | if isinstance(x, float): rep = "%g"%x
28 | else: rep = str(x)
29 | return " "*(l - len(rep)) + rep
30 |
31 |
32 | def fmt_row(width, row, header=False):
33 | out = " | ".join(fmt_item(x, width) for x in row)
34 | if header: out = out + "\n" + "-"*len(out)
35 | return out
36 |
--------------------------------------------------------------------------------
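A tiny usage sketch of the table helpers above (as used by the training loops in CNN.py, FCN.py, and FC.py); the numbers are made up:

print fmt_row(10, ["Epoch", "Train NLL", "Train Err", "Test NLL", "Test Err", "Epoch Time"], header=True)
print fmt_row(10, [0, 1.2345, 0.5, 1.3456, 0.52, 12.3])
# prints a right-aligned, "|"-separated row per call, with an underline after the header row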
/NNpredict.m:
--------------------------------------------------------------------------------
1 | function [y] = NNpredict(nn,im,value,x,y,maxX,maxY)
2 | % call python to generate prediction for nn object, with input image and
3 | % x,y state
[remainder of NNpredict.m truncated in source]
--------------------------------------------------------------------------------
/ObstacleGenerators/@obstacle_gen/obstacle_gen.m:
--------------------------------------------------------------------------------
[lines 1-81 truncated in source]
82 | p = cube(obj, i, j, 0, 1);
83 | % p.FaceColor = 'interp';
84 | % p.FaceLighting = 'gouraud';
85 | end
86 | end
87 | end
88 | view(3);
89 | end
90 | function [res] = add_border(obj)
91 | im_try = insertShape(obj.dom, 'Rectangle', [1, 1, obj.domsize(1), obj.domsize(2)], ...
92 | 'LineWidth', 1,'Opacity',1,'SmoothEdges',false);
93 | if obj.check_mask(im_try)
94 | res = 1;
95 | else
96 | obj.dom = im_try;
97 | res = 0;
98 | end
99 | end
100 | function p = cube(obj, X0, Y0, Z0, C0)
101 | X1 = [0;0;1;1], Y1 = [0;1;1;0], Z1 = [0;0;0;0];
102 | X2 = [0;0;1;1], Y2 = [0;1;1;0], Z2 = [1;1;1;1];
103 | Y3 = [0;0;1;1], Z3 = [0;1;1;0], X3 = [0;0;0;0];
104 | Y4 = [0;0;1;1], Z4 = [0;1;1;0], X4 = [1;1;1;1];
105 | X5 = [0;0;1;1], Z5 = [0;1;1;0], Y5 = [0;0;0;0];
106 | X6 = [0;0;1;1], Z6 = [0;1;1;0], Y6 = [1;1;1;1];
107 | X = [X1,X2,X3,X4,X5,X6] + X0;
108 | Y = [Y1,Y2,Y3,Y4,Y5,Y6] + Y0;
109 | Z = [Z1,Z2,Z3,Z4,Z5,Z6] + Z0;
110 | C = C0*rand(size(X));
111 | p = patch(X,Y,Z,C);
112 | end
113 | end
114 | end
115 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Value Iteration Networks
2 | Code for NIPS 2016 paper:
3 |
4 | Value Iteration Networks
5 |
6 | Aviv Tamar, Yi Wu, Garrett Thomas, Sergey Levine, and Pieter Abbeel
7 |
8 | UC Berkeley
9 |
10 |
11 | Requires:
12 | - Python (2.7)
13 | - Theano (0.8)
14 |
15 | For generating the gridworld data and visualizing results, also requires:
16 | - Matlab (2015 or later, required for calling Python objects when visualizing trajectories)
17 | - Matlab BGL: http://www.mathworks.com/matlabcentral/fileexchange/10922-matlabbgl
18 | Put it in the matlab_bgl folder.
19 |
20 | To start: the scripts directory contains scripts for generating the data,
21 | and training the different models.
22 |
23 | scripts/make_data_gridworld_nips.m generates the training data (random grid worlds).
24 | Alternatively, you can use the existing data files in the data folder (instead of generating them).
25 |
26 | scripts/nips_gridworld_experiments_VIN.sh shows how to train the VIN models.
27 |
28 | After training, a weights file (e.g., /results/grid28_VIN.pk) will be created. You can then run:
29 | - script_viz_policy.m to run the trained VIN with the learned weights and view the trajectories
30 | it produces (line 17 selects the weights file).
31 | - test_network.m to numerically evaluate the learned network on a test set (needs to be generated).
32 |
33 |
34 | # Related implementations:
35 | Kent Sommer's implementation of VINs (including data generation) in Python + PyTorch
36 |
37 | https://github.com/kentsommer/pytorch-value-iteration-networks
38 |
39 | Abhishek Kumar's implementation of VINs in TensorFlow
40 |
41 | https://github.com/TheAbhiKumar/tensorflow-value-iteration-networks
42 |
--------------------------------------------------------------------------------
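A rough programmatic equivalent of what the training scripts do through NN_run_training.py (a sketch only; the hyperparameter values below are illustrative and not necessarily those used in the paper):

from vin import vin

my_nn = vin(model="valIterBatch", im_size=[28, 28], dropout=False, devtype="cpu",
            grad_check=False, reg=0, k=36, batchsize=128, statebatchsize=10)
my_nn.run_training(input="data/gridworld_28.mat", stepsize=0.0002, epochs=30,
                   grad_check=False, batch_size=128, data_fraction=1)
my_nn.save_weights(outfile="results/grid28_VIN.pk")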
/addpaths.m:
--------------------------------------------------------------------------------
1 | addpath(genpath('./matlab_bgl'));
2 | addpath('MDPs');
3 | addpath('ObstacleGenerators');
4 | addpath('util');
--------------------------------------------------------------------------------
/data/gridworld_16.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/avivt/VIN/fe11bb1ae8ad9bcb3a02e2cc5e21b9499ccf0db4/data/gridworld_16.mat
--------------------------------------------------------------------------------
/data/gridworld_16_test.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/avivt/VIN/fe11bb1ae8ad9bcb3a02e2cc5e21b9499ccf0db4/data/gridworld_16_test.mat
--------------------------------------------------------------------------------
/data/gridworld_28.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/avivt/VIN/fe11bb1ae8ad9bcb3a02e2cc5e21b9499ccf0db4/data/gridworld_28.mat
--------------------------------------------------------------------------------
/data/gridworld_28_test.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/avivt/VIN/fe11bb1ae8ad9bcb3a02e2cc5e21b9499ccf0db4/data/gridworld_28_test.mat
--------------------------------------------------------------------------------
/data/gridworld_8.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/avivt/VIN/fe11bb1ae8ad9bcb3a02e2cc5e21b9499ccf0db4/data/gridworld_8.mat
--------------------------------------------------------------------------------
/data/gridworld_8_test.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/avivt/VIN/fe11bb1ae8ad9bcb3a02e2cc5e21b9499ccf0db4/data/gridworld_8_test.mat
--------------------------------------------------------------------------------
/extract_action.m:
--------------------------------------------------------------------------------
1 | function [actions] = extract_action(traj)
2 | % extract actions from state trajectory
3 | % an action corresponds to difference in state (n,s,e,w,ne,nw,se,sw)
4 | numActions = 8;
5 | action_vecs = ([[-1,0; 1,0; 0,1; 0,-1]; 1/sqrt(2)*[-1,1; -1,-1; 1,1; 1,-1]])'; % state difference unit vectors for each action
6 | % action_vecs_unnorm = ([-1,0; 1,0; 0,1; 0,-1; -1,1; -1,-1; 1,1; 1,-1]); % un-normalized state difference vectors
7 |
8 | state_diff = diff(traj); % state difference
9 | norm_state_diff = state_diff.*repmat(1./sqrt(sum(state_diff.^2,2)),1,size(state_diff,2)); % normalized state difference
10 | prj_state_diff = norm_state_diff*action_vecs; % project state difference on action vectors
11 | actions_one_hot = abs(prj_state_diff-1)<1e-5; % action corresponds to projection==1
12 | actions = actions_one_hot * (1:numActions)'; % action labels
--------------------------------------------------------------------------------
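For reference, the same action-extraction logic can be written as a short numpy sketch. This is an illustrative re-implementation only; extract_actions and the example trajectory below are hypothetical and not part of the repository, which calls the Matlab function above.

```python
import numpy as np

# 8 unit action vectors (N, S, E, W, NE, NW, SE, SW), matching extract_action.m
action_vecs = np.array([[-1, 0], [1, 0], [0, 1], [0, -1],
                        [-1, 1], [-1, -1], [1, 1], [1, -1]], dtype=float)
action_vecs[4:] /= np.sqrt(2)

def extract_actions(traj):
    """traj: (T, 2) array of grid positions; returns (T-1,) one-based action labels."""
    diff = np.diff(traj.astype(float), axis=0)
    diff /= np.linalg.norm(diff, axis=1, keepdims=True)      # normalized state differences
    proj = diff @ action_vecs.T                              # project onto the action vectors
    return np.argmax(np.abs(proj - 1.0) < 1e-5, axis=1) + 1  # projection == 1 picks the action

# example: one step north, then one step north-east -> actions [1, 5]
print(extract_actions(np.array([[5, 5], [4, 5], [3, 6]])))
```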
/script_make_data.m:
--------------------------------------------------------------------------------
1 | % script to generate training data for learning trajectories
2 | % The data is organized in batches of multiple states from the same domain.
3 | % The batch size is determined by state_batch_size.
4 | % In addition, a flattened data (non-batched) organization is maintained.
5 |
6 | addpaths;
7 | % set parameters (defaults)
8 | set_var('size_1',28); set_var('size_2',28);
9 | dom_size = [size_1,size_2]; % domain size
10 | maxTrajLen = (size_1+size_2); % this is approximate, just to preallocate memory
11 | set_var('Ndomains', 10000); % number of domains
12 | set_var('maxObs', 10); % maximum number of obstacles in a domain
13 | set_var('maxObsSize',0.0); % maximum obstacle size
14 | set_var('Ntrajs', 1); % trajectories from each domain
15 | set_var('goal', [1,1]); % goal position
16 | set_var('rand_goal', false); % random goal position
17 | set_var('state_batch_size', 1); % batchsize for states per each data sample
18 |
19 | % containers for flattened data
20 | maxSamples = Ndomains*Ntrajs*maxTrajLen/2; % this is approximate, just to preallocate memory
21 | im_data = uint8(zeros([maxSamples, size_1*size_2])); % obstacle image
22 | value_data = uint8(zeros([maxSamples, size_1*size_2])); % value function prior (e.g., a reward function)
23 | state_onehot_data = uint8(zeros([maxSamples, size_1+size_2])); % 1-hot vectors of position for each dimension (x,y)
24 | state_xy_data = uint8(zeros([maxSamples, 2])); % position (in both coordinates)
25 | label_data = uint8(zeros([maxSamples, 1])); % action
26 |
27 | % containers for batched data
28 | numSamples = 1;
29 | all_states_xy = cell(Ndomains*Ntrajs,1);
30 | all_doms = cell(Ndomains*Ntrajs,1);
31 | numTrajs = 1;
32 | maxBatches = ceil(Ndomains*Ntrajs*maxTrajLen/state_batch_size);
33 | numBatches = 1;
34 | batch_im_data = uint8(zeros([maxBatches, size_1*size_2])); % obstacle image
35 | batch_value_data = uint8(zeros([maxBatches, size_1*size_2])); % value function prior
36 | state_x_data = uint8(zeros([maxBatches, state_batch_size])); % position (in 1st coordinate)
37 | state_y_data = uint8(zeros([maxBatches, state_batch_size])); % position (in 2nd coordinate)
38 | batch_label_data = uint8(zeros([maxBatches, state_batch_size])); % action
39 |
40 | %% make data
41 | figure;
42 | dom = 1;
43 | while dom <= Ndomains
44 | % allocate buffers for batched data from this domain
45 | s1_buffer = uint8(zeros([ceil(Ntrajs*maxTrajLen/state_batch_size), 1]));
46 | s2_buffer = uint8(zeros([ceil(Ntrajs*maxTrajLen/state_batch_size), 1]));
47 | label_buffer = uint8(zeros([ceil(Ntrajs*maxTrajLen/state_batch_size), 1]));
48 | % generate random domain
49 | buffer_pos = 1;
50 | if rand_goal
51 | goal(1,1) = 1+randi(size_1-1);
52 | goal(1,2) = 1+randi(size_2-1);
53 | end
54 | % generate random obstacles
55 | obs = obstacle_gen(dom_size,goal,maxObsSize);
56 | n_obs = obs.add_N_rand_obs(randi(maxObs));
57 | add_border_res = obs.add_border;
58 | if n_obs == 0 || add_border_res
59 | disp('no obstacles added, or problem with border, regenerating map')
60 | continue; % no obstacles added, or problem with border, skip
61 | end
62 | im = double(rgb2gray(obs.getimage));
63 | im = max(max(im)) - im; im = im./max(max(im)); imagesc(im); drawnow;
64 | % make graph (deterministic MDP)
65 | G = Gridworld_Graph8(im,goal(1),goal(2));
66 | value_prior = G.getRewardPrior;
67 | % sample shortest-path trajectories in graph
68 | [states_xy, states_one_hot] = SampleGraphTraj(G,Ntrajs);
69 | hold on;
70 | for i = 1:Ntrajs % loop over trajectories in domain
71 | if ~isempty(states_xy{i}) && size(states_xy{i},1)>1
72 | % calculate the actions along the trajectory
73 | actions = extract_action(states_xy{i});
74 | ns = size(states_xy{i},1)-1;
75 | % add trajectory to dataset
76 | % we transpose - since python is row major order
77 | % we subtract 1 - since python indexing starts at zero
78 | im_data(numSamples:numSamples+ns-1,:) = repmat(reshape(im',1,[]),ns,1);
79 | value_data(numSamples:numSamples+ns-1,:) = repmat(reshape(value_prior',1,[]),ns,1);
80 | state_onehot_data(numSamples:numSamples+ns-1,:) = states_one_hot{i}(1:ns,:);
81 | state_xy_data(numSamples:numSamples+ns-1,:) = states_xy{i}(1:ns,:)-1;
82 | s1_buffer(buffer_pos:buffer_pos+ns-1,:) = states_xy{i}(1:ns,1)-1;
83 | s2_buffer(buffer_pos:buffer_pos+ns-1,:) = states_xy{i}(1:ns,2)-1;
84 | label_data(numSamples:numSamples+ns-1,:) = actions - 1;
85 | label_buffer(buffer_pos:buffer_pos+ns-1,:) = actions - 1;
86 | % update sample counters and flattened data containers
87 | numSamples = numSamples+ns;
88 | buffer_pos = buffer_pos+ns;
89 | all_states_xy{numTrajs} = states_xy{i};
90 | all_doms{numTrajs} = uint8(im);
91 | numTrajs = numTrajs + 1;
92 | % plot
93 | plot(states_xy{i}(:,2),states_xy{i}(:,1));drawnow;
94 | end
95 | end
96 | % batch size is fixed. We replicate the last sample to fill the batch.
97 | if mod(buffer_pos-1,state_batch_size)~=0
98 | samples_to_fill = state_batch_size-mod(buffer_pos,state_batch_size);
99 | s1_buffer(buffer_pos : buffer_pos+samples_to_fill) = s1_buffer(buffer_pos-1);
100 | s2_buffer(buffer_pos : buffer_pos+samples_to_fill) = s2_buffer(buffer_pos-1);
101 | label_buffer(buffer_pos : buffer_pos+samples_to_fill) = label_buffer(buffer_pos-1);
102 | buffer_pos = buffer_pos+samples_to_fill+1;
103 | end
104 | % fill data containers with random permutation of the data
105 | s1_buffer = s1_buffer(1:buffer_pos-1);
106 | s2_buffer = s2_buffer(1:buffer_pos-1);
107 | label_buffer = label_buffer(1:buffer_pos-1);
108 | rand_ind = randperm(buffer_pos-1);
109 | s1_buffer = s1_buffer(rand_ind);
110 | s2_buffer = s2_buffer(rand_ind);
111 | label_buffer = label_buffer(rand_ind);
112 | s1_batch = reshape(s1_buffer,state_batch_size,[])';
113 | s2_batch = reshape(s2_buffer,state_batch_size,[])';
114 | label_batch = reshape(label_buffer,state_batch_size,[])';
115 | cur_batch_size = size(s1_batch,1);
116 | state_x_data(numBatches:numBatches+cur_batch_size-1,:) = s1_batch;
117 | state_y_data(numBatches:numBatches+cur_batch_size-1,:) = s2_batch;
118 | batch_label_data(numBatches:numBatches+cur_batch_size-1,:) = label_batch;
119 | batch_im_data(numBatches:numBatches+cur_batch_size-1,:) = repmat(reshape(im',1,[]),cur_batch_size,1);
120 | batch_value_data(numBatches:numBatches+cur_batch_size-1,:) = repmat(reshape(value_prior',1,[]),cur_batch_size,1);
121 | numBatches = numBatches+cur_batch_size;
122 | % pause;
123 | disp([num2str(Ndomains - dom) ' remaining domains']);
124 | hold off;
125 | dom = dom + 1;
126 | end
127 | % remove empty (preallocated) space in containers
128 | im_data = im_data(1:numSamples-1,:);
129 | value_data = value_data(1:numSamples-1,:);
130 | state_onehot_data = state_onehot_data(1:numSamples-1,:);
131 | state_xy_data = state_xy_data(1:numSamples-1,:);
132 | label_data = label_data(1:numSamples-1,:);
133 | all_states_xy = all_states_xy(1:numTrajs-1);
134 | all_doms = all_doms(1:numTrajs-1);
135 | state_x_data = state_x_data(1:numBatches-1,:);
136 | state_y_data = state_y_data(1:numBatches-1,:);
137 | batch_label_data = batch_label_data(1:numBatches-1,:);
138 | batch_im_data = batch_im_data(1:numBatches-1,:);
139 | batch_value_data = batch_value_data(1:numBatches-1,:);
140 | %% save data
141 | disp('saving data');
142 | set_var('data_dir', '~/Data/LearnTraj/');
143 | set_var('data_file', 'data.mat'); % store training data variables
144 | save([data_dir data_file],'im_data','state_onehot_data','label_data','value_data',...
145 | 'state_xy_data','state_x_data','state_y_data','batch_label_data','batch_im_data','batch_value_data');
--------------------------------------------------------------------------------
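As a quick sanity check of the batched containers that script_make_data.m saves (and that vin.run_training later loads), the following hypothetical Python snippet prints their shapes; it assumes one of the provided data files, e.g. ./data/gridworld_28.mat.

```python
import scipy.io as sio

d = sio.loadmat('./data/gridworld_28.mat')
print(d['batch_im_data'].shape)     # (num_batches, size_1*size_2) obstacle images
print(d['batch_value_data'].shape)  # (num_batches, size_1*size_2) reward prior
print(d['state_x_data'].shape)      # (num_batches, state_batch_size) first coordinate
print(d['state_y_data'].shape)      # (num_batches, state_batch_size) second coordinate
print(d['batch_label_data'].shape)  # (num_batches, state_batch_size) zero-based actions
```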
/script_viz_policy.m:
--------------------------------------------------------------------------------
1 | % script to visualize trajectories from a trained NN policy
2 |
3 | tmp = py.vin.vin; clear tmp; % to load Python
4 |
5 | % set parameters and load NN
6 | size_1 = 28; size_2 = 28;
7 | k = 36;
8 | prior = 'reward';
9 | model = 'vin';
10 | if strcmp(model,'cnn')
11 | nn = py.CNN.cnn(pyargs('im_size',int32([size_1,size_2]),'batchsize',int32(1)));
12 | elseif strcmp(model,'vin')
13 | nn = py.vin.vin(pyargs('im_size',int32([size_1,size_2]),'k',int32(k),'batchsize',int32(1),'statebatchsize',int32(1)));
14 | elseif strcmp(model,'fcn')
15 | nn = py.FCN.fcn(pyargs('im_size',int32([size_1,size_2]),'batchsize',int32(1),'statebatchsize',int32(1)));
16 | end
17 | weight_file = './results/grid28_VIN.pk';
18 | nn.load_weights(pyargs('infile',weight_file));
19 |
20 | %% Evaluate NN
21 | % Predict trajectories in closed-loop, and compare with shortest path
22 | dom_size = [size_1,size_2]; % domain size
23 | Ndomains = 100; % number of domains to evaluate
24 | maxObs = 50; % maximum number of obstacles in a domain
25 | maxObsSize = 2.0; % maximum obstacle size
26 | Ntrajs = 1; % trajectories from each domain
27 | numActions = 8;
28 | action_vecs = ([[-1,0; 1,0; 0,1; 0,-1]; 1/sqrt(2)*[-1,1; -1,-1; 1,1; 1,-1]])'; % state difference unit vectors for each action
29 | action_vecs_unnorm = ([-1,0; 1,0; 0,1; 0,-1; -1,1; -1,-1; 1,1; 1,-1]); % un-normalized state difference vectors
30 | plot_value = false;
31 |
32 | % containers for data
33 | numSamples = 1;
34 | numTrajs = 1;
35 | figure(1);
36 | for dom = 1:Ndomains
37 | % generate random domain
38 | goal(1,1) = 1+randi(size_1-1);
39 | goal(1,2) = 1+randi(size_2-1);
40 | % generate random obstacles
41 | obs = obstacle_gen(dom_size,goal,maxObsSize);
42 | n_obs = obs.add_N_rand_obs(randi(maxObs));
43 | add_border_res = obs.add_border;
44 | if n_obs == 0 || add_border_res
45 | disp('no obstacles added, or problem with border, regenerating map')
46 | continue; % no obstacles added, or problem with border, skip
47 | end
48 | im = double(rgb2gray(obs.getimage));
49 | im = max(max(im)) - im; im = im./max(max(im)); imagesc(im); drawnow;
50 | % make graph (deterministic MDP)
51 | G = Gridworld_Graph8(im,goal(1),goal(2));
52 | value_prior = G.getRewardPrior;
53 | % sample shortest-path trajectories in graph
54 | [states_xy, states_one_hot] = SampleGraphTraj(G,Ntrajs);
55 | figure(1); hold on;
56 | for i = 1:Ntrajs
57 | if ~isempty(states_xy{i}) && size(states_xy{i},1)>1
58 | L = size(states_xy{i},1)*2;
59 | pred_traj = zeros(L,2);
60 | pred_traj(1,:) = states_xy{i}(1,:);
61 | for j = 2:L
62 | % create state vector and image vector, and save to file
63 | state_xy_data = uint8([pred_traj(j-1,1)-1, pred_traj(j-1,2)-1]);
64 | im_data = uint8(reshape(im',1,[]));
65 | value_data = uint8(reshape(value_prior',1,[]));
66 | % call NN to predict action from input file (passing data directly from Matlab to python is difficult)
67 | save('test_input.mat','im_data','value_data','state_xy_data');
68 | a = nn.predict(pyargs('input', 'test_input.mat'))+1;
69 | % calculate next state based on action
70 | s = G.map_ind_to_state(pred_traj(j-1,1),pred_traj(j-1,2));
71 | ns = G.sampleNextState(s,a);
72 | [nr,nc] = G.getCoords(ns);
73 | pred_traj(j,2) = nc;
74 | pred_traj(j,1) = nr;
75 | if (nr == goal(1)) && (nc == goal(2))
76 | pred_traj(j+1:end,2) = nc;
77 | pred_traj(j+1:end,1) = nr;
78 | break;
79 | end
80 | end
81 | % plot stuff
82 | figure(1);
83 | plot(states_xy{i}(:,2),states_xy{i}(:,1));drawnow;
84 | plot(pred_traj(:,2),pred_traj(:,1),'-X');drawnow;
85 | legend('Shortest path','Predicted path');
86 | plot(states_xy{i}(1,2),states_xy{i}(1,1),'-o');drawnow;
87 | plot(states_xy{i}(end,2),states_xy{i}(end,1),'-s');drawnow;
88 | hold off;
89 | if plot_value
90 | figure(2);
91 | pred_val = nn.predict_value(pyargs('input', 'test_input.mat'));
92 | val_map = python_ndarray_to_matrix(pred_val(1),[size_1,size_2]);
93 | r_map = python_ndarray_to_matrix(pred_val(2),[size_1,size_2]);
94 | subplot(1,2,1);
95 | imagesc(r_map);
96 | title('Learned Reward');
97 | subplot(1,2,2);
98 | imagesc(val_map);
99 | title('Learned Value');
100 | drawnow;
101 | end
102 | pause;%(0.6);
103 | end
104 | end
105 | end
106 |
--------------------------------------------------------------------------------
/scripts/make_data_gridworld_nips.m:
--------------------------------------------------------------------------------
1 | % script to make gridworld data for the nips experiments
2 | clear all;
3 | data_dir = './data';
4 | dodraw = false;
5 | %% Generate 8x8 map data
6 | data_file = 'gridworld_8.mat';
7 | size_1 = 8;
8 | size_2 = 8;
9 | add_border = true;
10 | maxObs = 30;
11 | maxObsSize = 0.0;
12 | Ndomains = 5000;
13 | Ntrajs = 7;
14 | prior = 'reward';
15 | rand_goal = true;
16 | zero_min_action = true;
17 | state_batch_size = 1;
18 | script_make_data;
19 | clear all;
20 |
21 | %% Generate 16x16 map data
22 | data_dir = './data';
23 | data_file = 'gridworld_16.mat';
24 | size_1 = 16;
25 | size_2 = 16;
26 | add_border = true;
27 | maxObs = 40;
28 | maxObsSize = 1.0;
29 | Ndomains = 5000;
30 | Ntrajs = 7;
31 | prior = 'reward';
32 | rand_goal = true;
33 | zero_min_action = true;
34 | state_batch_size = 1;
35 | script_make_data;
36 | clear all;
37 |
38 | %% Generate 28x28 map data
39 | data_dir = './data';
40 | data_file = 'gridworld_28.mat';
41 | size_1 = 28;
42 | size_2 = 28;
43 | add_border = true;
44 | maxObs = 50;
45 | maxObsSize = 2.0;
46 | Ndomains = 5000;
47 | Ntrajs = 7;
48 | prior = 'reward';
49 | rand_goal = true;
50 | zero_min_action = true;
51 | state_batch_size = 1;
52 | script_make_data;
53 | clear all;
54 |
--------------------------------------------------------------------------------
/scripts/nips_gridworld_experiments_CNN.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Script for running nips gridworld experiments with CNN networks
3 | # 8x8 map
4 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_8_state_channel.mat --output ./nips16results/gridworld/grid8_CNN.pk --epochs 20 --model cnn --stepsize 0.01 --imsize 8 --reg 0.0 --batchsize 128 | tee -a ./nips16results/gridworld/out_grid8_CNN.txt ;
5 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_8_state_channel.mat --output ./nips16results/gridworld/grid8_CNN.pk --epochs 20 --model cnn --stepsize 0.002 --imsize 8 --reg 0.0 --batchsize 128 --warmstart ./nips16results/gridworld/grid8_CNN.pk | tee -a ./nips16results/gridworld/out_grid8_CNN.txt ;
6 | # 16x16 map
7 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16_state_channel.mat --output ./nips16results/gridworld/grid16_CNN.pk --epochs 20 --model cnn --stepsize 0.01 --imsize 16 --reg 0.0 --batchsize 128 | tee -a ./nips16results/gridworld/out_grid16_CNN.txt ;
8 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16_state_channel.mat --output ./nips16results/gridworld/grid16_CNN.pk --epochs 20 --model cnn --stepsize 0.002 --imsize 16 --reg 0.0 --batchsize 128 --warmstart ./nips16results/gridworld/grid16_CNN.pk | tee -a ./nips16results/gridworld/out_grid16_CNN.txt ;
9 | # 28x28 map
10 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_28_state_channel.mat --output ./nips16results/gridworld/grid28_CNN.pk --epochs 20 --model cnn --stepsize 0.01 --imsize 28 --reg 0.0 --batchsize 128 | tee -a ./nips16results/gridworld/out_grid28_CNN.txt ;
11 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_28_state_channel.mat --output ./nips16results/gridworld/grid28_CNN.pk --epochs 20 --model cnn --stepsize 0.002 --imsize 28 --reg 0.0 --batchsize 128 --warmstart ./nips16results/gridworld/grid28_CNN.pk | tee -a ./nips16results/gridworld/out_grid28_CNN.txt ;
12 |
--------------------------------------------------------------------------------
/scripts/nips_gridworld_experiments_FCN.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Script for running nips gridworld experiments with FCN networks
3 | # 8x8 map
4 | THEANO_FLAGS='floatX=float32,device=cpu' python NN_run_training.py --input ./data/gridworld_8.mat --output ./nips16results/gridworld/grid8_FCN.pk --epochs 30 --model fcn --stepsize 0.01 --imsize 8 --reg 0.0 --batchsize 12 --statebatchsize 10 | tee -a ./nips16results/gridworld/out_grid8_FCN.txt ;
5 | THEANO_FLAGS='floatX=float32,device=cpu' python NN_run_training.py --input ./data/gridworld_8.mat --output ./nips16results/gridworld/grid8_FCN.pk --epochs 30 --model fcn --stepsize 0.005 --imsize 8 --reg 0.0 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid8_FCN.pk | tee -a ./nips16results/gridworld/out_grid8_FCN.txt ;
6 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid8_FCN.pk --epochs 30 --model fcn --stepsize 0.002 --imsize 16 --reg 0.0 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_FCN.pk | tee -a ./nips16results/gridworld/out_grid16_FCN.txt ;
7 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid8_FCN.pk --epochs 30 --model fcn --stepsize 0.001 --imsize 16 --reg 0.0 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_FCN.pk | tee -a ./nips16results/gridworld/out_grid16_FCN.txt ;
8 |
9 | # 16x16 map
10 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_FCN.pk --epochs 30 --model fcn --stepsize 0.01 --imsize 16 --reg 0.0 --batchsize 12 --statebatchsize 10 | tee -a ./nips16results/gridworld/out_grid16_FCN.txt ;
11 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_FCN.pk --epochs 30 --model fcn --stepsize 0.005 --imsize 16 --reg 0.0 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_FCN.pk | tee -a ./nips16results/gridworld/out_grid16_FCN.txt ;
12 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_FCN.pk --epochs 30 --model fcn --stepsize 0.002 --imsize 16 --reg 0.0 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_FCN.pk | tee -a ./nips16results/gridworld/out_grid16_FCN.txt ;
13 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_FCN.pk --epochs 30 --model fcn --stepsize 0.001 --imsize 16 --reg 0.0 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_FCN.pk | tee -a ./nips16results/gridworld/out_grid16_FCN.txt ;
14 |
15 | # 28x28 map
16 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_28.mat --output ./nips16results/gridworld/grid28_FCN.pk --epochs 30 --model fcn --stepsize 0.01 --imsize 28 --reg 0.0 --batchsize 12 --statebatchsize 10 | tee -a ./nips16results/gridworld/out_grid28_FCN.txt ;
17 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_28.mat --output ./nips16results/gridworld/grid28_FCN.pk --epochs 30 --model fcn --stepsize 0.005 --imsize 28 --reg 0.0 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid28_FCN.pk | tee -a ./nips16results/gridworld/out_grid28_FCN.txt ;
18 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_28.mat --output ./nips16results/gridworld/grid28_FCN.pk --epochs 30 --model fcn --stepsize 0.002 --imsize 28 --reg 0.0 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid28_FCN.pk | tee -a ./nips16results/gridworld/out_grid28_FCN.txt ;
19 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_28.mat --output ./nips16results/gridworld/grid28_FCN.pk --epochs 30 --model fcn --stepsize 0.001 --imsize 28 --reg 0.0 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid28_FCN.pk | tee -a ./nips16results/gridworld/out_grid28_FCN.txt ;
20 |
21 | # 16x16 map 0.5 data
22 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_FCN_05.pk --epochs 30 --model fcn --stepsize 0.01 --imsize 16 --reg 0.0 --batchsize 12 --statebatchsize 10 --data_fraction 0.5 | tee -a ./nips16results/gridworld/out_grid16_FCN_05.txt ;
23 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_FCN_05.pk --epochs 30 --model fcn --stepsize 0.005 --imsize 16 --reg 0.0 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_FCN_05.pk --data_fraction 0.5 | tee -a ./nips16results/gridworld/out_grid16_FCN_05.txt ;
24 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_FCN_05.pk --epochs 30 --model fcn --stepsize 0.002 --imsize 16 --reg 0.0 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_FCN_05.pk --data_fraction 0.5 | tee -a ./nips16results/gridworld/out_grid16_FCN_05.txt ;
25 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_FCN_05.pk --epochs 30 --model fcn --stepsize 0.001 --imsize 16 --reg 0.0 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_FCN_05.pk --data_fraction 0.5 | tee -a ./nips16results/gridworld/out_grid16_FCN_05.txt ;
26 |
27 |
--------------------------------------------------------------------------------
/scripts/nips_gridworld_experiments_VIN.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Script for running nips gridworld experiments with VIN networks
3 | mkdir -p results
4 | # 8x8 map
5 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_8.mat --output ./results/grid8_VIN.pk --epochs 30 --model valIterBatch --stepsize 0.01 --imsize 8 --reg 0.0 --k 10 --batchsize 12 --statebatchsize 10 | tee ./results/out_grid8_VIN.txt ;
6 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_8.mat --output ./results/grid8_VIN.pk --epochs 30 --model valIterBatch --stepsize 0.005 --imsize 8 --reg 0.0 --k 10 --batchsize 12 --statebatchsize 10 --warmstart ./results/grid8_VIN.pk | tee -a ./results/out_grid8_VIN.txt ;
7 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_8.mat --output ./results/grid8_VIN.pk --epochs 30 --model valIterBatch --stepsize 0.002 --imsize 8 --reg 0.0 --k 10 --batchsize 12 --statebatchsize 10 --warmstart ./results/grid8_VIN.pk | tee -a ./results/out_grid8_VIN.txt ;
8 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_8.mat --output ./results/grid8_VIN.pk --epochs 30 --model valIterBatch --stepsize 0.001 --imsize 8 --reg 0.0 --k 10 --batchsize 12 --statebatchsize 10 --warmstart ./results/grid8_VIN.pk | tee -a ./results/out_grid8_VIN.txt ;
9 | # 16x16 map
10 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./results/grid16_VIN.pk --epochs 30 --model valIterBatch --stepsize 0.01 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 | tee ./results/out_grid16_VIN.txt ;
11 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./results/grid16_VIN.pk --epochs 30 --model valIterBatch --stepsize 0.005 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 --warmstart ./results/grid16_VIN.pk | tee -a ./results/out_grid16_VIN.txt ;
12 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./results/grid16_VIN.pk --epochs 30 --model valIterBatch --stepsize 0.002 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 --warmstart ./results/grid16_VIN.pk | tee -a ./results/out_grid16_VIN.txt ;
13 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./results/grid16_VIN.pk --epochs 30 --model valIterBatch --stepsize 0.001 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 --warmstart ./results/grid16_VIN.pk | tee -a ./results/out_grid16_VIN.txt ;
14 | # 28x28 map
15 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_28.mat --output ./results/grid28_VIN.pk --epochs 30 --model valIterBatch --stepsize 0.01 --imsize 28 --reg 0.0 --k 36 --batchsize 12 --statebatchsize 10 | tee ./results/out_grid28_VIN.txt ;
16 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_28.mat --output ./results/grid28_VIN.pk --epochs 30 --model valIterBatch --stepsize 0.005 --imsize 28 --reg 0.0 --k 36 --batchsize 12 --statebatchsize 10 --warmstart ./results/grid28_VIN.pk | tee -a ./results/out_grid28_VIN.txt ;
17 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_28.mat --output ./results/grid28_VIN.pk --epochs 30 --model valIterBatch --stepsize 0.002 --imsize 28 --reg 0.0 --k 36 --batchsize 12 --statebatchsize 10 --warmstart ./results/grid28_VIN.pk | tee -a ./results/out_grid28_VIN.txt ;
18 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_28.mat --output ./results/grid28_VIN.pk --epochs 30 --model valIterBatch --stepsize 0.001 --imsize 28 --reg 0.0 --k 36 --batchsize 12 --statebatchsize 10 --warmstart ./results/grid28_VIN.pk | tee -a ./results/out_grid28_VIN.txt ;
19 |
20 |
--------------------------------------------------------------------------------
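As a rough programmatic equivalent of the first 8x8 run in the script above, the same training can also be driven directly from the vin class defined in vin.py. This is an illustrative sketch: it assumes Theano is configured, that the data and results paths exist, and that the exact behaviour of NN_run_training.py may differ (e.g., warm starts and step-size schedules).

```python
from vin import vin

# mirror the 8x8 settings used in the shell script: k=10, batches of 12 images x 10 states
nn = vin(im_size=[8, 8], k=10, batchsize=12, statebatchsize=10)
nn.run_training(input='./data/gridworld_8.mat', stepsize=0.01, epochs=30, batch_size=12)
nn.save_weights(outfile='./results/grid8_VIN.pk')
```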
/scripts/nips_gridworld_experiments_VIN_untied.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Script for running nips gridworld experiments with untied VIN networks
3 | # 8x8 map
4 | THEANO_FLAGS='floatX=float32,device=cpu' python NN_run_training.py --input ./data/gridworld_8.mat --output ./nips16results/gridworld/grid8_VIN_untied.pk --epochs 30 --model valIterBatchUntied --stepsize 0.01 --imsize 8 --reg 0.0 --k 10 --batchsize 12 --statebatchsize 10 | tee -a ./nips16results/gridworld/out_grid8_VIN_untied.txt ;
5 | THEANO_FLAGS='floatX=float32,device=cpu' python NN_run_training.py --input ./data/gridworld_8.mat --output ./nips16results/gridworld/grid8_VIN_untied.pk --epochs 30 --model valIterBatchUntied --stepsize 0.005 --imsize 8 --reg 0.0 --k 10 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid8_VIN_untied.pk | tee -a ./nips16results/gridworld/out_grid8_VIN_untied.txt ;
6 | THEANO_FLAGS='floatX=float32,device=cpu' python NN_run_training.py --input ./data/gridworld_8.mat --output ./nips16results/gridworld/grid8_VIN_untied.pk --epochs 30 --model valIterBatchUntied --stepsize 0.002 --imsize 8 --reg 0.0 --k 10 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid8_VIN_untied.pk | tee -a ./nips16results/gridworld/out_grid8_VIN_untied.txt ;
7 | THEANO_FLAGS='floatX=float32,device=cpu' python NN_run_training.py --input ./data/gridworld_8.mat --output ./nips16results/gridworld/grid8_VIN_untied.pk --epochs 30 --model valIterBatchUntied --stepsize 0.001 --imsize 8 --reg 0.0 --k 10 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid8_VIN_untied.pk | tee -a ./nips16results/gridworld/out_grid8_VIN_untied.txt ;
8 | # 16x16 map
9 | THEANO_FLAGS='floatX=float32,device=cpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_VIN_untied.pk --epochs 30 --model valIterBatchUntied --stepsize 0.01 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 | tee -a ./nips16results/gridworld/out_grid16_VIN_untied.txt ;
10 | THEANO_FLAGS='floatX=float32,device=cpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_VIN_untied.pk --epochs 30 --model valIterBatchUntied --stepsize 0.005 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_VIN_untied.pk | tee -a ./nips16results/gridworld/out_grid16_VIN_untied.txt ;
11 | THEANO_FLAGS='floatX=float32,device=cpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_VIN_untied.pk --epochs 30 --model valIterBatchUntied --stepsize 0.002 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_VIN_untied.pk | tee -a ./nips16results/gridworld/out_grid16_VIN_untied.txt ;
12 | THEANO_FLAGS='floatX=float32,device=cpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_VIN_untied.pk --epochs 30 --model valIterBatchUntied --stepsize 0.001 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_VIN_untied.pk | tee -a ./nips16results/gridworld/out_grid16_VIN_untied.txt ;
13 | # 28x28 map
14 | THEANO_FLAGS='floatX=float32,device=cpu' python NN_run_training.py --input ./data/gridworld_28.mat --output ./nips16results/gridworld/grid28_VIN_untied.pk --epochs 30 --model valIterBatchUntied --stepsize 0.01 --imsize 28 --reg 0.0 --k 36 --batchsize 12 --statebatchsize 10 | tee -a ./nips16results/gridworld/out_grid28_VIN_untied.txt ;
15 | THEANO_FLAGS='floatX=float32,device=cpu' python NN_run_training.py --input ./data/gridworld_28.mat --output ./nips16results/gridworld/grid28_VIN_untied.pk --epochs 30 --model valIterBatchUntied --stepsize 0.005 --imsize 28 --reg 0.0 --k 36 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid28_VIN_untied.pk | tee -a ./nips16results/gridworld/out_grid28_VIN_untied.txt ;
16 | THEANO_FLAGS='floatX=float32,device=cpu' python NN_run_training.py --input ./data/gridworld_28.mat --output ./nips16results/gridworld/grid28_VIN_untied.pk --epochs 30 --model valIterBatchUntied --stepsize 0.002 --imsize 28 --reg 0.0 --k 36 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid28_VIN_untied.pk | tee -a ./nips16results/gridworld/out_grid28_VIN_untied.txt ;
17 | THEANO_FLAGS='floatX=float32,device=cpu' python NN_run_training.py --input ./data/gridworld_28.mat --output ./nips16results/gridworld/grid28_VIN_untied.pk --epochs 30 --model valIterBatchUntied --stepsize 0.001 --imsize 28 --reg 0.0 --k 36 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid28_VIN_untied.pk | tee -a ./nips16results/gridworld/out_grid28_VIN_untied.txt ;
18 |
19 |
--------------------------------------------------------------------------------
/scripts/nips_gridworld_experiments_VIN_untied_data_fraction.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Script for running nips gridworld data-fraction experiments with VIN networks
3 | # 16x16 map
4 |
5 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_VIN_untied_02_data.pk --epochs 30 --model valIterBatchUntied --stepsize 0.01 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 --data_fraction 0.2 | tee -a ./nips16results/gridworld/out_grid16_VIN_untied_02_data.txt ;
6 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_VIN_untied_02_data.pk --epochs 30 --model valIterBatchUntied --stepsize 0.005 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_VIN_untied_02_data.pk --data_fraction 0.2 | tee -a ./nips16results/gridworld/out_grid16_VIN_untied_02_data.txt ;
7 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_VIN_untied_02_data.pk --epochs 30 --model valIterBatchUntied --stepsize 0.002 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_VIN_untied_02_data.pk --data_fraction 0.2 | tee -a ./nips16results/gridworld/out_grid16_VIN_untied_02_data.txt ;
8 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_VIN_untied_02_data.pk --epochs 30 --model valIterBatchUntied --stepsize 0.001 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_VIN_untied_02_data.pk --data_fraction 0.2 | tee -a ./nips16results/gridworld/out_grid16_VIN_untied_02_data.txt ;
9 |
10 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_VIN_02_data.pk --epochs 30 --model valIterBatch --stepsize 0.01 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 --data_fraction 0.2 | tee -a ./nips16results/gridworld/out_grid16_VIN_02_data.txt ;
11 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_VIN_02_data.pk --epochs 30 --model valIterBatch --stepsize 0.005 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_VIN_02_data.pk --data_fraction 0.2 | tee -a ./nips16results/gridworld/out_grid16_VIN_02_data.txt ;
12 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_VIN_02_data.pk --epochs 30 --model valIterBatch --stepsize 0.002 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_VIN_02_data.pk --data_fraction 0.2 | tee -a ./nips16results/gridworld/out_grid16_VIN_02_data.txt ;
13 | #THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_VIN_02_data.pk --epochs 30 --model valIterBatch --stepsize 0.001 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_VIN_02_data.pk --data_fraction 0.2 | tee -a ./nips16results/gridworld/out_grid16_VIN_02_data.txt ;
14 |
15 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_VIN_untied_05_data.pk --epochs 30 --model valIterBatchUntied --stepsize 0.01 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 --data_fraction 0.5 | tee -a ./nips16results/gridworld/out_grid16_VIN_untied_05_data.txt ;
16 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_VIN_untied_05_data.pk --epochs 30 --model valIterBatchUntied --stepsize 0.005 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_VIN_untied_05_data.pk --data_fraction 0.5 | tee -a ./nips16results/gridworld/out_grid16_VIN_untied_05_data.txt ;
17 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_VIN_untied_05_data.pk --epochs 30 --model valIterBatchUntied --stepsize 0.005 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_VIN_untied_05_data.pk --data_fraction 0.5 | tee -a ./nips16results/gridworld/out_grid16_VIN_untied_05_data.txt ;
18 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_VIN_untied_05_data.pk --epochs 30 --model valIterBatchUntied --stepsize 0.001 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_VIN_untied_05_data.pk --data_fraction 0.5 | tee -a ./nips16results/gridworld/out_grid16_VIN_untied_05_data.txt ;
19 |
20 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_VIN_05_data.pk --epochs 30 --model valIterBatch --stepsize 0.01 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 --data_fraction 0.5 | tee -a ./nips16results/gridworld/out_grid16_VIN_05_data.txt ;
21 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_VIN_05_data.pk --epochs 30 --model valIterBatch --stepsize 0.005 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_VIN_05_data.pk --data_fraction 0.5 | tee -a ./nips16results/gridworld/out_grid16_VIN_05_data.txt ;
22 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_VIN_05_data.pk --epochs 30 --model valIterBatch --stepsize 0.005 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_VIN_05_data.pk --data_fraction 0.5 | tee -a ./nips16results/gridworld/out_grid16_VIN_05_data.txt ;
23 | THEANO_FLAGS='floatX=float32,device=gpu' python NN_run_training.py --input ./data/gridworld_16.mat --output ./nips16results/gridworld/grid16_VIN_05_data.pk --epochs 30 --model valIterBatch --stepsize 0.001 --imsize 16 --reg 0.0 --k 20 --batchsize 12 --statebatchsize 10 --warmstart ./nips16results/gridworld/grid16_VIN_05_data.pk --data_fraction 0.5 | tee -a ./nips16results/gridworld/out_grid16_VIN_05_data.txt ;
24 |
--------------------------------------------------------------------------------
/test_network.m:
--------------------------------------------------------------------------------
1 | function [optimal_lengths,pred_lengths] = test_network(model, weight_file, test_file, imsize, k)
2 | % script to evaluate success rate of network on a test-set of trajectories
3 |
4 | tmp = py.vin.vin; clear tmp; % to load Python
5 | size_1 = imsize(1); size_2 = imsize(2);
6 | if strcmp(model,'VIN')
7 | nn = py.vin.vin(pyargs('im_size',int32([size_1,size_2]),'k',int32(k),'batchsize',int32(1),'statebatchsize',int32(1)));
8 | elseif strcmp(model,'untiedVIN')
9 | nn = py.vin_untied.vin_untied(pyargs('im_size',int32([size_1,size_2]),'k',int32(k),'batchsize',int32(1),'statebatchsize',int32(1)));
10 | elseif strcmp(model,'FCN')
11 | nn = py.FCN.fcn(pyargs('im_size',int32([size_1,size_2]),'batchsize',int32(1),'statebatchsize',int32(1)));
12 | elseif strcmp(model,'CNN')
13 | nn = py.CNN.cnn(pyargs('im_size',int32([size_1,size_2]),'batchsize',int32(1)));
14 | end
15 | nn.load_weights(pyargs('infile',weight_file));
16 | load(test_file);
17 | %% Evaluate NN
18 | % Predict trajectories in closed-loop, and compare with shortest path
19 | Ndomains = size(all_im_data,1); % number of domains
20 |
21 | % containers for data
22 | optimal_lengths = zeros(Ndomains,1);
23 | pred_lengths = zeros(Ndomains,1);
24 | no_obs_im = ones(size_1,size_2);
25 | for dom = 1:Ndomains
26 | goal = all_states_xy{dom}(end,:);
27 | start = all_states_xy{dom}(1,:);
28 | optimal_lengths(dom) = length(all_states_xy{dom});
29 | im = reshape(all_im_data(dom,:),size_1,size_2);
30 | G = Gridworld_Graph8(im,goal(1),goal(2));
31 | G_no_obs = Gridworld_Graph8(no_obs_im,goal(1),goal(2));
32 | value_prior = reshape(all_value_data(dom,:),size_1,size_2);
33 | if ~isempty(all_states_xy{dom}) && size(all_states_xy{dom},1)>1
34 | L = size(all_states_xy{dom},1)*2;
35 | pred_traj = zeros(L,2);
36 | pred_traj(1,:) = all_states_xy{dom}(1,:);
37 | for j = 2:L
38 | % create current state vector and image vector, and save to file
39 | state_xy_data = uint8([pred_traj(j-1,1)-1, pred_traj(j-1,2)-1]);
40 | im_data = uint8(reshape(im',1,[]));
41 | value_data = uint8(reshape(value_prior',1,[]));
42 | % call NN to predict action from input file
43 | save('test_input.mat','im_data','value_data','state_xy_data');
44 | a = nn.predict(pyargs('input', 'test_input.mat'))+1;
45 | % calculate next state based on action
46 | s = G.map_ind_to_state(pred_traj(j-1,1),pred_traj(j-1,2));
47 | ns = G.sampleNextState(s,a);
48 | [nr,nc] = G.getCoords(ns);
49 | pred_traj(j,2) = nc;
50 | pred_traj(j,1) = nr;
51 | if (nr == goal(1)) && (nc == goal(2))
52 | pred_traj(j+1:end,2) = nc;
53 | pred_traj(j+1:end,1) = nr;
54 | pred_lengths(dom) = j;
55 | break;
56 | end
57 | end
58 | end
59 | disp(Ndomains-dom);
60 | end
61 | end
62 |
--------------------------------------------------------------------------------
/theano_utils.py:
--------------------------------------------------------------------------------
1 | # THEANO NN utils
2 | import numpy as np
3 | import theano
4 | import theano.tensor as T
5 |
6 |
7 | def init_weights_T(*shape):
8 | return theano.shared((np.random.randn(*shape) * 0.01).astype(theano.config.floatX))
9 |
10 |
11 | def conv2D_keep_shape(x, w, image_shape, filter_shape, subsample=(1, 1)):
12 | # crop output to same size as input
13 | fs = T.shape(w)[2] - 1 # this is the filter size minus 1
14 | ims = T.shape(x)[2] # this is the image size
15 | return theano.sandbox.cuda.dnn.dnn_conv(img=x,
16 | kerns=w,
17 | border_mode='full',
18 | subsample=subsample,
19 | )[:, :, fs/2:ims+fs/2, fs/2:ims+fs/2]
20 |
21 |
22 | def rmsprop_updates_T(cost, params, stepsize=0.001, rho=0.9, epsilon=1e-6):
23 | # rmsprop in Theano
24 | grads = T.grad(cost=cost, wrt=params)
25 | updates = []
26 | for p, g in zip(params, grads):
27 | acc = theano.shared(p.get_value() * 0.)
28 | acc_new = rho * acc + (1 - rho) * g ** 2
29 | gradient_scaling = T.sqrt(acc_new + epsilon)
30 | g = g / gradient_scaling
31 | updates.append((acc, acc_new))
32 | updates.append((p, p - stepsize * g))
33 | return updates
34 |
35 |
36 | def flip_filter(w):
37 | if w.ndim == 4:
38 | t = w.copy()
39 | s = t.shape
40 | for i in range(0, s[0]):
41 | for j in range(0, s[1]):
42 | t[i][j] = np.fliplr(t[i][j])
43 | t[i][j] = np.flipud(t[i][j])
44 | return t
45 | else:
46 | return w
47 |
48 |
49 | class ConvLayer(object):
50 | """Pool Layer of a convolutional network, copied from Theano tutorial """
51 | def __init__(self, input_tensor, filter_shape, image_shape, poolsize=(2, 2)):
52 | assert image_shape[1] == filter_shape[1]
53 | self.input = input_tensor
54 | fan_in = np.prod(filter_shape[1:])
55 | fan_out = (filter_shape[0] * np.prod(filter_shape[2:]) //
56 | np.prod(poolsize))
57 |
58 | # initialize weights with random weights
59 | W_bound = np.sqrt(6. / (fan_in + fan_out))
60 | self.W = theano.shared(
61 | np.asarray(np.random.uniform(low=-W_bound, high=W_bound, size=filter_shape),
62 | dtype=theano.config.floatX),
63 | )
64 | b_values = np.zeros((filter_shape[0],), dtype=theano.config.floatX)
65 | self.b = theano.shared(value=b_values, borrow=True)
66 |
67 | # convolve input feature maps with filters
68 | conv_out = conv2D_keep_shape(
69 | x=input_tensor,
70 | w=self.W,
71 | image_shape=image_shape,
72 | filter_shape=filter_shape
73 | )
74 |
75 | # downsample each feature map individually, using maxpooling
76 | pooled_out = theano.tensor.signal.pool.pool_2d(
77 | input=conv_out,
78 | ds=poolsize,
79 | ignore_border=True
80 | )
81 |
82 | # add the bias term. Since the bias is a vector (1D array), we first
83 | # reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
84 | # thus be broadcasted across mini-batches and feature map
85 | # width & height
86 | self.output = T.nnet.relu(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
87 |
88 | self.out_shape = (image_shape[0], filter_shape[0],
89 | int(np.floor(image_shape[2]/poolsize[0])),
90 | int(np.floor(image_shape[3]/poolsize[1])))
91 |
92 | # store parameters of this layer
93 | self.params = [self.W, self.b]
94 |
95 | # keep track of model input
96 | self.input = input_tensor
97 |
98 |
--------------------------------------------------------------------------------
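A minimal usage sketch for rmsprop_updates_T (illustrative, assuming Theano is installed; the toy least-squares model and the names x, y, w are hypothetical): it shows how the returned update list plugs into theano.function, in the same way vin.run_training uses it with its own cost.

```python
import numpy as np
import theano
import theano.tensor as T
from theano_utils import init_weights_T, rmsprop_updates_T

x = T.matrix('x')
y = T.vector('y')
w = init_weights_T(3)                                  # shared (3,) weight vector
cost = T.mean((T.dot(x, w) - y) ** 2)                  # toy least-squares cost
train = theano.function([x, y], cost,
                        updates=rmsprop_updates_T(cost, [w], stepsize=0.01))

X = np.random.randn(64, 3).astype(theano.config.floatX)
Y = X.dot(np.array([0.5, -0.3, 0.1])).astype(theano.config.floatX)
for _ in range(500):
    train(X, Y)
print(w.get_value())   # approaches [0.5, -0.3, 0.1]
```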
/util/SP.m:
--------------------------------------------------------------------------------
1 | function [path] = SP(pred,s,t)
2 | % trace-back shortest path from s(ource) to t(arget), with predecessor list
3 | % pred, calculated before-hand
4 | max_len = 1e3;
5 | path = zeros(max_len,1);
6 | i = max_len;
7 | path(i) = t;
8 | while path(i)~=s && i>1
9 | try
10 | path(i-1) = pred(path(i));
11 | i = i-1;
12 | catch
13 | warning('no path found, continuing');
14 | path = [];
15 | return
16 | end
17 | end
18 | if i>=1
19 | path = path(i:end);
20 | else
21 | path = NaN;
22 | end
23 | end
--------------------------------------------------------------------------------
/util/python_ndarray_to_matrix.m:
--------------------------------------------------------------------------------
1 | function m = python_ndarray_to_matrix(p,psize)
2 | cP = cell(p);
3 | flat = cP{1,1}.flatten();
4 | flatlist = flat.tolist();
5 | m = zeros(psize);
6 | ind = 1;
7 | for i = 1:psize(1)
8 | for j = 1:psize(2)
9 | m(i,j) = double(flatlist{ind});
10 | ind = ind+1;
11 | end
12 | end
--------------------------------------------------------------------------------
/util/rand_choose.m:
--------------------------------------------------------------------------------
1 | function res = rand_choose(in_vec)
2 | % sample an element from probability vector in_vec
3 | if size(in_vec,2)==1
4 | in_vec = in_vec';
5 | end
6 |
7 | tmp = [0 cumsum(in_vec)];
8 | q = rand;
9 | res = find(q>tmp(1:end-1) & q<=tmp(2:end));
--------------------------------------------------------------------------------
/util/set_var.m:
--------------------------------------------------------------------------------
1 | function [] = set_var(name,val)
2 | % set variable 'name' in the caller workspace to the default value val,
3 | % if it does not already exist there
4 | doesexist = evalin('caller',['exist(''' name ''')']);
5 | doesexist(doesexist>0)=1;
6 | if ~doesexist
7 |     assignin('caller',name,val);
8 | end
--------------------------------------------------------------------------------
/vin.py:
--------------------------------------------------------------------------------
1 | # VI network using THEANO, takes batches of state input
2 | from NNobj import *
3 | from theano_utils import *
4 |
5 |
6 | class vin(NNobj):
7 | "Class for a neural network that does k iterations of value iteration"
8 | def __init__(self, model="VIN", im_size=[28, 28], dropout=False, devtype="cpu", grad_check=False, reg=0, k=10,
9 | statebatchsize=10, batchsize=128):
10 | self.im_size = im_size # input image size
11 | self.model = model
12 | self.reg = reg # regularization (currently not implemented)
13 | self.k = k # number of VI iterations
14 | self.batchsize = batchsize # batch size for training
15 | self.statebatchsize = statebatchsize # number of state inputs for every image input, since each image is the
16 | # same for many states in the data
17 | np.random.seed(0)
18 | print(model)
19 | theano.config.blas.ldflags = "-L/usr/local/lib -lopenblas"
20 |
21 | # X input : l=2 stacked images: obstacle map and reward function prior
22 | self.X = T.ftensor4(name="X")
23 | # S1,S2 input : state position (vertical and horizontal position)
24 | self.S1 = T.bmatrix("S1") # state first dimension * statebatchsize
25 | self.S2 = T.bmatrix("S2") # state second dimension * statebatchsize
26 | self.y = T.bvector("y") # output action * statebatchsize
27 |
28 | l = 2 # channels in input layer
29 | l_h = 150 # channels in initial hidden layer
30 | l_q = 10 # channels in q layer (~actions)
31 |
32 | self.vin_net = VinBlock(in_x=self.X, in_s1=self.S1, in_s2=self.S2, in_x_channels=l, imsize=self.im_size,
33 | batchsize=self.batchsize, state_batch_size=self.statebatchsize, l_h=l_h, l_q=l_q,
34 | k=self.k)
35 | self.p_of_y = self.vin_net.output
36 | self.params = self.vin_net.params
37 | # Total 1910 parameters
38 |
39 | self.cost = -T.mean(T.log(self.p_of_y)[T.arange(self.y.shape[0]),
40 | self.y], dtype=theano.config.floatX)
41 | self.y_pred = T.argmax(self.p_of_y, axis=1)
42 | self.err = T.mean(T.neq(self.y_pred, self.y.flatten()), dtype=theano.config.floatX)
43 |
44 | self.computeloss = theano.function(inputs=[self.X, self.S1, self.S2, self.y],
45 | outputs=[self.err, self.cost])
46 | self.y_out = theano.function(inputs=[self.X, self.S1, self.S2], outputs=[self.y_pred])
47 |
48 | def run_training(self, input, stepsize=0.01, epochs=10, output='None', batch_size=128, grad_check=True,
49 | profile=False, data_fraction=1):
50 | # run training from input matlab data file, and save test data prediction in output file
51 | # load data from Matlab file, including
52 | # im_data: flattened images
53 | # state_data: concatenated one-hot vectors for each state variable
54 | # state_xy_data: state variable (x,y position)
55 | # label_data: one-hot vector for action (state difference)
56 | matlab_data = sio.loadmat(input)
57 | im_data = matlab_data["batch_im_data"]
58 | im_data = (im_data - 1)/255 # obstacles = 1, free zone = 0
59 | value_data = matlab_data["batch_value_data"]
60 | state1_data = matlab_data["state_x_data"]
61 | state2_data = matlab_data["state_y_data"]
62 | label_data = matlab_data["batch_label_data"]
63 | ydata = label_data.astype('int8')
64 | Xim_data = im_data.astype(theano.config.floatX)
65 | Xim_data = Xim_data.reshape(-1, 1, self.im_size[0], self.im_size[1])
66 | Xval_data = value_data.astype(theano.config.floatX)
67 | Xval_data = Xval_data.reshape(-1, 1, self.im_size[0], self.im_size[1])
68 | Xdata = np.append(Xim_data, Xval_data, axis=1)
69 | S1data = state1_data.astype('int8')
70 | S2data = state2_data.astype('int8')
71 |
72 | all_training_samples = int(6/7.0*Xdata.shape[0])
73 | training_samples = int(data_fraction * all_training_samples)
74 | Xtrain = Xdata[0:training_samples]
75 | S1train = S1data[0:training_samples]
76 | S2train = S2data[0:training_samples]
77 | ytrain = ydata[0:training_samples]
78 |
79 | Xtest = Xdata[all_training_samples:]
80 | S1test = S1data[all_training_samples:]
81 | S2test = S2data[all_training_samples:]
82 | ytest = ydata[all_training_samples:]
83 | ytest = ytest.flatten()
84 |
85 | sortinds = np.random.permutation(training_samples)
86 | Xtrain = Xtrain[sortinds]
87 | S1train = S1train[sortinds]
88 | S2train = S2train[sortinds]
89 | ytrain = ytrain[sortinds]
90 | ytrain = ytrain.flatten()
91 |
92 | self.updates = rmsprop_updates_T(self.cost, self.params, stepsize=stepsize)
93 | self.train = theano.function(inputs=[self.X, self.S1, self.S2, self.y], outputs=[], updates=self.updates)
94 |
95 | print fmt_row(10, ["Epoch", "Train NLL", "Train Err", "Test NLL", "Test Err", "Epoch Time"])
96 | for i_epoch in xrange(int(epochs)):
97 | tstart = time.time()
98 | # do training
99 | for start in xrange(0, Xtrain.shape[0], batch_size):
100 | end = start+batch_size
101 | if end <= Xtrain.shape[0]:
102 | self.train(Xtrain[start:end], S1train[start:end], S2train[start:end],
103 | ytrain[start*self.statebatchsize:end*self.statebatchsize])
104 | elapsed = time.time() - tstart
105 | # compute losses
106 | trainerr = 0.
107 | trainloss = 0.
108 | testerr = 0.
109 | testloss = 0.
110 | num = 0
111 | for start in xrange(0, Xtest.shape[0], batch_size):
112 | end = start+batch_size
113 | if end <= Xtest.shape[0]:
114 | num += 1
115 | trainerr_, trainloss_ = self.computeloss(Xtrain[start:end], S1train[start:end], S2train[start:end],
116 | ytrain[start*self.statebatchsize:end*self.statebatchsize])
117 | testerr_, testloss_ = self.computeloss(Xtest[start:end], S1test[start:end], S2test[start:end],
118 | ytest[start*self.statebatchsize:end*self.statebatchsize])
119 | trainerr += trainerr_
120 | trainloss += trainloss_
121 | testerr += testerr_
122 | testloss += testloss_
123 | print fmt_row(10, [i_epoch, trainloss/num, trainerr/num, testloss/num, testerr/num, elapsed])
124 |
125 | def predict(self, input):
126 | # NN output for a single input, read from file
127 | matlab_data = sio.loadmat(input)
128 | im_data = matlab_data["im_data"]
129 | im_data = (im_data - 1)/255 # obstacles = 1, free zone = 0
130 | state_data = matlab_data["state_xy_data"]
131 | value_data = matlab_data["value_data"]
132 | xim_test = im_data.astype(theano.config.floatX)
133 | xim_test = xim_test.reshape(-1, 1, self.im_size[0], self.im_size[1])
134 | xval_test = value_data.astype(theano.config.floatX)
135 | xval_test = xval_test.reshape(-1, 1, self.im_size[0], self.im_size[1])
136 | x_test = np.append(xim_test, xval_test, axis=1)
137 | s_test = state_data.astype('int8')
138 | s1_test = s_test[:, 0].reshape([1, 1])
139 | s2_test = s_test[:, 1].reshape([1, 1])
140 | out = self.y_out(x_test, s1_test, s2_test)
141 | return out[0][0]
142 |
143 | def predict_value(self, input):
144 | # Value and reward for a single input, read from file
145 | val_pred = theano.function(inputs=[self.X], outputs=[self.vin_net.v])
146 | r_pred = theano.function(inputs=[self.X], outputs=[self.vin_net.r])
147 | matlab_data = sio.loadmat(input)
148 | im_data = matlab_data["im_data"]
149 | im_data = (im_data - 1)/255 # obstacles = 1, free zone = 0
150 | value_data = matlab_data["value_data"]
151 | xim_test = im_data.astype(theano.config.floatX)
152 | xim_test = xim_test.reshape(-1, 1, self.im_size[0], self.im_size[1])
153 | xval_test = value_data.astype(theano.config.floatX)
154 | xval_test = xval_test.reshape(-1, 1, self.im_size[0], self.im_size[1])
155 | x_test = np.append(xim_test, xval_test, axis=1)
156 | out_v = val_pred(x_test)
157 | out_r = r_pred(x_test)
158 | return [out_v[0][0], out_r[0][0]]
159 |
160 | def load_weights(self, infile="weight_dump.pk"):
161 | dump = pickle.load(open(infile, 'r'))
162 | [n.set_value(p) for n, p in zip(self.params, dump)]
163 |
164 | def save_weights(self, outfile="weight_dump.pk"):
165 | pickle.dump([n.get_value() for n in self.params], open(outfile, 'w'))
166 |
167 |
168 | class VinBlock(object):
169 | """VIN block"""
170 | def __init__(self, in_x, in_s1, in_s2, in_x_channels, imsize, batchsize=128,
171 | state_batch_size=1, l_h=150, l_q=10, k=0):
172 | """
173 | Allocate a VIN block with shared variable internal parameters.
174 |
175 | :type in_x: theano.tensor.dtensor4
176 | :param in_x: symbolic input image tensor, of shape [batchsize, in_x_channels, imsize[0], imsize[1]]
177 | Typically : first channel is image, second is the reward prior.
178 |
179 | :type in_s1: theano.tensor.bmatrix
180 | :param in_s1: symbolic input batches of vertical positions, of shape [batchsize, state_batch_size]
181 |
182 | :type in_s2: theano.tensor.bmatrix
183 | :param in_s2: symbolic input batches of horizontal positions, of shape [batchsize, state_batch_size]
184 |
185 | :type in_x_channels: int32
186 | :param in_x_channels: number of input channels
187 |
188 | :type imsize: tuple or list of length 2
189 | :param imsize: (image height, image width)
190 |
191 | :type batchsize: int32
192 | :param batchsize: batch size
193 |
194 | :type state_batch_size: int32
195 | :param state_batch_size: number of state inputs for each sample
196 |
197 | :type l_h: int32
198 | :param l_h: number of channels in first hidden layer
199 |
200 | :type l_q: int32
201 | :param l_q: number of channels in q layer (~actions)
202 |
203 | :type k: int32
204 | :param k: number of VI iterations (actually, real number of iterations is k+1)
205 |
206 | """
207 | self.bias = theano.shared((np.random.randn(l_h) * 0.01).astype(theano.config.floatX)) # 150 parameters
208 | self.w0 = init_weights_T(l_h, in_x_channels, 3, 3) # 1350 parameters
209 | # initial conv layer over image+reward prior
210 | self.h = conv2D_keep_shape(in_x, self.w0, image_shape=[batchsize, self.w0.shape.eval()[1],
211 | imsize[0], imsize[1]],
212 | filter_shape=self.w0.shape.eval())
213 | self.h = self.h + self.bias.dimshuffle('x', 0, 'x', 'x')
214 |
215 | self.w1 = init_weights_T(1, l_h, 1, 1) # 150 parameters
216 | self.r = conv2D_keep_shape(self.h, self.w1, image_shape=[batchsize, self.w0.shape.eval()[0],
217 | imsize[0], imsize[1]],
218 | filter_shape=self.w1.shape.eval())
219 |
220 | # weights from inputs to q layer (~reward in Bellman equation)
221 | self.w = init_weights_T(l_q, 1, 3, 3) # 90 parameters
222 | # feedback weights from v layer into q layer (~transition probabilities in Bellman equation)
223 | self.w_fb = init_weights_T(l_q, 1, 3, 3) # 90 parameters
224 |
225 | self.q = conv2D_keep_shape(self.r, self.w, image_shape=[batchsize, self.w1.shape.eval()[0],
226 | imsize[0], imsize[1]],
227 | filter_shape=self.w.shape.eval())
228 | self.v = T.max(self.q, axis=1, keepdims=True)
229 |
230 | for i in range(0, k-1):
231 | self.q = conv2D_keep_shape(T.concatenate([self.r, self.v], axis=1), T.concatenate([self.w, self.w_fb],
232 | axis=1),
233 | image_shape=[batchsize, self.w1.shape.eval()[0]+1, imsize[0], imsize[1]],
234 | filter_shape=T.concatenate([self.w, self.w_fb], axis=1).shape.eval())
235 | self.v = T.max(self.q, axis=1, keepdims=True)
236 |
237 | # do one last convolution
238 | self.q = conv2D_keep_shape(T.concatenate([self.r, self.v], axis=1), T.concatenate([self.w, self.w_fb], axis=1),
239 | image_shape=[batchsize, self.w1.shape.eval()[0]+1, imsize[0], imsize[1]],
240 | filter_shape=T.concatenate([self.w, self.w_fb], axis=1).shape.eval())
241 |
242 | # Select the conv-net channels at the state position (S1,S2).
243 | # Intuitively, each channel represents an action and the conv-net output represents the Q function.
244 | # The tricky thing is we want to select the same (S1,S2) position *for each* channel and for each sample
245 | self.q_out = self.q[T.extra_ops.repeat(T.arange(self.q.shape[0]), state_batch_size), :, in_s1.flatten(),
246 | in_s2.flatten()]
247 |
248 | # softmax output weights
249 | self.w_o = init_weights_T(l_q, 8) # 80 parameters
250 | self.output = T.nnet.softmax(T.dot(self.q_out, self.w_o))
251 |
252 | self.params = [self.w0, self.bias, self.w1, self.w, self.w_fb, self.w_o]
253 |
--------------------------------------------------------------------------------
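
The k-step loop in VinBlock above is the value-iteration module: each pass convolves the reward map r together with the current value map v to produce Q channels, then takes a channel-wise max to obtain the next v. The following is a minimal NumPy/SciPy sketch of that recurrence, for intuition only; it is not part of the repository, a single reward channel is assumed, and the kernels are random placeholders standing in for the learned weights.

# Illustration of the VinBlock recurrence q = conv([r, v]), v = max_a q (not repo code).
import numpy as np
from scipy.signal import convolve2d

np.random.seed(0)
m, n, l_q, k = 8, 8, 10, 10               # grid size, action channels, VI iterations
r = np.random.randn(m, n)                 # toy reward map (stand-in for the w1 conv output)
w = np.random.randn(l_q, 3, 3) * 0.01     # ~reward kernels, one 3x3 filter per action
w_fb = np.random.randn(l_q, 3, 3) * 0.01  # ~transition kernels applied to the value map

# initial Q from the reward only, then k Bellman-style updates with 'same'-padded convolutions
q = np.stack([convolve2d(r, w[a], mode='same') for a in range(l_q)])
v = q.max(axis=0)                         # channel-wise max ~ max over actions
for _ in range(k):
    q = np.stack([convolve2d(r, w[a], mode='same') +
                  convolve2d(v, w_fb[a], mode='same') for a in range(l_q)])
    v = q.max(axis=0)

print(v.shape)                            # (8, 8): one value estimate per grid cell

As in the Theano code, the total number of Q updates is k+1, matching the note in the VinBlock docstring.
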
/vin_untied.py:
--------------------------------------------------------------------------------
1 | # VI network using THEANO, takes batches of state input
2 | from NNobj import *
3 | from theano_utils import *
4 |
5 |
6 | class vin_untied(NNobj):
7 | "Class for a neural network that does k iterations of value iteration"
8 | def __init__(self, model="VIN", im_size=[28, 28], dropout=False, devtype="cpu", grad_check=False, reg=0, k=10,
9 | statebatchsize=10, batchsize=128):
10 | self.im_size = im_size # input image size
11 | self.model = model
12 | self.reg = reg # regularization (currently not implemented)
13 | self.k = k # number of VI iterations
14 | self.batchsize = batchsize # batch size for training
15 | self.statebatchsize = statebatchsize # number of state inputs for every image input, since each image is the
16 | # same for many states in the data
17 | np.random.seed(0)
18 | print(model)
19 | theano.config.blas.ldflags = "-L/usr/local/lib -lopenblas"
20 |
21 | # X input : l=2 stacked images: obstacle map and reward function prior
22 | self.X = T.ftensor4(name="X")
23 | # S1,S2 input : state position (vertical and horizontal position)
24 | self.S1 = T.bmatrix("S1") # state first dimension * statebatchsize
25 | self.S2 = T.bmatrix("S2") # state second dimension * statebatchsize
26 | self.y = T.bvector("y") # output action * statebatchsize
27 |
28 | l = 2 # channels in input layer
29 | l_h = 150 # channels in initial hidden layer
30 | l_q = 10 # channels in q layer (~actions)
31 |
32 | self.vin_net = VinBlock(in_x=self.X, in_s1=self.S1, in_s2=self.S2, in_x_channels=l, imsize=self.im_size,
33 | batchsize=self.batchsize, state_batch_size=self.statebatchsize, l_h=l_h, l_q=l_q,
34 | k=self.k)
35 | self.p_of_y = self.vin_net.output
36 | self.params = self.vin_net.params
37 |
38 | self.cost = -T.mean(T.log(self.p_of_y)[T.arange(self.y.shape[0]),
39 | self.y], dtype=theano.config.floatX)
40 | self.y_pred = T.argmax(self.p_of_y, axis=1)
41 | self.err = T.mean(T.neq(self.y_pred, self.y.flatten()), dtype=theano.config.floatX)
42 |
43 | self.computeloss = theano.function(inputs=[self.X, self.S1, self.S2, self.y],
44 | outputs=[self.err, self.cost])
45 | self.y_out = theano.function(inputs=[self.X, self.S1, self.S2], outputs=[self.y_pred])
46 |
47 | def run_training(self, input, stepsize=0.01, epochs=10, output='None', batch_size=128, grad_check=True,
48 | profile=False, data_fraction=1):
49 | # run training from input matlab data file, and save test data prediction in output file
50 | # load data from Matlab file, including
51 | # batch_im_data: flattened obstacle images
52 | # batch_value_data: flattened reward-prior (value) images
53 | # state_x_data, state_y_data: state positions (vertical and horizontal)
54 | # batch_label_data: action labels (state difference)
55 | matlab_data = sio.loadmat(input)
56 | im_data = matlab_data["batch_im_data"]
57 | im_data = (im_data - 1)/255 # obstacles = 1, free zone = 0
58 | value_data = matlab_data["batch_value_data"]
59 | state1_data = matlab_data["state_x_data"]
60 | state2_data = matlab_data["state_y_data"]
61 | label_data = matlab_data["batch_label_data"]
62 | ydata = label_data.astype('int8')
63 | Xim_data = im_data.astype(theano.config.floatX)
64 | Xim_data = Xim_data.reshape(-1, 1, self.im_size[0], self.im_size[1])
65 | Xval_data = value_data.astype(theano.config.floatX)
66 | Xval_data = Xval_data.reshape(-1, 1, self.im_size[0], self.im_size[1])
67 | Xdata = np.append(Xim_data, Xval_data, axis=1)
68 | S1data = state1_data.astype('int8')
69 | S2data = state2_data.astype('int8')
70 |
71 | all_training_samples = int(6/7.0*Xdata.shape[0])
72 | training_samples = int(data_fraction * all_training_samples)
73 | Xtrain = Xdata[0:training_samples]
74 | S1train = S1data[0:training_samples]
75 | S2train = S2data[0:training_samples]
76 | ytrain = ydata[0:training_samples]
77 |
78 | Xtest = Xdata[all_training_samples:]
79 | S1test = S1data[all_training_samples:]
80 | S2test = S2data[all_training_samples:]
81 | ytest = ydata[all_training_samples:]
82 | ytest = ytest.flatten()
83 |
84 | sortinds = np.random.permutation(training_samples)
85 | Xtrain = Xtrain[sortinds]
86 | S1train = S1train[sortinds]
87 | S2train = S2train[sortinds]
88 | ytrain = ytrain[sortinds]
89 | ytrain = ytrain.flatten()
90 |
91 | self.updates = rmsprop_updates_T(self.cost, self.params, stepsize=stepsize)
92 | self.train = theano.function(inputs=[self.X, self.S1, self.S2, self.y], outputs=[], updates=self.updates)
93 |
94 | print fmt_row(10, ["Epoch", "Train NLL", "Train Err", "Test NLL", "Test Err", "Epoch Time"])
95 | for i_epoch in xrange(int(epochs)):
96 | tstart = time.time()
97 | # do training
98 | for start in xrange(0, Xtrain.shape[0], batch_size):
99 | end = start+batch_size
100 | if end <= Xtrain.shape[0]:
101 | self.train(Xtrain[start:end], S1train[start:end], S2train[start:end],
102 | ytrain[start*self.statebatchsize:end*self.statebatchsize])
103 | elapsed = time.time() - tstart
104 | # compute losses
105 | trainerr = 0.
106 | trainloss = 0.
107 | testerr = 0.
108 | testloss = 0.
109 | num = 0
110 | for start in xrange(0, Xtest.shape[0], batch_size):
111 | end = start+batch_size
112 | if end <= Xtest.shape[0]:
113 | num += 1
114 | trainerr_, trainloss_ = self.computeloss(Xtrain[start:end], S1train[start:end], S2train[start:end],
115 | ytrain[start*self.statebatchsize:end*self.statebatchsize])
116 | testerr_, testloss_ = self.computeloss(Xtest[start:end], S1test[start:end], S2test[start:end],
117 | ytest[start*self.statebatchsize:end*self.statebatchsize])
118 | trainerr += trainerr_
119 | trainloss += trainloss_
120 | testerr += testerr_
121 | testloss += testloss_
122 | print fmt_row(10, [i_epoch, trainloss/num, trainerr/num, testloss/num, testerr/num, elapsed])
123 |
124 | def predict(self, input):
125 | # NN output for a single input, read from file
126 | matlab_data = sio.loadmat(input)
127 | im_data = matlab_data["im_data"]
128 | im_data = (im_data - 1)/255 # obstacles = 1, free zone = 0
129 | state_data = matlab_data["state_xy_data"]
130 | value_data = matlab_data["value_data"]
131 | xim_test = im_data.astype(theano.config.floatX)
132 | xim_test = xim_test.reshape(-1, 1, self.im_size[0], self.im_size[1])
133 | xval_test = value_data.astype(theano.config.floatX)
134 | xval_test = xval_test.reshape(-1, 1, self.im_size[0], self.im_size[1])
135 | x_test = np.append(xim_test, xval_test, axis=1)
136 | s_test = state_data.astype('int8')
137 | s1_test = s_test[:, 0].reshape([1, 1])
138 | s2_test = s_test[:, 1].reshape([1, 1])
139 | out = self.y_out(x_test, s1_test, s2_test)
140 | return out[0][0]
141 |
142 | def load_weights(self, infile="weight_dump.pk"):
143 | dump = pickle.load(open(infile, 'rb'))
144 | [n.set_value(p) for n, p in zip(self.params, dump)]
145 |
146 | def save_weights(self, outfile="weight_dump.pk"):
147 | pickle.dump([n.get_value() for n in self.params], open(outfile, 'wb'))
148 |
149 |
150 | class VinBlock(object):
151 | """VIN block"""
152 | def __init__(self, in_x, in_s1, in_s2, in_x_channels, imsize, batchsize=128,
153 | state_batch_size=1, l_h=150, l_q=10, k=0):
154 | """
155 | Allocate a VIN block with shared variable internal parameters.
156 |
157 | :type in_x: theano.tensor.ftensor4
158 | :param in_x: symbolic input image tensor, of shape [batchsize, in_x_channels, imsize[0], imsize[1]]
159 | Typically, the first channel is the obstacle image and the second is the reward prior.
160 |
161 | :type in_s1: theano.tensor.bmatrix
162 | :param in_s1: symbolic input batches of vertical positions, of shape [batchsize, state_batch_size]
163 |
164 | :type in_s2: theano.tensor.bmatrix
165 | :param in_s2: symbolic input batches of horizontal positions, of shape [batchsize, state_batch_size]
166 |
167 | :type in_x_channels: int32
168 | :param in_x_channels: number of input channels
169 |
170 | :type imsize: tuple or list of length 2
171 | :param imsize: (image height, image width)
172 |
173 | :type batchsize: int32
174 | :param batchsize: batch size
175 |
176 | :type state_batch_size: int32
177 | :param state_batch_size: number of state inputs for each sample
178 |
179 | :type l_h: int32
180 | :param l_h: number of channels in first hidden layer
181 |
182 | :type l_q: int32
183 | :param l_q: number of channels in q layer (~actions)
184 |
185 | :type k: int32
186 | :param k: number of VI iterations (the block actually performs k+1 Bellman-style updates)
187 |
188 | """
189 | self.bias = theano.shared((np.random.randn(l_h) * 0.01).astype(theano.config.floatX)) # 150 parameters
190 | self.w0 = init_weights_T(l_h, in_x_channels, 3, 3)  # l_h * in_x_channels * 3 * 3 parameters (2700 for l_h=150 and 2 input channels)
191 | # initial conv layer over image+reward prior
192 | self.h = conv2D_keep_shape(in_x, self.w0, image_shape=[batchsize, self.w0.shape.eval()[1],
193 | imsize[0], imsize[1]],
194 | filter_shape=self.w0.shape.eval())
195 | self.h = self.h + self.bias.dimshuffle('x', 0, 'x', 'x')
196 |
197 | self.w1 = init_weights_T(1, l_h, 1, 1) # 150 parameters
198 | self.r = conv2D_keep_shape(self.h, self.w1, image_shape=[batchsize, self.w0.shape.eval()[0],
199 | imsize[0], imsize[1]],
200 | filter_shape=self.w1.shape.eval())
201 |
202 | # weights from inputs to q layer (~reward in Bellman equation), one filter per VI iteration
203 | self.w_list = [init_weights_T(l_q, 1, 3, 3) for i in range(0, k + 1)]  # 90 parameters each (k+1 filters)
204 | # feedback weights from v layer into q layer (~transition probabilities in Bellman equation), untied per iteration
205 | self.w_fb_list = [init_weights_T(l_q, 1, 3, 3) for i in range(0, k)]  # 90 parameters each (k filters)
206 |
207 | self.q = conv2D_keep_shape(self.r, self.w_list[0], image_shape=[batchsize, self.w1.shape.eval()[0],
208 | imsize[0], imsize[1]],
209 | filter_shape=self.w_list[0].shape.eval())
210 | self.v = T.max(self.q, axis=1, keepdims=True)
211 |
212 | for i in range(0, k-1):
213 | self.q = conv2D_keep_shape(T.concatenate([self.r, self.v], axis=1), T.concatenate([self.w_list[i+1], self.w_fb_list[i]],
214 | axis=1),
215 | image_shape=[batchsize, self.w1.shape.eval()[0]+1, imsize[0], imsize[1]],
216 | filter_shape=T.concatenate([self.w_list[i+1], self.w_fb_list[i]], axis=1).shape.eval())
217 | self.v = T.max(self.q, axis=1, keepdims=True)
218 |
219 | # do one last convolution
220 | self.q = conv2D_keep_shape(T.concatenate([self.r, self.v], axis=1), T.concatenate([self.w_list[k], self.w_fb_list[k-1]], axis=1),
221 | image_shape=[batchsize, self.w1.shape.eval()[0]+1, imsize[0], imsize[1]],
222 | filter_shape=T.concatenate([self.w_list[k], self.w_fb_list[k-1]], axis=1).shape.eval())
223 |
224 | # Select the conv-net channels at the state position (S1,S2).
225 | # Intuitively, each channel represents an action and the conv-net output represents the Q function.
226 | # The tricky thing is we want to select the same (S1,S2) position *for each* channel and for each sample
227 | self.q_out = self.q[T.extra_ops.repeat(T.arange(self.q.shape[0]), state_batch_size), :, in_s1.flatten(),
228 | in_s2.flatten()]
229 |
230 | # softmax output weights
231 | self.w_o = init_weights_T(l_q, 8) # 80 parameters
232 | self.output = T.nnet.softmax(T.dot(self.q_out, self.w_o))
233 |
234 | self.params = self.w_list + self.w_fb_list + [self.w0, self.bias, self.w1, self.w_o]
235 |
--------------------------------------------------------------------------------
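
A hypothetical driver for the vin_untied class above, shown only as a usage sketch; it is not a file in this repository, and in practice training is launched through the repo's shell scripts. The .mat paths, epoch count, and image size are placeholders, assumed to match the fields read by run_training and predict.

# Usage sketch (placeholder paths and settings, not part of the repo).
from vin_untied import vin_untied

net = vin_untied(model="VIN_untied", im_size=[16, 16], k=10,
                 batchsize=128, statebatchsize=10)
net.run_training(input="data/gridworld_16.mat",    # assumed to hold the batch_* fields read above
                 stepsize=0.01, epochs=30, batch_size=128)
net.save_weights(outfile="vin_untied_weights.pk")  # pickled list of shared-variable values
action = net.predict(input="single_domain.mat")    # placeholder file with im_data/value_data/state_xy_data
print(action)                                      # integer index into the 8 grid-world actions
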