├── README.md
├── lasagne_utils.py
├── numpy_dataset.py
├── LICENSE
├── cnn.py
├── data.py
├── cnn_test.py
└── augmentation.py


/README.md:
--------------------------------------------------------------------------------
 1 | Seed-per-pod estimation for plant breeding using deep learning
 2 | ==============================================================
 3 | Uzal, L.C., Grinblat, G.L., Namías, R., Larese, M.G., Bianchi, J.S., Morandi, E.N. and Granitto, P.M.
 4 | 
 5 | 
 6 | Code for reproducing the CNN experiments. Execute with
 7 | 
 8 | python cnn_test.py
 9 | 
10 | 


--------------------------------------------------------------------------------
/lasagne_utils.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import lasagne
 3 | 
 4 | 
 5 | def save_network_values(network, filepath):
 6 |     np.savez(filepath, *lasagne.layers.get_all_param_values(network))
 7 | 
 8 | 
 9 | def load_network_values(network, filepath):
10 |     with np.load(filepath) as f:
11 |         param_values = [f['arr_%d' % i] for i in range(len(f.files))]
12 |     lasagne.layers.set_all_param_values(network, param_values)
13 | 
14 | 


--------------------------------------------------------------------------------
/numpy_dataset.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import numpy as np
 3 | 
 4 | 
 5 | class Numpy_dataset(object):
 6 | 
 7 |     def __init__(self, data_dir, file_name='train.npz'):
 8 | 
 9 |         file_path = os.path.join(data_dir,file_name)
10 |         with np.load(file_path) as data:
11 |             self.X = data['X']
12 |             self.y = data['y'].astype(np.uint8)
13 |             self.samples_names = data["samples_names"]
14 |             self.groups2 = data["fold_number"]
15 |             self.groups = data["fold_session"]
16 |         if len(self.X.shape) == 3:
17 |             self.X = self.X.reshape(-1,1,self.X.shape[1],self.X.shape[2])
18 | 
19 |         self.y = self.y - self.y.min()
20 |         self.nclasses = self.y.max()+1
21 | 
22 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | COPYRIGHT
 2 | 
 3 | All contributions by Lucas C. Uzal:
 4 | Copyright (c) 2017, Lucas C. Uzal.
 5 | All rights reserved.
 6 | 
 7 | Contributions in augmentation.py by François Chollet:
 8 | Copyright (c) 2015, François Chollet.
 9 | All rights reserved.
10 | 
11 | All other contributions:
12 | Copyright (c) 2015 - 2017, the respective contributors.
13 | All rights reserved.
14 | 
15 | LICENSE
16 | 
17 | The MIT License (MIT)
18 | 
19 | Permission is hereby granted, free of charge, to any person obtaining a copy
20 | of this software and associated documentation files (the "Software"), to deal
21 | in the Software without restriction, including without limitation the rights
22 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
23 | copies of the Software, and to permit persons to whom the Software is
24 | furnished to do so, subject to the following conditions:
25 | 
26 | The above copyright notice and this permission notice shall be included in all
27 | copies or substantial portions of the Software.
28 | 
29 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
30 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
31 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
32 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
33 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
34 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
35 | SOFTWARE.
36 | 
37 | 


--------------------------------------------------------------------------------
/cnn.py:
--------------------------------------------------------------------------------
 1 | import theano.tensor as T
 2 | import lasagne
 3 | from lasagne.layers import InputLayer, DenseLayer, Conv2DLayer, NonlinearityLayer, GlobalPoolLayer
 4 | from lasagne.layers.normalization import BatchNormLayer
 5 | from lasagne.layers import FlattenLayer
 6 | from lasagne.nonlinearities import softmax, linear
 7 | # from lasagne.nonlinearities import rectify as relu
 8 | from lasagne.nonlinearities import leaky_rectify as lrelu
 9 | 
def build_representation(img_size=(64, 64), nchannels=3, ndf=64,
                         vis_filter_size=5, filters_size=5, global_pool=True,
                         strides=(2, 2, 2, 2)):
    """Build the convolutional feature extractor and return its last layer.

    One block is created per entry of ``strides``: a bias-free convolution
    with no nonlinearity, batch normalisation, then a leaky rectifier.
    Block ``i`` has ``ndf * prod(strides[:i + 1])`` filters.

    Args:
        img_size: two-element sequence giving the last two input dimensions.
        nchannels: number of input channels.
        ndf: base number of filters.
        vis_filter_size: filter size of the first convolution.
        filters_size: filter size of every other convolution.
        global_pool: if true the output is a global max pool over the last
            feature map, otherwise the feature map is flattened.
        strides: stride of each convolution; its length sets the depth.

    Returns:
        The final Lasagne layer, named ``cnn_last_code``.
    """
    print('cnn')
    # NOTE: a check that was already disabled in the original required a
    # square image whose side is a multiple of 32.

    # Every convolution uses `filters_size` except the first one. The guard
    # keeps an empty `strides` from raising IndexError.
    w_sizes = [filters_size] * len(strides)
    if w_sizes:
        w_sizes[0] = vis_filter_size

    h = InputLayer((None, nchannels, img_size[0], img_size[1]))

    wf = 1  # running product of the strides seen so far
    for i, s in enumerate(strides):
        wf *= s
        conv = Conv2DLayer(h, num_filters=wf * ndf, filter_size=w_sizes[i], stride=s, pad='same',
                           b=None, nonlinearity=None, name='cnn_l%d_Conv' % i)
        normed = BatchNormLayer(conv, name='cnn_l%d_BN' % i)
        h = NonlinearityLayer(normed, nonlinearity=lrelu)

    if global_pool:
        h = GlobalPoolLayer(h, pool_function=T.max, name='cnn_last_code')
    else:
        h = FlattenLayer(h, name='cnn_last_code')

    return h
44 | 
def build_classifier(nclasses, img_size=[64,64], nchannels=3, ndf=64,
                     vis_filter_size=5, filters_size=5, global_pool=True, strides = [2, 2, 2, 2]):
    """Stack a softmax layer with ``nclasses`` units on the feature extractor.

    All arguments other than ``nclasses`` are forwarded unchanged to
    ``build_representation``. Returns the dense layer named ``softmax``.
    """
    features = build_representation(img_size=img_size,
                                    nchannels=nchannels,
                                    ndf=ndf,
                                    vis_filter_size=vis_filter_size,
                                    filters_size=filters_size,
                                    global_pool=global_pool,
                                    strides=strides)
    return DenseLayer(features, num_units=nclasses, nonlinearity=softmax, name='softmax')
53 | 
54 | 
def build_classifier_and_regressor(nclasses, img_size=[64, 64], nchannels=3, ndf=64,
                     vis_filter_size=5, filters_size=5, global_pool=True, strides=[2, 2, 2, 2]):
    """Build two heads that share one feature extractor.

    Returns a pair ``(c, r)``: ``c`` is a softmax layer with ``nclasses``
    units (named ``softmax``) and ``r`` is a single linear unit (named
    ``linear_out``). Both take the ``build_representation`` output as input.
    """
    shared = build_representation(img_size=img_size,
                                  nchannels=nchannels,
                                  ndf=ndf,
                                  vis_filter_size=vis_filter_size,
                                  filters_size=filters_size,
                                  global_pool=global_pool,
                                  strides=strides)
    class_head = DenseLayer(shared, num_units=nclasses, nonlinearity=softmax, name='softmax')
    value_head = DenseLayer(shared, num_units=1, nonlinearity=linear, name='linear_out')
    return class_head, value_head
64 | 


--------------------------------------------------------------------------------
/data.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import theano
  3 | 
  4 | def floatX(X):
  5 |     return np.asarray(X, dtype=theano.config.floatX)
  6 | 
  7 | def OneHot(X, n=None, negative_class=0.):
  8 |     X = np.asarray(X).flatten()
  9 |     if n is None:
 10 |         n = np.max(X) + 1
 11 |     Xoh = np.ones((len(X), n)) * negative_class
 12 |     Xoh[np.arange(len(X)), X] = 1.
 13 |     return floatX(Xoh)
 14 | 
 15 | 
 16 | def get_batch_indexes(iter, batch_size, max_size):
 17 |     n1 = (iter * batch_size) % max_size
 18 |     n2 = ((iter + 1) * batch_size - 1) % max_size + 1
 19 |     if n1 > n2:
 20 |         return np.concatenate((np.arange(n1,max_size),np.arange(0,n2)))
 21 |     else:
 22 |         return np.arange(n1,n2)
 23 | 
 24 | def scale_data(X, max, min, new_min=-1.0, new_max=1.0):
 25 |     scale = float(max - min)
 26 |     new_scale = float(new_max - new_min)
 27 |     return floatX((X - min) * new_scale / scale + new_min)
 28 | 
 29 | class Dataset(object):
 30 | 
 31 |     def __init__(self, X_train=None, y_train=None,
 32 |                  X_valid=None, y_valid=None,
 33 |                  X_test =None, y_test=None,
 34 |                  X_unlab=None, x_min=0, x_max=255, ymin=0.00, ymax=1.00, nclasses=None, **kw):
 35 | 
 36 |         self.__dict__.update(kw)
 37 |         del kw # We don't want this in attrs
 38 |         self.__dict__.update(locals())
 39 |         del self.self # We don't need this either
 40 |         if X_train is not None:
 41 |             self._nchannels = X_train.shape[1]
 42 |             self.first_class = int(y_train.min()) # para corregir si el numero de clase arranca de 1 o 0
 43 |             if nclasses is None:
 44 |                 nclasses = int(y_train.max()) - self.first_class + 1
 45 |         elif X_unlab is not None:
 46 |             self._nchannels = X_unlab.shape[1]
 47 |         else:
 48 |             self._nchannels = None
 49 | 
 50 | 
 51 |     def get_train_batch(self, index, batch_size, one_hot=True):
 52 |         X = self._get_batch(self.X_train,index,batch_size)
 53 |         y = self._get_batch(self.y_train,index,batch_size)
 54 | 
 55 |         if one_hot:
 56 |             y = floatX(OneHot(y.astype(int)-self.first_class,n=self.nclasses))
 57 |             return X, self.smooth_labels(y,self.ymin,self.ymax)
 58 |         else:
 59 |             return X, y.astype(int)-self.first_class
 60 | 
 61 | 
 62 |     def get_valid_batch(self, index, batch_size, one_hot=True):
 63 |         X = self._get_batch(self.X_valid,index,batch_size)
 64 |         y = self._get_batch(self.y_valid,index,batch_size)
 65 |         
 66 |         if one_hot:
 67 |             y = floatX(OneHot(y.astype(int)-self.first_class,n=self.nclasses))
 68 |             return X, self.smooth_labels(y,self.ymin,self.ymax)
 69 |         else:
 70 |             return X, y.astype(int)-self.first_class
 71 | 
 72 |     def get_test_batch(self, index, batch_size, one_hot=True):
 73 |         X = self._get_batch(self.X_test,index,batch_size)
 74 |         y = self._get_batch(self.y_test,index,batch_size)
 75 |         
 76 |         if one_hot:
 77 |             y = floatX(OneHot(y.astype(int)-self.first_class,n=self.nclasses))
 78 |             return X, self.smooth_labels(y,self.ymin,self.ymax)
 79 |         else:
 80 |             return X, y.astype(int)-self.first_class
 81 | 
 82 |     def get_unlab_batch(self, index, batch_size):
 83 |         X = self._get_batch(self.X_unlab,index,batch_size)
 84 |         return X
 85 | 
 86 |     def _get_batch(self, X, index, batch_size):
 87 |         size = X.shape[0]
 88 |         n1 = (index*batch_size)%size
 89 |         n2 = ((index+1)*batch_size-1)%size+1
 90 |         if n1>n2:
 91 |             return floatX(np.concatenate((X[n1:], X[:n2])))
 92 |         else:
 93 |             return floatX(X[n1:n2])
 94 | 
 95 |     def scale_data(self, X, new_min=-1.0, new_max=1.0):
 96 |         self.new_min = new_min
 97 |         self.new_max = new_max
 98 |         scale = self.x_max - self.x_min
 99 |         new_scale = new_max - new_min
100 |         return floatX((X-self.x_min)*new_scale/scale+new_min)
101 | 
102 |     def smooth_labels(self, y, ymin=0.1, ymax=0.9):
103 |         return y*(ymax-ymin)+ymin
104 | 
105 |     def image_crop(self, X, ph, pw=None, random_state=None):
106 | 
107 |         if pw is None:
108 |             pw = ph
109 | 
110 |         h, w = X.shape[2:4]
111 | 
112 |         if h == ph and w == pw:
113 |             return X
114 | 
115 |         if random_state:
116 |             j = random_state.random_integers(0, h - ph)
117 |             i = random_state.random_integers(0, w - pw)
118 |         else:
119 |             j = int(round((h - ph)/2.))
120 |             i = int(round((w - pw)/2.))
121 | 
122 |         return X[:,:,j:j+ph, i:i+pw]
123 | 
124 |     def inv_scale_data(self, X, old_min=-1.0, old_max=1.0):
125 |         scale = self.x_max - self.x_min
126 |         old_scale = old_max - old_min
127 |         return floatX((X-old_min)*scale/old_scale+self.x_min)
128 | 
129 | 


--------------------------------------------------------------------------------
/cnn_test.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python2.7
  2 | 
  3 | import os
  4 | import argparse
  5 | import pickle
  6 | from six.moves import urllib
  7 | import tarfile
  8 | from time import time
  9 | import numpy as np
 10 | from numpy.random import RandomState
 11 | import theano
 12 | import theano.tensor as T
 13 | import lasagne
 14 | from lasagne.updates import adam
 15 | from lasagne.init import Normal
 16 | 
 17 | from lasagne_utils import load_network_values, save_network_values
 18 | 
 19 | from numpy_dataset import Numpy_dataset
 20 | from data import get_batch_indexes, scale_data, Dataset, floatX
 21 | from augmentation import Augmentation, zoom_and_crop
 22 | from cnn import build_classifier
 23 | from sklearn.model_selection import GroupKFold
 24 | 
 25 | from lasagne.objectives import categorical_crossentropy
 26 | from lasagne.regularization import regularize_layer_params, l2
 27 | 
 28 | from sklearn.metrics import confusion_matrix
 29 | 
 30 | def build_predict_function(classifier, X=T.tensor4()):
 31 |     test_clsX = lasagne.layers.get_output(classifier, X, deterministic=True)
 32 |     return theano.function(inputs=[X],outputs=test_clsX,updates=None)
 33 | 
 34 | def mean_acc_by_fold(y_true, y_pred, groups):
 35 | 
 36 |     n_groups = max(groups) + 1
 37 |     n_splits = n_groups if n_groups <= 10 else 5
 38 |     gkf = GroupKFold(n_splits=n_splits)
 39 |     acc = []
 40 |     for _, group_indexes in gkf.split(y_pred, y_true, groups=groups):
 41 |         comp = y_true[group_indexes]==y_pred[group_indexes]
 42 |         acc.append(comp.mean())
 43 |     acc = np.asarray(acc)
 44 |     return acc.mean(), acc.std()
 45 | 
 46 | 
 47 | 
 48 | 
 49 | def train_model(params, data, test_groups, samples_dir='samples', best_model_dir='best_model', grid_size=8, verbosity=0):
 50 |     batch_size = params["batch_size"]
 51 |     n_iter = params["n_iter"]
 52 |     lr = 10.**params["log10lr"]
 53 |     iter_save = params["iter_save"]
 54 |     model_width = params["model_width"]
 55 |     seed = params["seed"]
 56 |     wd = np.asscalar(floatX(10.**params["log10wd"]))
 57 |     rotation_range = params["rotation_range"]
 58 |     width_shift_range = params["width_shift_range"]
 59 |     height_shift_range = params["height_shift_range"]
 60 |     shear_range = params["shear_range"]
 61 |     zoom_range = (params["zoom_range_center"]-params["zoom_range_range"]*0.5,
 62 |                   params["zoom_range_center"]+params["zoom_range_range"]*0.5)
 63 |     random_curves_strength = params["random_curves_strength"]
 64 |     n_layers_per_block = params["n_layers_per_block"]
 65 |     n_blocks = params["n_blocks"]
 66 | 
 67 | 
 68 |     test_size = data.X_test.shape[0]
 69 |     valid_size = data.X_valid.shape[0]
 70 |     train_size = data.X_train.shape[0]
 71 |     nchannels = data.X_train.shape[1]
 72 |     img_shape = data.X_train.shape[2:4]
 73 |     nclasses = data.nclasses
 74 |     class_labels = None  # TODO
 75 |     y_test_pred = np.zeros((test_size + batch_size,))
 76 |     assert(test_size==data.y_test.shape[0])
 77 | 
 78 |     # MODELS
 79 |     strides = ([1] * (n_layers_per_block - 1) + [2]) * n_blocks
 80 |     classifier = build_classifier(nclasses=nclasses,
 81 |                                   img_size=img_shape,
 82 |                                   nchannels=nchannels,
 83 |                                   ndf=model_width,
 84 |                                   global_pool=True,
 85 |                                   strides=strides)
 86 | 
 87 |     it_best = 0
 88 |     best_acc = 0.0
 89 |     if n_iter>0:
 90 | 
 91 |         if not os.path.exists(best_model_dir):
 92 |             os.makedirs(best_model_dir)
 93 |         if not os.path.exists(samples_dir):
 94 |             os.makedirs(samples_dir)
 95 | 
 96 |         augm = Augmentation(rotation_range=rotation_range,  # In degrees
 97 |                             width_shift_range=width_shift_range,
 98 |                             height_shift_range=height_shift_range,
 99 |                             horizontal_flip=True,
100 |                             shear_range=shear_range,  # In radians
101 |                             zoom_range=zoom_range,  # >1 zoom out; <1 zoom in
102 |                             channel_shift_range=0.0,  # 0-255
103 |                             fill_mode='constant',  # 'nearest',
104 |                             random_curves_strength=random_curves_strength,
105 |                             seed=seed)
106 | 
107 |         if verbosity>0:
108 |             print  data.X_train.shape, img_shape
109 | 
110 |         # SYMBOLIC INPUTS
111 |         X = T.tensor4()
112 |         y = T.matrix()
113 | 
114 | 
115 |         clsX = lasagne.layers.get_output(classifier, X)
116 | 
117 |         # LOSS FUNCTIONS
118 |         weight_decay = regularize_layer_params(classifier, l2)
119 |         cls_loss = categorical_crossentropy(clsX,y).mean() +  wd * weight_decay
120 | 
121 |         # PARAMS
122 |         cls_params = lasagne.layers.get_all_params(classifier, trainable=True)
123 | 
124 |         # UPDATES
125 |         cls_updates = adam(cls_loss, cls_params, learning_rate= floatX(lr), beta1=floatX(0.9), beta2=floatX(0.999))
126 | 
127 |         # TRAINING FUNCTIONS
128 |         if verbosity>0:
129 |             print 'COMPILING TRAINING FUNCTIONS'
130 |         t = time()
131 |         train_cls = theano.function([X, y], cls_loss, updates=cls_updates)
132 |         if verbosity>0:
133 |             print '%.2f seconds to compile theano functions' % (time() - t)
134 | 
135 |         # MONITOR
136 |         if verbosity > 0:
137 |             print 'COMPILING MONITOR FUNCTIONS'
138 |         t = time()
139 |         predict = build_predict_function(classifier, X)
140 |         if verbosity > 0:
141 |             print '%.2f seconds to compile theano functions' % (time() - t)
142 | 
143 |         if verbosity > 0:
144 |             print "starting training"
145 |             with open(best_model_dir + '/accuracies.log', 'w') as f:
146 |                 f.write('# iter data_seen epoch cls_loss train_acc valid_acc')
147 |                 f.write('\n')
148 |             with open(best_model_dir + '/best_acc.log', 'w') as f:
149 |                 f.write('# iter data_seen epoch valid_acc test_acc test_acc_mean test_acc_std')
150 |                 f.write('\n')
151 | 
152 |         n_epochs =  n_iter* batch_size/train_size
153 | 
154 |         last_it = 0
155 |         t = time()
156 |         for it in xrange(0, n_iter):
157 |             epoch = it* batch_size/train_size
158 | 
159 |             X_batch, y_batch = data.get_train_batch(it, batch_size)
160 |             X_batch = augm.random_transform(X_batch)
161 |             X_batch = data.scale_data(X_batch)
162 |             cls_loss_value = train_cls(X_batch, y_batch)
163 | 
164 |             if (it %  iter_save == 0) or (it % 10 == 0 and it <  iter_save):
165 |                 y_pred = np.argmax(predict(X_batch), axis=1)
166 |                 y_true = np.argmax(y_batch, axis=1)
167 |                 train_acc = (y_pred == y_true).mean()
168 | 
169 |                 y_pred = np.asarray([])
170 |                 y_true = np.asarray([])
171 |                 for valit in range(valid_size/ batch_size):
172 |                     X_valid, y_valid = data.get_valid_batch(valit, batch_size)
173 |                     X_valid = data.scale_data(X_valid)
174 |                     y_pred = np.append(y_pred, np.argmax(predict(X_valid), axis=1))
175 |                     y_true = np.append(y_true, np.argmax(y_valid, axis=1))
176 |                 valid_acc = (y_pred == y_true).mean()
177 |                 valid_cm = confusion_matrix(y_true,y_pred,class_labels)
178 | 
179 |                 if verbosity>0:
180 |                     print train_acc, valid_acc
181 | 
182 |                 if best_acc<valid_acc:
183 |                     best_acc = valid_acc
184 |                     it_best = it + 1
185 |                     if verbosity>2:
186 |                         save_network_values(classifier, os.path.join(best_model_dir, 'classifier.npz'))
187 |                         pickle.dump(params,
188 |                                     open("%s/ p" % best_model_dir, "wb"))
189 | 
190 |                     for testit in range(test_size / batch_size + 1):
191 |                         X_test, y_test = data.get_test_batch(testit,  batch_size)
192 |                         X_test = data.scale_data(X_test)
193 |                         pred = predict(X_test)
194 |                         y_test_pred[testit * batch_size:(testit + 1) * batch_size] = np.argmax(pred, axis=1)
195 | 
196 | 
197 |                     y_p = y_test_pred[:test_size]
198 |                     y_t = data.y_test
199 |                     test_acc_mean, test_acc_std = mean_acc_by_fold(y_p, y_t, test_groups)
200 |                     test_acc = (y_p == y_t).mean()
201 |                     test_cm = confusion_matrix(y_t, y_p, class_labels)
202 | 
203 |                     with open(best_model_dir + '/best_acc.log', 'a') as f:
204 |                         np.savetxt(f, [[it + 1, (it + 1) *  batch_size, epoch,
205 |                                         valid_acc, test_acc, test_acc_mean, test_acc_std]], fmt='%1.3e')
206 |                     if verbosity>0:
207 |                         print "Best valid accuracy reached: %2.2f%%  "%(valid_acc*100)
208 |                         print "Test Acc.: %2.2f%%"%(test_acc*100)
209 |                         print "Mean Test Acc.: %1.3f +/- %1.3f"%(test_acc_mean,test_acc_std)
210 |                         print "valid CM\n", valid_cm
211 |                         print "test CM\n", test_cm
212 | 
213 |                 with open(best_model_dir + '/accuracies.log', 'a') as f:
214 |                     np.savetxt(f, [[it+1, (it+1)* batch_size, epoch,
215 |                                   cls_loss_value, train_acc, valid_acc]], fmt='%1.3e')
216 | 
217 |                 if verbosity>0:
218 |                     t2 = time()-t
219 |                     t += t2
220 |                     horas = t2/(1+it-last_it)/3600.*10000
221 |                     print "iter:%d/%d; epoch:%d;    %4.2f hours for 10000 iterations"%(it+1, n_iter,epoch,horas)
222 |                     last_it = it+1
223 | 
224 | 
225 |         print "End train\n"
226 | 
227 |     load_network_values(classifier, os.path.join(best_model_dir, 'classifier.npz'))
228 |     best_predict = build_predict_function(classifier)
229 | 
230 |     return best_acc, it_best, best_predict
231 | 
232 | 
def mk_classification_images(data, which_set, file, predict, samples_dir, batch_size=32):
    """Run ``predict`` over one split of ``data`` and collect the results.

    Args:
        data: object exposing ``X_train``/``X_valid``/``X_test``, the matching
            ``get_*_batch`` methods and ``scale_data``.
        which_set: ``'train'``, ``'valid'`` or ``'test'``.
        file: unused.
        predict: callable mapping a scaled input batch to per-class scores.
        samples_dir: directory created if missing; nothing is written to it
            by this function.
        batch_size: number of samples per prediction call.
            NOTE(review): each batch is assumed to hold exactly
            ``batch_size`` samples - confirm for splits smaller than that.

    Returns:
        ``(y_pred, y_true, y_maxprob)``: for every sample of the split, the
        predicted class index, the index of the largest label entry, and
        the highest predicted score. (The original computed these arrays
        and returned None.)

    Raises:
        ValueError: if ``which_set`` is not one of the three known names.
    """
    if which_set == 'train':
        X, get_batch = data.X_train, data.get_train_batch
    elif which_set == 'valid':
        X, get_batch = data.X_valid, data.get_valid_batch
    elif which_set == 'test':
        X, get_batch = data.X_test, data.get_test_batch
    else:
        raise ValueError("which_set must be 'train', 'valid' or 'test', got %r" % (which_set,))

    if not os.path.exists(samples_dir):
        os.makedirs(samples_dir)

    size = X.shape[0]
    # One spare batch of room: the last batch wraps past the end of the data.
    y_pred = np.zeros((size + batch_size,))
    y_true = np.zeros((size + batch_size,))
    y_maxprob = np.zeros((size + batch_size,))

    for it in range(size // batch_size + 1):
        X_batch, y_batch = get_batch(it, batch_size)
        X_batch = data.scale_data(X_batch)

        pred = predict(X_batch)
        lo, hi = it * batch_size, (it + 1) * batch_size
        y_pred[lo:hi] = np.argmax(pred, axis=1)
        y_true[lo:hi] = np.argmax(y_batch, axis=1)
        y_maxprob[lo:hi] = np.max(pred, axis=1)

    # Drop the wrapped-around surplus of the final batch.
    return y_pred[:size], y_true[:size], y_maxprob[:size]
265 | 
266 | 
def eval_params(params,data,test_data,samples_dir='samples',best_model_dir='best_model',grid_size=8, verbosity=2):
    """Train one model per group-wise fold and return the validation accuracies.

    ``data`` is split with ``GroupKFold`` on ``data.groups``. For each split
    the training indexes are shuffled in place, a ``Dataset`` is built (with
    ``test_data`` as its test set) and ``train_model`` is run. The per-fold
    best accuracies are printed and returned as an array.
    NOTE(review): the number of splits is derived from ``max(data.groups) + 1``,
    which assumes 0-based contiguous group labels - confirm.
    """
    n_groups = max(data.groups) + 1
    n_splits = 5 if n_groups > 10 else n_groups
    splitter = GroupKFold(n_splits=n_splits)

    fold_accuracies = []
    for train_idx, valid_idx in splitter.split(data.X, data.y, groups=data.groups):
        np.random.shuffle(train_idx)
        fold_data = Dataset(X_train=data.X[train_idx],
                            y_train=data.y[train_idx],
                            X_valid=data.X[valid_idx],
                            y_valid=data.y[valid_idx],
                            X_test=test_data.X,
                            y_test=test_data.y,
                            nclasses=3)

        acc, n_iter, _ = train_model(params, fold_data, test_data.groups,
                                     samples_dir, best_model_dir, grid_size,
                                     verbosity=verbosity)
        print('%s %s' % (acc, n_iter))
        fold_accuracies.append(acc)

    fold_accuracies = np.asarray(fold_accuracies)
    mean_pct = fold_accuracies.mean() * 100
    std_pct = fold_accuracies.std() * 100
    print("Mean Accuracy: (%2.2f +/- %2.2f)" % (mean_pct, std_pct))
    return fold_accuracies
290 | 
def main(args):
    """Download the dataset if needed and train one model per group-wise fold.

    ``args`` is the parsed command line. ``args.datapath`` is where the
    archive is downloaded and extracted, and ``vars(args)`` is passed to
    ``train_model`` as its hyper-parameter mapping. ``Season1`` is used for
    training and validation, ``Season2`` as the test set. Fold ``k``
    (1-based) writes to ``samples_fold<k>`` and ``best_model_fold<k>``.
    """
    # DATASET
    DATA_URL  = 'http://www.cifasis-conicet.gov.ar/uzal/dataset/soybean_pods.tar.gz'
    filepath = os.path.join(args.datapath,'soybean_pods.tar.gz')
    datapath = os.path.join(args.datapath, 'soybean_pods/')
    if not os.path.exists(datapath):
        # Make sure the download target exists before writing into it.
        if not os.path.isdir(args.datapath):
            os.makedirs(args.datapath)
        print('Downloading soybean_pods.tar.gz')
        filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath)
        print('')
        statinfo = os.stat(filepath)
        # Formatted as one string so Python 2 does not print a tuple.
        print('Succesfully downloaded soybean_pods.tar.gz %d bytes.' % statinfo.st_size)
        # NOTE(security): the archive is fetched over plain HTTP and extracted
        # without checking member paths; only use with a trusted source.
        with tarfile.open(filepath, 'r:gz') as archive:
            archive.extractall(args.datapath)

    data_train_dir = os.path.join(datapath,"Season1")
    data_test_dir = os.path.join(datapath,"Season2")
    data = Numpy_dataset(data_train_dir,'train.npz')
    test_data = Numpy_dataset(data_test_dir,'train.npz')

    params = vars(args)

    # NOTE(review): assumes group labels are 0-based and contiguous, so that
    # max + 1 is the number of groups - confirm against the data.
    n_groups = max(data.groups) + 1
    n_splits = n_groups if n_groups <= 10 else 5
    gkf = GroupKFold(n_splits=n_splits)
    splits = gkf.split(data.X, data.y, groups=data.groups)
    for fold, (train_indexes, valid_indexes) in enumerate(splits, 1):
        np.random.shuffle(train_indexes)
        dataset = Dataset(X_train=data.X[train_indexes],
                          y_train=data.y[train_indexes],
                          X_valid=data.X[valid_indexes],
                          y_valid=data.y[valid_indexes],
                          X_test=test_data.X,
                          y_test=test_data.y,nclasses=3)

        acc, n_iter, _ = train_model(params,dataset,test_data.groups,
                                     samples_dir='samples_fold%d'%fold,
                                     best_model_dir='best_model_fold%d'%fold,
                                     grid_size=8,verbosity=10)
        print('%s %s' % (acc, n_iter))
335 | 
336 | 
if __name__ == '__main__':

    # (flag, type, default) for every option, in registration order.
    _cli_options = [
        ("--datapath", str, '/tmp'),
        ("--n_iter", int, 6000),
        ("--grid_size", int, 8),
        ("--iter_save", int, 100),
        ("--seed", int, 17),
        ('--n_layers_per_block', int, 4),
        ('--n_blocks', int, 3),
        ('--batch_size', int, 128),
        ('--model_width', int, 16),
        ('--log10wd', float, -1.099842),
        ('--log10lr', float, -2.42706),
        ('--rotation_range', int, 20),
        ('--width_shift_range', float, 0.028),
        ('--height_shift_range', float, 0.016),
        ('--shear_range', float, 0.14),  # In radians
        ('--zoom_range_center', float, 0.97),
        ('--zoom_range_range', float, 0.18),
        ('--random_curves_strength', float, 0.58),
        ('--occlusion', int, 5),
    ]

    parser = argparse.ArgumentParser()
    for _flag, _type, _default in _cli_options:
        parser.add_argument(_flag, type=_type, default=_default)

    args = parser.parse_args()

    main(args)
363 | 


--------------------------------------------------------------------------------
/augmentation.py:
--------------------------------------------------------------------------------
  1 | """Fairly basic set of tools for real-time data augmentation on image data.
  2 | Can easily be extended to include new transformations
  3 | https://github.com/fchollet/keras/blob/master/keras/preprocessing/image.py
  4 | """
  5 | 
  6 | import numpy as np
  7 | import scipy.ndimage as ndi
  8 | 
  9 | 
 10 | def random_rotation(x, rg, row_axis=1, col_axis=2, channel_axis=0,
 11 |                     fill_mode='nearest', cval=0.):
 12 |     """Performs a random rotation of a Numpy image tensor.
 13 |     # Arguments
 14 |         x: Input tensor. Must be 3D.
 15 |         rg: Rotation range, in degrees.
 16 |         row_axis: Index of axis for rows in the input tensor.
 17 |         col_axis: Index of axis for columns in the input tensor.
 18 |         channel_axis: Index of axis for channels in the input tensor.
 19 |         fill_mode: Points outside the boundaries of the input
 20 |             are filled according to the given mode
 21 |             (one of `{'constant', 'nearest', 'reflect', 'wrap'}`).
 22 |         cval: Value used for points outside the boundaries
 23 |             of the input if `mode='constant'`.
 24 |     # Returns
 25 |         Rotated Numpy image tensor.
 26 |     """
 27 |     theta = np.pi / 180 * np.random.uniform(-rg, rg)
 28 |     rotation_matrix = np.array([[np.cos(theta), -np.sin(theta), 0],
 29 |                                 [np.sin(theta), np.cos(theta), 0],
 30 |                                 [0, 0, 1]])
 31 | 
 32 |     h, w = x.shape[row_axis], x.shape[col_axis]
 33 |     transform_matrix = transform_matrix_offset_center(rotation_matrix, h, w)
 34 |     x = apply_transform(x, transform_matrix, channel_axis, fill_mode, cval)
 35 |     return x
 36 | 
 37 | 
 38 | def random_shift(x, wrg, hrg, row_axis=1, col_axis=2, channel_axis=0,
 39 |                  fill_mode='nearest', cval=0.):
 40 |     """Performs a random spatial shift of a Numpy image tensor.
 41 |     # Arguments
 42 |         x: Input tensor. Must be 3D.
 43 |         wrg: Width shift range, as a float fraction of the width.
 44 |         hrg: Height shift range, as a float fraction of the height.
 45 |         row_axis: Index of axis for rows in the input tensor.
 46 |         col_axis: Index of axis for columns in the input tensor.
 47 |         channel_axis: Index of axis for channels in the input tensor.
 48 |         fill_mode: Points outside the boundaries of the input
 49 |             are filled according to the given mode
 50 |             (one of `{'constant', 'nearest', 'reflect', 'wrap'}`).
 51 |         cval: Value used for points outside the boundaries
 52 |             of the input if `mode='constant'`.
 53 |     # Returns
 54 |         Shifted Numpy image tensor.
 55 |     """
 56 |     h, w = x.shape[row_axis], x.shape[col_axis]
 57 |     tx = np.random.uniform(-hrg, hrg) * h
 58 |     ty = np.random.uniform(-wrg, wrg) * w
 59 |     translation_matrix = np.array([[1, 0, tx],
 60 |                                    [0, 1, ty],
 61 |                                    [0, 0, 1]])
 62 | 
 63 |     transform_matrix = translation_matrix  # no need to do offset
 64 |     x = apply_transform(x, transform_matrix, channel_axis, fill_mode, cval)
 65 |     return x
 66 | 
 67 | 
 68 | def random_shear(x, intensity, row_axis=1, col_axis=2, channel_axis=0,
 69 |                  fill_mode='nearest', cval=0.):
 70 |     """Performs a random spatial shear of a Numpy image tensor.
 71 |     # Arguments
 72 |         x: Input tensor. Must be 3D.
 73 |         intensity: Transformation intensity.
 74 |         row_axis: Index of axis for rows in the input tensor.
 75 |         col_axis: Index of axis for columns in the input tensor.
 76 |         channel_axis: Index of axis for channels in the input tensor.
 77 |         fill_mode: Points outside the boundaries of the input
 78 |             are filled according to the given mode
 79 |             (one of `{'constant', 'nearest', 'reflect', 'wrap'}`).
 80 |         cval: Value used for points outside the boundaries
 81 |             of the input if `mode='constant'`.
 82 |     # Returns
 83 |         Sheared Numpy image tensor.
 84 |     """
 85 |     shear = np.random.uniform(-intensity, intensity)
 86 |     shear_matrix = np.array([[1, -np.sin(shear), 0],
 87 |                              [0, np.cos(shear), 0],
 88 |                              [0, 0, 1]])
 89 | 
 90 |     h, w = x.shape[row_axis], x.shape[col_axis]
 91 |     transform_matrix = transform_matrix_offset_center(shear_matrix, h, w)
 92 |     x = apply_transform(x, transform_matrix, channel_axis, fill_mode, cval)
 93 |     return x
 94 | 
 95 | 
 96 | def random_zoom(x, zoom_range, row_axis=1, col_axis=2, channel_axis=0,
 97 |                 fill_mode='nearest', cval=0.):
 98 |     """Performs a random spatial zoom of a Numpy image tensor.
 99 |     # Arguments
100 |         x: Input tensor. Must be 3D.
101 |         zoom_range: Tuple of floats; zoom range for width and height.
102 |         row_axis: Index of axis for rows in the input tensor.
103 |         col_axis: Index of axis for columns in the input tensor.
104 |         channel_axis: Index of axis for channels in the input tensor.
105 |         fill_mode: Points outside the boundaries of the input
106 |             are filled according to the given mode
107 |             (one of `{'constant', 'nearest', 'reflect', 'wrap'}`).
108 |         cval: Value used for points outside the boundaries
109 |             of the input if `mode='constant'`.
110 |     # Returns
111 |         Zoomed Numpy image tensor.
112 |     # Raises
113 |         ValueError: if `zoom_range` isn't a tuple.
114 |     """
115 |     if len(zoom_range) != 2:
116 |         raise ValueError('`zoom_range` should be a tuple or list of two floats. '
117 |                          'Received arg: ', zoom_range)
118 | 
119 |     if zoom_range[0] == 1 and zoom_range[1] == 1:
120 |         zx, zy = 1, 1
121 |     else:
122 |         zx, zy = np.random.uniform(zoom_range[0], zoom_range[1], 2)
123 |     zoom_matrix = np.array([[zx, 0, 0],
124 |                             [0, zy, 0],
125 |                             [0, 0, 1]])
126 | 
127 |     h, w = x.shape[row_axis], x.shape[col_axis]
128 |     transform_matrix = transform_matrix_offset_center(zoom_matrix, h, w)
129 |     x = apply_transform(x, transform_matrix, channel_axis, fill_mode, cval)
130 |     return x
131 | 
132 | 
def zoom_and_crop(X_batch, zoom, crop_shape, row_axis=1, col_axis=2, channel_axis=0,
                fill_mode='nearest', cval=0.):
    """Applies a fixed isotropic zoom to every image of a batch, then center-crops.
    # Arguments
        X_batch: Batch of images; the first axis indexes the images and
            each `X_batch[i]` is handed to `apply_transform`.
        zoom: Zoom factor placed on both diagonal entries of the
            transform matrix.
        crop_shape: Sequence `(height, width)` passed to `image_crop`.
        row_axis: Index of axis for rows in a single image.
        col_axis: Index of axis for columns in a single image.
        channel_axis: Index of axis for channels in a single image.
        fill_mode: Points outside the boundaries of the input
            are filled according to the given mode
            (one of `{'constant', 'nearest', 'reflect', 'wrap'}`).
        cval: Value used for points outside the boundaries
            of the input if `mode='constant'`.
    # Returns
        The zoomed batch, center-cropped to `crop_shape`.
    """
    zoom_matrix = np.array([[zoom, 0, 0],
                            [0, zoom, 0],
                            [0, 0, 1]])

    # Every image in the batch array has the same shape, so the centred
    # transform is computed once instead of once per image.
    image_shape = X_batch.shape[1:]
    h, w = image_shape[row_axis], image_shape[col_axis]
    transform_matrix = transform_matrix_offset_center(zoom_matrix, h, w)

    # The output keeps the dtype of the input batch.
    rt_batch = np.zeros_like(X_batch)
    # `range` instead of `xrange` so the loop also runs on Python 3.
    for i in range(X_batch.shape[0]):
        rt_batch[i] = apply_transform(X_batch[i], transform_matrix,
                                      channel_axis, fill_mode, cval)

    return image_crop(rt_batch, crop_shape[0], crop_shape[1])
149 | 
def image_crop(X, ph, pw=None):
    """Center-crops the last two indexed axes (2 and 3) of a batch array.
    # Arguments
        X: Array whose axes 2 and 3 are the image rows and columns.
        ph: Height of the crop.
        pw: Width of the crop. Defaults to `ph` (square crop).
    # Returns
        `X` itself when it already has the requested size, otherwise the
        slice `X[:, :, top:top+ph, left:left+pw]` taken around the centre.
    """
    crop_h = ph
    crop_w = ph if pw is None else pw

    full_h, full_w = X.shape[2:4]

    # Nothing to do when the requested size equals the current size.
    if (full_h, full_w) == (crop_h, crop_w):
        return X

    top = int(round((full_h - crop_h) / 2.))
    left = int(round((full_w - crop_w) / 2.))

    return X[:, :, top:top + crop_h, left:left + crop_w]
164 | 
def random_channel_shift(x, intensity, channel_axis=0):
    """Adds an independent random offset to each channel of an image tensor.
    # Arguments
        x: Input tensor.
        intensity: Each channel is shifted by a value drawn uniformly
            from `[-intensity, intensity]`.
        channel_axis: Index of axis for channels in the input tensor.
    # Returns
        Tensor of the same shape, with every shifted channel clipped to the
        global minimum and maximum of the input.
    """
    channels_first = np.rollaxis(x, channel_axis, 0)
    lowest = np.min(channels_first)
    highest = np.max(channels_first)

    shifted = []
    for plane in channels_first:
        # One draw per channel, in channel order.
        offset = np.random.uniform(-intensity, intensity)
        shifted.append(np.clip(plane + offset, lowest, highest))

    # Move the channel axis back to where it was in the input.
    return np.rollaxis(np.stack(shifted, axis=0), 0, channel_axis + 1)
173 | 
174 | # For curving soybean pods. L.C.Uzal
def random_curves_transform(x, strength=0.1, range=(0.,255.)):
    """Remaps values through a random three-point piecewise-linear curve.
    # Arguments
        x: Values to remap (anything accepted by `np.interp`).
        strength: Fraction of the value range that bounds how far the
            middle control point may be moved; half of it is used on
            each side.
        range: Tuple `(low, high)`; both end points are mapped to
            themselves.
    # Returns
        `x` interpolated through the control points
        `(low, low)`, `(x_mid, y_mid)`, `(high, high)`.
    """
    lo, hi = range
    half_width = (hi - lo) * strength / 2.

    # First draw: where the middle control point sits on the input axis.
    knot_in = np.random.uniform(low=lo + half_width, high=hi - half_width)
    # Second draw: its output value, within +/- half_width of the input value.
    knot_out = np.random.uniform(low=knot_in - half_width,
                                 high=knot_in + half_width)

    inputs = np.asarray([lo, knot_in, hi])
    outputs = np.asarray([lo, knot_out, hi])
    return np.interp(x, inputs, outputs)
183 | 
def transform_matrix_offset_center(matrix, x, y):
    """Re-expresses a homogeneous 2D transform so it pivots on the image centre.
    # Arguments
        matrix: 3x3 homogeneous transform defined about the origin.
        x: Size of the first spatial axis (callers pass the image height).
        y: Size of the second spatial axis (callers pass the image width).
    # Returns
        3x3 matrix `offset . matrix . reset`, i.e. translate the centre to
        the origin, apply `matrix`, translate back.
    """
    # Pixel indices run 0..size-1, so the centre lies at (size - 1) / 2,
    # which is size / 2 - 0.5. Using `+ 0.5` places the pivot one pixel
    # past the true centre.
    o_x = float(x) / 2 - 0.5
    o_y = float(y) / 2 - 0.5
    offset_matrix = np.array([[1, 0, o_x], [0, 1, o_y], [0, 0, 1]])
    reset_matrix = np.array([[1, 0, -o_x], [0, 1, -o_y], [0, 0, 1]])
    transform_matrix = np.dot(np.dot(offset_matrix, matrix), reset_matrix)
    return transform_matrix
191 | 
192 | 
def apply_transform(x,
                    transform_matrix,
                    channel_axis=0,
                    fill_mode='nearest',
                    cval=0.):
    """Apply the image transformation specified by a matrix.
    # Arguments
        x: Numpy array holding a single image, including a channel axis;
            each channel slice is transformed independently.
        transform_matrix: 3x3 Numpy array specifying the geometric
            transformation in homogeneous coordinates. The upper-left 2x2
            block is used as the linear part and the first two entries of
            the last column as the offset.
        channel_axis: Index of axis for channels in the input tensor.
        fill_mode: Points outside the boundaries of the input
            are filled according to the given mode
            (one of `{'constant', 'nearest', 'reflect', 'wrap'}`).
        cval: Value used for points outside the boundaries
            of the input if `mode='constant'`.
    # Returns
        The transformed version of the input (same axis layout as `x`).
    """
    # Bring channels to the front so the image can be iterated per channel.
    x = np.rollaxis(x, channel_axis, 0)
    final_affine_matrix = transform_matrix[:2, :2]
    final_offset = transform_matrix[:2, 2]
    # Call `affine_transform` from the top-level `ndimage` namespace; the
    # `ndimage.interpolation` sub-namespace is deprecated in recent SciPy.
    # order=1 selects linear interpolation.
    channel_images = [ndi.affine_transform(
        x_channel,
        final_affine_matrix,
        final_offset,
        order=1,
        mode=fill_mode,
        cval=cval) for x_channel in x]
    x = np.stack(channel_images, axis=0)
    # Restore the original position of the channel axis.
    x = np.rollaxis(x, 0, channel_axis + 1)
    return x
224 | 
225 | 
def flip_axis(x, axis):
    """Reverses the order of the elements of `x` along `axis`.
    # Arguments
        x: Array-like input; converted with `np.asarray`.
        axis: Index of the axis to reverse.
    # Returns
        Numpy array with the same shape as `x`, reversed along `axis`.
    """
    arr = np.asarray(x)
    # Bring the target axis to the front, reverse it, then swap it back.
    reversed_front = arr.swapaxes(axis, 0)[::-1, ...]
    return reversed_front.swapaxes(0, axis)
231 | 
232 | 
class Augmentation(object):
    """Transform minibatches of image data with real-time data augmentation.

    Batches are expected in channels-first layout
    (`channel_axis=1`, `row_axis=2`, `col_axis=3`).

    # Arguments
        samplewise_center: stored on the instance; not used by
            `random_transform`.
        samplewise_std_normalization: stored on the instance; not used by
            `random_transform`.
        rotation_range: degrees (0 to 180).
        width_shift_range: fraction of total width.
        height_shift_range: fraction of total height.
        shear_range: shear intensity (shear angle in radians).
        zoom_range: amount of zoom. if scalar z, zoom will be randomly picked
            in the range [1-z, 1+z]. A sequence of two can be passed instead
            to select this range.
        channel_shift_range: shift range for each channels.
        fill_mode: points outside the boundaries are filled according to the
            given mode ('constant', 'nearest', 'reflect' or 'wrap'). Default
            is 'nearest'.
        cval: value used for points outside the boundaries when fill_mode is
            'constant'. Default is 0.
        horizontal_flip: whether to randomly flip images horizontally.
        vertical_flip: whether to randomly flip images vertically.
        rescale: rescaling factor. Stored on the instance; `random_transform`
            does not apply it.
        random_curves_strength: if greater than 0, strength passed to
            `random_curves_transform` for every image.
        seed: if not None, used to seed the global `np.random` generator.
    # Raises
        ValueError: if `zoom_range` is neither a scalar nor a sequence of
            two elements.
    """

    def __init__(self,
                 samplewise_center=False,
                 samplewise_std_normalization=False,
                 rotation_range=0.,
                 width_shift_range=0.,
                 height_shift_range=0.,
                 shear_range=0.,
                 zoom_range=0.,
                 channel_shift_range=0.,
                 fill_mode='nearest',
                 cval=0.,
                 horizontal_flip=False,
                 vertical_flip=False,
                 rescale=None,
                 random_curves_strength=0.,
                 seed=None):

        self.samplewise_center = samplewise_center
        self.samplewise_std_normalization = samplewise_std_normalization
        self.rotation_range = rotation_range
        self.width_shift_range = width_shift_range
        self.height_shift_range = height_shift_range
        self.shear_range = shear_range
        self.zoom_range = zoom_range
        self.channel_shift_range = channel_shift_range
        self.fill_mode = fill_mode
        self.cval = cval
        self.horizontal_flip = horizontal_flip
        self.vertical_flip = vertical_flip
        self.rescale = rescale
        self.random_curves_strength = random_curves_strength

        self.data_format = 'channels_first'
        self.channel_axis = 1
        self.row_axis = 2
        self.col_axis = 3

        # Normalise `zoom_range` to a two-element [low, high] list.
        if np.isscalar(zoom_range):
            self.zoom_range = [1 - zoom_range, 1 + zoom_range]
        elif len(zoom_range) == 2:
            self.zoom_range = [zoom_range[0], zoom_range[1]]
        else:
            # Format the value into the message; a second positional argument
            # would make the error print as a tuple.
            raise ValueError('`zoom_range` should be a float or '
                             'a tuple or list of two floats. '
                             'Received arg: %r' % (zoom_range,))

        if seed is not None:
            # NOTE: this seeds the process-wide NumPy generator.
            np.random.seed(seed)

    def _random_affine_matrix(self, image_shape):
        """Sample one random affine transform (not yet centred on the image).

        Draws, in this fixed order, rotation, row shift, column shift, shear
        and zoom, and composes the active ones as
        `rotation . shift . shear . zoom`.

        # Arguments
            image_shape: shape of a single image (without the batch axis).
        # Returns
            3x3 Numpy array, or None when no geometric transform is active.
        """
        # Axis indices for a single image: the batch axis is gone.
        img_row_axis = self.row_axis - 1
        img_col_axis = self.col_axis - 1

        if self.rotation_range:
            theta = np.pi / 180 * np.random.uniform(-self.rotation_range,
                                                    self.rotation_range)
        else:
            theta = 0

        if self.height_shift_range:
            tx = np.random.uniform(-self.height_shift_range,
                                   self.height_shift_range) * image_shape[img_row_axis]
        else:
            tx = 0

        if self.width_shift_range:
            ty = np.random.uniform(-self.width_shift_range,
                                   self.width_shift_range) * image_shape[img_col_axis]
        else:
            ty = 0

        if self.shear_range:
            shear = np.random.uniform(-self.shear_range, self.shear_range)
        else:
            shear = 0

        if self.zoom_range[0] == 1 and self.zoom_range[1] == 1:
            zx, zy = 1, 1
        else:
            zx, zy = np.random.uniform(self.zoom_range[0], self.zoom_range[1], 2)

        # Collect the active homographies in composition order.
        factors = []
        if theta != 0:
            factors.append(np.array([[np.cos(theta), -np.sin(theta), 0],
                                     [np.sin(theta), np.cos(theta), 0],
                                     [0, 0, 1]]))
        if tx != 0 or ty != 0:
            factors.append(np.array([[1, 0, tx],
                                     [0, 1, ty],
                                     [0, 0, 1]]))
        if shear != 0:
            factors.append(np.array([[1, -np.sin(shear), 0],
                                     [0, np.cos(shear), 0],
                                     [0, 0, 1]]))
        if zx != 1 or zy != 1:
            factors.append(np.array([[zx, 0, 0],
                                     [0, zy, 0],
                                     [0, 0, 1]]))

        transform_matrix = None
        for factor in factors:
            if transform_matrix is None:
                transform_matrix = factor
            else:
                transform_matrix = np.dot(transform_matrix, factor)
        return transform_matrix

    def random_transform(self, x_batch):
        """Randomly augment a minibatch of images tensor.
        # Arguments
            x_batch: 4D tensor, minibatch of images; the first axis indexes
                the images.
        # Returns
            A randomly transformed version of the input (same shape and
            dtype, allocated with `np.zeros_like`).
        """
        # x is a single image, so it doesn't have image number at index 0
        img_row_axis = self.row_axis - 1
        img_col_axis = self.col_axis - 1
        img_channel_axis = self.channel_axis - 1

        rt_batch = np.zeros_like(x_batch)
        # `range` instead of `xrange` so the loop also runs on Python 3.
        for i in range(x_batch.shape[0]):
            x = x_batch[i]

            # use composition of homographies
            # to generate final transform that needs to be applied
            transform_matrix = self._random_affine_matrix(x.shape)

            if transform_matrix is not None:
                h, w = x.shape[img_row_axis], x.shape[img_col_axis]
                transform_matrix = transform_matrix_offset_center(transform_matrix, h, w)
                x = apply_transform(x, transform_matrix, img_channel_axis,
                                    fill_mode=self.fill_mode, cval=self.cval)

            if self.channel_shift_range != 0:
                x = random_channel_shift(x,
                                         self.channel_shift_range,
                                         img_channel_axis)

            # Each flip is applied with probability 0.5.
            if self.horizontal_flip:
                if np.random.random() < 0.5:
                    x = flip_axis(x, img_col_axis)

            if self.vertical_flip:
                if np.random.random() < 0.5:
                    x = flip_axis(x, img_row_axis)

            if self.random_curves_strength > 0.:
                x = random_curves_transform(x, self.random_curves_strength)

            rt_batch[i] = x

        return rt_batch
398 | 
399 | 


--------------------------------------------------------------------------------