├── README.md ├── lasagne_utils.py ├── numpy_dataset.py ├── LICENSE ├── cnn.py ├── data.py ├── cnn_test.py └── augmentation.py /README.md: -------------------------------------------------------------------------------- 1 | Seed-per-pod estimation for plant breeding using deep learning 2 | ============================================================== 3 | Uzal, L.C., Grinblat, G.L., Namías, R., Larese, M.G., Bianchi, J.S., Morandi, E.N. and Granitto, P.M. 4 | 5 | 6 | Code for reproducing the CNN experiments. Execute with 7 | 8 | python cnn_test.py 9 | 10 | -------------------------------------------------------------------------------- /lasagne_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import lasagne 3 | 4 | 5 | def save_network_values(network, filepath): 6 | np.savez(filepath, *lasagne.layers.get_all_param_values(network)) 7 | 8 | 9 | def load_network_values(network, filepath): 10 | with np.load(filepath) as f: 11 | param_values = [f['arr_%d' % i] for i in range(len(f.files))] 12 | lasagne.layers.set_all_param_values(network, param_values) 13 | 14 | -------------------------------------------------------------------------------- /numpy_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | 5 | class Numpy_dataset(object): 6 | 7 | def __init__(self, data_dir, file_name='train.npz'): 8 | 9 | file_path = os.path.join(data_dir,file_name) 10 | with np.load(file_path) as data: 11 | self.X = data['X'] 12 | self.y = data['y'].astype(np.uint8) 13 | self.samples_names = data["samples_names"] 14 | self.groups2 = data["fold_number"] 15 | self.groups = data["fold_session"] 16 | if len(self.X.shape) == 3: 17 | self.X = self.X.reshape(-1,1,self.X.shape[1],self.X.shape[2]) 18 | 19 | self.y = self.y - self.y.min() 20 | self.nclasses = self.y.max()+1 21 | 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | COPYRIGHT 2 | 3 | All contributions by Lucas C. Uzal: 4 | Copyright (c) 2017, Lucas C. Uzal. 5 | All rights reserved. 6 | 7 | Contributions in augmentation.py by François Chollet: 8 | Copyright (c) 2015, François Chollet. 9 | All rights reserved. 10 | 11 | All other contributions: 12 | Copyright (c) 2015 - 2017, the respective contributors. 13 | All rights reserved. 14 | 15 | LICENSE 16 | 17 | The MIT License (MIT) 18 | 19 | Permission is hereby granted, free of charge, to any person obtaining a copy 20 | of this software and associated documentation files (the "Software"), to deal 21 | in the Software without restriction, including without limitation the rights 22 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 23 | copies of the Software, and to permit persons to whom the Software is 24 | furnished to do so, subject to the following conditions: 25 | 26 | The above copyright notice and this permission notice shall be included in all 27 | copies or substantial portions of the Software. 28 | 29 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 30 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 31 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 32 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 33 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 34 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 35 | SOFTWARE. 36 | 37 | -------------------------------------------------------------------------------- /cnn.py: -------------------------------------------------------------------------------- 1 | import theano.tensor as T 2 | import lasagne 3 | from lasagne.layers import InputLayer, DenseLayer, Conv2DLayer, NonlinearityLayer, GlobalPoolLayer 4 | from lasagne.layers.normalization import BatchNormLayer 5 | from lasagne.layers import FlattenLayer 6 | from lasagne.nonlinearities import softmax, linear 7 | # from lasagne.nonlinearities import rectify as relu 8 | from lasagne.nonlinearities import leaky_rectify as lrelu 9 | 10 | def build_representation(img_size=[64,64], nchannels=3, ndf=64, 11 | vis_filter_size=5, filters_size=5, global_pool=True, strides = [2, 2, 2, 2]): 12 | print 'cnn' 13 | #if img_size[0] % 32 is not 0 or img_size[1]!=img_size[0]: 14 | # # La imagen debe ser cuadrada y multiplo de 32 15 | # raise 1 16 | 17 | depth = len(strides) 18 | w_sizes = [filters_size] * depth 19 | w_sizes[0] = vis_filter_size 20 | 21 | X = InputLayer((None, nchannels, img_size[0], img_size[1])) 22 | ishape = lasagne.layers.get_output_shape(X) 23 | # print ishape 24 | 25 | wf = 1 26 | h = X 27 | for i, s in enumerate(strides): 28 | wf *= s 29 | filter_size = w_sizes[i] 30 | x1 = Conv2DLayer(h, num_filters=wf * ndf, filter_size=filter_size, stride=s, pad='same', 31 | b=None, nonlinearity=None, name='cnn_l%d_Conv'%i) 32 | x2 = BatchNormLayer(x1, name='cnn_l%d_BN'%i) 33 | h = NonlinearityLayer(x2, nonlinearity=lrelu) 34 | ishape = lasagne.layers.get_output_shape(x1) 35 | # print ishape 36 | 37 | if global_pool: 38 | h = GlobalPoolLayer(h, pool_function=T.max, name='cnn_last_code') 39 | else: 40 | h = FlattenLayer(h, name='cnn_last_code') 41 | 42 | 43 | return h 44 | 45 | def build_classifier(nclasses, img_size=[64,64], nchannels=3, ndf=64, 46 | vis_filter_size=5, filters_size=5, global_pool=True, strides = [2, 2, 2, 2]): 47 | 48 | h = build_representation(img_size, nchannels, ndf, vis_filter_size, filters_size, global_pool, strides) 49 | 50 | y = DenseLayer(h, num_units=nclasses, nonlinearity=softmax, name='softmax') 51 | 52 | return y 53 | 54 | 55 | def build_classifier_and_regressor(nclasses, img_size=[64, 64], nchannels=3, ndf=64, 56 | vis_filter_size=5, filters_size=5, global_pool=True, strides=[2, 2, 2, 2]): 57 | h = build_representation(img_size, nchannels, ndf, vis_filter_size, filters_size, global_pool, strides) 58 | 59 | c = DenseLayer(h, num_units=nclasses, nonlinearity=softmax, name='softmax') 60 | 61 | r = DenseLayer(h, num_units=1, nonlinearity=linear, name='linear_out') 62 | 63 | return c, r 64 | -------------------------------------------------------------------------------- /data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import theano 3 | 4 | def floatX(X): 5 | return np.asarray(X, dtype=theano.config.floatX) 6 | 7 | def OneHot(X, n=None, negative_class=0.): 8 | X = np.asarray(X).flatten() 9 | if n is None: 10 | n = np.max(X) + 1 11 | Xoh = np.ones((len(X), n)) * negative_class 12 | Xoh[np.arange(len(X)), X] = 1. 13 | return floatX(Xoh) 14 | 15 | 16 | def get_batch_indexes(iter, batch_size, max_size): 17 | n1 = (iter * batch_size) % max_size 18 | n2 = ((iter + 1) * batch_size - 1) % max_size + 1 19 | if n1 > n2: 20 | return np.concatenate((np.arange(n1,max_size),np.arange(0,n2))) 21 | else: 22 | return np.arange(n1,n2) 23 | 24 | def scale_data(X, max, min, new_min=-1.0, new_max=1.0): 25 | scale = float(max - min) 26 | new_scale = float(new_max - new_min) 27 | return floatX((X - min) * new_scale / scale + new_min) 28 | 29 | class Dataset(object): 30 | 31 | def __init__(self, X_train=None, y_train=None, 32 | X_valid=None, y_valid=None, 33 | X_test =None, y_test=None, 34 | X_unlab=None, x_min=0, x_max=255, ymin=0.00, ymax=1.00, nclasses=None, **kw): 35 | 36 | self.__dict__.update(kw) 37 | del kw # We don't want this in attrs 38 | self.__dict__.update(locals()) 39 | del self.self # We don't need this either 40 | if X_train is not None: 41 | self._nchannels = X_train.shape[1] 42 | self.first_class = int(y_train.min()) # para corregir si el numero de clase arranca de 1 o 0 43 | if nclasses is None: 44 | nclasses = int(y_train.max()) - self.first_class + 1 45 | elif X_unlab is not None: 46 | self._nchannels = X_unlab.shape[1] 47 | else: 48 | self._nchannels = None 49 | 50 | 51 | def get_train_batch(self, index, batch_size, one_hot=True): 52 | X = self._get_batch(self.X_train,index,batch_size) 53 | y = self._get_batch(self.y_train,index,batch_size) 54 | 55 | if one_hot: 56 | y = floatX(OneHot(y.astype(int)-self.first_class,n=self.nclasses)) 57 | return X, self.smooth_labels(y,self.ymin,self.ymax) 58 | else: 59 | return X, y.astype(int)-self.first_class 60 | 61 | 62 | def get_valid_batch(self, index, batch_size, one_hot=True): 63 | X = self._get_batch(self.X_valid,index,batch_size) 64 | y = self._get_batch(self.y_valid,index,batch_size) 65 | 66 | if one_hot: 67 | y = floatX(OneHot(y.astype(int)-self.first_class,n=self.nclasses)) 68 | return X, self.smooth_labels(y,self.ymin,self.ymax) 69 | else: 70 | return X, y.astype(int)-self.first_class 71 | 72 | def get_test_batch(self, index, batch_size, one_hot=True): 73 | X = self._get_batch(self.X_test,index,batch_size) 74 | y = self._get_batch(self.y_test,index,batch_size) 75 | 76 | if one_hot: 77 | y = floatX(OneHot(y.astype(int)-self.first_class,n=self.nclasses)) 78 | return X, self.smooth_labels(y,self.ymin,self.ymax) 79 | else: 80 | return X, y.astype(int)-self.first_class 81 | 82 | def get_unlab_batch(self, index, batch_size): 83 | X = self._get_batch(self.X_unlab,index,batch_size) 84 | return X 85 | 86 | def _get_batch(self, X, index, batch_size): 87 | size = X.shape[0] 88 | n1 = (index*batch_size)%size 89 | n2 = ((index+1)*batch_size-1)%size+1 90 | if n1>n2: 91 | return floatX(np.concatenate((X[n1:], X[:n2]))) 92 | else: 93 | return floatX(X[n1:n2]) 94 | 95 | def scale_data(self, X, new_min=-1.0, new_max=1.0): 96 | self.new_min = new_min 97 | self.new_max = new_max 98 | scale = self.x_max - self.x_min 99 | new_scale = new_max - new_min 100 | return floatX((X-self.x_min)*new_scale/scale+new_min) 101 | 102 | def smooth_labels(self, y, ymin=0.1, ymax=0.9): 103 | return y*(ymax-ymin)+ymin 104 | 105 | def image_crop(self, X, ph, pw=None, random_state=None): 106 | 107 | if pw is None: 108 | pw = ph 109 | 110 | h, w = X.shape[2:4] 111 | 112 | if h == ph and w == pw: 113 | return X 114 | 115 | if random_state: 116 | j = random_state.random_integers(0, h - ph) 117 | i = random_state.random_integers(0, w - pw) 118 | else: 119 | j = int(round((h - ph)/2.)) 120 | i = int(round((w - pw)/2.)) 121 | 122 | return X[:,:,j:j+ph, i:i+pw] 123 | 124 | def inv_scale_data(self, X, old_min=-1.0, old_max=1.0): 125 | scale = self.x_max - self.x_min 126 | old_scale = old_max - old_min 127 | return floatX((X-old_min)*scale/old_scale+self.x_min) 128 | 129 | -------------------------------------------------------------------------------- /cnn_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2.7 2 | 3 | import os 4 | import argparse 5 | import pickle 6 | from six.moves import urllib 7 | import tarfile 8 | from time import time 9 | import numpy as np 10 | from numpy.random import RandomState 11 | import theano 12 | import theano.tensor as T 13 | import lasagne 14 | from lasagne.updates import adam 15 | from lasagne.init import Normal 16 | 17 | from lasagne_utils import load_network_values, save_network_values 18 | 19 | from numpy_dataset import Numpy_dataset 20 | from data import get_batch_indexes, scale_data, Dataset, floatX 21 | from augmentation import Augmentation, zoom_and_crop 22 | from cnn import build_classifier 23 | from sklearn.model_selection import GroupKFold 24 | 25 | from lasagne.objectives import categorical_crossentropy 26 | from lasagne.regularization import regularize_layer_params, l2 27 | 28 | from sklearn.metrics import confusion_matrix 29 | 30 | def build_predict_function(classifier, X=T.tensor4()): 31 | test_clsX = lasagne.layers.get_output(classifier, X, deterministic=True) 32 | return theano.function(inputs=[X],outputs=test_clsX,updates=None) 33 | 34 | def mean_acc_by_fold(y_true, y_pred, groups): 35 | 36 | n_groups = max(groups) + 1 37 | n_splits = n_groups if n_groups <= 10 else 5 38 | gkf = GroupKFold(n_splits=n_splits) 39 | acc = [] 40 | for _, group_indexes in gkf.split(y_pred, y_true, groups=groups): 41 | comp = y_true[group_indexes]==y_pred[group_indexes] 42 | acc.append(comp.mean()) 43 | acc = np.asarray(acc) 44 | return acc.mean(), acc.std() 45 | 46 | 47 | 48 | 49 | def train_model(params, data, test_groups, samples_dir='samples', best_model_dir='best_model', grid_size=8, verbosity=0): 50 | batch_size = params["batch_size"] 51 | n_iter = params["n_iter"] 52 | lr = 10.**params["log10lr"] 53 | iter_save = params["iter_save"] 54 | model_width = params["model_width"] 55 | seed = params["seed"] 56 | wd = np.asscalar(floatX(10.**params["log10wd"])) 57 | rotation_range = params["rotation_range"] 58 | width_shift_range = params["width_shift_range"] 59 | height_shift_range = params["height_shift_range"] 60 | shear_range = params["shear_range"] 61 | zoom_range = (params["zoom_range_center"]-params["zoom_range_range"]*0.5, 62 | params["zoom_range_center"]+params["zoom_range_range"]*0.5) 63 | random_curves_strength = params["random_curves_strength"] 64 | n_layers_per_block = params["n_layers_per_block"] 65 | n_blocks = params["n_blocks"] 66 | 67 | 68 | test_size = data.X_test.shape[0] 69 | valid_size = data.X_valid.shape[0] 70 | train_size = data.X_train.shape[0] 71 | nchannels = data.X_train.shape[1] 72 | img_shape = data.X_train.shape[2:4] 73 | nclasses = data.nclasses 74 | class_labels = None # TODO 75 | y_test_pred = np.zeros((test_size + batch_size,)) 76 | assert(test_size==data.y_test.shape[0]) 77 | 78 | # MODELS 79 | strides = ([1] * (n_layers_per_block - 1) + [2]) * n_blocks 80 | classifier = build_classifier(nclasses=nclasses, 81 | img_size=img_shape, 82 | nchannels=nchannels, 83 | ndf=model_width, 84 | global_pool=True, 85 | strides=strides) 86 | 87 | it_best = 0 88 | best_acc = 0.0 89 | if n_iter>0: 90 | 91 | if not os.path.exists(best_model_dir): 92 | os.makedirs(best_model_dir) 93 | if not os.path.exists(samples_dir): 94 | os.makedirs(samples_dir) 95 | 96 | augm = Augmentation(rotation_range=rotation_range, # In degrees 97 | width_shift_range=width_shift_range, 98 | height_shift_range=height_shift_range, 99 | horizontal_flip=True, 100 | shear_range=shear_range, # In radians 101 | zoom_range=zoom_range, # >1 zoom out; <1 zoom in 102 | channel_shift_range=0.0, # 0-255 103 | fill_mode='constant', # 'nearest', 104 | random_curves_strength=random_curves_strength, 105 | seed=seed) 106 | 107 | if verbosity>0: 108 | print data.X_train.shape, img_shape 109 | 110 | # SYMBOLIC INPUTS 111 | X = T.tensor4() 112 | y = T.matrix() 113 | 114 | 115 | clsX = lasagne.layers.get_output(classifier, X) 116 | 117 | # LOSS FUNCTIONS 118 | weight_decay = regularize_layer_params(classifier, l2) 119 | cls_loss = categorical_crossentropy(clsX,y).mean() + wd * weight_decay 120 | 121 | # PARAMS 122 | cls_params = lasagne.layers.get_all_params(classifier, trainable=True) 123 | 124 | # UPDATES 125 | cls_updates = adam(cls_loss, cls_params, learning_rate= floatX(lr), beta1=floatX(0.9), beta2=floatX(0.999)) 126 | 127 | # TRAINING FUNCTIONS 128 | if verbosity>0: 129 | print 'COMPILING TRAINING FUNCTIONS' 130 | t = time() 131 | train_cls = theano.function([X, y], cls_loss, updates=cls_updates) 132 | if verbosity>0: 133 | print '%.2f seconds to compile theano functions' % (time() - t) 134 | 135 | # MONITOR 136 | if verbosity > 0: 137 | print 'COMPILING MONITOR FUNCTIONS' 138 | t = time() 139 | predict = build_predict_function(classifier, X) 140 | if verbosity > 0: 141 | print '%.2f seconds to compile theano functions' % (time() - t) 142 | 143 | if verbosity > 0: 144 | print "starting training" 145 | with open(best_model_dir + '/accuracies.log', 'w') as f: 146 | f.write('# iter data_seen epoch cls_loss train_acc valid_acc') 147 | f.write('\n') 148 | with open(best_model_dir + '/best_acc.log', 'w') as f: 149 | f.write('# iter data_seen epoch valid_acc test_acc test_acc_mean test_acc_std') 150 | f.write('\n') 151 | 152 | n_epochs = n_iter* batch_size/train_size 153 | 154 | last_it = 0 155 | t = time() 156 | for it in xrange(0, n_iter): 157 | epoch = it* batch_size/train_size 158 | 159 | X_batch, y_batch = data.get_train_batch(it, batch_size) 160 | X_batch = augm.random_transform(X_batch) 161 | X_batch = data.scale_data(X_batch) 162 | cls_loss_value = train_cls(X_batch, y_batch) 163 | 164 | if (it % iter_save == 0) or (it % 10 == 0 and it < iter_save): 165 | y_pred = np.argmax(predict(X_batch), axis=1) 166 | y_true = np.argmax(y_batch, axis=1) 167 | train_acc = (y_pred == y_true).mean() 168 | 169 | y_pred = np.asarray([]) 170 | y_true = np.asarray([]) 171 | for valit in range(valid_size/ batch_size): 172 | X_valid, y_valid = data.get_valid_batch(valit, batch_size) 173 | X_valid = data.scale_data(X_valid) 174 | y_pred = np.append(y_pred, np.argmax(predict(X_valid), axis=1)) 175 | y_true = np.append(y_true, np.argmax(y_valid, axis=1)) 176 | valid_acc = (y_pred == y_true).mean() 177 | valid_cm = confusion_matrix(y_true,y_pred,class_labels) 178 | 179 | if verbosity>0: 180 | print train_acc, valid_acc 181 | 182 | if best_acc2: 186 | save_network_values(classifier, os.path.join(best_model_dir, 'classifier.npz')) 187 | pickle.dump(params, 188 | open("%s/ p" % best_model_dir, "wb")) 189 | 190 | for testit in range(test_size / batch_size + 1): 191 | X_test, y_test = data.get_test_batch(testit, batch_size) 192 | X_test = data.scale_data(X_test) 193 | pred = predict(X_test) 194 | y_test_pred[testit * batch_size:(testit + 1) * batch_size] = np.argmax(pred, axis=1) 195 | 196 | 197 | y_p = y_test_pred[:test_size] 198 | y_t = data.y_test 199 | test_acc_mean, test_acc_std = mean_acc_by_fold(y_p, y_t, test_groups) 200 | test_acc = (y_p == y_t).mean() 201 | test_cm = confusion_matrix(y_t, y_p, class_labels) 202 | 203 | with open(best_model_dir + '/best_acc.log', 'a') as f: 204 | np.savetxt(f, [[it + 1, (it + 1) * batch_size, epoch, 205 | valid_acc, test_acc, test_acc_mean, test_acc_std]], fmt='%1.3e') 206 | if verbosity>0: 207 | print "Best valid accuracy reached: %2.2f%% "%(valid_acc*100) 208 | print "Test Acc.: %2.2f%%"%(test_acc*100) 209 | print "Mean Test Acc.: %1.3f +/- %1.3f"%(test_acc_mean,test_acc_std) 210 | print "valid CM\n", valid_cm 211 | print "test CM\n", test_cm 212 | 213 | with open(best_model_dir + '/accuracies.log', 'a') as f: 214 | np.savetxt(f, [[it+1, (it+1)* batch_size, epoch, 215 | cls_loss_value, train_acc, valid_acc]], fmt='%1.3e') 216 | 217 | if verbosity>0: 218 | t2 = time()-t 219 | t += t2 220 | horas = t2/(1+it-last_it)/3600.*10000 221 | print "iter:%d/%d; epoch:%d; %4.2f hours for 10000 iterations"%(it+1, n_iter,epoch,horas) 222 | last_it = it+1 223 | 224 | 225 | print "End train\n" 226 | 227 | load_network_values(classifier, os.path.join(best_model_dir, 'classifier.npz')) 228 | best_predict = build_predict_function(classifier) 229 | 230 | return best_acc, it_best, best_predict 231 | 232 | 233 | def mk_classification_images(data, which_set, file, predict, samples_dir, batch_size=32): 234 | 235 | if not os.path.exists(samples_dir): 236 | os.makedirs(samples_dir) 237 | 238 | if which_set=='train': 239 | X = data.X_train 240 | elif which_set=='valid': 241 | X = data.X_valid 242 | elif which_set=='test': 243 | X = data.X_test 244 | size = X.shape[0] 245 | y_pred = np.zeros((size+batch_size,)) 246 | y_true = np.zeros((size+batch_size,)) 247 | y_maxprob = np.zeros((size+batch_size,)) 248 | 249 | for it in range(size / batch_size + 1): 250 | if which_set == 'train': 251 | X_batch, y_batch = data.get_train_batch(it, batch_size) 252 | elif which_set == 'valid': 253 | X_batch, y_batch = data.get_valid_batch(it, batch_size) 254 | elif which_set == 'test': 255 | X_batch, y_batch = data.get_test_batch(it, batch_size) 256 | X_batch = data.scale_data(X_batch) 257 | 258 | pred = predict(X_batch) 259 | y_pred[it*batch_size:(it+1)*batch_size] = np.argmax(pred, axis=1) 260 | y_true[it*batch_size:(it+1)*batch_size] = np.argmax(y_batch, axis=1) 261 | y_maxprob[it*batch_size:(it+1)*batch_size] = np.max(pred, axis=1) 262 | y_pred_arr = y_pred[:size] 263 | y_true_arr = y_true[:size] 264 | y_maxprob_arr = y_maxprob[:size] 265 | 266 | 267 | def eval_params(params,data,test_data,samples_dir='samples',best_model_dir='best_model',grid_size=8, verbosity=2): 268 | 269 | n_groups = max(data.groups) + 1 270 | n_splits = n_groups if n_groups<=10 else 5 271 | gkf = GroupKFold(n_splits=n_splits) 272 | accuracies = [] 273 | for train_indexes, valid_indexes in gkf.split(data.X, data.y, groups=data.groups): 274 | np.random.shuffle(train_indexes) 275 | dataset = Dataset(X_train = data.X[train_indexes], 276 | y_train = data.y[train_indexes], 277 | X_valid = data.X[valid_indexes], 278 | y_valid = data.y[valid_indexes], 279 | X_test = test_data.X, 280 | y_test = test_data.y, nclasses = 3) 281 | 282 | 283 | acc, n_iter, _ = train_model(params,dataset,test_data.groups,samples_dir,best_model_dir,grid_size,verbosity=verbosity) 284 | print acc, n_iter 285 | accuracies.append(acc) 286 | 287 | accuracies = np.asarray(accuracies) 288 | print "Mean Accuracy: (%2.2f +/- %2.2f)"%(accuracies.mean()*100, accuracies.std()*100) 289 | return accuracies 290 | 291 | def main(args): 292 | # print args 293 | 294 | # DATASET 295 | DATA_URL = 'http://www.cifasis-conicet.gov.ar/uzal/dataset/soybean_pods.tar.gz' 296 | filepath = os.path.join(args.datapath,'soybean_pods.tar.gz') 297 | datapath = os.path.join(args.datapath, 'soybean_pods/') 298 | if not os.path.exists(datapath): 299 | #os.makedirs(args.datapath) 300 | print('Downloading soybean_pods.tar.gz') 301 | filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath) 302 | print() 303 | statinfo = os.stat(filepath) 304 | print('Succesfully downloaded soybean_pods.tar.gz', statinfo.st_size, 'bytes.') 305 | tarfile.open(filepath, 'r:gz').extractall(args.datapath) 306 | 307 | data_train_dir = os.path.join(datapath,"Season1") 308 | data_test_dir = os.path.join(datapath,"Season2") 309 | data = Numpy_dataset(data_train_dir,'train.npz') 310 | test_data = Numpy_dataset(data_test_dir,'train.npz') 311 | 312 | params = vars(args) 313 | 314 | n_groups = max(data.groups) + 1 315 | n_splits = n_groups if n_groups <= 10 else 5 316 | gkf = GroupKFold(n_splits=n_splits) 317 | fold = 1 318 | for train_indexes, valid_indexes in gkf.split(data.X, data.y, groups=data.groups): 319 | np.random.shuffle(train_indexes) 320 | dataset = Dataset(X_train=data.X[train_indexes], 321 | y_train=data.y[train_indexes], 322 | X_valid=data.X[valid_indexes], 323 | y_valid=data.y[valid_indexes], 324 | X_test=test_data.X, 325 | y_test=test_data.y,nclasses=3) 326 | 327 | 328 | acc, n_iter, predict = train_model(params,dataset,test_data.groups, 329 | samples_dir='samples_fold%d'%fold, 330 | best_model_dir='best_model_fold%d'%fold, 331 | grid_size=8,verbosity=10) 332 | print acc, n_iter 333 | 334 | fold = fold + 1 335 | 336 | 337 | if __name__ == '__main__': 338 | 339 | parser = argparse.ArgumentParser() 340 | parser.add_argument("--datapath", type=str, default='/tmp') 341 | parser.add_argument("--n_iter", type=int, default=6000) 342 | parser.add_argument("--grid_size", type=int, default=8) 343 | parser.add_argument("--iter_save", type=int, default=100) 344 | parser.add_argument("--seed", type=int, default=17) 345 | parser.add_argument('--n_layers_per_block', type=int, default=4) 346 | parser.add_argument('--n_blocks', type=int, default=3) 347 | parser.add_argument('--batch_size', type=int, default=128) 348 | parser.add_argument('--model_width', type=int, default=16) 349 | parser.add_argument('--log10wd', type=float, default=-1.099842) 350 | parser.add_argument('--log10lr', type=float, default=-2.42706) 351 | parser.add_argument('--rotation_range', type=int, default=20) 352 | parser.add_argument('--width_shift_range', type=float, default=0.028) 353 | parser.add_argument('--height_shift_range', type=float, default=0.016) 354 | parser.add_argument('--shear_range', type=float, default=0.14) # In radians 355 | parser.add_argument('--zoom_range_center', type=float, default=0.97) 356 | parser.add_argument('--zoom_range_range', type=float, default=0.18) 357 | parser.add_argument('--random_curves_strength', type=float, default=0.58) 358 | parser.add_argument('--occlusion', type=int, default=5) 359 | 360 | args = parser.parse_args() 361 | 362 | main(args) 363 | -------------------------------------------------------------------------------- /augmentation.py: -------------------------------------------------------------------------------- 1 | """Fairly basic set of tools for real-time data augmentation on image data. 2 | Can easily be extended to include new transformations 3 | https://github.com/fchollet/keras/blob/master/keras/preprocessing/image.py 4 | """ 5 | 6 | import numpy as np 7 | import scipy.ndimage as ndi 8 | 9 | 10 | def random_rotation(x, rg, row_axis=1, col_axis=2, channel_axis=0, 11 | fill_mode='nearest', cval=0.): 12 | """Performs a random rotation of a Numpy image tensor. 13 | # Arguments 14 | x: Input tensor. Must be 3D. 15 | rg: Rotation range, in degrees. 16 | row_axis: Index of axis for rows in the input tensor. 17 | col_axis: Index of axis for columns in the input tensor. 18 | channel_axis: Index of axis for channels in the input tensor. 19 | fill_mode: Points outside the boundaries of the input 20 | are filled according to the given mode 21 | (one of `{'constant', 'nearest', 'reflect', 'wrap'}`). 22 | cval: Value used for points outside the boundaries 23 | of the input if `mode='constant'`. 24 | # Returns 25 | Rotated Numpy image tensor. 26 | """ 27 | theta = np.pi / 180 * np.random.uniform(-rg, rg) 28 | rotation_matrix = np.array([[np.cos(theta), -np.sin(theta), 0], 29 | [np.sin(theta), np.cos(theta), 0], 30 | [0, 0, 1]]) 31 | 32 | h, w = x.shape[row_axis], x.shape[col_axis] 33 | transform_matrix = transform_matrix_offset_center(rotation_matrix, h, w) 34 | x = apply_transform(x, transform_matrix, channel_axis, fill_mode, cval) 35 | return x 36 | 37 | 38 | def random_shift(x, wrg, hrg, row_axis=1, col_axis=2, channel_axis=0, 39 | fill_mode='nearest', cval=0.): 40 | """Performs a random spatial shift of a Numpy image tensor. 41 | # Arguments 42 | x: Input tensor. Must be 3D. 43 | wrg: Width shift range, as a float fraction of the width. 44 | hrg: Height shift range, as a float fraction of the height. 45 | row_axis: Index of axis for rows in the input tensor. 46 | col_axis: Index of axis for columns in the input tensor. 47 | channel_axis: Index of axis for channels in the input tensor. 48 | fill_mode: Points outside the boundaries of the input 49 | are filled according to the given mode 50 | (one of `{'constant', 'nearest', 'reflect', 'wrap'}`). 51 | cval: Value used for points outside the boundaries 52 | of the input if `mode='constant'`. 53 | # Returns 54 | Shifted Numpy image tensor. 55 | """ 56 | h, w = x.shape[row_axis], x.shape[col_axis] 57 | tx = np.random.uniform(-hrg, hrg) * h 58 | ty = np.random.uniform(-wrg, wrg) * w 59 | translation_matrix = np.array([[1, 0, tx], 60 | [0, 1, ty], 61 | [0, 0, 1]]) 62 | 63 | transform_matrix = translation_matrix # no need to do offset 64 | x = apply_transform(x, transform_matrix, channel_axis, fill_mode, cval) 65 | return x 66 | 67 | 68 | def random_shear(x, intensity, row_axis=1, col_axis=2, channel_axis=0, 69 | fill_mode='nearest', cval=0.): 70 | """Performs a random spatial shear of a Numpy image tensor. 71 | # Arguments 72 | x: Input tensor. Must be 3D. 73 | intensity: Transformation intensity. 74 | row_axis: Index of axis for rows in the input tensor. 75 | col_axis: Index of axis for columns in the input tensor. 76 | channel_axis: Index of axis for channels in the input tensor. 77 | fill_mode: Points outside the boundaries of the input 78 | are filled according to the given mode 79 | (one of `{'constant', 'nearest', 'reflect', 'wrap'}`). 80 | cval: Value used for points outside the boundaries 81 | of the input if `mode='constant'`. 82 | # Returns 83 | Sheared Numpy image tensor. 84 | """ 85 | shear = np.random.uniform(-intensity, intensity) 86 | shear_matrix = np.array([[1, -np.sin(shear), 0], 87 | [0, np.cos(shear), 0], 88 | [0, 0, 1]]) 89 | 90 | h, w = x.shape[row_axis], x.shape[col_axis] 91 | transform_matrix = transform_matrix_offset_center(shear_matrix, h, w) 92 | x = apply_transform(x, transform_matrix, channel_axis, fill_mode, cval) 93 | return x 94 | 95 | 96 | def random_zoom(x, zoom_range, row_axis=1, col_axis=2, channel_axis=0, 97 | fill_mode='nearest', cval=0.): 98 | """Performs a random spatial zoom of a Numpy image tensor. 99 | # Arguments 100 | x: Input tensor. Must be 3D. 101 | zoom_range: Tuple of floats; zoom range for width and height. 102 | row_axis: Index of axis for rows in the input tensor. 103 | col_axis: Index of axis for columns in the input tensor. 104 | channel_axis: Index of axis for channels in the input tensor. 105 | fill_mode: Points outside the boundaries of the input 106 | are filled according to the given mode 107 | (one of `{'constant', 'nearest', 'reflect', 'wrap'}`). 108 | cval: Value used for points outside the boundaries 109 | of the input if `mode='constant'`. 110 | # Returns 111 | Zoomed Numpy image tensor. 112 | # Raises 113 | ValueError: if `zoom_range` isn't a tuple. 114 | """ 115 | if len(zoom_range) != 2: 116 | raise ValueError('`zoom_range` should be a tuple or list of two floats. ' 117 | 'Received arg: ', zoom_range) 118 | 119 | if zoom_range[0] == 1 and zoom_range[1] == 1: 120 | zx, zy = 1, 1 121 | else: 122 | zx, zy = np.random.uniform(zoom_range[0], zoom_range[1], 2) 123 | zoom_matrix = np.array([[zx, 0, 0], 124 | [0, zy, 0], 125 | [0, 0, 1]]) 126 | 127 | h, w = x.shape[row_axis], x.shape[col_axis] 128 | transform_matrix = transform_matrix_offset_center(zoom_matrix, h, w) 129 | x = apply_transform(x, transform_matrix, channel_axis, fill_mode, cval) 130 | return x 131 | 132 | 133 | def zoom_and_crop(X_batch, zoom, crop_shape, row_axis=1, col_axis=2, channel_axis=0, 134 | fill_mode='nearest', cval=0.): 135 | 136 | zoom_matrix = np.array([[zoom, 0, 0], 137 | [0, zoom, 0], 138 | [0, 0, 1]]) 139 | 140 | rt_batch = np.zeros_like(X_batch) 141 | for i in xrange(X_batch.shape[0]): 142 | x = X_batch[i] 143 | h, w = x.shape[row_axis], x.shape[col_axis] 144 | transform_matrix = transform_matrix_offset_center(zoom_matrix, h, w) 145 | x = apply_transform(x, transform_matrix, channel_axis, fill_mode, cval) 146 | rt_batch[i] = x 147 | 148 | return image_crop(rt_batch, crop_shape[0], crop_shape[1]) 149 | 150 | def image_crop(X, ph, pw=None): 151 | 152 | if pw is None: 153 | pw = ph 154 | 155 | h, w = X.shape[2:4] 156 | 157 | if h == ph and w == pw: 158 | return X 159 | 160 | j = int(round((h - ph)/2.)) 161 | i = int(round((w - pw)/2.)) 162 | 163 | return X[:,:,j:j+ph, i:i+pw] 164 | 165 | def random_channel_shift(x, intensity, channel_axis=0): 166 | x = np.rollaxis(x, channel_axis, 0) 167 | min_x, max_x = np.min(x), np.max(x) 168 | channel_images = [np.clip(x_channel + np.random.uniform(-intensity, intensity), min_x, max_x) 169 | for x_channel in x] 170 | x = np.stack(channel_images, axis=0) 171 | x = np.rollaxis(x, 0, channel_axis + 1) 172 | return x 173 | 174 | # For curving soybean pods. L.C.Uzal 175 | def random_curves_transform(x, strength=0.1, range=(0.,255.)): 176 | low, high = range 177 | delta = (high - low) * strength / 2. 178 | xp = np.random.uniform(low=low + delta, high=high - delta) 179 | yp = np.random.uniform(low=xp-delta, high=xp+delta) 180 | xp = np.asarray([low, xp, high]) 181 | yp = np.asarray([low, yp, high]) 182 | return np.interp(x,xp,yp) 183 | 184 | def transform_matrix_offset_center(matrix, x, y): 185 | o_x = float(x) / 2 + 0.5 186 | o_y = float(y) / 2 + 0.5 187 | offset_matrix = np.array([[1, 0, o_x], [0, 1, o_y], [0, 0, 1]]) 188 | reset_matrix = np.array([[1, 0, -o_x], [0, 1, -o_y], [0, 0, 1]]) 189 | transform_matrix = np.dot(np.dot(offset_matrix, matrix), reset_matrix) 190 | return transform_matrix 191 | 192 | 193 | def apply_transform(x, 194 | transform_matrix, 195 | channel_axis=0, 196 | fill_mode='nearest', 197 | cval=0.): 198 | """Apply the image transformation specified by a matrix. 199 | # Arguments 200 | x: 2D numpy array, single image. 201 | transform_matrix: Numpy array specifying the geometric transformation. 202 | channel_axis: Index of axis for channels in the input tensor. 203 | fill_mode: Points outside the boundaries of the input 204 | are filled according to the given mode 205 | (one of `{'constant', 'nearest', 'reflect', 'wrap'}`). 206 | cval: Value used for points outside the boundaries 207 | of the input if `mode='constant'`. 208 | # Returns 209 | The transformed version of the input. 210 | """ 211 | x = np.rollaxis(x, channel_axis, 0) 212 | final_affine_matrix = transform_matrix[:2, :2] 213 | final_offset = transform_matrix[:2, 2] 214 | channel_images = [ndi.interpolation.affine_transform( 215 | x_channel, 216 | final_affine_matrix, 217 | final_offset, 218 | order=1, 219 | mode=fill_mode, 220 | cval=cval) for x_channel in x] 221 | x = np.stack(channel_images, axis=0) 222 | x = np.rollaxis(x, 0, channel_axis + 1) 223 | return x 224 | 225 | 226 | def flip_axis(x, axis): 227 | x = np.asarray(x).swapaxes(axis, 0) 228 | x = x[::-1, ...] 229 | x = x.swapaxes(0, axis) 230 | return x 231 | 232 | 233 | class Augmentation(object): 234 | """Transform minibatches of image data with real-time data augmentation. 235 | # Arguments 236 | rotation_range: degrees (0 to 180). 237 | width_shift_range: fraction of total width. 238 | height_shift_range: fraction of total height. 239 | shear_range: shear intensity (shear angle in radians). 240 | zoom_range: amount of zoom. if scalar z, zoom will be randomly picked 241 | in the range [1-z, 1+z]. A sequence of two can be passed instead 242 | to select this range. 243 | channel_shift_range: shift range for each channels. 244 | fill_mode: points outside the boundaries are filled according to the 245 | given mode ('constant', 'nearest', 'reflect' or 'wrap'). Default 246 | is 'nearest'. 247 | cval: value used for points outside the boundaries when fill_mode is 248 | 'constant'. Default is 0. 249 | horizontal_flip: whether to randomly flip images horizontally. 250 | vertical_flip: whether to randomly flip images vertically. 251 | rescale: rescaling factor. If None or 0, no rescaling is applied, 252 | otherwise we multiply the data by the value provided 253 | (before applying any other transformation). 254 | """ 255 | 256 | def __init__(self, 257 | samplewise_center=False, 258 | samplewise_std_normalization=False, 259 | rotation_range=0., 260 | width_shift_range=0., 261 | height_shift_range=0., 262 | shear_range=0., 263 | zoom_range=0., 264 | channel_shift_range=0., 265 | fill_mode='nearest', 266 | cval=0., 267 | horizontal_flip=False, 268 | vertical_flip=False, 269 | rescale=None, 270 | random_curves_strength=0., 271 | seed=None): 272 | 273 | self.samplewise_center = samplewise_center 274 | self.samplewise_std_normalization = samplewise_std_normalization 275 | self.rotation_range = rotation_range 276 | self.width_shift_range = width_shift_range 277 | self.height_shift_range = height_shift_range 278 | self.shear_range = shear_range 279 | self.zoom_range = zoom_range 280 | self.channel_shift_range = channel_shift_range 281 | self.fill_mode = fill_mode 282 | self.cval = cval 283 | self.horizontal_flip = horizontal_flip 284 | self.vertical_flip = vertical_flip 285 | self.rescale = rescale 286 | self.random_curves_strength = random_curves_strength 287 | 288 | self.data_format = 'channels_first' 289 | self.channel_axis = 1 290 | self.row_axis = 2 291 | self.col_axis = 3 292 | 293 | if np.isscalar(zoom_range): 294 | self.zoom_range = [1 - zoom_range, 1 + zoom_range] 295 | elif len(zoom_range) == 2: 296 | self.zoom_range = [zoom_range[0], zoom_range[1]] 297 | else: 298 | raise ValueError('`zoom_range` should be a float or ' 299 | 'a tuple or list of two floats. ' 300 | 'Received arg: ', zoom_range) 301 | 302 | if seed is not None: 303 | np.random.seed(seed) 304 | 305 | 306 | def random_transform(self, x_batch): 307 | """Randomly augment a minibatch of images tensor. 308 | # Arguments 309 | x: 4D tensor, minibatch of images. 310 | # Returns 311 | A randomly transformed version of the input (same shape). 312 | """ 313 | # x is a single image, so it doesn't have image number at index 0 314 | img_row_axis = self.row_axis - 1 315 | img_col_axis = self.col_axis - 1 316 | img_channel_axis = self.channel_axis - 1 317 | 318 | rt_batch = np.zeros_like(x_batch) 319 | for i in xrange(x_batch.shape[0]): 320 | x = x_batch[i] 321 | 322 | # use composition of homographies 323 | # to generate final transform that needs to be applied 324 | if self.rotation_range: 325 | theta = np.pi / 180 * np.random.uniform(-self.rotation_range, self.rotation_range) 326 | else: 327 | theta = 0 328 | 329 | if self.height_shift_range: 330 | tx = np.random.uniform(-self.height_shift_range, self.height_shift_range) * x.shape[img_row_axis] 331 | else: 332 | tx = 0 333 | 334 | if self.width_shift_range: 335 | ty = np.random.uniform(-self.width_shift_range, self.width_shift_range) * x.shape[img_col_axis] 336 | else: 337 | ty = 0 338 | 339 | if self.shear_range: 340 | shear = np.random.uniform(-self.shear_range, self.shear_range) 341 | else: 342 | shear = 0 343 | 344 | if self.zoom_range[0] == 1 and self.zoom_range[1] == 1: 345 | zx, zy = 1, 1 346 | else: 347 | zx, zy = np.random.uniform(self.zoom_range[0], self.zoom_range[1], 2) 348 | 349 | transform_matrix = None 350 | if theta != 0: 351 | rotation_matrix = np.array([[np.cos(theta), -np.sin(theta), 0], 352 | [np.sin(theta), np.cos(theta), 0], 353 | [0, 0, 1]]) 354 | transform_matrix = rotation_matrix 355 | 356 | if tx != 0 or ty != 0: 357 | shift_matrix = np.array([[1, 0, tx], 358 | [0, 1, ty], 359 | [0, 0, 1]]) 360 | transform_matrix = shift_matrix if transform_matrix is None else np.dot(transform_matrix, shift_matrix) 361 | 362 | if shear != 0: 363 | shear_matrix = np.array([[1, -np.sin(shear), 0], 364 | [0, np.cos(shear), 0], 365 | [0, 0, 1]]) 366 | transform_matrix = shear_matrix if transform_matrix is None else np.dot(transform_matrix, shear_matrix) 367 | 368 | if zx != 1 or zy != 1: 369 | zoom_matrix = np.array([[zx, 0, 0], 370 | [0, zy, 0], 371 | [0, 0, 1]]) 372 | transform_matrix = zoom_matrix if transform_matrix is None else np.dot(transform_matrix, zoom_matrix) 373 | 374 | if transform_matrix is not None: 375 | h, w = x.shape[img_row_axis], x.shape[img_col_axis] 376 | transform_matrix = transform_matrix_offset_center(transform_matrix, h, w) 377 | x = apply_transform(x, transform_matrix, img_channel_axis, 378 | fill_mode=self.fill_mode, cval=self.cval) 379 | 380 | if self.channel_shift_range != 0: 381 | x = random_channel_shift(x, 382 | self.channel_shift_range, 383 | img_channel_axis) 384 | if self.horizontal_flip: 385 | if np.random.random() < 0.5: 386 | x = flip_axis(x, img_col_axis) 387 | 388 | if self.vertical_flip: 389 | if np.random.random() < 0.5: 390 | x = flip_axis(x, img_row_axis) 391 | 392 | if self.random_curves_strength > 0.: 393 | x = random_curves_transform(x,self.random_curves_strength) 394 | 395 | rt_batch[i] = x 396 | 397 | return rt_batch 398 | 399 | --------------------------------------------------------------------------------