├── README.md
├── paper
    ├── DFAL_actif.pdf
    ├── DFAL_supplementary.pdf
    └── supplementary_Transferability.pdf
├── bayesian_cnn.py
├── regulizer.py
├── core_set.py
├── adversarial_active_criterion.py
└── active_framework.py


/README.md:
--------------------------------------------------------------------------------
1 | # Adversarial_Active_Learning
2 | Adversarial Active Learning for Deep Networks
3 | 


--------------------------------------------------------------------------------
/paper/DFAL_actif.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AAAI2018submission/Adversarial_Active_Learning/HEAD/paper/DFAL_actif.pdf


--------------------------------------------------------------------------------
/paper/DFAL_supplementary.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AAAI2018submission/Adversarial_Active_Learning/HEAD/paper/DFAL_supplementary.pdf


--------------------------------------------------------------------------------
/paper/supplementary_Transferability.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AAAI2018submission/Adversarial_Active_Learning/HEAD/paper/supplementary_Transferability.pdf


--------------------------------------------------------------------------------
/bayesian_cnn.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | Created on Mon Nov 13 14:10:27 2017
 4 | 
 5 | @author: mducoffe
 6 | 
 7 | Bayesian CNN
 8 | """
 9 | import keras.backend as K
10 | import numpy as np
11 | 
12 | def predict_bayesian(model):
13 |     
14 |     f = K.function([K.learning_phase(), model.get_input_at(0)], model.get_output_at(0))
15 |     
16 |     def function(x):
17 |         return f([1, x])
18 |         
19 |     return function
20 |     
21 | def bald(data, model, T):
22 |     f_bayes = predict_bayesian(model)
23 |     samples = np.array([ f_bayes(data) for i in range(T)]) # shape (T, N, c)
24 |     var_A = (1./T)*np.sum(samples, axis=0)
25 |     var_B = np.log(var_A)
26 |     
27 |     var_C = (1./T)*np.sum(samples*np.log(samples), axis=(0,2))
28 |     
29 |     bald_scores = -np.sum(var_A*var_B, axis=1) + var_C
30 |     
31 |     index = np.argsort(bald_scores)[::-1]
32 |     
33 |     return index
34 |     
35 |     
36 |     
37 | 
38 | 


--------------------------------------------------------------------------------
/regulizer.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Created on Mon Dec 18 11:30:26 2017
  4 | 
  5 | @author: mducoffe
  6 | 
  7 | SVD regularization
  8 | """
  9 | import keras
 10 | import keras.backend as K
 11 | import numpy as np
 12 | from numpy.linalg import norm
 13 | from keras.regularizers import Regularizer
 14 | from keras.constraints import Constraint
 15 | 
 16 | def get_SVD(Mat):
 17 |     #Mat = mat.get_value()
 18 |     n = Mat.shape[0]
 19 |     m = Mat.shape[1]
 20 |     
 21 |     v_0 = np.random.ranf(m)
 22 |     
 23 |     error = 11
 24 |     for i in range(1):
 25 |         w_1=np.dot(Mat, v_0)
 26 |         alpha_1=norm(w_1)
 27 |         u_1=w_1/alpha_1
 28 |         z_1=np.dot(Mat.T, u_1)
 29 |         beta_1=norm(z_1)
 30 |         v_1=z_1/beta_1
 31 |         
 32 |         v_0=v_1
 33 |         
 34 |         error = norm( np.dot(Mat, v_1)-beta_1*u_1)
 35 |     return beta_1, u_1.astype('float32'), v_1.astype('float32')
 36 |     # return u_1, v_1
 37 |     
 38 | def get_SVD_support(mat, U, V):
 39 |     Mat = mat.get_value()
 40 |     _, u, v = get_SVD(Mat)
 41 |     U.set_value(u)
 42 |     V.set_value(v)
 43 | 
 44 | #%%    
 45 | def get_SVD_gpu(Mat):
 46 |     n = Mat.shape[0]
 47 |     m = Mat.shape[1]
 48 |     
 49 | 
 50 | #%%    
 51 | class SVD_Lipschitz(Regularizer):
 52 |     """Regularizer for maximum singular value.
 53 |     # Arguments
 54 |     """
 55 | 
 56 |     def __init__(self, alpha=1.):
 57 |         self.alpha=alpha
 58 |         self.shape=None
 59 |         self.U=None
 60 |         self.V=None
 61 |         
 62 |     def svd(self, x):
 63 |         if self.shape is None:
 64 |             self.shape=x.shape.eval()
 65 |             self.U=K.variable(np.zeros((self.shape[0],)))
 66 |             self.V=K.variable(np.zeros((self.shape[1],)))
 67 |             
 68 |         get_SVD_support(x, self.U, self.V)
 69 |         return K.dot(self.U, K.dot(x, self.V))
 70 | 
 71 |     def __call__(self, x):
 72 |         regularization = 0.
 73 |      
 74 |         if x.ndim==2:
 75 |             regularization=self.svd(x)
 76 |         return self.alpha*regularization
 77 | 
 78 |     def get_config(self):
 79 |         return {}
 80 |         
 81 | def get_regularizer(reg_name, reg_value):
 82 |     if reg_name is None:
 83 |         return None
 84 |     assert (reg_name in ['l2', 'spectral']), ('unknown regularization method {}'.format(reg_name))
 85 |     
 86 |     reg_obj=None
 87 |     if reg_name=='spectral':
 88 |         reg_obj= SVD_Lipschitz(reg_value)
 89 |     if reg_name=='jacobian':
 90 |         raise NotImplementedError()
 91 |     if reg_name=='l2':
 92 |         reg_obj=keras.regularizers.l2(0.)
 93 |         
 94 |     return reg_obj
 95 |         
 96 |         
 97 | 
 98 | 
 99 |         
100 | 
101 | 
102 | 


--------------------------------------------------------------------------------
/core_set.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Created on Wed Jan 24 08:57:40 2018
  4 | 
  5 | @author: mducoffe
  6 | 
  7 | heuristic for the k center problem
  8 | with distance metric d
  9 | """
 10 | 
 11 | # Mixed Integer Programming Solver
 12 | from ortools.linear_solver import pywraplp
 13 | import numpy as np
 14 | 
 15 | def create_variables(solver, N=100):
 16 |     # create u : N variables
 17 |     u = [solver.IntVar(0.0, 1.1, 'u_{}'.format(i)) for i in range(N)]
 18 |     # create w: NxN variables
 19 |     w = [[solver.IntVar(-0.1, 1.1, 'w_{}_{}'.format(i, j)) for i in range(N)] for j in range(N)]
 20 |     # create eta: NxN variables
 21 |     e=  [[solver.IntVar(0.0, 1.1, 'e_{}_{}'.format(i, j)) for i in range(N)] for j in range(N)]
 22 |     
 23 |     return u, w, e
 24 |     
 25 | def create_constraints(solver, u, w, e, dict_values, delta):
 26 |     b = dict_values['b']
 27 |     s_0 = dict_values['s_0']
 28 | 
 29 |     N = len(u)
 30 |     constraints=[]    
 31 |     
 32 |     constraints_1 = [[solver.Constraint(-solver.infinity(), 0) for i in range(N)] for j in range(N)]
 33 |     constraints_2 =[]
 34 |     constraints_3 = []
 35 |     constraints_4 = [solver.Constraint(1, 1) for i in range(N)] 
 36 |     constraint_5 = solver.Constraint(len(s_0)+b, len(s_0)+b)
 37 |     for i in range(N):
 38 |         constraint_5.SetCoefficient(u[i], 1.)
 39 |         
 40 |         if i in s_0:
 41 |             constraint_3_tmp = solver.Constraint(1, 1)
 42 |             constraint_3_tmp.SetCoefficient(u[i], 1.)
 43 |             constraints_3.append(constraint_3_tmp)
 44 |         
 45 |         for j in range(N):
 46 |             
 47 |             constraints_1[i][j].SetCoefficient(u[i], -1.)
 48 |             constraints_1[i][j].SetCoefficient(w[i][j], 1.)
 49 |             
 50 |             if delta[i][j]==0:
 51 |                 constraint_2_tmp = solver.Constraint(0, 0)
 52 |                 constraint_2_tmp.SetCoefficient(w[j][i], 1.)
 53 |                 constraint_2_tmp.SetCoefficient(e[j][i], -1.)
 54 |                 constraints_2.append(constraint_2_tmp)
 55 |             constraints_4[i].SetCoefficient(w[j][i], 1.)
 56 |            
 57 |             
 58 |     
 59 |     constraints+=constraints_1
 60 |     constraints+=constraints_2
 61 |     constraints+=constraints_3
 62 |     #constraints+=constraints_4
 63 |     #constraints.append(constraint_5)
 64 |     
 65 |     return constraints
 66 |     
 67 | def create_objective(solver, e):
 68 |     objective = solver.Objective()
 69 |     N = len(e)
 70 |     for i in range(N):
 71 |         for j in range(N):
 72 |             objective.SetCoefficient(e[i][j], 1.)
 73 |     objective.SetMinimization()
 74 |     
 75 | def feasible(dict_values, delta):
 76 |     solver = pywraplp.Solver('CoreSetIntegerProblem',
 77 |                            pywraplp.Solver.CBC_MIXED_INTEGER_PROGRAMMING)
 78 |     N = len(delta)
 79 |     u,w, e = create_variables(solver, N)
 80 |     constraints=create_constraints(solver, u, w, e, dict_values, delta)
 81 |     objective=create_objective(solver, e)
 82 |     
 83 |     # Invoke the solver and display the results
 84 |     result_status=solver.Solve()
 85 |     # the solution looks legit
 86 |     assert solver.VerifySolution(1e-7, True)
 87 |     
 88 |     u_values = [u[i].solution_value() for i in range(N)]
 89 |     e_values=[[e[j][i].solution_value() for i in range(N)] for j in range(N)]
 90 |     e_values = np.array(e_values)
 91 |     
 92 |     outliers = dict_values['outliers']
 93 |     return np.sum(e_values)<=outliers, u_values
 94 |     
 95 |     """
 96 |     for variable in u:
 97 |         print('%s=%d'%(variable.name(), variable.solution_value()))
 98 |         
 99 |     e_values=[[e[j][i].solution_value() for i in range(N)] for j in range(N)]
100 |     e_values = np.array(e_values)
101 |     #print(e_values)
102 |     
103 |     w_values=[[w[j][i].solution_value() for i in range(N)] for j in range(N)]
104 |     w_values = np.array(w_values)
105 |     #print(w_values)
106 |     """
107 |     
108 | #%%
109 | def build_delta(model, x, threshold):
110 |     
111 |     
112 |     N = len(x)
113 |     delta = np.zeros((N,N), dtype='uint8')
114 |     lb = np.inf
115 |     ub = threshold
116 |     for i in range(N):
117 |         for j in range(N):
118 |             distance = np.linalg.norm(x[i] - x[j])
119 |             if distance <= threshold:
120 |                 delta[i,j]=1
121 |                 delta[j,i]=1
122 |                 if distance >ub:
123 |                     ub = distance
124 |             else:
125 |                 if distance<lb:
126 |                     lb = distance
127 | 
128 |     return delta, lb, ub
129 | 
130 | def greedy_k_center(model, x, c_0, b):
131 |     centers = range(c_0)
132 |     N = len(x)
133 |     candidates = range(c_0, N)
134 |     
135 |     for query in range(b):
136 |         distances = [np.min([np.linalg.norm(x[i]-x[j]) for j in centers]) for i in candidates]
137 |         new_cluster_index = np.argmax(distances)
138 |         centers.append(len(centers)+new_cluster_index)
139 |         candidates.pop(new_cluster_index)
140 |     # recompute distance
141 |     distances = [np.min([np.linalg.norm(x[i]-x[j]) for j in centers]) for i in candidates]
142 |     return np.max(distances)
143 |     
144 | 
145 | def robust_k_center(model, labelled_data, unlabelled_data, b):
146 |     x_0 = model.predict(labelled_data[0])
147 |     x_1 = model.predict(unlabelled_data[0])
148 |     x = np.concatenate([x_0, x_1], axis=0)
149 |     opt = greedy_k_center(model, x, len(labelled_data[0]),b)
150 |     s_0=range(len(labelled_data[0]))
151 |     N = len(labelled_data[0]) + len(unlabelled_data[0])
152 |     outliers=1e-4*N
153 |     dict_values={'s_0':s_0, 'outliers': outliers, 'b':b}
154 |     ub=opt; lb=opt/2.
155 |     assignment=None
156 |     print(opt)
157 |     while True:
158 |         print(('iter', ub, lb))
159 |         threshold= (ub+lb)/2.
160 |         delta, lb_, ub_ = build_delta(model, x, threshold)
161 |         bool_test, centroids = feasible(dict_values, delta)
162 |         if bool_test:
163 |             ub=ub_
164 |             assignment=centroids
165 |         else:
166 |             lb=lb_
167 |         if lb >=ub or (ub - lb)<5*1e-3:
168 |             break
169 | 
170 |     n_start = len(labelled_data[0])
171 |     assignment = np.array(assignment[n_start:])
172 |     return np.where(assignment==1), np.where(assignment==0)
173 |  
174 |                                                                                                                                                                                                                                          
175 |  
176 | #%%
177 | def toy_example():
178 |     delta=[]
179 |     delta.append([1,1,1,0,0,0,0,0]) #0
180 |     delta.append([1,1,0,0,0,0,0,0]) #1
181 |     delta.append([1,0,1,0,0,0,0,0]) #2
182 |     delta.append([0,0,0,1,0,0,0,0]) #3
183 |     delta.append([0,0,0,0,1,1,1,0]) #4
184 |     delta.append([0,0,0,0,1,1,0,0]) #5
185 |     delta.append([0,0,0,0,1,0,1,0]) #6
186 |     delta.append([0,0,0,0,0,0,0,1]) #7
187 |     
188 |     delta = np.array(delta).astype(np.uint)
189 |     
190 |     s_0=[4]
191 |     outliers=1
192 |     b=1
193 |     dict_values={'s_0':s_0, 'outliers': outliers, 'b':b}
194 |     
195 |     bool_test, centroids = feasible(dict_values, delta)
196 |     print(bool_test)
197 |     
198 |     
199 |                     
200 |     
201 |   #%%  
202 | if __name__=="__main__":
203 |     # Instantiate a mixed-integer solver, naming it SolveIntegerProblem.
204 |     toy_example()
205 | 


--------------------------------------------------------------------------------
/adversarial_active_criterion.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Éditeur de Spyder
  4 | 
  5 | Ceci est un script temporaire.
  6 | 
  7 | author : mducoffe
  8 | 
  9 | Step 1 : deep fool as an active learning criterion
 10 | """
 11 | import numpy as np
 12 | import keras.backend as K
 13 | import scipy
 14 | from contextlib import closing
 15 | import pickle as pkl
 16 | import os
 17 | from keras.models import Model
 18 | 
 19 | 
 20 | class Adversarial_example(object):
 21 |     
 22 |     def __init__(self, model, n_channels=3, img_nrows=32, img_ncols=32, 
 23 |                  nb_class=10):
 24 |         """
 25 |         if K.image_dim_ordering() == 'th':
 26 |             img_shape = (1, n_channels, img_nrows, img_ncols)
 27 |             adversarial_image = K.placeholder((1, n_channels, img_nrows, img_ncols))
 28 |             adversarial_target = K.placeholder((1, nb_class))
 29 |             adv_noise = K.placeholder((1, n_channels, img_nrows, img_ncols))
 30 |         else:
 31 |             img_shape = (1,img_nrows, img_ncols, n_channels)
 32 |             adversarial_image = K.placeholder((1, img_nrows, img_ncols, n_channels))
 33 |             adversarial_target = K.placeholder((1, nb_class))
 34 |             adv_noise = K.placeholder((1, img_nrows, img_ncols, n_channels))
 35 |         """
 36 |         
 37 |         img_shape = (1,n_channels, img_nrows, img_ncols)
 38 |         self.img_shape=img_shape
 39 |         adversarial_image = K.placeholder((1, n_channels, img_nrows, img_ncols))
 40 |         adversarial_target = K.placeholder((1, nb_class))
 41 |         adv_noise = K.placeholder((1, n_channels, img_nrows, img_ncols))
 42 |             
 43 |         self.model = model
 44 |         
 45 |         def get_weights():
 46 |             layers = self.model.layers
 47 |             norm_weights = 1
 48 |             for layer in layers:
 49 |                 if hasattr(layer, 'kernel'):
 50 |                     w = np.linalg.norm(layer.kernel.get_value().flatten())
 51 |                     norm_weights*=w
 52 |             return norm_weights
 53 |         
 54 |         self.norm_weights = get_weights()
 55 |         
 56 |         
 57 |         """
 58 |         self.model.trainable=False
 59 |         for layer in self.model.layers:
 60 |             layer.trainable=False
 61 |         """
 62 |         self.adversarial_image= adversarial_image
 63 |         self.adversarial_target = adversarial_target
 64 |         self.adv_noise = adv_noise
 65 |         self.img_shape = img_shape
 66 |         self.nb_class = nb_class
 67 |         
 68 |         
 69 |         prediction = self.model.call(self.adversarial_image)
 70 |         self.predict_ = K.function([K.learning_phase(), self.adversarial_image], K.argmax(prediction, axis=1))
 71 | 
 72 |     def generate(data):
 73 |         raise NotImplementedError()
 74 |         
 75 |     def predict(self,image):
 76 |         return self.predict_([0, image])
 77 |     
 78 |     def prediction(self, image):
 79 |         return self.output_([0, image])
 80 |         
 81 |     def generate_sample(self, true_image):
 82 |         raise NotImplementedError()
 83 | 
 84 | class Adversarial_Szegedy(Adversarial_example):
 85 |     
 86 |     def __init__(self,**kwargs):
 87 |         super(Adversarial_Szegedy, self).__init__(**kwargs)
 88 |         
 89 |         loss_classif=K.mean(K.categorical_crossentropy(self.model(self.adv_noise), self.adversarial_target)) +\
 90 |                         0.001*K.sum(K.abs(self.adv_noise - self.adversarial_image))
 91 | 
 92 |         grad_adversarial = K.gradients(loss_classif, self.adv_noise)
 93 | 
 94 |         f_loss = K.function([K.learning_phase(), self.adv_noise, self.adversarial_image, self.adversarial_target], loss_classif)
 95 |         f_grad = K.function([K.learning_phase(), self.adv_noise, self.adversarial_image, self.adversarial_target], grad_adversarial)
 96 |         
 97 |         def eval_loss(adv_label, true_image):
 98 |             
 99 |             def function(noise):
100 |                 r = noise.astype('float32')
101 |                 r = r.reshape(self.img_shape)
102 |                 x = true_image.astype('float32')
103 |                 x = x.reshape(self.img_shape)
104 |                 y = np.array([0.]*(self.nb_class))
105 |                 # pick random choice
106 |                 y[adv_label] = 1.
107 |                 y = y[None,:]
108 |                 return f_loss([0, r, x, y]).astype('float64')
109 |             return function
110 |         
111 |         self.eval_loss = eval_loss
112 |         
113 |         def eval_grad(adv_label, true_image):
114 |             
115 |             def function(noise):
116 |                 r = noise.astype('float32')
117 |                 r = r.reshape(self.img_shape)
118 |                 x = true_image.astype('float32')
119 |                 x = x.reshape(self.img_shape)
120 |                 y = np.array([0.]*(self.nb_class))
121 |                 # pick random choice
122 |                 y[adv_label] = 1.
123 |                 y = y[None,:]
124 |                 return f_grad([0, r, x, y]).flatten().astype('float64')
125 |             return function
126 |         
127 |         self.eval_grad = eval_grad
128 |         prediction = self.model.call(self.adv_noise)
129 | 
130 |         
131 |     def generate_sample(self, x, adv_label):
132 | 
133 |         
134 |         image_adv = np.copy(x.flatten())
135 |         for i in range(100):
136 |             image_adv = image_adv.flatten()
137 |             eval_loss = self.eval_loss(adv_label, image_adv)
138 |             eval_grad = self.eval_grad(adv_label, image_adv)
139 |             noise = np.copy(x).flatten()
140 |             results = scipy.optimize.fmin_l_bfgs_b(eval_loss, noise,
141 |                                                    fprime=eval_grad, 
142 |                                                    maxfun=100)
143 |             noise = results[0].astype('float32')
144 |             image_adv = noise.reshape((self.img_shape[0], self.img_shape[1], self.img_shape[2], self.img_shape[3]))
145 |             label_pred = self.predict(image_adv)
146 |             if label_pred == adv_label:
147 |                 return image_adv, True
148 |         #print('FAIL')
149 |         return x, False
150 | 
151 | class Adversarial_DeepFool(Adversarial_example):
152 |     
153 |     def __init__(self,  **kwargs):
154 |         super(Adversarial_DeepFool, self).__init__(**kwargs)
155 |         
156 |         # HERE check for the softmax
157 |         
158 |         # the network is evaluated without the softmax
159 |         # you need to retrieve the last layer (Activation('softmax'))
160 |         last_dense = self.model.layers[-2].output
161 |         second_model = Model(self.model.input, last_dense)
162 |         loss_classif = K.mean(second_model.call(self.adversarial_image)[0, K.argmax(self.adversarial_target)])
163 |         grad_adversarial = K.gradients(loss_classif, self.adversarial_image)
164 |         self.f_loss = K.function([K.learning_phase(), self.adversarial_image, self.adversarial_target], loss_classif)
165 |         self.f_grad = K.function([K.learning_phase(), self.adversarial_image, self.adversarial_target], grad_adversarial)
166 |         
167 |         def eval_loss(x,y):
168 |             y_vec = np.zeros((1, self.nb_class))
169 |             y_vec[:,y] +=1
170 |             return self.f_loss([0., x, y_vec])
171 |         
172 |         def eval_grad(x,y):
173 |             y_vec = np.zeros((1, self.nb_class))
174 |             y_vec[:,y] +=1
175 |             return self.f_grad([0., x, y_vec]) 
176 |         
177 |         self.eval_loss = eval_loss
178 |         self.eval_grad = eval_grad
179 |         
180 |     
181 |     def generate(self, data, option='euclidian'):
182 |         #perturbations=[self.generate_sample(data[i:i+1]) for i in range(len(data))]
183 | 
184 |         perturbations = []
185 |         adv_attacks = []
186 |         for i in range(len(data)):
187 |             r_i, x_i = self.generate_sample(data[i:i+1], option=option)
188 |             perturbations.append(r_i)
189 |             adv_attacks.append(x_i[0])
190 |             
191 |         """
192 |         # compute also the second bar
193 |         uncertainty = []
194 |         for i in range(len(data)):
195 |             uncertainty.append(self.lower_bound_sample(data[i:i+1]))
196 |         """
197 |         
198 |         index_perturbation = np.argsort(perturbations)
199 |         tmp = np.array(adv_attacks)
200 |         return index_perturbation, tmp[index_perturbation]
201 |         """
202 |         uncertainty = np.array(uncertainty)/self.norm_weights
203 |         index_perturbation = np.argsort(perturbations)
204 |         
205 |         perturbations = perturbations[index_perturbation]
206 |         uncertainty = uncertainty[index_perturbation]
207 |         
208 |         N = len(data)
209 |         sorted_index = np.arange(N)
210 |         
211 |         sorted_index = self.priv_sort_interval(perturbations, uncertainty, sorted_index)
212 |         import pdb; pdb.set_trace()
213 |         index_perturbation = index_perturbation[sorted_index]
214 |         #return np.argsort(perturbations)
215 |         return index_perturbation
216 |         """
217 |     
218 |     def priv_sort_interval(self,array_a, array_b, sorted_index):
219 |         
220 |         # array_a : upper bound
221 |         # array_b : lower bound
222 |         N = len(array_a)
223 |         for i in range(N-1):
224 |             index_0 = sorted_index[i]
225 |             index_1 = sorted_index[i+1]
226 |             if array_a[index_0]<=array_b[index_1]:
227 |                 continue
228 |             # array_a[i] > array_b[i+1]
229 |             if array_b[index_1]>=array_b[index_0]:
230 |                 continue
231 |             
232 |             if array_b[index_0]> array_b[index_1]:
233 |                 proba = (array_b[index_0] - array_b[index_1])/(array_a[index_0] - array_b[index_1])
234 |                 if proba >=0.5:
235 |                     
236 |                     sorted_index[i]=index_1
237 |                     sorted_index[i+1]=index_0
238 |                     return self.priv_sort_interval(array_a, array_b, sorted_index)
239 |                 else:
240 |                     continue
241 |         return sorted_index
242 |             
243 |     
244 |     def lower_bound_sample(self, true_image):
245 |         true_label = self.predict(true_image)
246 |         f_x = self.model.predict(true_image).flatten()
247 |         
248 |         score = np.inf
249 |         index=-1
250 |         for i in range(self.nb_class):
251 |             if i==true_label:
252 |                 continue
253 |             vector = np.zeros((self.nb_class))
254 |             vector[true_label]=np.sqrt(2)
255 |             vector[i]=-np.sqrt(2)
256 |             score_i = np.dot(vector, f_x)
257 |             
258 |             if score_i <score:
259 |                 score=score_i
260 |                 index=i
261 | 
262 |         return score
263 |          
264 |     def generate_sample(self, true_image, option='euclidian'):
265 |         assert option in ['euclidian', 'inf'], ('unknown option %s',option)
266 |         if option == 'euclidian':
267 |             return self.generate_sample_euclidian(true_image)
268 |         else:
269 |             return self.generate_sample_infinity(true_image)
270 |     
271 |     def generate_sample_infinity(self, true_image):
272 | 
273 |         true_label = self.predict(true_image)
274 | 
275 |         x_i = np.copy(true_image); i=0
276 |         while self.predict(x_i) == true_label and i<10:
277 |             other_labels = range(self.nb_class)
278 |             other_labels.remove(true_label)
279 |             w_labels=[]; f_labels=[]
280 |             for k in other_labels:
281 |                 w_k = (self.eval_grad(x_i,k).flatten() - self.eval_grad(x_i, true_label).flatten())
282 |                 f_k = np.abs(self.eval_loss(x_i, k).flatten() - self.eval_loss(x_i, true_label).flatten())
283 |                 w_labels.append(w_k); f_labels.append(f_k)
284 |             result = [f_k/(sum(np.abs(w_k))) for f_k, w_k in zip(f_labels, w_labels)]
285 |             label_adv = np.argmin(result)
286 |             
287 |             r_i = (f_labels[label_adv]/(np.sum(np.abs(w_labels[label_adv]))) )*np.sign(w_labels[label_adv])
288 |             #print(self.predict(x_i), f_labels[label_adv], np.mean(x_i), np.mean(r_i))
289 |             if np.max(np.isnan(r_i))==True:
290 |                 return 0, true_image
291 |             x_i += r_i.reshape(true_image.shape)
292 |             #x_i = np.clip(x_i, self.mean - self.std, self.mean+self.std)
293 |             i+=1
294 |             
295 |             
296 |         adv_image = x_i
297 |         adv_label = self.predict(adv_image)
298 |         if adv_label == true_label:
299 |             return np.inf, x_i
300 |         else:
301 |             perturbation = (x_i - true_image).flatten()
302 |             #return np.linalg.norm(perturbation)
303 |             return np.max(np.abs(perturbation)), x_i
304 |         
305 |     
306 |     
307 |     def generate_sample_euclidian(self, true_image):
308 |         true_label = self.predict(true_image)
309 | 
310 |         x_i = np.copy(true_image); i=0
311 |         while self.predict(x_i) == true_label and i<10:
312 |             other_labels = range(self.nb_class)
313 |             other_labels.remove(true_label)
314 |             w_labels=[]; f_labels=[]
315 |             for k in other_labels:
316 |                 w_k = (self.eval_grad(x_i,k).flatten() - self.eval_grad(x_i, true_label).flatten())
317 |                 f_k = np.abs(self.eval_loss(x_i, k).flatten() - self.eval_loss(x_i, true_label).flatten())
318 |                 w_labels.append(w_k); f_labels.append(f_k)
319 |             result = [f_k/(np.linalg.norm(w_k)) for f_k, w_k in zip(f_labels, w_labels)]
320 |             label_adv = np.argmin(result)
321 |             
322 |             r_i = (f_labels[label_adv]/(np.linalg.norm(w_labels[label_adv])**2) )*w_labels[label_adv]
323 |             #print(self.predict(x_i), f_labels[label_adv], np.mean(x_i), np.mean(r_i))
324 |             if np.max(np.isnan(r_i))==True:
325 |                 return np.inf, true_image
326 |             x_i += r_i.reshape(true_image.shape)
327 |             #x_i = np.clip(x_i, self.mean - self.std, self.mean+self.std)
328 |             i+=1
329 |             
330 |             
331 |         adv_image = x_i
332 |         adv_label = self.predict(adv_image)
333 |         if adv_label == true_label:
334 |             return np.inf, x_i
335 |         else:
336 |             perturbation = (x_i - true_image).flatten()
337 |             return np.linalg.norm(perturbation), x_i
338 | 
339 | 


--------------------------------------------------------------------------------
/active_framework.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Created on Thu Nov  9 18:03:13 2017
  4 | 
  5 | @author: mducoffe
  6 | """
  7 | 
  8 | import sys
  9 | 
 10 | import numpy as np
 11 | import sklearn.metrics as metrics
 12 | import argparse
 13 | import keras
 14 | 
 15 | from keras import backend as K
 16 | #from snapshot import SnapshotCallbackBuilder
 17 | import csv
 18 | from contextlib import closing
 19 | import os
 20 | from build_model import build_model_func
 21 | from build_data import build_data_func, getSize
 22 | from adversarial_active_criterion import Adversarial_DeepFool
 23 | from bayesian_cnn import bald
 24 | import keras.utils.np_utils as kutils
 25 | import pickle
 26 | import gc
 27 | from keras.preprocessing.image import ImageDataGenerator
 28 | 
 29 | #%%
 30 | import resource
 31 | from keras.callbacks import Callback
 32 | class MemoryCallback(Callback):
 33 |     def on_epoch_end(self, epoch, log={}):
 34 |         print(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)
 35 | 
 36 | 
 37 | 
 38 | #%%
 39 | def active_training(labelled_data, network_name, img_size,
 40 |                     batch_size=64, epochs=100, repeat=5):
 41 |     
 42 |     x_L, y_L = labelled_data 
 43 |     
 44 |     # split into train and validation
 45 |     
 46 |     N = len(y_L)
 47 |     n_train = (int) (N*0.8)
 48 | 
 49 |     batch_train = min(batch_size, len(x_L))
 50 |     steps_per_epoch = int(n_train/batch_train)+1
 51 |     best_model = None
 52 |     best_loss = np.inf
 53 |     for i in range(repeat):
 54 |         # shuffle data and split train and val
 55 |         index = np.random.permutation(N)
 56 |         x_train , y_train = (x_L[index[:n_train]], y_L[index[:n_train]])
 57 |         x_val , y_val = (x_L[index[n_train:]], y_L[index[n_train:]])
 58 |         
 59 |         generator_train = ImageDataGenerator()
 60 |         generator_train.fit(x_train, seed=0, augment=True)
 61 |         tmp = generator_train.flow(x_train, y_train, batch_size=batch_size)
 62 |         model = build_model_func(network_name, img_size)
 63 |         earlyStopping=keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, verbose=0, mode='auto')
 64 |         hist = model.fit_generator(tmp, steps_per_epoch, epochs=epochs,
 65 |                                    verbose=0,
 66 |                                    callbacks=[earlyStopping],
 67 |                                    validation_data=(x_val, y_val))
 68 |                                    
 69 |         loss, acc = model.evaluate(x_val, y_val, verbose=0)
 70 |         if loss < best_loss:
 71 |             best_loss = loss;
 72 |             best_model = model
 73 | 
 74 |     del model
 75 |     del hist
 76 |     del loss
 77 |     del acc
 78 |     i=gc.collect()
 79 |     while(i!=0):
 80 |         i=gc.collect()
 81 |     return best_model
 82 | 
 83 | #%%
 84 | def evaluate(model, percentage, test_data, nb_exp, repo, filename):
 85 |     x_test, y_test = test_data
 86 |     loss, acc = model.evaluate(x_test, y_test, verbose=0)
 87 |     
 88 |     with closing(open(os.path.join(repo, filename), 'a')) as csvfile:
 89 |         # TO DO
 90 |         writer = csv.writer(csvfile, delimiter=';',
 91 |                             quotechar='|', quoting=csv.QUOTE_MINIMAL)
 92 |         writer.writerow([str(nb_exp), str(percentage), str(acc)])
 93 |          
 94 |     #return query, unlabelled_pool
 95 | 
 96 | #%%
 97 | def get_weights(model):
 98 |     layers = model.layers
 99 |     weights=[]
100 |     for layer in layers:
101 |         if layer.trainable_weights:
102 |             weights_layer = layer.trainable_weights
103 |             weights+=[elem.get_value() for elem in weights_layer]
104 |     return weights
105 |     
106 | def load_weights(model, weights):
107 |     layers = model.layers
108 |     index=0
109 |     for layer in layers:
110 |         if layer.trainable_weights:
111 |             weights_layer = layer.trainable_weights
112 |             for elem in weights_layer:
113 |                 elem.set_value(weights[index])
114 |                 index+=1
115 |     return model
116 |                 
117 |                 
118 | def loading(repo, filename, num_sample, network_name, data_name):
119 |     # check if file exists
120 |     img_size = getSize(data_name) # TO DO
121 |     model=build_model_func(network_name, img_size)
122 |     filename = filename.split('.pkl')
123 |     f_weights = filename[0]+'_weights.pkl'
124 |     f_l_data = filename[0]+'_labelled.pkl'
125 |     f_u_data = filename[0]+'_unlabelled.pkl'
126 |     f_t_data = filename[0]+'_test.pkl'
127 |     if (os.path.isfile(os.path.join(repo, f_weights)) and \
128 |         os.path.isfile(os.path.join(repo, f_l_data)) and \
129 |         os.path.isfile(os.path.join(repo, f_u_data)) and \
130 |         os.path.isfile(os.path.join(repo, f_t_data))):
131 |         
132 |         
133 |         
134 |         with closing(open(os.path.join(repo, f_weights), 'rb')) as f:
135 |             weights = pickle.load(f)
136 |             model = load_weights(model, weights)
137 |             
138 |         with closing(open(os.path.join(repo, f_l_data), 'rb')) as f:
139 |             labelled_data = pickle.load(f)   
140 |             
141 |         with closing(open(os.path.join(repo, f_u_data), 'rb')) as f:
142 |             unlabelled_data = pickle.load(f) 
143 |             
144 |         with closing(open(os.path.join(repo, f_t_data), 'rb')) as f:
145 |             test_data = pickle.load(f)
146 |     else:
147 |         # TO DO !!!
148 |         print('no previous savings, starting from scratch')
149 |         labelled_data, unlabelled_data, test_data = build_data_func(data_name, num_sample=num_sample)
150 |     
151 |     return model, labelled_data, unlabelled_data, test_data
152 | 
153 | def saving(model, labelled_data, unlabelled_data, test_data, repo, filename):
154 |     weights = get_weights(model)
155 |     #data = (weights, labelled_data, unlabelled_data, test_data)
156 |     
157 |     filename = filename.split('.pkl')
158 |     f_weights = filename[0]+'_weights.pkl'
159 |     f_l_data = filename[0]+'_labelled.pkl'
160 |     f_u_data = filename[0]+'_unlabelled.pkl'
161 |     f_t_data = filename[0]+'_test.pkl'
162 |     
163 |     with closing(open(os.path.join(repo, f_weights), 'wb')) as f:
164 |         pickle.dump(weights, f)
165 |     with closing(open(os.path.join(repo, f_l_data), 'wb')) as f:
166 |         pickle.dump(labelled_data, f)
167 |     with closing(open(os.path.join(repo, f_u_data), 'wb')) as f:
168 |         pickle.dump(unlabelled_data, f)
169 |     with closing(open(os.path.join(repo, f_t_data), 'wb')) as f:
170 |         pickle.dump(test_data, f)
171 | 
172 | #%%
173 | 
174 | def active_selection(model, unlabelled_data, nb_data, active_method, repo, tmp_adv):
175 |     assert active_method in ['uncertainty', 'egl', 'random', 'aaq', 'saaq', 'ceal', 'bayesian'], ('Unknown active criterion %s', active_method)
176 |     if active_method=='uncertainty':
177 |         query, unlabelled_data = uncertainty_selection(model, unlabelled_data, nb_data)
178 |     if active_method=='random':
179 |         query, unlabelled_data = random_selection(unlabelled_data, nb_data)
180 |     if active_method=='egl':
181 |         query, unlabelled_data = egl_selection(model, unlabelled_data, nb_data)
182 |     if active_method=='aaq':
183 |         tmp_adv=None
184 |         query, unlabelled_data = adversarial_selection(model, unlabelled_data, nb_data, False, repo, tmp_adv)
185 |     if active_method=='saaq':
186 |         tmp_adv=None
187 |         query, unlabelled_data = adversarial_selection(model, unlabelled_data, nb_data, True, repo, tmp_adv)    
188 |     if active_method=='ceal':
189 |         query, unlabelled_data = ceal_selection(model, unlabelled_data, nb_data)
190 |     if active_method=='bayesian':
191 |         query, unlabelled_data = bald_selection(model, unlabelled_data, nb_data)
192 |         
193 |     return query, unlabelled_data
194 |     
195 | def random_selection(unlabelled_data, nb_data):
196 |     index = np.random.permutation(len(unlabelled_data[0]))
197 |     index_query = index[:nb_data]
198 |     index_unlabelled = index[nb_data:]
199 |     
200 |     return (unlabelled_data[0][index_query], unlabelled_data[1][index_query]), \
201 |            (unlabelled_data[0][index_unlabelled], unlabelled_data[1][index_unlabelled])
202 |            
203 | def bald_selection(model, unlabelled_data, nb_data):
204 |     n = min(100, len(unlabelled_data[0]))
205 |     subset_index = np.random.permutation(len(unlabelled_data[0]))
206 |     subset = unlabelled_data[0][subset_index[:n]]
207 |     index = bald(subset, model, 10)
208 |         
209 |     index_query = subset_index[index[:nb_data]]
210 |     index_unlabelled = subset_index[index[nb_data:]]
211 | 
212 |     new_data = unlabelled_data[0][index_query]
213 |     new_labels = unlabelled_data[1][index_query]
214 | 
215 |     return (new_data, new_labels), \
216 |            (np.concatenate([unlabelled_data[0][index_unlabelled], unlabelled_data[0][subset_index[n:]]], axis=0), np.concatenate([unlabelled_data[1][index_unlabelled], unlabelled_data[1][subset_index[n:]]], axis=0))
217 | 
218 | # add CEAL
219 | def uncertainty_selection(model, unlabelled_data, nb_data):
220 | 
221 |     preds = model.predict(unlabelled_data[0])
222 |     log_pred = -np.log(preds)
223 |     entropy = np.sum(preds*log_pred, axis=1)
224 |     # do entropy
225 |     index = np.argsort(entropy)[::-1]
226 |     
227 |     index_query = index[:nb_data]
228 |     index_unlabelled = index[nb_data:]
229 | 
230 |     new_data = unlabelled_data[0][index_query]
231 |     new_labels = unlabelled_data[1][index_query]
232 |     """
233 |     else:
234 |         new_data = np.concatenate([labelled_data[0], unlabelled_data[0][index_query]], axis=0)
235 |         new_labels = np.concatenate([labelled_data[1], unlabelled_data[1][index_query]], axis=0)
236 |     """
237 |     return (new_data, new_labels), \
238 |            (unlabelled_data[0][index_unlabelled], unlabelled_data[1][index_unlabelled])
239 |            
240 | 
241 | def pseudo_label(model, unlabelled_data, nb_data, threshold):
242 |     # do not consider the real labels
243 |     n = min(300, len(unlabelled_data[0]))
244 |     subset_index = np.random.permutation(len(unlabelled_data[0]))
245 |     subset = unlabelled_data[0][subset_index[:n]]
246 |     
247 |     preds = model.predict(subset)
248 |     log_pred = -np.log(preds)
249 |     entropy = np.sum(preds*log_pred, axis=1)
250 |     # do entropy
251 |     index = np.argsort(entropy)
252 |     
253 |     delta_index = np.argmin( (entropy[index] < threshold))
254 |     if delta_index==0:
255 |         if entropy[index][0]<threshold:
256 |             labelled_data=(unlabelled_data[0][subset_index[:n]], unlabelled_data[1][subset_index[:n]])
257 |             unlabelled_data=(unlabelled_data[0][subset_index[n:]], unlabelled_data[1][subset_index[n:]])
258 |             return labelled_data, unlabelled_data
259 |         return ([],[]), unlabelled_data
260 |         #return unlabelled_data, ([],[])
261 |         #return ([], []), \
262 |         #       unlabelled_data
263 |     else:
264 |         print('pseudo labelling...')
265 |         delta_index-=1
266 |         index_query = index[:delta_index]
267 |         labels = kutils.to_categorical(np.argmax(preds[index_query], axis=1), num_classes=10)
268 |         index_unlabelled = index[delta_index:]
269 |         
270 |         labelled_data=(unlabelled_data[0][index_query], labels)
271 |         unlabelled_data=(np.concatenate([unlabelled_data[0][subset_index[n:]], unlabelled_data[0][index_unlabelled]],axis=0),\
272 |                          np.concatenate([unlabelled_data[1][subset_index[n:]], unlabelled_data[1][index_unlabelled]],axis=0))
273 |         
274 |         return labelled_data, unlabelled_data
275 |         #return (unlabelled_data[0][index_query], labels), \
276 |         #       (unlabelled_data[0][index_unlabelled], unlabelled_data[1][index_unlabelled])
277 |                
278 |                
279 | def ceal_selection(model, unlabelled_data, nb_data):
280 |     # consider the lowest entropy for pseudo labelling
281 |     N_data = model.get_output_shape_at(0)[-1]
282 |     threshold=0
283 |     if N_data==10:
284 |         threshold=0.05
285 |     if N_data==4:
286 |         threshold=0.08
287 |     if N_data==2:
288 |         threshold=0.19
289 |     
290 |     threshold=0.002
291 |     labelled_data, unlabelled_data = pseudo_label(model, unlabelled_data, nb_data, threshold)
292 | 
293 |     preds = model.predict(unlabelled_data[0])
294 |     log_pred = -np.log(preds)
295 |     entropy = np.sum(preds*log_pred, axis=1)
296 | 
297 |     # do entropy
298 |     index = np.argsort(entropy)[::-1]
299 |     
300 |     index_query = index[:nb_data]
301 |     index_unlabelled = index[nb_data:]
302 | 
303 |     new_data = unlabelled_data[0][index_query]
304 |     new_labels = unlabelled_data[1][index_query]
305 |     """
306 |     else:
307 |         new_data = np.concatenate([labelled_data[0], unlabelled_data[0][index_query]], axis=0)
308 |         new_labels = np.concatenate([labelled_data[1], unlabelled_data[1][index_query]], axis=0)
309 |     """
310 |     return (new_data, new_labels), \
311 |            (unlabelled_data[0][index_unlabelled], unlabelled_data[1][index_unlabelled])
312 | 
313 | def egl_selection(model, unlabelled_data, nb_data):
314 |     
315 |     num_classes = model.get_output_shape_at(0)[-1]
316 |     def get_gradient(model):
317 |         input_shape = model.get_input_shape_at(0)
318 |         output_shape = model.get_output_shape_at(0)
319 |         x = K.placeholder(input_shape)
320 |         y = K.placeholder(output_shape)
321 |         y_pred = model.call(x)
322 |         loss = K.mean(keras.losses.categorical_crossentropy(y, y_pred))
323 |         weights = [tensor for tensor in model.trainable_weights]
324 |         optimizer = model.optimizer
325 |         gradient = optimizer.get_gradients(loss, weights)
326 |     
327 |         return K.function([K.learning_phase(), x, y], gradient)
328 | 
329 |     f_grad = get_gradient(model)
330 |     
331 |     def compute_egl(image):    
332 |         # test
333 |         grad = []
334 |         
335 |         for k in range(num_classes):
336 |             y_label = np.zeros((1, num_classes))
337 |             y_label[0,k] = 1
338 |             grad_k = f_grad([0, image, y_label])
339 |             grad_k = np.concatenate([np.array(grad_w).flatten() for grad_w in grad_k])
340 |             grad.append(grad_k)
341 |             
342 |         grad = np.mean(grad, axis=0)
343 |         return np.linalg.norm(grad)
344 | 
345 |     n = min(300, len(unlabelled_data[0]))
346 |     subset_index = np.random.permutation(len(unlabelled_data[0]))
347 |     subset = unlabelled_data[0][subset_index[:n]]
348 |     scores = [compute_egl(subset[i:i+1]) for i in range(len(subset))]
349 |     index = np.argsort(scores)[::-1]
350 |     index_query = subset_index[index[:nb_data]]
351 |     index_unlabelled = np.concatenate( (subset_index[index[nb_data:]], subset_index[n:]))
352 | 
353 |     return (unlabelled_data[0][index_query], unlabelled_data[1][index_query]), \
354 |            (unlabelled_data[0][index_unlabelled], unlabelled_data[1][index_unlabelled])
355 |            
356 | def adversarial_selection(model, unlabelled_data, nb_data, add_adv=False, repo='.', filename = None):
357 |     img_size = model.get_input_shape_at(0)
358 |     n_channels, img_nrows, img_ncols = img_size[1:]
359 |     nb_classes = model.get_output_shape_at(0)[-1]
360 |     active = Adversarial_DeepFool(model=model, n_channels=n_channels,
361 |                                   img_nrows=img_nrows, img_ncols=img_ncols, nb_class=nb_classes)
362 |     # select a subset of size 10*nb_data
363 |     n = min(300, len(unlabelled_data[0]))
364 |     subset_index = np.random.permutation(len(unlabelled_data[0]))
365 |     subset = unlabelled_data[0][subset_index[:n]]
366 |     # here consider or not the adv examples for pseudo labelling
367 |     # pick option
368 |     adversarial, attacks = active.generate(subset)
369 |         
370 |     if not(filename is None):
371 |         # save the first adv
372 |         img = unlabelled_data[0][subset_index[adversarial[0]]]
373 |         adv_img = attacks[0]
374 |         #save_adv(repo, filename, img, adv_img)
375 |     index_query = subset_index[adversarial[:nb_data]]
376 |     index_unlabelled = np.concatenate( (subset_index[adversarial[nb_data:]], subset_index[n:]))
377 |     
378 |     if add_adv:
379 |         new_data = np.concatenate([unlabelled_data[0][index_query], attacks[:nb_data]], axis=0)
380 |         new_labels = np.concatenate([unlabelled_data[1][index_query], unlabelled_data[1][index_query]], axis=0)
381 |         
382 |         return (new_data, new_labels), \
383 |               (unlabelled_data[0][index_unlabelled], unlabelled_data[1][index_unlabelled])
384 |     else:
385 |         return (unlabelled_data[0][index_query], unlabelled_data[1][index_query]), \
386 |                (unlabelled_data[0][index_unlabelled], unlabelled_data[1][index_unlabelled])
387 |                
388 | def save_adv(repo, filename, img, adv_img):
389 |     i = 0
390 |     assert os.path.isdir(repo), ('unknown repository %s', repo)
391 |     while os.path.isfile(os.path.join(repo, filename+'_'+str(i)+'.pkl')):
392 |         i+=1
393 |         
394 |     filename = os.path.join(repo, filename+'_'+str(i)+'.pkl')
395 |     
396 |     with closing(open(filename, 'wb')) as f:
397 |         pickle.dump([img, adv_img], f, protocol =pickle.HIGHEST_PROTOCOL)
398 | 
399 | #%%
400 | def active_learning(num_sample, data_name, network_name, active_name,
401 |                     nb_exp=0, nb_query=10, repo='test', filename='test.csv'):
402 |     
403 |     # create a model and do a reinit function
404 |     tmp_filename = 'tmp_{}_{}_{}.pkl'.format(data_name, network_name, active_name)
405 |     tmp_adv = None
406 |     if active_name in ['aaq', 'saaq']:
407 |         tmp_adv = 'adv_{}_{}_{}'.format(data_name, network_name, active_name)
408 |     filename = filename+'_{}_{}_{}'.format(data_name, network_name, active_name)
409 |     img_size = getSize(data_name)
410 |     # TO DO filename
411 |     
412 |     model, labelled_data, unlabelled_data, test_data = loading(repo, tmp_filename, num_sample, network_name, data_name)
413 |     batch_size = 32
414 |     percentage_data = len(labelled_data[0])
415 |     N_pool = len(labelled_data[0]) + len(unlabelled_data[0])
416 |     print('START')
417 |     # load data
418 |     i=0
419 |     while( percentage_data<=N_pool):
420 | 
421 |         i+=1
422 |         model = active_training(labelled_data, network_name, img_size, batch_size=batch_size)
423 |         
424 |         query, unlabelled_data = active_selection(model, unlabelled_data, nb_query, active_name, repo, tmp_adv) # TO DO
425 |         print('SUCCEED')
426 |         evaluate(model, percentage_data, test_data, nb_exp, repo, filename)
427 |         # SAVE
428 |         saving(model, labelled_data, unlabelled_data, test_data, repo, tmp_filename)
429 |         #print('SUCEED')
430 |         #print('step B')
431 |         i=gc.collect()
432 |         while(i!=0):
433 |             i = gc.collect()
434 | 
435 |         # add query to the labelled set
436 |         labelled_data_0 = np.concatenate((labelled_data[0], query[0]), axis=0)
437 |         labelled_data_1 = np.concatenate((labelled_data[1], query[1]), axis=0)
438 |         labelled_data = (labelled_data_0, labelled_data_1)
439 |         #update percentage_data
440 |         percentage_data +=nb_query
441 |         
442 | #%%
443 | if __name__=="__main__":
444 |     
445 |     parser = argparse.ArgumentParser(description='Active Learning')
446 | 
447 |     parser.add_argument('--id_experiment', type=int, default=4, help='id number of experiment')
448 |     parser.add_argument('--repo', type=str, default='.', help='repository for log')
449 |     parser.add_argument('--filename', type=str, default='test_0', help='csv filename')
450 |     parser.add_argument('--num_sample', type=int, default=10, help='size of the initial training set')
451 |     parser.add_argument('--data_name', type=str, default='bag_shoes', help='dataset')
452 |     parser.add_argument('--network_name', type=str, default='LeNet5', help='network')
453 |     parser.add_argument('--active', type=str, default='ceal', help='active techniques')
454 |     args = parser.parse_args()
455 |                                                                                                              
456 | 
457 | 
458 | 
459 |                                                                                                                 
460 | 
461 |     nb_exp = args.id_experiment
462 |     repo=args.repo
463 |     filename=args.filename
464 |     if filename.split('.')[-1]=='csv':
465 |         filename=filename.split('.csv')[0]
466 |         
467 |     data_name = args.data_name
468 |     network_name = args.network_name
469 |     active_option = args.active
470 |     num_sample = args.num_sample
471 |     
472 |     active_learning(num_sample=num_sample,
473 |                     data_name=data_name,
474 |                     network_name=network_name,
475 |                     active_name=active_option,
476 |                     nb_exp=nb_exp,
477 |                     repo=repo,
478 |                     filename=filename)
479 | 
480 | 
481 | 


--------------------------------------------------------------------------------