├── README.md
├── feature_selection_ga.py
├── fitness_function.py
├── heuristics.py
├── mbaco.py
└── plotaco.py

/README.md:
--------------------------------------------------------------------------------
# modifiedACO
A feature selection method based on a modified binary ant colony optimization (MBACO) algorithm, seeded with the best individual found by a genetic algorithm.
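## Quick start

A minimal end-to-end sketch, mirroring the `__main__` block of `mbaco.py`. It assumes scikit-learn is installed and uses its breast-cancer dataset; all parameter values are illustrative.

```python
from sklearn import svm
from sklearn.datasets import load_breast_cancer

import feature_selection_ga as fga
from mbaco import baco

data = load_breast_cancer()
model = svm.LinearSVC()

# A short GA run produces the binary individual that seeds the ACO pheromones.
ga = fga.FeatureSelectionGA(model, data.data, data.target)
pop, best, fitness = ga.generate(20)

solution = baco(best, data.data, data.target, alpha=1.0, beta=0.0,
                t_percent=40, iter_num=10)
print(solution[0])  # binary mask of the selected features
```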
--------------------------------------------------------------------------------
/feature_selection_ga.py:
--------------------------------------------------------------------------------
import random
import numpy as np
from deap import base, creator
from deap import tools
import fitness_function as ff


class FeatureSelectionGA:

    def __init__(self, model, x, y, cv_split=5, verbose=0):
        self.model = model
        self.n_features = x.shape[1]
        self.toolbox = None
        self.creator = self._create()
        self.cv_split = cv_split
        self.x = x
        self.y = y
        self.verbose = verbose
        if self.verbose == 1:
            print("Model {} will select the best features among {} features using cv_split: {}.".format(model, x.shape[1], cv_split))
            print("Shape of train_x: {} and target: {}".format(x.shape, y.shape))
        self.final_fitness = []
        self.fitness_in_generation = {}
        self.best_ind = None

    def evaluate(self, individual):
        fit_obj = ff.FitnessFunction(self.cv_split)
        np_ind = np.asarray(individual)
        if np.sum(np_ind) == 0:
            # An empty feature subset cannot be scored; give it the worst fitness.
            fitness = 0.0
        else:
            feature_idx = np.where(np_ind == 1)[0]
            fitness = fit_obj.calculate_fitness(self.model, self.x[:, feature_idx], self.y)

        if self.verbose == 1:
            print("Individual: {}  Fitness_score: {}".format(individual, fitness))

        # DEAP expects fitness values as a tuple.
        return fitness,

    def _create(self):
        creator.create("FeatureSelect", base.Fitness, weights=(1.0,))
        creator.create("Individual", list, fitness=creator.FeatureSelect)
        return creator

    def register_toolbox(self, toolbox):
        toolbox.register("evaluate", self.evaluate)
        self.toolbox = toolbox

    def _init_toolbox(self):
        toolbox = base.Toolbox()
        toolbox.register("attr_bool", random.randint, 0, 1)
        # Structure initializers
        toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_bool, self.n_features)
        toolbox.register("population", tools.initRepeat, list, toolbox.individual)
        return toolbox

    def _default_toolbox(self):
        toolbox = self._init_toolbox()
        toolbox.register("mate", tools.cxTwoPoint)
        toolbox.register("mutate", tools.mutFlipBit, indpb=0.1)
        toolbox.register("select", tools.selTournament, tournsize=3)
        toolbox.register("evaluate", self.evaluate)
        return toolbox

    def get_final_scores(self, pop, fits):
        self.final_fitness = list(zip(pop, fits))

    def generate(self, n_pop, cxpb=0.9, mutxpb=0.01, ngen=5, set_toolbox=False):
        if self.verbose == 1:
            print("Population: {}, crossover_probability: {}, mutation_probability: {}, total generations: {}".format(n_pop, cxpb, mutxpb, ngen))

        # Only fall back to the default toolbox when none was registered.
        if not set_toolbox:
            self.toolbox = self._default_toolbox()
        pop = self.toolbox.population(n_pop)
        CXPB, MUTPB, NGEN = cxpb, mutxpb, ngen

        # Evaluate the entire population
        print("EVOLVING.......")
        fitnesses = list(map(self.toolbox.evaluate, pop))
        for ind, fit in zip(pop, fitnesses):
            ind.fitness.values = fit

        for g in range(NGEN):
            print("-- GENERATION {} --".format(g + 1))
            offspring = self.toolbox.select(pop, len(pop))
            self.fitness_in_generation[str(g + 1)] = max(ind.fitness.values[0] for ind in pop)
            # Clone the selected individuals
            offspring = list(map(self.toolbox.clone, offspring))

            # Apply crossover and mutation on the offspring
            for child1, child2 in zip(offspring[::2], offspring[1::2]):
                if random.random() < CXPB:
                    self.toolbox.mate(child1, child2)
                    del child1.fitness.values
                    del child2.fitness.values

            for mutant in offspring:
                if random.random() < MUTPB:
                    self.toolbox.mutate(mutant)
                    del mutant.fitness.values

            # Evaluate the individuals with an invalid fitness
            weak_ind = [ind for ind in offspring if not ind.fitness.valid]
            fitnesses = list(map(self.toolbox.evaluate, weak_ind))
            for ind, fit in zip(weak_ind, fitnesses):
                ind.fitness.values = fit
            print("Evaluated %i individuals" % len(weak_ind))

            # The population is entirely replaced by the offspring
            pop[:] = offspring

            # Gather all the fitnesses in one list and print the stats
            fits = [ind.fitness.values[0] for ind in pop]

            length = len(pop)
            mean = sum(fits) / length
            sum2 = sum(x * x for x in fits)
            std = abs(sum2 / length - mean ** 2) ** 0.5
            if self.verbose == 1:
                print("  Min %s" % min(fits))
                print("  Max %s" % max(fits))
                print("  Avg %s" % mean)
                print("  Std %s" % std)

        self.best_ind = tools.selBest(pop, 1)[0]
        print("Best individual is %s, %s" % (self.best_ind, self.best_ind.fitness.values))
        self.get_final_scores(pop, fits)

        return pop, self.best_ind, self.best_ind.fitness.values


# Standalone usage example:
#
# from sklearn.datasets import load_iris
# from sklearn import svm
# import warnings
#
# model = svm.LinearSVC()
# iris = load_iris()
# x_train = iris.data
# y_train = iris.target
# with warnings.catch_warnings():
#     warnings.simplefilter('ignore')
#     fsga = FeatureSelectionGA(model, x_train, y_train)
#     pop, best_ind, best_fitness = fsga.generate(100)
#     print(best_ind)
--------------------------------------------------------------------------------
/fitness_function.py:
--------------------------------------------------------------------------------
from sklearn.model_selection import StratifiedKFold
import numpy as np
from sklearn.metrics import accuracy_score


class FitnessFunction:

    def __init__(self, n_splits=5, *args, **kwargs):
        """
        Parameters
        ----------
        n_splits : int
            Number of splits for cross-validation.
        """
        self.n_splits = n_splits

    def calculate_fitness(self, model, x, y):
        # Collect out-of-fold predictions for every sample, then score them once.
        cv_set = np.repeat(-1.0, x.shape[0])
        skf = StratifiedKFold(n_splits=self.n_splits)
        for train_index, test_index in skf.split(x, y):
            x_train, x_test = x[train_index], x[test_index]
            y_train, y_test = y[train_index], y[test_index]
            if x_train.shape[0] != y_train.shape[0]:
                raise ValueError("Mismatched number of samples and targets in a CV fold.")
            model.fit(x_train, y_train)
            predicted_y = model.predict(x_test)
            cv_set[test_index] = predicted_y
        return accuracy_score(y, cv_set)
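

# Minimal self-check (a hedged sketch, not part of the original module): the
# out-of-fold accuracy computed above should closely match scikit-learn's
# cross_val_predict on the same stratified folds. Assumes scikit-learn is
# installed; the iris dataset is used purely for illustration.
if __name__ == "__main__":
    from sklearn.datasets import load_iris
    from sklearn.model_selection import cross_val_predict
    from sklearn.svm import LinearSVC

    data = load_iris()
    ff = FitnessFunction(n_splits=5)
    print("FitnessFunction accuracy:", ff.calculate_fitness(LinearSVC(), data.data, data.target))
    # cv=5 with a classifier also uses StratifiedKFold under the hood.
    pred = cross_val_predict(LinearSVC(), data.data, data.target, cv=5)
    print("cross_val_predict accuracy:", accuracy_score(data.target, pred))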
--------------------------------------------------------------------------------
/heuristics.py:
--------------------------------------------------------------------------------
import numpy as np
import numpy.matlib


def heuristic_value_ascore(feature_num, dataset, targets):
    # Per-feature F-score, broadcast into one (feature_num x feature_num)
    # heuristic plane per edge type of the ACO construction graph.
    roads_E = np.zeros(feature_num * feature_num * 4, dtype="float64").reshape(4, feature_num, feature_num)

    ## F-score:
    classes = np.unique(targets)
    class_num = len(classes)
    total_mean_a = dataset.mean(0)
    numerator = 0
    denominator = 0

    sample_num_of_this_tag = np.zeros(class_num, dtype="int64")
    for i in range(0, class_num):
        tags = np.zeros((len(targets)), dtype="int64")
        bool_arr = np.equal(targets, classes[i])
        tags[bool_arr] = 1
        sample_num_of_this_tag[i] = np.sum(tags)
        dataset_only_class = dataset[bool_arr, :]
        class_mean_a = dataset_only_class.mean(0)
        class_mean_a = np.round(class_mean_a, decimals=4)

        # Between-class scatter of each feature's class mean around the global mean ...
        numerator = numerator + np.power(np.subtract(class_mean_a, total_mean_a), 2)
        # ... over the spread of this class's samples around the global mean (as implemented here).
        denominator = denominator + sum(np.power(np.subtract(dataset_only_class, np.matlib.repmat(total_mean_a, dataset_only_class.shape[0], 1)), 2)) / (sample_num_of_this_tag[i] - 1)

    Acc_score = np.divide(numerator, denominator)

    roads_E[0, :, :] = (0.5 / feature_num) * sum(Acc_score)
    roads_E[1, :, :] = np.matlib.repmat(Acc_score, feature_num, 1)
    roads_E[2, :, :] = (0.5 / feature_num) * sum(Acc_score)
    roads_E[3, :, :] = np.matlib.repmat(Acc_score, feature_num, 1)

    return roads_E
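

# Quick sanity check (a hedged sketch; assumes scikit-learn's iris dataset).
# One (feature_num x feature_num) plane is returned per edge type of the graph.
if __name__ == "__main__":
    from sklearn.datasets import load_iris

    data = load_iris()
    E = heuristic_value_ascore(data.data.shape[1], data.data, data.target)
    print(E.shape)   # (4, 4, 4): 4 edge types x 4 features x 4 features
    print(E[1, 0])   # per-feature F-scores, repeated along the rows of plane 1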
--------------------------------------------------------------------------------
/mbaco.py:
--------------------------------------------------------------------------------
import random
from sklearn import svm
import timeit
from sklearn.datasets import load_breast_cancer
from plotaco import *
import warnings
import feature_selection_ga as fga
import numpy as np
from numpy import matlib
import sys


def baco(best_individual, x_data, y_data, alpha=1.0, beta=0.0, t_percent=40, iter_num=10):

    (my_bool, msg_err) = check_baco_args(t_percent, iter_num)
    if not my_bool:
        print("problem with arguments for baco()!!!")
        print(msg_err)
        sys.exit(1)

    train_percentage = 100 - int(t_percent)

    start = timeit.default_timer()
    (best_fitnesses_each_iter, average_fitnesses_each_iter, num_of_features_selected_by_best_ant_each_iter, best_fit_so_far, best_ant_road) = run_feature_selection(
        best_individual, generations=iter_num, alpha=alpha, beta=beta, T0=0.1, T1=0.2,
        Min_T=0.1, Max_T=6, q=0.95, Q=0.3, ant_num=50, feature_num=x_data.shape[1],
        dataset=x_data, targets=y_data, train_percentage=train_percentage)
    end = timeit.default_timer()
    time_temp = end - start

    # Baseline accuracy with every feature kept, for comparison.
    acc_before_run = get_single_fit(x_data, y_data, train_percentage)

    total_feature_num = x_data.shape[1]
    sample_num = x_data.shape[0]

    best_selected_features_num = np.sum(best_ant_road)
    return (best_ant_road, acc_before_run, best_fit_so_far, total_feature_num, best_selected_features_num, best_fitnesses_each_iter, average_fitnesses_each_iter, num_of_features_selected_by_best_ant_each_iter, time_temp, sample_num)


def check_baco_args(t_percent, iter_num):
    msg_err = ""
    try:
        int(t_percent)
    except Exception:
        msg_err = "t_percent should be an integer!"
        return (False, msg_err)

    try:
        int(iter_num)
    except Exception:
        msg_err = "iter_num should be an integer!"
        return (False, msg_err)

    if iter_num > 100:
        msg_err = "iter_num should be at most 100!"
        return (False, msg_err)

    if iter_num < 5:
        msg_err = "iter_num should be at least 5!"
        return (False, msg_err)

    return (True, msg_err)
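
# Pheromone tensor layout (descriptive note, inferred from the selection code below):
#   pheromones[0, i, j] : edge "feature i deselected -> feature j deselected"
#   pheromones[1, i, j] : edge "feature i deselected -> feature j selected"
#   pheromones[2, i, j] : edge "feature i selected   -> feature j deselected"
#   pheromones[3, i, j] : edge "feature i selected   -> feature j selected"
# Trails are seeded from the GA's best individual: moves that agree with it
# start at T0 + T1, moves that contradict it start at T1 only.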

def run_feature_selection(best_individual, generations, alpha, beta, T0, T1, Min_T, Max_T, q, Q, ant_num, feature_num, dataset, targets, train_percentage):

    best_fitnesses_each_iter = []
    average_fitnesses_each_iter = []
    num_of_features_selected_by_best_ant_each_iter = []
    road_maps = np.zeros(ant_num * feature_num * generations, dtype="int64").reshape(generations, ant_num, feature_num)
    best_roads_list = []

    best_fit_so_far = 0
    best_road_so_far = np.zeros(feature_num, dtype="int64")

    np.set_printoptions(suppress=True, threshold=1000)

    # Seed the four pheromone planes from the GA's best individual.
    pheromones_1 = T0 * np.asarray(best_individual) + T1
    pheromones_1 = np.matlib.repmat(pheromones_1, feature_num, 1).reshape(feature_num, feature_num)
    opp_best_individual = np.subtract(np.ones(feature_num), best_individual)
    pheromones_2 = T0 * opp_best_individual + T1
    pheromones_2 = np.matlib.repmat(pheromones_2, feature_num, 1).reshape(feature_num, feature_num)
    pheromone = np.vstack((pheromones_2, pheromones_1))
    pheromone = np.vstack((pheromone, pheromone))
    pheromones = pheromone.reshape(4, feature_num, feature_num)

    for i in range(0, generations):

        visibility = ascore(feature_num, dataset, targets)

        (road_map, pointer) = baco_road_selection(pheromones, visibility, alpha, beta, ant_num, feature_num)

        (iter_best_fit, best_road_so_far, best_fit_so_far, iter_best_road, fitnesses, iter_average_fit, ants_num_of_features_selected) = do_calculations(road_map, dataset, targets, best_fit_so_far, best_road_so_far, train_percentage)

        pheromones = trail_update(fitnesses, pheromones, Min_T, Max_T, Q, q, iter_best_road, feature_num)

        road_maps[i] = road_map
        best_fitnesses_each_iter.append(iter_best_fit)
        average_fitnesses_each_iter.append(iter_average_fit)
        num_of_features_selected_by_best_ant_each_iter.append(sum(best_road_so_far))
        best_roads_list.append(best_road_so_far)

    # Return the road from the iteration whose best fitness was highest.
    my_indx = best_fitnesses_each_iter.index(max(best_fitnesses_each_iter))
    return (best_fitnesses_each_iter, average_fitnesses_each_iter, num_of_features_selected_by_best_ant_each_iter, best_fit_so_far, best_roads_list[my_indx])


def ascore(feature_num, dataset, targets):
    # Same F-score heuristic as heuristics.heuristic_value_ascore, duplicated
    # here so mbaco.py is self-contained.
    visibility = np.zeros(feature_num * feature_num * 4, dtype="float64").reshape(4, feature_num, feature_num)

    ## F-score:
    classes = np.unique(targets)
    class_num = len(classes)
    total_mean_a = dataset.mean(0)
    numerator = 0
    denominator = 0

    sample_num_of_this_tag = np.zeros(class_num, dtype="int64")
    for i in range(0, class_num):
        tags = np.zeros((len(targets)), dtype="int64")
        bool_arr = np.equal(targets, classes[i])
        tags[bool_arr] = 1
        sample_num_of_this_tag[i] = np.sum(tags)
        dataset_only_class = dataset[bool_arr, :]
        class_mean_a = dataset_only_class.mean(0)
        class_mean_a = np.round(class_mean_a, decimals=4)

        numerator = numerator + np.power(np.subtract(class_mean_a, total_mean_a), 2)
        denominator = denominator + sum(np.power(np.subtract(dataset_only_class, np.matlib.repmat(total_mean_a, dataset_only_class.shape[0], 1)), 2)) / (sample_num_of_this_tag[i] - 1)

    Acc_score = np.divide(numerator, denominator)

    visibility[0, :, :] = (0.5 / feature_num) * sum(Acc_score)
    visibility[1, :, :] = np.matlib.repmat(Acc_score, feature_num, 1)
    visibility[2, :, :] = (0.5 / feature_num) * sum(Acc_score)
    visibility[3, :, :] = np.matlib.repmat(Acc_score, feature_num, 1)

    return visibility


def get_accuracy_for_this_solution(train_dataset, train_targets, test_dataset, test_targets):
    lin_clf = svm.LinearSVC()
    lin_clf.fit(train_dataset, train_targets)
    predicted_targets = lin_clf.predict(test_dataset)
    # Fraction of correctly predicted test samples.
    return np.mean(np.equal(test_targets, predicted_targets))


def separate_datasets(dataset, targets, train_percentage):
    # Random hold-out split: a fresh split is drawn every time fitnesses are computed.
    leng = len(dataset[:, 0])
    s = int(leng * (train_percentage / 100))

    samples_list = random.sample(range(0, leng), s)

    mask = np.zeros((leng), dtype=bool)
    mask[samples_list] = True

    train_dataset = dataset[mask, :]
    test_dataset = dataset[~mask, :]
    train_targets = targets[mask]
    test_targets = targets[~mask]

    return (train_dataset, test_dataset, train_targets, test_targets)


def get_fitnesses(road_map, dataset, targets, train_percentage):
    num_of_features_selected = list()
    fitnesses = list()

    for ant_solution in road_map:
        if np.sum(ant_solution) == 0:
            # No feature selected: worst possible fitness.
            num_of_features_selected.append(0)
            fitnesses.append(0)
        else:
            # Keep only the columns this ant switched on.
            new_dataset = dataset[:, np.asarray(ant_solution, dtype=bool)]
            num_of_features_selected.append(new_dataset.shape[1])

            (train_dataset, test_dataset, train_targets, test_targets) = separate_datasets(new_dataset, targets, train_percentage)
            fitnesses.append(get_accuracy_for_this_solution(train_dataset, train_targets, test_dataset, test_targets))

    return num_of_features_selected, fitnesses


def get_single_fit(dataset, targets, train_percentage):
    (train_dataset, test_dataset, train_targets, test_targets) = separate_datasets(dataset, targets, train_percentage)
    return get_accuracy_for_this_solution(train_dataset, train_targets, test_dataset, test_targets)
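
# Transition rule (descriptive note): an ant that last fixed feature i with bit b
# chooses the next feature j and its bit b' with probability proportional to
#     tau[plane(b, b'), i, j] ** alpha  *  eta[plane(b, b'), i, j] ** beta
# normalized over all 2 * feature_num remaining candidate moves; the sampling
# itself is a cumulative-sum roulette wheel, implemented in pick_next_location.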

def pick_next_location(probs, feature_num):
    # Roulette-wheel sampling over 2*feature_num candidates: the first half of
    # `probs` encodes "set feature j to 0", the second half "set feature j to 1".
    cumulative = 0
    index = len(probs) - 1  # fallback in case of floating-point round-off
    r = np.random.random_sample()
    for x in range(len(probs)):
        cumulative = cumulative + probs[x]
        if r < cumulative:
            index = x
            break
    # Because probs is (feature_num + feature_num) long, fold the index back.
    if index >= feature_num:
        index = index - feature_num
        zero_or_one = 1
    else:
        zero_or_one = 0
    return (index, zero_or_one)


def baco_road_selection(pheromones, visibility, alpha, beta, ant_num, feature_num):
    road_map = np.zeros(ant_num * feature_num, dtype="int64").reshape(ant_num, feature_num)
    pointer = np.zeros(ant_num * feature_num, dtype="int64").reshape(ant_num, feature_num)

    for k in range(0, ant_num):
        # Attractiveness of every edge: pheromone^alpha * visibility^beta.
        indx = np.multiply(np.power(pheromones, alpha), np.power(visibility, beta))
        for j in range(0, feature_num):

            if j == 0:
                # The first feature is picked uniformly at random ...
                cur_feature = np.random.randint(0, feature_num, 1)[0]
                pointer[k, j] = cur_feature
                # ... and its bit is drawn from the pheromone mass of the
                # "deselect" planes relative to all four planes.
                temp = np.sum(pheromones[0, :, cur_feature] + pheromones[2, :, cur_feature]) / np.sum(pheromones[0, :, cur_feature] + pheromones[1, :, cur_feature] + pheromones[2, :, cur_feature] + pheromones[3, :, cur_feature])
                rand = np.random.random_sample()
                if rand < temp:
                    road_map[k, cur_feature] = 0
                else:
                    road_map[k, cur_feature] = 1

            else:
                if road_map[k, pointer[k, j - 1]] == 1:
                    # Previous feature selected: planes 2 (next bit 0) and 3 (next bit 1).
                    numerator = np.hstack((indx[2, pointer[k, j - 1], :], indx[3, pointer[k, j - 1], :]))
                else:
                    # Previous feature deselected: planes 0 (next bit 0) and 1 (next bit 1).
                    numerator = np.hstack((indx[0, pointer[k, j - 1], :], indx[1, pointer[k, j - 1], :]))
                probability = np.divide(numerator, np.sum(numerator))
                (selected_feature_indx, zero_or_one) = pick_next_location(probability, feature_num)
                pointer[k, j] = selected_feature_indx
                road_map[k, pointer[k, j]] = zero_or_one

            # Zero out all edges into the feature just visited, so its
            # selection probability drops to zero for this ant.
            indx[:, :, pointer[k, j]] = 0
    return (road_map, pointer)


def do_calculations(road_map, dataset, targets, best_fit_so_far, best_road_so_far, train_percentage):

    ants_num_of_features_selected, fitnesses = get_fitnesses(road_map, dataset, targets, train_percentage)

    iter_average_fit = np.mean(fitnesses, axis=0)
    iter_best_fit = max(fitnesses)
    iter_best_ant = fitnesses.index(iter_best_fit)
    iter_best_road = road_map[iter_best_ant, :]

    if iter_best_fit > best_fit_so_far:
        best_fit_so_far = iter_best_fit
        best_road_so_far = iter_best_road
    return (iter_best_fit, best_road_so_far, best_fit_so_far, iter_best_road, fitnesses, iter_average_fit, ants_num_of_features_selected)
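
# Pheromone update (descriptive note): trail_update below first evaporates all
# trails (tau <- q * tau, with q = 0.95 in baco()), then deposits
# delta = 1 / iteration_best_fitness on every edge consistent with the
# iteration-best ant's road, and finally clamps all trails to [Min_T, Max_T]
# so that no move's probability can collapse to zero or dominate completely.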

def trail_update(fitnesses, pheromones, Min_T, Max_T, Q, q, iter_best_road, feature_num):

    pheromones = pheromones * q  # pheromone evaporation

    max_fit = max(fitnesses)

    # Mark the edges used by the iteration-best ant.
    change_pheromones = np.zeros(feature_num * feature_num * 4, dtype="float64").reshape(4, feature_num, feature_num)
    for i in range(0, len(iter_best_road)):
        if iter_best_road[i] == 0:
            change_pheromones[0, :, i] = 1
            change_pheromones[2, :, i] = 1
        else:
            change_pheromones[1, :, i] = 1
            change_pheromones[3, :, i] = 1

    # Alternative (error-based) reinforcement kept from the original experiments;
    # Q stays in the signature for this variant:
    # class_err = np.array([1 - f for f in fitnesses])
    # change_pheromones = (Q / (min(class_err) + 0.001)) * change_pheromones

    # Reinforce those edges in inverse proportion to the best fitness,
    # guarding against division by zero.
    if max_fit == 0:
        change_pheromones = (1 / (max_fit + 0.001)) * change_pheromones
    else:
        change_pheromones = (1 / max_fit) * change_pheromones

    pheromones = pheromones + change_pheromones
    # Keep every trail inside [Min_T, Max_T].
    np.clip(pheromones, Min_T, Max_T, out=pheromones)

    return pheromones
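
# Driver (descriptive note): the GA in feature_selection_ga.py first evolves a
# strong binary individual; its bit pattern seeds the pheromone trails of the
# modified binary ACO above, which then refines the subset. If the ACO's last
# iteration never beats the GA's cross-validated fitness, the GA individual is
# reported instead.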
if __name__ == "__main__":
    alpha = float(input("Enter alpha : "))
    beta = float(input("Enter beta : "))
    data = load_breast_cancer()
    x_train = data.data
    y_train = data.target
    model = svm.LinearSVC()
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        fsga = fga.FeatureSelectionGA(model, x_train, y_train)
        (pop, best, fitness_value) = fsga.generate(20)
    solution = baco(best, x_train, y_train, alpha=alpha, beta=beta, t_percent=40, iter_num=10)
    (best_ant_road, acc_before_run, best_fit_so_far, total_feature_num, best_selected_features_num,
     best_fitnesses_each_iter, average_fitnesses_each_iter, num_of_features_selected_by_best_ant_each_iter, time_temp,
     sample_num) = solution
    if fitness_value[0] < best_fitnesses_each_iter[-1]:
        print("Binary representation of feature selection : ", best_ant_road)
        print("Total number of features in dataset : ", total_feature_num)
        print("Number of features selected : ", num_of_features_selected_by_best_ant_each_iter[-1])
        print("Accuracy before MBACO : ", acc_before_run)
        print("Accuracy with best fit : ", best_fit_so_far)
        print("Accuracy of best ant in last iteration : ", best_fitnesses_each_iter[-1])
        draw_baco(solution)
    else:
        print("GA gave the best individual, which is", best, " (", fitness_value, ")")
--------------------------------------------------------------------------------
/plotaco.py:
--------------------------------------------------------------------------------
import matplotlib.pyplot as plt
import matplotlib.ticker as plticker
import numpy as np


def show_res_for_this_run(best_fitnesses_each_iter, average_fitnesses_each_iter, num_of_features_selected_by_best_ant_each_iter, feature_num):

    iterations = np.arange(1, len(best_fitnesses_each_iter) + 1, dtype="int64")

    # Place one x-axis tick per iteration.
    loc = plticker.MultipleLocator(base=1)

    fig, ax1 = plt.subplots(figsize=(10, 8))

    ##################################
    plt.subplot(221)

    xx1 = np.array(iterations)
    yy1 = np.array(best_fitnesses_each_iter)
    plt.plot(xx1, yy1, 'bo', xx1, yy1, 'k')

    plt.xlabel('iteration num')
    plt.ylabel('accuracy (fitness)')
    plt.title('Visualization of Accuracy over each Iteration')

    ax1 = fig.gca()
    ax1.xaxis.set_major_locator(loc)
    plt.grid(True)

    ##################################
    plt.subplot(222)

    xx2 = np.array(iterations)
    yy2 = np.array(average_fitnesses_each_iter)
    plt.plot(xx2, yy2, 'bo', xx2, yy2, 'k')

    plt.xlabel('iteration num')
    plt.ylabel('average accuracy')
    plt.title('Visualization of Average of Accuracy over each Iteration')

    ax2 = fig.gca()
    ax2.xaxis.set_major_locator(loc)

    ##################################
    # A third panel (bar chart of the number of features selected by the best
    # ant in each iteration, with labeled bars and y-limit feature_num) was
    # sketched here but is currently disabled:
    #
    # plt.subplot(223)
    # N = len(num_of_features_selected_by_best_ant_each_iter)
    # ind = np.arange(N)   # the x locations for the groups
    # width = 0.25         # the width of the bars
    # ax3 = fig.gca()
    # rects = ax3.bar(ind, num_of_features_selected_by_best_ant_each_iter, width, color='c')
    # ax3.set_ylabel('num of selected features (by best ant)')
    # ax3.set_xticks(ind + width / 2)
    # ax3.set_xticklabels(np.arange(1, N + 1))
    # ax3.set_ylim([0, feature_num])

    plt.show()

def draw_baco(solution):

    if len(solution) != 10:
        print("+++ can't draw the solution due to a problem with it! +++")
        return

    (best_ant_road, acc_before_run, best_fit_so_far, total_feature_num, best_selected_features_num, best_fitnesses_each_iter, average_fitnesses_each_iter, num_of_features_selected_by_best_ant_each_iter, time_temp, sample_num) = solution

    show_res_for_this_run(best_fitnesses_each_iter, average_fitnesses_each_iter, num_of_features_selected_by_best_ant_each_iter, total_feature_num)
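

# Minimal smoke test (a hedged sketch): draws the two panels from made-up
# fitness curves without running the optimizer; the numbers are illustrative only.
if __name__ == "__main__":
    best = [0.90, 0.92, 0.93, 0.93, 0.95]
    avg = [0.85, 0.88, 0.90, 0.91, 0.92]
    n_selected = [10, 9, 8, 8, 7]
    show_res_for_this_run(best, avg, n_selected, feature_num=30)
--------------------------------------------------------------------------------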