├── README.md
├── feature_selection_ga.py
├── fitness_function.py
├── heuristics.py
├── mbaco.py
└── plotaco.py

/README.md:
--------------------------------------------------------------------------------
# modifiedACO
A feature selection method based on a modified binary ant colony optimization (MBACO) algorithm, seeded with the best individual found by a genetic algorithm.
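## Quick start

A minimal end-to-end sketch, mirroring the `__main__` block of `mbaco.py`. It assumes scikit-learn is installed and uses its breast-cancer dataset; all parameter values are illustrative.

```python
from sklearn import svm
from sklearn.datasets import load_breast_cancer

import feature_selection_ga as fga
from mbaco import baco

data = load_breast_cancer()
model = svm.LinearSVC()

# A short GA run produces the binary individual that seeds the ACO pheromones.
ga = fga.FeatureSelectionGA(model, data.data, data.target)
pop, best, fitness = ga.generate(20)

solution = baco(best, data.data, data.target, alpha=1.0, beta=0.0,
                t_percent=40, iter_num=10)
print(solution[0])  # binary mask of the selected features
```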
--------------------------------------------------------------------------------
/feature_selection_ga.py:
--------------------------------------------------------------------------------
import random
import numpy as np
from deap import base, creator
from deap import tools
import fitness_function as ff


class FeatureSelectionGA:

    def __init__(self, model, x, y, cv_split=5, verbose=0):
        self.model = model
        self.n_features = x.shape[1]
        self.toolbox = None
        self.creator = self._create()
        self.cv_split = cv_split
        self.x = x
        self.y = y
        self.verbose = verbose
        if self.verbose == 1:
            print("Model {} will select the best features among {} features using cv_split: {}.".format(model, x.shape[1], cv_split))
            print("Shape of train_x: {} and target: {}".format(x.shape, y.shape))
        self.final_fitness = []
        self.fitness_in_generation = {}
        self.best_ind = None

    def evaluate(self, individual):
        fit_obj = ff.FitnessFunction(self.cv_split)
        np_ind = np.asarray(individual)
        if np.sum(np_ind) == 0:
            # An empty feature subset cannot be scored; give it the worst fitness.
            fitness = 0.0
        else:
            feature_idx = np.where(np_ind == 1)[0]
            fitness = fit_obj.calculate_fitness(self.model, self.x[:, feature_idx], self.y)

        if self.verbose == 1:
            print("Individual: {}  Fitness_score: {}".format(individual, fitness))

        # DEAP expects fitness values as a tuple.
        return fitness,

    def _create(self):
        creator.create("FeatureSelect", base.Fitness, weights=(1.0,))
        creator.create("Individual", list, fitness=creator.FeatureSelect)
        return creator

    def register_toolbox(self, toolbox):
        toolbox.register("evaluate", self.evaluate)
        self.toolbox = toolbox

    def _init_toolbox(self):
        toolbox = base.Toolbox()
        toolbox.register("attr_bool", random.randint, 0, 1)
        # Structure initializers
        toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_bool, self.n_features)
        toolbox.register("population", tools.initRepeat, list, toolbox.individual)
        return toolbox

    def _default_toolbox(self):
        toolbox = self._init_toolbox()
        toolbox.register("mate", tools.cxTwoPoint)
        toolbox.register("mutate", tools.mutFlipBit, indpb=0.1)
        toolbox.register("select", tools.selTournament, tournsize=3)
        toolbox.register("evaluate", self.evaluate)
        return toolbox

    def get_final_scores(self, pop, fits):
        self.final_fitness = list(zip(pop, fits))

    def generate(self, n_pop, cxpb=0.9, mutxpb=0.01, ngen=5, set_toolbox=False):
        if self.verbose == 1:
            print("Population: {}, crossover_probability: {}, mutation_probability: {}, total generations: {}".format(n_pop, cxpb, mutxpb, ngen))

        # Only fall back to the default toolbox when none was registered.
        if not set_toolbox:
            self.toolbox = self._default_toolbox()
        pop = self.toolbox.population(n_pop)
        CXPB, MUTPB, NGEN = cxpb, mutxpb, ngen

        # Evaluate the entire population
        print("EVOLVING.......")
        fitnesses = list(map(self.toolbox.evaluate, pop))
        for ind, fit in zip(pop, fitnesses):
            ind.fitness.values = fit

        for g in range(NGEN):
            print("-- GENERATION {} --".format(g + 1))
            offspring = self.toolbox.select(pop, len(pop))
            self.fitness_in_generation[str(g + 1)] = max(ind.fitness.values[0] for ind in pop)
            # Clone the selected individuals
            offspring = list(map(self.toolbox.clone, offspring))

            # Apply crossover and mutation on the offspring
            for child1, child2 in zip(offspring[::2], offspring[1::2]):
                if random.random() < CXPB:
                    self.toolbox.mate(child1, child2)
                    del child1.fitness.values
                    del child2.fitness.values

            for mutant in offspring:
                if random.random() < MUTPB:
                    self.toolbox.mutate(mutant)
                    del mutant.fitness.values

            # Evaluate the individuals with an invalid fitness
            weak_ind = [ind for ind in offspring if not ind.fitness.valid]
            fitnesses = list(map(self.toolbox.evaluate, weak_ind))
            for ind, fit in zip(weak_ind, fitnesses):
                ind.fitness.values = fit
            print("Evaluated %i individuals" % len(weak_ind))

            # The population is entirely replaced by the offspring
            pop[:] = offspring

            # Gather all the fitnesses in one list and print the stats
            fits = [ind.fitness.values[0] for ind in pop]

            length = len(pop)
            mean = sum(fits) / length
            sum2 = sum(x * x for x in fits)
            std = abs(sum2 / length - mean ** 2) ** 0.5
            if self.verbose == 1:
                print("  Min %s" % min(fits))
                print("  Max %s" % max(fits))
                print("  Avg %s" % mean)
                print("  Std %s" % std)

        self.best_ind = tools.selBest(pop, 1)[0]
        print("Best individual is %s, %s" % (self.best_ind, self.best_ind.fitness.values))
        self.get_final_scores(pop, fits)

        return pop, self.best_ind, self.best_ind.fitness.values


# Standalone usage example:
#
# from sklearn.datasets import load_iris
# from sklearn import svm
# import warnings
#
# model = svm.LinearSVC()
# iris = load_iris()
# x_train = iris.data
# y_train = iris.target
# with warnings.catch_warnings():
#     warnings.simplefilter('ignore')
#     fsga = FeatureSelectionGA(model, x_train, y_train)
#     pop, best_ind, best_fitness = fsga.generate(100)
#     print(best_ind)
--------------------------------------------------------------------------------
/fitness_function.py:
--------------------------------------------------------------------------------
from sklearn.model_selection import StratifiedKFold
import numpy as np
from sklearn.metrics import accuracy_score


class FitnessFunction:

    def __init__(self, n_splits=5, *args, **kwargs):
        """
        Parameters
        ----------
        n_splits : int
            Number of splits for cross-validation.
        """
        self.n_splits = n_splits

    def calculate_fitness(self, model, x, y):
        # Collect out-of-fold predictions for every sample, then score them once.
        cv_set = np.repeat(-1.0, x.shape[0])
        skf = StratifiedKFold(n_splits=self.n_splits)
        for train_index, test_index in skf.split(x, y):
            x_train, x_test = x[train_index], x[test_index]
            y_train, y_test = y[train_index], y[test_index]
            if x_train.shape[0] != y_train.shape[0]:
                raise ValueError("Mismatched number of samples and targets in a CV fold.")
            model.fit(x_train, y_train)
            predicted_y = model.predict(x_test)
            cv_set[test_index] = predicted_y
        return accuracy_score(y, cv_set)
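

# Minimal self-check (a hedged sketch, not part of the original module): the
# out-of-fold accuracy computed above should closely match scikit-learn's
# cross_val_predict on the same stratified folds. Assumes scikit-learn is
# installed; the iris dataset is used purely for illustration.
if __name__ == "__main__":
    from sklearn.datasets import load_iris
    from sklearn.model_selection import cross_val_predict
    from sklearn.svm import LinearSVC

    data = load_iris()
    ff = FitnessFunction(n_splits=5)
    print("FitnessFunction accuracy:", ff.calculate_fitness(LinearSVC(), data.data, data.target))
    # cv=5 with a classifier also uses StratifiedKFold under the hood.
    pred = cross_val_predict(LinearSVC(), data.data, data.target, cv=5)
    print("cross_val_predict accuracy:", accuracy_score(data.target, pred))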
--------------------------------------------------------------------------------
/heuristics.py:
--------------------------------------------------------------------------------
import numpy as np
import numpy.matlib


def heuristic_value_ascore(feature_num, dataset, targets):
    # Per-feature F-score, broadcast into one (feature_num x feature_num)
    # heuristic plane per edge type of the ACO construction graph.
    roads_E = np.zeros(feature_num * feature_num * 4, dtype="float64").reshape(4, feature_num, feature_num)

    ## F-score:
    classes = np.unique(targets)
    class_num = len(classes)
    total_mean_a = dataset.mean(0)
    numerator = 0
    denominator = 0

    sample_num_of_this_tag = np.zeros(class_num, dtype="int64")
    for i in range(0, class_num):
        tags = np.zeros((len(targets)), dtype="int64")
        bool_arr = np.equal(targets, classes[i])
        tags[bool_arr] = 1
        sample_num_of_this_tag[i] = np.sum(tags)
        dataset_only_class = dataset[bool_arr, :]
        class_mean_a = dataset_only_class.mean(0)
        class_mean_a = np.round(class_mean_a, decimals=4)

        # Between-class scatter of each feature's class mean around the global mean ...
        numerator = numerator + np.power(np.subtract(class_mean_a, total_mean_a), 2)
        # ... over the spread of this class's samples around the global mean (as implemented here).
        denominator = denominator + sum(np.power(np.subtract(dataset_only_class, np.matlib.repmat(total_mean_a, dataset_only_class.shape[0], 1)), 2)) / (sample_num_of_this_tag[i] - 1)

    Acc_score = np.divide(numerator, denominator)

    roads_E[0, :, :] = (0.5 / feature_num) * sum(Acc_score)
    roads_E[1, :, :] = np.matlib.repmat(Acc_score, feature_num, 1)
    roads_E[2, :, :] = (0.5 / feature_num) * sum(Acc_score)
    roads_E[3, :, :] = np.matlib.repmat(Acc_score, feature_num, 1)

    return roads_E
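

# Quick sanity check (a hedged sketch; assumes scikit-learn's iris dataset).
# One (feature_num x feature_num) plane is returned per edge type of the graph.
if __name__ == "__main__":
    from sklearn.datasets import load_iris

    data = load_iris()
    E = heuristic_value_ascore(data.data.shape[1], data.data, data.target)
    print(E.shape)   # (4, 4, 4): 4 edge types x 4 features x 4 features
    print(E[1, 0])   # per-feature F-scores, repeated along the rows of plane 1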
--------------------------------------------------------------------------------
/mbaco.py:
--------------------------------------------------------------------------------
import random
from sklearn import svm
import timeit
from sklearn.datasets import load_breast_cancer
from plotaco import *
import warnings
import feature_selection_ga as fga
import numpy as np
from numpy import matlib
import sys


def baco(best_individual, x_data, y_data, alpha=1.0, beta=0.0, t_percent=40, iter_num=10):

    (my_bool, msg_err) = check_baco_args(t_percent, iter_num)
    if not my_bool:
        print("problem with arguments for baco()!!!")
        print(msg_err)
        sys.exit(1)

    train_percentage = 100 - int(t_percent)

    start = timeit.default_timer()
    (best_fitnesses_each_iter, average_fitnesses_each_iter, num_of_features_selected_by_best_ant_each_iter, best_fit_so_far, best_ant_road) = run_feature_selection(
        best_individual, generations=iter_num, alpha=alpha, beta=beta, T0=0.1, T1=0.2,
        Min_T=0.1, Max_T=6, q=0.95, Q=0.3, ant_num=50, feature_num=x_data.shape[1],
        dataset=x_data, targets=y_data, train_percentage=train_percentage)
    end = timeit.default_timer()
    time_temp = end - start

    # Baseline accuracy with every feature kept, for comparison.
    acc_before_run = get_single_fit(x_data, y_data, train_percentage)

    total_feature_num = x_data.shape[1]
    sample_num = x_data.shape[0]

    best_selected_features_num = np.sum(best_ant_road)
    return (best_ant_road, acc_before_run, best_fit_so_far, total_feature_num, best_selected_features_num, best_fitnesses_each_iter, average_fitnesses_each_iter, num_of_features_selected_by_best_ant_each_iter, time_temp, sample_num)


def check_baco_args(t_percent, iter_num):
    msg_err = ""
    try:
        int(t_percent)
    except Exception:
        msg_err = "t_percent should be an integer!"
        return (False, msg_err)

    try:
        int(iter_num)
    except Exception:
        msg_err = "iter_num should be an integer!"
        return (False, msg_err)

    if iter_num > 100:
        msg_err = "iter_num should be at most 100!"
        return (False, msg_err)

    if iter_num < 5:
        msg_err = "iter_num should be at least 5!"
        return (False, msg_err)

    return (True, msg_err)
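
# Pheromone tensor layout (descriptive note, inferred from the selection code below):
#   pheromones[0, i, j] : edge "feature i deselected -> feature j deselected"
#   pheromones[1, i, j] : edge "feature i deselected -> feature j selected"
#   pheromones[2, i, j] : edge "feature i selected   -> feature j deselected"
#   pheromones[3, i, j] : edge "feature i selected   -> feature j selected"
# Trails are seeded from the GA's best individual: moves that agree with it
# start at T0 + T1, moves that contradict it start at T1 only.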

def run_feature_selection(best_individual, generations, alpha, beta, T0, T1, Min_T, Max_T, q, Q, ant_num, feature_num, dataset, targets, train_percentage):

    best_fitnesses_each_iter = []
    average_fitnesses_each_iter = []
    num_of_features_selected_by_best_ant_each_iter = []
    road_maps = np.zeros(ant_num * feature_num * generations, dtype="int64").reshape(generations, ant_num, feature_num)
    best_roads_list = []

    best_fit_so_far = 0
    best_road_so_far = np.zeros(feature_num, dtype="int64")

    np.set_printoptions(suppress=True, threshold=1000)

    # Seed the four pheromone planes from the GA's best individual.
    pheromones_1 = T0 * np.asarray(best_individual) + T1
    pheromones_1 = np.matlib.repmat(pheromones_1, feature_num, 1).reshape(feature_num, feature_num)
    opp_best_individual = np.subtract(np.ones(feature_num), best_individual)
    pheromones_2 = T0 * opp_best_individual + T1
    pheromones_2 = np.matlib.repmat(pheromones_2, feature_num, 1).reshape(feature_num, feature_num)
    pheromone = np.vstack((pheromones_2, pheromones_1))
    pheromone = np.vstack((pheromone, pheromone))
    pheromones = pheromone.reshape(4, feature_num, feature_num)

    for i in range(0, generations):

        visibility = ascore(feature_num, dataset, targets)

        (road_map, pointer) = baco_road_selection(pheromones, visibility, alpha, beta, ant_num, feature_num)

        (iter_best_fit, best_road_so_far, best_fit_so_far, iter_best_road, fitnesses, iter_average_fit, ants_num_of_features_selected) = do_calculations(road_map, dataset, targets, best_fit_so_far, best_road_so_far, train_percentage)

        pheromones = trail_update(fitnesses, pheromones, Min_T, Max_T, Q, q, iter_best_road, feature_num)

        road_maps[i] = road_map
        best_fitnesses_each_iter.append(iter_best_fit)
        average_fitnesses_each_iter.append(iter_average_fit)
        num_of_features_selected_by_best_ant_each_iter.append(sum(best_road_so_far))
        best_roads_list.append(best_road_so_far)

    # Return the road from the iteration whose best fitness was highest.
    my_indx = best_fitnesses_each_iter.index(max(best_fitnesses_each_iter))
    return (best_fitnesses_each_iter, average_fitnesses_each_iter, num_of_features_selected_by_best_ant_each_iter, best_fit_so_far, best_roads_list[my_indx])


def ascore(feature_num, dataset, targets):
    # Same F-score heuristic as heuristics.heuristic_value_ascore, duplicated
    # here so mbaco.py is self-contained.
    visibility = np.zeros(feature_num * feature_num * 4, dtype="float64").reshape(4, feature_num, feature_num)

    ## F-score:
    classes = np.unique(targets)
    class_num = len(classes)
    total_mean_a = dataset.mean(0)
    numerator = 0
    denominator = 0

    sample_num_of_this_tag = np.zeros(class_num, dtype="int64")
    for i in range(0, class_num):
        tags = np.zeros((len(targets)), dtype="int64")
        bool_arr = np.equal(targets, classes[i])
        tags[bool_arr] = 1
        sample_num_of_this_tag[i] = np.sum(tags)
        dataset_only_class = dataset[bool_arr, :]
        class_mean_a = dataset_only_class.mean(0)
        class_mean_a = np.round(class_mean_a, decimals=4)

        numerator = numerator + np.power(np.subtract(class_mean_a, total_mean_a), 2)
        denominator = denominator + sum(np.power(np.subtract(dataset_only_class, np.matlib.repmat(total_mean_a, dataset_only_class.shape[0], 1)), 2)) / (sample_num_of_this_tag[i] - 1)

    Acc_score = np.divide(numerator, denominator)

    visibility[0, :, :] = (0.5 / feature_num) * sum(Acc_score)
    visibility[1, :, :] = np.matlib.repmat(Acc_score, feature_num, 1)
    visibility[2, :, :] = (0.5 / feature_num) * sum(Acc_score)
    visibility[3, :, :] = np.matlib.repmat(Acc_score, feature_num, 1)

    return visibility


def get_accuracy_for_this_solution(train_dataset, train_targets, test_dataset, test_targets):
    lin_clf = svm.LinearSVC()
    lin_clf.fit(train_dataset, train_targets)
    predicted_targets = lin_clf.predict(test_dataset)
    # Fraction of correctly predicted test samples.
    return np.mean(np.equal(test_targets, predicted_targets))


def separate_datasets(dataset, targets, train_percentage):
    # Random hold-out split: a fresh split is drawn every time fitnesses are computed.
    leng = len(dataset[:, 0])
    s = int(leng * (train_percentage / 100))

    samples_list = random.sample(range(0, leng), s)

    mask = np.zeros((leng), dtype=bool)
    mask[samples_list] = True

    train_dataset = dataset[mask, :]
    test_dataset = dataset[~mask, :]
    train_targets = targets[mask]
    test_targets = targets[~mask]

    return (train_dataset, test_dataset, train_targets, test_targets)


def get_fitnesses(road_map, dataset, targets, train_percentage):
    num_of_features_selected = list()
    fitnesses = list()

    for ant_solution in road_map:
        if np.sum(ant_solution) == 0:
            # No feature selected: worst possible fitness.
            num_of_features_selected.append(0)
            fitnesses.append(0)
        else:
            # Keep only the columns this ant switched on.
            new_dataset = dataset[:, np.asarray(ant_solution, dtype=bool)]
            num_of_features_selected.append(new_dataset.shape[1])

            (train_dataset, test_dataset, train_targets, test_targets) = separate_datasets(new_dataset, targets, train_percentage)
            fitnesses.append(get_accuracy_for_this_solution(train_dataset, train_targets, test_dataset, test_targets))

    return num_of_features_selected, fitnesses


def get_single_fit(dataset, targets, train_percentage):
    (train_dataset, test_dataset, train_targets, test_targets) = separate_datasets(dataset, targets, train_percentage)
    return get_accuracy_for_this_solution(train_dataset, train_targets, test_dataset, test_targets)
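
# Transition rule (descriptive note): an ant that last fixed feature i with bit b
# chooses the next feature j and its bit b' with probability proportional to
#     tau[plane(b, b'), i, j] ** alpha  *  eta[plane(b, b'), i, j] ** beta
# normalized over all 2 * feature_num remaining candidate moves; the sampling
# itself is a cumulative-sum roulette wheel, implemented in pick_next_location.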

def pick_next_location(probs, feature_num):
    # Roulette-wheel sampling over 2*feature_num candidates: the first half of
    # `probs` encodes "set feature j to 0", the second half "set feature j to 1".
    cumulative = 0
    index = len(probs) - 1  # fallback in case of floating-point round-off
    r = np.random.random_sample()
    for x in range(len(probs)):
        cumulative = cumulative + probs[x]
        if r < cumulative:
            index = x
            break
    # Because probs is (feature_num + feature_num) long, fold the index back.
    if index >= feature_num:
        index = index - feature_num
        zero_or_one = 1
    else:
        zero_or_one = 0
    return (index, zero_or_one)


def baco_road_selection(pheromones, visibility, alpha, beta, ant_num, feature_num):
    road_map = np.zeros(ant_num * feature_num, dtype="int64").reshape(ant_num, feature_num)
    pointer = np.zeros(ant_num * feature_num, dtype="int64").reshape(ant_num, feature_num)

    for k in range(0, ant_num):
        # Attractiveness of every edge: pheromone^alpha * visibility^beta.
        indx = np.multiply(np.power(pheromones, alpha), np.power(visibility, beta))
        for j in range(0, feature_num):

            if j == 0:
                # The first feature is picked uniformly at random ...
                cur_feature = np.random.randint(0, feature_num, 1)[0]
                pointer[k, j] = cur_feature
                # ... and its bit is drawn from the pheromone mass of the
                # "deselect" planes relative to all four planes.
                temp = np.sum(pheromones[0, :, cur_feature] + pheromones[2, :, cur_feature]) / np.sum(pheromones[0, :, cur_feature] + pheromones[1, :, cur_feature] + pheromones[2, :, cur_feature] + pheromones[3, :, cur_feature])
                rand = np.random.random_sample()
                if rand < temp:
                    road_map[k, cur_feature] = 0
                else:
                    road_map[k, cur_feature] = 1

            else:
                if road_map[k, pointer[k, j - 1]] == 1:
                    # Previous feature selected: planes 2 (next bit 0) and 3 (next bit 1).
                    numerator = np.hstack((indx[2, pointer[k, j - 1], :], indx[3, pointer[k, j - 1], :]))
                else:
                    # Previous feature deselected: planes 0 (next bit 0) and 1 (next bit 1).
                    numerator = np.hstack((indx[0, pointer[k, j - 1], :], indx[1, pointer[k, j - 1], :]))
                probability = np.divide(numerator, np.sum(numerator))
                (selected_feature_indx, zero_or_one) = pick_next_location(probability, feature_num)
                pointer[k, j] = selected_feature_indx
                road_map[k, pointer[k, j]] = zero_or_one

            # Zero out all edges into the feature just visited, so its
            # selection probability drops to zero for this ant.
            indx[:, :, pointer[k, j]] = 0
    return (road_map, pointer)


def do_calculations(road_map, dataset, targets, best_fit_so_far, best_road_so_far, train_percentage):

    ants_num_of_features_selected, fitnesses = get_fitnesses(road_map, dataset, targets, train_percentage)

    iter_average_fit = np.mean(fitnesses, axis=0)
    iter_best_fit = max(fitnesses)
    iter_best_ant = fitnesses.index(iter_best_fit)
    iter_best_road = road_map[iter_best_ant, :]

    if iter_best_fit > best_fit_so_far:
        best_fit_so_far = iter_best_fit
        best_road_so_far = iter_best_road
    return (iter_best_fit, best_road_so_far, best_fit_so_far, iter_best_road, fitnesses, iter_average_fit, ants_num_of_features_selected)
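
# Pheromone update (descriptive note): trail_update below first evaporates all
# trails (tau <- q * tau, with q = 0.95 in baco()), then deposits
# delta = 1 / iteration_best_fitness on every edge consistent with the
# iteration-best ant's road, and finally clamps all trails to [Min_T, Max_T]
# so that no move's probability can collapse to zero or dominate completely.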

def trail_update(fitnesses, pheromones, Min_T, Max_T, Q, q, iter_best_road, feature_num):

    pheromones = pheromones * q  # pheromone evaporation

    max_fit = max(fitnesses)

    # Mark the edges used by the iteration-best ant.
    change_pheromones = np.zeros(feature_num * feature_num * 4, dtype="float64").reshape(4, feature_num, feature_num)
    for i in range(0, len(iter_best_road)):
        if iter_best_road[i] == 0:
            change_pheromones[0, :, i] = 1
            change_pheromones[2, :, i] = 1
        else:
            change_pheromones[1, :, i] = 1
            change_pheromones[3, :, i] = 1

    # Alternative (error-based) reinforcement kept from the original experiments;
    # Q stays in the signature for this variant:
    # class_err = np.array([1 - f for f in fitnesses])
    # change_pheromones = (Q / (min(class_err) + 0.001)) * change_pheromones

    # Reinforce those edges in inverse proportion to the best fitness,
    # guarding against division by zero.
    if max_fit == 0:
        change_pheromones = (1 / (max_fit + 0.001)) * change_pheromones
    else:
        change_pheromones = (1 / max_fit) * change_pheromones

    pheromones = pheromones + change_pheromones
    # Keep every trail inside [Min_T, Max_T].
    np.clip(pheromones, Min_T, Max_T, out=pheromones)

    return pheromones
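
# Driver (descriptive note): the GA in feature_selection_ga.py first evolves a
# strong binary individual; its bit pattern seeds the pheromone trails of the
# modified binary ACO above, which then refines the subset. If the ACO's last
# iteration never beats the GA's cross-validated fitness, the GA individual is
# reported instead.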
if __name__ == "__main__":
    alpha = float(input("Enter alpha : "))
    beta = float(input("Enter beta : "))
    data = load_breast_cancer()
    x_train = data.data
    y_train = data.target
    model = svm.LinearSVC()
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        fsga = fga.FeatureSelectionGA(model, x_train, y_train)
        (pop, best, fitness_value) = fsga.generate(20)
    solution = baco(best, x_train, y_train, alpha=alpha, beta=beta, t_percent=40, iter_num=10)
    (best_ant_road, acc_before_run, best_fit_so_far, total_feature_num, best_selected_features_num,
     best_fitnesses_each_iter, average_fitnesses_each_iter, num_of_features_selected_by_best_ant_each_iter, time_temp,
     sample_num) = solution
    if fitness_value[0] < best_fitnesses_each_iter[-1]:
        print("Binary representation of feature selection : ", best_ant_road)
        print("Total number of features in dataset : ", total_feature_num)
        print("Number of features selected : ", num_of_features_selected_by_best_ant_each_iter[-1])
        print("Accuracy before MBACO : ", acc_before_run)
        print("Accuracy with best fit : ", best_fit_so_far)
        print("Accuracy of best ant in last iteration : ", best_fitnesses_each_iter[-1])
        draw_baco(solution)
    else:
        print("GA gave the best individual, which is", best, " (", fitness_value, ")")
--------------------------------------------------------------------------------
/plotaco.py:
--------------------------------------------------------------------------------
import matplotlib.pyplot as plt
import matplotlib.ticker as plticker
import numpy as np


def show_res_for_this_run(best_fitnesses_each_iter, average_fitnesses_each_iter, num_of_features_selected_by_best_ant_each_iter, feature_num):

    iterations = np.arange(1, len(best_fitnesses_each_iter) + 1, dtype="int64")

    # Place one x-axis tick per iteration.
    loc = plticker.MultipleLocator(base=1)

    fig, ax1 = plt.subplots(figsize=(10, 8))

    ##################################
    plt.subplot(221)

    xx1 = np.array(iterations)
    yy1 = np.array(best_fitnesses_each_iter)
    plt.plot(xx1, yy1, 'bo', xx1, yy1, 'k')

    plt.xlabel('iteration num')
    plt.ylabel('accuracy (fitness)')
    plt.title('Visualization of Accuracy over each Iteration')

    ax1 = fig.gca()
    ax1.xaxis.set_major_locator(loc)
    plt.grid(True)

    ##################################
    plt.subplot(222)

    xx2 = np.array(iterations)
    yy2 = np.array(average_fitnesses_each_iter)
    plt.plot(xx2, yy2, 'bo', xx2, yy2, 'k')

    plt.xlabel('iteration num')
    plt.ylabel('average accuracy')
    plt.title('Visualization of Average of Accuracy over each Iteration')

    ax2 = fig.gca()
    ax2.xaxis.set_major_locator(loc)

    ##################################
    # A third panel (bar chart of the number of features selected by the best
    # ant in each iteration, with labeled bars and y-limit feature_num) was
    # sketched here but is currently disabled:
    #
    # plt.subplot(223)
    # N = len(num_of_features_selected_by_best_ant_each_iter)
    # ind = np.arange(N)   # the x locations for the groups
    # width = 0.25         # the width of the bars
    # ax3 = fig.gca()
    # rects = ax3.bar(ind, num_of_features_selected_by_best_ant_each_iter, width, color='c')
    # ax3.set_ylabel('num of selected features (by best ant)')
    # ax3.set_xticks(ind + width / 2)
    # ax3.set_xticklabels(np.arange(1, N + 1))
    # ax3.set_ylim([0, feature_num])

    plt.show()

def draw_baco(solution):

    if len(solution) != 10:
        print("+++ can't draw the solution due to a problem with it! +++")
        return

    (best_ant_road, acc_before_run, best_fit_so_far, total_feature_num, best_selected_features_num, best_fitnesses_each_iter, average_fitnesses_each_iter, num_of_features_selected_by_best_ant_each_iter, time_temp, sample_num) = solution

    show_res_for_this_run(best_fitnesses_each_iter, average_fitnesses_each_iter, num_of_features_selected_by_best_ant_each_iter, total_feature_num)
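

# Minimal smoke test (a hedged sketch): draws the two panels from made-up
# fitness curves without running the optimizer; the numbers are illustrative only.
if __name__ == "__main__":
    best = [0.90, 0.92, 0.93, 0.93, 0.95]
    avg = [0.85, 0.88, 0.90, 0.91, 0.92]
    n_selected = [10, 9, 8, 8, 7]
    show_res_for_this_run(best, avg, n_selected, feature_num=30)
--------------------------------------------------------------------------------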