├── 001_christine.py ├── 001_jasmine.py ├── 001_madeline.py ├── 001_philippine.py ├── 001_sylvine.py ├── 002_albert.py ├── 002_dilbert.py ├── 002_fabert.py ├── 002_robert.py ├── 002_volkert.py ├── 003_alexis.py ├── 003_dionis.py ├── 003_grigoris.py ├── 003_jannis.py ├── 003_wallis.py ├── 004_evita.py ├── 004_flora.py ├── 004_helena.py ├── 004_tania.py ├── 004_yolanda.py ├── DeepFeedNet.py ├── FeedForwardNet.py ├── LICENSE ├── README.md └── RegDeepNet.py /001_christine.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import numpy as np 5 | import sklearn.cross_validation 6 | 7 | import autosklearn 8 | import autosklearn.data 9 | import autosklearn.data.data_manager 10 | import autosklearn.models.evaluator 11 | from ParamSklearn.classification import ParamSklearnClassifier 12 | 13 | 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('input') 16 | parser.add_argument('output') 17 | args = parser.parse_args() 18 | 19 | input = args.input 20 | dataset = 'christine' 21 | output = args.output 22 | 23 | D = autosklearn.data.data_manager.DataManager(dataset, input) 24 | X = D.data['X_train'] 25 | y = D.data['Y_train'] 26 | X_valid = D.data['X_valid'] 27 | X_test = D.data['X_test'] 28 | 29 | weights = np.array([1.0]) 30 | 31 | # Choosing the single best model without feature selection by RFE (but by 32 | # select percentile classification which is in the auto-sklearn pipeline) seems 33 | # to work best here 34 | configurations = [ 35 | {'balancing:strategy': 'none', 36 | 'classifier': 'libsvm_svc', 37 | 'imputation:strategy': 'median', 38 | 'libsvm_svc:C': '5.06888516101', 39 | 'libsvm_svc:class_weight': 'None', 40 | 'libsvm_svc:gamma': '0.0870955322069', 41 | 'libsvm_svc:kernel': 'rbf', 42 | 'libsvm_svc:max_iter': '-1.0', 43 | 'libsvm_svc:shrinking': 'False', 44 | 'libsvm_svc:tol': '2.62849564978e-05', 45 | 'preprocessor': 'select_percentile_classification', 46 | 'rescaling:strategy': 'min/max', 47 | 'select_percentile_classification:percentile': '36.4058569521', 48 | 'select_percentile_classification:score_func': 'f_classif'} 49 | ] 50 | 51 | classifiers = [] 52 | predictions_valid = [] 53 | predictions_test = [] 54 | 55 | # Make predictions and weight them 56 | for weight, configuration in zip(weights, configurations): 57 | for param in configuration: 58 | try: 59 | configuration[param] = int(configuration[param]) 60 | except Exception: 61 | try: 62 | configuration[param] = float(configuration[param]) 63 | except Exception: 64 | pass 65 | 66 | classifier = ParamSklearnClassifier(configuration, 1) 67 | classifiers.append(classifier) 68 | try: 69 | classifier.fit(X.copy(), y.copy()) 70 | predictions_valid.append( 71 | classifier.predict_proba(X_valid.copy()) * weight) 72 | predictions_test.append( 73 | classifier.predict_proba(X_test.copy()) * weight) 74 | except Exception as e: 75 | print e 76 | print configuration 77 | 78 | # Output the predictions 79 | for name, predictions in [('valid', predictions_valid), 80 | ('test', predictions_test)]: 81 | predictions = np.array(predictions) 82 | predictions = np.sum(predictions, axis=0) 83 | predictions = predictions[:, 1].reshape((-1, 1)) 84 | 85 | filepath = os.path.join(output, '%s_%s_000.predict' % (dataset, name)) 86 | np.savetxt(filepath, predictions, delimiter=' ') -------------------------------------------------------------------------------- /001_jasmine.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import numpy as np 5 | 6 | import autosklearn 7 | import autosklearn.data 8 | import autosklearn.data.data_manager 9 | import autosklearn.models.evaluator 10 | from ParamSklearn.classification import ParamSklearnClassifier 11 | 12 | parser = argparse.ArgumentParser() 13 | parser.add_argument('input') 14 | parser.add_argument('output') 15 | args = parser.parse_args() 16 | 17 | input = args.input 18 | dataset = 'jasmine' 19 | output = args.output 20 | 21 | D = autosklearn.data.data_manager.DataManager(dataset, input) 22 | X = D.data['X_train'] 23 | y = D.data['Y_train'] 24 | X_valid = D.data['X_valid'] 25 | X_test = D.data['X_test'] 26 | 27 | # Subset of features found with RFE. Feature with least importance in sklearn 28 | # RF removed. Afterwards, trained RF on remaining features with 5CV. In the 29 | # end, choose feature set with lowest error 30 | features = [6, 8, 10, 12, 16, 18, 20, 21, 22, 25, 26, 33, 37, 38, 39, 40, 42, 31 | 44, 46, 47, 52, 55, 56, 58, 62, 77, 78, 79, 82, 85, 91, 92, 94, 96, 32 | 101, 104, 106, 108, 110, 119, 122, 125, 130, 131, 133, 137, 139, 33 | 140, 141] 34 | 35 | X = X[:, features] 36 | X_valid = X_valid[:, features] 37 | X_test = X_test[:, features] 38 | 39 | # Weights of the ensemble members as determined by Ensemble Selection 40 | weights = np.array([0.140000, 0.120000, 0.080000, 0.060000, 0.040000, 0.040000, 41 | 0.040000, 0.040000, 0.040000, 0.040000, 0.020000, 0.020000, 42 | 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 43 | 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 44 | 0.020000, 0.020000, 0.020000, 0.020000]) 45 | 46 | # Ensemble members found by SMAC 47 | configurations = [ 48 | {'balancing:strategy': 'weighting', 49 | 'classifier': 'random_forest', 50 | 'imputation:strategy': 'median', 51 | 'preprocessor': 'select_percentile_classification', 52 | 'random_forest:bootstrap': 'True', 53 | 'random_forest:criterion': 'gini', 54 | 'random_forest:max_depth': 'None', 55 | 'random_forest:max_features': '1.58545644982', 56 | 'random_forest:max_leaf_nodes': 'None', 57 | 'random_forest:min_samples_leaf': '3.0', 58 | 'random_forest:min_samples_split': '2.0', 59 | 'random_forest:n_estimators': '100.0', 60 | 'rescaling:strategy': 'min/max', 61 | 'select_percentile_classification:percentile': '39.9235093683', 62 | 'select_percentile_classification:score_func': 'f_classif'}, 63 | {'balancing:strategy': 'weighting', 64 | 'classifier': 'random_forest', 65 | 'imputation:strategy': 'most_frequent', 66 | 'preprocessor': 'select_rates', 67 | 'random_forest:bootstrap': 'False', 68 | 'random_forest:criterion': 'entropy', 69 | 'random_forest:max_depth': 'None', 70 | 'random_forest:max_features': '0.6715305958', 71 | 'random_forest:max_leaf_nodes': 'None', 72 | 'random_forest:min_samples_leaf': '4.0', 73 | 'random_forest:min_samples_split': '3.0', 74 | 'random_forest:n_estimators': '100.0', 75 | 'rescaling:strategy': 'standard', 76 | 'select_rates:alpha': '0.486873466534', 77 | 'select_rates:mode': 'fwe', 78 | 'select_rates:score_func': 'f_classif'}, 79 | {'balancing:strategy': 'weighting', 80 | 'classifier': 'random_forest', 81 | 'imputation:strategy': 'mean', 82 | 'preprocessor': 'select_percentile_classification', 83 | 'random_forest:bootstrap': 'False', 84 | 'random_forest:criterion': 'gini', 85 | 'random_forest:max_depth': 'None', 86 | 'random_forest:max_features': '1.82773631717', 87 | 'random_forest:max_leaf_nodes': 'None', 88 | 'random_forest:min_samples_leaf': '2.0', 89 | 'random_forest:min_samples_split': '3.0', 90 | 'random_forest:n_estimators': '100.0', 91 | 'rescaling:strategy': 'min/max', 92 | 'select_percentile_classification:percentile': '50.0', 93 | 'select_percentile_classification:score_func': 'chi2'}, 94 | {'balancing:strategy': 'none', 95 | 'classifier': 'random_forest', 96 | 'fast_ica:algorithm': 'deflation', 97 | 'fast_ica:fun': 'logcosh', 98 | 'fast_ica:n_components': '832.0', 99 | 'fast_ica:whiten': 'False', 100 | 'imputation:strategy': 'median', 101 | 'preprocessor': 'fast_ica', 102 | 'random_forest:bootstrap': 'False', 103 | 'random_forest:criterion': 'gini', 104 | 'random_forest:max_depth': 'None', 105 | 'random_forest:max_features': '2.93148979051', 106 | 'random_forest:max_leaf_nodes': 'None', 107 | 'random_forest:min_samples_leaf': '5.0', 108 | 'random_forest:min_samples_split': '7.0', 109 | 'random_forest:n_estimators': '100.0', 110 | 'rescaling:strategy': 'min/max'}, 111 | {'balancing:strategy': 'weighting', 112 | 'classifier': 'random_forest', 113 | 'imputation:strategy': 'mean', 114 | 'preprocessor': 'select_percentile_classification', 115 | 'random_forest:bootstrap': 'False', 116 | 'random_forest:criterion': 'entropy', 117 | 'random_forest:max_depth': 'None', 118 | 'random_forest:max_features': '1.79654377812', 119 | 'random_forest:max_leaf_nodes': 'None', 120 | 'random_forest:min_samples_leaf': '1.0', 121 | 'random_forest:min_samples_split': '6.0', 122 | 'random_forest:n_estimators': '100.0', 123 | 'rescaling:strategy': 'min/max', 124 | 'select_percentile_classification:percentile': '50.0', 125 | 'select_percentile_classification:score_func': 'chi2'}, 126 | {'balancing:strategy': 'weighting', 127 | 'classifier': 'extra_trees', 128 | 'extra_trees:bootstrap': 'False', 129 | 'extra_trees:criterion': 'entropy', 130 | 'extra_trees:max_depth': 'None', 131 | 'extra_trees:max_features': '1.81061189332', 132 | 'extra_trees:min_samples_leaf': '1.0', 133 | 'extra_trees:min_samples_split': '3.0', 134 | 'extra_trees:n_estimators': '100.0', 135 | 'imputation:strategy': 'mean', 136 | 'preprocessor': 'select_rates', 137 | 'rescaling:strategy': 'none', 138 | 'select_rates:alpha': '0.201722721361', 139 | 'select_rates:mode': 'fwe', 140 | 'select_rates:score_func': 'f_classif'}, 141 | {'balancing:strategy': 'weighting', 142 | 'classifier': 'extra_trees', 143 | 'extra_trees:bootstrap': 'False', 144 | 'extra_trees:criterion': 'gini', 145 | 'extra_trees:max_depth': 'None', 146 | 'extra_trees:max_features': '1.76442905847', 147 | 'extra_trees:min_samples_leaf': '4.0', 148 | 'extra_trees:min_samples_split': '6.0', 149 | 'extra_trees:n_estimators': '100.0', 150 | 'imputation:strategy': 'mean', 151 | 'preprocessor': 'select_rates', 152 | 'rescaling:strategy': 'min/max', 153 | 'select_rates:alpha': '0.113572172949', 154 | 'select_rates:mode': 'fwe', 155 | 'select_rates:score_func': 'f_classif'}, 156 | {'balancing:strategy': 'weighting', 157 | 'classifier': 'random_forest', 158 | 'imputation:strategy': 'median', 159 | 'preprocessor': 'select_rates', 160 | 'random_forest:bootstrap': 'False', 161 | 'random_forest:criterion': 'entropy', 162 | 'random_forest:max_depth': 'None', 163 | 'random_forest:max_features': '2.87832643035', 164 | 'random_forest:max_leaf_nodes': 'None', 165 | 'random_forest:min_samples_leaf': '1.0', 166 | 'random_forest:min_samples_split': '19.0', 167 | 'random_forest:n_estimators': '100.0', 168 | 'rescaling:strategy': 'min/max', 169 | 'select_rates:alpha': '0.110716868617', 170 | 'select_rates:mode': 'fwe', 171 | 'select_rates:score_func': 'f_classif'}, 172 | {'balancing:strategy': 'weighting', 173 | 'classifier': 'extra_trees', 174 | 'extra_trees:bootstrap': 'True', 175 | 'extra_trees:criterion': 'entropy', 176 | 'extra_trees:max_depth': 'None', 177 | 'extra_trees:max_features': '3.23138088334', 178 | 'extra_trees:min_samples_leaf': '3.0', 179 | 'extra_trees:min_samples_split': '6.0', 180 | 'extra_trees:n_estimators': '100.0', 181 | 'imputation:strategy': 'mean', 182 | 'preprocessor': 'select_percentile_classification', 183 | 'rescaling:strategy': 'min/max', 184 | 'select_percentile_classification:percentile': '45.1994111355', 185 | 'select_percentile_classification:score_func': 'chi2'}, 186 | {'balancing:strategy': 'none', 187 | 'classifier': 'random_forest', 188 | 'fast_ica:algorithm': 'deflation', 189 | 'fast_ica:fun': 'logcosh', 190 | 'fast_ica:n_components': '509.0', 191 | 'fast_ica:whiten': 'True', 192 | 'imputation:strategy': 'mean', 193 | 'preprocessor': 'fast_ica', 194 | 'random_forest:bootstrap': 'False', 195 | 'random_forest:criterion': 'entropy', 196 | 'random_forest:max_depth': 'None', 197 | 'random_forest:max_features': '2.2727882732', 198 | 'random_forest:max_leaf_nodes': 'None', 199 | 'random_forest:min_samples_leaf': '2.0', 200 | 'random_forest:min_samples_split': '12.0', 201 | 'random_forest:n_estimators': '100.0', 202 | 'rescaling:strategy': 'min/max'}, 203 | {'balancing:strategy': 'weighting', 204 | 'classifier': 'random_forest', 205 | 'imputation:strategy': 'median', 206 | 'preprocessor': 'select_percentile_classification', 207 | 'random_forest:bootstrap': 'False', 208 | 'random_forest:criterion': 'entropy', 209 | 'random_forest:max_depth': 'None', 210 | 'random_forest:max_features': '2.32162402484', 211 | 'random_forest:max_leaf_nodes': 'None', 212 | 'random_forest:min_samples_leaf': '1.0', 213 | 'random_forest:min_samples_split': '12.0', 214 | 'random_forest:n_estimators': '100.0', 215 | 'rescaling:strategy': 'min/max', 216 | 'select_percentile_classification:percentile': '41.8671636453', 217 | 'select_percentile_classification:score_func': 'f_classif'}, 218 | {'balancing:strategy': 'weighting', 219 | 'classifier': 'random_forest', 220 | 'fast_ica:algorithm': 'deflation', 221 | 'fast_ica:fun': 'logcosh', 222 | 'fast_ica:n_components': '690.0', 223 | 'fast_ica:whiten': 'True', 224 | 'imputation:strategy': 'mean', 225 | 'preprocessor': 'fast_ica', 226 | 'random_forest:bootstrap': 'False', 227 | 'random_forest:criterion': 'entropy', 228 | 'random_forest:max_depth': 'None', 229 | 'random_forest:max_features': '2.3355464987', 230 | 'random_forest:max_leaf_nodes': 'None', 231 | 'random_forest:min_samples_leaf': '2.0', 232 | 'random_forest:min_samples_split': '11.0', 233 | 'random_forest:n_estimators': '100.0', 234 | 'rescaling:strategy': 'min/max'}, 235 | {'balancing:strategy': 'weighting', 236 | 'classifier': 'random_forest', 237 | 'imputation:strategy': 'median', 238 | 'preprocessor': 'select_rates', 239 | 'random_forest:bootstrap': 'True', 240 | 'random_forest:criterion': 'entropy', 241 | 'random_forest:max_depth': 'None', 242 | 'random_forest:max_features': '4.2700093411', 243 | 'random_forest:max_leaf_nodes': 'None', 244 | 'random_forest:min_samples_leaf': '4.0', 245 | 'random_forest:min_samples_split': '11.0', 246 | 'random_forest:n_estimators': '100.0', 247 | 'rescaling:strategy': 'min/max', 248 | 'select_rates:alpha': '0.294021193269', 249 | 'select_rates:mode': 'fwe', 250 | 'select_rates:score_func': 'f_classif'}, 251 | {'balancing:strategy': 'weighting', 252 | 'classifier': 'random_forest', 253 | 'fast_ica:algorithm': 'deflation', 254 | 'fast_ica:fun': 'logcosh', 255 | 'fast_ica:n_components': '613.0', 256 | 'fast_ica:whiten': 'True', 257 | 'imputation:strategy': 'median', 258 | 'preprocessor': 'fast_ica', 259 | 'random_forest:bootstrap': 'False', 260 | 'random_forest:criterion': 'entropy', 261 | 'random_forest:max_depth': 'None', 262 | 'random_forest:max_features': '1.8000767552', 263 | 'random_forest:max_leaf_nodes': 'None', 264 | 'random_forest:min_samples_leaf': '2.0', 265 | 'random_forest:min_samples_split': '7.0', 266 | 'random_forest:n_estimators': '100.0', 267 | 'rescaling:strategy': 'min/max'}, 268 | {'balancing:strategy': 'none', 269 | 'classifier': 'random_forest', 270 | 'fast_ica:algorithm': 'deflation', 271 | 'fast_ica:fun': 'logcosh', 272 | 'fast_ica:n_components': '661.0', 273 | 'fast_ica:whiten': 'False', 274 | 'imputation:strategy': 'mean', 275 | 'preprocessor': 'fast_ica', 276 | 'random_forest:bootstrap': 'False', 277 | 'random_forest:criterion': 'entropy', 278 | 'random_forest:max_depth': 'None', 279 | 'random_forest:max_features': '2.23424202393', 280 | 'random_forest:max_leaf_nodes': 'None', 281 | 'random_forest:min_samples_leaf': '3.0', 282 | 'random_forest:min_samples_split': '10.0', 283 | 'random_forest:n_estimators': '100.0', 284 | 'rescaling:strategy': 'min/max'}, 285 | {'balancing:strategy': 'none', 286 | 'classifier': 'random_forest', 287 | 'fast_ica:algorithm': 'deflation', 288 | 'fast_ica:fun': 'logcosh', 289 | 'fast_ica:n_components': '606.0', 290 | 'fast_ica:whiten': 'True', 291 | 'imputation:strategy': 'median', 292 | 'preprocessor': 'fast_ica', 293 | 'random_forest:bootstrap': 'False', 294 | 'random_forest:criterion': 'entropy', 295 | 'random_forest:max_depth': 'None', 296 | 'random_forest:max_features': '1.82743208676', 297 | 'random_forest:max_leaf_nodes': 'None', 298 | 'random_forest:min_samples_leaf': '3.0', 299 | 'random_forest:min_samples_split': '11.0', 300 | 'random_forest:n_estimators': '100.0', 301 | 'rescaling:strategy': 'min/max'}, 302 | {'balancing:strategy': 'weighting', 303 | 'classifier': 'extra_trees', 304 | 'extra_trees:bootstrap': 'True', 305 | 'extra_trees:criterion': 'gini', 306 | 'extra_trees:max_depth': 'None', 307 | 'extra_trees:max_features': '4.32850858484', 308 | 'extra_trees:min_samples_leaf': '3.0', 309 | 'extra_trees:min_samples_split': '5.0', 310 | 'extra_trees:n_estimators': '100.0', 311 | 'imputation:strategy': 'mean', 312 | 'preprocessor': 'select_rates', 313 | 'rescaling:strategy': 'min/max', 314 | 'select_rates:alpha': '0.118453703147', 315 | 'select_rates:mode': 'fpr', 316 | 'select_rates:score_func': 'f_classif'}, 317 | {'balancing:strategy': 'weighting', 318 | 'classifier': 'random_forest', 319 | 'fast_ica:algorithm': 'deflation', 320 | 'fast_ica:fun': 'logcosh', 321 | 'fast_ica:n_components': '1098.0', 322 | 'fast_ica:whiten': 'True', 323 | 'imputation:strategy': 'most_frequent', 324 | 'preprocessor': 'fast_ica', 325 | 'random_forest:bootstrap': 'False', 326 | 'random_forest:criterion': 'entropy', 327 | 'random_forest:max_depth': 'None', 328 | 'random_forest:max_features': '4.83031750621', 329 | 'random_forest:max_leaf_nodes': 'None', 330 | 'random_forest:min_samples_leaf': '1.0', 331 | 'random_forest:min_samples_split': '15.0', 332 | 'random_forest:n_estimators': '100.0', 333 | 'rescaling:strategy': 'min/max'}, 334 | {'balancing:strategy': 'weighting', 335 | 'classifier': 'random_forest', 336 | 'imputation:strategy': 'median', 337 | 'preprocessor': 'select_rates', 338 | 'random_forest:bootstrap': 'False', 339 | 'random_forest:criterion': 'gini', 340 | 'random_forest:max_depth': 'None', 341 | 'random_forest:max_features': '3.52038352463', 342 | 'random_forest:max_leaf_nodes': 'None', 343 | 'random_forest:min_samples_leaf': '4.0', 344 | 'random_forest:min_samples_split': '4.0', 345 | 'random_forest:n_estimators': '100.0', 346 | 'rescaling:strategy': 'standard', 347 | 'select_rates:alpha': '0.441859738474', 348 | 'select_rates:mode': 'fpr', 349 | 'select_rates:score_func': 'f_classif'}, 350 | {'balancing:strategy': 'none', 351 | 'classifier': 'random_forest', 352 | 'fast_ica:algorithm': 'deflation', 353 | 'fast_ica:fun': 'logcosh', 354 | 'fast_ica:n_components': '743.0', 355 | 'fast_ica:whiten': 'False', 356 | 'imputation:strategy': 'median', 357 | 'preprocessor': 'fast_ica', 358 | 'random_forest:bootstrap': 'False', 359 | 'random_forest:criterion': 'entropy', 360 | 'random_forest:max_depth': 'None', 361 | 'random_forest:max_features': '2.37406180812', 362 | 'random_forest:max_leaf_nodes': 'None', 363 | 'random_forest:min_samples_leaf': '2.0', 364 | 'random_forest:min_samples_split': '17.0', 365 | 'random_forest:n_estimators': '100.0', 366 | 'rescaling:strategy': 'min/max'}, 367 | {'balancing:strategy': 'none', 368 | 'classifier': 'random_forest', 369 | 'fast_ica:algorithm': 'deflation', 370 | 'fast_ica:fun': 'logcosh', 371 | 'fast_ica:n_components': '531.0', 372 | 'fast_ica:whiten': 'True', 373 | 'imputation:strategy': 'mean', 374 | 'preprocessor': 'fast_ica', 375 | 'random_forest:bootstrap': 'False', 376 | 'random_forest:criterion': 'entropy', 377 | 'random_forest:max_depth': 'None', 378 | 'random_forest:max_features': '2.38993786345', 379 | 'random_forest:max_leaf_nodes': 'None', 380 | 'random_forest:min_samples_leaf': '4.0', 381 | 'random_forest:min_samples_split': '16.0', 382 | 'random_forest:n_estimators': '100.0', 383 | 'rescaling:strategy': 'min/max'}, 384 | {'balancing:strategy': 'weighting', 385 | 'classifier': 'extra_trees', 386 | 'extra_trees:bootstrap': 'False', 387 | 'extra_trees:criterion': 'entropy', 388 | 'extra_trees:max_depth': 'None', 389 | 'extra_trees:max_features': '1.60284209578', 390 | 'extra_trees:min_samples_leaf': '4.0', 391 | 'extra_trees:min_samples_split': '10.0', 392 | 'extra_trees:n_estimators': '100.0', 393 | 'imputation:strategy': 'most_frequent', 394 | 'preprocessor': 'select_rates', 395 | 'rescaling:strategy': 'min/max', 396 | 'select_rates:alpha': '0.486662334462', 397 | 'select_rates:mode': 'fwe', 398 | 'select_rates:score_func': 'chi2'}, 399 | {'balancing:strategy': 'weighting', 400 | 'classifier': 'random_forest', 401 | 'fast_ica:algorithm': 'deflation', 402 | 'fast_ica:fun': 'logcosh', 403 | 'fast_ica:n_components': '1082.0', 404 | 'fast_ica:whiten': 'False', 405 | 'imputation:strategy': 'median', 406 | 'preprocessor': 'fast_ica', 407 | 'random_forest:bootstrap': 'False', 408 | 'random_forest:criterion': 'entropy', 409 | 'random_forest:max_depth': 'None', 410 | 'random_forest:max_features': '1.47545539014', 411 | 'random_forest:max_leaf_nodes': 'None', 412 | 'random_forest:min_samples_leaf': '2.0', 413 | 'random_forest:min_samples_split': '15.0', 414 | 'random_forest:n_estimators': '100.0', 415 | 'rescaling:strategy': 'min/max'}, 416 | {'balancing:strategy': 'weighting', 417 | 'classifier': 'random_forest', 418 | 'fast_ica:algorithm': 'deflation', 419 | 'fast_ica:fun': 'logcosh', 420 | 'fast_ica:n_components': '985.0', 421 | 'fast_ica:whiten': 'True', 422 | 'imputation:strategy': 'most_frequent', 423 | 'preprocessor': 'fast_ica', 424 | 'random_forest:bootstrap': 'False', 425 | 'random_forest:criterion': 'gini', 426 | 'random_forest:max_depth': 'None', 427 | 'random_forest:max_features': '3.87640604363', 428 | 'random_forest:max_leaf_nodes': 'None', 429 | 'random_forest:min_samples_leaf': '2.0', 430 | 'random_forest:min_samples_split': '11.0', 431 | 'random_forest:n_estimators': '100.0', 432 | 'rescaling:strategy': 'min/max'}, 433 | {'balancing:strategy': 'weighting', 434 | 'classifier': 'gradient_boosting', 435 | 'gradient_boosting:learning_rate': '0.236639577539', 436 | 'gradient_boosting:max_depth': '5.0', 437 | 'gradient_boosting:max_features': '1.94802938969', 438 | 'gradient_boosting:min_samples_leaf': '3.0', 439 | 'gradient_boosting:min_samples_split': '4.0', 440 | 'gradient_boosting:n_estimators': '100.0', 441 | 'gradient_boosting:subsample': '0.499388145134', 442 | 'imputation:strategy': 'most_frequent', 443 | 'preprocessor': 'select_rates', 444 | 'rescaling:strategy': 'min/max', 445 | 'select_rates:alpha': '0.078631031495', 446 | 'select_rates:mode': 'fwe', 447 | 'select_rates:score_func': 'f_classif'}, 448 | {'balancing:strategy': 'none', 449 | 'classifier': 'random_forest', 450 | 'imputation:strategy': 'mean', 451 | 'preprocessor': 'select_percentile_classification', 452 | 'random_forest:bootstrap': 'False', 453 | 'random_forest:criterion': 'gini', 454 | 'random_forest:max_depth': 'None', 455 | 'random_forest:max_features': '2.89271865035', 456 | 'random_forest:max_leaf_nodes': 'None', 457 | 'random_forest:min_samples_leaf': '9.0', 458 | 'random_forest:min_samples_split': '2.0', 459 | 'random_forest:n_estimators': '100.0', 460 | 'rescaling:strategy': 'min/max', 461 | 'select_percentile_classification:percentile': '58.6633457276', 462 | 'select_percentile_classification:score_func': 'chi2'}, 463 | {'balancing:strategy': 'none', 464 | 'classifier': 'random_forest', 465 | 'fast_ica:algorithm': 'deflation', 466 | 'fast_ica:fun': 'logcosh', 467 | 'fast_ica:n_components': '1299.0', 468 | 'fast_ica:whiten': 'False', 469 | 'imputation:strategy': 'mean', 470 | 'preprocessor': 'fast_ica', 471 | 'random_forest:bootstrap': 'False', 472 | 'random_forest:criterion': 'entropy', 473 | 'random_forest:max_depth': 'None', 474 | 'random_forest:max_features': '4.38103060363', 475 | 'random_forest:max_leaf_nodes': 'None', 476 | 'random_forest:min_samples_leaf': '3.0', 477 | 'random_forest:min_samples_split': '2.0', 478 | 'random_forest:n_estimators': '100.0', 479 | 'rescaling:strategy': 'min/max'}, 480 | {'balancing:strategy': 'none', 481 | 'classifier': 'random_forest', 482 | 'fast_ica:algorithm': 'deflation', 483 | 'fast_ica:fun': 'logcosh', 484 | 'fast_ica:n_components': '1653.0', 485 | 'fast_ica:whiten': 'True', 486 | 'imputation:strategy': 'median', 487 | 'preprocessor': 'fast_ica', 488 | 'random_forest:bootstrap': 'False', 489 | 'random_forest:criterion': 'entropy', 490 | 'random_forest:max_depth': 'None', 491 | 'random_forest:max_features': '2.58731902957', 492 | 'random_forest:max_leaf_nodes': 'None', 493 | 'random_forest:min_samples_leaf': '8.0', 494 | 'random_forest:min_samples_split': '19.0', 495 | 'random_forest:n_estimators': '100.0', 496 | 'rescaling:strategy': 'min/max'}, 497 | ] 498 | 499 | classifiers = [] 500 | predictions_valid = [] 501 | predictions_test = [] 502 | 503 | # Make predictions and weight them 504 | for weight, configuration in zip(weights, configurations): 505 | for param in configuration: 506 | try: 507 | configuration[param] = int(configuration[param]) 508 | except Exception: 509 | try: 510 | configuration[param] = float(configuration[param]) 511 | except Exception: 512 | pass 513 | 514 | classifier = ParamSklearnClassifier(configuration, 1) 515 | classifiers.append(classifier) 516 | try: 517 | classifier.fit(X.copy(), y.copy()) 518 | predictions_valid.append(classifier.predict_proba(X_valid.copy()) * weight) 519 | predictions_test.append(classifier.predict_proba(X_test.copy()) * weight) 520 | except Exception as e: 521 | print e 522 | print configuration 523 | 524 | # Output the predictions 525 | for name, predictions in [('valid', predictions_valid), 526 | ('test', predictions_test)]: 527 | predictions = np.array(predictions) 528 | predictions = np.sum(predictions, axis=0) 529 | predictions = predictions[:, 1].reshape((-1, 1)) 530 | 531 | filepath = os.path.join(output, '%s_%s_000.predict' % (dataset, name)) 532 | np.savetxt(filepath, predictions, delimiter=' ') -------------------------------------------------------------------------------- /001_madeline.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import numpy as np 5 | 6 | import autosklearn 7 | import autosklearn.data 8 | import autosklearn.data.data_manager 9 | import autosklearn.models.evaluator 10 | from ParamSklearn.classification import ParamSklearnClassifier 11 | 12 | 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument('input') 15 | parser.add_argument('output') 16 | args = parser.parse_args() 17 | 18 | input = args.input 19 | dataset = 'madeline' 20 | output = args.output 21 | 22 | D = autosklearn.data.data_manager.DataManager(dataset, input) 23 | X = D.data['X_train'] 24 | y = D.data['Y_train'] 25 | X_valid = D.data['X_valid'] 26 | X_test = D.data['X_test'] 27 | 28 | # Subset of features found with RFE. Feature with least importance in sklearn 29 | # RF removed. Afterwards, trained RF on remaining features with 5CV. In the 30 | # end, choose feature set with lowest error 31 | features = [52, 70, 74, 83, 85, 135, 162, 183, 184, 185, 191, 197, 232, 237, 32 | 239, 252] 33 | 34 | X = X[:, features] 35 | X_valid = X_valid[:, features] 36 | X_test = X_test[:, features] 37 | 38 | # Weights of the ensemble members as determined by Ensemble Selection 39 | weights = np.array([0.100000, 0.080000, 0.080000, 0.060000, 0.060000, 40 | 0.060000, 0.060000, 0.040000, 0.040000, 0.040000, 41 | 0.040000, 0.040000, 0.020000, 0.020000, 0.020000, 42 | 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 43 | 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 44 | 0.020000, 0.020000]) 45 | 46 | # Ensemble members found by SMAC 47 | configurations = [ 48 | {'balancing:strategy': 'weighting', 49 | 'classifier': 'k_nearest_neighbors', 50 | 'imputation:strategy': 'median', 51 | 'k_nearest_neighbors:algorithm': 'auto', 52 | 'k_nearest_neighbors:leaf_size': '30.0', 53 | 'k_nearest_neighbors:n_neighbors': '4.0', 54 | 'k_nearest_neighbors:p': '1.0', 55 | 'k_nearest_neighbors:weights': 'distance', 56 | 'preprocessor': 'select_rates', 57 | 'rescaling:strategy': 'standard', 58 | 'select_rates:alpha': '0.124513266268', 59 | 'select_rates:mode': 'fdr', 60 | 'select_rates:score_func': 'f_classif'}, 61 | {'balancing:strategy': 'weighting', 62 | 'classifier': 'qda', 63 | 'imputation:strategy': 'mean', 64 | 'kitchen_sinks:gamma': '0.802981892271', 65 | 'kitchen_sinks:n_components': '704.0', 66 | 'preprocessor': 'kitchen_sinks', 67 | 'qda:reg_param': '7.66537661987', 68 | 'qda:tol': '0.000779904033875', 69 | 'rescaling:strategy': 'standard'}, 70 | {'balancing:strategy': 'none', 71 | 'classifier': 'qda', 72 | 'imputation:strategy': 'mean', 73 | 'kitchen_sinks:gamma': '0.658527701661', 74 | 'kitchen_sinks:n_components': '499.0', 75 | 'preprocessor': 'kitchen_sinks', 76 | 'qda:reg_param': '4.13193776587', 77 | 'qda:tol': '0.0026677961139', 78 | 'rescaling:strategy': 'standard'}, 79 | {'balancing:strategy': 'none', 80 | 'classifier': 'qda', 81 | 'imputation:strategy': 'mean', 82 | 'kitchen_sinks:gamma': '0.658527701661', 83 | 'kitchen_sinks:n_components': '498.0', 84 | 'preprocessor': 'kitchen_sinks', 85 | 'qda:reg_param': '7.39545021165', 86 | 'qda:tol': '0.00116251661342', 87 | 'rescaling:strategy': 'standard'}, 88 | {'balancing:strategy': 'none', 89 | 'classifier': 'qda', 90 | 'imputation:strategy': 'mean', 91 | 'kitchen_sinks:gamma': '0.758771699267', 92 | 'kitchen_sinks:n_components': '794.0', 93 | 'preprocessor': 'kitchen_sinks', 94 | 'qda:reg_param': '4.57263430441', 95 | 'qda:tol': '0.00284918317943', 96 | 'rescaling:strategy': 'standard'}, 97 | {'balancing:strategy': 'none', 98 | 'classifier': 'k_nearest_neighbors', 99 | 'imputation:strategy': 'most_frequent', 100 | 'k_nearest_neighbors:algorithm': 'auto', 101 | 'k_nearest_neighbors:leaf_size': '30.0', 102 | 'k_nearest_neighbors:n_neighbors': '5.0', 103 | 'k_nearest_neighbors:p': '1.0', 104 | 'k_nearest_neighbors:weights': 'distance', 105 | 'preprocessor': 'select_rates', 106 | 'rescaling:strategy': 'min/max', 107 | 'select_rates:alpha': '0.0683198728939', 108 | 'select_rates:mode': 'fdr', 109 | 'select_rates:score_func': 'f_classif'}, 110 | {'balancing:strategy': 'none', 111 | 'classifier': 'qda', 112 | 'imputation:strategy': 'mean', 113 | 'kitchen_sinks:gamma': '0.773869494191', 114 | 'kitchen_sinks:n_components': '608.0', 115 | 'preprocessor': 'kitchen_sinks', 116 | 'qda:reg_param': '5.34388968302', 117 | 'qda:tol': '0.000118437687463', 118 | 'rescaling:strategy': 'standard'}, 119 | {'balancing:strategy': 'weighting', 120 | 'classifier': 'k_nearest_neighbors', 121 | 'imputation:strategy': 'mean', 122 | 'k_nearest_neighbors:algorithm': 'auto', 123 | 'k_nearest_neighbors:leaf_size': '30.0', 124 | 'k_nearest_neighbors:n_neighbors': '4.0', 125 | 'k_nearest_neighbors:p': '1.0', 126 | 'k_nearest_neighbors:weights': 'distance', 127 | 'preprocessor': 'select_rates', 128 | 'rescaling:strategy': 'min/max', 129 | 'select_rates:alpha': '0.0953909302386', 130 | 'select_rates:mode': 'fdr', 131 | 'select_rates:score_func': 'chi2'}, 132 | {'balancing:strategy': 'none', 133 | 'classifier': 'qda', 134 | 'imputation:strategy': 'mean', 135 | 'kitchen_sinks:gamma': '0.722743897655', 136 | 'kitchen_sinks:n_components': '952.0', 137 | 'preprocessor': 'kitchen_sinks', 138 | 'qda:reg_param': '3.61200930387', 139 | 'qda:tol': '0.000911935213882', 140 | 'rescaling:strategy': 'standard'}, 141 | {'balancing:strategy': 'weighting', 142 | 'classifier': 'k_nearest_neighbors', 143 | 'imputation:strategy': 'most_frequent', 144 | 'k_nearest_neighbors:algorithm': 'auto', 145 | 'k_nearest_neighbors:leaf_size': '30.0', 146 | 'k_nearest_neighbors:n_neighbors': '3.0', 147 | 'k_nearest_neighbors:p': '2.0', 148 | 'k_nearest_neighbors:weights': 'distance', 149 | 'preprocessor': 'select_rates', 150 | 'rescaling:strategy': 'standard', 151 | 'select_rates:alpha': '0.12499749257', 152 | 'select_rates:mode': 'fdr', 153 | 'select_rates:score_func': 'f_classif'}, 154 | {'balancing:strategy': 'none', 155 | 'classifier': 'qda', 156 | 'imputation:strategy': 'most_frequent', 157 | 'kitchen_sinks:gamma': '0.521009778754', 158 | 'kitchen_sinks:n_components': '581.0', 159 | 'preprocessor': 'kitchen_sinks', 160 | 'qda:reg_param': '0.570532656005', 161 | 'qda:tol': '0.00759604479274', 162 | 'rescaling:strategy': 'standard'}, 163 | {'balancing:strategy': 'none', 164 | 'classifier': 'qda', 165 | 'imputation:strategy': 'median', 166 | 'kitchen_sinks:gamma': '0.736334496442', 167 | 'kitchen_sinks:n_components': '590.0', 168 | 'preprocessor': 'kitchen_sinks', 169 | 'qda:reg_param': '8.78913455152', 170 | 'qda:tol': '0.0417125881025', 171 | 'rescaling:strategy': 'standard'}, 172 | {'balancing:strategy': 'weighting', 173 | 'classifier': 'k_nearest_neighbors', 174 | 'imputation:strategy': 'median', 175 | 'k_nearest_neighbors:algorithm': 'auto', 176 | 'k_nearest_neighbors:leaf_size': '30.0', 177 | 'k_nearest_neighbors:n_neighbors': '10.0', 178 | 'k_nearest_neighbors:p': '2.0', 179 | 'k_nearest_neighbors:weights': 'distance', 180 | 'preprocessor': 'select_rates', 181 | 'rescaling:strategy': 'min/max', 182 | 'select_rates:alpha': '0.065583595323', 183 | 'select_rates:mode': 'fdr', 184 | 'select_rates:score_func': 'f_classif'}, 185 | {'balancing:strategy': 'none', 186 | 'classifier': 'qda', 187 | 'imputation:strategy': 'mean', 188 | 'kitchen_sinks:gamma': '0.725282605688', 189 | 'kitchen_sinks:n_components': '591.0', 190 | 'preprocessor': 'kitchen_sinks', 191 | 'qda:reg_param': '4.32023431675', 192 | 'qda:tol': '2.95483713232e-05', 193 | 'rescaling:strategy': 'standard'}, 194 | {'balancing:strategy': 'none', 195 | 'classifier': 'qda', 196 | 'imputation:strategy': 'mean', 197 | 'kitchen_sinks:gamma': '0.686955501206', 198 | 'kitchen_sinks:n_components': '646.0', 199 | 'preprocessor': 'kitchen_sinks', 200 | 'qda:reg_param': '9.58493774318', 201 | 'qda:tol': '0.00612419830773', 202 | 'rescaling:strategy': 'standard'}, 203 | {'balancing:strategy': 'none', 204 | 'classifier': 'k_nearest_neighbors', 205 | 'imputation:strategy': 'median', 206 | 'k_nearest_neighbors:algorithm': 'auto', 207 | 'k_nearest_neighbors:leaf_size': '30.0', 208 | 'k_nearest_neighbors:n_neighbors': '6.0', 209 | 'k_nearest_neighbors:p': '2.0', 210 | 'k_nearest_neighbors:weights': 'distance', 211 | 'preprocessor': 'select_rates', 212 | 'rescaling:strategy': 'min/max', 213 | 'select_rates:alpha': '0.276130352686', 214 | 'select_rates:mode': 'fdr', 215 | 'select_rates:score_func': 'f_classif'}, 216 | {'balancing:strategy': 'none', 217 | 'classifier': 'qda', 218 | 'imputation:strategy': 'most_frequent', 219 | 'kitchen_sinks:gamma': '0.549862378472', 220 | 'kitchen_sinks:n_components': '591.0', 221 | 'preprocessor': 'kitchen_sinks', 222 | 'qda:reg_param': '1.11536443906', 223 | 'qda:tol': '4.98941924261e-05', 224 | 'rescaling:strategy': 'standard'}, 225 | {'balancing:strategy': 'none', 226 | 'classifier': 'qda', 227 | 'imputation:strategy': 'median', 228 | 'kitchen_sinks:gamma': '0.551878628115', 229 | 'kitchen_sinks:n_components': '913.0', 230 | 'preprocessor': 'kitchen_sinks', 231 | 'qda:reg_param': '2.80643663684', 232 | 'qda:tol': '0.0030955537468', 233 | 'rescaling:strategy': 'standard'}, 234 | {'balancing:strategy': 'none', 235 | 'classifier': 'qda', 236 | 'imputation:strategy': 'mean', 237 | 'kitchen_sinks:gamma': '0.797948222068', 238 | 'kitchen_sinks:n_components': '856.0', 239 | 'preprocessor': 'kitchen_sinks', 240 | 'qda:reg_param': '0.753439507859', 241 | 'qda:tol': '0.000179635997544', 242 | 'rescaling:strategy': 'standard'}, 243 | {'balancing:strategy': 'weighting', 244 | 'classifier': 'k_nearest_neighbors', 245 | 'imputation:strategy': 'median', 246 | 'k_nearest_neighbors:algorithm': 'auto', 247 | 'k_nearest_neighbors:leaf_size': '30.0', 248 | 'k_nearest_neighbors:n_neighbors': '6.0', 249 | 'k_nearest_neighbors:p': '2.0', 250 | 'k_nearest_neighbors:weights': 'distance', 251 | 'preprocessor': 'select_rates', 252 | 'rescaling:strategy': 'standard', 253 | 'select_rates:alpha': '0.121674691962', 254 | 'select_rates:mode': 'fdr', 255 | 'select_rates:score_func': 'f_classif'}, 256 | {'balancing:strategy': 'none', 257 | 'classifier': 'qda', 258 | 'imputation:strategy': 'median', 259 | 'kitchen_sinks:gamma': '0.870787144807', 260 | 'kitchen_sinks:n_components': '591.0', 261 | 'preprocessor': 'kitchen_sinks', 262 | 'qda:reg_param': '3.25265485261', 263 | 'qda:tol': '0.000232802336471', 264 | 'rescaling:strategy': 'standard'}, 265 | {'balancing:strategy': 'none', 266 | 'classifier': 'qda', 267 | 'imputation:strategy': 'mean', 268 | 'kitchen_sinks:gamma': '0.725282605688', 269 | 'kitchen_sinks:n_components': '469.0', 270 | 'preprocessor': 'kitchen_sinks', 271 | 'qda:reg_param': '4.32023431675', 272 | 'qda:tol': '6.11461737038e-05', 273 | 'rescaling:strategy': 'standard'}, 274 | {'balancing:strategy': 'none', 275 | 'classifier': 'qda', 276 | 'imputation:strategy': 'mean', 277 | 'kitchen_sinks:gamma': '0.742290491524', 278 | 'kitchen_sinks:n_components': '699.0', 279 | 'preprocessor': 'kitchen_sinks', 280 | 'qda:reg_param': '1.80605719583', 281 | 'qda:tol': '0.00759903394814', 282 | 'rescaling:strategy': 'standard'}, 283 | {'balancing:strategy': 'weighting', 284 | 'classifier': 'k_nearest_neighbors', 285 | 'imputation:strategy': 'mean', 286 | 'k_nearest_neighbors:algorithm': 'auto', 287 | 'k_nearest_neighbors:leaf_size': '30.0', 288 | 'k_nearest_neighbors:n_neighbors': '4.0', 289 | 'k_nearest_neighbors:p': '2.0', 290 | 'k_nearest_neighbors:weights': 'distance', 291 | 'preprocessor': 'select_rates', 292 | 'rescaling:strategy': 'min/max', 293 | 'select_rates:alpha': '0.0556366440458', 294 | 'select_rates:mode': 'fdr', 295 | 'select_rates:score_func': 'f_classif'}, 296 | {'balancing:strategy': 'none', 297 | 'classifier': 'qda', 298 | 'imputation:strategy': 'mean', 299 | 'kitchen_sinks:gamma': '0.69436212216', 300 | 'kitchen_sinks:n_components': '477.0', 301 | 'preprocessor': 'kitchen_sinks', 302 | 'qda:reg_param': '7.19343875838', 303 | 'qda:tol': '0.00130430743783', 304 | 'rescaling:strategy': 'standard'}, 305 | {'balancing:strategy': 'weighting', 306 | 'classifier': 'k_nearest_neighbors', 307 | 'imputation:strategy': 'median', 308 | 'k_nearest_neighbors:algorithm': 'auto', 309 | 'k_nearest_neighbors:leaf_size': '30.0', 310 | 'k_nearest_neighbors:n_neighbors': '8.0', 311 | 'k_nearest_neighbors:p': '1.0', 312 | 'k_nearest_neighbors:weights': 'distance', 313 | 'preprocessor': 'select_rates', 314 | 'rescaling:strategy': 'standard', 315 | 'select_rates:alpha': '0.0962781949808', 316 | 'select_rates:mode': 'fdr', 317 | 'select_rates:score_func': 'f_classif'}, 318 | {'balancing:strategy': 'none', 319 | 'classifier': 'qda', 320 | 'imputation:strategy': 'mean', 321 | 'kitchen_sinks:gamma': '0.680526800011', 322 | 'kitchen_sinks:n_components': '627.0', 323 | 'preprocessor': 'kitchen_sinks', 324 | 'qda:reg_param': '3.3758872613', 325 | 'qda:tol': '0.0025551077682', 326 | 'rescaling:strategy': 'standard'}, 327 | ] 328 | 329 | classifiers = [] 330 | predictions_valid = [] 331 | predictions_test = [] 332 | 333 | # Make predictions and weight them 334 | for weight, configuration in zip(weights, configurations): 335 | for param in configuration: 336 | try: 337 | configuration[param] = int(configuration[param]) 338 | except Exception: 339 | try: 340 | configuration[param] = float(configuration[param]) 341 | except Exception: 342 | pass 343 | 344 | classifier = ParamSklearnClassifier(configuration, 1) 345 | classifiers.append(classifier) 346 | try: 347 | classifier.fit(X.copy(), y.copy()) 348 | predictions_valid.append( 349 | classifier.predict_proba(X_valid.copy()) * weight) 350 | predictions_test.append( 351 | classifier.predict_proba(X_test.copy()) * weight) 352 | except Exception as e: 353 | print e 354 | print configuration 355 | 356 | # Output the predictions 357 | for name, predictions in [('valid', predictions_valid), 358 | ('test', predictions_test)]: 359 | predictions = np.array(predictions) 360 | predictions = np.sum(predictions, axis=0) 361 | predictions = predictions[:, 1].reshape((-1, 1)) 362 | 363 | filepath = os.path.join(output, '%s_%s_000.predict' % (dataset, name)) 364 | np.savetxt(filepath, predictions, delimiter=' ') -------------------------------------------------------------------------------- /001_philippine.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import numpy as np 5 | 6 | import autosklearn 7 | import autosklearn.data 8 | import autosklearn.data.data_manager 9 | import autosklearn.models.evaluator 10 | from ParamSklearn.classification import ParamSklearnClassifier 11 | 12 | 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument('input') 15 | parser.add_argument('output') 16 | args = parser.parse_args() 17 | 18 | input = args.input 19 | dataset = 'philippine' 20 | output = args.output 21 | 22 | D = autosklearn.data.data_manager.DataManager(dataset, input) 23 | X = D.data['X_train'] 24 | y = D.data['Y_train'] 25 | X_valid = D.data['X_valid'] 26 | X_test = D.data['X_test'] 27 | 28 | # Subset of features found with RFE. Feature with least importance in sklearn 29 | # RF removed. Afterwards, trained RF on remaining features with 5CV. In the 30 | # end, choose feature set with lowest error 31 | features = [33, 89, 140, 168, 178, 271] 32 | 33 | X = X[:, features] 34 | X_valid = X_valid[:, features] 35 | X_test = X_test[:, features] 36 | 37 | # Weights of the ensemble members as determined by Ensemble Selection 38 | weights = np.array([0.100000, 0.080000, 0.080000, 0.060000, 0.040000, 39 | 0.040000, 0.040000, 0.040000, 0.040000, 0.040000, 40 | 0.040000, 0.020000, 0.020000, 0.020000, 0.020000, 41 | 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 42 | 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 43 | 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 44 | 0.020000]) 45 | 46 | # Ensemble members found by SMAC 47 | configurations = [ 48 | {'adaboost:algorithm': 'SAMME.R', 49 | 'adaboost:learning_rate': '0.243038132773', 50 | 'adaboost:max_depth': '9.0', 51 | 'adaboost:n_estimators': '475.0', 52 | 'balancing:strategy': 'none', 53 | 'classifier': 'adaboost', 54 | 'feature_agglomeration:affinity': 'cosine', 55 | 'feature_agglomeration:linkage': 'complete', 56 | 'feature_agglomeration:n_clusters': '287.0', 57 | 'imputation:strategy': 'most_frequent', 58 | 'preprocessor': 'feature_agglomeration', 59 | 'rescaling:strategy': 'none', 60 | }, 61 | { 62 | 'adaboost:algorithm': 'SAMME.R', 63 | 'adaboost:learning_rate': '0.246430392425', 64 | 'adaboost:max_depth': '9.0', 65 | 'adaboost:n_estimators': '436.0', 66 | 'balancing:strategy': 'weighting', 67 | 'classifier': 'adaboost', 68 | 'feature_agglomeration:affinity': 'manhattan', 69 | 'feature_agglomeration:linkage': 'average', 70 | 'feature_agglomeration:n_clusters': '156.0', 71 | 'imputation:strategy': 'median', 72 | 'preprocessor': 'feature_agglomeration', 73 | 'rescaling:strategy': 'standard', 74 | }, 75 | { 76 | 'adaboost:algorithm': 'SAMME.R', 77 | 'adaboost:learning_rate': '0.205679811363', 78 | 'adaboost:max_depth': '9.0', 79 | 'adaboost:n_estimators': '485.0', 80 | 'balancing:strategy': 'none', 81 | 'classifier': 'adaboost', 82 | 'feature_agglomeration:affinity': 'euclidean', 83 | 'feature_agglomeration:linkage': 'complete', 84 | 'feature_agglomeration:n_clusters': '79.0', 85 | 'imputation:strategy': 'most_frequent', 86 | 'preprocessor': 'feature_agglomeration', 87 | 'rescaling:strategy': 'min/max', 88 | }, 89 | { 90 | 'adaboost:algorithm': 'SAMME.R', 91 | 'adaboost:learning_rate': '0.250841964136', 92 | 'adaboost:max_depth': '10.0', 93 | 'adaboost:n_estimators': '479.0', 94 | 'balancing:strategy': 'none', 95 | 'classifier': 'adaboost', 96 | 'feature_agglomeration:affinity': 'euclidean', 97 | 'feature_agglomeration:linkage': 'average', 98 | 'feature_agglomeration:n_clusters': '352.0', 99 | 'imputation:strategy': 'median', 100 | 'preprocessor': 'feature_agglomeration', 101 | 'rescaling:strategy': 'none', 102 | }, 103 | { 104 | 'adaboost:algorithm': 'SAMME.R', 105 | 'adaboost:learning_rate': '0.329040651125', 106 | 'adaboost:max_depth': '10.0', 107 | 'adaboost:n_estimators': '493.0', 108 | 'balancing:strategy': 'weighting', 109 | 'classifier': 'adaboost', 110 | 'feature_agglomeration:affinity': 'manhattan', 111 | 'feature_agglomeration:linkage': 'average', 112 | 'feature_agglomeration:n_clusters': '268.0', 113 | 'imputation:strategy': 'most_frequent', 114 | 'preprocessor': 'feature_agglomeration', 115 | 'rescaling:strategy': 'min/max', 116 | }, 117 | { 118 | 'adaboost:algorithm': 'SAMME.R', 119 | 'adaboost:learning_rate': '0.376704790019', 120 | 'adaboost:max_depth': '10.0', 121 | 'adaboost:n_estimators': '400.0', 122 | 'balancing:strategy': 'weighting', 123 | 'classifier': 'adaboost', 124 | 'feature_agglomeration:affinity': 'euclidean', 125 | 'feature_agglomeration:linkage': 'ward', 126 | 'feature_agglomeration:n_clusters': '344.0', 127 | 'imputation:strategy': 'median', 128 | 'preprocessor': 'feature_agglomeration', 129 | 'rescaling:strategy': 'min/max', 130 | }, 131 | { 132 | 'adaboost:algorithm': 'SAMME.R', 133 | 'adaboost:learning_rate': '0.483824181899', 134 | 'adaboost:max_depth': '9.0', 135 | 'adaboost:n_estimators': '479.0', 136 | 'balancing:strategy': 'weighting', 137 | 'classifier': 'adaboost', 138 | 'feature_agglomeration:affinity': 'cosine', 139 | 'feature_agglomeration:linkage': 'average', 140 | 'feature_agglomeration:n_clusters': '310.0', 141 | 'imputation:strategy': 'most_frequent', 142 | 'preprocessor': 'feature_agglomeration', 143 | 'rescaling:strategy': 'min/max', 144 | }, 145 | { 146 | 'adaboost:algorithm': 'SAMME.R', 147 | 'adaboost:learning_rate': '0.246430392425', 148 | 'adaboost:max_depth': '9.0', 149 | 'adaboost:n_estimators': '494.0', 150 | 'balancing:strategy': 'weighting', 151 | 'classifier': 'adaboost', 152 | 'feature_agglomeration:affinity': 'cosine', 153 | 'feature_agglomeration:linkage': 'average', 154 | 'feature_agglomeration:n_clusters': '156.0', 155 | 'imputation:strategy': 'median', 156 | 'preprocessor': 'feature_agglomeration', 157 | 'rescaling:strategy': 'min/max', 158 | }, 159 | { 160 | 'adaboost:algorithm': 'SAMME.R', 161 | 'adaboost:learning_rate': '0.319596208353', 162 | 'adaboost:max_depth': '10.0', 163 | 'adaboost:n_estimators': '446.0', 164 | 'balancing:strategy': 'weighting', 165 | 'classifier': 'adaboost', 166 | 'feature_agglomeration:affinity': 'euclidean', 167 | 'feature_agglomeration:linkage': 'complete', 168 | 'feature_agglomeration:n_clusters': '65.0', 169 | 'imputation:strategy': 'mean', 170 | 'preprocessor': 'feature_agglomeration', 171 | 'rescaling:strategy': 'min/max', 172 | }, 173 | { 174 | 'adaboost:algorithm': 'SAMME.R', 175 | 'adaboost:learning_rate': '0.208071429428', 176 | 'adaboost:max_depth': '9.0', 177 | 'adaboost:n_estimators': '487.0', 178 | 'balancing:strategy': 'weighting', 179 | 'classifier': 'adaboost', 180 | 'feature_agglomeration:affinity': 'cosine', 181 | 'feature_agglomeration:linkage': 'complete', 182 | 'feature_agglomeration:n_clusters': '219.0', 183 | 'imputation:strategy': 'most_frequent', 184 | 'preprocessor': 'feature_agglomeration', 185 | 'rescaling:strategy': 'none', 186 | }, 187 | { 188 | 'adaboost:algorithm': 'SAMME.R', 189 | 'adaboost:learning_rate': '0.362379903949', 190 | 'adaboost:max_depth': '10.0', 191 | 'adaboost:n_estimators': '389.0', 192 | 'balancing:strategy': 'none', 193 | 'classifier': 'adaboost', 194 | 'feature_agglomeration:affinity': 'cosine', 195 | 'feature_agglomeration:linkage': 'complete', 196 | 'feature_agglomeration:n_clusters': '123.0', 197 | 'imputation:strategy': 'most_frequent', 198 | 'preprocessor': 'feature_agglomeration', 199 | 'rescaling:strategy': 'min/max', 200 | }, 201 | { 202 | 'adaboost:algorithm': 'SAMME.R', 203 | 'adaboost:learning_rate': '0.468508930474', 204 | 'adaboost:max_depth': '10.0', 205 | 'adaboost:n_estimators': '477.0', 206 | 'balancing:strategy': 'weighting', 207 | 'classifier': 'adaboost', 208 | 'feature_agglomeration:affinity': 'euclidean', 209 | 'feature_agglomeration:linkage': 'average', 210 | 'feature_agglomeration:n_clusters': '244.0', 211 | 'imputation:strategy': 'median', 212 | 'preprocessor': 'feature_agglomeration', 213 | 'rescaling:strategy': 'min/max', 214 | }, 215 | { 216 | 'adaboost:algorithm': 'SAMME.R', 217 | 'adaboost:learning_rate': '0.284273806405', 218 | 'adaboost:max_depth': '9.0', 219 | 'adaboost:n_estimators': '483.0', 220 | 'balancing:strategy': 'none', 221 | 'classifier': 'adaboost', 222 | 'feature_agglomeration:affinity': 'cosine', 223 | 'feature_agglomeration:linkage': 'complete', 224 | 'feature_agglomeration:n_clusters': '174.0', 225 | 'imputation:strategy': 'median', 226 | 'preprocessor': 'feature_agglomeration', 227 | 'rescaling:strategy': 'min/max', 228 | }, 229 | { 230 | 'adaboost:algorithm': 'SAMME.R', 231 | 'adaboost:learning_rate': '0.2635286978', 232 | 'adaboost:max_depth': '10.0', 233 | 'adaboost:n_estimators': '482.0', 234 | 'balancing:strategy': 'none', 235 | 'classifier': 'adaboost', 236 | 'feature_agglomeration:affinity': 'manhattan', 237 | 'feature_agglomeration:linkage': 'average', 238 | 'feature_agglomeration:n_clusters': '118.0', 239 | 'imputation:strategy': 'most_frequent', 240 | 'preprocessor': 'feature_agglomeration', 241 | 'rescaling:strategy': 'min/max', 242 | }, 243 | { 244 | 'adaboost:algorithm': 'SAMME.R', 245 | 'adaboost:learning_rate': '0.326966274076', 246 | 'adaboost:max_depth': '10.0', 247 | 'adaboost:n_estimators': '494.0', 248 | 'balancing:strategy': 'none', 249 | 'classifier': 'adaboost', 250 | 'feature_agglomeration:affinity': 'euclidean', 251 | 'feature_agglomeration:linkage': 'average', 252 | 'feature_agglomeration:n_clusters': '87.0', 253 | 'imputation:strategy': 'most_frequent', 254 | 'preprocessor': 'feature_agglomeration', 255 | 'rescaling:strategy': 'min/max', 256 | }, 257 | { 258 | 'adaboost:algorithm': 'SAMME.R', 259 | 'adaboost:learning_rate': '0.239427049389', 260 | 'adaboost:max_depth': '9.0', 261 | 'adaboost:n_estimators': '393.0', 262 | 'balancing:strategy': 'none', 263 | 'classifier': 'adaboost', 264 | 'feature_agglomeration:affinity': 'euclidean', 265 | 'feature_agglomeration:linkage': 'complete', 266 | 'feature_agglomeration:n_clusters': '331.0', 267 | 'imputation:strategy': 'most_frequent', 268 | 'preprocessor': 'feature_agglomeration', 269 | 'rescaling:strategy': 'min/max', 270 | }, 271 | { 272 | 'adaboost:algorithm': 'SAMME.R', 273 | 'adaboost:learning_rate': '0.272345990341', 274 | 'adaboost:max_depth': '10.0', 275 | 'adaboost:n_estimators': '478.0', 276 | 'balancing:strategy': 'none', 277 | 'classifier': 'adaboost', 278 | 'feature_agglomeration:affinity': 'manhattan', 279 | 'feature_agglomeration:linkage': 'average', 280 | 'feature_agglomeration:n_clusters': '20.0', 281 | 'imputation:strategy': 'most_frequent', 282 | 'preprocessor': 'feature_agglomeration', 283 | 'rescaling:strategy': 'standard', 284 | }, 285 | { 286 | 'adaboost:algorithm': 'SAMME.R', 287 | 'adaboost:learning_rate': '0.36300772469', 288 | 'adaboost:max_depth': '10.0', 289 | 'adaboost:n_estimators': '430.0', 290 | 'balancing:strategy': 'weighting', 291 | 'classifier': 'adaboost', 292 | 'feature_agglomeration:affinity': 'euclidean', 293 | 'feature_agglomeration:linkage': 'complete', 294 | 'feature_agglomeration:n_clusters': '88.0', 295 | 'imputation:strategy': 'median', 296 | 'preprocessor': 'feature_agglomeration', 297 | 'rescaling:strategy': 'min/max', 298 | }, 299 | { 300 | 'adaboost:algorithm': 'SAMME.R', 301 | 'adaboost:learning_rate': '0.29318612753', 302 | 'adaboost:max_depth': '10.0', 303 | 'adaboost:n_estimators': '418.0', 304 | 'balancing:strategy': 'weighting', 305 | 'classifier': 'adaboost', 306 | 'feature_agglomeration:affinity': 'cosine', 307 | 'feature_agglomeration:linkage': 'complete', 308 | 'feature_agglomeration:n_clusters': '220.0', 309 | 'imputation:strategy': 'median', 310 | 'preprocessor': 'feature_agglomeration', 311 | 'rescaling:strategy': 'standard', 312 | }, 313 | { 314 | 'adaboost:algorithm': 'SAMME.R', 315 | 'adaboost:learning_rate': '0.315769388471', 316 | 'adaboost:max_depth': '10.0', 317 | 'adaboost:n_estimators': '494.0', 318 | 'balancing:strategy': 'none', 319 | 'classifier': 'adaboost', 320 | 'feature_agglomeration:affinity': 'euclidean', 321 | 'feature_agglomeration:linkage': 'average', 322 | 'feature_agglomeration:n_clusters': '270.0', 323 | 'imputation:strategy': 'median', 324 | 'preprocessor': 'feature_agglomeration', 325 | 'rescaling:strategy': 'min/max', 326 | }, 327 | { 328 | 'adaboost:algorithm': 'SAMME.R', 329 | 'adaboost:learning_rate': '0.295544282435', 330 | 'adaboost:max_depth': '9.0', 331 | 'adaboost:n_estimators': '478.0', 332 | 'balancing:strategy': 'none', 333 | 'classifier': 'adaboost', 334 | 'feature_agglomeration:affinity': 'euclidean', 335 | 'feature_agglomeration:linkage': 'average', 336 | 'feature_agglomeration:n_clusters': '195.0', 337 | 'imputation:strategy': 'most_frequent', 338 | 'preprocessor': 'feature_agglomeration', 339 | 'rescaling:strategy': 'min/max', 340 | }, 341 | { 342 | 'adaboost:algorithm': 'SAMME.R', 343 | 'adaboost:learning_rate': '0.298219714131', 344 | 'adaboost:max_depth': '9.0', 345 | 'adaboost:n_estimators': '473.0', 346 | 'balancing:strategy': 'none', 347 | 'classifier': 'adaboost', 348 | 'feature_agglomeration:affinity': 'euclidean', 349 | 'feature_agglomeration:linkage': 'average', 350 | 'feature_agglomeration:n_clusters': '39.0', 351 | 'imputation:strategy': 'mean', 352 | 'preprocessor': 'feature_agglomeration', 353 | 'rescaling:strategy': 'standard', 354 | }, 355 | { 356 | 'adaboost:algorithm': 'SAMME.R', 357 | 'adaboost:learning_rate': '0.370877623224', 358 | 'adaboost:max_depth': '10.0', 359 | 'adaboost:n_estimators': '382.0', 360 | 'balancing:strategy': 'none', 361 | 'classifier': 'adaboost', 362 | 'feature_agglomeration:affinity': 'euclidean', 363 | 'feature_agglomeration:linkage': 'average', 364 | 'feature_agglomeration:n_clusters': '331.0', 365 | 'imputation:strategy': 'most_frequent', 366 | 'preprocessor': 'feature_agglomeration', 367 | 'rescaling:strategy': 'min/max', 368 | }, 369 | { 370 | 'adaboost:algorithm': 'SAMME.R', 371 | 'adaboost:learning_rate': '0.339058617161', 372 | 'adaboost:max_depth': '10.0', 373 | 'adaboost:n_estimators': '466.0', 374 | 'balancing:strategy': 'none', 375 | 'classifier': 'adaboost', 376 | 'feature_agglomeration:affinity': 'manhattan', 377 | 'feature_agglomeration:linkage': 'complete', 378 | 'feature_agglomeration:n_clusters': '38.0', 379 | 'imputation:strategy': 'most_frequent', 380 | 'preprocessor': 'feature_agglomeration', 381 | 'rescaling:strategy': 'standard', 382 | }, 383 | { 384 | 'adaboost:algorithm': 'SAMME.R', 385 | 'adaboost:learning_rate': '0.272345990341', 386 | 'adaboost:max_depth': '10.0', 387 | 'adaboost:n_estimators': '478.0', 388 | 'balancing:strategy': 'weighting', 389 | 'classifier': 'adaboost', 390 | 'feature_agglomeration:affinity': 'cosine', 391 | 'feature_agglomeration:linkage': 'average', 392 | 'feature_agglomeration:n_clusters': '68.0', 393 | 'imputation:strategy': 'most_frequent', 394 | 'preprocessor': 'feature_agglomeration', 395 | 'rescaling:strategy': 'none', 396 | }, 397 | { 398 | 'adaboost:algorithm': 'SAMME.R', 399 | 'adaboost:learning_rate': '0.268568387674', 400 | 'adaboost:max_depth': '10.0', 401 | 'adaboost:n_estimators': '499.0', 402 | 'balancing:strategy': 'none', 403 | 'classifier': 'adaboost', 404 | 'feature_agglomeration:affinity': 'manhattan', 405 | 'feature_agglomeration:linkage': 'average', 406 | 'feature_agglomeration:n_clusters': '78.0', 407 | 'imputation:strategy': 'most_frequent', 408 | 'preprocessor': 'feature_agglomeration', 409 | 'rescaling:strategy': 'standard', 410 | }, 411 | { 412 | 'adaboost:algorithm': 'SAMME.R', 413 | 'adaboost:learning_rate': '0.286357615604', 414 | 'adaboost:max_depth': '9.0', 415 | 'adaboost:n_estimators': '490.0', 416 | 'balancing:strategy': 'weighting', 417 | 'classifier': 'adaboost', 418 | 'feature_agglomeration:affinity': 'euclidean', 419 | 'feature_agglomeration:linkage': 'ward', 420 | 'feature_agglomeration:n_clusters': '220.0', 421 | 'imputation:strategy': 'median', 422 | 'preprocessor': 'feature_agglomeration', 423 | 'rescaling:strategy': 'min/max', 424 | }, 425 | { 426 | 'adaboost:algorithm': 'SAMME.R', 427 | 'adaboost:learning_rate': '0.377112372612', 428 | 'adaboost:max_depth': '10.0', 429 | 'adaboost:n_estimators': '458.0', 430 | 'balancing:strategy': 'weighting', 431 | 'classifier': 'adaboost', 432 | 'feature_agglomeration:affinity': 'euclidean', 433 | 'feature_agglomeration:linkage': 'ward', 434 | 'feature_agglomeration:n_clusters': '125.0', 435 | 'imputation:strategy': 'most_frequent', 436 | 'preprocessor': 'feature_agglomeration', 437 | 'rescaling:strategy': 'min/max', 438 | }, 439 | { 440 | 'adaboost:algorithm': 'SAMME.R', 441 | 'adaboost:learning_rate': '0.400954561452', 442 | 'adaboost:max_depth': '10.0', 443 | 'adaboost:n_estimators': '408.0', 444 | 'balancing:strategy': 'none', 445 | 'classifier': 'adaboost', 446 | 'feature_agglomeration:affinity': 'euclidean', 447 | 'feature_agglomeration:linkage': 'average', 448 | 'feature_agglomeration:n_clusters': '345.0', 449 | 'imputation:strategy': 'median', 450 | 'preprocessor': 'feature_agglomeration', 451 | 'rescaling:strategy': 'min/max', 452 | }, 453 | { 454 | 'adaboost:algorithm': 'SAMME.R', 455 | 'adaboost:learning_rate': '0.196044249482', 456 | 'adaboost:max_depth': '9.0', 457 | 'adaboost:n_estimators': '494.0', 458 | 'balancing:strategy': 'none', 459 | 'classifier': 'adaboost', 460 | 'feature_agglomeration:affinity': 'manhattan', 461 | 'feature_agglomeration:linkage': 'average', 462 | 'feature_agglomeration:n_clusters': '182.0', 463 | 'imputation:strategy': 'median', 464 | 'preprocessor': 'feature_agglomeration', 465 | 'rescaling:strategy': 'min/max', 466 | }, 467 | { 468 | 'adaboost:algorithm': 'SAMME.R', 469 | 'adaboost:learning_rate': '0.312315129765', 470 | 'adaboost:max_depth': '10.0', 471 | 'adaboost:n_estimators': '442.0', 472 | 'balancing:strategy': 'weighting', 473 | 'classifier': 'adaboost', 474 | 'feature_agglomeration:affinity': 'manhattan', 475 | 'feature_agglomeration:linkage': 'complete', 476 | 'feature_agglomeration:n_clusters': '347.0', 477 | 'imputation:strategy': 'median', 478 | 'preprocessor': 'feature_agglomeration', 479 | 'rescaling:strategy': 'none'} 480 | ] 481 | 482 | classifiers = [] 483 | predictions_valid = [] 484 | predictions_test = [] 485 | 486 | # Make predictions and weight them 487 | for weight, configuration in zip(weights, configurations): 488 | for param in configuration: 489 | try: 490 | configuration[param] = int(configuration[param]) 491 | except Exception: 492 | try: 493 | configuration[param] = float(configuration[param]) 494 | except Exception: 495 | pass 496 | 497 | classifier = ParamSklearnClassifier(configuration, 1) 498 | classifiers.append(classifier) 499 | try: 500 | classifier.fit(X.copy(), y.copy()) 501 | predictions_valid.append( 502 | classifier.predict_proba(X_valid.copy()) * weight) 503 | predictions_test.append( 504 | classifier.predict_proba(X_test.copy()) * weight) 505 | except Exception as e: 506 | print e 507 | print configuration 508 | 509 | # Output the predictions 510 | for name, predictions in [('valid', predictions_valid), 511 | ('test', predictions_test)]: 512 | predictions = np.array(predictions) 513 | predictions = np.sum(predictions, axis=0) 514 | predictions = predictions[:, 1].reshape((-1, 1)) 515 | 516 | filepath = os.path.join(output, '%s_%s_000.predict' % (dataset, name)) 517 | np.savetxt(filepath, predictions, delimiter=' ') -------------------------------------------------------------------------------- /001_sylvine.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import numpy as np 5 | 6 | import autosklearn 7 | import autosklearn.data 8 | import autosklearn.data.data_manager 9 | import autosklearn.models.evaluator 10 | from ParamSklearn.classification import ParamSklearnClassifier 11 | 12 | 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument('input') 15 | parser.add_argument('output') 16 | args = parser.parse_args() 17 | 18 | input = args.input 19 | dataset = 'sylvine' 20 | output = args.output 21 | 22 | D = autosklearn.data.data_manager.DataManager(dataset, input) 23 | X = D.data['X_train'] 24 | y = D.data['Y_train'] 25 | X_valid = D.data['X_valid'] 26 | X_test = D.data['X_test'] 27 | 28 | # Subset of features found with RFE. Feature with least importance in sklearn 29 | # RF removed. Afterwards, trained RF on remaining features with 5CV. In the 30 | # end, choose feature set with lowest error 31 | features = [6, 8, 9, 14] 32 | 33 | X = X[:, features] 34 | X_valid = X_valid[:, features] 35 | X_test = X_test[:, features] 36 | 37 | # Weights of the ensemble members as determined by Ensemble Selection 38 | weights = np.array([0.420000, 0.360000, 0.060000, 0.040000, 0.040000, 39 | 0.040000, 0.020000, 0.020000]) 40 | 41 | # Ensemble members found by SMAC 42 | configurations = [ 43 | {'balancing:strategy': 'none', 44 | 'classifier': 'qda', 45 | 'imputation:strategy': 'median', 46 | 'kitchen_sinks:gamma': '1.92120672046', 47 | 'kitchen_sinks:n_components': '716.0', 48 | 'preprocessor': 'kitchen_sinks', 49 | 'qda:reg_param': '1.58062868571', 50 | 'qda:tol': '0.0247837474409', 51 | 'rescaling:strategy': 'standard', }, 52 | {'balancing:strategy': 'none', 53 | 'classifier': 'qda', 54 | 'imputation:strategy': 'most_frequent', 55 | 'kitchen_sinks:gamma': '1.61329137115', 56 | 'kitchen_sinks:n_components': '500.0', 57 | 'preprocessor': 'kitchen_sinks', 58 | 'qda:reg_param': '5.45636866541', 59 | 'qda:tol': '5.69425859943e-05', 60 | 'rescaling:strategy': 'min/max', }, 61 | {'balancing:strategy': 'weighting', 62 | 'classifier': 'qda', 63 | 'imputation:strategy': 'most_frequent', 64 | 'kitchen_sinks:gamma': '1.95127135806', 65 | 'kitchen_sinks:n_components': '564.0', 66 | 'preprocessor': 'kitchen_sinks', 67 | 'qda:reg_param': '0.512205857283', 68 | 'qda:tol': '0.000168304749916', 69 | 'rescaling:strategy': 'standard', }, 70 | {'balancing:strategy': 'weighting', 71 | 'classifier': 'qda', 72 | 'imputation:strategy': 'median', 73 | 'kitchen_sinks:gamma': '1.8592926955', 74 | 'kitchen_sinks:n_components': '539.0', 75 | 'preprocessor': 'kitchen_sinks', 76 | 'qda:reg_param': '7.384724657', 77 | 'qda:tol': '0.0200780040497', 78 | 'rescaling:strategy': 'standard', }, 79 | {'balancing:strategy': 'none', 80 | 'classifier': 'qda', 81 | 'imputation:strategy': 'median', 82 | 'kitchen_sinks:gamma': '0.968569589575', 83 | 'kitchen_sinks:n_components': '528.0', 84 | 'preprocessor': 'kitchen_sinks', 85 | 'qda:reg_param': '5.73540397488', 86 | 'qda:tol': '0.00632432527713', 87 | 'rescaling:strategy': 'min/max', }, 88 | {'balancing:strategy': 'weighting', 89 | 'classifier': 'qda', 90 | 'imputation:strategy': 'most_frequent', 91 | 'kitchen_sinks:gamma': '1.7159380388', 92 | 'kitchen_sinks:n_components': '586.0', 93 | 'preprocessor': 'kitchen_sinks', 94 | 'qda:reg_param': '4.84995966137', 95 | 'qda:tol': '0.0143521983037', 96 | 'rescaling:strategy': 'standard', }, 97 | {'balancing:strategy': 'weighting', 98 | 'classifier': 'qda', 99 | 'imputation:strategy': 'median', 100 | 'nystroem_sampler:gamma': '3.79316084659', 101 | 'nystroem_sampler:kernel': 'rbf', 102 | 'nystroem_sampler:n_components': '516.0', 103 | 'preprocessor': 'nystroem_sampler', 104 | 'qda:reg_param': '9.63571710058', 105 | 'qda:tol': '0.00901955088569', 106 | 'rescaling:strategy': 'min/max', }, 107 | {'balancing:strategy': 'weighting', 108 | 'classifier': 'qda', 109 | 'imputation:strategy': 'most_frequent', 110 | 'kitchen_sinks:gamma': '1.85336603609', 111 | 'kitchen_sinks:n_components': '509.0', 112 | 'preprocessor': 'kitchen_sinks', 113 | 'qda:reg_param': '8.57076337966', 114 | 'qda:tol': '0.000361249119707', 115 | 'rescaling:strategy': 'standard'} 116 | ] 117 | 118 | classifiers = [] 119 | predictions_valid = [] 120 | predictions_test = [] 121 | 122 | # Make predictions and weight them 123 | for weight, configuration in zip(weights, configurations): 124 | for param in configuration: 125 | try: 126 | configuration[param] = int(configuration[param]) 127 | except Exception: 128 | try: 129 | configuration[param] = float(configuration[param]) 130 | except Exception: 131 | pass 132 | 133 | classifier = ParamSklearnClassifier(configuration, 1) 134 | classifiers.append(classifier) 135 | try: 136 | classifier.fit(X.copy(), y.copy()) 137 | predictions_valid.append( 138 | classifier.predict_proba(X_valid.copy()) * weight) 139 | predictions_test.append( 140 | classifier.predict_proba(X_test.copy()) * weight) 141 | except Exception as e: 142 | print e 143 | print configuration 144 | 145 | # Output the predictions 146 | for name, predictions in [('valid', predictions_valid), 147 | ('test', predictions_test)]: 148 | predictions = np.array(predictions) 149 | predictions = np.sum(predictions, axis=0) 150 | predictions = predictions[:,1].reshape((-1, 1)) 151 | 152 | filepath = os.path.join(output, '%s_%s_000.predict' % (dataset, name)) 153 | np.savetxt(filepath, predictions, delimiter=' ') -------------------------------------------------------------------------------- /002_albert.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import numpy as np 5 | from sklearn.cross_validation import StratifiedKFold 6 | 7 | import autosklearn 8 | import autosklearn.data 9 | import autosklearn.data.competition_data_manager 10 | from autosklearn.evaluation.util import calculate_score 11 | from ParamSklearn.classification import ParamSklearnClassifier 12 | 13 | 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('input') 16 | parser.add_argument('output') 17 | args = parser.parse_args() 18 | 19 | input = args.input 20 | dataset = 'albert' 21 | output = args.output 22 | 23 | path = os.path.join(input, dataset) 24 | D = autosklearn.data.competition_data_manager.CompetitionDataManager(path) 25 | X = D.data['X_train'] 26 | y = D.data['Y_train'] 27 | X_valid = D.data['X_valid'] 28 | X_test = D.data['X_test'] 29 | 30 | # Replace the following array by a new ensemble 31 | choices = \ 32 | [(1.0, ParamSklearnClassifier(configuration={ 33 | 'balancing:strategy': 'weighting', 34 | 'classifier:__choice__': 'sgd', 35 | 'classifier:sgd:loss': 'hinge', 36 | 'classifier:sgd:penalty': 'l2', 37 | 'classifier:sgd:alpha': 0.0001, 38 | 'classifier:sgd:fit_intercept': True, 39 | 'classifier:sgd:n_iter': 5, 40 | 'classifier:sgd:learning_rate': 'optimal', 41 | 'classifier:sgd:eta0': 0.01, 42 | 'classifier:sgd:average': True, 43 | 'imputation:strategy': 'mean', 44 | 'one_hot_encoding:use_minimum_fraction': 'True', 45 | 'one_hot_encoding:minimum_fraction': 0.1, 46 | 'preprocessor:__choice__': 'no_preprocessing', 47 | 'rescaling:__choice__': 'min/max'}))] 48 | 49 | classifiers = [] 50 | targets = [] 51 | predictions = [] 52 | predictions_valid = [] 53 | predictions_test = [] 54 | 55 | # Make predictions and weight them 56 | iteration = 0 57 | for weight, classifier in choices: 58 | iteration += 1 59 | print dataset, "Iteration %d/%d" % (iteration, len(choices)) 60 | classifiers.append(classifier) 61 | try: 62 | classifier.fit(X.copy(), y.copy()) 63 | predictions_valid.append( 64 | classifier.predict_proba(X_valid.copy()) * weight) 65 | predictions_test.append( 66 | classifier.predict_proba(X_test.copy()) * weight) 67 | except Exception as e: 68 | print e 69 | print classifier.configuration 70 | 71 | # Output the predictions 72 | for name, predictions in [('valid', predictions_valid), 73 | ('test', predictions_test)]: 74 | predictions = np.array(predictions) 75 | predictions = np.sum(predictions, axis=0) 76 | predictions = predictions[:, 1].reshape((-1, 1)) 77 | 78 | filepath = os.path.join(output, '%s_%s_000.predict' % (dataset, name)) 79 | np.savetxt(filepath, predictions, delimiter=' ') 80 | -------------------------------------------------------------------------------- /002_dilbert.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import numpy as np 5 | from sklearn.cross_validation import StratifiedKFold 6 | 7 | import autosklearn 8 | import autosklearn.data 9 | import autosklearn.data.competition_data_manager 10 | from autosklearn.evaluation.util import calculate_score 11 | from ParamSklearn.classification import ParamSklearnClassifier 12 | 13 | 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('input') 16 | parser.add_argument('output') 17 | args = parser.parse_args() 18 | 19 | input = args.input 20 | dataset = 'dilbert' 21 | output = args.output 22 | 23 | path = os.path.join(input, dataset) 24 | D = autosklearn.data.competition_data_manager.CompetitionDataManager(path) 25 | X = D.data['X_train'] 26 | y = D.data['Y_train'] 27 | X_valid = D.data['X_valid'] 28 | X_test = D.data['X_test'] 29 | 30 | # Replace the following array by a new ensemble 31 | choices = \ 32 | [(0.220000, ParamSklearnClassifier( 33 | configuration={ 34 | 'balancing:strategy': 'weighting', 35 | 'classifier:__choice__': 'passive_aggressive', 36 | 'classifier:passive_aggressive:C': 0.0022574783522003694, 37 | 'classifier:passive_aggressive:fit_intercept': 'True', 38 | 'classifier:passive_aggressive:loss': 'hinge', 39 | 'classifier:passive_aggressive:n_iter': 119, 40 | 'imputation:strategy': 'most_frequent', 41 | 'one_hot_encoding:minimum_fraction': 0.1898871876010834, 42 | 'one_hot_encoding:use_minimum_fraction': 'True', 43 | 'preprocessor:__choice__': 'gem', 44 | 'preprocessor:gem:N': 20, 45 | 'preprocessor:gem:precond': 0.27540716190663134, 46 | 'rescaling:__choice__': 'min/max'})), 47 | (0.160000, ParamSklearnClassifier( 48 | configuration={ 49 | 'balancing:strategy': 'none', 50 | 'classifier:__choice__': 'passive_aggressive', 51 | 'classifier:passive_aggressive:C': 8.011168723835382, 52 | 'classifier:passive_aggressive:fit_intercept': 'True', 53 | 'classifier:passive_aggressive:loss': 'hinge', 54 | 'classifier:passive_aggressive:n_iter': 20, 55 | 'imputation:strategy': 'median', 56 | 'one_hot_encoding:minimum_fraction': 0.020771877142610626, 57 | 'one_hot_encoding:use_minimum_fraction': 'True', 58 | 'preprocessor:__choice__': 'gem', 59 | 'preprocessor:gem:N': 16, 60 | 'preprocessor:gem:precond': 0.035878450355803344, 61 | 'rescaling:__choice__': 'min/max'})), 62 | (0.160000, ParamSklearnClassifier( 63 | configuration={ 64 | 'balancing:strategy': 'none', 65 | 'classifier:__choice__': 'passive_aggressive', 66 | 'classifier:passive_aggressive:C': 0.00010934133255683256, 67 | 'classifier:passive_aggressive:fit_intercept': 'True', 68 | 'classifier:passive_aggressive:loss': 'hinge', 69 | 'classifier:passive_aggressive:n_iter': 235, 70 | 'imputation:strategy': 'mean', 71 | 'one_hot_encoding:minimum_fraction': 0.022038507512545786, 72 | 'one_hot_encoding:use_minimum_fraction': 'True', 73 | 'preprocessor:__choice__': 'gem', 74 | 'preprocessor:gem:N': 17, 75 | 'preprocessor:gem:precond': 0.02104468261583234, 76 | 'rescaling:__choice__': 'min/max'})), 77 | (0.140000, ParamSklearnClassifier( 78 | configuration={ 79 | 'balancing:strategy': 'none', 80 | 'classifier:__choice__': 'passive_aggressive', 81 | 'classifier:passive_aggressive:C': 8.011168723835382, 82 | 'classifier:passive_aggressive:fit_intercept': 'True', 83 | 'classifier:passive_aggressive:loss': 'hinge', 84 | 'classifier:passive_aggressive:n_iter': 20, 85 | 'imputation:strategy': 'mean', 86 | 'one_hot_encoding:minimum_fraction': 0.020771877142610626, 87 | 'one_hot_encoding:use_minimum_fraction': 'True', 88 | 'preprocessor:__choice__': 'gem', 89 | 'preprocessor:gem:N': 16, 90 | 'preprocessor:gem:precond': 0.047677121638912856, 91 | 'rescaling:__choice__': 'min/max'})), 92 | (0.140000, ParamSklearnClassifier( 93 | configuration={ 94 | 'balancing:strategy': 'none', 95 | 'classifier:__choice__': 'passive_aggressive', 96 | 'classifier:passive_aggressive:C': 8.011168723835382, 97 | 'classifier:passive_aggressive:fit_intercept': 'True', 98 | 'classifier:passive_aggressive:loss': 'squared_hinge', 99 | 'classifier:passive_aggressive:n_iter': 301, 100 | 'imputation:strategy': 'median', 101 | 'one_hot_encoding:minimum_fraction': 0.028040769173853935, 102 | 'one_hot_encoding:use_minimum_fraction': 'True', 103 | 'preprocessor:__choice__': 'gem', 104 | 'preprocessor:gem:N': 20, 105 | 'preprocessor:gem:precond': 0.047677121638912856, 106 | 'rescaling:__choice__': 'min/max'})), 107 | (0.120000, ParamSklearnClassifier( 108 | configuration={ 109 | 'balancing:strategy': 'none', 110 | 'classifier:__choice__': 'passive_aggressive', 111 | 'classifier:passive_aggressive:C': 0.00010934133255683256, 112 | 'classifier:passive_aggressive:fit_intercept': 'True', 113 | 'classifier:passive_aggressive:loss': 'hinge', 114 | 'classifier:passive_aggressive:n_iter': 235, 115 | 'imputation:strategy': 'mean', 116 | 'one_hot_encoding:minimum_fraction': 0.041303833357502165, 117 | 'one_hot_encoding:use_minimum_fraction': 'True', 118 | 'preprocessor:__choice__': 'gem', 119 | 'preprocessor:gem:N': 18, 120 | 'preprocessor:gem:precond': 0.09599232591423834, 121 | 'rescaling:__choice__': 'min/max'})), 122 | (0.040000, ParamSklearnClassifier( 123 | configuration={ 124 | 'balancing:strategy': 'none', 125 | 'classifier:__choice__': 'liblinear_svc', 126 | 'classifier:liblinear_svc:C': 37.176582995422606, 127 | 'classifier:liblinear_svc:dual': 'False', 128 | 'classifier:liblinear_svc:fit_intercept': 'True', 129 | 'classifier:liblinear_svc:intercept_scaling': 1, 130 | 'classifier:liblinear_svc:loss': 'squared_hinge', 131 | 'classifier:liblinear_svc:multi_class': 'ovr', 132 | 'classifier:liblinear_svc:penalty': 'l2', 133 | 'classifier:liblinear_svc:tol': 0.00016373824508657717, 134 | 'imputation:strategy': 'median', 135 | 'one_hot_encoding:minimum_fraction': 0.0008207509562933506, 136 | 'one_hot_encoding:use_minimum_fraction': 'True', 137 | 'preprocessor:__choice__': 'gem', 138 | 'preprocessor:gem:N': 15, 139 | 'preprocessor:gem:precond': 0.1010713117945701, 140 | 'rescaling:__choice__': 'min/max'})), 141 | (0.020000, ParamSklearnClassifier( 142 | configuration={ 143 | 'balancing:strategy': 'none', 144 | 'classifier:__choice__': 'passive_aggressive', 145 | 'classifier:passive_aggressive:C': 8.011168723835382, 146 | 'classifier:passive_aggressive:fit_intercept': 'True', 147 | 'classifier:passive_aggressive:loss': 'squared_hinge', 148 | 'classifier:passive_aggressive:n_iter': 20, 149 | 'imputation:strategy': 'median', 150 | 'one_hot_encoding:minimum_fraction': 0.028040769173853935, 151 | 'one_hot_encoding:use_minimum_fraction': 'True', 152 | 'preprocessor:__choice__': 'gem', 153 | 'preprocessor:gem:N': 20, 154 | 'preprocessor:gem:precond': 0.047677121638912856, 155 | 'rescaling:__choice__': 'min/max'})) 156 | ] 157 | 158 | classifiers = [] 159 | targets = [] 160 | predictions = [] 161 | predictions_valid = [] 162 | predictions_test = [] 163 | 164 | # Make predictions and weight them 165 | iteration = 0 166 | for weight, classifier in choices: 167 | iteration += 1 168 | print dataset, "Iteration %d/%d" % (iteration, len(choices)) 169 | 170 | classifiers.append(classifier) 171 | try: 172 | classifier.fit(X.copy(), y.copy()) 173 | predictions_valid.append( 174 | classifier.predict_proba(X_valid.copy()) * weight) 175 | predictions_test.append( 176 | classifier.predict_proba(X_test.copy()) * weight) 177 | except Exception as e: 178 | print e 179 | print classifier 180 | 181 | # Output the predictions 182 | for name, predictions in [('valid', predictions_valid), 183 | ('test', predictions_test)]: 184 | predictions = np.array(predictions) 185 | predictions = np.sum(predictions, axis=0) 186 | 187 | filepath = os.path.join(output, '%s_%s_000.predict' % (dataset, name)) 188 | np.savetxt(filepath, predictions, delimiter=' ') 189 | -------------------------------------------------------------------------------- /002_fabert.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import numpy as np 5 | from sklearn.cross_validation import StratifiedKFold 6 | 7 | import autosklearn 8 | import autosklearn.data 9 | import autosklearn.data.competition_data_manager 10 | from autosklearn.evaluation.util import calculate_score 11 | from ParamSklearn.classification import ParamSklearnClassifier 12 | 13 | 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('input') 16 | parser.add_argument('output') 17 | args = parser.parse_args() 18 | 19 | input = args.input 20 | dataset = 'fabert' 21 | output = args.output 22 | 23 | path = os.path.join(input, dataset) 24 | D = autosklearn.data.competition_data_manager.CompetitionDataManager(path) 25 | X = D.data['X_train'] 26 | y = D.data['Y_train'] 27 | X_valid = D.data['X_valid'] 28 | X_test = D.data['X_test'] 29 | 30 | # Replace the following array by a new ensemble 31 | choices = \ 32 | [(0.580000, ParamSklearnClassifier( 33 | configuration={ 34 | 'balancing:strategy': 'weighting', 35 | 'classifier:__choice__': 'extra_trees', 36 | 'classifier:extra_trees:bootstrap': 'True', 37 | 'classifier:extra_trees:criterion': 'gini', 38 | 'classifier:extra_trees:max_depth': 'None', 39 | 'classifier:extra_trees:max_features': 1.4927328322706173, 40 | 'classifier:extra_trees:min_samples_leaf': 1, 41 | 'classifier:extra_trees:min_samples_split': 5, 42 | 'classifier:extra_trees:min_weight_fraction_leaf': 0.0, 43 | 'classifier:extra_trees:n_estimators': 100, 44 | 'imputation:strategy': 'mean', 45 | 'one_hot_encoding:use_minimum_fraction': 'False', 46 | 'preprocessor:__choice__': 'select_rates', 47 | 'preprocessor:select_rates:alpha': 0.4308279694614349, 48 | 'preprocessor:select_rates:mode': 'fwe', 49 | 'preprocessor:select_rates:score_func': 'f_classif', 50 | 'rescaling:__choice__': 'min/max'})), 51 | (0.200000, ParamSklearnClassifier( 52 | configuration={ 53 | 'balancing:strategy': 'none', 54 | 'classifier:__choice__': 'sgd', 55 | 'classifier:sgd:alpha': 5.707045187542232e-06, 56 | 'classifier:sgd:average': 'True', 57 | 'classifier:sgd:eta0': 0.059208215107360226, 58 | 'classifier:sgd:fit_intercept': 'True', 59 | 'classifier:sgd:l1_ratio': 0.5696965689983325, 60 | 'classifier:sgd:learning_rate': 'constant', 61 | 'classifier:sgd:loss': 'log', 62 | 'classifier:sgd:n_iter': 809, 63 | 'classifier:sgd:penalty': 'elasticnet', 64 | 'imputation:strategy': 'median', 65 | 'one_hot_encoding:minimum_fraction': 0.45801169150718357, 66 | 'one_hot_encoding:use_minimum_fraction': 'True', 67 | 'preprocessor:__choice__': 'liblinear_svc_preprocessor', 68 | 'preprocessor:liblinear_svc_preprocessor:C': 9.102297055334894, 69 | 'preprocessor:liblinear_svc_preprocessor:dual': 'False', 70 | 'preprocessor:liblinear_svc_preprocessor:fit_intercept': 'True', 71 | 'preprocessor:liblinear_svc_preprocessor:intercept_scaling': 1, 72 | 'preprocessor:liblinear_svc_preprocessor:loss': 'squared_hinge', 73 | 'preprocessor:liblinear_svc_preprocessor:multi_class': 'ovr', 74 | 'preprocessor:liblinear_svc_preprocessor:penalty': 'l1', 75 | 'preprocessor:liblinear_svc_preprocessor:tol': 9.129411357422978e-05, 76 | 'rescaling:__choice__': 'normalize'})), 77 | (0.060000, ParamSklearnClassifier( 78 | configuration={ 79 | 'balancing:strategy': 'weighting', 80 | 'classifier:__choice__': 'sgd', 81 | 'classifier:sgd:alpha': 3.104241273548187e-05, 82 | 'classifier:sgd:average': 'False', 83 | 'classifier:sgd:eta0': 0.050396014246875294, 84 | 'classifier:sgd:fit_intercept': 'True', 85 | 'classifier:sgd:l1_ratio': 0.7121576951214108, 86 | 'classifier:sgd:learning_rate': 'optimal', 87 | 'classifier:sgd:loss': 'log', 88 | 'classifier:sgd:n_iter': 649, 89 | 'classifier:sgd:penalty': 'elasticnet', 90 | 'imputation:strategy': 'mean', 91 | 'one_hot_encoding:use_minimum_fraction': 'False', 92 | 'preprocessor:__choice__': 'no_preprocessing', 93 | 'rescaling:__choice__': 'min/max'})), 94 | (0.060000, ParamSklearnClassifier( 95 | configuration={ 96 | 'balancing:strategy': 'none', 97 | 'classifier:__choice__': 'passive_aggressive', 98 | 'classifier:passive_aggressive:C': 0.023003251414120036, 99 | 'classifier:passive_aggressive:fit_intercept': 'True', 100 | 'classifier:passive_aggressive:loss': 'hinge', 101 | 'classifier:passive_aggressive:n_iter': 57, 102 | 'imputation:strategy': 'most_frequent', 103 | 'one_hot_encoding:minimum_fraction': 0.012167961375954476, 104 | 'one_hot_encoding:use_minimum_fraction': 'True', 105 | 'preprocessor:__choice__': 'liblinear_svc_preprocessor', 106 | 'preprocessor:liblinear_svc_preprocessor:C': 0.07417606253933476, 107 | 'preprocessor:liblinear_svc_preprocessor:dual': 'False', 108 | 'preprocessor:liblinear_svc_preprocessor:fit_intercept': 'True', 109 | 'preprocessor:liblinear_svc_preprocessor:intercept_scaling': 1, 110 | 'preprocessor:liblinear_svc_preprocessor:loss': 'squared_hinge', 111 | 'preprocessor:liblinear_svc_preprocessor:multi_class': 'ovr', 112 | 'preprocessor:liblinear_svc_preprocessor:penalty': 'l1', 113 | 'preprocessor:liblinear_svc_preprocessor:tol': 0.0009557179607902859, 114 | 'rescaling:__choice__': 'none'})), 115 | (0.040000, ParamSklearnClassifier( 116 | configuration={ 117 | 'balancing:strategy': 'none', 118 | 'classifier:__choice__': 'liblinear_svc', 119 | 'classifier:liblinear_svc:C': 491.8319475226706, 120 | 'classifier:liblinear_svc:dual': 'False', 121 | 'classifier:liblinear_svc:fit_intercept': 'True', 122 | 'classifier:liblinear_svc:intercept_scaling': 1, 123 | 'classifier:liblinear_svc:loss': 'squared_hinge', 124 | 'classifier:liblinear_svc:multi_class': 'ovr', 125 | 'classifier:liblinear_svc:penalty': 'l2', 126 | 'classifier:liblinear_svc:tol': 0.0008252238346618138, 127 | 'imputation:strategy': 'most_frequent', 128 | 'one_hot_encoding:minimum_fraction': 0.00028396835704950287, 129 | 'one_hot_encoding:use_minimum_fraction': 'True', 130 | 'preprocessor:__choice__': 'liblinear_svc_preprocessor', 131 | 'preprocessor:liblinear_svc_preprocessor:C': 0.11029125786578071, 132 | 'preprocessor:liblinear_svc_preprocessor:dual': 'False', 133 | 'preprocessor:liblinear_svc_preprocessor:fit_intercept': 'True', 134 | 'preprocessor:liblinear_svc_preprocessor:intercept_scaling': 1, 135 | 'preprocessor:liblinear_svc_preprocessor:loss': 'squared_hinge', 136 | 'preprocessor:liblinear_svc_preprocessor:multi_class': 'ovr', 137 | 'preprocessor:liblinear_svc_preprocessor:penalty': 'l1', 138 | 'preprocessor:liblinear_svc_preprocessor:tol': 0.0003417183512181233, 139 | 'rescaling:__choice__': 'min/max'})), 140 | (0.040000, ParamSklearnClassifier( 141 | configuration={ 142 | 'balancing:strategy': 'weighting', 143 | 'classifier:__choice__': 'sgd', 144 | 'classifier:sgd:alpha': 2.618489922233997e-06, 145 | 'classifier:sgd:average': 'False', 146 | 'classifier:sgd:eta0': 0.0785971926323006, 147 | 'classifier:sgd:fit_intercept': 'True', 148 | 'classifier:sgd:l1_ratio': 0.1596938886542899, 149 | 'classifier:sgd:learning_rate': 'constant', 150 | 'classifier:sgd:loss': 'hinge', 151 | 'classifier:sgd:n_iter': 509, 152 | 'classifier:sgd:penalty': 'elasticnet', 153 | 'imputation:strategy': 'mean', 154 | 'one_hot_encoding:use_minimum_fraction': 'False', 155 | 'preprocessor:__choice__': 'select_rates', 156 | 'preprocessor:select_rates:alpha': 0.25578392394574817, 157 | 'preprocessor:select_rates:mode': 'fpr', 158 | 'preprocessor:select_rates:score_func': 'chi2', 159 | 'rescaling:__choice__': 'min/max'})), 160 | (0.020000, ParamSklearnClassifier( 161 | configuration={ 162 | 'balancing:strategy': 'weighting', 163 | 'classifier:__choice__': 'extra_trees', 164 | 'classifier:extra_trees:bootstrap': 'False', 165 | 'classifier:extra_trees:criterion': 'gini', 166 | 'classifier:extra_trees:max_depth': 'None', 167 | 'classifier:extra_trees:max_features': 2.1694048668692454, 168 | 'classifier:extra_trees:min_samples_leaf': 1, 169 | 'classifier:extra_trees:min_samples_split': 8, 170 | 'classifier:extra_trees:min_weight_fraction_leaf': 0.0, 171 | 'classifier:extra_trees:n_estimators': 100, 172 | 'imputation:strategy': 'median', 173 | 'one_hot_encoding:minimum_fraction': 0.23760831456778012, 174 | 'one_hot_encoding:use_minimum_fraction': 'True', 175 | 'preprocessor:__choice__': 'no_preprocessing', 176 | 'rescaling:__choice__': 'standardize'})), ] 177 | 178 | classifiers = [] 179 | targets = [] 180 | predictions = [] 181 | predictions_valid = [] 182 | predictions_test = [] 183 | 184 | # Make predictions and weight them 185 | iteration = 0 186 | for weight, classifier in choices: 187 | iteration += 1 188 | print dataset, "Iteration %d/%d" % (iteration, len(choices)) 189 | 190 | classifiers.append(classifier) 191 | try: 192 | classifier.fit(X.copy(), y.copy()) 193 | predictions_valid.append( 194 | classifier.predict_proba(X_valid.copy()) * weight) 195 | predictions_test.append( 196 | classifier.predict_proba(X_test.copy()) * weight) 197 | except Exception as e: 198 | print e 199 | print classifier 200 | 201 | # Output the predictions 202 | for name, predictions in [('valid', predictions_valid), 203 | ('test', predictions_test)]: 204 | predictions = np.array(predictions) 205 | predictions = np.sum(predictions, axis=0) 206 | 207 | filepath = os.path.join(output, '%s_%s_000.predict' % (dataset, name)) 208 | np.savetxt(filepath, predictions, delimiter=' ') 209 | -------------------------------------------------------------------------------- /002_volkert.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import numpy as np 5 | from sklearn.cross_validation import StratifiedKFold 6 | 7 | import autosklearn 8 | import autosklearn.data 9 | import autosklearn.data.competition_data_manager 10 | from autosklearn.evaluation.util import calculate_score 11 | from ParamSklearn.classification import ParamSklearnClassifier 12 | 13 | 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('input') 16 | parser.add_argument('output') 17 | args = parser.parse_args() 18 | 19 | input = args.input 20 | dataset = 'volkert' 21 | output = args.output 22 | 23 | path = os.path.join(input, dataset) 24 | D = autosklearn.data.competition_data_manager.CompetitionDataManager(path) 25 | X = D.data['X_train'] 26 | y = D.data['Y_train'] 27 | X_valid = D.data['X_valid'] 28 | X_test = D.data['X_test'] 29 | 30 | # Replace the following array by a new ensemble 31 | choices = \ 32 | [(0.480000, ParamSklearnClassifier(configuration={ 33 | 'balancing:strategy': 'none', 34 | 'classifier:__choice__': 'random_forest', 35 | 'classifier:random_forest:bootstrap': 'True', 36 | 'classifier:random_forest:criterion': 'entropy', 37 | 'classifier:random_forest:max_depth': 'None', 38 | 'classifier:random_forest:max_features': 4.885151102990943, 39 | 'classifier:random_forest:max_leaf_nodes': 'None', 40 | 'classifier:random_forest:min_samples_leaf': 2, 41 | 'classifier:random_forest:min_samples_split': 2, 42 | 'classifier:random_forest:min_weight_fraction_leaf': 0.0, 43 | 'classifier:random_forest:n_estimators': 100, 44 | 'imputation:strategy': 'median', 45 | 'one_hot_encoding:minimum_fraction': 0.059297498551361, 46 | 'one_hot_encoding:use_minimum_fraction': 'True', 47 | 'preprocessor:__choice__': 'gem', 48 | 'preprocessor:gem:N': 13, 49 | 'preprocessor:gem:precond': 0.31299029323203487, 50 | 'rescaling:__choice__': 'min/max'})), 51 | (0.300000, ParamSklearnClassifier( 52 | configuration={ 53 | 'balancing:strategy': 'none', 54 | 'classifier:__choice__': 'random_forest', 55 | 'classifier:random_forest:bootstrap': 'False', 56 | 'classifier:random_forest:criterion': 'entropy', 57 | 'classifier:random_forest:max_depth': 'None', 58 | 'classifier:random_forest:max_features': 4.908992016092793, 59 | 'classifier:random_forest:max_leaf_nodes': 'None', 60 | 'classifier:random_forest:min_samples_leaf': 2, 61 | 'classifier:random_forest:min_samples_split': 6, 62 | 'classifier:random_forest:min_weight_fraction_leaf': 0.0, 63 | 'classifier:random_forest:n_estimators': 100, 64 | 'imputation:strategy': 'mean', 65 | 'one_hot_encoding:minimum_fraction': 0.009349768412523697, 66 | 'one_hot_encoding:use_minimum_fraction': 'True', 67 | 'preprocessor:__choice__': 'fast_ica', 68 | 'preprocessor:fast_ica:algorithm': 'deflation', 69 | 'preprocessor:fast_ica:fun': 'exp', 70 | 'preprocessor:fast_ica:whiten': 'False', 71 | 'rescaling:__choice__': 'none'})), 72 | (0.180000, 73 | ParamSklearnClassifier( 74 | configuration={ 75 | 'balancing:strategy': 'weighting', 76 | 'classifier:__choice__': 'libsvm_svc', 77 | 'classifier:libsvm_svc:C': 445.91825904609124, 78 | 'classifier:libsvm_svc:gamma': 0.03873498413280048, 79 | 'classifier:libsvm_svc:kernel': 'rbf', 80 | 'classifier:libsvm_svc:max_iter': -1, 81 | 'classifier:libsvm_svc:shrinking': 'True', 82 | 'classifier:libsvm_svc:tol': 0.0008078719040695308, 83 | 'imputation:strategy': 'median', 84 | 'one_hot_encoding:use_minimum_fraction': 'False', 85 | 'preprocessor:__choice__': 'pca', 86 | 'preprocessor:pca:keep_variance': 0.7596970304901425, 87 | 'preprocessor:pca:whiten': 'True', 88 | 'rescaling:__choice__': 'standardize'})), 89 | (0.040000, ParamSklearnClassifier( 90 | configuration={ 91 | 'balancing:strategy': 'none', 92 | 'classifier:__choice__': 'random_forest', 93 | 'classifier:random_forest:bootstrap': 'False', 94 | 'classifier:random_forest:criterion': 'entropy', 95 | 'classifier:random_forest:max_depth': 'None', 96 | 'classifier:random_forest:max_features': 3.5340547102377364, 97 | 'classifier:random_forest:max_leaf_nodes': 'None', 98 | 'classifier:random_forest:min_samples_leaf': 2, 99 | 'classifier:random_forest:min_samples_split': 6, 100 | 'classifier:random_forest:min_weight_fraction_leaf': 0.0, 101 | 'classifier:random_forest:n_estimators': 100, 102 | 'imputation:strategy': 'mean', 103 | 'one_hot_encoding:minimum_fraction': 0.008518947433195237, 104 | 'one_hot_encoding:use_minimum_fraction': 'True', 105 | 'preprocessor:__choice__': 'fast_ica', 106 | 'preprocessor:fast_ica:algorithm': 'deflation', 107 | 'preprocessor:fast_ica:fun': 'cube', 108 | 'preprocessor:fast_ica:whiten': 'False', 109 | 'rescaling:__choice__': 'none'})), ] 110 | 111 | classifiers = [] 112 | targets = [] 113 | predictions = [] 114 | predictions_valid = [] 115 | predictions_test = [] 116 | 117 | # Make predictions and weight them 118 | iteration = 0 119 | for weight, classifier in choices: 120 | iteration += 1 121 | print dataset, "Iteration %d/%d" % (iteration, len(choices)) 122 | 123 | classifiers.append(classifier) 124 | try: 125 | classifier.fit(X.copy(), y.copy()) 126 | predictions_valid.append( 127 | classifier.predict_proba(X_valid.copy()) * weight) 128 | predictions_test.append( 129 | classifier.predict_proba(X_test.copy()) * weight) 130 | except Exception as e: 131 | print e 132 | print classifier 133 | 134 | # Output the predictions 135 | for name, predictions in [('valid', predictions_valid), 136 | ('test', predictions_test)]: 137 | predictions = np.array(predictions) 138 | predictions = np.sum(predictions, axis=0) 139 | 140 | filepath = os.path.join(output, '%s_%s_000.predict' % (dataset, name)) 141 | np.savetxt(filepath, predictions, delimiter=' ') 142 | -------------------------------------------------------------------------------- /003_dionis.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | from joblib import Parallel, delayed 5 | import numpy as np 6 | 7 | import autosklearn 8 | import autosklearn.data 9 | import autosklearn.data.competition_data_manager 10 | from autosklearn.pipeline.classification import SimpleClassificationPipeline 11 | 12 | parser = argparse.ArgumentParser() 13 | parser.add_argument('input') 14 | parser.add_argument('output') 15 | args = parser.parse_args() 16 | 17 | input = args.input 18 | dataset = 'dionis' 19 | output = args.output 20 | 21 | path = os.path.join(input, dataset) 22 | D = autosklearn.data.competition_data_manager.CompetitionDataManager(path) 23 | X = D.data['X_train'] 24 | y = D.data['Y_train'] 25 | X_valid = D.data['X_valid'] 26 | X_test = D.data['X_test'] 27 | 28 | # Replace the following array by a new ensemble 29 | choices = \ 30 | [(0.520000, SimpleClassificationPipeline(configuration={ 31 | 'balancing:strategy': 'none', 32 | 'classifier:__choice__': 'qda', 33 | 'classifier:qda:reg_param': 7.017044041208607, 34 | 'imputation:strategy': 'most_frequent', 35 | 'one_hot_encoding:use_minimum_fraction': 'False', 36 | 'preprocessor:__choice__': 'no_preprocessing', 37 | 'rescaling:__choice__': 'normalize'})), 38 | (0.360000, SimpleClassificationPipeline(configuration={ 39 | 'balancing:strategy': 'none', 40 | 'classifier:__choice__': 'qda', 41 | 'classifier:qda:reg_param': 0.5, 42 | 'imputation:strategy': 'most_frequent', 43 | 'one_hot_encoding:use_minimum_fraction': 'False', 44 | 'preprocessor:__choice__': 'select_rates', 45 | 'preprocessor:select_rates:alpha': 0.1, 46 | 'preprocessor:select_rates:mode': 'fpr', 47 | 'preprocessor:select_rates:score_func': 'chi2', 48 | 'rescaling:__choice__': 'min/max'})), 49 | (0.020000, SimpleClassificationPipeline(configuration={ 50 | 'balancing:strategy': 'none', 51 | 'classifier:__choice__': 'k_nearest_neighbors', 52 | 'classifier:k_nearest_neighbors:n_neighbors': 53, 53 | 'classifier:k_nearest_neighbors:p': 2, 54 | 'classifier:k_nearest_neighbors:weights': 'uniform', 55 | 'imputation:strategy': 'most_frequent', 56 | 'one_hot_encoding:minimum_fraction': 0.004107223932117523, 57 | 'one_hot_encoding:use_minimum_fraction': 'True', 58 | 'preprocessor:__choice__': 'select_rates', 59 | 'preprocessor:select_rates:alpha': 0.06365705922416094, 60 | 'preprocessor:select_rates:mode': 'fpr', 61 | 'preprocessor:select_rates:score_func': 'f_classif', 62 | 'rescaling:__choice__': 'min/max'})), 63 | (0.020000, SimpleClassificationPipeline(configuration={ 64 | 'balancing:strategy': 'weighting', 65 | 'classifier:__choice__': 'liblinear_svc', 66 | 'classifier:liblinear_svc:C': 1288.9425457179896, 67 | 'classifier:liblinear_svc:dual': 'False', 68 | 'classifier:liblinear_svc:fit_intercept': 'True', 69 | 'classifier:liblinear_svc:intercept_scaling': 1, 70 | 'classifier:liblinear_svc:loss': 'squared_hinge', 71 | 'classifier:liblinear_svc:multi_class': 'ovr', 72 | 'classifier:liblinear_svc:penalty': 'l2', 73 | 'classifier:liblinear_svc:tol': 6.852190351970404e-05, 74 | 'imputation:strategy': 'most_frequent', 75 | 'one_hot_encoding:minimum_fraction': 0.016322736180045382, 76 | 'one_hot_encoding:use_minimum_fraction': 'True', 77 | 'preprocessor:__choice__': 'select_rates', 78 | 'preprocessor:select_rates:alpha': 0.48582026589548283, 79 | 'preprocessor:select_rates:mode': 'fpr', 80 | 'preprocessor:select_rates:score_func': 'chi2', 81 | 'rescaling:__choice__': 'min/max'})), 82 | (0.020000, SimpleClassificationPipeline(configuration={ 83 | 'balancing:strategy': 'weighting', 84 | 'classifier:__choice__': 'extra_trees', 85 | 'classifier:extra_trees:bootstrap': 'False', 86 | 'classifier:extra_trees:criterion': 'gini', 87 | 'classifier:extra_trees:max_depth': 'None', 88 | 'classifier:extra_trees:max_features': 0.6872563090086077, 89 | 'classifier:extra_trees:min_samples_leaf': 9, 90 | 'classifier:extra_trees:min_samples_split': 8, 91 | 'classifier:extra_trees:min_weight_fraction_leaf': 0.0, 92 | 'classifier:extra_trees:n_estimators': 100, 93 | 'imputation:strategy': 'median', 94 | 'one_hot_encoding:minimum_fraction': 0.00048281479349728755, 95 | 'one_hot_encoding:use_minimum_fraction': 'True', 96 | 'preprocessor:__choice__': 'feature_agglomeration', 97 | 'preprocessor:feature_agglomeration:affinity': 'manhattan', 98 | 'preprocessor:feature_agglomeration:linkage': 'average', 99 | 'preprocessor:feature_agglomeration:n_clusters': 170, 100 | 'preprocessor:feature_agglomeration:pooling_func': 'mean', 101 | 'rescaling:__choice__': 'normalize'})), 102 | (0.020000, SimpleClassificationPipeline(configuration={ 103 | 'balancing:strategy': 'weighting', 104 | 'classifier:__choice__': 'liblinear_svc', 105 | 'classifier:liblinear_svc:C': 737.3354222113379, 106 | 'classifier:liblinear_svc:dual': 'False', 107 | 'classifier:liblinear_svc:fit_intercept': 'True', 108 | 'classifier:liblinear_svc:intercept_scaling': 1, 109 | 'classifier:liblinear_svc:loss': 'squared_hinge', 110 | 'classifier:liblinear_svc:multi_class': 'ovr', 111 | 'classifier:liblinear_svc:penalty': 'l2', 112 | 'classifier:liblinear_svc:tol': 0.029993063054990464, 113 | 'imputation:strategy': 'median', 114 | 'one_hot_encoding:minimum_fraction': 0.0007084092083452885, 115 | 'one_hot_encoding:use_minimum_fraction': 'True', 116 | 'preprocessor:__choice__': 'select_rates', 117 | 'preprocessor:select_rates:alpha': 0.28020088992913833, 118 | 'preprocessor:select_rates:mode': 'fdr', 119 | 'preprocessor:select_rates:score_func': 'f_classif', 120 | 'rescaling:__choice__': 'standardize'})), 121 | (0.020000, SimpleClassificationPipeline(configuration={ 122 | 'balancing:strategy': 'none', 123 | 'classifier:__choice__': 'k_nearest_neighbors', 124 | 'classifier:k_nearest_neighbors:n_neighbors': 1, 125 | 'classifier:k_nearest_neighbors:p': 2, 126 | 'classifier:k_nearest_neighbors:weights': 'uniform', 127 | 'imputation:strategy': 'median', 128 | 'one_hot_encoding:minimum_fraction': 0.015690633649222446, 129 | 'one_hot_encoding:use_minimum_fraction': 'True', 130 | 'preprocessor:__choice__': 'no_preprocessing', 131 | 'rescaling:__choice__': 'min/max'})), 132 | (0.020000, SimpleClassificationPipeline(configuration={ 133 | 'balancing:strategy': 'weighting', 134 | 'classifier:__choice__': 'extra_trees', 135 | 'classifier:extra_trees:bootstrap': 'False', 136 | 'classifier:extra_trees:criterion': 'gini', 137 | 'classifier:extra_trees:max_depth': 'None', 138 | 'classifier:extra_trees:max_features': 1.0, 139 | 'classifier:extra_trees:min_samples_leaf': 10, 140 | 'classifier:extra_trees:min_samples_split': 2, 141 | 'classifier:extra_trees:min_weight_fraction_leaf': 0.0, 142 | 'classifier:extra_trees:n_estimators': 100, 143 | 'imputation:strategy': 'median', 144 | 'one_hot_encoding:minimum_fraction': 0.01, 145 | 'one_hot_encoding:use_minimum_fraction': 'True', 146 | 'preprocessor:__choice__': 'select_rates', 147 | 'preprocessor:select_rates:alpha': 0.1, 148 | 'preprocessor:select_rates:mode': 'fpr', 149 | 'preprocessor:select_rates:score_func': 'chi2', 150 | 'rescaling:__choice__': 'none'})), 151 | ] 152 | 153 | targets = [] 154 | predictions = [] 155 | predictions_valid = [] 156 | predictions_test = [] 157 | 158 | 159 | def fit_and_predict(estimator, weight, X, y): 160 | try: 161 | estimator.fit(X.copy(), y.copy()) 162 | pv = estimator.predict_proba(X_valid.copy()) * weight 163 | pt = estimator.predict_proba(X_test.copy()) * weight 164 | except Exception as e: 165 | print(e) 166 | print(estimator.configuration) 167 | pv = None 168 | pt = None 169 | return pv, pt 170 | 171 | 172 | # Make predictions and weight them 173 | all_predictions = Parallel(n_jobs=-1)(delayed(fit_and_predict) \ 174 | (estimator, weight, X, y) for 175 | weight, estimator in choices) 176 | for pv, pt in all_predictions: 177 | predictions_valid.append(pv) 178 | predictions_test.append(pt) 179 | 180 | # Output the predictions 181 | for name, predictions in [('valid', predictions_valid), 182 | ('test', predictions_test)]: 183 | predictions = np.array(predictions) 184 | predictions = np.sum(predictions, axis=0).astype(np.float32) 185 | 186 | filepath = os.path.join(output, '%s_%s_000.predict' % (dataset, name)) 187 | np.savetxt(filepath, predictions, delimiter=' ', fmt='%.4e') 188 | -------------------------------------------------------------------------------- /003_grigoris.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | from joblib import Parallel, delayed 5 | import numpy as np 6 | 7 | import autosklearn 8 | import autosklearn.data 9 | import autosklearn.data.competition_data_manager 10 | from autosklearn.pipeline.classification import SimpleClassificationPipeline 11 | 12 | parser = argparse.ArgumentParser() 13 | parser.add_argument('input') 14 | parser.add_argument('output') 15 | args = parser.parse_args() 16 | 17 | input = args.input 18 | dataset = 'grigoris' 19 | output = args.output 20 | 21 | path = os.path.join(input, dataset) 22 | D = autosklearn.data.competition_data_manager.CompetitionDataManager(path) 23 | X = D.data['X_train'] 24 | y = D.data['Y_train'] 25 | X_valid = D.data['X_valid'] 26 | X_test = D.data['X_test'] 27 | 28 | # Replace the following array by a new ensemble 29 | choices = \ 30 | [(0.720000, SimpleClassificationPipeline(configuration={ 31 | 'balancing:strategy': 'none', 32 | 'classifier:__choice__': 'liblinear_svc', 33 | 'classifier:liblinear_svc:C': 0.0665747065156058, 34 | 'classifier:liblinear_svc:dual': 'False', 35 | 'classifier:liblinear_svc:fit_intercept': 'True', 36 | 'classifier:liblinear_svc:intercept_scaling': 1, 37 | 'classifier:liblinear_svc:loss': 'squared_hinge', 38 | 'classifier:liblinear_svc:multi_class': 'ovr', 39 | 'classifier:liblinear_svc:penalty': 'l2', 40 | 'classifier:liblinear_svc:tol': 0.002362381246384099, 41 | 'imputation:strategy': 'mean', 42 | 'one_hot_encoding:minimum_fraction': 0.0972585384393519, 43 | 'one_hot_encoding:use_minimum_fraction': 'True', 44 | 'preprocessor:__choice__': 'no_preprocessing', 45 | 'rescaling:__choice__': 'normalize'})), 46 | (0.100000, SimpleClassificationPipeline(configuration={ 47 | 'balancing:strategy': 'weighting', 48 | 'classifier:__choice__': 'liblinear_svc', 49 | 'classifier:liblinear_svc:C': 7.705276414124367, 50 | 'classifier:liblinear_svc:dual': 'False', 51 | 'classifier:liblinear_svc:fit_intercept': 'True', 52 | 'classifier:liblinear_svc:intercept_scaling': 1, 53 | 'classifier:liblinear_svc:loss': 'squared_hinge', 54 | 'classifier:liblinear_svc:multi_class': 'ovr', 55 | 'classifier:liblinear_svc:penalty': 'l2', 56 | 'classifier:liblinear_svc:tol': 0.028951969755081776, 57 | 'imputation:strategy': 'most_frequent', 58 | 'one_hot_encoding:use_minimum_fraction': 'False', 59 | 'preprocessor:__choice__': 'no_preprocessing', 60 | 'rescaling:__choice__': 'normalize'})), 61 | (0.080000, SimpleClassificationPipeline(configuration={ 62 | 'balancing:strategy': 'weighting', 63 | 'classifier:__choice__': 'liblinear_svc', 64 | 'classifier:liblinear_svc:C': 1.0, 65 | 'classifier:liblinear_svc:dual': 'False', 66 | 'classifier:liblinear_svc:fit_intercept': 'True', 67 | 'classifier:liblinear_svc:intercept_scaling': 1, 68 | 'classifier:liblinear_svc:loss': 'squared_hinge', 69 | 'classifier:liblinear_svc:multi_class': 'ovr', 70 | 'classifier:liblinear_svc:penalty': 'l2', 71 | 'classifier:liblinear_svc:tol': 0.0001, 72 | 'imputation:strategy': 'median', 73 | 'one_hot_encoding:minimum_fraction': 0.0033856971814438443, 74 | 'one_hot_encoding:use_minimum_fraction': 'True', 75 | 'preprocessor:__choice__': 'no_preprocessing', 76 | 'rescaling:__choice__': 'normalize'})), 77 | (0.080000, SimpleClassificationPipeline(configuration={ 78 | 'balancing:strategy': 'weighting', 79 | 'classifier:__choice__': 'liblinear_svc', 80 | 'classifier:liblinear_svc:C': 0.2598769185905466, 81 | 'classifier:liblinear_svc:dual': 'False', 82 | 'classifier:liblinear_svc:fit_intercept': 'True', 83 | 'classifier:liblinear_svc:intercept_scaling': 1, 84 | 'classifier:liblinear_svc:loss': 'squared_hinge', 85 | 'classifier:liblinear_svc:multi_class': 'ovr', 86 | 'classifier:liblinear_svc:penalty': 'l2', 87 | 'classifier:liblinear_svc:tol': 0.001007160236770467, 88 | 'imputation:strategy': 'median', 89 | 'one_hot_encoding:minimum_fraction': 0.019059927375795167, 90 | 'one_hot_encoding:use_minimum_fraction': 'True', 91 | 'preprocessor:__choice__': 'no_preprocessing', 92 | 'rescaling:__choice__': 'normalize'})), 93 | (0.020000, SimpleClassificationPipeline(configuration={ 94 | 'balancing:strategy': 'weighting', 95 | 'classifier:__choice__': 'liblinear_svc', 96 | 'classifier:liblinear_svc:C': 0.6849477125990308, 97 | 'classifier:liblinear_svc:dual': 'False', 98 | 'classifier:liblinear_svc:fit_intercept': 'True', 99 | 'classifier:liblinear_svc:intercept_scaling': 1, 100 | 'classifier:liblinear_svc:loss': 'squared_hinge', 101 | 'classifier:liblinear_svc:multi_class': 'ovr', 102 | 'classifier:liblinear_svc:penalty': 'l2', 103 | 'classifier:liblinear_svc:tol': 1.2676147487949745e-05, 104 | 'imputation:strategy': 'mean', 105 | 'one_hot_encoding:minimum_fraction': 0.003803817610653382, 106 | 'one_hot_encoding:use_minimum_fraction': 'True', 107 | 'preprocessor:__choice__': 'no_preprocessing', 108 | 'rescaling:__choice__': 'normalize'})), 109 | ] 110 | 111 | targets = [] 112 | predictions = [] 113 | predictions_valid = [] 114 | predictions_test = [] 115 | 116 | 117 | def fit_and_predict(estimator, weight, X, y): 118 | try: 119 | estimator.fit(X.copy(), y.copy()) 120 | pv = estimator.predict_proba(X_valid.copy()) * weight 121 | pt = estimator.predict_proba(X_test.copy()) * weight 122 | except Exception as e: 123 | print(e) 124 | print(estimator.configuration) 125 | pv = None 126 | pt = None 127 | return pv, pt 128 | 129 | 130 | # Make predictions and weight them 131 | all_predictions = Parallel(n_jobs=-1)(delayed(fit_and_predict) \ 132 | (estimator, weight, X, y) for 133 | weight, estimator in choices) 134 | for pv, pt in all_predictions: 135 | predictions_valid.append(pv) 136 | predictions_test.append(pt) 137 | 138 | # Output the predictions 139 | for name, predictions in [('valid', predictions_valid), 140 | ('test', predictions_test)]: 141 | predictions = np.array(predictions) 142 | predictions = np.sum(predictions, axis=0).astype(np.float32) 143 | 144 | filepath = os.path.join(output, '%s_%s_000.predict' % (dataset, name)) 145 | np.savetxt(filepath, predictions, delimiter=' ', fmt='%.4e') 146 | -------------------------------------------------------------------------------- /003_wallis.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import numpy as np 5 | 6 | import autosklearn 7 | import autosklearn.data 8 | import autosklearn.data.competition_data_manager 9 | from autosklearn.pipeline.classification import SimpleClassificationPipeline 10 | 11 | 12 | parser = argparse.ArgumentParser() 13 | parser.add_argument('input') 14 | parser.add_argument('output') 15 | args = parser.parse_args() 16 | 17 | input = args.input 18 | dataset = 'wallis' 19 | output = args.output 20 | 21 | path = os.path.join(input, dataset) 22 | D = autosklearn.data.competition_data_manager.CompetitionDataManager(path) 23 | X = D.data['X_train'] 24 | y = D.data['Y_train'] 25 | X_valid = D.data['X_valid'] 26 | X_test = D.data['X_test'] 27 | 28 | # Replace the following array by a new ensemble 29 | choices = \ 30 | [(0.580000, SimpleClassificationPipeline(configuration={ 31 | 'balancing:strategy': 'weighting', 32 | 'classifier:__choice__': 'passive_aggressive', 33 | 'classifier:passive_aggressive:C': 0.0006373873391108438, 34 | 'classifier:passive_aggressive:fit_intercept': 'True', 35 | 'classifier:passive_aggressive:loss': 'squared_hinge', 36 | 'classifier:passive_aggressive:n_iter': 18, 37 | 'imputation:strategy': 'median', 38 | 'one_hot_encoding:use_minimum_fraction': 'False', 39 | 'preprocessor:__choice__': 'no_preprocessing', 40 | 'rescaling:__choice__': 'normalize'})), 41 | (0.200000, SimpleClassificationPipeline(configuration={ 42 | 'balancing:strategy': 'weighting', 43 | 'classifier:__choice__': 'passive_aggressive', 44 | 'classifier:passive_aggressive:C': 0.000465329983806252, 45 | 'classifier:passive_aggressive:fit_intercept': 'True', 46 | 'classifier:passive_aggressive:loss': 'squared_hinge', 47 | 'classifier:passive_aggressive:n_iter': 34, 48 | 'imputation:strategy': 'median', 49 | 'one_hot_encoding:use_minimum_fraction': 'False', 50 | 'preprocessor:__choice__': 'kernel_pca', 51 | 'preprocessor:kernel_pca:kernel': 'cosine', 52 | 'preprocessor:kernel_pca:n_components': 1351, 53 | 'rescaling:__choice__': 'normalize'})), 54 | (0.180000, SimpleClassificationPipeline(configuration={ 55 | 'balancing:strategy': 'none', 56 | 'classifier:__choice__': 'liblinear_svc', 57 | 'classifier:liblinear_svc:C': 0.7416809477859192, 58 | 'classifier:liblinear_svc:dual': 'False', 59 | 'classifier:liblinear_svc:fit_intercept': 'True', 60 | 'classifier:liblinear_svc:intercept_scaling': 1, 61 | 'classifier:liblinear_svc:loss': 'squared_hinge', 62 | 'classifier:liblinear_svc:multi_class': 'ovr', 63 | 'classifier:liblinear_svc:penalty': 'l2', 64 | 'classifier:liblinear_svc:tol': 0.0048882934000166346, 65 | 'imputation:strategy': 'most_frequent', 66 | 'one_hot_encoding:use_minimum_fraction': 'False', 67 | 'preprocessor:__choice__': 'select_percentile_classification', 68 | 'preprocessor:select_percentile_classification:percentile': 19.775149789978155, 69 | 'preprocessor:select_percentile_classification:score_func': 'chi2', 70 | 'rescaling:__choice__': 'normalize'})), 71 | (0.020000, SimpleClassificationPipeline(configuration={ 72 | 'balancing:strategy': 'none', 73 | 'classifier:__choice__': 'liblinear_svc', 74 | 'classifier:liblinear_svc:C': 0.4010081266689033, 75 | 'classifier:liblinear_svc:dual': 'False', 76 | 'classifier:liblinear_svc:fit_intercept': 'True', 77 | 'classifier:liblinear_svc:intercept_scaling': 1, 78 | 'classifier:liblinear_svc:loss': 'squared_hinge', 79 | 'classifier:liblinear_svc:multi_class': 'ovr', 80 | 'classifier:liblinear_svc:penalty': 'l2', 81 | 'classifier:liblinear_svc:tol': 0.003197120920655818, 82 | 'imputation:strategy': 'mean', 83 | 'one_hot_encoding:minimum_fraction': 0.0002497904559463802, 84 | 'one_hot_encoding:use_minimum_fraction': 'True', 85 | 'preprocessor:__choice__': 'no_preprocessing', 86 | 'rescaling:__choice__': 'normalize'})), 87 | (0.020000, SimpleClassificationPipeline(configuration={ 88 | 'balancing:strategy': 'none', 89 | 'classifier:__choice__': 'liblinear_svc', 90 | 'classifier:liblinear_svc:C': 0.7444178979935873, 91 | 'classifier:liblinear_svc:dual': 'False', 92 | 'classifier:liblinear_svc:fit_intercept': 'True', 93 | 'classifier:liblinear_svc:intercept_scaling': 1, 94 | 'classifier:liblinear_svc:loss': 'squared_hinge', 95 | 'classifier:liblinear_svc:multi_class': 'ovr', 96 | 'classifier:liblinear_svc:penalty': 'l2', 97 | 'classifier:liblinear_svc:tol': 0.00359411438055, 98 | 'imputation:strategy': 'mean', 99 | 'one_hot_encoding:minimum_fraction': 0.0018636449908690695, 100 | 'one_hot_encoding:use_minimum_fraction': 'True', 101 | 'preprocessor:__choice__': 'nystroem_sampler', 102 | 'preprocessor:nystroem_sampler:kernel': 'cosine', 103 | 'preprocessor:nystroem_sampler:n_components': 5183, 104 | 'rescaling:__choice__': 'normalize'})), 105 | ] 106 | 107 | targets = [] 108 | predictions = [] 109 | predictions_valid = [] 110 | predictions_test = [] 111 | 112 | # Make predictions and weight them 113 | iteration = 0 114 | for weight, classifier in choices: 115 | iteration += 1 116 | print(dataset, "Iteration %d/%d" % (iteration, len(choices))) 117 | try: 118 | classifier.fit(X.copy(), y.copy()) 119 | predictions_valid.append( 120 | classifier.predict_proba(X_valid.copy()) * weight) 121 | predictions_test.append( 122 | classifier.predict_proba(X_test.copy()) * weight) 123 | except Exception as e: 124 | print(e) 125 | print(classifier.configuration) 126 | 127 | # Output the predictions 128 | for name, predictions in [('valid', predictions_valid), 129 | ('test', predictions_test)]: 130 | predictions = np.array(predictions) 131 | predictions = np.sum(predictions, axis=0).astype(np.float32) 132 | 133 | filepath = os.path.join(output, '%s_%s_000.predict' % (dataset, name)) 134 | np.savetxt(filepath, predictions, delimiter=' ', fmt = '%.4e') 135 | -------------------------------------------------------------------------------- /004_evita.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | from joblib import Parallel, delayed 5 | import numpy as np 6 | 7 | import autosklearn 8 | import autosklearn.data 9 | import autosklearn.data.competition_data_manager 10 | from autosklearn.pipeline.classification import SimpleClassificationPipeline 11 | 12 | parser = argparse.ArgumentParser() 13 | parser.add_argument('input') 14 | parser.add_argument('output') 15 | args = parser.parse_args() 16 | 17 | input = args.input 18 | dataset = 'evita' 19 | output = args.output 20 | 21 | path = os.path.join(input, dataset) 22 | D = autosklearn.data.competition_data_manager.CompetitionDataManager(path) 23 | X = D.data['X_train'] 24 | y = D.data['Y_train'] 25 | X_valid = D.data['X_valid'] 26 | X_test = D.data['X_test'] 27 | 28 | # Replace the following array by a new ensemble 29 | choices = \ 30 | [(0.320000, SimpleClassificationPipeline(configuration={ 31 | 'balancing:strategy': 'weighting', 32 | 'classifier:__choice__': 'xgradient_boosting', 33 | 'classifier:xgradient_boosting:base_score': 0.5, 34 | 'classifier:xgradient_boosting:colsample_bylevel': 1, 35 | 'classifier:xgradient_boosting:colsample_bytree': 1, 36 | 'classifier:xgradient_boosting:gamma': 0, 37 | 'classifier:xgradient_boosting:learning_rate': 0.083957576764175909, 38 | 'classifier:xgradient_boosting:max_delta_step': 0, 39 | 'classifier:xgradient_boosting:max_depth': 9, 40 | 'classifier:xgradient_boosting:min_child_weight': 1, 41 | 'classifier:xgradient_boosting:n_estimators': 207, 42 | 'classifier:xgradient_boosting:reg_alpha': 0, 43 | 'classifier:xgradient_boosting:reg_lambda': 1, 44 | 'classifier:xgradient_boosting:scale_pos_weight': 1, 45 | 'classifier:xgradient_boosting:subsample': 0.79041547139233681, 46 | 'imputation:strategy': 'median', 47 | 'one_hot_encoding:use_minimum_fraction': 'False', 48 | 'preprocessor:__choice__': 'select_rates', 49 | 'preprocessor:select_rates:alpha': 0.033271689466917775, 50 | 'preprocessor:select_rates:mode': 'fdr', 51 | 'preprocessor:select_rates:score_func': 'chi2', 52 | 'rescaling:__choice__': 'none'})), 53 | (0.140000, SimpleClassificationPipeline(configuration={ 54 | 'balancing:strategy': 'none', 55 | 'classifier:__choice__': 'extra_trees', 56 | 'classifier:extra_trees:bootstrap': 'False', 57 | 'classifier:extra_trees:criterion': 'gini', 58 | 'classifier:extra_trees:max_depth': 'None', 59 | 'classifier:extra_trees:max_features': 1.0, 60 | 'classifier:extra_trees:min_samples_leaf': 1, 61 | 'classifier:extra_trees:min_samples_split': 2, 62 | 'classifier:extra_trees:min_weight_fraction_leaf': 0.0, 63 | 'classifier:extra_trees:n_estimators': 100, 64 | 'imputation:strategy': 'most_frequent', 65 | 'one_hot_encoding:use_minimum_fraction': 'False', 66 | 'preprocessor:__choice__': 'select_rates', 67 | 'preprocessor:select_rates:alpha': 0.10000000000000001, 68 | 'preprocessor:select_rates:mode': 'fdr', 69 | 'preprocessor:select_rates:score_func': 'chi2', 70 | 'rescaling:__choice__': 'none'})), 71 | (0.100000, SimpleClassificationPipeline(configuration={ 72 | 'balancing:strategy': 'none', 73 | 'classifier:__choice__': 'random_forest', 74 | 'classifier:random_forest:bootstrap': 'False', 75 | 'classifier:random_forest:criterion': 'gini', 76 | 'classifier:random_forest:max_depth': 'None', 77 | 'classifier:random_forest:max_features': 3.904721926856924, 78 | 'classifier:random_forest:max_leaf_nodes': 'None', 79 | 'classifier:random_forest:min_samples_leaf': 2, 80 | 'classifier:random_forest:min_samples_split': 7, 81 | 'classifier:random_forest:min_weight_fraction_leaf': 0.0, 82 | 'classifier:random_forest:n_estimators': 100, 83 | 'imputation:strategy': 'most_frequent', 84 | 'one_hot_encoding:minimum_fraction': 0.036176664478653142, 85 | 'one_hot_encoding:use_minimum_fraction': 'True', 86 | 'preprocessor:__choice__': 'select_percentile_classification', 87 | 'preprocessor:select_percentile_classification:percentile': 91.78175624881186, 88 | 'preprocessor:select_percentile_classification:score_func': 'chi2', 89 | 'rescaling:__choice__': 'none'})), 90 | (0.080000, SimpleClassificationPipeline(configuration={ 91 | 'balancing:strategy': 'none', 92 | 'classifier:__choice__': 'random_forest', 93 | 'classifier:random_forest:bootstrap': 'True', 94 | 'classifier:random_forest:criterion': 'gini', 95 | 'classifier:random_forest:max_depth': 'None', 96 | 'classifier:random_forest:max_features': 1.0, 97 | 'classifier:random_forest:max_leaf_nodes': 'None', 98 | 'classifier:random_forest:min_samples_leaf': 1, 99 | 'classifier:random_forest:min_samples_split': 2, 100 | 'classifier:random_forest:min_weight_fraction_leaf': 0.0, 101 | 'classifier:random_forest:n_estimators': 100, 102 | 'imputation:strategy': 'median', 103 | 'one_hot_encoding:use_minimum_fraction': 'False', 104 | 'preprocessor:__choice__': 'select_rates', 105 | 'preprocessor:select_rates:alpha': 0.18915206967606921, 106 | 'preprocessor:select_rates:mode': 'fpr', 107 | 'preprocessor:select_rates:score_func': 'chi2', 108 | 'rescaling:__choice__': 'standardize'})), 109 | (0.080000, SimpleClassificationPipeline(configuration={ 110 | 'balancing:strategy': 'none', 111 | 'classifier:__choice__': 'extra_trees', 112 | 'classifier:extra_trees:bootstrap': 'False', 113 | 'classifier:extra_trees:criterion': 'gini', 114 | 'classifier:extra_trees:max_depth': 'None', 115 | 'classifier:extra_trees:max_features': 0.59875097583441961, 116 | 'classifier:extra_trees:min_samples_leaf': 1, 117 | 'classifier:extra_trees:min_samples_split': 2, 118 | 'classifier:extra_trees:min_weight_fraction_leaf': 0.0, 119 | 'classifier:extra_trees:n_estimators': 100, 120 | 'imputation:strategy': 'mean', 121 | 'one_hot_encoding:use_minimum_fraction': 'False', 122 | 'preprocessor:__choice__': 'select_rates', 123 | 'preprocessor:select_rates:alpha': 0.13663946292601112, 124 | 'preprocessor:select_rates:mode': 'fpr', 125 | 'preprocessor:select_rates:score_func': 'chi2', 126 | 'rescaling:__choice__': 'standardize'})), 127 | (0.060000, SimpleClassificationPipeline(configuration={ 128 | 'balancing:strategy': 'weighting', 129 | 'classifier:__choice__': 'random_forest', 130 | 'classifier:random_forest:bootstrap': 'True', 131 | 'classifier:random_forest:criterion': 'gini', 132 | 'classifier:random_forest:max_depth': 'None', 133 | 'classifier:random_forest:max_features': 1.0, 134 | 'classifier:random_forest:max_leaf_nodes': 'None', 135 | 'classifier:random_forest:min_samples_leaf': 1, 136 | 'classifier:random_forest:min_samples_split': 2, 137 | 'classifier:random_forest:min_weight_fraction_leaf': 0.0, 138 | 'classifier:random_forest:n_estimators': 100, 139 | 'imputation:strategy': 'median', 140 | 'one_hot_encoding:use_minimum_fraction': 'False', 141 | 'preprocessor:__choice__': 'select_rates', 142 | 'preprocessor:select_rates:alpha': 0.10000000000000001, 143 | 'preprocessor:select_rates:mode': 'fpr', 144 | 'preprocessor:select_rates:score_func': 'chi2', 145 | 'rescaling:__choice__': 'none'})), 146 | (0.040000, SimpleClassificationPipeline(configuration={ 147 | 'balancing:strategy': 'none', 148 | 'classifier:__choice__': 'extra_trees', 149 | 'classifier:extra_trees:bootstrap': 'False', 150 | 'classifier:extra_trees:criterion': 'gini', 151 | 'classifier:extra_trees:max_depth': 'None', 152 | 'classifier:extra_trees:max_features': 2.4071018354857294, 153 | 'classifier:extra_trees:min_samples_leaf': 2, 154 | 'classifier:extra_trees:min_samples_split': 9, 155 | 'classifier:extra_trees:min_weight_fraction_leaf': 0.0, 156 | 'classifier:extra_trees:n_estimators': 100, 157 | 'imputation:strategy': 'median', 158 | 'one_hot_encoding:use_minimum_fraction': 'False', 159 | 'preprocessor:__choice__': 'select_rates', 160 | 'preprocessor:select_rates:alpha': 0.34844304591109215, 161 | 'preprocessor:select_rates:mode': 'fpr', 162 | 'preprocessor:select_rates:score_func': 'chi2', 163 | 'rescaling:__choice__': 'none'})), 164 | (0.040000, SimpleClassificationPipeline(configuration={ 165 | 'balancing:strategy': 'weighting', 166 | 'classifier:__choice__': 'random_forest', 167 | 'classifier:random_forest:bootstrap': 'False', 168 | 'classifier:random_forest:criterion': 'gini', 169 | 'classifier:random_forest:max_depth': 'None', 170 | 'classifier:random_forest:max_features': 2.3037777871550227, 171 | 'classifier:random_forest:max_leaf_nodes': 'None', 172 | 'classifier:random_forest:min_samples_leaf': 1, 173 | 'classifier:random_forest:min_samples_split': 6, 174 | 'classifier:random_forest:min_weight_fraction_leaf': 0.0, 175 | 'classifier:random_forest:n_estimators': 100, 176 | 'imputation:strategy': 'mean', 177 | 'one_hot_encoding:use_minimum_fraction': 'False', 178 | 'preprocessor:__choice__': 'no_preprocessing', 179 | 'rescaling:__choice__': 'standardize'})), 180 | (0.040000, SimpleClassificationPipeline(configuration={ 181 | 'balancing:strategy': 'weighting', 182 | 'classifier:__choice__': 'random_forest', 183 | 'classifier:random_forest:bootstrap': 'False', 184 | 'classifier:random_forest:criterion': 'entropy', 185 | 'classifier:random_forest:max_depth': 'None', 186 | 'classifier:random_forest:max_features': 3.9417933307381925, 187 | 'classifier:random_forest:max_leaf_nodes': 'None', 188 | 'classifier:random_forest:min_samples_leaf': 2, 189 | 'classifier:random_forest:min_samples_split': 3, 190 | 'classifier:random_forest:min_weight_fraction_leaf': 0.0, 191 | 'classifier:random_forest:n_estimators': 100, 192 | 'imputation:strategy': 'median', 193 | 'one_hot_encoding:minimum_fraction': 0.076515481895064422, 194 | 'one_hot_encoding:use_minimum_fraction': 'True', 195 | 'preprocessor:__choice__': 'select_rates', 196 | 'preprocessor:select_rates:alpha': 0.39998541946519961, 197 | 'preprocessor:select_rates:mode': 'fpr', 198 | 'preprocessor:select_rates:score_func': 'chi2', 199 | 'rescaling:__choice__': 'standardize'})), 200 | (0.020000, SimpleClassificationPipeline(configuration={ 201 | 'balancing:strategy': 'weighting', 202 | 'classifier:__choice__': 'extra_trees', 203 | 'classifier:extra_trees:bootstrap': 'True', 204 | 'classifier:extra_trees:criterion': 'gini', 205 | 'classifier:extra_trees:max_depth': 'None', 206 | 'classifier:extra_trees:max_features': 2.6560184696178109, 207 | 'classifier:extra_trees:min_samples_leaf': 1, 208 | 'classifier:extra_trees:min_samples_split': 9, 209 | 'classifier:extra_trees:min_weight_fraction_leaf': 0.0, 210 | 'classifier:extra_trees:n_estimators': 100, 211 | 'imputation:strategy': 'most_frequent', 212 | 'one_hot_encoding:use_minimum_fraction': 'False', 213 | 'preprocessor:__choice__': 'select_rates', 214 | 'preprocessor:select_rates:alpha': 0.49576705570976692, 215 | 'preprocessor:select_rates:mode': 'fdr', 216 | 'preprocessor:select_rates:score_func': 'chi2', 217 | 'rescaling:__choice__': 'none'})), 218 | (0.020000, SimpleClassificationPipeline(configuration={ 219 | 'balancing:strategy': 'weighting', 220 | 'classifier:__choice__': 'extra_trees', 221 | 'classifier:extra_trees:bootstrap': 'True', 222 | 'classifier:extra_trees:criterion': 'gini', 223 | 'classifier:extra_trees:max_depth': 'None', 224 | 'classifier:extra_trees:max_features': 2.8762254807814838, 225 | 'classifier:extra_trees:min_samples_leaf': 7, 226 | 'classifier:extra_trees:min_samples_split': 7, 227 | 'classifier:extra_trees:min_weight_fraction_leaf': 0.0, 228 | 'classifier:extra_trees:n_estimators': 100, 229 | 'imputation:strategy': 'mean', 230 | 'one_hot_encoding:minimum_fraction': 0.00037525617209727315, 231 | 'one_hot_encoding:use_minimum_fraction': 'True', 232 | 'preprocessor:__choice__': 'select_rates', 233 | 'preprocessor:select_rates:alpha': 0.36323622954313295, 234 | 'preprocessor:select_rates:mode': 'fpr', 235 | 'preprocessor:select_rates:score_func': 'chi2', 236 | 'rescaling:__choice__': 'min/max'})), 237 | (0.020000, SimpleClassificationPipeline(configuration={ 238 | 'balancing:strategy': 'weighting', 239 | 'classifier:__choice__': 'random_forest', 240 | 'classifier:random_forest:bootstrap': 'False', 241 | 'classifier:random_forest:criterion': 'gini', 242 | 'classifier:random_forest:max_depth': 'None', 243 | 'classifier:random_forest:max_features': 4.7911724862642, 244 | 'classifier:random_forest:max_leaf_nodes': 'None', 245 | 'classifier:random_forest:min_samples_leaf': 1, 246 | 'classifier:random_forest:min_samples_split': 11, 247 | 'classifier:random_forest:min_weight_fraction_leaf': 0.0, 248 | 'classifier:random_forest:n_estimators': 100, 249 | 'imputation:strategy': 'median', 250 | 'one_hot_encoding:use_minimum_fraction': 'False', 251 | 'preprocessor:__choice__': 'select_rates', 252 | 'preprocessor:select_rates:alpha': 0.47510655107871991, 253 | 'preprocessor:select_rates:mode': 'fdr', 254 | 'preprocessor:select_rates:score_func': 'chi2', 255 | 'rescaling:__choice__': 'standardize'})), 256 | (0.020000, SimpleClassificationPipeline(configuration={ 257 | 'balancing:strategy': 'none', 258 | 'classifier:__choice__': 'random_forest', 259 | 'classifier:random_forest:bootstrap': 'False', 260 | 'classifier:random_forest:criterion': 'entropy', 261 | 'classifier:random_forest:max_depth': 'None', 262 | 'classifier:random_forest:max_features': 4.9237570615905248, 263 | 'classifier:random_forest:max_leaf_nodes': 'None', 264 | 'classifier:random_forest:min_samples_leaf': 13, 265 | 'classifier:random_forest:min_samples_split': 15, 266 | 'classifier:random_forest:min_weight_fraction_leaf': 0.0, 267 | 'classifier:random_forest:n_estimators': 100, 268 | 'imputation:strategy': 'most_frequent', 269 | 'one_hot_encoding:minimum_fraction': 0.00028264986304734767, 270 | 'one_hot_encoding:use_minimum_fraction': 'True', 271 | 'preprocessor:__choice__': 'select_rates', 272 | 'preprocessor:select_rates:alpha': 0.27910583898194102, 273 | 'preprocessor:select_rates:mode': 'fdr', 274 | 'preprocessor:select_rates:score_func': 'chi2', 275 | 'rescaling:__choice__': 'none'})), 276 | (0.020000, SimpleClassificationPipeline(configuration={ 277 | 'balancing:strategy': 'weighting', 278 | 'classifier:__choice__': 'random_forest', 279 | 'classifier:random_forest:bootstrap': 'False', 280 | 'classifier:random_forest:criterion': 'entropy', 281 | 'classifier:random_forest:max_depth': 'None', 282 | 'classifier:random_forest:max_features': 3.0988613659452917, 283 | 'classifier:random_forest:max_leaf_nodes': 'None', 284 | 'classifier:random_forest:min_samples_leaf': 3, 285 | 'classifier:random_forest:min_samples_split': 3, 286 | 'classifier:random_forest:min_weight_fraction_leaf': 0.0, 287 | 'classifier:random_forest:n_estimators': 100, 288 | 'imputation:strategy': 'most_frequent', 289 | 'one_hot_encoding:use_minimum_fraction': 'False', 290 | 'preprocessor:__choice__': 'no_preprocessing', 291 | 'rescaling:__choice__': 'none'})), 292 | ] 293 | 294 | targets = [] 295 | predictions = [] 296 | predictions_valid = [] 297 | predictions_test = [] 298 | 299 | 300 | def fit_and_predict(estimator, weight, X, y): 301 | try: 302 | estimator.fit(X.copy(), y.copy()) 303 | pv = estimator.predict_proba(X_valid.copy()) * weight 304 | pt = estimator.predict_proba(X_test.copy()) * weight 305 | except Exception as e: 306 | print(e) 307 | print(estimator.configuration) 308 | pv = None 309 | pt = None 310 | return pv, pt 311 | 312 | 313 | # Make predictions and weight them 314 | all_predictions = Parallel(n_jobs=-1)(delayed(fit_and_predict) \ 315 | (estimator, weight, X, y) for 316 | weight, estimator in choices) 317 | for pv, pt in all_predictions: 318 | predictions_valid.append(pv) 319 | predictions_test.append(pt) 320 | 321 | # Output the predictions 322 | for name, predictions in [('valid', predictions_valid), 323 | ('test', predictions_test)]: 324 | predictions = np.array(predictions) 325 | predictions = np.sum(predictions, axis=0).astype(np.float32) 326 | predictions = predictions[:, 1].reshape((-1, 1)) 327 | 328 | filepath = os.path.join(output, '%s_%s_000.predict' % (dataset, name)) 329 | np.savetxt(filepath, predictions, delimiter=' ', fmt='%.4e') 330 | -------------------------------------------------------------------------------- /004_flora.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | from joblib import Parallel, delayed 5 | import numpy as np 6 | 7 | import autosklearn 8 | import autosklearn.data 9 | import autosklearn.data.competition_data_manager 10 | from autosklearn.pipeline.regression import SimpleRegressionPipeline 11 | 12 | parser = argparse.ArgumentParser() 13 | parser.add_argument('input') 14 | parser.add_argument('output') 15 | args = parser.parse_args() 16 | 17 | input = args.input 18 | dataset = 'flora' 19 | output = args.output 20 | 21 | path = os.path.join(input, dataset) 22 | D = autosklearn.data.competition_data_manager.CompetitionDataManager(path) 23 | X = D.data['X_train'] 24 | y = D.data['Y_train'] 25 | X_valid = D.data['X_valid'] 26 | X_test = D.data['X_test'] 27 | 28 | # Replace the following array by a new ensemble 29 | choices = \ 30 | [(0.220000, SimpleRegressionPipeline(configuration={ 31 | 'imputation:strategy': 'most_frequent', 32 | 'one_hot_encoding:use_minimum_fraction': 'False', 33 | 'preprocessor:__choice__': 'no_preprocessing', 34 | 'regressor:__choice__': 'xgradient_boosting', 35 | 'regressor:xgradient_boosting:base_score': 0.5, 36 | 'regressor:xgradient_boosting:colsample_bylevel': 1, 37 | 'regressor:xgradient_boosting:colsample_bytree': 1, 38 | 'regressor:xgradient_boosting:gamma': 0, 39 | 'regressor:xgradient_boosting:learning_rate': 0.056838908807173093, 40 | 'regressor:xgradient_boosting:max_delta_step': 0, 41 | 'regressor:xgradient_boosting:max_depth': 8, 42 | 'regressor:xgradient_boosting:min_child_weight': 16, 43 | 'regressor:xgradient_boosting:n_estimators': 178, 44 | 'regressor:xgradient_boosting:reg_alpha': 0, 45 | 'regressor:xgradient_boosting:reg_lambda': 1, 46 | 'regressor:xgradient_boosting:scale_pos_weight': 1, 47 | 'regressor:xgradient_boosting:subsample': 0.70026686345272005, 48 | 'rescaling:__choice__': 'none'})), 49 | (0.160000, SimpleRegressionPipeline(configuration={ 50 | 'imputation:strategy': 'mean', 51 | 'one_hot_encoding:minimum_fraction': 0.028721299365033225, 52 | 'one_hot_encoding:use_minimum_fraction': 'True', 53 | 'preprocessor:__choice__': 'no_preprocessing', 54 | 'regressor:__choice__': 'xgradient_boosting', 55 | 'regressor:xgradient_boosting:base_score': 0.5, 56 | 'regressor:xgradient_boosting:colsample_bylevel': 1, 57 | 'regressor:xgradient_boosting:colsample_bytree': 1, 58 | 'regressor:xgradient_boosting:gamma': 0, 59 | 'regressor:xgradient_boosting:learning_rate': 0.10000000000000002, 60 | 'regressor:xgradient_boosting:max_delta_step': 0, 61 | 'regressor:xgradient_boosting:max_depth': 6, 62 | 'regressor:xgradient_boosting:min_child_weight': 13, 63 | 'regressor:xgradient_boosting:n_estimators': 100, 64 | 'regressor:xgradient_boosting:reg_alpha': 0, 65 | 'regressor:xgradient_boosting:reg_lambda': 1, 66 | 'regressor:xgradient_boosting:scale_pos_weight': 1, 67 | 'regressor:xgradient_boosting:subsample': 1.0, 68 | 'rescaling:__choice__': 'none'})), 69 | (0.120000, SimpleRegressionPipeline(configuration={ 70 | 'imputation:strategy': 'median', 71 | 'one_hot_encoding:minimum_fraction': 0.00076890296310299397, 72 | 'one_hot_encoding:use_minimum_fraction': 'True', 73 | 'preprocessor:__choice__': 'no_preprocessing', 74 | 'regressor:__choice__': 'xgradient_boosting', 75 | 'regressor:xgradient_boosting:base_score': 0.5, 76 | 'regressor:xgradient_boosting:colsample_bylevel': 1, 77 | 'regressor:xgradient_boosting:colsample_bytree': 1, 78 | 'regressor:xgradient_boosting:gamma': 0, 79 | 'regressor:xgradient_boosting:learning_rate': 0.10000000000000002, 80 | 'regressor:xgradient_boosting:max_delta_step': 0, 81 | 'regressor:xgradient_boosting:max_depth': 8, 82 | 'regressor:xgradient_boosting:min_child_weight': 1, 83 | 'regressor:xgradient_boosting:n_estimators': 100, 84 | 'regressor:xgradient_boosting:reg_alpha': 0, 85 | 'regressor:xgradient_boosting:reg_lambda': 1, 86 | 'regressor:xgradient_boosting:scale_pos_weight': 1, 87 | 'regressor:xgradient_boosting:subsample': 1.0, 88 | 'rescaling:__choice__': 'none'})), 89 | (0.080000, SimpleRegressionPipeline(configuration={ 90 | 'imputation:strategy': 'most_frequent', 91 | 'one_hot_encoding:use_minimum_fraction': 'False', 92 | 'preprocessor:__choice__': 'no_preprocessing', 93 | 'regressor:__choice__': 'xgradient_boosting', 94 | 'regressor:xgradient_boosting:base_score': 0.5, 95 | 'regressor:xgradient_boosting:colsample_bylevel': 1, 96 | 'regressor:xgradient_boosting:colsample_bytree': 1, 97 | 'regressor:xgradient_boosting:gamma': 0, 98 | 'regressor:xgradient_boosting:learning_rate': 0.10000000000000002, 99 | 'regressor:xgradient_boosting:max_delta_step': 0, 100 | 'regressor:xgradient_boosting:max_depth': 7, 101 | 'regressor:xgradient_boosting:min_child_weight': 1, 102 | 'regressor:xgradient_boosting:n_estimators': 100, 103 | 'regressor:xgradient_boosting:reg_alpha': 0, 104 | 'regressor:xgradient_boosting:reg_lambda': 1, 105 | 'regressor:xgradient_boosting:scale_pos_weight': 1, 106 | 'regressor:xgradient_boosting:subsample': 1.0, 107 | 'rescaling:__choice__': 'none'})), 108 | (0.080000, SimpleRegressionPipeline(configuration={ 109 | 'imputation:strategy': 'median', 110 | 'one_hot_encoding:minimum_fraction': 0.0023636879664826662, 111 | 'one_hot_encoding:use_minimum_fraction': 'True', 112 | 'preprocessor:__choice__': 'no_preprocessing', 113 | 'regressor:__choice__': 'liblinear_svr', 114 | 'regressor:liblinear_svr:C': 1756.3281019761341, 115 | 'regressor:liblinear_svr:dual': 'False', 116 | 'regressor:liblinear_svr:epsilon': 0.12958135960591446, 117 | 'regressor:liblinear_svr:fit_intercept': 'True', 118 | 'regressor:liblinear_svr:intercept_scaling': 1, 119 | 'regressor:liblinear_svr:loss': 'squared_epsilon_insensitive', 120 | 'regressor:liblinear_svr:tol': 6.7973376271281637e-05, 121 | 'rescaling:__choice__': 'none'})), 122 | (0.060000, SimpleRegressionPipeline(configuration={ 123 | 'imputation:strategy': 'mean', 124 | 'one_hot_encoding:minimum_fraction': 0.0078832566242014457, 125 | 'one_hot_encoding:use_minimum_fraction': 'True', 126 | 'preprocessor:__choice__': 'kernel_pca', 127 | 'preprocessor:kernel_pca:coef0': 0.830468268944067, 128 | 'preprocessor:kernel_pca:kernel': 'sigmoid', 129 | 'preprocessor:kernel_pca:n_components': 1297, 130 | 'regressor:__choice__': 'sgd', 131 | 'regressor:sgd:alpha': 7.1922597888891864e-06, 132 | 'regressor:sgd:average': 'True', 133 | 'regressor:sgd:epsilon': 0.002325854486140731, 134 | 'regressor:sgd:eta0': 0.09745049410405518, 135 | 'regressor:sgd:fit_intercept': 'True', 136 | 'regressor:sgd:learning_rate': 'invscaling', 137 | 'regressor:sgd:loss': 'squared_epsilon_insensitive', 138 | 'regressor:sgd:n_iter': 56, 139 | 'regressor:sgd:penalty': 'l1', 140 | 'regressor:sgd:power_t': 0.2820868931235419, 141 | 'rescaling:__choice__': 'standardize'})), 142 | (0.040000, SimpleRegressionPipeline(configuration={ 143 | 'imputation:strategy': 'median', 144 | 'one_hot_encoding:use_minimum_fraction': 'False', 145 | 'preprocessor:__choice__': 'no_preprocessing', 146 | 'regressor:__choice__': 'xgradient_boosting', 147 | 'regressor:xgradient_boosting:base_score': 0.5, 148 | 'regressor:xgradient_boosting:colsample_bylevel': 1, 149 | 'regressor:xgradient_boosting:colsample_bytree': 1, 150 | 'regressor:xgradient_boosting:gamma': 0, 151 | 'regressor:xgradient_boosting:learning_rate': 0.39354372832974382, 152 | 'regressor:xgradient_boosting:max_delta_step': 0, 153 | 'regressor:xgradient_boosting:max_depth': 3, 154 | 'regressor:xgradient_boosting:min_child_weight': 19, 155 | 'regressor:xgradient_boosting:n_estimators': 73, 156 | 'regressor:xgradient_boosting:reg_alpha': 0, 157 | 'regressor:xgradient_boosting:reg_lambda': 1, 158 | 'regressor:xgradient_boosting:scale_pos_weight': 1, 159 | 'regressor:xgradient_boosting:subsample': 0.51160818820515941, 160 | 'rescaling:__choice__': 'standardize'})), 161 | (0.040000, SimpleRegressionPipeline(configuration={ 162 | 'imputation:strategy': 'most_frequent', 163 | 'one_hot_encoding:minimum_fraction': 0.0001292396238727452, 164 | 'one_hot_encoding:use_minimum_fraction': 'True', 165 | 'preprocessor:__choice__': 'no_preprocessing', 166 | 'regressor:__choice__': 'xgradient_boosting', 167 | 'regressor:xgradient_boosting:base_score': 0.5, 168 | 'regressor:xgradient_boosting:colsample_bylevel': 1, 169 | 'regressor:xgradient_boosting:colsample_bytree': 1, 170 | 'regressor:xgradient_boosting:gamma': 0, 171 | 'regressor:xgradient_boosting:learning_rate': 0.10000000000000002, 172 | 'regressor:xgradient_boosting:max_delta_step': 0, 173 | 'regressor:xgradient_boosting:max_depth': 5, 174 | 'regressor:xgradient_boosting:min_child_weight': 1, 175 | 'regressor:xgradient_boosting:n_estimators': 100, 176 | 'regressor:xgradient_boosting:reg_alpha': 0, 177 | 'regressor:xgradient_boosting:reg_lambda': 1, 178 | 'regressor:xgradient_boosting:scale_pos_weight': 1, 179 | 'regressor:xgradient_boosting:subsample': 1.0, 180 | 'rescaling:__choice__': 'none'})), 181 | (0.040000, SimpleRegressionPipeline(configuration={ 182 | 'imputation:strategy': 'median', 183 | 'one_hot_encoding:minimum_fraction': 0.0010042712846593592, 184 | 'one_hot_encoding:use_minimum_fraction': 'True', 185 | 'preprocessor:__choice__': 'extra_trees_preproc_for_regression', 186 | 'preprocessor:extra_trees_preproc_for_regression:bootstrap': 'False', 187 | 'preprocessor:extra_trees_preproc_for_regression:criterion': 'mse', 188 | 'preprocessor:extra_trees_preproc_for_regression:max_depth': 'None', 189 | 'preprocessor:extra_trees_preproc_for_regression:max_features': 4.4366238138449141, 190 | 'preprocessor:extra_trees_preproc_for_regression:min_samples_leaf': 5, 191 | 'preprocessor:extra_trees_preproc_for_regression:min_samples_split': 2, 192 | 'preprocessor:extra_trees_preproc_for_regression:min_weight_fraction_leaf': 0.0, 193 | 'preprocessor:extra_trees_preproc_for_regression:n_estimators': 100, 194 | 'regressor:__choice__': 'xgradient_boosting', 195 | 'regressor:xgradient_boosting:base_score': 0.5, 196 | 'regressor:xgradient_boosting:colsample_bylevel': 1, 197 | 'regressor:xgradient_boosting:colsample_bytree': 1, 198 | 'regressor:xgradient_boosting:gamma': 0, 199 | 'regressor:xgradient_boosting:learning_rate': 0.24786184996967336, 200 | 'regressor:xgradient_boosting:max_delta_step': 0, 201 | 'regressor:xgradient_boosting:max_depth': 4, 202 | 'regressor:xgradient_boosting:min_child_weight': 12, 203 | 'regressor:xgradient_boosting:n_estimators': 487, 204 | 'regressor:xgradient_boosting:reg_alpha': 0, 205 | 'regressor:xgradient_boosting:reg_lambda': 1, 206 | 'regressor:xgradient_boosting:scale_pos_weight': 1, 207 | 'regressor:xgradient_boosting:subsample': 0.51768561001523961, 208 | 'rescaling:__choice__': 'standardize'})), 209 | (0.040000, SimpleRegressionPipeline(configuration={ 210 | 'imputation:strategy': 'most_frequent', 211 | 'one_hot_encoding:use_minimum_fraction': 'False', 212 | 'preprocessor:__choice__': 'no_preprocessing', 213 | 'regressor:__choice__': 'xgradient_boosting', 214 | 'regressor:xgradient_boosting:base_score': 0.5, 215 | 'regressor:xgradient_boosting:colsample_bylevel': 1, 216 | 'regressor:xgradient_boosting:colsample_bytree': 1, 217 | 'regressor:xgradient_boosting:gamma': 0, 218 | 'regressor:xgradient_boosting:learning_rate': 0.056838908807173093, 219 | 'regressor:xgradient_boosting:max_delta_step': 0, 220 | 'regressor:xgradient_boosting:max_depth': 6, 221 | 'regressor:xgradient_boosting:min_child_weight': 20, 222 | 'regressor:xgradient_boosting:n_estimators': 178, 223 | 'regressor:xgradient_boosting:reg_alpha': 0, 224 | 'regressor:xgradient_boosting:reg_lambda': 1, 225 | 'regressor:xgradient_boosting:scale_pos_weight': 1, 226 | 'regressor:xgradient_boosting:subsample': 0.81655152788480145, 227 | 'rescaling:__choice__': 'none'})), 228 | (0.020000, SimpleRegressionPipeline(configuration={ 229 | 'imputation:strategy': 'median', 230 | 'one_hot_encoding:use_minimum_fraction': 'False', 231 | 'preprocessor:__choice__': 'truncatedSVD', 232 | 'preprocessor:truncatedSVD:target_dim': 222, 233 | 'regressor:__choice__': 'xgradient_boosting', 234 | 'regressor:xgradient_boosting:base_score': 0.5, 235 | 'regressor:xgradient_boosting:colsample_bylevel': 1, 236 | 'regressor:xgradient_boosting:colsample_bytree': 1, 237 | 'regressor:xgradient_boosting:gamma': 0, 238 | 'regressor:xgradient_boosting:learning_rate': 0.10000000000000002, 239 | 'regressor:xgradient_boosting:max_delta_step': 0, 240 | 'regressor:xgradient_boosting:max_depth': 3, 241 | 'regressor:xgradient_boosting:min_child_weight': 1, 242 | 'regressor:xgradient_boosting:n_estimators': 100, 243 | 'regressor:xgradient_boosting:reg_alpha': 0, 244 | 'regressor:xgradient_boosting:reg_lambda': 1, 245 | 'regressor:xgradient_boosting:scale_pos_weight': 1, 246 | 'regressor:xgradient_boosting:subsample': 1.0, 247 | 'rescaling:__choice__': 'none'})), 248 | (0.020000, SimpleRegressionPipeline(configuration={ 249 | 'imputation:strategy': 'most_frequent', 250 | 'one_hot_encoding:use_minimum_fraction': 'False', 251 | 'preprocessor:__choice__': 'truncatedSVD', 252 | 'preprocessor:truncatedSVD:target_dim': 156, 253 | 'regressor:__choice__': 'decision_tree', 254 | 'regressor:decision_tree:criterion': 'mse', 255 | 'regressor:decision_tree:max_depth': 1.4573346058635357, 256 | 'regressor:decision_tree:max_features': 1.0, 257 | 'regressor:decision_tree:max_leaf_nodes': 'None', 258 | 'regressor:decision_tree:min_samples_leaf': 17, 259 | 'regressor:decision_tree:min_samples_split': 8, 260 | 'regressor:decision_tree:min_weight_fraction_leaf': 0.0, 261 | 'regressor:decision_tree:splitter': 'best', 262 | 'rescaling:__choice__': 'normalize'})), 263 | (0.020000, SimpleRegressionPipeline(configuration={ 264 | 'imputation:strategy': 'mean', 265 | 'one_hot_encoding:use_minimum_fraction': 'False', 266 | 'preprocessor:__choice__': 'no_preprocessing', 267 | 'regressor:__choice__': 'xgradient_boosting', 268 | 'regressor:xgradient_boosting:base_score': 0.5, 269 | 'regressor:xgradient_boosting:colsample_bylevel': 1, 270 | 'regressor:xgradient_boosting:colsample_bytree': 1, 271 | 'regressor:xgradient_boosting:gamma': 0, 272 | 'regressor:xgradient_boosting:learning_rate': 0.10000000000000002, 273 | 'regressor:xgradient_boosting:max_delta_step': 0, 274 | 'regressor:xgradient_boosting:max_depth': 5, 275 | 'regressor:xgradient_boosting:min_child_weight': 13, 276 | 'regressor:xgradient_boosting:n_estimators': 100, 277 | 'regressor:xgradient_boosting:reg_alpha': 0, 278 | 'regressor:xgradient_boosting:reg_lambda': 1, 279 | 'regressor:xgradient_boosting:scale_pos_weight': 1, 280 | 'regressor:xgradient_boosting:subsample': 1.0, 281 | 'rescaling:__choice__': 'none'})), 282 | (0.020000, SimpleRegressionPipeline(configuration={ 283 | 'imputation:strategy': 'median', 284 | 'one_hot_encoding:minimum_fraction': 0.0030893906804030156, 285 | 'one_hot_encoding:use_minimum_fraction': 'True', 286 | 'preprocessor:__choice__': 'truncatedSVD', 287 | 'preprocessor:truncatedSVD:target_dim': 67, 288 | 'regressor:__choice__': 'k_nearest_neighbors', 289 | 'regressor:k_nearest_neighbors:n_neighbors': 29, 290 | 'regressor:k_nearest_neighbors:p': 2, 291 | 'regressor:k_nearest_neighbors:weights': 'distance', 292 | 'rescaling:__choice__': 'normalize'})), 293 | (0.020000, SimpleRegressionPipeline(configuration={ 294 | 'imputation:strategy': 'most_frequent', 295 | 'one_hot_encoding:minimum_fraction': 0.0027171559129851464, 296 | 'one_hot_encoding:use_minimum_fraction': 'True', 297 | 'preprocessor:__choice__': 'truncatedSVD', 298 | 'preprocessor:truncatedSVD:target_dim': 35, 299 | 'regressor:__choice__': 'liblinear_svr', 300 | 'regressor:liblinear_svr:C': 0.0485964760119761, 301 | 'regressor:liblinear_svr:dual': 'False', 302 | 'regressor:liblinear_svr:epsilon': 0.01333919934708307, 303 | 'regressor:liblinear_svr:fit_intercept': 'True', 304 | 'regressor:liblinear_svr:intercept_scaling': 1, 305 | 'regressor:liblinear_svr:loss': 'squared_epsilon_insensitive', 306 | 'regressor:liblinear_svr:tol': 0.030573671793931671, 307 | 'rescaling:__choice__': 'min/max'})), 308 | (0.020000, SimpleRegressionPipeline(configuration={ 309 | 'imputation:strategy': 'mean', 310 | 'one_hot_encoding:use_minimum_fraction': 'False', 311 | 'preprocessor:__choice__': 'no_preprocessing', 312 | 'regressor:__choice__': 'decision_tree', 313 | 'regressor:decision_tree:criterion': 'mse', 314 | 'regressor:decision_tree:max_depth': 0.031442410091469419, 315 | 'regressor:decision_tree:max_features': 1.0, 316 | 'regressor:decision_tree:max_leaf_nodes': 'None', 317 | 'regressor:decision_tree:min_samples_leaf': 15, 318 | 'regressor:decision_tree:min_samples_split': 10, 319 | 'regressor:decision_tree:min_weight_fraction_leaf': 0.0, 320 | 'regressor:decision_tree:splitter': 'best', 321 | 'rescaling:__choice__': 'normalize'})), 322 | ] 323 | 324 | targets = [] 325 | predictions = [] 326 | predictions_valid = [] 327 | predictions_test = [] 328 | 329 | 330 | def fit_and_predict(estimator, weight, X, y): 331 | try: 332 | estimator.fit(X.copy(), y.copy()) 333 | pv = estimator.predict(X_valid.copy()) * weight 334 | pt = estimator.predict(X_test.copy()) * weight 335 | except Exception as e: 336 | print(e) 337 | print(estimator.configuration) 338 | pv = None 339 | pt = None 340 | return pv, pt 341 | 342 | 343 | # Make predictions and weight them 344 | all_predictions = Parallel(n_jobs=-1)(delayed(fit_and_predict) \ 345 | (estimator, weight, X, y) for 346 | weight, estimator in choices) 347 | for pv, pt in all_predictions: 348 | predictions_valid.append(pv) 349 | predictions_test.append(pt) 350 | 351 | # Output the predictions 352 | for name, predictions in [('valid', predictions_valid), 353 | ('test', predictions_test)]: 354 | predictions = np.array(predictions) 355 | predictions = np.sum(predictions, axis=0).astype(np.float32) 356 | 357 | filepath = os.path.join(output, '%s_%s_000.predict' % (dataset, name)) 358 | np.savetxt(filepath, predictions, delimiter=' ', fmt='%.4e') 359 | -------------------------------------------------------------------------------- /004_tania.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | from joblib import Parallel, delayed 5 | import numpy as np 6 | 7 | import autosklearn 8 | import autosklearn.data 9 | import autosklearn.data.competition_data_manager 10 | from autosklearn.pipeline.classification import SimpleClassificationPipeline 11 | 12 | parser = argparse.ArgumentParser() 13 | parser.add_argument('input') 14 | parser.add_argument('output') 15 | args = parser.parse_args() 16 | 17 | input = args.input 18 | dataset = 'tania' 19 | output = args.output 20 | 21 | path = os.path.join(input, dataset) 22 | D = autosklearn.data.competition_data_manager.CompetitionDataManager(path) 23 | X = D.data['X_train'] 24 | y = D.data['Y_train'] 25 | X_valid = D.data['X_valid'] 26 | X_test = D.data['X_test'] 27 | 28 | # Use this version of lasagne commit of the lasagne master branch: 29 | # 24c9ed2ffc25504c3b0df4598afb1e63fdd59eee 30 | # https://github.com/Lasagne/Lasagne/commit/24c9ed2ffc25504c3b0df4598afb1e63fdd59eee 31 | # Copy the file DeepFeedNet into autosklearn.pipeline.components.classification 32 | # Copy the file FeedForwardNet into autosklearn.pipeline.implementations 33 | 34 | choices = \ 35 | [(0.220000, SimpleClassificationPipeline(configuration={ 36 | 'balancing:strategy': 'none', 37 | 'classifier:DeepFeedNet:activation': 'relu', 38 | 'classifier:DeepFeedNet:batch_size': 1526, 39 | 'classifier:DeepFeedNet:dropout_layer_1': 0.07375877191623954, 40 | 'classifier:DeepFeedNet:dropout_layer_2': 0.25061726159515596, 41 | 'classifier:DeepFeedNet:dropout_output': 0.44276742232825533, 42 | 'classifier:DeepFeedNet:lambda2': 0.00559189810319557, 43 | 'classifier:DeepFeedNet:learning_rate': 0.01, 44 | 'classifier:DeepFeedNet:num_layers': 'd', 45 | 'classifier:DeepFeedNet:num_units_layer_1': 3512, 46 | 'classifier:DeepFeedNet:num_units_layer_2': 2456, 47 | 'classifier:DeepFeedNet:number_updates': 1019, 48 | 'classifier:DeepFeedNet:solver': 'smorm3s', 49 | 'classifier:DeepFeedNet:std_layer_1': 0.0031572295374762784, 50 | 'classifier:DeepFeedNet:std_layer_2': 0.024102151721155526, 51 | 'classifier:__choice__': 'DeepFeedNet', 52 | 'imputation:strategy': 'median', 53 | 'one_hot_encoding:use_minimum_fraction': 'False', 54 | 'preprocessor:truncatedSVD:target_dim': 169, 55 | 'preprocessor:__choice__': 'truncatedSVD', 56 | 'rescaling:__choice__': 'normalize'})), 57 | (0.180000, SimpleClassificationPipeline(configuration={ 58 | 'balancing:strategy': 'weighting', 59 | 'classifier:__choice__': 'sgd', 60 | 'classifier:sgd:alpha': 1e-06, 61 | 'classifier:sgd:average': 'False', 62 | 'classifier:sgd:eta0': 1e-07, 63 | 'classifier:sgd:fit_intercept': 'True', 64 | 'classifier:sgd:learning_rate': 'optimal', 65 | 'classifier:sgd:loss': 'log', 66 | 'classifier:sgd:n_iter': 5, 67 | 'classifier:sgd:penalty': 'l2', 68 | 'imputation:strategy': 'mean', 69 | 'one_hot_encoding:use_minimum_fraction': 'False', 70 | 'preprocessor:__choice__': 'no_preprocessing', 71 | 'rescaling:__choice__': 'normalize'})), 72 | (0.140000, SimpleClassificationPipeline(configuration={ 73 | 'balancing:strategy': 'none', 74 | 'classifier:DeepFeedNet:activation': 'relu', 75 | 'classifier:DeepFeedNet:batch_size': 1526, 76 | 'classifier:DeepFeedNet:dropout_layer_1': 0.07375877191623954, 77 | 'classifier:DeepFeedNet:dropout_layer_2': 0.25061726159515596, 78 | 'classifier:DeepFeedNet:dropout_output': 0.5318548466903714, 79 | 'classifier:DeepFeedNet:lambda2': 0.00559189810319557, 80 | 'classifier:DeepFeedNet:learning_rate': 0.01, 81 | 'classifier:DeepFeedNet:num_layers': 'd', 82 | 'classifier:DeepFeedNet:num_units_layer_1': 3512, 83 | 'classifier:DeepFeedNet:num_units_layer_2': 2456, 84 | 'classifier:DeepFeedNet:number_updates': 942, 85 | 'classifier:DeepFeedNet:solver': 'smorm3s', 86 | 'classifier:DeepFeedNet:std_layer_1': 0.0031572295374762784, 87 | 'classifier:DeepFeedNet:std_layer_2': 0.024102151721155526, 88 | 'classifier:__choice__': 'DeepFeedNet', 89 | 'imputation:strategy': 'median', 90 | 'one_hot_encoding:use_minimum_fraction': 'False', 91 | 'preprocessor:truncatedSVD:target_dim': 169, 92 | 'preprocessor:__choice__': 'truncatedSVD', 93 | 'rescaling:__choice__': 'normalize'})), 94 | (0.100000, SimpleClassificationPipeline(configuration={ 95 | 'balancing:strategy': 'weighting', 96 | 'classifier:DeepFeedNet:activation': 'relu', 97 | 'classifier:DeepFeedNet:batch_size': 1526, 98 | 'classifier:DeepFeedNet:dropout_layer_1': 0.07375877191623954, 99 | 'classifier:DeepFeedNet:dropout_layer_2': 0.25061726159515596, 100 | 'classifier:DeepFeedNet:dropout_output': 0.5318548466903714, 101 | 'classifier:DeepFeedNet:lambda2': 0.00559189810319557, 102 | 'classifier:DeepFeedNet:learning_rate': 0.01, 103 | 'classifier:DeepFeedNet:num_layers': 'd', 104 | 'classifier:DeepFeedNet:num_units_layer_1': 2825, 105 | 'classifier:DeepFeedNet:num_units_layer_2': 2456, 106 | 'classifier:DeepFeedNet:number_updates': 942, 107 | 'classifier:DeepFeedNet:solver': 'smorm3s', 108 | 'classifier:DeepFeedNet:std_layer_1': 0.0031572295374762784, 109 | 'classifier:DeepFeedNet:std_layer_2': 0.024102151721155526, 110 | 'classifier:__choice__': 'DeepFeedNet', 111 | 'imputation:strategy': 'median', 112 | 'one_hot_encoding:use_minimum_fraction': 'False', 113 | 'preprocessor:truncatedSVD:target_dim': 169, 114 | 'preprocessor:__choice__': 'truncatedSVD', 115 | 'rescaling:__choice__': 'normalize'})), 116 | (0.080000, SimpleClassificationPipeline(configuration={ 117 | 'balancing:strategy': 'none', 118 | 'classifier:DeepFeedNet:activation': 'relu', 119 | 'classifier:DeepFeedNet:batch_size': 1526, 120 | 'classifier:DeepFeedNet:dropout_layer_1': 0.07375877191623954, 121 | 'classifier:DeepFeedNet:dropout_layer_2': 0.25061726159515596, 122 | 'classifier:DeepFeedNet:dropout_output': 0.6315030660705527, 123 | 'classifier:DeepFeedNet:lambda2': 0.00559189810319557, 124 | 'classifier:DeepFeedNet:learning_rate': 0.01, 125 | 'classifier:DeepFeedNet:num_layers': 'd', 126 | 'classifier:DeepFeedNet:num_units_layer_1': 3512, 127 | 'classifier:DeepFeedNet:num_units_layer_2': 2456, 128 | 'classifier:DeepFeedNet:number_updates': 942, 129 | 'classifier:DeepFeedNet:solver': 'smorm3s', 130 | 'classifier:DeepFeedNet:std_layer_1': 0.0031572295374762784, 131 | 'classifier:DeepFeedNet:std_layer_2': 0.024102151721155526, 132 | 'classifier:__choice__': 'DeepFeedNet', 133 | 'imputation:strategy': 'median', 134 | 'one_hot_encoding:use_minimum_fraction': 'False', 135 | 'preprocessor:truncatedSVD:target_dim': 169, 136 | 'preprocessor:__choice__': 'truncatedSVD', 137 | 'rescaling:__choice__': 'normalize'})), 138 | (0.080000, SimpleClassificationPipeline(configuration={ 139 | 'balancing:strategy': 'none', 140 | 'classifier:DeepFeedNet:activation': 'relu', 141 | 'classifier:DeepFeedNet:batch_size': 2124, 142 | 'classifier:DeepFeedNet:dropout_layer_1': 0.01360549061849139, 143 | 'classifier:DeepFeedNet:dropout_output': 0.2644391773986185, 144 | 'classifier:DeepFeedNet:lambda2': 0.004871660362477711, 145 | 'classifier:DeepFeedNet:learning_rate': 0.01, 146 | 'classifier:DeepFeedNet:num_layers': 'c', 147 | 'classifier:DeepFeedNet:num_units_layer_1': 2812, 148 | 'classifier:DeepFeedNet:number_updates': 2710, 149 | 'classifier:DeepFeedNet:solver': 'smorm3s', 150 | 'classifier:DeepFeedNet:std_layer_1': 0.09316319189582598, 151 | 'classifier:__choice__': 'DeepFeedNet', 152 | 'imputation:strategy': 'median', 153 | 'one_hot_encoding:use_minimum_fraction': 'False', 154 | 'preprocessor:truncatedSVD:target_dim': 186, 155 | 'preprocessor:__choice__': 'truncatedSVD', 156 | 'rescaling:__choice__': 'normalize'})), 157 | (0.040000, SimpleClassificationPipeline(configuration={ 158 | 'balancing:strategy': 'weighting', 159 | 'classifier:DeepFeedNet:activation': 'relu', 160 | 'classifier:DeepFeedNet:batch_size': 1867, 161 | 'classifier:DeepFeedNet:dropout_layer_1': 0.01908790794742743, 162 | 'classifier:DeepFeedNet:dropout_output': 0.3448188758299382, 163 | 'classifier:DeepFeedNet:lambda2': 0.0007755741149255707, 164 | 'classifier:DeepFeedNet:learning_rate': 0.01, 165 | 'classifier:DeepFeedNet:num_layers': 'c', 166 | 'classifier:DeepFeedNet:num_units_layer_1': 3665, 167 | 'classifier:DeepFeedNet:number_updates': 2512, 168 | 'classifier:DeepFeedNet:solver': 'smorm3s', 169 | 'classifier:DeepFeedNet:std_layer_1': 0.0024468150980905207, 170 | 'classifier:__choice__': 'DeepFeedNet', 171 | 'imputation:strategy': 'most_frequent', 172 | 'one_hot_encoding:minimum_fraction': 0.05266063283992454, 173 | 'one_hot_encoding:use_minimum_fraction': 'True', 174 | 'preprocessor:truncatedSVD:target_dim': 166, 175 | 'preprocessor:__choice__': 'truncatedSVD', 176 | 'rescaling:__choice__': 'normalize'})), 177 | (0.040000, SimpleClassificationPipeline(configuration={ 178 | 'balancing:strategy': 'weighting', 179 | 'classifier:DeepFeedNet:activation': 'relu', 180 | 'classifier:DeepFeedNet:batch_size': 2281, 181 | 'classifier:DeepFeedNet:dropout_layer_1': 0.09094796094063819, 182 | 'classifier:DeepFeedNet:dropout_output': 0.4958339054016198, 183 | 'classifier:DeepFeedNet:lambda2': 1.805699319151882e-05, 184 | 'classifier:DeepFeedNet:learning_rate': 0.001, 185 | 'classifier:DeepFeedNet:num_layers': 'c', 186 | 'classifier:DeepFeedNet:num_units_layer_1': 2651, 187 | 'classifier:DeepFeedNet:number_updates': 3403, 188 | 'classifier:DeepFeedNet:solver': 'smorm3s', 189 | 'classifier:DeepFeedNet:std_layer_1': 0.007630682901621406, 190 | 'classifier:__choice__': 'DeepFeedNet', 191 | 'imputation:strategy': 'mean', 192 | 'one_hot_encoding:use_minimum_fraction': 'False', 193 | 'preprocessor:truncatedSVD:target_dim': 197, 194 | 'preprocessor:__choice__': 'truncatedSVD', 195 | 'rescaling:__choice__': 'none'})), 196 | (0.040000, SimpleClassificationPipeline(configuration={ 197 | 'balancing:strategy': 'none', 198 | 'classifier:DeepFeedNet:activation': 'relu', 199 | 'classifier:DeepFeedNet:batch_size': 2086, 200 | 'classifier:DeepFeedNet:dropout_layer_1': 0.1030823826758656, 201 | 'classifier:DeepFeedNet:dropout_output': 0.22142344211272239, 202 | 'classifier:DeepFeedNet:lambda2': 3.4109499881542005e-06, 203 | 'classifier:DeepFeedNet:learning_rate': 0.01, 204 | 'classifier:DeepFeedNet:num_layers': 'c', 205 | 'classifier:DeepFeedNet:num_units_layer_1': 3317, 206 | 'classifier:DeepFeedNet:number_updates': 711, 207 | 'classifier:DeepFeedNet:solver': 'smorm3s', 208 | 'classifier:DeepFeedNet:std_layer_1': 0.0012484056182083289, 209 | 'classifier:__choice__': 'DeepFeedNet', 210 | 'imputation:strategy': 'most_frequent', 211 | 'one_hot_encoding:minimum_fraction': 0.030925614928477674, 212 | 'one_hot_encoding:use_minimum_fraction': 'True', 213 | 'preprocessor:truncatedSVD:target_dim': 159, 214 | 'preprocessor:__choice__': 'truncatedSVD', 215 | 'rescaling:__choice__': 'min/max'})), 216 | (0.020000, SimpleClassificationPipeline(configuration={ 217 | 'balancing:strategy': 'weighting', 218 | 'classifier:DeepFeedNet:activation': 'relu', 219 | 'classifier:DeepFeedNet:batch_size': 1336, 220 | 'classifier:DeepFeedNet:dropout_layer_1': 0.0331786272132608, 221 | 'classifier:DeepFeedNet:dropout_output': 0.3783990976694647, 222 | 'classifier:DeepFeedNet:lambda2': 0.006318427713029419, 223 | 'classifier:DeepFeedNet:learning_rate': 0.01, 224 | 'classifier:DeepFeedNet:num_layers': 'c', 225 | 'classifier:DeepFeedNet:num_units_layer_1': 2491, 226 | 'classifier:DeepFeedNet:number_updates': 3437, 227 | 'classifier:DeepFeedNet:solver': 'smorm3s', 228 | 'classifier:DeepFeedNet:std_layer_1': 0.09522419264016894, 229 | 'classifier:__choice__': 'DeepFeedNet', 230 | 'imputation:strategy': 'median', 231 | 'one_hot_encoding:minimum_fraction': 0.03562984523180951, 232 | 'one_hot_encoding:use_minimum_fraction': 'True', 233 | 'preprocessor:truncatedSVD:target_dim': 189, 234 | 'preprocessor:__choice__': 'truncatedSVD', 235 | 'rescaling:__choice__': 'normalize'})), 236 | (0.020000, SimpleClassificationPipeline(configuration={ 237 | 'balancing:strategy': 'none', 238 | 'classifier:DeepFeedNet:activation': 'relu', 239 | 'classifier:DeepFeedNet:batch_size': 1967, 240 | 'classifier:DeepFeedNet:dropout_layer_1': 0.06971989322917795, 241 | 'classifier:DeepFeedNet:dropout_output': 0.14345632673233852, 242 | 'classifier:DeepFeedNet:lambda2': 0.0008778987660283575, 243 | 'classifier:DeepFeedNet:learning_rate': 0.01, 244 | 'classifier:DeepFeedNet:num_layers': 'c', 245 | 'classifier:DeepFeedNet:num_units_layer_1': 3587, 246 | 'classifier:DeepFeedNet:number_updates': 3182, 247 | 'classifier:DeepFeedNet:solver': 'smorm3s', 248 | 'classifier:DeepFeedNet:std_layer_1': 0.0015311970092555642, 249 | 'classifier:__choice__': 'DeepFeedNet', 250 | 'imputation:strategy': 'median', 251 | 'one_hot_encoding:use_minimum_fraction': 'False', 252 | 'preprocessor:truncatedSVD:target_dim': 135, 253 | 'preprocessor:__choice__': 'truncatedSVD', 254 | 'rescaling:__choice__': 'normalize'})), 255 | (0.020000, SimpleClassificationPipeline(configuration={ 256 | 'balancing:strategy': 'weighting', 257 | 'classifier:DeepFeedNet:activation': 'relu', 258 | 'classifier:DeepFeedNet:batch_size': 1882, 259 | 'classifier:DeepFeedNet:dropout_layer_1': 0.007184660164183019, 260 | 'classifier:DeepFeedNet:dropout_output': 0.35789769788034004, 261 | 'classifier:DeepFeedNet:lambda2': 0.008162829194808478, 262 | 'classifier:DeepFeedNet:learning_rate': 0.01, 263 | 'classifier:DeepFeedNet:num_layers': 'c', 264 | 'classifier:DeepFeedNet:num_units_layer_1': 3376, 265 | 'classifier:DeepFeedNet:number_updates': 2868, 266 | 'classifier:DeepFeedNet:solver': 'smorm3s', 267 | 'classifier:DeepFeedNet:std_layer_1': 0.0010604662105437909, 268 | 'classifier:__choice__': 'DeepFeedNet', 269 | 'imputation:strategy': 'median', 270 | 'one_hot_encoding:use_minimum_fraction': 'False', 271 | 'preprocessor:truncatedSVD:target_dim': 199, 272 | 'preprocessor:__choice__': 'truncatedSVD', 273 | 'rescaling:__choice__': 'normalize'})), 274 | (0.020000, SimpleClassificationPipeline(configuration={ 275 | 'balancing:strategy': 'none', 276 | 'classifier:DeepFeedNet:activation': 'relu', 277 | 'classifier:DeepFeedNet:batch_size': 2086, 278 | 'classifier:DeepFeedNet:dropout_layer_1': 0.15565773821145037, 279 | 'classifier:DeepFeedNet:dropout_output': 0.22142344211272239, 280 | 'classifier:DeepFeedNet:lambda2': 1.7925329564209397e-06, 281 | 'classifier:DeepFeedNet:learning_rate': 0.01, 282 | 'classifier:DeepFeedNet:num_layers': 'c', 283 | 'classifier:DeepFeedNet:num_units_layer_1': 3317, 284 | 'classifier:DeepFeedNet:number_updates': 711, 285 | 'classifier:DeepFeedNet:solver': 'smorm3s', 286 | 'classifier:DeepFeedNet:std_layer_1': 0.0012484056182083289, 287 | 'classifier:__choice__': 'DeepFeedNet', 288 | 'imputation:strategy': 'most_frequent', 289 | 'one_hot_encoding:minimum_fraction': 0.030925614928477674, 290 | 'one_hot_encoding:use_minimum_fraction': 'True', 291 | 'preprocessor:truncatedSVD:target_dim': 159, 292 | 'preprocessor:__choice__': 'truncatedSVD', 293 | 'rescaling:__choice__': 'min/max'})), 294 | ] 295 | 296 | targets = [] 297 | predictions = [] 298 | predictions_valid = [] 299 | predictions_test = [] 300 | 301 | 302 | def fit_and_predict(estimator, weight, X, y): 303 | try: 304 | estimator.fit(X.copy(), y.copy()) 305 | pv = estimator.predict_proba(X_valid.copy()) * weight 306 | pt = estimator.predict_proba(X_test.copy()) * weight 307 | except Exception as e: 308 | print(e) 309 | print(estimator.configuration) 310 | pv = None 311 | pt = None 312 | return pv, pt 313 | 314 | 315 | # Make predictions and weight them 316 | all_predictions = Parallel(n_jobs=-1)(delayed(fit_and_predict) \ 317 | (estimator, weight, X, y) for 318 | weight, estimator in choices) 319 | for pv, pt in all_predictions: 320 | predictions_valid.append(pv) 321 | predictions_test.append(pt) 322 | 323 | # Output the predictions 324 | for name, predictions in [('valid', predictions_valid), 325 | ('test', predictions_test)]: 326 | predictions = np.array(predictions) 327 | predictions = np.sum(predictions, axis=0).astype(np.float32) 328 | 329 | filepath = os.path.join(output, '%s_%s_000.predict' % (dataset, name)) 330 | np.savetxt(filepath, predictions, delimiter=' ', fmt='%.4e') 331 | -------------------------------------------------------------------------------- /004_yolanda.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | from joblib import Parallel, delayed 5 | import numpy as np 6 | 7 | import autosklearn 8 | import autosklearn.data 9 | import autosklearn.data.competition_data_manager 10 | from autosklearn.pipeline.regression import SimpleRegressionPipeline 11 | 12 | parser = argparse.ArgumentParser() 13 | parser.add_argument('input') 14 | parser.add_argument('output') 15 | args = parser.parse_args() 16 | 17 | input = args.input 18 | dataset = 'yolanda' 19 | output = args.output 20 | 21 | path = os.path.join(input, dataset) 22 | D = autosklearn.data.competition_data_manager.CompetitionDataManager(path) 23 | X = D.data['X_train'] 24 | y = D.data['Y_train'] 25 | X_valid = D.data['X_valid'] 26 | X_test = D.data['X_test'] 27 | 28 | # Use this version of lasagne commit of the lasagne master branch: 29 | # 24c9ed2ffc25504c3b0df4598afb1e63fdd59eee 30 | # https://github.com/Lasagne/Lasagne/commit/24c9ed2ffc25504c3b0df4598afb1e63fdd59eee 31 | # Copy the file RegDeepNet into autosklearn.pipeline.components.regression 32 | # Copy the file FeedForwardNet into autosklearn.pipeline.implementations 33 | 34 | choices = \ 35 | [(0.360000, SimpleRegressionPipeline(configuration={ 36 | 'imputation:strategy': 'mean', 37 | 'one_hot_encoding:minimum_fraction': 0.049682918006307676, 38 | 'one_hot_encoding:use_minimum_fraction': 'True', 39 | 'preprocessor:__choice__': 'no_preprocessing', 40 | 'regressor:RegDeepNet:activation': 'tanh', 41 | 'regressor:RegDeepNet:batch_size': 1865, 42 | 'regressor:RegDeepNet:dropout_layer_1': 0.017462492577406473, 43 | 'regressor:RegDeepNet:dropout_layer_2': 0.048354205627225436, 44 | 'regressor:RegDeepNet:dropout_output': 0.00962149073006804, 45 | 'regressor:RegDeepNet:lambda2': 1.0282444549550921e-05, 46 | 'regressor:RegDeepNet:learning_rate': 0.001, 47 | 'regressor:RegDeepNet:num_layers': 'd', 48 | 'regressor:RegDeepNet:num_units_layer_1': 2615, 49 | 'regressor:RegDeepNet:num_units_layer_2': 252, 50 | 'regressor:RegDeepNet:number_updates': 3225, 51 | 'regressor:RegDeepNet:solver': 'smorm3s', 52 | 'regressor:RegDeepNet:std_layer_1': 0.006861129306844183, 53 | 'regressor:RegDeepNet:std_layer_2': 0.002395977520245193, 54 | 'regressor:__choice__': 'RegDeepNet', 55 | 'rescaling:__choice__': 'standardize'})), 56 | (0.320000, SimpleRegressionPipeline(configuration={ 57 | 'imputation:strategy': 'mean', 58 | 'one_hot_encoding:minimum_fraction': 0.05112532429613385, 59 | 'one_hot_encoding:use_minimum_fraction': 'True', 60 | 'preprocessor:__choice__': 'no_preprocessing', 61 | 'regressor:RegDeepNet:activation': 'sigmoid', 62 | 'regressor:RegDeepNet:batch_size': 1840, 63 | 'regressor:RegDeepNet:dropout_layer_1': 0.15186663743978646, 64 | 'regressor:RegDeepNet:dropout_layer_2': 0.11387781420379316, 65 | 'regressor:RegDeepNet:dropout_layer_3': 0.19220971946536616, 66 | 'regressor:RegDeepNet:dropout_output': 0.5509953660515314, 67 | 'regressor:RegDeepNet:lambda2': 2.3655442216865217e-06, 68 | 'regressor:RegDeepNet:learning_rate': 0.1, 69 | 'regressor:RegDeepNet:num_layers': 'e', 70 | 'regressor:RegDeepNet:num_units_layer_1': 173, 71 | 'regressor:RegDeepNet:num_units_layer_2': 690, 72 | 'regressor:RegDeepNet:num_units_layer_3': 2761, 73 | 'regressor:RegDeepNet:number_updates': 4173, 74 | 'regressor:RegDeepNet:solver': 'smorm3s', 75 | 'regressor:RegDeepNet:std_layer_1': 0.006483588902887654, 76 | 'regressor:RegDeepNet:std_layer_2': 0.006696161430555593, 77 | 'regressor:RegDeepNet:std_layer_3': 0.0030798462419321746, 78 | 'regressor:__choice__': 'RegDeepNet', 79 | 'rescaling:__choice__': 'standardize'})), 80 | (0.160000, SimpleRegressionPipeline(configuration={ 81 | 'imputation:strategy': 'mean', 82 | 'one_hot_encoding:minimum_fraction': 0.00044746581915706805, 83 | 'one_hot_encoding:use_minimum_fraction': 'True', 84 | 'preprocessor:__choice__': 'no_preprocessing', 85 | 'regressor:RegDeepNet:activation': 'tanh', 86 | 'regressor:RegDeepNet:batch_size': 1867, 87 | 'regressor:RegDeepNet:dropout_layer_1': 0.0044842379741719856, 88 | 'regressor:RegDeepNet:dropout_output': 0.029970881815609602, 89 | 'regressor:RegDeepNet:lambda2': 3.922344043854585e-05, 90 | 'regressor:RegDeepNet:learning_rate': 0.001, 91 | 'regressor:RegDeepNet:num_layers': 'c', 92 | 'regressor:RegDeepNet:num_units_layer_1': 2775, 93 | 'regressor:RegDeepNet:number_updates': 4672, 94 | 'regressor:RegDeepNet:solver': 'smorm3s', 95 | 'regressor:RegDeepNet:std_layer_1': 0.0011091871005401157, 96 | 'regressor:__choice__': 'RegDeepNet', 97 | 'rescaling:__choice__': 'standardize'})), 98 | (0.100000, SimpleRegressionPipeline(configuration={ 99 | 'imputation:strategy': 'mean', 100 | 'one_hot_encoding:minimum_fraction': 0.0006151267694526832, 101 | 'one_hot_encoding:use_minimum_fraction': 'True', 102 | 'preprocessor:__choice__': 'no_preprocessing', 103 | 'regressor:RegDeepNet:activation': 'tanh', 104 | 'regressor:RegDeepNet:batch_size': 1293, 105 | 'regressor:RegDeepNet:dropout_layer_1': 0.024322298790122678, 106 | 'regressor:RegDeepNet:dropout_layer_2': 0.4831886801640319, 107 | 'regressor:RegDeepNet:dropout_layer_3': 0.7303058944461246, 108 | 'regressor:RegDeepNet:dropout_output': 0.43112081941910074, 109 | 'regressor:RegDeepNet:lambda2': 4.561723820100022e-06, 110 | 'regressor:RegDeepNet:learning_rate': 0.001, 111 | 'regressor:RegDeepNet:num_layers': 'e', 112 | 'regressor:RegDeepNet:num_units_layer_1': 2999, 113 | 'regressor:RegDeepNet:num_units_layer_2': 1630, 114 | 'regressor:RegDeepNet:num_units_layer_3': 897, 115 | 'regressor:RegDeepNet:number_updates': 4471, 116 | 'regressor:RegDeepNet:solver': 'smorm3s', 117 | 'regressor:RegDeepNet:std_layer_1': 0.0013646791717249367, 118 | 'regressor:RegDeepNet:std_layer_2': 0.012431732856634247, 119 | 'regressor:RegDeepNet:std_layer_3': 0.002351992156794049, 120 | 'regressor:__choice__': 'RegDeepNet', 121 | 'rescaling:__choice__': 'standardize'})), 122 | (0.060000, SimpleRegressionPipeline(configuration={ 123 | 'imputation:strategy': 'mean', 124 | 'one_hot_encoding:minimum_fraction': 0.006283026157824821, 125 | 'one_hot_encoding:use_minimum_fraction': 'True', 126 | 'preprocessor:__choice__': 'no_preprocessing', 127 | 'regressor:RegDeepNet:activation': 'tanh', 128 | 'regressor:RegDeepNet:batch_size': 1802, 129 | 'regressor:RegDeepNet:dropout_layer_1': 0.01257793094940521, 130 | 'regressor:RegDeepNet:dropout_output': 0.023821950297696383, 131 | 'regressor:RegDeepNet:lambda2': 8.078248563082777e-05, 132 | 'regressor:RegDeepNet:learning_rate': 0.001, 133 | 'regressor:RegDeepNet:num_layers': 'c', 134 | 'regressor:RegDeepNet:num_units_layer_1': 3293, 135 | 'regressor:RegDeepNet:number_updates': 4842, 136 | 'regressor:RegDeepNet:solver': 'smorm3s', 137 | 'regressor:RegDeepNet:std_layer_1': 0.001130906938022124, 138 | 'regressor:__choice__': 'RegDeepNet', 139 | 'rescaling:__choice__': 'standardize'})), 140 | ] 141 | 142 | targets = [] 143 | predictions = [] 144 | predictions_valid = [] 145 | predictions_test = [] 146 | 147 | 148 | def fit_and_predict(estimator, weight, X, y): 149 | try: 150 | estimator.fit(X.copy(), y.copy()) 151 | pv = estimator.predict(X_valid.copy()) * weight 152 | pt = estimator.predict(X_test.copy()) * weight 153 | except Exception as e: 154 | print(e) 155 | print(estimator.configuration) 156 | pv = None 157 | pt = None 158 | return pv, pt 159 | 160 | 161 | # Make predictions and weight them 162 | all_predictions = Parallel(n_jobs=-1)(delayed(fit_and_predict) \ 163 | (estimator, weight, X, y) for 164 | weight, estimator in choices) 165 | for pv, pt in all_predictions: 166 | predictions_valid.append(pv) 167 | predictions_test.append(pt) 168 | 169 | # Output the predictions 170 | for name, predictions in [('valid', predictions_valid), 171 | ('test', predictions_test)]: 172 | predictions = np.array(predictions) 173 | predictions = np.sum(predictions, axis=0).astype(np.float32) 174 | 175 | filepath = os.path.join(output, '%s_%s_000.predict' % (dataset, name)) 176 | np.savetxt(filepath, predictions, delimiter=' ', fmt='%.4e') 177 | -------------------------------------------------------------------------------- /DeepFeedNet.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.sparse as sp 3 | 4 | from ConfigSpace.configuration_space import ConfigurationSpace 5 | from ConfigSpace.conditions import EqualsCondition, InCondition 6 | from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \ 7 | UniformIntegerHyperparameter, CategoricalHyperparameter, Constant 8 | 9 | from autosklearn.pipeline.components.base import AutoSklearnClassificationAlgorithm 10 | from autosklearn.pipeline.constants import * 11 | 12 | 13 | class DeepFeedNet(AutoSklearnClassificationAlgorithm): 14 | 15 | def __init__(self, number_updates, batch_size, num_layers, num_units_layer_1, 16 | dropout_layer_1, dropout_output, std_layer_1, 17 | learning_rate, solver, lambda2, activation, 18 | num_units_layer_2=10, num_units_layer_3=10, num_units_layer_4=10, 19 | dropout_layer_2=0.5, dropout_layer_3=0.5, dropout_layer_4=0.5, 20 | std_layer_2=0.005, std_layer_3=0.005, std_layer_4=0.005, 21 | momentum=0.99, beta1=0.9, beta2=0.9, rho=0.95, 22 | lr_policy='fixed', gamma=0.01, power=1.0, epoch_step=2, 23 | random_state=None): 24 | self.number_updates = number_updates 25 | self.batch_size = batch_size 26 | # Hacky implementation of condition on number of layers 27 | self.num_layers = ord(num_layers) - ord('a') 28 | self.dropout_output = dropout_output 29 | self.learning_rate = learning_rate 30 | self.lr_policy = lr_policy 31 | self.lambda2 = lambda2 32 | self.momentum = momentum 33 | # Added 1-beta due to change in config space 34 | self.beta1 = 1-beta1 35 | self.beta2 = 1-beta2 36 | self.rho = rho 37 | self.solver = solver 38 | self.activation = activation 39 | self.gamma = gamma 40 | self.power = power 41 | self.epoch_step = epoch_step 42 | 43 | # Empty features and shape 44 | self.n_features = None 45 | self.input_shape = None 46 | self.m_issparse = False 47 | self.m_isbinary = False 48 | self.m_ismultilabel = False 49 | 50 | # To avoid eval call. Could be done with **karws 51 | args = locals() 52 | 53 | self.num_units_per_layer = [] 54 | self.dropout_per_layer = [] 55 | self.std_per_layer = [] 56 | for i in range(1, self.num_layers): 57 | self.num_units_per_layer.append(int(args.get("num_units_layer_" + str(i)))) 58 | self.dropout_per_layer.append(float(args.get("dropout_layer_" + str(i)))) 59 | self.std_per_layer.append(float(args.get("std_layer_" + str(i)))) 60 | self.estimator = None 61 | 62 | def _prefit(self, X, y): 63 | self.batch_size = int(self.batch_size) 64 | self.n_features = X.shape[1] 65 | self.input_shape = (self.batch_size, self.n_features) 66 | 67 | assert len(self.num_units_per_layer) == self.num_layers - 1,\ 68 | "Number of created layers is different than actual layers" 69 | assert len(self.dropout_per_layer) == self.num_layers - 1,\ 70 | "Number of created layers is different than actual layers" 71 | 72 | # TODO: Better if statement 73 | if len(y.shape) == 2 and y.shape[1] > 1: # Multilabel 74 | self.m_ismultilabel = True 75 | self.num_output_units = y.shape[1] 76 | else: 77 | number_classes = len(np.unique(y.astype(int))) 78 | if number_classes == 2: # Make it binary 79 | self.m_isbinary = True 80 | self.num_output_units = 1 81 | if len(y.shape) == 1: 82 | y = y[:, np.newaxis] 83 | else: 84 | self.num_output_units = number_classes 85 | 86 | self.m_issparse = sp.issparse(X) 87 | 88 | return X, y 89 | 90 | def fit(self, X, y): 91 | 92 | Xf, yf = self._prefit(X, y) 93 | 94 | epoch = (self.number_updates * self.batch_size)//X.shape[0] 95 | number_epochs = min(max(2, epoch), 50) # Capping of epochs 96 | 97 | from ...implementations import FeedForwardNet 98 | self.estimator = FeedForwardNet.FeedForwardNet(batch_size=self.batch_size, 99 | input_shape=self.input_shape, 100 | num_layers=self.num_layers, 101 | num_units_per_layer=self.num_units_per_layer, 102 | dropout_per_layer=self.dropout_per_layer, 103 | std_per_layer=self.std_per_layer, 104 | num_output_units=self.num_output_units, 105 | dropout_output=self.dropout_output, 106 | learning_rate=self.learning_rate, 107 | lr_policy=self.lr_policy, 108 | lambda2=self.lambda2, 109 | momentum=self.momentum, 110 | beta1=self.beta1, 111 | beta2=self.beta2, 112 | rho=self.rho, 113 | solver=self.solver, 114 | activation=self.activation, 115 | num_epochs=number_epochs, 116 | gamma=self.gamma, 117 | power=self.power, 118 | epoch_step=self.epoch_step, 119 | is_sparse=self.m_issparse, 120 | is_binary=self.m_isbinary, 121 | is_multilabel=self.m_ismultilabel) 122 | self.estimator.fit(Xf, yf) 123 | return self 124 | 125 | def predict(self, X): 126 | if self.estimator is None: 127 | raise NotImplementedError 128 | return self.estimator.predict(X, self.m_issparse) 129 | 130 | def predict_proba(self, X): 131 | if self.estimator is None: 132 | raise NotImplementedError() 133 | return self.estimator.predict_proba(X, self.m_issparse) 134 | 135 | @staticmethod 136 | def get_properties(dataset_properties=None): 137 | return {'shortname': 'feed_nn', 138 | 'name': 'Feed Forward Neural Network', 139 | 'handles_regression': False, 140 | 'handles_classification': True, 141 | 'handles_multiclass': True, 142 | 'handles_multilabel': True, 143 | 'is_deterministic': True, 144 | 'input': (DENSE, SPARSE, UNSIGNED_DATA), 145 | 'output': (PREDICTIONS,)} 146 | 147 | @staticmethod 148 | def get_hyperparameter_search_space(dataset_properties=None): 149 | # GPUTRACK: Based on http://svail.github.io/rnn_perf/ 150 | # We make batch size and number of units multiples of 64 151 | 152 | # Hacky way to condition layers params based on the number of layers 153 | # GPUTRACK: Reduced number of layers 154 | # 'c'=1, 'd'=2, 'e'=3 ,'f'=4 + output_layer 155 | # layer_choices = [chr(i) for i in xrange(ord('c'), ord('e'))] 156 | 157 | layer_choices = ['c', 'd', 'e'] 158 | 159 | batch_size = UniformIntegerHyperparameter("batch_size", 160 | 32, 4096, 161 | log=True, 162 | default=32) 163 | 164 | number_updates = UniformIntegerHyperparameter("number_updates", 165 | 200, 3500, 166 | log=True, 167 | default=200) 168 | 169 | num_layers = CategoricalHyperparameter("num_layers", 170 | choices=layer_choices, 171 | default='c') 172 | 173 | num_units_layer_1 = UniformIntegerHyperparameter("num_units_layer_1", 174 | 64, 4096, 175 | log=True, 176 | default=256) 177 | 178 | num_units_layer_2 = UniformIntegerHyperparameter("num_units_layer_2", 179 | 64, 4096, 180 | log=True, 181 | default=128) 182 | 183 | num_units_layer_3 = UniformIntegerHyperparameter("num_units_layer_3", 184 | 64, 4096, 185 | log=True, 186 | default=128) 187 | 188 | dropout_layer_1 = UniformFloatHyperparameter("dropout_layer_1", 189 | 0.0, 0.99, 190 | default=0.5) 191 | 192 | dropout_layer_2 = UniformFloatHyperparameter("dropout_layer_2", 193 | 0.0, 0.99, 194 | default=0.5) 195 | 196 | dropout_layer_3 = UniformFloatHyperparameter("dropout_layer_3", 197 | 0.0, 0.99, 198 | default=0.5) 199 | 200 | dropout_output = UniformFloatHyperparameter("dropout_output", 201 | 0.0, 0.99, 202 | default=0.5) 203 | 204 | lr = CategoricalHyperparameter("learning_rate", 205 | choices=[1e-1, 1e-2, 1e-3, 1e-4], 206 | default=1e-2) 207 | 208 | l2 = UniformFloatHyperparameter("lambda2", 1e-6, 1e-2, log=True, 209 | default=1e-3) 210 | 211 | std_layer_1 = UniformFloatHyperparameter("std_layer_1", 0.001, 0.1, 212 | log=True, 213 | default=0.005) 214 | 215 | std_layer_2 = UniformFloatHyperparameter("std_layer_2", 0.001, 0.1, 216 | log=True, 217 | default=0.005) 218 | 219 | std_layer_3 = UniformFloatHyperparameter("std_layer_3", 0.001, 0.1, 220 | log=True, 221 | default=0.005) 222 | 223 | # Using Tobias' adam 224 | solver = Constant(name="solver", value="smorm3s") 225 | non_linearities = CategoricalHyperparameter(name='activation', 226 | choices=['relu', 'tanh'], 227 | default='relu') 228 | 229 | cs = ConfigurationSpace() 230 | # cs.add_hyperparameter(number_epochs) 231 | cs.add_hyperparameter(number_updates) 232 | cs.add_hyperparameter(batch_size) 233 | cs.add_hyperparameter(num_layers) 234 | cs.add_hyperparameter(num_units_layer_1) 235 | cs.add_hyperparameter(num_units_layer_2) 236 | cs.add_hyperparameter(num_units_layer_3) 237 | cs.add_hyperparameter(dropout_layer_1) 238 | cs.add_hyperparameter(dropout_layer_2) 239 | cs.add_hyperparameter(dropout_layer_3) 240 | cs.add_hyperparameter(dropout_output) 241 | cs.add_hyperparameter(std_layer_1) 242 | cs.add_hyperparameter(std_layer_2) 243 | cs.add_hyperparameter(std_layer_3) 244 | cs.add_hyperparameter(lr) 245 | cs.add_hyperparameter(l2) 246 | cs.add_hyperparameter(solver) 247 | cs.add_hyperparameter(non_linearities) 248 | 249 | layer_2_condition = InCondition(num_units_layer_2, num_layers, 250 | ['d', 'e']) 251 | layer_3_condition = InCondition(num_units_layer_3, num_layers, 252 | ['e']) 253 | cs.add_condition(layer_2_condition) 254 | cs.add_condition(layer_3_condition) 255 | 256 | # Condition dropout parameter on layer choice 257 | dropout_2_condition = InCondition(dropout_layer_2, num_layers, 258 | ['d', 'e']) 259 | dropout_3_condition = InCondition(dropout_layer_3, num_layers, 260 | ['e']) 261 | cs.add_condition(dropout_2_condition) 262 | cs.add_condition(dropout_3_condition) 263 | 264 | # Condition std parameter on layer choice 265 | std_2_condition = InCondition(std_layer_2, num_layers, ['d', 'e']) 266 | std_3_condition = InCondition(std_layer_3, num_layers, ['e']) 267 | cs.add_condition(std_2_condition) 268 | cs.add_condition(std_3_condition) 269 | 270 | return cs 271 | -------------------------------------------------------------------------------- /FeedForwardNet.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Jul 22, 2015 3 | Modified on Apr 21, 2016 4 | 5 | @author: Aaron Klein 6 | @modified: Hector Mendoza 7 | """ 8 | import numpy as np 9 | import theano 10 | import theano.tensor as T 11 | import theano.sparse as S 12 | import lasagne 13 | 14 | DEBUG = True 15 | 16 | 17 | def sharedX(X, dtype=theano.config.floatX, name=None): 18 | return theano.shared(np.asarray(X, dtype=dtype), name=name) 19 | 20 | 21 | def smorms3(cost, params, learning_rate=1e-3, eps=1e-16, gather=False): 22 | updates = [] 23 | optim_params = [] 24 | grads = T.grad(cost, params) 25 | 26 | for p, grad in zip(params, grads): 27 | mem = sharedX(p.get_value() * 0. + 1.) 28 | g = sharedX(p.get_value() * 0.) 29 | g2 = sharedX(p.get_value() * 0.) 30 | if gather: 31 | optim_params.append(mem) 32 | optim_params.append(g) 33 | optim_params.append(g2) 34 | 35 | r_t = 1. / (mem + 1) 36 | g_t = (1 - r_t) * g + r_t * grad 37 | g2_t = (1 - r_t) * g2 + r_t * grad**2 38 | p_t = p - grad * T.minimum(learning_rate, g_t * g_t / (g2_t + eps)) / \ 39 | (T.sqrt(g2_t + eps) + eps) 40 | mem_t = 1 + mem * (1 - g_t * g_t / (g2_t + eps)) 41 | 42 | updates.append((g, g_t)) 43 | updates.append((g2, g2_t)) 44 | updates.append((p, p_t)) 45 | updates.append((mem, mem_t)) 46 | 47 | return updates 48 | 49 | 50 | def iterate_minibatches(inputs, targets, batchsize, shuffle=False): 51 | assert inputs.shape[0] == targets.shape[0],\ 52 | "The number of training points is not the same" 53 | if shuffle: 54 | indices = np.arange(inputs.shape[0]) 55 | np.random.shuffle(indices) 56 | for start_idx in range(0, inputs.shape[0] - batchsize + 1, batchsize): 57 | if shuffle: 58 | excerpt = indices[start_idx:start_idx + batchsize] 59 | else: 60 | excerpt = slice(start_idx, start_idx + batchsize) 61 | yield inputs[excerpt], targets[excerpt] 62 | 63 | 64 | class FeedForwardNet(object): 65 | def __init__(self, input_shape=(100, 28*28), 66 | batch_size=100, num_layers=4, num_units_per_layer=(10, 10, 10), 67 | dropout_per_layer=(0.5, 0.5, 0.5), std_per_layer=(0.005, 0.005, 0.005), 68 | num_output_units=2, dropout_output=0.5, learning_rate=0.01, 69 | lambda2=1e-4, momentum=0.9, beta1=0.9, beta2=0.9, 70 | rho=0.95, solver="adam", num_epochs=2, activation='relu', 71 | lr_policy="fixed", gamma=0.01, power=1.0, epoch_step=1, 72 | is_sparse=False, is_binary=False, is_regression=False, is_multilabel=False): 73 | 74 | self.batch_size = batch_size 75 | self.input_shape = input_shape 76 | self.num_layers = num_layers 77 | self.num_units_per_layer = num_units_per_layer 78 | self.dropout_per_layer = np.asarray(dropout_per_layer, dtype=theano.config.floatX) 79 | self.num_output_units = num_output_units 80 | self.dropout_output = T.cast(dropout_output, dtype=theano.config.floatX) 81 | self.std_per_layer = np.asarray(std_per_layer, dtype=theano.config.floatX) 82 | self.momentum = T.cast(momentum, dtype=theano.config.floatX) 83 | self.learning_rate = np.asarray(learning_rate, dtype=theano.config.floatX) 84 | self.lambda2 = T.cast(lambda2, dtype=theano.config.floatX) 85 | self.beta1 = T.cast(beta1, dtype=theano.config.floatX) 86 | self.beta2 = T.cast(beta2, dtype=theano.config.floatX) 87 | self.rho = T.cast(rho, dtype=theano.config.floatX) 88 | # self.number_updates = number_updates 89 | self.num_epochs = num_epochs 90 | self.lr_policy = lr_policy 91 | self.gamma = np.asarray(gamma, dtype=theano.config.floatX) 92 | if power > 1.0: 93 | print('hyperparameter must be between 0 and 1') 94 | self.power = np.asarray(1.0, dtype=theano.config.floatX) 95 | else: 96 | self.power = np.asarray(power, dtype=theano.config.floatX) 97 | self.epoch_step = np.asarray(epoch_step, dtype=theano.config.floatX) 98 | self.is_binary = is_binary 99 | self.is_regression = is_regression 100 | self.is_multilabel = is_multilabel 101 | self.is_sparse = is_sparse 102 | self.solver = solver 103 | self.activation = activation 104 | 105 | if is_sparse: 106 | input_var = S.csr_matrix('inputs', dtype=theano.config.floatX) 107 | else: 108 | input_var = T.matrix('inputs') 109 | 110 | if self.is_binary or self.is_multilabel or self.is_regression: 111 | target_var = T.matrix('targets') 112 | else: 113 | target_var = T.ivector('targets') 114 | 115 | if DEBUG: 116 | if self.is_binary: 117 | print("... using binary loss") 118 | if self.is_multilabel: 119 | print("... using multilabel prediction") 120 | if self.is_regression: 121 | print("... using regression loss") 122 | print("... building network") 123 | print input_shape 124 | print("... with number of epochs") 125 | print(num_epochs) 126 | 127 | self.network = lasagne.layers.InputLayer(shape=input_shape, 128 | input_var=input_var) 129 | 130 | # Choose hidden activation function 131 | if self.is_binary or self.is_multilabel or self.is_regression: 132 | activation_function = self.binary_activation.get(self.activation, 133 | lasagne.nonlinearities.tanh) 134 | else: 135 | activation_function = self.multiclass_activation.get(self.activation, 136 | lasagne.nonlinearities.rectify) 137 | 138 | # Define each layer 139 | for i in range(num_layers - 1): 140 | self.network = lasagne.layers.DenseLayer( 141 | lasagne.layers.dropout(self.network, 142 | p=self.dropout_per_layer[i]), 143 | num_units=self.num_units_per_layer[i], 144 | W=lasagne.init.GlorotNormal(gain=1.0), 145 | b=lasagne.init.Constant(val=0.0), 146 | nonlinearity=activation_function) 147 | 148 | # Define output layer and nonlinearity of last layer 149 | if self.is_regression: 150 | output_activation = lasagne.nonlinearities.linear 151 | elif self.is_binary or self.is_multilabel: 152 | output_activation = lasagne.nonlinearities.sigmoid 153 | else: 154 | output_activation = lasagne.nonlinearities.softmax 155 | 156 | self.network = lasagne.layers.DenseLayer( 157 | lasagne.layers.dropout(self.network, 158 | p=self.dropout_output), 159 | num_units=self.num_output_units, 160 | W=lasagne.init.GlorotNormal(), 161 | b=lasagne.init.Constant(), 162 | nonlinearity=output_activation) 163 | 164 | prediction = lasagne.layers.get_output(self.network) 165 | 166 | if self.is_regression: 167 | loss_function = lasagne.objectives.squared_error 168 | elif self.is_binary or self.is_multilabel: 169 | loss_function = lasagne.objectives.binary_crossentropy 170 | else: 171 | loss_function = lasagne.objectives.categorical_crossentropy 172 | 173 | loss = loss_function(prediction, target_var) 174 | 175 | # Aggregate loss mean function with l2 Regularization on all layers' params 176 | if self.is_binary or self.is_multilabel: 177 | loss = T.sum(loss, dtype=theano.config.floatX) 178 | else: 179 | loss = T.mean(loss, dtype=theano.config.floatX) 180 | l2_penalty = self.lambda2 * lasagne.regularization.regularize_network_params( 181 | self.network, lasagne.regularization.l2) 182 | loss += l2_penalty 183 | params = lasagne.layers.get_all_params(self.network, trainable=True) 184 | 185 | # Create the symbolic scalar lr for loss & updates function 186 | lr_scalar = T.scalar('lr', dtype=theano.config.floatX) 187 | 188 | if solver == "nesterov": 189 | updates = lasagne.updates.nesterov_momentum(loss, params, 190 | learning_rate=lr_scalar, 191 | momentum=self.momentum) 192 | elif solver == "adam": 193 | updates = lasagne.updates.adam(loss, params, 194 | learning_rate=lr_scalar, 195 | beta1=self.beta1, beta2=self.beta2) 196 | elif solver == "adadelta": 197 | updates = lasagne.updates.adadelta(loss, params, 198 | learning_rate=lr_scalar, 199 | rho=self.rho) 200 | elif solver == "adagrad": 201 | updates = lasagne.updates.adagrad(loss, params, 202 | learning_rate=lr_scalar) 203 | elif solver == "sgd": 204 | updates = lasagne.updates.sgd(loss, params, 205 | learning_rate=lr_scalar) 206 | elif solver == "momentum": 207 | updates = lasagne.updates.momentum(loss, params, 208 | learning_rate=lr_scalar, 209 | momentum=self.momentum) 210 | elif solver == "smorm3s": 211 | updates = smorms3(loss, params, 212 | learning_rate=lr_scalar) 213 | else: 214 | updates = lasagne.updates.sgd(loss, params, 215 | learning_rate=lr_scalar) 216 | 217 | if DEBUG: 218 | print("... compiling theano functions") 219 | self.train_fn = theano.function([input_var, target_var, lr_scalar], 220 | loss, 221 | updates=updates, 222 | allow_input_downcast=True, 223 | profile=False, 224 | on_unused_input='warn', 225 | name='train_fn') 226 | if DEBUG: 227 | print('... compiling update function') 228 | self.update_function = self._policy_function() 229 | 230 | def _policy_function(self): 231 | epoch, gm, powr, step = T.scalars('epoch', 'gm', 'powr', 'step') 232 | if self.lr_policy == 'inv': 233 | decay = T.power(1.0+gm*epoch, -powr) 234 | elif self.lr_policy == 'exp': 235 | decay = gm ** epoch 236 | elif self.lr_policy == 'step': 237 | decay = T.switch(T.eq(T.mod_check(epoch, step), 0.0), 238 | T.power(gm, T.floor_div(epoch, step)), 239 | 1.0) 240 | elif self.lr_policy == 'fixed': 241 | decay = T.constant(1.0, name='fixed', dtype=theano.config.floatX) 242 | 243 | return theano.function([gm, epoch, powr, step], 244 | decay, 245 | allow_input_downcast=True, 246 | on_unused_input='ignore', 247 | name='update_fn') 248 | 249 | def fit(self, X, y): 250 | if self.batch_size > X.shape[0]: 251 | self.batch_size = X.shape[0] 252 | print('One update per epoch batch size') 253 | 254 | if self.is_sparse: 255 | X = X.astype(np.float32) 256 | else: 257 | try: 258 | X = np.asarray(X, dtype=theano.config.floatX) 259 | y = np.asarray(y, dtype=theano.config.floatX) 260 | except Exception as E: 261 | print('Fit casting error: %s' % E) 262 | 263 | for epoch in range(self.num_epochs): 264 | train_err = 0 265 | train_batches = 0 266 | for inputs, targets in iterate_minibatches(X, y, self.batch_size, shuffle=True): 267 | train_err += self.train_fn(inputs, targets, self.learning_rate) 268 | train_batches += 1 269 | decay = self.update_function(self.gamma, epoch+1.0, 270 | self.power, self.epoch_step) 271 | self.learning_rate *= decay 272 | print(" training loss:\t\t{:.6f}".format(train_err / train_batches)) 273 | return self 274 | 275 | def predict(self, X, is_sparse=False): 276 | predictions = self.predict_proba(X, is_sparse) 277 | if self.is_multilabel: 278 | return np.round(predictions) 279 | elif self.is_regression: 280 | return predictions 281 | else: 282 | return np.argmax(predictions, axis=1) 283 | 284 | def predict_proba(self, X, is_sparse=False): 285 | if is_sparse: 286 | X = X.astype(np.float32) 287 | X = S.as_sparse_or_tensor_variable(X) 288 | else: 289 | try: 290 | X = np.asarray(X, dtype=theano.config.floatX) 291 | except Exception as E: 292 | print('Prediction casting error: %s' % E) 293 | 294 | predictions = lasagne.layers.get_output(self.network, X, deterministic=True).eval() 295 | if self.is_binary: 296 | return np.append(1.0 - predictions, predictions, axis=1) 297 | else: 298 | return predictions 299 | 300 | # TODO: Maybe create a utility module for constants 301 | multiclass_activation = { 302 | 'softmax': lasagne.nonlinearities.softmax, 303 | 'relu': lasagne.nonlinearities.rectify, 304 | 'leaky': lasagne.nonlinearities.leaky_rectify, 305 | 'very_leaky': lasagne.nonlinearities.very_leaky_rectify, 306 | 'elu': lasagne.nonlinearities.elu, 307 | 'softplus': lasagne.nonlinearities.softplus, 308 | 'linear': lasagne.nonlinearities.linear, 309 | 'scaledTanh': lasagne.nonlinearities.ScaledTanH(scale_in=2./3., 310 | scale_out=1.7159) 311 | } 312 | 313 | binary_activation = { 314 | 'sigmoid': lasagne.nonlinearities.sigmoid, 315 | 'softplus': lasagne.nonlinearities.softplus, 316 | 'tahn': lasagne.nonlinearities.tanh, 317 | 'scaledTanh': lasagne.nonlinearities.ScaledTanH(scale_in=2./3., 318 | scale_out=1.7159), 319 | 'elu': lasagne.nonlinearities.elu, 320 | 'relu': lasagne.nonlinearities.rectify, 321 | } 322 | 323 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, automl 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 18 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 21 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 22 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Code to reproduce tweakathon submissions of the team aad_freiburg for the Chalearn Automatic Machine Learning Challenge 2015 2 | 3 | # Final 1 4 | 5 | Code runs with ParamSklearn and auto-sklearn as in http://aad.informatik.uni-freiburg.de/downloads/automl_competition_2015_001.zip 6 | 7 | # Final 2 8 | 9 | Code runs with ParamSklearn and auto-sklearn as in http://aad.informatik.uni-freiburg.de/downloads/automl_competition_2015_002.zip 10 | 11 | # Final 3 12 | 13 | Code runs with auto-sklearn as in http://aad.informatik.uni-freiburg.de/downloads/automl_competition_2015_003.zip 14 | To obtain predictions for alexis, one has to install Lasagne and Theano. 15 | 16 | # Final 4 17 | 18 | Code runs with development branch of auto-sklearn as of commit b76a2b8e51856f7a2f9db53082b6d0f1cb23ed5a (https://github.com/automl/auto-sklearn/commit/b76a2b8e51856f7a2f9db53082b6d0f1cb23ed5a). 19 | To obtain predictions for dataset 4 and 5 one needs lasagne and theano as written in the respective python files. 20 | -------------------------------------------------------------------------------- /RegDeepNet.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.sparse as sp 3 | 4 | from ConfigSpace.configuration_space import ConfigurationSpace 5 | from ConfigSpace.conditions import EqualsCondition, InCondition 6 | from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \ 7 | UniformIntegerHyperparameter, CategoricalHyperparameter, Constant 8 | 9 | from autosklearn.pipeline.components.base import AutoSklearnRegressionAlgorithm 10 | from autosklearn.pipeline.constants import * 11 | 12 | 13 | class RegDeepNet(AutoSklearnRegressionAlgorithm): 14 | 15 | def __init__(self, number_updates, batch_size, num_layers, num_units_layer_1, 16 | dropout_layer_1, dropout_output, std_layer_1, 17 | learning_rate, solver, lambda2, activation, 18 | num_units_layer_2=10, num_units_layer_3=10, num_units_layer_4=10, 19 | num_units_layer_5=10, num_units_layer_6=10, 20 | dropout_layer_2=0.5, dropout_layer_3=0.5, dropout_layer_4=0.5, 21 | dropout_layer_5=0.5, dropout_layer_6=0.5, 22 | std_layer_2=0.005, std_layer_3=0.005, std_layer_4=0.005, 23 | std_layer_5=0.005, std_layer_6=0.005, 24 | momentum=0.99, beta1=0.9, beta2=0.9, rho=0.95, 25 | lr_policy='fixed', gamma=0.01, power=1.0, epoch_step=2, 26 | random_state=None): 27 | self.number_updates = number_updates 28 | self.batch_size = batch_size 29 | # Hacky implementation of condition on number of layers 30 | self.num_layers = ord(num_layers) - ord('a') 31 | self.dropout_output = dropout_output 32 | self.learning_rate = learning_rate 33 | self.lr_policy = lr_policy 34 | self.lambda2 = lambda2 35 | self.momentum = momentum 36 | self.beta1 = 1-beta1 37 | self.beta2 = 1-beta2 38 | self.rho = rho 39 | self.solver = solver 40 | self.activation = activation 41 | self.gamma = gamma 42 | self.power = power 43 | self.epoch_step = epoch_step 44 | 45 | # Empty features and shape 46 | self.n_features = None 47 | self.input_shape = None 48 | self.m_issparse = False 49 | self.m_isregression = True 50 | 51 | # To avoid eval call. Could be done with **karws 52 | args = locals() 53 | 54 | self.num_units_per_layer = [] 55 | self.dropout_per_layer = [] 56 | self.std_per_layer = [] 57 | for i in range(1, self.num_layers): 58 | self.num_units_per_layer.append(int(args.get("num_units_layer_" + str(i)))) 59 | self.dropout_per_layer.append(float(args.get("dropout_layer_" + str(i)))) 60 | self.std_per_layer.append(float(args.get("std_layer_" + str(i)))) 61 | self.estimator = None 62 | 63 | def _prefit(self, X, y): 64 | self.batch_size = int(self.batch_size) 65 | self.n_features = X.shape[1] 66 | self.input_shape = (self.batch_size, self.n_features) 67 | 68 | assert len(self.num_units_per_layer) == self.num_layers - 1,\ 69 | "Number of created layers is different than actual layers" 70 | assert len(self.dropout_per_layer) == self.num_layers - 1,\ 71 | "Number of created layers is different than actual layers" 72 | 73 | self.num_output_units = 1 # Regression 74 | # Normalize the output - Suggestion on 24.04 75 | self.mean_y = np.mean(y) 76 | self.std_y = np.std(y) 77 | y = (y - self.mean_y) / self.std_y 78 | if len(y.shape) == 1: 79 | y = y[:, np.newaxis] 80 | 81 | self.m_issparse = sp.issparse(X) 82 | 83 | return X, y 84 | 85 | def fit(self, X, y): 86 | 87 | Xf, yf = self._prefit(X, y) 88 | 89 | epoch = (self.number_updates * self.batch_size)//X.shape[0] 90 | number_epochs = min(max(2, epoch), 50) # Cap the max number of possible epochs 91 | 92 | from ...implementations import FeedForwardNet 93 | self.estimator = FeedForwardNet.FeedForwardNet(batch_size=self.batch_size, 94 | input_shape=self.input_shape, 95 | num_layers=self.num_layers, 96 | num_units_per_layer=self.num_units_per_layer, 97 | dropout_per_layer=self.dropout_per_layer, 98 | std_per_layer=self.std_per_layer, 99 | num_output_units=self.num_output_units, 100 | dropout_output=self.dropout_output, 101 | learning_rate=self.learning_rate, 102 | lr_policy=self.lr_policy, 103 | lambda2=self.lambda2, 104 | momentum=self.momentum, 105 | beta1=self.beta1, 106 | beta2=self.beta2, 107 | rho=self.rho, 108 | solver=self.solver, 109 | activation=self.activation, 110 | num_epochs=number_epochs, 111 | gamma=self.gamma, 112 | power=self.power, 113 | epoch_step=self.epoch_step, 114 | is_sparse=self.m_issparse, 115 | is_binary=False, 116 | is_regression=self.m_isregression) 117 | self.estimator.fit(Xf, yf) 118 | return self 119 | 120 | def predict(self, X): 121 | if self.estimator is None: 122 | raise NotImplementedError 123 | preds = self.estimator.predict(X, self. m_issparse) 124 | return preds * self.std_y + self.mean_y 125 | 126 | def predict_proba(self, X): 127 | if self.estimator is None: 128 | raise NotImplementedError() 129 | return self.estimator.predict_proba(X, self.m_issparse) 130 | 131 | @staticmethod 132 | def get_properties(dataset_properties=None): 133 | return {'shortname': 'feed_nn', 134 | 'name': 'Feed Forward Neural Network', 135 | 'handles_regression': True, 136 | 'handles_classification': False, 137 | 'handles_multiclass': False, 138 | 'handles_multilabel': False, 139 | 'is_deterministic': True, 140 | 'input': (DENSE, SPARSE, UNSIGNED_DATA), 141 | 'output': (PREDICTIONS,)} 142 | 143 | @staticmethod 144 | def get_hyperparameter_search_space(dataset_properties=None): 145 | # GPUTRACK: Based on http://svail.github.io/rnn_perf/ 146 | # We make batch size and number of units multiples of 64 147 | 148 | # Hacky way to condition layers params based on the number of layers 149 | # GPUTRACK: Reduced number of layers 150 | # 'c'=1, 'd'=2, 'e'=3 ,'f'=4 + output_layer 151 | # layer_choices = [chr(i) for i in xrange(ord('c'), ord('e'))] 152 | 153 | layer_choices = ['c', 'd', 'e'] 154 | 155 | batch_size = UniformIntegerHyperparameter("batch_size", 156 | 64, 2048, 157 | default=550) 158 | 159 | number_updates = UniformIntegerHyperparameter("number_updates", 160 | 200, 5500, 161 | log=True, 162 | default=512) 163 | 164 | num_layers = CategoricalHyperparameter("num_layers", 165 | choices=layer_choices, 166 | default='c') 167 | 168 | num_units_layer_1 = UniformIntegerHyperparameter("num_units_layer_1", 169 | 64, 4096, 170 | default=128) 171 | 172 | num_units_layer_2 = UniformIntegerHyperparameter("num_units_layer_2", 173 | 64, 4096, 174 | default=128) 175 | num_units_layer_3 = UniformIntegerHyperparameter("num_units_layer_3", 176 | 64, 4096, 177 | log=True, 178 | default=128) 179 | 180 | dropout_layer_1 = UniformFloatHyperparameter("dropout_layer_1", 181 | 0.0, 0.99, 182 | default=0.5) 183 | 184 | dropout_layer_2 = UniformFloatHyperparameter("dropout_layer_2", 185 | 0.0, 0.99, 186 | default=0.5) 187 | 188 | dropout_layer_3 = UniformFloatHyperparameter("dropout_layer_3", 189 | 0.0, 0.99, 190 | default=0.5) 191 | 192 | dropout_output = UniformFloatHyperparameter("dropout_output", 193 | 0.0, 0.99, 194 | default=0.5) 195 | 196 | lr = CategoricalHyperparameter("learning_rate", 197 | choices=[1e-1, 1e-2, 1e-3, 1e-4], 198 | default=1e-2) 199 | 200 | l2 = UniformFloatHyperparameter("lambda2", 1e-6, 1e-2, log=True, 201 | default=1e-3) 202 | 203 | std_layer_1 = UniformFloatHyperparameter("std_layer_1", 0.001, 0.1, 204 | log=True, 205 | default=0.005) 206 | 207 | std_layer_2 = UniformFloatHyperparameter("std_layer_2", 0.001, 0.1, 208 | log=True, 209 | default=0.005) 210 | 211 | std_layer_3 = UniformFloatHyperparameter("std_layer_3", 0.001, 0.1, 212 | log=True, 213 | default=0.005) 214 | 215 | # Using Tobias' adam 216 | solver = Constant(name="solver", value="smorm3s") 217 | 218 | non_linearities = CategoricalHyperparameter(name='activation', 219 | choices=['tanh', 'scaledTanh', 'sigmoid'], 220 | default='tanh') 221 | 222 | cs = ConfigurationSpace() 223 | # cs.add_hyperparameter(number_epochs) 224 | cs.add_hyperparameter(number_updates) 225 | cs.add_hyperparameter(batch_size) 226 | cs.add_hyperparameter(num_layers) 227 | cs.add_hyperparameter(num_units_layer_1) 228 | cs.add_hyperparameter(num_units_layer_2) 229 | cs.add_hyperparameter(num_units_layer_3) 230 | cs.add_hyperparameter(dropout_layer_1) 231 | cs.add_hyperparameter(dropout_layer_2) 232 | cs.add_hyperparameter(dropout_layer_3) 233 | cs.add_hyperparameter(dropout_output) 234 | cs.add_hyperparameter(std_layer_1) 235 | cs.add_hyperparameter(std_layer_2) 236 | cs.add_hyperparameter(std_layer_3) 237 | cs.add_hyperparameter(lr) 238 | cs.add_hyperparameter(l2) 239 | cs.add_hyperparameter(solver) 240 | cs.add_hyperparameter(non_linearities) 241 | 242 | layer_2_condition = InCondition(num_units_layer_2, num_layers, 243 | ['d', 'e']) 244 | layer_3_condition = InCondition(num_units_layer_3, num_layers, 245 | ['e']) 246 | cs.add_condition(layer_2_condition) 247 | cs.add_condition(layer_3_condition) 248 | 249 | # Condition dropout parameter on layer choice 250 | dropout_2_condition = InCondition(dropout_layer_2, num_layers, 251 | ['d', 'e']) 252 | dropout_3_condition = InCondition(dropout_layer_3, num_layers, 253 | ['e']) 254 | cs.add_condition(dropout_2_condition) 255 | cs.add_condition(dropout_3_condition) 256 | 257 | # Condition std parameter on layer choice 258 | std_2_condition = InCondition(std_layer_2, num_layers, ['d', 'e']) 259 | std_3_condition = InCondition(std_layer_3, num_layers, ['e']) 260 | cs.add_condition(std_2_condition) 261 | cs.add_condition(std_3_condition) 262 | 263 | return cs 264 | --------------------------------------------------------------------------------