├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── RFTest.py ├── __init__.py ├── constructors ├── ISM.py ├── __init__.py ├── ensemble.py ├── genesim.py ├── guide ├── inTrees.py ├── quest └── treeconstructor.py ├── data ├── __init__.py ├── austra.data ├── breast-cancer-wisconsin.data ├── car.data ├── ecoli.data ├── glass.data ├── heart.dat ├── labor.arff ├── led7.data ├── load_all_datasets.py ├── load_datasets.py ├── lymph.data ├── magic04.data ├── migbase.csv ├── migbase_noise20.csv ├── migbase_noise33.csv ├── migbase_noise50.csv ├── nursery.data ├── pima.data ├── reduced_migbase.csv ├── shuttle.tst ├── shuttle_full.trn ├── vehicle.data ├── waveform.data ├── wine.data └── yeast.data ├── decisiontree.py ├── doc ├── constructors │ ├── ISM.m.html │ ├── ensemble.m.html │ ├── genesim.m.html │ ├── inTrees.m.html │ ├── index.html │ └── treeconstructor.m.html ├── data │ ├── index.html │ ├── load_all_datasets.m.html │ └── load_datasets.m.html ├── decisiontree.m.html ├── example.m.html └── index.html ├── evolving_tree.gif ├── example.py └── install.sh /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | *.pyc 3 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Use an official Python runtime as a base image 2 | FROM python:2.7-slim 3 | 4 | # Installing some command line tools, required to build all dependencies 5 | RUN apt-get update 6 | RUN apt-get -y install libc-dev 7 | RUN apt-get -y install gcc 8 | RUN apt-get -y install g++ 9 | RUN apt-get -y install git 10 | RUN apt-get -y install wget 11 | RUN apt-get -y install gfortran 12 | RUN apt-get -y install build-essential 13 | RUN apt-get -y install r-base 14 | RUN apt-get -y install libcurl4-openssl-dev 15 | RUN apt-get -y install tk 16 | RUN apt-get -y install libcurl4-gnutls-dev 17 | RUN apt-get -y install libssl-dev 18 | 19 | # Clone the repo, set working dir 20 | RUN git clone https://github.com/GillesVandewiele/GENESIM-1 21 | WORKDIR /GENESIM-1 22 | 23 | # Install the required python libraries 24 | RUN pip install pandas 25 | RUN pip install numpy 26 | RUN pip install sklearn 27 | RUN pip install matplotlib 28 | RUN pip install -U imbalanced-learn 29 | RUN pip install graphviz 30 | RUN pip install xgboost 31 | RUN pip install rpy2 32 | RUN pip install pylatex 33 | RUN pip install orange 34 | RUN pip install bayesian-optimization 35 | 36 | # Install R 3.3.2 37 | RUN wget https://cran.rstudio.com/src/base/R-3/R-3.3.2.tar.gz 38 | RUN tar -xvzf R-3.3.2.tar.gz 39 | RUN cd R-3.3.2 && ./configure --with-readline=no --with-x=no && make && make install 40 | 41 | # Special care needed for C45Learner from Orange 42 | RUN wget http://www.rulequest.com/Personal/c4.5r8.tar.gz 43 | RUN tar -xvzf c4.5r8.tar.gz 44 | RUN cd R8/Src && wget https://raw.githubusercontent.com/biolab/orange/master/Orange/orng/buildC45.py && wget https://raw.githubusercontent.com/biolab/orange/master/Orange/orng/ensemble.c && python buildC45.py 45 | 46 | # Install some R packages 47 | RUN wget https://cran.r-project.org/src/contrib/randomForest_4.6-12.tar.gz 48 | RUN tar -xvzf randomForest_4.6-12.tar.gz 49 | RUN R -e 'install.packages("'$(pwd)'/randomForest", repos=NULL, type="source")' 50 | RUN wget https://cran.r-project.org/src/contrib/inTrees_1.1.tar.gz 51 | RUN tar -xvzf inTrees_1.1.tar.gz 52 | RUN R -e 'install.packages("devtools", repos="http://cran.us.r-project.org")' 53 | RUN R -e 
'library(devtools); install("'$(pwd)'/inTrees", dependencies=TRUE)' 54 | 55 | CMD ["python", "example.py"] 56 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016 Ghent University and iMinds vzw with offices at Technologiepark 15, 9052 Ghent, Belgium. 2 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software for non-commercial educational and research use, including without limitation the rights to use, copy, modify, merge, publish, distribute and/or sublicense copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 3 | 1. The above copyright notice and this permission notice shall be included in all copies of the Software. 4 | 2. Permission is restricted to non-commercial educational and research use: the use of the Software is allowed for teaching purposes and academic research. Usage by non-academic parties is allowed in a strict research environment only. The use of the results of the research for commercial purposes or inclusion in commercial activities requires the permission of iMinds vzw. 5 | 3. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GENESIM: GENetic Extraction of a Single, Interpretable Model 2 | 3 |
3 | (image placeholder: likely evolving_tree.gif, the animation shipped in the repo root)
4 | 
5 | 
6 | 
7 | This repository contains an innovative algorithm that constructs an ensemble using well-known decision tree induction algorithms such as CART, C4.5, QUEST and GUIDE, combined with bagging and boosting. This ensemble is then converted to a single, interpretable decision tree in a genetic fashion. For a certain number of iterations, random pairs of decision trees are merged by first converting them to sets of k-dimensional hyperplanes and then calculating the intersection of these two sets (a classic problem from computational geometry); a minimal sketch of this loop is given further down this README. Moreover, in each iteration an individual is mutated with a certain probability. After these iterations, the accuracy on a validation set is measured for each decision tree in the population, and the one with the highest accuracy (and the lowest number of nodes in case of a tie) is returned. `example.py` contains run code for all implemented algorithms and reports their average predictive performance, computational complexity and model complexity on a number of datasets.
8 | 
9 | ## Dependencies
10 | 
11 | An install.sh script is provided that will install all required dependencies.
12 | 
13 | ## Documentation
14 | 
15 | A nice-looking documentation page is available in the doc/ directory. Download the complete directory and open index.html.
16 | 
17 | ## Decision Tree Induction Algorithm Wrappers
18 | 
19 | A wrapper is written around [Orange C4.5](http://docs.orange.biolab.si/2/reference/rst/Orange.classification.tree.html#Orange.classification.tree.C45Learner), [sklearn CART](http://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html), [GUIDE](https://www.stat.wisc.edu/~loh/guide.html) and [QUEST](https://www.stat.wisc.edu/~loh/quest.html). The returned object is a DecisionTree, which can be found in `decisiontree.py`. Moreover, different methods are available on this decision tree: classify new, unknown samples; visualise the tree; export it to string, JSON and DOT; etc.
20 | 
21 | ## Ensemble Technique Wrappers
22 | 
23 | A wrapper is written around the well-known state-of-the-art ensemble techniques [XGBoost](http://xgboost.readthedocs.io/en/latest/python/python_intro.html) and [Random Forests](http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html).
24 | 
25 | ## Similar techniques
26 | 
27 | A wrapper around the R package [inTrees](https://arxiv.org/abs/1408.5456) and an implementation of [ISM](https://lirias.kuleuven.be/handle/123456789/146229) can be found in the constructors package.
28 | 
29 | ## New dataset
30 | 
31 | A new dataset can easily be plugged into the benchmark. For this, a `load_dataset()` function must be written in `load_datasets.py`.
32 | 
33 | ## Contact
34 | 
35 | You can contact me at givdwiel.vandewiele at ugent.be for any questions, proposals or if you wish to contribute.
36 | 
37 | ## Referring
38 | 
39 | Please refer to my work when you use it.
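For illustration, the genetic loop described at the top of this README might look roughly as follows. This is a minimal sketch, not the actual implementation (see `constructors/genesim.py` for that); `merge_trees`, `mutate`, `accuracy` and `num_nodes` are hypothetical stand-ins for GENESIM's real merge, mutation and evaluation operators:

```python
import random

def genetic_merge(population, merge_trees, mutate, accuracy, num_nodes,
                  num_iterations=50, mutation_prob=0.25):
    """Sketch of GENESIM's outer loop; assumes population holds >= 2 trees
    and that all four callables are supplied by the caller."""
    for _ in range(num_iterations):
        # merge a random pair of trees (hyperplane-set intersection in the real code)
        a, b = random.sample(population, 2)
        population.append(merge_trees(a, b))
        # mutate each individual with a certain probability
        population = [mutate(t) if random.random() < mutation_prob else t
                      for t in population]
    # highest validation accuracy wins; fewer nodes breaks ties
    return max(population, key=lambda t: (accuracy(t), -num_nodes(t)))
```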
A reference to this github or to the following (yet unpublished) paper: 40 | 41 | ` 42 | @article{vandewiele2016genesim, 43 | title={GENESIM: genetic extraction of a single, interpretable model}, 44 | author={Vandewiele, Gilles and Janssens, Olivier and Ongenae, Femke and De Turck, Filip and Van Hoecke, Sofie}, 45 | journal={arXiv preprint arXiv:1611.05722}, 46 | year={2016} 47 | } 48 | ` 49 | 50 | -------------------------------------------------------------------------------- /RFTest.py: -------------------------------------------------------------------------------- 1 | from sklearn.cross_validation import StratifiedKFold 2 | from sklearn.metrics import confusion_matrix 3 | 4 | from constructors.ensemble import RFClassification 5 | from data.load_all_datasets import load_all_datasets 6 | 7 | import numpy as np 8 | 9 | from decisiontree import DecisionTree 10 | 11 | from refined_rf import RefinedRandomForest 12 | 13 | rf = RFClassification() 14 | 15 | NR_FOLDS = 5 16 | 17 | 18 | def _convert_to_tree(dt, features): 19 | """Convert a sklearn object to a `decisiontree.decisiontree` object""" 20 | n_nodes = dt.tree_.node_count 21 | children_left = dt.tree_.children_left 22 | children_right = dt.tree_.children_right 23 | feature = dt.tree_.feature 24 | threshold = dt.tree_.threshold 25 | classes = dt.classes_ 26 | 27 | # The tree structure can be traversed to compute various properties such 28 | # as the depth of each node and whether or not it is a leaf. 29 | node_depth = np.zeros(shape=n_nodes) 30 | decision_trees = [None] * n_nodes 31 | for i in range(n_nodes): 32 | decision_trees[i] = DecisionTree() 33 | is_leaves = np.zeros(shape=n_nodes, dtype=bool) 34 | stack = [(0, -1)] # seed is the root node id and its parent depth 35 | while len(stack) > 0: 36 | node_id, parent_depth = stack.pop() 37 | node_depth[node_id] = parent_depth + 1 38 | 39 | # If we have a test node 40 | if children_left[node_id] != children_right[node_id]: 41 | stack.append((children_left[node_id], parent_depth + 1)) 42 | stack.append((children_right[node_id], parent_depth + 1)) 43 | else: 44 | is_leaves[node_id] = True 45 | 46 | for i in range(n_nodes): 47 | 48 | if children_left[i] > 0: 49 | decision_trees[i].left = decision_trees[children_left[i]] 50 | 51 | if children_right[i] > 0: 52 | decision_trees[i].right = decision_trees[children_right[i]] 53 | 54 | if is_leaves[i]: 55 | decision_trees[i].label = dt.classes_[np.argmax(dt.tree_.value[i][0])] 56 | decision_trees[i].value = None 57 | else: 58 | decision_trees[i].label = features[feature[i]] 59 | decision_trees[i].value = threshold[i] 60 | 61 | return decision_trees[0] 62 | 63 | 64 | for dataset in load_all_datasets(): 65 | df = dataset['dataframe'] 66 | label_col = dataset['label_col'] 67 | feature_cols = dataset['feature_cols'] 68 | 69 | skf = StratifiedKFold(df[label_col], n_folds=NR_FOLDS, shuffle=True, random_state=1337) 70 | 71 | for fold, (train_idx, test_idx) in enumerate(skf): 72 | print 'Fold', fold+1, '/', NR_FOLDS, 'for dataset', dataset['name'] 73 | train = df.iloc[train_idx, :].reset_index(drop=True) 74 | X_train = train.drop(label_col, axis=1) 75 | y_train = train[label_col] 76 | test = df.iloc[test_idx, :].reset_index(drop=True) 77 | X_test = test.drop(label_col, axis=1) 78 | y_test = test[label_col] 79 | 80 | rf.construct_classifier(train, feature_cols, label_col) 81 | 82 | for estimator in rf.clf.estimators_: 83 | print estimator.tree_ 84 | print _convert_to_tree(estimator, feature_cols) 85 | 86 | predictions = 
rf.evaluate_multiple(X_test).astype(int) 87 | conf_matrix = confusion_matrix(y_test, predictions) 88 | print conf_matrix 89 | diagonal_sum = sum( 90 | [conf_matrix[i][i] for i in range(len(conf_matrix))]) 91 | norm_diagonal_sum = sum( 92 | [float(conf_matrix[i][i]) / float(sum(conf_matrix[i])) for i in 93 | range(len(conf_matrix))]) 94 | total_count = np.sum(conf_matrix) 95 | print 'Accuracy:', float(diagonal_sum) / float(total_count) 96 | print 'Balanced accuracy:', float(norm_diagonal_sum) / float(conf_matrix.shape[0]) -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/predict-idlab/GENESIM/44925de91ae408fea30ea8ece3688f9e42d4f040/__init__.py -------------------------------------------------------------------------------- /constructors/ISM.py: -------------------------------------------------------------------------------- 1 | """ 2 | Interpretable Single Model 3 | -------------------------- 4 | 5 | Merges different decision trees in an ensemble together in a single, interpretable decision tree 6 | 7 | Written by Gilles Vandewiele in commission of IDLab - INTEC from University Ghent. 8 | 9 | Reference: 10 | Van Assche, Anneleen, and Hendrik Blockeel. 11 | "Seeing the forest through the trees: Learning a comprehensible model from an ensemble." 12 | European Conference on Machine Learning. Springer Berlin Heidelberg, 2007. 13 | """ 14 | 15 | from collections import Counter 16 | import math 17 | 18 | import numpy as np 19 | 20 | import decisiontree 21 | 22 | 23 | def _extract_tests(tree, _tests=set()): 24 | """ 25 | Given a decision tree, extract all tests from the nodes 26 | 27 | :param tree: the decision tree to extract tests from (decisiontree.py) 28 | :param _tests: recursive parameter, don't touch 29 | :return: a set of possible tests (feature_label <= threshold_value); each entry is a tuple (label, value) 30 | """ 31 | if tree.value is not None: 32 | _tests.add((tree.label, tree.value)) 33 | _extract_tests(tree.left, _tests) 34 | _extract_tests(tree.right, _tests) 35 | return _tests 36 | 37 | 38 | def _calculate_entropy(probabilities): 39 | """ 40 | Calculate the entropy of given probabilities 41 | 42 | :param probabilities: a list of floats between [0, 1] (sum(probabilities) must be 1) 43 | :return: the entropy 44 | """ 45 | return sum([-prob * np.log(prob)/np.log(2) if prob != 0 else 0 for prob in probabilities]) 46 | 47 | 48 | def _get_most_occurring_class(data, class_label): 49 | """ 50 | Get the most occurring class in a dataframe of data 51 | 52 | :param data: a pandas dataframe 53 | :param class_label: the column of the class labels 54 | :return: the most occurring class 55 | """ 56 | return Counter(data[class_label].values.tolist()).most_common(1)[0][0] 57 | 58 | 59 | def _calculate_prob(tree, label, value, prior_tests, negate=False): 60 | """ 61 | Estimate the probabilities from a decision tree by propagating down from the root to the leaves 62 | 63 | :param tree: the decision tree to estimate the probabilities from 64 | :param label: the label of the test being evaluated 65 | :param value: the value of the test being evaluated 66 | :param prior_tests: tests that are already in the conjunctions 67 | :param negate: is it a negative or positive test 68 | :return: a vector of probabilities for each class 69 | """ 70 | if tree.value is None: # If the value is None, we're at a leaf, return a vector of probabilities 71 | 
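# (annotation) class_probabilities maps each class label to a raw sample count;
# dividing by the total count turns the leaf's counts into an empirical
# class distribution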
return np.divide(list(map(float, list(tree.class_probabilities.values()))), float(sum(list(tree.class_probabilities.values())))) 72 | else: 73 | if (tree.label, tree.value) in prior_tests: 74 | # The test in the current node is already in the conjunction, take the correct path 75 | if prior_tests[(tree.label, tree.value)]: 76 | return _calculate_prob(tree.left, label, value, prior_tests, negate) 77 | else: 78 | return _calculate_prob(tree.right, label, value, prior_tests, negate) 79 | elif not (tree.label == label and tree.value == value): 80 | # The test of current node is not yet in conjunction and is not the test we're looking for 81 | # Keep propagating (but add weights (estimate how many times the test succeeds/fails))! 82 | samples_sum = sum(list(tree.class_probabilities.values())) 83 | if samples_sum == 0: 84 | left_fraction = 1.0 85 | right_fraction = 1.0 86 | else: 87 | left_fraction = sum(list(tree.left.class_probabilities.values())) / samples_sum 88 | right_fraction = sum(list(tree.right.class_probabilities.values())) / samples_sum 89 | 90 | return np.add(left_fraction * _calculate_prob(tree.left, label, value, prior_tests, negate), 91 | right_fraction * _calculate_prob(tree.right, label, value, prior_tests, negate)) 92 | elif not negate: 93 | # We found the test we are looking for 94 | # If negate is False, then it is a positive test and we take the left subtree 95 | return _calculate_prob(tree.left, label, value, prior_tests, negate) 96 | else: 97 | return _calculate_prob(tree.right, label, value, prior_tests, negate) 98 | 99 | 100 | def _calculate_prob_dict(tree, label, value, prior_tests, negate=False): 101 | """ 102 | Wrapper around calculate_prob, so we know which probability belongs to which class 103 | """ 104 | return dict(zip(tree.class_probabilities.keys(), _calculate_prob(tree, label, value, prior_tests, negate))) 105 | 106 | 107 | def ism(decision_trees, data, class_label, min_nr_samples=1, calc_fracs_from_ensemble=False): 108 | """ 109 | Return a single decision tree from an ensemble of decision tree, using the normalized information gain as 110 | split criterion, estimated from the ensemble. This is a wrapper function around `constructors.ISM.build_dt_from_ensemble`, 111 | which first calculate the required parameters for this method. 112 | 113 | **Params** 114 | ---------- 115 | - `decision_trees` (list of `decisiontree.DecisionTree` objects): the ensemble of decision trees to be merged 116 | 117 | - `data` (pandas DataFrame): the data frame with training data 118 | 119 | - `class_label` (string): the column identifier for the column with class labels in the data 120 | 121 | - `min_nr_samples` (int): pre-prune condition, stop searching if number of samples is smaller or equal than threshold 122 | 123 | - `calc_fracs_from_ensemble` (boolean): if `True`, the different probabilities are calculated using the ensemble. 
Else, the data is used 124 | 125 | **Returns** 126 | ----------- 127 | a single decision tree based on the ensemble of decision trees 128 | """ 129 | X = data.drop(class_label, axis=1).reset_index(drop=True) 130 | y = data[class_label].reset_index(drop=True) 131 | 132 | non_empty_decision_trees = [] 133 | for tree in decision_trees: 134 | if tree.count_nodes() > 1: non_empty_decision_trees.append(tree) 135 | decision_trees = non_empty_decision_trees 136 | 137 | prior_entropy = 0 138 | tests = set() 139 | tests.clear() 140 | for dt in decision_trees: 141 | tests = tests | _extract_tests(dt, set()) 142 | prior_entropy += _calculate_entropy(np.divide(list(dt.class_probabilities.values()), 143 | sum(dt.class_probabilities.values()))) 144 | prior_entropy /= len(decision_trees) 145 | 146 | combined_dt = build_dt_from_ensemble(decision_trees, data, class_label, tests, prior_entropy, {}, min_nr_samples, 147 | calc_fracs_from_ensemble) 148 | combined_dt.populate_samples(X, y) 149 | 150 | return combined_dt 151 | 152 | 153 | def _add_reduce_by_key(A, B): 154 | """ 155 | Reduces two dicts by key using add operator 156 | 157 | :param A: dict one 158 | :param B: dict two 159 | :return: a new dict, containing a of the values if the two dicts have the same key, else just the value 160 | """ 161 | return {x: A.get(x, 0) + B.get(x, 0) for x in set(A).union(B)} 162 | 163 | 164 | def build_dt_from_ensemble(decision_trees, data, class_label, tests, prior_entropy, prior_tests={}, min_nr_samples=1, 165 | calc_fracs_from_ensemble=False): 166 | """ 167 | Given an ensemble of decision trees, build a single decision tree using estimates from the ensemble 168 | 169 | **Params** 170 | ---------- 171 | - `decision_trees` (list of `decisiontree.DecisionTree` objects): the ensemble of decision trees to be merged 172 | 173 | - `data` (pandas DataFrame): the data frame with training data 174 | 175 | - `class_label` (string): the column identifier for the column with class labels in the data 176 | 177 | - `tests` (set of tuples): all possible tests (extracted from the ensemble) 178 | 179 | - `prior_entropy` (float): recursive parameter to calculate information gain 180 | 181 | - `prior_tests` (set of tuples): the tests that are already picked for our final decision tree 182 | 183 | - `min_nr_samples` (int): pre-prune condition, stop searching if number of samples is smaller or equal than threshold 184 | 185 | - `calc_fracs_from_ensemble` (boolean): if `True`, the different probabilities are calculated using the ensemble. 
Else, the data is used 186 | 187 | **Returns** 188 | ----------- 189 | a single decision tree, calculated using information from the ensemble 190 | """ 191 | # Pre-pruning conditions: 192 | # - if the length of data is <= min_nr_samples 193 | # - when we have no tests left 194 | # - when there is only 1 unique class in the data left 195 | # print len(data), len(tests), np.unique(data[class_label].values) 196 | if len(data) > min_nr_samples and len(tests) > 0 and len(np.unique(data[class_label].values)) > 1: 197 | max_ig = 0 198 | best_pos_data, best_neg_data, best_pos_entropy, best_neg_entropy = [None]*4 199 | best_dt = decisiontree.DecisionTree() 200 | # Find the test that results in the maximum information gain 201 | for test in tests: 202 | pos_avg_probs, neg_avg_probs, pos_fraction, neg_fraction = {}, {}, 0.0, 0.0 203 | for dt in decision_trees: 204 | pos_prob_dict = _calculate_prob_dict(dt, test[0], test[1], prior_tests, False) 205 | neg_prob_dict = _calculate_prob_dict(dt, test[0], test[1], prior_tests, True) 206 | 207 | if not any(math.isnan(x) for x in pos_prob_dict.values()) and not any(math.isnan(x) for x in neg_prob_dict.values()): 208 | pos_avg_probs = _add_reduce_by_key(pos_avg_probs, _calculate_prob_dict(dt, test[0], test[1], prior_tests, False)) 209 | neg_avg_probs = _add_reduce_by_key(neg_avg_probs, _calculate_prob_dict(dt, test[0], test[1], prior_tests, True)) 210 | 211 | if calc_fracs_from_ensemble and len(data) > 0: 212 | pos_fraction += float(len(dt.data[dt.data[test[0]] <= test[1]]))/len(dt.data) 213 | neg_fraction += float(len(dt.data[dt.data[test[0]] > test[1]]))/len(dt.data) 214 | 215 | for key in pos_avg_probs: 216 | pos_avg_probs[key] /= len(decision_trees) 217 | for key in neg_avg_probs: 218 | neg_avg_probs[key] /= len(decision_trees) 219 | 220 | if calc_fracs_from_ensemble: 221 | pos_fraction /= float(len(decision_trees)) 222 | neg_fraction /= float(len(decision_trees)) 223 | 224 | pos_entropy = _calculate_entropy(np.divide(list(pos_avg_probs.values()), len(decision_trees))) 225 | neg_entropy = _calculate_entropy(np.divide(list(neg_avg_probs.values()), len(decision_trees))) 226 | 227 | pos_data = data[data[test[0]] <= test[1]].copy() 228 | neg_data = data[data[test[0]] > test[1]].copy() 229 | 230 | if not calc_fracs_from_ensemble: 231 | pos_fraction = float(len(pos_data)) / float(len(data)) 232 | neg_fraction = float(len(neg_data)) / float(len(data)) 233 | 234 | weighted_entropy = pos_fraction * pos_entropy + neg_fraction * neg_entropy 235 | information_gain = prior_entropy - weighted_entropy 236 | 237 | if information_gain > max_ig and len(pos_data) > 0 and len(neg_data) > 0: 238 | max_ig, best_dt.label, best_dt.value = information_gain, test[0], test[1] 239 | best_pos_data, best_neg_data, best_pos_entropy, best_neg_entropy = pos_data, neg_data, pos_entropy, neg_entropy 240 | 241 | # print max_ig 242 | if max_ig == 0: # If we can't find a test that results in an information gain, we can pre-prune 243 | return decisiontree.DecisionTree(value=None, label=_get_most_occurring_class(data, class_label)) 244 | 245 | # Update some variables and do recursive calls 246 | left_prior_tests = prior_tests.copy() 247 | left_prior_tests.update({(best_dt.label, best_dt.value): True}) 248 | new_tests = tests.copy() 249 | new_tests.remove((best_dt.label, best_dt.value)) 250 | best_dt.left = build_dt_from_ensemble(decision_trees, best_pos_data, class_label, new_tests, 251 | best_pos_entropy, left_prior_tests, min_nr_samples) 252 | 253 | right_prior_tests = prior_tests.copy() 254 | 
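# (annotation) mirror of the left branch above: mark the chosen test as failed
# and recurse on the negative partition; note that the recursive calls do not
# forward calc_fracs_from_ensemble, so it falls back to its default (False)
# below the root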
right_prior_tests.update({(best_dt.label, best_dt.value): False}) 255 | best_dt.right = build_dt_from_ensemble(decision_trees, best_neg_data, class_label, new_tests, 256 | best_neg_entropy, right_prior_tests, min_nr_samples) 257 | 258 | return best_dt 259 | else: 260 | return decisiontree.DecisionTree(value=None, label=_get_most_occurring_class(data, class_label)) -------------------------------------------------------------------------------- /constructors/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Contains implementations for different classifiers: decision tree induction algorithms, ensemble techniques and 3 | GENESIM: GENetic Extraction of a Single, Interpretable Model 4 | 5 | Written by Gilles Vandewiele in commission of IDLab - INTEC from University Ghent. 6 | """ -------------------------------------------------------------------------------- /constructors/ensemble.py: -------------------------------------------------------------------------------- 1 | """ 2 | Contains wrappers around well-known ensemble techniques: Random Forest and XGBoost. 3 | 4 | Written by Gilles Vandewiele in commission of IDLab - INTEC from University Ghent. 5 | """ 6 | 7 | import time 8 | from bayes_opt import BayesianOptimization 9 | from sklearn.cross_validation import cross_val_score 10 | from sklearn.ensemble import AdaBoostClassifier 11 | from xgboost import XGBClassifier 12 | from sklearn.ensemble import RandomForestClassifier 13 | import numpy as np 14 | import decisiontree 15 | 16 | 17 | class EnsembleConstructor(object): 18 | """This class is an interface for all tree induction algorithms.""" 19 | 20 | def __init__(self): 21 | """In the init method, all hyper-parameters should be set.""" 22 | self.clf = None 23 | 24 | def get_name(self): 25 | """Get the name of the induction algorithm implemented.""" 26 | raise NotImplementedError("This method needs to be implemented") 27 | 28 | def construct_classifier(self, train, features, label_col): 29 | """Construct an ensemble classifier. 
30 | 31 | **Params** 32 | ---------- 33 | - `train` (pandas DataFrame) - a `Dataframe` containing all the training data 34 | 35 | - `features` (pandas Series or list) - the names of the feature columns 36 | 37 | - `label_col` (string) - the name of the class label column 38 | 39 | **Returns** 40 | ----------- 41 | an ensemble classifier 42 | """ 43 | raise NotImplementedError("This method needs to be implemented") 44 | 45 | def evaluate_multiple(self, feature_vectors): 46 | """Evaluate multiple samples 47 | 48 | **Params** 49 | ---------- 50 | - `feature_vectors` (pandas DataFrame) - a `Dataframe` containing all the feature vectors 51 | 52 | **Returns** 53 | ----------- 54 | a list of predicted class labels 55 | 56 | """ 57 | return self.clf.predict(feature_vectors) 58 | 59 | 60 | class XGBClassification(EnsembleConstructor): 61 | 62 | def get_name(self): 63 | return 'XGBoost' 64 | 65 | def __init__(self): 66 | super(XGBClassification, self).__init__() 67 | self.nr_clf = 0 68 | self.time = 0 69 | 70 | def construct_classifier(self, train, features, label_col): 71 | data = train[features] 72 | target = train[label_col] 73 | 74 | def xgbcv(nr_classifiers, learning_rate, max_depth, min_child_weight, subsample, colsample_bytree, gamma, 75 | reg_lambda): 76 | nr_classifiers = int(nr_classifiers) 77 | max_depth = int(max_depth) 78 | min_child_weight = int(min_child_weight) 79 | return cross_val_score(XGBClassifier(learning_rate=learning_rate, n_estimators=nr_classifiers, 80 | gamma=gamma, subsample=subsample, colsample_bytree=colsample_bytree, 81 | nthread=1, scale_pos_weight=1, reg_lambda=reg_lambda, 82 | min_child_weight=min_child_weight, max_depth=max_depth), 83 | data, target, 'accuracy', cv=5).mean() 84 | 85 | params = { 86 | 'nr_classifiers': (50, 1000), 87 | 'learning_rate': (0.01, 0.3), 88 | 'max_depth': (5, 10), 89 | 'min_child_weight': (2, 10), 90 | 'subsample': (0.7, 0.8), 91 | 'colsample_bytree': (0.5, 0.99), 92 | 'gamma': (0.01, 1.), 93 | 'reg_lambda': (0, 1) 94 | } 95 | 96 | xgbBO = BayesianOptimization(xgbcv, params, verbose=0) 97 | xgbBO.maximize(init_points=10, n_iter=20, n_restarts_optimizer=50) 98 | # xgbBO.maximize(init_points=1, n_iter=1, n_restarts_optimizer=100) 99 | 100 | best_params = xgbBO.res['max']['max_params'] 101 | 102 | best_nr_classifiers = int(best_params['nr_classifiers']) 103 | self.nr_clf = best_nr_classifiers 104 | best_max_depth = int(best_params['max_depth']) 105 | best_min_child_weight = int(best_params['min_child_weight']) 106 | best_colsample_bytree = best_params['colsample_bytree'] 107 | best_subsample = best_params['subsample'] 108 | best_reg_lambda = best_params['reg_lambda'] 109 | best_learning_rate = best_params['learning_rate'] 110 | best_gamma = best_params['gamma'] 111 | 112 | print(best_nr_classifiers) 113 | 114 | self.clf = XGBClassifier(learning_rate=best_learning_rate, n_estimators=best_nr_classifiers, 115 | gamma=best_gamma, subsample=best_subsample, colsample_bytree=best_colsample_bytree, 116 | nthread=1, scale_pos_weight=1, reg_lambda=best_reg_lambda, 117 | min_child_weight=best_min_child_weight, max_depth=best_max_depth) 118 | start = time.time() 119 | self.clf.fit(data, target) 120 | self.time = time.time() - start 121 | 122 | return self 123 | 124 | def evaluate_multiple(self, feature_vectors): 125 | return self.clf.predict(feature_vectors) 126 | 127 | 128 | class RFClassification(EnsembleConstructor): 129 | 130 | def get_name(self): 131 | return 'RF' 132 | 133 | def __init__(self): 134 | super(RFClassification, self).__init__() 135 | 
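# (annotation) nr_clf stores the tuned number of estimators after Bayesian
# optimization; time records the wall-clock seconds spent fitting the final model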
self.nr_clf = 0 136 | self.time = 0 137 | 138 | def construct_classifier(self, train, features, label_col): 139 | data = train[features] 140 | target = train[label_col] 141 | 142 | def rfcv(nr_classifiers, max_depth, min_samples_leaf, bootstrap, criterion, max_features): 143 | nr_classifiers = int(nr_classifiers) 144 | max_depth = int(max_depth) 145 | min_samples_leaf = int(min_samples_leaf) 146 | if np.round(bootstrap): 147 | bootstrap = True 148 | else: 149 | bootstrap = False 150 | if np.round(criterion): 151 | criterion = 'gini' 152 | else: 153 | criterion = 'entropy' 154 | if np.round(max_features): 155 | max_features = None 156 | else: 157 | max_features = 1.0 158 | 159 | return cross_val_score(RandomForestClassifier(n_estimators=nr_classifiers, max_depth=max_depth, 160 | min_samples_leaf=min_samples_leaf, bootstrap=bootstrap, 161 | criterion=criterion, max_features=max_features), 162 | data, target, 'accuracy', cv=5).mean() 163 | 164 | params = { 165 | 'nr_classifiers': (10, 1000), 166 | 'max_depth': (5, 10), 167 | 'min_samples_leaf': (2, 10), 168 | 'bootstrap': (0, 1), 169 | 'criterion': (0, 1), 170 | 'max_features': (0, 1) 171 | } 172 | 173 | rfBO = BayesianOptimization(rfcv, params, verbose=0) 174 | rfBO.maximize(init_points=10, n_iter=20, n_restarts_optimizer=50) 175 | # rfBO.maximize(init_points=1, n_iter=1, n_restarts_optimizer=50) 176 | 177 | best_params = rfBO.res['max']['max_params'] 178 | 179 | best_nr_classifiers = int(best_params['nr_classifiers']) 180 | self.nr_clf = best_nr_classifiers 181 | best_max_depth = int(best_params['max_depth']) 182 | best_min_samples_leaf = int(best_params['min_samples_leaf']) 183 | best_bootstrap = best_params['bootstrap'] 184 | best_criterion = best_params['criterion'] 185 | best_max_features = best_params['max_features'] 186 | 187 | if np.round(best_bootstrap): 188 | best_bootstrap = True 189 | else: 190 | best_bootstrap = False 191 | if np.round(best_criterion): 192 | best_criterion = 'gini' 193 | else: 194 | best_criterion = 'entropy' 195 | if np.round(best_max_features): 196 | best_max_features = None 197 | else: 198 | best_max_features = 1.0 199 | 200 | self.clf = RandomForestClassifier(n_estimators=best_nr_classifiers, max_depth=best_max_depth, 201 | min_samples_leaf=best_min_samples_leaf, bootstrap=best_bootstrap, 202 | criterion=best_criterion, max_features=best_max_features) 203 | start = time.time() 204 | self.clf.fit(data, target) 205 | 206 | self.time = time.time() - start 207 | 208 | return self 209 | 210 | def evaluate_multiple(self, feature_vectors): 211 | return self.clf.predict(feature_vectors) 212 | 213 | 214 | def bootstrap(data, class_label, tree_constructors, bootstrap_features=False, nr_classifiers=3, boosting=True): 215 | """ 216 | Bootstrapping ensemble technique 217 | 218 | **Params** 219 | ---------- 220 | - `data` (DataFrame): containing all the data to be bootstrapped 221 | 222 | - `class_label` (string): the column in the dataframe that contains the target variables 223 | 224 | - `tree_constructors` (list): the induction algorithms (`constructors.treeconstructor.TreeConstructor`) used 225 | 226 | - `bootstrap_features` (boolean): if `True`, then apply bootstrapping to the features as well 227 | 228 | - `nr_classifiers` (int): for each `tree_constructor`, how many times must we bootstrap 229 | 230 | - `boosting` (boolean): if `True`, then do create models with AdaBoost too 231 | 232 | **Returns** 233 | ----------- 234 | a vector of fitted classifiers, converted to DecisionTree (`decisiontree.DecisionTree`) 235 | 
""" 236 | 237 | def _convert_to_tree(classifier, features): 238 | n_nodes = classifier.tree_.node_count 239 | children_left = classifier.tree_.children_left 240 | children_right = classifier.tree_.children_right 241 | feature = classifier.tree_.feature 242 | threshold = classifier.tree_.threshold 243 | classes = classifier.classes_ 244 | 245 | # The tree structure can be traversed to compute various properties such 246 | # as the depth of each node and whether or not it is a leaf. 247 | node_depth = np.zeros(shape=n_nodes) 248 | decision_trees = [None] * n_nodes 249 | for i in range(n_nodes): 250 | decision_trees[i] = decisiontree.DecisionTree() 251 | is_leaves = np.zeros(shape=n_nodes, dtype=bool) 252 | stack = [(0, -1)] # seed is the root node id and its parent depth 253 | while len(stack) > 0: 254 | node_id, parent_depth = stack.pop() 255 | node_depth[node_id] = parent_depth + 1 256 | 257 | # If we have a test node 258 | if children_left[node_id] != children_right[node_id]: 259 | stack.append((children_left[node_id], parent_depth + 1)) 260 | stack.append((children_right[node_id], parent_depth + 1)) 261 | else: 262 | is_leaves[node_id] = True 263 | 264 | for i in range(n_nodes): 265 | if children_left[i] > 0: 266 | decision_trees[i].left = decision_trees[children_left[i]] 267 | 268 | if children_right[i] > 0: 269 | decision_trees[i].right = decision_trees[children_right[i]] 270 | 271 | if is_leaves[i]: 272 | decision_trees[i].label = classes[np.argmax(classifier.tree_.value[i][0])] 273 | decision_trees[i].value = None 274 | else: 275 | decision_trees[i].label = features[feature[i]] 276 | decision_trees[i].value = threshold[i] 277 | return decision_trees[0] 278 | 279 | idx = np.random.randint(0, len(data), (nr_classifiers, len(data))) 280 | decision_trees = [] 281 | 282 | if boosting: 283 | ada = AdaBoostClassifier(base_estimator=None, n_estimators=nr_classifiers, learning_rate=0.25, random_state=1337) 284 | X_train = data.drop(class_label, axis=1).reset_index(drop=True) 285 | y_train = data[class_label].reset_index(drop=True) 286 | ada.fit(X_train, y_train) 287 | for estimator in ada.estimators_: 288 | dt = _convert_to_tree(estimator, X_train.columns) 289 | dt.data = data 290 | dt.populate_samples(X_train, y_train) 291 | decision_trees.append(dt) 292 | 293 | for indices in idx: 294 | if bootstrap_features: 295 | features = list(set(np.random.randint(0, len(data.columns), (1, len(data.columns))).tolist()[0])) 296 | X_bootstrap = data.iloc[indices, features].reset_index(drop=True) 297 | if class_label in X_bootstrap.columns: 298 | X_bootstrap = X_bootstrap.drop(class_label, axis=1) 299 | y_bootstrap = data.iloc[indices][class_label].reset_index(drop=True) 300 | else: 301 | X_bootstrap = data.iloc[indices, :].drop(class_label, axis=1).reset_index(drop=True) 302 | y_bootstrap = data.iloc[indices][class_label].reset_index(drop=True) 303 | 304 | X = data.drop(class_label, axis=1).reset_index(drop=True) 305 | y = data[class_label].reset_index(drop=True) 306 | train_bootstrap = X_bootstrap.copy() 307 | train_bootstrap[y_bootstrap.name] = y_bootstrap 308 | 309 | for tree_constructor in tree_constructors: 310 | tree = tree_constructor.construct_classifier(train_bootstrap, X_bootstrap.columns, y_bootstrap.name) 311 | # print 'Number of nodes in stub:', tree_constructor.get_name(), count_nodes(tree) 312 | # print tree_constructor.get_name(), tree.count_nodes() 313 | tree.data = data.iloc[indices, :].reset_index(drop=True) 314 | tree.populate_samples(X, y) 315 | decision_trees.append(tree) 316 | 317 
| return decision_trees 318 | -------------------------------------------------------------------------------- /constructors/guide: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/predict-idlab/GENESIM/44925de91ae408fea30ea8ece3688f9e42d4f040/constructors/guide -------------------------------------------------------------------------------- /constructors/inTrees.py: -------------------------------------------------------------------------------- 1 | """ 2 | inTrees / STEL 3 | -------------- 4 | 5 | Merges different decision trees in an ensemble together in an ordered rule list 6 | 7 | Written by Gilles Vandewiele in commission of IDLab - INTEC from University Ghent. 8 | 9 | Reference: 10 | Houtao Deng 11 | "Interpreting Tree Ensembles with inTrees" 12 | """ 13 | 14 | import sys 15 | import re 16 | 17 | import numpy as np 18 | import pandas as pd 19 | import rpy2 20 | from rpy2.robjects import pandas2ri 21 | pandas2ri.activate() 22 | import rpy2.robjects as ro 23 | 24 | from rpy2.robjects.packages import importr 25 | 26 | from constructors import ensemble 27 | 28 | sys.path.append('../') 29 | 30 | def convert_to_r_posixct(obj): 31 | """ 32 | Convert DatetimeIndex or np.datetime array to R POSIXct using 33 | m8[s] format. 34 | 35 | Parameters 36 | ---------- 37 | obj : source pandas object (one of [DatetimeIndex, np.datetime]) 38 | 39 | Returns 40 | ------- 41 | An R POSIXct vector (rpy2.robjects.vectors.POSIXct) 42 | 43 | """ 44 | import time 45 | from rpy2.rinterface import StrSexpVector 46 | 47 | # convert m8[ns] to m8[s] 48 | vals = robj.vectors.FloatSexpVector(obj.values.view('i8') / 1E9) 49 | as_posixct = robj.baseenv.get('as.POSIXct') 50 | origin = StrSexpVector([time.strftime("%Y-%m-%d", 51 | time.gmtime(0)), ]) 52 | 53 | # We will be sending ints as UTC 54 | tz = obj.tz.zone if hasattr( 55 | obj, 'tz') and hasattr(obj.tz, 'zone') else 'UTC' 56 | tz = StrSexpVector([tz]) 57 | utc_tz = StrSexpVector(['UTC']) 58 | 59 | posixct = as_posixct(vals, origin=origin, tz=utc_tz) 60 | posixct.do_slot_assign('tzone', tz) 61 | return posixct 62 | 63 | 64 | class Condition: 65 | """ 66 | Class which represents one part of the rule (which can be seen as a conjunction of conditions) 67 | """ 68 | def __init__(self, feature, test, value): 69 | self.feature = feature 70 | '''The feature on which the test is performed''' 71 | self.test = test 72 | '''What kind of test is done. Must be either `==`, `>` or `<=`''' 73 | self.value = value 74 | '''The threshold value''' 75 | 76 | def evaluate(self, feature_vector): 77 | """Create a prediction for a sample (using its feature vector) 78 | 79 | **Params** 80 | ---------- 81 | - `feature_vector` (pandas Series or dict) - the sample to evaluate, must be a `pandas Series` object or a 82 | `dict`. It is important that the attribute keys in the sample are the same as the labels occuring in the rules. 
83 | 84 | **Returns** 85 | ----------- 86 | `True` if feature_vector[] , where is equal to `==`, `>` or `<=` 87 | """ 88 | if self.value is None: 89 | return True 90 | elif self.test == '==': 91 | return feature_vector[self.feature] == self.value 92 | elif self.test == '>': 93 | return feature_vector[self.feature] > self.value 94 | else: 95 | return feature_vector[self.feature] <= self.value 96 | 97 | 98 | class Rule: 99 | """ 100 | Class which represents a rule, which is a conjunction of conditions 101 | """ 102 | def __init__(self, index, conditions, prediction): 103 | self.index = index 104 | '''The index of this rule in a rule list (which is traversed sequentially until a match is found).''' 105 | self.rules = conditions 106 | '''A list of `constructors.inTrees.Condition`''' 107 | self.prediction = prediction 108 | '''This is returned when a sample fully complies to the rule (`True` for all conditions)''' 109 | 110 | def evaluate(self, feature_vector): 111 | """Create a prediction for a sample (using its feature vector) 112 | 113 | **Params** 114 | ---------- 115 | - `feature_vector` (pandas Series or dict) - the sample to evaluate, must be a `pandas Series` object or a 116 | `dict`. It is important that the attribute keys in the sample are the same as the labels occuring in the rules. 117 | 118 | **Returns** 119 | ----------- 120 | `True` if `True` for each condition in conditions 121 | """ 122 | for rule in self.rules: 123 | if not rule.evaluate(feature_vector): return False, -1 124 | return True, self.prediction 125 | 126 | 127 | class OrderedRuleList: 128 | """ 129 | Class which represents a list of rules. To make a prediction, the list is traversed and when a rule is found where 130 | the sample complies to, its prediction is returned. 131 | """ 132 | def __init__(self, rule_list): 133 | self.rule_list = rule_list 134 | '''A list of `constructors.inTrees.Rule`''' 135 | 136 | def _evaluate(self, feature_vector): 137 | for ruleset in sorted(self.rule_list, key=lambda x: x.index): # Sort to make sure they are evaluated in order 138 | rule_evaluation_result, rule_evaluation_pred = ruleset.evaluate(feature_vector) 139 | if rule_evaluation_result: return rule_evaluation_pred 140 | return None 141 | 142 | def print_rules(self): 143 | """Print the rules""" 144 | for rule_set in self.rule_list: 145 | print '*' + ' & '.join([str(rule.feature)+' '+str(rule.test)+' '+str(rule.value) for rule in rule_set.rules]), '==>', rule_set.prediction 146 | 147 | def evaluate_multiple(self, feature_vectors): 148 | """Wrapper method to evaluate multiple vectors at once (just a for loop where evaluate is called) 149 | 150 | **Params** 151 | ---------- 152 | - `feature_vectors` (pandas DataFrame or list of dicts) - the samples to evaluate 153 | 154 | **Returns** 155 | ----------- 156 | a class label 157 | """ 158 | results = [] 159 | 160 | for _index, feature_vector in feature_vectors.iterrows(): 161 | results.append(self._evaluate(feature_vector)) 162 | 163 | return np.asarray(results) 164 | 165 | 166 | class inTreesClassifier: 167 | 168 | def __init__(self): 169 | pass 170 | 171 | def _convert_to_r_dataframe(self, df, strings_as_factors=False): 172 | """ 173 | Convert a pandas DataFrame to a R data.frame. 
174 | 175 | Parameters 176 | ---------- 177 | df: The DataFrame being converted 178 | strings_as_factors: Whether to turn strings into R factors (default: False) 179 | 180 | Returns 181 | ------- 182 | A R data.frame 183 | 184 | """ 185 | 186 | import rpy2.rlike.container as rlc 187 | 188 | columns = rlc.OrdDict() 189 | 190 | # FIXME: This doesn't handle MultiIndex 191 | 192 | for column in df: 193 | value = df[column] 194 | value_type = value.dtype.type 195 | 196 | if value_type == np.datetime64: 197 | value = convert_to_r_posixct(value) 198 | else: 199 | value = [item if pd.notnull(item) else rpy2.rinterface.NA_Integer#com.NA_TYPES[value_type] 200 | for item in value] 201 | 202 | value = rpy2.robjects.vectors.FloatVector(value)#com.VECTOR_TYPES[value_type](value) 203 | 204 | if not strings_as_factors: 205 | I = ro.baseenv.get("I") 206 | value = I(value) 207 | 208 | columns[column] = value 209 | 210 | r_dataframe = ro.DataFrame(columns) 211 | del columns 212 | 213 | r_dataframe.rownames = ro.StrVector(list(df.index)) 214 | r_dataframe.colnames = list(df.columns) 215 | 216 | return r_dataframe 217 | 218 | def _tree_to_R_object(self, tree, feature_mapping): 219 | node_mapping = {} 220 | nodes = tree._get_nodes() 221 | nodes.extend(tree._get_leaves()) 222 | for i, node in enumerate(nodes): 223 | node_mapping[node] = i+1 224 | vectors = [] 225 | for node in nodes: 226 | if node.value is not None: 227 | vectors.append([node_mapping[node], node_mapping[node.left], node_mapping[node.right], 228 | feature_mapping[node.label], node.value, 1, 0]) 229 | else: 230 | vectors.append([node_mapping[node], 0, 0, 0, 0.0, -1, node.label]) 231 | 232 | df = pd.DataFrame(vectors) 233 | df.columns = ['id', 'left daughter', 'right daughter', 'split var', 'split point', 'status', 'prediction'] 234 | df = df.set_index('id') 235 | df.index.name = None 236 | 237 | return self._convert_to_r_dataframe(df) 238 | 239 | def construct_rule_list(self, train_df, label_col, tree_constructors, nr_bootstraps=3): 240 | """ Construct an `constructors.inTrees.OrderedRuleList` from an ensemble of decision trees 241 | 242 | **Params** 243 | ---------- 244 | - `train_df` (pandas DataFrame) - the training data 245 | 246 | - `label_col` (string) - the column identifier for the class labels 247 | 248 | - `tree_constructors` (`constructors.treeconstructor.TreeConstructor`) - the decision tree induction algorithms used to create an ensemble with 249 | 250 | - `nr_bootstraps` (pandas DataFrame) - how many times do we apply bootstrapping for each TreeConstructor? 
The size of the ensemble will be equal to 251 | |tree_constructors|*nr_bootstraps 252 | 253 | **Returns** 254 | ----------- 255 | an OrderedRuleList 256 | """ 257 | y_train = train_df[label_col] 258 | X_train = train_df.copy() 259 | X_train = X_train.drop(label_col, axis=1) 260 | 261 | importr('randomForest') 262 | importr('inTrees') 263 | 264 | ro.globalenv["X"] = pandas2ri.py2ri(X_train) 265 | ro.globalenv["target"] = ro.FactorVector(y_train.values.tolist()) 266 | 267 | feature_mapping = {} 268 | feature_mapping_reverse = {} 269 | for i, feature in enumerate(X_train.columns): 270 | feature_mapping[feature] = i + 1 271 | feature_mapping_reverse[i + 1] = feature 272 | 273 | treeList = [] 274 | for tree in ensemble.bootstrap(train_df, label_col, tree_constructors, nr_classifiers=nr_bootstraps): 275 | if tree.count_nodes() > 1: treeList.append(self._tree_to_R_object(tree, feature_mapping)) 276 | 277 | ro.globalenv["treeList"] = ro.Vector([len(treeList), ro.Vector(treeList)]) 278 | ro.r('names(treeList) <- c("ntree", "list")') 279 | 280 | rules = ro.r('buildLearner(getRuleMetric(extractRules(treeList, X), X, target), X, target)') 281 | rules=list(rules) 282 | conditions=rules[int(0.6*len(rules)):int(0.8*len(rules))] 283 | predictions=rules[int(0.8*len(rules)):] 284 | 285 | # Create a OrderedRuleList 286 | rulesets = [] 287 | for idx, (condition, prediction) in enumerate(zip(conditions, predictions)): 288 | # Split each condition in Rules to form a RuleSet 289 | rulelist = [] 290 | condition_split = [x.lstrip().rstrip() for x in condition.split('&')] 291 | for rule in condition_split: 292 | feature = feature_mapping_reverse[int(re.findall(r',[0-9]+]', rule)[0][1:-1])] 293 | 294 | lte = re.findall(r'<=', rule) 295 | gt = re.findall(r'>', rule) 296 | eq = re.findall(r'==', rule) 297 | cond = lte[0] if len(lte) else (gt[0] if len(gt) else eq[0]) 298 | 299 | extract_value = re.findall(r'[=>]-?[0-9\.]+', rule) 300 | if len(extract_value): 301 | value = float(re.findall(r'[=>]-?[0-9\.]+', rule)[0][1:]) 302 | else: 303 | feature = 'True' 304 | value = None 305 | 306 | rulelist.append(Condition(feature, cond, value)) 307 | rulesets.append(Rule(idx, rulelist, prediction)) 308 | 309 | return OrderedRuleList(rulesets) -------------------------------------------------------------------------------- /constructors/quest: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/predict-idlab/GENESIM/44925de91ae408fea30ea8ece3688f9e42d4f040/constructors/quest -------------------------------------------------------------------------------- /constructors/treeconstructor.py: -------------------------------------------------------------------------------- 1 | """ 2 | Contains wrappers around well-known decision tree induction algorithms: C4.5, CART, QUEST and GUIDE. 3 | 4 | Written by Gilles Vandewiele in commission of IDLab - INTEC from University Ghent. 
5 | """ 6 | 7 | import pandas as pd 8 | import numpy as np 9 | from sklearn.cross_validation import StratifiedKFold 10 | from sklearn.metrics import accuracy_score 11 | from sklearn.tree import DecisionTreeClassifier 12 | 13 | import Orange 14 | import operator 15 | import os 16 | import time 17 | import subprocess 18 | 19 | import decisiontree 20 | 21 | 22 | class TreeConstructor(object): 23 | """This class is an interface for all tree induction algorithms.""" 24 | 25 | def __init__(self): 26 | """In the init method, all hyper-parameters should be set.""" 27 | pass 28 | 29 | def get_name(self): 30 | """Get the name of the induction algorithm implemented.""" 31 | raise NotImplementedError("This method needs to be implemented") 32 | 33 | def construct_classifier(self, train, features, label_col): 34 | """Construct a `decisiontree.DecisionTree` object from the given training data 35 | 36 | **Params** 37 | ---------- 38 | - `train` (pandas DataFrame) - a `Dataframe` containing all the training data 39 | 40 | - `features` (list) - the names of the feature columns 41 | 42 | - `label_col` (string) - the name of the class label column 43 | 44 | **Returns** 45 | ----------- 46 | a DecisionTree object 47 | """ 48 | raise NotImplementedError("This method needs to be implemented") 49 | 50 | 51 | # 52 | def _series2descriptor(d, discrete=False): 53 | if d.dtype is np.dtype("float"): 54 | return Orange.feature.Continuous(str(d.name)) 55 | elif d.dtype is np.dtype("int"): 56 | return Orange.feature.Continuous(str(d.name), number_of_decimals=0) 57 | else: 58 | t = d.unique() 59 | if discrete or len(t) < len(d) / 2: 60 | t.sort() 61 | return Orange.feature.Discrete(str(d.name), values=list(t.astype("str"))) 62 | else: 63 | return Orange.feature.String(str(d.name)) 64 | 65 | 66 | def _df2domain(df): 67 | featurelist = [_series2descriptor(df.iloc[:, col]) for col in xrange(len(df.columns))] 68 | return Orange.data.Domain(featurelist) 69 | 70 | 71 | def _df2table(df): 72 | # It seems they are using native python object/lists internally for Orange.data types (?) 
73 | # And I didn't find a constructor suitable for pandas.DataFrame since it may carry 74 | # multiple dtypes 75 | # --> the best approximate is Orange.data.Table.__init__(domain, numpy.ndarray), 76 | # --> but the dtype of numpy array can only be "int" and "float" 77 | # --> * refer to src/orange/lib_kernel.cpp 3059: 78 | # --> * if (((*vi)->varType != TValue::INTVAR) && ((*vi)->varType != TValue::FLOATVAR)) 79 | # --> Documents never mentioned >_< 80 | # So we use numpy constructor for those int/float columns, python list constructor for other 81 | 82 | tdomain = _df2domain(df) 83 | ttables = [_series2table(df.iloc[:, i], tdomain[i]) for i in xrange(len(df.columns))] 84 | return Orange.data.Table(ttables) 85 | 86 | 87 | def _series2table(series, variable): 88 | if series.dtype is np.dtype("int") or series.dtype is np.dtype("float"): 89 | # Use numpy 90 | # Table._init__(Domain, numpy.ndarray) 91 | return Orange.data.Table(Orange.data.Domain(variable), series.values[:, np.newaxis]) 92 | else: 93 | # Build instance list 94 | # Table.__init__(Domain, list_of_instances) 95 | tdomain = Orange.data.Domain(variable) 96 | tinsts = [Orange.data.Instance(tdomain, [i]) for i in series] 97 | return Orange.data.Table(tdomain, tinsts) 98 | # 5x performance 99 | 100 | 101 | def _column2df(col): 102 | if type(col.domain[0]) is Orange.feature.Continuous: 103 | return (col.domain[0].name, pd.Series(col.to_numpy()[0].flatten())) 104 | else: 105 | tmp = pd.Series(np.array(list(col)).flatten()) # type(tmp) -> np.array( dtype=list (Orange.data.Value) ) 106 | tmp = tmp.apply(lambda x: str(x[0])) 107 | return (col.domain[0].name, tmp) 108 | 109 | 110 | def _table2df(tab): 111 | # Orange.data.Table().to_numpy() cannot handle strings 112 | # So we must build the array column by column, 113 | # When it comes to strings, python list is used 114 | series = [_column2df(tab.select(i)) for i in xrange(len(tab.domain))] 115 | series_name = [i[0] for i in series] # To keep the order of variables unchanged 116 | series_data = dict(series) 117 | return pd.DataFrame(series_data, columns=series_name) 118 | 119 | # 120 | 121 | 122 | class C45Constructor(TreeConstructor): 123 | """This class contains an implementation of C4.5, written by Quinlan. 
It uses an extern library 124 | for this called [Orange](http://docs.orange.biolab.si/2/reference/rst/Orange.classification.tree.html#Orange.classification.tree.C45Learner).""" 125 | 126 | def __init__(self, gain_ratio=False, cf=0.15): 127 | super(C45Constructor, self).__init__() 128 | self.gain_ratio = gain_ratio 129 | '''boolean value that indicates if either gain ratio or information gain is used as split metric''' 130 | self.cf = cf 131 | '''pruning confidence level: the lower this value, the more pruning will be done''' 132 | 133 | def get_name(self): 134 | return "C4.5" 135 | 136 | def construct_classifier(self, train, features, label_col, param_opt=True): 137 | training_feature_vectors = train[features].copy() 138 | labels = train[label_col].copy() 139 | if param_opt: 140 | optimal_clf = C45Constructor.get_best_c45_classifier(train, label_col, 141 | StratifiedKFold(train[label_col], n_folds=3, 142 | shuffle=True, random_state=None)) 143 | self.cf = optimal_clf.cf 144 | 145 | # First call df2table on the feature table 146 | orange_feature_table = _df2table(training_feature_vectors) 147 | 148 | # Convert classes to strings and call df2table 149 | orange_labels_table = _df2table(pd.DataFrame(labels.map(str))) 150 | 151 | # Merge two tables 152 | orange_table = Orange.data.Table([orange_feature_table, orange_labels_table]) 153 | 154 | return self._orange_dt_to_my_dt(Orange.classification.tree.C45Learner(orange_table, gain_ratio=self.gain_ratio, 155 | cf=self.cf, min_objs=2, subset=False).tree) 156 | 157 | def _orange_dt_to_my_dt(self, orange_dt_root): 158 | # Check if leaf 159 | if orange_dt_root.node_type == Orange.classification.tree.C45Node.Leaf: 160 | return decisiontree.DecisionTree(left=None, right=None, label=str(int(orange_dt_root.leaf)), data=None, value=None) 161 | else: 162 | dt = decisiontree.DecisionTree(label=orange_dt_root.tested.name, data=None, value=orange_dt_root.cut) 163 | dt.left = self._orange_dt_to_my_dt(orange_dt_root.branch[0]) 164 | dt.right = self._orange_dt_to_my_dt(orange_dt_root.branch[1]) 165 | return dt 166 | 167 | @staticmethod 168 | def get_best_c45_classifier(train, label_col, skf_tune): 169 | """Returns a `treeconstructor.C45Constructor` with optimized hyper-parameters using 170 | [Grid Search](https://en.wikipedia.org/wiki/Hyperparameter_optimization#Grid_search) 171 | 172 | **Params** 173 | ---------- 174 | - `train` (pandas DataFrame) - `a pandas Dataframe` with all training data 175 | 176 | - `label_col` (string) - the column identifier for the label in the `train` Dataframe 177 | 178 | - `skf_tune` (`sklearn.cross_validation.StratifiedKFold`) - cross-validation object to tune parameters 179 | 180 | **Returns** 181 | ----------- 182 | a C45Constructor with optimized hyper-parameters 183 | """ 184 | c45 = C45Constructor() 185 | cfs = np.arange(0.05, 1.05, 0.05) 186 | cfs_errors = {} 187 | for cf in cfs: cfs_errors[cf] = [] 188 | 189 | for train_tune_idx, val_tune_idx in skf_tune: 190 | train_tune = train.iloc[train_tune_idx, :] 191 | X_train_tune = train_tune.drop(label_col, axis=1) 192 | y_train_tune = train_tune[label_col] 193 | val_tune = train.iloc[val_tune_idx, :] 194 | X_val_tune = val_tune.drop(label_col, axis=1) 195 | y_val_tune = val_tune[label_col] 196 | for cf in cfs: 197 | c45.cf = cf 198 | tree = c45.construct_classifier(train_tune, X_train_tune.columns, label_col, param_opt=False) 199 | predictions = tree.evaluate_multiple(X_val_tune).astype(int) 200 | cfs_errors[cf].append(1 - accuracy_score(predictions, y_val_tune, normalize=True)) 
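# (annotation) average each candidate cf's error over the tuning folds, then
# keep the cf with the smallest mean error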
201 | 
202 |         for cf in cfs:
203 |             cfs_errors[cf] = np.mean(cfs_errors[cf])
204 | 
205 |         c45.cf = min(cfs_errors.items(), key=operator.itemgetter(1))[0]
206 |         return c45
207 | 
208 | 
209 | class CARTConstructor(TreeConstructor):
210 |     """This class contains an implementation of CART, introduced by Breiman. It uses an external library
211 |     for this called [sklearn](http://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html)."""
212 | 
213 |     def __init__(self, criterion='gini', min_samples_leaf=1, min_samples_split=2, max_depth=10):
214 |         super(CARTConstructor, self).__init__()
215 |         self.min_samples_leaf = min_samples_leaf
216 |         '''pre-prune condition: when a split causes the number of samples in one of the two partitions to be lower
217 |         than this threshold, then stop'''
218 |         self.min_samples_split = min_samples_split
219 |         '''pre-prune condition: when the current number of samples in a node is lower than this threshold, then stop'''
220 |         self.max_depth = max_depth
221 |         '''pre-prune condition: when a depth equal to this parameter is reached, then stop'''
222 |         self.criterion = criterion
223 |         '''defines which split criterion to use; either `gini` or `entropy`'''
224 | 
225 |     def get_name(self):
226 |         return "CART"
227 | 
228 |     def construct_classifier(self, train, features, label_col, param_opt=True):
229 |         training_feature_vectors = train[features]
230 |         labels = train[label_col]
231 |         train = training_feature_vectors.copy()
232 |         label_col = labels.name
233 |         train[label_col] = labels
234 |         if param_opt:
235 |             optimal_clf = CARTConstructor.get_best_cart_classifier(train, label_col,
236 |                                                                    StratifiedKFold(train[label_col], n_folds=3,
237 |                                                                                    shuffle=True, random_state=None))
238 |             self.max_depth = optimal_clf.max_depth
239 |             self.min_samples_split = optimal_clf.min_samples_split
240 | 
241 |         self.features = list(training_feature_vectors.columns)
242 | 
243 |         self.y = labels.values
244 |         self.X = training_feature_vectors[self.features]
245 | 
246 | 
247 |         self.dt = DecisionTreeClassifier(criterion=self.criterion, min_samples_leaf=self.min_samples_leaf,
248 |                                          min_samples_split=self.min_samples_split, max_depth=self.max_depth)
249 |         self.dt.fit(self.X, self.y)
250 | 
251 |         return self._convert_to_tree()
252 | 
253 |     def _convert_to_tree(self):
254 |         """Convert a sklearn object to a `decisiontree.decisiontree` object"""
255 |         n_nodes = self.dt.tree_.node_count
256 |         children_left = self.dt.tree_.children_left
257 |         children_right = self.dt.tree_.children_right
258 |         feature = self.dt.tree_.feature
259 |         threshold = self.dt.tree_.threshold
260 |         classes = self.dt.classes_
261 | 
262 |         # The tree structure can be traversed to compute various properties such
263 |         # as the depth of each node and whether or not it is a leaf.
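A node is a leaf exactly when `children_left[i] == children_right[i]` (both are -1 in sklearn's array encoding), which is the test the stack-based walk below relies on. A minimal standalone sketch of the same traversal idiom on a toy fitted tree (iris is only a stand-in dataset):

    from sklearn.datasets import load_iris
    from sklearn.tree import DecisionTreeClassifier

    iris = load_iris()
    clf = DecisionTreeClassifier(max_depth=2).fit(iris.data, iris.target)
    left, right = clf.tree_.children_left, clf.tree_.children_right
    stack = [(0, -1)]  # (node id, parent depth), as in the code below
    while stack:
        node_id, parent_depth = stack.pop()
        if left[node_id] != right[node_id]:  # internal test node: push both children
            stack.append((left[node_id], parent_depth + 1))
            stack.append((right[node_id], parent_depth + 1))
        else:  # leaf: report its depth and majority class
            print('leaf %d at depth %d -> class %d'
                  % (node_id, parent_depth + 1, clf.tree_.value[node_id][0].argmax()))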
264 |         node_depth = np.zeros(shape=n_nodes)
265 |         decision_trees = [None] * n_nodes
266 |         for i in range(n_nodes):
267 |             decision_trees[i] = decisiontree.DecisionTree()
268 |         is_leaves = np.zeros(shape=n_nodes, dtype=bool)
269 |         stack = [(0, -1)]  # seed is the root node id and its parent depth
270 |         while len(stack) > 0:
271 |             node_id, parent_depth = stack.pop()
272 |             node_depth[node_id] = parent_depth + 1
273 | 
274 |             # If we have a test node
275 |             if children_left[node_id] != children_right[node_id]:
276 |                 stack.append((children_left[node_id], parent_depth + 1))
277 |                 stack.append((children_right[node_id], parent_depth + 1))
278 |             else:
279 |                 is_leaves[node_id] = True
280 | 
281 |         for i in range(n_nodes):
282 | 
283 |             if children_left[i] > 0:
284 |                 decision_trees[i].left = decision_trees[children_left[i]]
285 | 
286 |             if children_right[i] > 0:
287 |                 decision_trees[i].right = decision_trees[children_right[i]]
288 | 
289 |             if is_leaves[i]:
290 |                 decision_trees[i].label = self.dt.classes_[np.argmax(self.dt.tree_.value[i][0])]
291 |                 decision_trees[i].value = None
292 |             else:
293 |                 decision_trees[i].label = self.features[feature[i]]
294 |                 decision_trees[i].value = threshold[i]
295 | 
296 |         return decision_trees[0]
297 | 
298 |     @staticmethod
299 |     def get_best_cart_classifier(train, label_col, skf_tune):
300 |         """Returns a `treeconstructor.CARTConstructor` with optimized hyper-parameters using
301 |         [Grid Search](https://en.wikipedia.org/wiki/Hyperparameter_optimization#Grid_search)
302 | 
303 |         **Params**
304 |         ----------
305 |         - `train` (pandas DataFrame) - a pandas DataFrame with all training data
306 | 
307 |         - `label_col` (string) - the column identifier for the label in the `train` DataFrame
308 | 
309 |         - `skf_tune` (`sklearn.cross_validation.StratifiedKFold`) - cross-validation object to tune parameters
310 | 
311 |         **Returns**
312 |         -----------
313 |         a CARTConstructor with optimized hyper-parameters
314 |         """
315 |         cart = CARTConstructor()
316 |         max_depths = np.arange(1, 21, 2)
317 |         max_depths = np.append(max_depths, None)
318 |         min_samples_splits = np.arange(2, 20, 1)
319 | 
320 |         errors = {}
321 |         for max_depth in max_depths:
322 |             for min_samples_split in min_samples_splits:
323 |                 errors[(max_depth, min_samples_split)] = []
324 | 
325 |         for train_tune_idx, val_tune_idx in skf_tune:
326 |             train_tune = train.iloc[train_tune_idx, :]
327 |             X_train_tune = train_tune.drop(label_col, axis=1)
328 |             y_train_tune = train_tune[label_col]
329 |             val_tune = train.iloc[val_tune_idx, :]
330 |             X_val_tune = val_tune.drop(label_col, axis=1)
331 |             y_val_tune = val_tune[label_col]
332 |             for max_depth in max_depths:
333 |                 for min_samples_split in min_samples_splits:
334 |                     cart.max_depth = max_depth
335 |                     cart.min_samples_split = min_samples_split
336 |                     tree = cart.construct_classifier(train_tune, X_train_tune.columns, label_col, param_opt=False)
337 |                     predictions = tree.evaluate_multiple(X_val_tune).astype(int)
338 |                     errors[(max_depth, min_samples_split)].append(1 - accuracy_score(y_val_tune, predictions, normalize=True))
339 | 
340 | 
341 |         for max_depth in max_depths:
342 |             for min_samples_split in min_samples_splits:
343 |                 errors[(max_depth, min_samples_split)] = np.mean(errors[(max_depth, min_samples_split)])
344 | 
345 |         best_params = min(errors.items(), key=operator.itemgetter(1))[0]
346 |         cart.max_depth = best_params[0]
347 |         cart.min_samples_split = best_params[1]
348 | 
349 |         return cart
350 | 
351 | 
352 | class QUESTConstructor(TreeConstructor):
353 |     """This class contains a wrapper around an implementation of
[QUEST](http://www.stat.wisc.edu/~loh/quest.html), 354 | written by Loh.""" 355 | 356 | def __init__(self): 357 | super(QUESTConstructor, self).__init__() 358 | 359 | def get_name(self): 360 | return "QUEST" 361 | 362 | def construct_classifier(self, train, features, label_col): 363 | training_feature_vectors = train[features] 364 | labels = train[label_col] 365 | self._create_desc_and_data_file(training_feature_vectors, labels) 366 | input = open("in.txt", "w") 367 | output = file('out.txt', 'w') 368 | p = subprocess.Popen(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1])+'/quest > log.txt', stdin=subprocess.PIPE, shell=True) 369 | p.stdin.write("2\n") 370 | p.stdin.write("in.txt\n") 371 | p.stdin.write("1\n") 372 | p.stdin.write("out.txt\n") 373 | p.stdin.write("1\n") 374 | p.stdin.write("dsc.txt\n") 375 | p.stdin.write("1\n") 376 | p.stdin.write("\n") 377 | p.wait() 378 | input.close() 379 | output.close() 380 | 381 | while not os.path.exists('in.txt'): 382 | time.sleep(1) 383 | p = subprocess.Popen(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1])+'/quest < in.txt > log.txt', stdin=subprocess.PIPE, shell=True) 384 | p.wait() 385 | 386 | output = file('out.txt', 'r') 387 | lines = output.readlines() 388 | output.close() 389 | 390 | start_index, end_index, counter = 0, 0, 0 391 | for line in lines: 392 | if line == ' Classification tree:\n': 393 | start_index = counter+2 394 | if line == ' Information for each node:\n': 395 | end_index = counter-1 396 | counter += 1 397 | tree = self._decision_tree_from_text(lines[start_index:end_index]) 398 | 399 | self._remove_files() 400 | 401 | return tree 402 | 403 | def _decision_tree_from_text(self, lines): 404 | dt = decisiontree.DecisionTree() 405 | 406 | if '<=' in lines[0] or '>' in lines[0]: 407 | # Intermediate node 408 | node_name = lines[0].split(':')[0].lstrip() 409 | label, value = lines[0].split(':')[1].split('<=') 410 | label = ' '.join(label.lstrip().rstrip().split('.')) 411 | value = value.lstrip().split()[0] 412 | dt.label = label 413 | dt.value = float(value) 414 | dt.left = self._decision_tree_from_text(lines[1:]) 415 | counter = 1 416 | while lines[counter].split(':')[0].lstrip() != node_name: counter+=1 417 | dt.right = self._decision_tree_from_text(lines[counter + 1:]) 418 | else: 419 | # Terminal node 420 | dt.label = int(eval(lines[0].split(':')[1].lstrip())) 421 | 422 | return dt 423 | 424 | def _create_desc_and_data_file(self, training_feature_vectors, labels): 425 | dsc = open("dsc.txt", "w") 426 | data = open("data.txt", "w") 427 | 428 | dsc.write("data.txt\n") 429 | dsc.write("\"?\"\n") 430 | dsc.write("column, var, type\n") 431 | count = 1 432 | for col in training_feature_vectors.columns: 433 | dsc.write(str(count) + ' \"' + str(col) + '\" n\n') 434 | count += 1 435 | dsc.write(str(count) + ' ' + str(labels.name) + ' d') 436 | 437 | for i in range(len(training_feature_vectors)): 438 | sample = training_feature_vectors.iloc[i,:] 439 | for col in training_feature_vectors.columns: 440 | data.write(str(sample[col]) + ' ') 441 | if i != len(training_feature_vectors)-1: 442 | data.write(str(labels[i])+'\n') 443 | else: 444 | data.write(str(labels[i])) 445 | 446 | data.close() 447 | dsc.close() 448 | 449 | def _remove_files(self): 450 | os.remove('data.txt') 451 | os.remove('in.txt') 452 | os.remove('dsc.txt') 453 | os.remove('out.txt') 454 | os.remove('log.txt') 455 | 456 | 457 | class GUIDEConstructor(TreeConstructor): 458 | """This class contains a wrapper around an implementation of 
[GUIDE](http://www.stat.wisc.edu/~loh/guide.html), 459 | written by Loh.""" 460 | 461 | def __init__(self): 462 | super(GUIDEConstructor, self).__init__() 463 | 464 | def get_name(self): 465 | return "GUIDE" 466 | 467 | def construct_classifier(self, train, features, label_col): 468 | training_feature_vectors = train[features] 469 | labels = train[label_col] 470 | self._create_desc_and_data_file(training_feature_vectors, labels) 471 | input = open("in.txt", "w") 472 | output = file('out.txt', 'w') 473 | p = subprocess.Popen(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1])+'/guide > log.txt', stdin=subprocess.PIPE, shell=True) 474 | p.stdin.write("1\n") 475 | p.stdin.write("in.txt\n") 476 | p.stdin.write("1\n") 477 | p.stdin.write("1\n") 478 | p.stdin.write("out.txt\n") 479 | p.stdin.write("1\n") 480 | p.stdin.write("1\n") 481 | p.stdin.write("1\n") 482 | p.stdin.write("2\n") 483 | p.stdin.write("1\n") 484 | p.stdin.write("3\n") 485 | p.stdin.write("1\n") 486 | p.stdin.write('dsc.txt\n') 487 | p.stdin.write("\n") 488 | p.stdin.write("\n") 489 | p.stdin.write("\n") 490 | p.stdin.write("1\n") 491 | p.stdin.write("1\n") 492 | p.stdin.write("\n") 493 | p.stdin.write("\n") 494 | p.stdin.write("\n") 495 | p.stdin.write("2\n") 496 | p.stdin.write("1\n") 497 | p.stdin.write("1\n") 498 | p.stdin.write("1\n") 499 | p.stdin.write("1\n") 500 | p.stdin.write("\n") 501 | p.wait() 502 | input.close() 503 | output.close() 504 | 505 | while not os.path.exists('in.txt'): 506 | time.sleep(1) 507 | p = subprocess.Popen(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1])+'/guide < in.txt > log.txt', shell=True) 508 | p.wait() 509 | 510 | output = file('out.txt', 'r') 511 | lines = output.readlines() 512 | output.close() 513 | 514 | start_index, end_index, counter = 0, 0, 0 515 | for line in lines: 516 | if line == ' Classification tree:\n': 517 | start_index = counter+2 518 | if line == ' ***************************************************************\n': 519 | end_index = counter-1 520 | counter += 1 521 | tree = self._decision_tree_from_text(lines[start_index:end_index]) 522 | 523 | # self.remove_files() 524 | 525 | # tree.visualise('GUIDE') 526 | return tree 527 | 528 | def _decision_tree_from_text(self, lines): 529 | 530 | dt = decisiontree.DecisionTree() 531 | 532 | if '<=' in lines[0] or '>' in lines[0] or '=' in lines[0]: 533 | # Intermediate node 534 | node_name = lines[0].split(':')[0].lstrip() 535 | # print(lines[0]) 536 | label, value = lines[0].split(':')[1].split('<=') 537 | label = ' '.join(label.lstrip().rstrip().split('.')) 538 | value = value.lstrip().split()[0] 539 | dt.label = label 540 | dt.value = float(value) 541 | dt.left = self._decision_tree_from_text(lines[1:]) 542 | counter = 1 543 | while lines[counter].split(':')[0].lstrip() != node_name: counter+=1 544 | dt.right = self._decision_tree_from_text(lines[counter + 1:]) 545 | else: 546 | # Terminal node 547 | # print lines[0] 548 | dt.label = int(lines[0].split(':')[1].lstrip().split('.')[0]) 549 | 550 | return dt 551 | 552 | def _create_desc_and_data_file(self, training_feature_vectors, labels): 553 | dsc = open("dsc.txt", "w") 554 | data = open("data.txt", "w") 555 | dsc.write("data.txt\n") 556 | dsc.write("\"?\"\n") 557 | dsc.write("1\n") 558 | count = 1 559 | for col in training_feature_vectors.columns: 560 | dsc.write(str(count) + ' \"' + str(col) + '\" n\n') 561 | count += 1 562 | dsc.write(str(count) + ' ' + str(labels.name) + ' d') 563 | 564 | for i in range(len(training_feature_vectors)): 565 | sample = 
training_feature_vectors.iloc[i,:] 566 | for col in training_feature_vectors.columns: 567 | data.write(str(sample[col]) + ' ') 568 | if i != len(training_feature_vectors)-1: 569 | data.write(str(labels[i])+'\n') 570 | else: 571 | data.write(str(labels[i])) 572 | 573 | data.close() 574 | dsc.close() 575 | 576 | def _remove_files(self): 577 | os.remove('data.txt') 578 | os.remove('in.txt') 579 | os.remove('dsc.txt') 580 | os.remove('out.txt') 581 | os.remove('log.txt') -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Contains the data files and two python files that are responsible for loading them in easily. In `data.load_datasets`, 3 | a load function for each dataset must be written. In `data.load_all_datasets` python introspection is used to easily 4 | load in all datasets with a load function. 5 | 6 | Written by Gilles Vandewiele in commission of IDLab - INTEC from University Ghent. 7 | """ -------------------------------------------------------------------------------- /data/breast-cancer-wisconsin.data: -------------------------------------------------------------------------------- 1 | 1000025,5,1,1,1,2,1,3,1,1,2 2 | 1002945,5,4,4,5,7,10,3,2,1,2 3 | 1015425,3,1,1,1,2,2,3,1,1,2 4 | 1016277,6,8,8,1,3,4,3,7,1,2 5 | 1017023,4,1,1,3,2,1,3,1,1,2 6 | 1017122,8,10,10,8,7,10,9,7,1,4 7 | 1018099,1,1,1,1,2,10,3,1,1,2 8 | 1018561,2,1,2,1,2,1,3,1,1,2 9 | 1033078,2,1,1,1,2,1,1,1,5,2 10 | 1033078,4,2,1,1,2,1,2,1,1,2 11 | 1035283,1,1,1,1,1,1,3,1,1,2 12 | 1036172,2,1,1,1,2,1,2,1,1,2 13 | 1041801,5,3,3,3,2,3,4,4,1,4 14 | 1043999,1,1,1,1,2,3,3,1,1,2 15 | 1044572,8,7,5,10,7,9,5,5,4,4 16 | 1047630,7,4,6,4,6,1,4,3,1,4 17 | 1048672,4,1,1,1,2,1,2,1,1,2 18 | 1049815,4,1,1,1,2,1,3,1,1,2 19 | 1050670,10,7,7,6,4,10,4,1,2,4 20 | 1050718,6,1,1,1,2,1,3,1,1,2 21 | 1054590,7,3,2,10,5,10,5,4,4,4 22 | 1054593,10,5,5,3,6,7,7,10,1,4 23 | 1056784,3,1,1,1,2,1,2,1,1,2 24 | 1057013,8,4,5,1,2,?,7,3,1,4 25 | 1059552,1,1,1,1,2,1,3,1,1,2 26 | 1065726,5,2,3,4,2,7,3,6,1,4 27 | 1066373,3,2,1,1,1,1,2,1,1,2 28 | 1066979,5,1,1,1,2,1,2,1,1,2 29 | 1067444,2,1,1,1,2,1,2,1,1,2 30 | 1070935,1,1,3,1,2,1,1,1,1,2 31 | 1070935,3,1,1,1,1,1,2,1,1,2 32 | 1071760,2,1,1,1,2,1,3,1,1,2 33 | 1072179,10,7,7,3,8,5,7,4,3,4 34 | 1074610,2,1,1,2,2,1,3,1,1,2 35 | 1075123,3,1,2,1,2,1,2,1,1,2 36 | 1079304,2,1,1,1,2,1,2,1,1,2 37 | 1080185,10,10,10,8,6,1,8,9,1,4 38 | 1081791,6,2,1,1,1,1,7,1,1,2 39 | 1084584,5,4,4,9,2,10,5,6,1,4 40 | 1091262,2,5,3,3,6,7,7,5,1,4 41 | 1096800,6,6,6,9,6,?,7,8,1,2 42 | 1099510,10,4,3,1,3,3,6,5,2,4 43 | 1100524,6,10,10,2,8,10,7,3,3,4 44 | 1102573,5,6,5,6,10,1,3,1,1,4 45 | 1103608,10,10,10,4,8,1,8,10,1,4 46 | 1103722,1,1,1,1,2,1,2,1,2,2 47 | 1105257,3,7,7,4,4,9,4,8,1,4 48 | 1105524,1,1,1,1,2,1,2,1,1,2 49 | 1106095,4,1,1,3,2,1,3,1,1,2 50 | 1106829,7,8,7,2,4,8,3,8,2,4 51 | 1108370,9,5,8,1,2,3,2,1,5,4 52 | 1108449,5,3,3,4,2,4,3,4,1,4 53 | 1110102,10,3,6,2,3,5,4,10,2,4 54 | 1110503,5,5,5,8,10,8,7,3,7,4 55 | 1110524,10,5,5,6,8,8,7,1,1,4 56 | 1111249,10,6,6,3,4,5,3,6,1,4 57 | 1112209,8,10,10,1,3,6,3,9,1,4 58 | 1113038,8,2,4,1,5,1,5,4,4,4 59 | 1113483,5,2,3,1,6,10,5,1,1,4 60 | 1113906,9,5,5,2,2,2,5,1,1,4 61 | 1115282,5,3,5,5,3,3,4,10,1,4 62 | 1115293,1,1,1,1,2,2,2,1,1,2 63 | 1116116,9,10,10,1,10,8,3,3,1,4 64 | 1116132,6,3,4,1,5,2,3,9,1,4 65 | 1116192,1,1,1,1,2,1,2,1,1,2 66 | 1116998,10,4,2,1,3,2,4,3,10,4 67 | 1117152,4,1,1,1,2,1,3,1,1,2 68 | 1118039,5,3,4,1,8,10,4,9,1,4 69 | 
1120559,8,3,8,3,4,9,8,9,8,4 70 | 1121732,1,1,1,1,2,1,3,2,1,2 71 | 1121919,5,1,3,1,2,1,2,1,1,2 72 | 1123061,6,10,2,8,10,2,7,8,10,4 73 | 1124651,1,3,3,2,2,1,7,2,1,2 74 | 1125035,9,4,5,10,6,10,4,8,1,4 75 | 1126417,10,6,4,1,3,4,3,2,3,4 76 | 1131294,1,1,2,1,2,2,4,2,1,2 77 | 1132347,1,1,4,1,2,1,2,1,1,2 78 | 1133041,5,3,1,2,2,1,2,1,1,2 79 | 1133136,3,1,1,1,2,3,3,1,1,2 80 | 1136142,2,1,1,1,3,1,2,1,1,2 81 | 1137156,2,2,2,1,1,1,7,1,1,2 82 | 1143978,4,1,1,2,2,1,2,1,1,2 83 | 1143978,5,2,1,1,2,1,3,1,1,2 84 | 1147044,3,1,1,1,2,2,7,1,1,2 85 | 1147699,3,5,7,8,8,9,7,10,7,4 86 | 1147748,5,10,6,1,10,4,4,10,10,4 87 | 1148278,3,3,6,4,5,8,4,4,1,4 88 | 1148873,3,6,6,6,5,10,6,8,3,4 89 | 1152331,4,1,1,1,2,1,3,1,1,2 90 | 1155546,2,1,1,2,3,1,2,1,1,2 91 | 1156272,1,1,1,1,2,1,3,1,1,2 92 | 1156948,3,1,1,2,2,1,1,1,1,2 93 | 1157734,4,1,1,1,2,1,3,1,1,2 94 | 1158247,1,1,1,1,2,1,2,1,1,2 95 | 1160476,2,1,1,1,2,1,3,1,1,2 96 | 1164066,1,1,1,1,2,1,3,1,1,2 97 | 1165297,2,1,1,2,2,1,1,1,1,2 98 | 1165790,5,1,1,1,2,1,3,1,1,2 99 | 1165926,9,6,9,2,10,6,2,9,10,4 100 | 1166630,7,5,6,10,5,10,7,9,4,4 101 | 1166654,10,3,5,1,10,5,3,10,2,4 102 | 1167439,2,3,4,4,2,5,2,5,1,4 103 | 1167471,4,1,2,1,2,1,3,1,1,2 104 | 1168359,8,2,3,1,6,3,7,1,1,4 105 | 1168736,10,10,10,10,10,1,8,8,8,4 106 | 1169049,7,3,4,4,3,3,3,2,7,4 107 | 1170419,10,10,10,8,2,10,4,1,1,4 108 | 1170420,1,6,8,10,8,10,5,7,1,4 109 | 1171710,1,1,1,1,2,1,2,3,1,2 110 | 1171710,6,5,4,4,3,9,7,8,3,4 111 | 1171795,1,3,1,2,2,2,5,3,2,2 112 | 1171845,8,6,4,3,5,9,3,1,1,4 113 | 1172152,10,3,3,10,2,10,7,3,3,4 114 | 1173216,10,10,10,3,10,8,8,1,1,4 115 | 1173235,3,3,2,1,2,3,3,1,1,2 116 | 1173347,1,1,1,1,2,5,1,1,1,2 117 | 1173347,8,3,3,1,2,2,3,2,1,2 118 | 1173509,4,5,5,10,4,10,7,5,8,4 119 | 1173514,1,1,1,1,4,3,1,1,1,2 120 | 1173681,3,2,1,1,2,2,3,1,1,2 121 | 1174057,1,1,2,2,2,1,3,1,1,2 122 | 1174057,4,2,1,1,2,2,3,1,1,2 123 | 1174131,10,10,10,2,10,10,5,3,3,4 124 | 1174428,5,3,5,1,8,10,5,3,1,4 125 | 1175937,5,4,6,7,9,7,8,10,1,4 126 | 1176406,1,1,1,1,2,1,2,1,1,2 127 | 1176881,7,5,3,7,4,10,7,5,5,4 128 | 1177027,3,1,1,1,2,1,3,1,1,2 129 | 1177399,8,3,5,4,5,10,1,6,2,4 130 | 1177512,1,1,1,1,10,1,1,1,1,2 131 | 1178580,5,1,3,1,2,1,2,1,1,2 132 | 1179818,2,1,1,1,2,1,3,1,1,2 133 | 1180194,5,10,8,10,8,10,3,6,3,4 134 | 1180523,3,1,1,1,2,1,2,2,1,2 135 | 1180831,3,1,1,1,3,1,2,1,1,2 136 | 1181356,5,1,1,1,2,2,3,3,1,2 137 | 1182404,4,1,1,1,2,1,2,1,1,2 138 | 1182410,3,1,1,1,2,1,1,1,1,2 139 | 1183240,4,1,2,1,2,1,2,1,1,2 140 | 1183246,1,1,1,1,1,?,2,1,1,2 141 | 1183516,3,1,1,1,2,1,1,1,1,2 142 | 1183911,2,1,1,1,2,1,1,1,1,2 143 | 1183983,9,5,5,4,4,5,4,3,3,4 144 | 1184184,1,1,1,1,2,5,1,1,1,2 145 | 1184241,2,1,1,1,2,1,2,1,1,2 146 | 1184840,1,1,3,1,2,?,2,1,1,2 147 | 1185609,3,4,5,2,6,8,4,1,1,4 148 | 1185610,1,1,1,1,3,2,2,1,1,2 149 | 1187457,3,1,1,3,8,1,5,8,1,2 150 | 1187805,8,8,7,4,10,10,7,8,7,4 151 | 1188472,1,1,1,1,1,1,3,1,1,2 152 | 1189266,7,2,4,1,6,10,5,4,3,4 153 | 1189286,10,10,8,6,4,5,8,10,1,4 154 | 1190394,4,1,1,1,2,3,1,1,1,2 155 | 1190485,1,1,1,1,2,1,1,1,1,2 156 | 1192325,5,5,5,6,3,10,3,1,1,4 157 | 1193091,1,2,2,1,2,1,2,1,1,2 158 | 1193210,2,1,1,1,2,1,3,1,1,2 159 | 1193683,1,1,2,1,3,?,1,1,1,2 160 | 1196295,9,9,10,3,6,10,7,10,6,4 161 | 1196915,10,7,7,4,5,10,5,7,2,4 162 | 1197080,4,1,1,1,2,1,3,2,1,2 163 | 1197270,3,1,1,1,2,1,3,1,1,2 164 | 1197440,1,1,1,2,1,3,1,1,7,2 165 | 1197510,5,1,1,1,2,?,3,1,1,2 166 | 1197979,4,1,1,1,2,2,3,2,1,2 167 | 1197993,5,6,7,8,8,10,3,10,3,4 168 | 1198128,10,8,10,10,6,1,3,1,10,4 169 | 1198641,3,1,1,1,2,1,3,1,1,2 170 | 1199219,1,1,1,2,1,1,1,1,1,2 171 | 1199731,3,1,1,1,2,1,1,1,1,2 172 | 
1199983,1,1,1,1,2,1,3,1,1,2 173 | 1200772,1,1,1,1,2,1,2,1,1,2 174 | 1200847,6,10,10,10,8,10,10,10,7,4 175 | 1200892,8,6,5,4,3,10,6,1,1,4 176 | 1200952,5,8,7,7,10,10,5,7,1,4 177 | 1201834,2,1,1,1,2,1,3,1,1,2 178 | 1201936,5,10,10,3,8,1,5,10,3,4 179 | 1202125,4,1,1,1,2,1,3,1,1,2 180 | 1202812,5,3,3,3,6,10,3,1,1,4 181 | 1203096,1,1,1,1,1,1,3,1,1,2 182 | 1204242,1,1,1,1,2,1,1,1,1,2 183 | 1204898,6,1,1,1,2,1,3,1,1,2 184 | 1205138,5,8,8,8,5,10,7,8,1,4 185 | 1205579,8,7,6,4,4,10,5,1,1,4 186 | 1206089,2,1,1,1,1,1,3,1,1,2 187 | 1206695,1,5,8,6,5,8,7,10,1,4 188 | 1206841,10,5,6,10,6,10,7,7,10,4 189 | 1207986,5,8,4,10,5,8,9,10,1,4 190 | 1208301,1,2,3,1,2,1,3,1,1,2 191 | 1210963,10,10,10,8,6,8,7,10,1,4 192 | 1211202,7,5,10,10,10,10,4,10,3,4 193 | 1212232,5,1,1,1,2,1,2,1,1,2 194 | 1212251,1,1,1,1,2,1,3,1,1,2 195 | 1212422,3,1,1,1,2,1,3,1,1,2 196 | 1212422,4,1,1,1,2,1,3,1,1,2 197 | 1213375,8,4,4,5,4,7,7,8,2,2 198 | 1213383,5,1,1,4,2,1,3,1,1,2 199 | 1214092,1,1,1,1,2,1,1,1,1,2 200 | 1214556,3,1,1,1,2,1,2,1,1,2 201 | 1214966,9,7,7,5,5,10,7,8,3,4 202 | 1216694,10,8,8,4,10,10,8,1,1,4 203 | 1216947,1,1,1,1,2,1,3,1,1,2 204 | 1217051,5,1,1,1,2,1,3,1,1,2 205 | 1217264,1,1,1,1,2,1,3,1,1,2 206 | 1218105,5,10,10,9,6,10,7,10,5,4 207 | 1218741,10,10,9,3,7,5,3,5,1,4 208 | 1218860,1,1,1,1,1,1,3,1,1,2 209 | 1218860,1,1,1,1,1,1,3,1,1,2 210 | 1219406,5,1,1,1,1,1,3,1,1,2 211 | 1219525,8,10,10,10,5,10,8,10,6,4 212 | 1219859,8,10,8,8,4,8,7,7,1,4 213 | 1220330,1,1,1,1,2,1,3,1,1,2 214 | 1221863,10,10,10,10,7,10,7,10,4,4 215 | 1222047,10,10,10,10,3,10,10,6,1,4 216 | 1222936,8,7,8,7,5,5,5,10,2,4 217 | 1223282,1,1,1,1,2,1,2,1,1,2 218 | 1223426,1,1,1,1,2,1,3,1,1,2 219 | 1223793,6,10,7,7,6,4,8,10,2,4 220 | 1223967,6,1,3,1,2,1,3,1,1,2 221 | 1224329,1,1,1,2,2,1,3,1,1,2 222 | 1225799,10,6,4,3,10,10,9,10,1,4 223 | 1226012,4,1,1,3,1,5,2,1,1,4 224 | 1226612,7,5,6,3,3,8,7,4,1,4 225 | 1227210,10,5,5,6,3,10,7,9,2,4 226 | 1227244,1,1,1,1,2,1,2,1,1,2 227 | 1227481,10,5,7,4,4,10,8,9,1,4 228 | 1228152,8,9,9,5,3,5,7,7,1,4 229 | 1228311,1,1,1,1,1,1,3,1,1,2 230 | 1230175,10,10,10,3,10,10,9,10,1,4 231 | 1230688,7,4,7,4,3,7,7,6,1,4 232 | 1231387,6,8,7,5,6,8,8,9,2,4 233 | 1231706,8,4,6,3,3,1,4,3,1,2 234 | 1232225,10,4,5,5,5,10,4,1,1,4 235 | 1236043,3,3,2,1,3,1,3,6,1,2 236 | 1241232,3,1,4,1,2,?,3,1,1,2 237 | 1241559,10,8,8,2,8,10,4,8,10,4 238 | 1241679,9,8,8,5,6,2,4,10,4,4 239 | 1242364,8,10,10,8,6,9,3,10,10,4 240 | 1243256,10,4,3,2,3,10,5,3,2,4 241 | 1270479,5,1,3,3,2,2,2,3,1,2 242 | 1276091,3,1,1,3,1,1,3,1,1,2 243 | 1277018,2,1,1,1,2,1,3,1,1,2 244 | 128059,1,1,1,1,2,5,5,1,1,2 245 | 1285531,1,1,1,1,2,1,3,1,1,2 246 | 1287775,5,1,1,2,2,2,3,1,1,2 247 | 144888,8,10,10,8,5,10,7,8,1,4 248 | 145447,8,4,4,1,2,9,3,3,1,4 249 | 167528,4,1,1,1,2,1,3,6,1,2 250 | 169356,3,1,1,1,2,?,3,1,1,2 251 | 183913,1,2,2,1,2,1,1,1,1,2 252 | 191250,10,4,4,10,2,10,5,3,3,4 253 | 1017023,6,3,3,5,3,10,3,5,3,2 254 | 1100524,6,10,10,2,8,10,7,3,3,4 255 | 1116116,9,10,10,1,10,8,3,3,1,4 256 | 1168736,5,6,6,2,4,10,3,6,1,4 257 | 1182404,3,1,1,1,2,1,1,1,1,2 258 | 1182404,3,1,1,1,2,1,2,1,1,2 259 | 1198641,3,1,1,1,2,1,3,1,1,2 260 | 242970,5,7,7,1,5,8,3,4,1,2 261 | 255644,10,5,8,10,3,10,5,1,3,4 262 | 263538,5,10,10,6,10,10,10,6,5,4 263 | 274137,8,8,9,4,5,10,7,8,1,4 264 | 303213,10,4,4,10,6,10,5,5,1,4 265 | 314428,7,9,4,10,10,3,5,3,3,4 266 | 1182404,5,1,4,1,2,1,3,2,1,2 267 | 1198641,10,10,6,3,3,10,4,3,2,4 268 | 320675,3,3,5,2,3,10,7,1,1,4 269 | 324427,10,8,8,2,3,4,8,7,8,4 270 | 385103,1,1,1,1,2,1,3,1,1,2 271 | 390840,8,4,7,1,3,10,3,9,2,4 272 | 411453,5,1,1,1,2,1,3,1,1,2 273 | 
320675,3,3,5,2,3,10,7,1,1,4 274 | 428903,7,2,4,1,3,4,3,3,1,4 275 | 431495,3,1,1,1,2,1,3,2,1,2 276 | 432809,3,1,3,1,2,?,2,1,1,2 277 | 434518,3,1,1,1,2,1,2,1,1,2 278 | 452264,1,1,1,1,2,1,2,1,1,2 279 | 456282,1,1,1,1,2,1,3,1,1,2 280 | 476903,10,5,7,3,3,7,3,3,8,4 281 | 486283,3,1,1,1,2,1,3,1,1,2 282 | 486662,2,1,1,2,2,1,3,1,1,2 283 | 488173,1,4,3,10,4,10,5,6,1,4 284 | 492268,10,4,6,1,2,10,5,3,1,4 285 | 508234,7,4,5,10,2,10,3,8,2,4 286 | 527363,8,10,10,10,8,10,10,7,3,4 287 | 529329,10,10,10,10,10,10,4,10,10,4 288 | 535331,3,1,1,1,3,1,2,1,1,2 289 | 543558,6,1,3,1,4,5,5,10,1,4 290 | 555977,5,6,6,8,6,10,4,10,4,4 291 | 560680,1,1,1,1,2,1,1,1,1,2 292 | 561477,1,1,1,1,2,1,3,1,1,2 293 | 563649,8,8,8,1,2,?,6,10,1,4 294 | 601265,10,4,4,6,2,10,2,3,1,4 295 | 606140,1,1,1,1,2,?,2,1,1,2 296 | 606722,5,5,7,8,6,10,7,4,1,4 297 | 616240,5,3,4,3,4,5,4,7,1,2 298 | 61634,5,4,3,1,2,?,2,3,1,2 299 | 625201,8,2,1,1,5,1,1,1,1,2 300 | 63375,9,1,2,6,4,10,7,7,2,4 301 | 635844,8,4,10,5,4,4,7,10,1,4 302 | 636130,1,1,1,1,2,1,3,1,1,2 303 | 640744,10,10,10,7,9,10,7,10,10,4 304 | 646904,1,1,1,1,2,1,3,1,1,2 305 | 653777,8,3,4,9,3,10,3,3,1,4 306 | 659642,10,8,4,4,4,10,3,10,4,4 307 | 666090,1,1,1,1,2,1,3,1,1,2 308 | 666942,1,1,1,1,2,1,3,1,1,2 309 | 667204,7,8,7,6,4,3,8,8,4,4 310 | 673637,3,1,1,1,2,5,5,1,1,2 311 | 684955,2,1,1,1,3,1,2,1,1,2 312 | 688033,1,1,1,1,2,1,1,1,1,2 313 | 691628,8,6,4,10,10,1,3,5,1,4 314 | 693702,1,1,1,1,2,1,1,1,1,2 315 | 704097,1,1,1,1,1,1,2,1,1,2 316 | 704168,4,6,5,6,7,?,4,9,1,2 317 | 706426,5,5,5,2,5,10,4,3,1,4 318 | 709287,6,8,7,8,6,8,8,9,1,4 319 | 718641,1,1,1,1,5,1,3,1,1,2 320 | 721482,4,4,4,4,6,5,7,3,1,2 321 | 730881,7,6,3,2,5,10,7,4,6,4 322 | 733639,3,1,1,1,2,?,3,1,1,2 323 | 733639,3,1,1,1,2,1,3,1,1,2 324 | 733823,5,4,6,10,2,10,4,1,1,4 325 | 740492,1,1,1,1,2,1,3,1,1,2 326 | 743348,3,2,2,1,2,1,2,3,1,2 327 | 752904,10,1,1,1,2,10,5,4,1,4 328 | 756136,1,1,1,1,2,1,2,1,1,2 329 | 760001,8,10,3,2,6,4,3,10,1,4 330 | 760239,10,4,6,4,5,10,7,1,1,4 331 | 76389,10,4,7,2,2,8,6,1,1,4 332 | 764974,5,1,1,1,2,1,3,1,2,2 333 | 770066,5,2,2,2,2,1,2,2,1,2 334 | 785208,5,4,6,6,4,10,4,3,1,4 335 | 785615,8,6,7,3,3,10,3,4,2,4 336 | 792744,1,1,1,1,2,1,1,1,1,2 337 | 797327,6,5,5,8,4,10,3,4,1,4 338 | 798429,1,1,1,1,2,1,3,1,1,2 339 | 704097,1,1,1,1,1,1,2,1,1,2 340 | 806423,8,5,5,5,2,10,4,3,1,4 341 | 809912,10,3,3,1,2,10,7,6,1,4 342 | 810104,1,1,1,1,2,1,3,1,1,2 343 | 814265,2,1,1,1,2,1,1,1,1,2 344 | 814911,1,1,1,1,2,1,1,1,1,2 345 | 822829,7,6,4,8,10,10,9,5,3,4 346 | 826923,1,1,1,1,2,1,1,1,1,2 347 | 830690,5,2,2,2,3,1,1,3,1,2 348 | 831268,1,1,1,1,1,1,1,3,1,2 349 | 832226,3,4,4,10,5,1,3,3,1,4 350 | 832567,4,2,3,5,3,8,7,6,1,4 351 | 836433,5,1,1,3,2,1,1,1,1,2 352 | 837082,2,1,1,1,2,1,3,1,1,2 353 | 846832,3,4,5,3,7,3,4,6,1,2 354 | 850831,2,7,10,10,7,10,4,9,4,4 355 | 855524,1,1,1,1,2,1,2,1,1,2 356 | 857774,4,1,1,1,3,1,2,2,1,2 357 | 859164,5,3,3,1,3,3,3,3,3,4 358 | 859350,8,10,10,7,10,10,7,3,8,4 359 | 866325,8,10,5,3,8,4,4,10,3,4 360 | 873549,10,3,5,4,3,7,3,5,3,4 361 | 877291,6,10,10,10,10,10,8,10,10,4 362 | 877943,3,10,3,10,6,10,5,1,4,4 363 | 888169,3,2,2,1,4,3,2,1,1,2 364 | 888523,4,4,4,2,2,3,2,1,1,2 365 | 896404,2,1,1,1,2,1,3,1,1,2 366 | 897172,2,1,1,1,2,1,2,1,1,2 367 | 95719,6,10,10,10,8,10,7,10,7,4 368 | 160296,5,8,8,10,5,10,8,10,3,4 369 | 342245,1,1,3,1,2,1,1,1,1,2 370 | 428598,1,1,3,1,1,1,2,1,1,2 371 | 492561,4,3,2,1,3,1,2,1,1,2 372 | 493452,1,1,3,1,2,1,1,1,1,2 373 | 493452,4,1,2,1,2,1,2,1,1,2 374 | 521441,5,1,1,2,2,1,2,1,1,2 375 | 560680,3,1,2,1,2,1,2,1,1,2 376 | 636437,1,1,1,1,2,1,1,1,1,2 377 | 640712,1,1,1,1,2,1,2,1,1,2 378 | 
654244,1,1,1,1,1,1,2,1,1,2 379 | 657753,3,1,1,4,3,1,2,2,1,2 380 | 685977,5,3,4,1,4,1,3,1,1,2 381 | 805448,1,1,1,1,2,1,1,1,1,2 382 | 846423,10,6,3,6,4,10,7,8,4,4 383 | 1002504,3,2,2,2,2,1,3,2,1,2 384 | 1022257,2,1,1,1,2,1,1,1,1,2 385 | 1026122,2,1,1,1,2,1,1,1,1,2 386 | 1071084,3,3,2,2,3,1,1,2,3,2 387 | 1080233,7,6,6,3,2,10,7,1,1,4 388 | 1114570,5,3,3,2,3,1,3,1,1,2 389 | 1114570,2,1,1,1,2,1,2,2,1,2 390 | 1116715,5,1,1,1,3,2,2,2,1,2 391 | 1131411,1,1,1,2,2,1,2,1,1,2 392 | 1151734,10,8,7,4,3,10,7,9,1,4 393 | 1156017,3,1,1,1,2,1,2,1,1,2 394 | 1158247,1,1,1,1,1,1,1,1,1,2 395 | 1158405,1,2,3,1,2,1,2,1,1,2 396 | 1168278,3,1,1,1,2,1,2,1,1,2 397 | 1176187,3,1,1,1,2,1,3,1,1,2 398 | 1196263,4,1,1,1,2,1,1,1,1,2 399 | 1196475,3,2,1,1,2,1,2,2,1,2 400 | 1206314,1,2,3,1,2,1,1,1,1,2 401 | 1211265,3,10,8,7,6,9,9,3,8,4 402 | 1213784,3,1,1,1,2,1,1,1,1,2 403 | 1223003,5,3,3,1,2,1,2,1,1,2 404 | 1223306,3,1,1,1,2,4,1,1,1,2 405 | 1223543,1,2,1,3,2,1,1,2,1,2 406 | 1229929,1,1,1,1,2,1,2,1,1,2 407 | 1231853,4,2,2,1,2,1,2,1,1,2 408 | 1234554,1,1,1,1,2,1,2,1,1,2 409 | 1236837,2,3,2,2,2,2,3,1,1,2 410 | 1237674,3,1,2,1,2,1,2,1,1,2 411 | 1238021,1,1,1,1,2,1,2,1,1,2 412 | 1238464,1,1,1,1,1,?,2,1,1,2 413 | 1238633,10,10,10,6,8,4,8,5,1,4 414 | 1238915,5,1,2,1,2,1,3,1,1,2 415 | 1238948,8,5,6,2,3,10,6,6,1,4 416 | 1239232,3,3,2,6,3,3,3,5,1,2 417 | 1239347,8,7,8,5,10,10,7,2,1,4 418 | 1239967,1,1,1,1,2,1,2,1,1,2 419 | 1240337,5,2,2,2,2,2,3,2,2,2 420 | 1253505,2,3,1,1,5,1,1,1,1,2 421 | 1255384,3,2,2,3,2,3,3,1,1,2 422 | 1257200,10,10,10,7,10,10,8,2,1,4 423 | 1257648,4,3,3,1,2,1,3,3,1,2 424 | 1257815,5,1,3,1,2,1,2,1,1,2 425 | 1257938,3,1,1,1,2,1,1,1,1,2 426 | 1258549,9,10,10,10,10,10,10,10,1,4 427 | 1258556,5,3,6,1,2,1,1,1,1,2 428 | 1266154,8,7,8,2,4,2,5,10,1,4 429 | 1272039,1,1,1,1,2,1,2,1,1,2 430 | 1276091,2,1,1,1,2,1,2,1,1,2 431 | 1276091,1,3,1,1,2,1,2,2,1,2 432 | 1276091,5,1,1,3,4,1,3,2,1,2 433 | 1277629,5,1,1,1,2,1,2,2,1,2 434 | 1293439,3,2,2,3,2,1,1,1,1,2 435 | 1293439,6,9,7,5,5,8,4,2,1,2 436 | 1294562,10,8,10,1,3,10,5,1,1,4 437 | 1295186,10,10,10,1,6,1,2,8,1,4 438 | 527337,4,1,1,1,2,1,1,1,1,2 439 | 558538,4,1,3,3,2,1,1,1,1,2 440 | 566509,5,1,1,1,2,1,1,1,1,2 441 | 608157,10,4,3,10,4,10,10,1,1,4 442 | 677910,5,2,2,4,2,4,1,1,1,2 443 | 734111,1,1,1,3,2,3,1,1,1,2 444 | 734111,1,1,1,1,2,2,1,1,1,2 445 | 780555,5,1,1,6,3,1,2,1,1,2 446 | 827627,2,1,1,1,2,1,1,1,1,2 447 | 1049837,1,1,1,1,2,1,1,1,1,2 448 | 1058849,5,1,1,1,2,1,1,1,1,2 449 | 1182404,1,1,1,1,1,1,1,1,1,2 450 | 1193544,5,7,9,8,6,10,8,10,1,4 451 | 1201870,4,1,1,3,1,1,2,1,1,2 452 | 1202253,5,1,1,1,2,1,1,1,1,2 453 | 1227081,3,1,1,3,2,1,1,1,1,2 454 | 1230994,4,5,5,8,6,10,10,7,1,4 455 | 1238410,2,3,1,1,3,1,1,1,1,2 456 | 1246562,10,2,2,1,2,6,1,1,2,4 457 | 1257470,10,6,5,8,5,10,8,6,1,4 458 | 1259008,8,8,9,6,6,3,10,10,1,4 459 | 1266124,5,1,2,1,2,1,1,1,1,2 460 | 1267898,5,1,3,1,2,1,1,1,1,2 461 | 1268313,5,1,1,3,2,1,1,1,1,2 462 | 1268804,3,1,1,1,2,5,1,1,1,2 463 | 1276091,6,1,1,3,2,1,1,1,1,2 464 | 1280258,4,1,1,1,2,1,1,2,1,2 465 | 1293966,4,1,1,1,2,1,1,1,1,2 466 | 1296572,10,9,8,7,6,4,7,10,3,4 467 | 1298416,10,6,6,2,4,10,9,7,1,4 468 | 1299596,6,6,6,5,4,10,7,6,2,4 469 | 1105524,4,1,1,1,2,1,1,1,1,2 470 | 1181685,1,1,2,1,2,1,2,1,1,2 471 | 1211594,3,1,1,1,1,1,2,1,1,2 472 | 1238777,6,1,1,3,2,1,1,1,1,2 473 | 1257608,6,1,1,1,1,1,1,1,1,2 474 | 1269574,4,1,1,1,2,1,1,1,1,2 475 | 1277145,5,1,1,1,2,1,1,1,1,2 476 | 1287282,3,1,1,1,2,1,1,1,1,2 477 | 1296025,4,1,2,1,2,1,1,1,1,2 478 | 1296263,4,1,1,1,2,1,1,1,1,2 479 | 1296593,5,2,1,1,2,1,1,1,1,2 480 | 1299161,4,8,7,10,4,10,7,5,1,4 481 | 
1301945,5,1,1,1,1,1,1,1,1,2 482 | 1302428,5,3,2,4,2,1,1,1,1,2 483 | 1318169,9,10,10,10,10,5,10,10,10,4 484 | 474162,8,7,8,5,5,10,9,10,1,4 485 | 787451,5,1,2,1,2,1,1,1,1,2 486 | 1002025,1,1,1,3,1,3,1,1,1,2 487 | 1070522,3,1,1,1,1,1,2,1,1,2 488 | 1073960,10,10,10,10,6,10,8,1,5,4 489 | 1076352,3,6,4,10,3,3,3,4,1,4 490 | 1084139,6,3,2,1,3,4,4,1,1,4 491 | 1115293,1,1,1,1,2,1,1,1,1,2 492 | 1119189,5,8,9,4,3,10,7,1,1,4 493 | 1133991,4,1,1,1,1,1,2,1,1,2 494 | 1142706,5,10,10,10,6,10,6,5,2,4 495 | 1155967,5,1,2,10,4,5,2,1,1,2 496 | 1170945,3,1,1,1,1,1,2,1,1,2 497 | 1181567,1,1,1,1,1,1,1,1,1,2 498 | 1182404,4,2,1,1,2,1,1,1,1,2 499 | 1204558,4,1,1,1,2,1,2,1,1,2 500 | 1217952,4,1,1,1,2,1,2,1,1,2 501 | 1224565,6,1,1,1,2,1,3,1,1,2 502 | 1238186,4,1,1,1,2,1,2,1,1,2 503 | 1253917,4,1,1,2,2,1,2,1,1,2 504 | 1265899,4,1,1,1,2,1,3,1,1,2 505 | 1268766,1,1,1,1,2,1,1,1,1,2 506 | 1277268,3,3,1,1,2,1,1,1,1,2 507 | 1286943,8,10,10,10,7,5,4,8,7,4 508 | 1295508,1,1,1,1,2,4,1,1,1,2 509 | 1297327,5,1,1,1,2,1,1,1,1,2 510 | 1297522,2,1,1,1,2,1,1,1,1,2 511 | 1298360,1,1,1,1,2,1,1,1,1,2 512 | 1299924,5,1,1,1,2,1,2,1,1,2 513 | 1299994,5,1,1,1,2,1,1,1,1,2 514 | 1304595,3,1,1,1,1,1,2,1,1,2 515 | 1306282,6,6,7,10,3,10,8,10,2,4 516 | 1313325,4,10,4,7,3,10,9,10,1,4 517 | 1320077,1,1,1,1,1,1,1,1,1,2 518 | 1320077,1,1,1,1,1,1,2,1,1,2 519 | 1320304,3,1,2,2,2,1,1,1,1,2 520 | 1330439,4,7,8,3,4,10,9,1,1,4 521 | 333093,1,1,1,1,3,1,1,1,1,2 522 | 369565,4,1,1,1,3,1,1,1,1,2 523 | 412300,10,4,5,4,3,5,7,3,1,4 524 | 672113,7,5,6,10,4,10,5,3,1,4 525 | 749653,3,1,1,1,2,1,2,1,1,2 526 | 769612,3,1,1,2,2,1,1,1,1,2 527 | 769612,4,1,1,1,2,1,1,1,1,2 528 | 798429,4,1,1,1,2,1,3,1,1,2 529 | 807657,6,1,3,2,2,1,1,1,1,2 530 | 8233704,4,1,1,1,1,1,2,1,1,2 531 | 837480,7,4,4,3,4,10,6,9,1,4 532 | 867392,4,2,2,1,2,1,2,1,1,2 533 | 869828,1,1,1,1,1,1,3,1,1,2 534 | 1043068,3,1,1,1,2,1,2,1,1,2 535 | 1056171,2,1,1,1,2,1,2,1,1,2 536 | 1061990,1,1,3,2,2,1,3,1,1,2 537 | 1113061,5,1,1,1,2,1,3,1,1,2 538 | 1116192,5,1,2,1,2,1,3,1,1,2 539 | 1135090,4,1,1,1,2,1,2,1,1,2 540 | 1145420,6,1,1,1,2,1,2,1,1,2 541 | 1158157,5,1,1,1,2,2,2,1,1,2 542 | 1171578,3,1,1,1,2,1,1,1,1,2 543 | 1174841,5,3,1,1,2,1,1,1,1,2 544 | 1184586,4,1,1,1,2,1,2,1,1,2 545 | 1186936,2,1,3,2,2,1,2,1,1,2 546 | 1197527,5,1,1,1,2,1,2,1,1,2 547 | 1222464,6,10,10,10,4,10,7,10,1,4 548 | 1240603,2,1,1,1,1,1,1,1,1,2 549 | 1240603,3,1,1,1,1,1,1,1,1,2 550 | 1241035,7,8,3,7,4,5,7,8,2,4 551 | 1287971,3,1,1,1,2,1,2,1,1,2 552 | 1289391,1,1,1,1,2,1,3,1,1,2 553 | 1299924,3,2,2,2,2,1,4,2,1,2 554 | 1306339,4,4,2,1,2,5,2,1,2,2 555 | 1313658,3,1,1,1,2,1,1,1,1,2 556 | 1313982,4,3,1,1,2,1,4,8,1,2 557 | 1321264,5,2,2,2,1,1,2,1,1,2 558 | 1321321,5,1,1,3,2,1,1,1,1,2 559 | 1321348,2,1,1,1,2,1,2,1,1,2 560 | 1321931,5,1,1,1,2,1,2,1,1,2 561 | 1321942,5,1,1,1,2,1,3,1,1,2 562 | 1321942,5,1,1,1,2,1,3,1,1,2 563 | 1328331,1,1,1,1,2,1,3,1,1,2 564 | 1328755,3,1,1,1,2,1,2,1,1,2 565 | 1331405,4,1,1,1,2,1,3,2,1,2 566 | 1331412,5,7,10,10,5,10,10,10,1,4 567 | 1333104,3,1,2,1,2,1,3,1,1,2 568 | 1334071,4,1,1,1,2,3,2,1,1,2 569 | 1343068,8,4,4,1,6,10,2,5,2,4 570 | 1343374,10,10,8,10,6,5,10,3,1,4 571 | 1344121,8,10,4,4,8,10,8,2,1,4 572 | 142932,7,6,10,5,3,10,9,10,2,4 573 | 183936,3,1,1,1,2,1,2,1,1,2 574 | 324382,1,1,1,1,2,1,2,1,1,2 575 | 378275,10,9,7,3,4,2,7,7,1,4 576 | 385103,5,1,2,1,2,1,3,1,1,2 577 | 690557,5,1,1,1,2,1,2,1,1,2 578 | 695091,1,1,1,1,2,1,2,1,1,2 579 | 695219,1,1,1,1,2,1,2,1,1,2 580 | 824249,1,1,1,1,2,1,3,1,1,2 581 | 871549,5,1,2,1,2,1,2,1,1,2 582 | 878358,5,7,10,6,5,10,7,5,1,4 583 | 1107684,6,10,5,5,4,10,6,10,1,4 584 | 
1115762,3,1,1,1,2,1,1,1,1,2 585 | 1217717,5,1,1,6,3,1,1,1,1,2 586 | 1239420,1,1,1,1,2,1,1,1,1,2 587 | 1254538,8,10,10,10,6,10,10,10,1,4 588 | 1261751,5,1,1,1,2,1,2,2,1,2 589 | 1268275,9,8,8,9,6,3,4,1,1,4 590 | 1272166,5,1,1,1,2,1,1,1,1,2 591 | 1294261,4,10,8,5,4,1,10,1,1,4 592 | 1295529,2,5,7,6,4,10,7,6,1,4 593 | 1298484,10,3,4,5,3,10,4,1,1,4 594 | 1311875,5,1,2,1,2,1,1,1,1,2 595 | 1315506,4,8,6,3,4,10,7,1,1,4 596 | 1320141,5,1,1,1,2,1,2,1,1,2 597 | 1325309,4,1,2,1,2,1,2,1,1,2 598 | 1333063,5,1,3,1,2,1,3,1,1,2 599 | 1333495,3,1,1,1,2,1,2,1,1,2 600 | 1334659,5,2,4,1,1,1,1,1,1,2 601 | 1336798,3,1,1,1,2,1,2,1,1,2 602 | 1344449,1,1,1,1,1,1,2,1,1,2 603 | 1350568,4,1,1,1,2,1,2,1,1,2 604 | 1352663,5,4,6,8,4,1,8,10,1,4 605 | 188336,5,3,2,8,5,10,8,1,2,4 606 | 352431,10,5,10,3,5,8,7,8,3,4 607 | 353098,4,1,1,2,2,1,1,1,1,2 608 | 411453,1,1,1,1,2,1,1,1,1,2 609 | 557583,5,10,10,10,10,10,10,1,1,4 610 | 636375,5,1,1,1,2,1,1,1,1,2 611 | 736150,10,4,3,10,3,10,7,1,2,4 612 | 803531,5,10,10,10,5,2,8,5,1,4 613 | 822829,8,10,10,10,6,10,10,10,10,4 614 | 1016634,2,3,1,1,2,1,2,1,1,2 615 | 1031608,2,1,1,1,1,1,2,1,1,2 616 | 1041043,4,1,3,1,2,1,2,1,1,2 617 | 1042252,3,1,1,1,2,1,2,1,1,2 618 | 1057067,1,1,1,1,1,?,1,1,1,2 619 | 1061990,4,1,1,1,2,1,2,1,1,2 620 | 1073836,5,1,1,1,2,1,2,1,1,2 621 | 1083817,3,1,1,1,2,1,2,1,1,2 622 | 1096352,6,3,3,3,3,2,6,1,1,2 623 | 1140597,7,1,2,3,2,1,2,1,1,2 624 | 1149548,1,1,1,1,2,1,1,1,1,2 625 | 1174009,5,1,1,2,1,1,2,1,1,2 626 | 1183596,3,1,3,1,3,4,1,1,1,2 627 | 1190386,4,6,6,5,7,6,7,7,3,4 628 | 1190546,2,1,1,1,2,5,1,1,1,2 629 | 1213273,2,1,1,1,2,1,1,1,1,2 630 | 1218982,4,1,1,1,2,1,1,1,1,2 631 | 1225382,6,2,3,1,2,1,1,1,1,2 632 | 1235807,5,1,1,1,2,1,2,1,1,2 633 | 1238777,1,1,1,1,2,1,1,1,1,2 634 | 1253955,8,7,4,4,5,3,5,10,1,4 635 | 1257366,3,1,1,1,2,1,1,1,1,2 636 | 1260659,3,1,4,1,2,1,1,1,1,2 637 | 1268952,10,10,7,8,7,1,10,10,3,4 638 | 1275807,4,2,4,3,2,2,2,1,1,2 639 | 1277792,4,1,1,1,2,1,1,1,1,2 640 | 1277792,5,1,1,3,2,1,1,1,1,2 641 | 1285722,4,1,1,3,2,1,1,1,1,2 642 | 1288608,3,1,1,1,2,1,2,1,1,2 643 | 1290203,3,1,1,1,2,1,2,1,1,2 644 | 1294413,1,1,1,1,2,1,1,1,1,2 645 | 1299596,2,1,1,1,2,1,1,1,1,2 646 | 1303489,3,1,1,1,2,1,2,1,1,2 647 | 1311033,1,2,2,1,2,1,1,1,1,2 648 | 1311108,1,1,1,3,2,1,1,1,1,2 649 | 1315807,5,10,10,10,10,2,10,10,10,4 650 | 1318671,3,1,1,1,2,1,2,1,1,2 651 | 1319609,3,1,1,2,3,4,1,1,1,2 652 | 1323477,1,2,1,3,2,1,2,1,1,2 653 | 1324572,5,1,1,1,2,1,2,2,1,2 654 | 1324681,4,1,1,1,2,1,2,1,1,2 655 | 1325159,3,1,1,1,2,1,3,1,1,2 656 | 1326892,3,1,1,1,2,1,2,1,1,2 657 | 1330361,5,1,1,1,2,1,2,1,1,2 658 | 1333877,5,4,5,1,8,1,3,6,1,2 659 | 1334015,7,8,8,7,3,10,7,2,3,4 660 | 1334667,1,1,1,1,2,1,1,1,1,2 661 | 1339781,1,1,1,1,2,1,2,1,1,2 662 | 1339781,4,1,1,1,2,1,3,1,1,2 663 | 13454352,1,1,3,1,2,1,2,1,1,2 664 | 1345452,1,1,3,1,2,1,2,1,1,2 665 | 1345593,3,1,1,3,2,1,2,1,1,2 666 | 1347749,1,1,1,1,2,1,1,1,1,2 667 | 1347943,5,2,2,2,2,1,1,1,2,2 668 | 1348851,3,1,1,1,2,1,3,1,1,2 669 | 1350319,5,7,4,1,6,1,7,10,3,4 670 | 1350423,5,10,10,8,5,5,7,10,1,4 671 | 1352848,3,10,7,8,5,8,7,4,1,4 672 | 1353092,3,2,1,2,2,1,3,1,1,2 673 | 1354840,2,1,1,1,2,1,3,1,1,2 674 | 1354840,5,3,2,1,3,1,1,1,1,2 675 | 1355260,1,1,1,1,2,1,2,1,1,2 676 | 1365075,4,1,4,1,2,1,1,1,1,2 677 | 1365328,1,1,2,1,2,1,2,1,1,2 678 | 1368267,5,1,1,1,2,1,1,1,1,2 679 | 1368273,1,1,1,1,2,1,1,1,1,2 680 | 1368882,2,1,1,1,2,1,1,1,1,2 681 | 1369821,10,10,10,10,5,10,10,10,7,4 682 | 1371026,5,10,10,10,4,10,5,6,3,4 683 | 1371920,5,1,1,1,2,1,3,2,1,2 684 | 466906,1,1,1,1,2,1,1,1,1,2 685 | 466906,1,1,1,1,2,1,1,1,1,2 686 | 534555,1,1,1,1,2,1,1,1,1,2 687 | 
536708,1,1,1,1,2,1,1,1,1,2 688 | 566346,3,1,1,1,2,1,2,3,1,2 689 | 603148,4,1,1,1,2,1,1,1,1,2 690 | 654546,1,1,1,1,2,1,1,1,8,2 691 | 654546,1,1,1,3,2,1,1,1,1,2 692 | 695091,5,10,10,5,4,5,4,4,1,4 693 | 714039,3,1,1,1,2,1,1,1,1,2 694 | 763235,3,1,1,1,2,1,2,1,2,2 695 | 776715,3,1,1,1,3,2,1,1,1,2 696 | 841769,2,1,1,1,2,1,1,1,1,2 697 | 888820,5,10,10,3,7,3,8,10,2,4 698 | 897471,4,8,6,4,3,4,10,6,1,4 699 | 897471,4,8,8,5,4,5,10,4,1,4 700 | -------------------------------------------------------------------------------- /data/ecoli.data: -------------------------------------------------------------------------------- 1 | AAT_ECOLI 0.49 0.29 0.48 0.50 0.56 0.24 0.35 cp 2 | ACEA_ECOLI 0.07 0.40 0.48 0.50 0.54 0.35 0.44 cp 3 | ACEK_ECOLI 0.56 0.40 0.48 0.50 0.49 0.37 0.46 cp 4 | ACKA_ECOLI 0.59 0.49 0.48 0.50 0.52 0.45 0.36 cp 5 | ADI_ECOLI 0.23 0.32 0.48 0.50 0.55 0.25 0.35 cp 6 | ALKH_ECOLI 0.67 0.39 0.48 0.50 0.36 0.38 0.46 cp 7 | AMPD_ECOLI 0.29 0.28 0.48 0.50 0.44 0.23 0.34 cp 8 | AMY2_ECOLI 0.21 0.34 0.48 0.50 0.51 0.28 0.39 cp 9 | APT_ECOLI 0.20 0.44 0.48 0.50 0.46 0.51 0.57 cp 10 | ARAC_ECOLI 0.42 0.40 0.48 0.50 0.56 0.18 0.30 cp 11 | ASG1_ECOLI 0.42 0.24 0.48 0.50 0.57 0.27 0.37 cp 12 | BTUR_ECOLI 0.25 0.48 0.48 0.50 0.44 0.17 0.29 cp 13 | CAFA_ECOLI 0.39 0.32 0.48 0.50 0.46 0.24 0.35 cp 14 | CAIB_ECOLI 0.51 0.50 0.48 0.50 0.46 0.32 0.35 cp 15 | CFA_ECOLI 0.22 0.43 0.48 0.50 0.48 0.16 0.28 cp 16 | CHEA_ECOLI 0.25 0.40 0.48 0.50 0.46 0.44 0.52 cp 17 | CHEB_ECOLI 0.34 0.45 0.48 0.50 0.38 0.24 0.35 cp 18 | CHEW_ECOLI 0.44 0.27 0.48 0.50 0.55 0.52 0.58 cp 19 | CHEY_ECOLI 0.23 0.40 0.48 0.50 0.39 0.28 0.38 cp 20 | CHEZ_ECOLI 0.41 0.57 0.48 0.50 0.39 0.21 0.32 cp 21 | CRL_ECOLI 0.40 0.45 0.48 0.50 0.38 0.22 0.00 cp 22 | CSPA_ECOLI 0.31 0.23 0.48 0.50 0.73 0.05 0.14 cp 23 | CYNR_ECOLI 0.51 0.54 0.48 0.50 0.41 0.34 0.43 cp 24 | CYPB_ECOLI 0.30 0.16 0.48 0.50 0.56 0.11 0.23 cp 25 | CYPC_ECOLI 0.36 0.39 0.48 0.50 0.48 0.22 0.23 cp 26 | CYSB_ECOLI 0.29 0.37 0.48 0.50 0.48 0.44 0.52 cp 27 | CYSE_ECOLI 0.25 0.40 0.48 0.50 0.47 0.33 0.42 cp 28 | DAPD_ECOLI 0.21 0.51 0.48 0.50 0.50 0.32 0.41 cp 29 | DCP_ECOLI 0.43 0.37 0.48 0.50 0.53 0.35 0.44 cp 30 | DDLA_ECOLI 0.43 0.39 0.48 0.50 0.47 0.31 0.41 cp 31 | DDLB_ECOLI 0.53 0.38 0.48 0.50 0.44 0.26 0.36 cp 32 | DEOC_ECOLI 0.34 0.33 0.48 0.50 0.38 0.35 0.44 cp 33 | DLDH_ECOLI 0.56 0.51 0.48 0.50 0.34 0.37 0.46 cp 34 | EFG_ECOLI 0.40 0.29 0.48 0.50 0.42 0.35 0.44 cp 35 | EFTS_ECOLI 0.24 0.35 0.48 0.50 0.31 0.19 0.31 cp 36 | EFTU_ECOLI 0.36 0.54 0.48 0.50 0.41 0.38 0.46 cp 37 | ENO_ECOLI 0.29 0.52 0.48 0.50 0.42 0.29 0.39 cp 38 | FABB_ECOLI 0.65 0.47 0.48 0.50 0.59 0.30 0.40 cp 39 | FES_ECOLI 0.32 0.42 0.48 0.50 0.35 0.28 0.38 cp 40 | G3P1_ECOLI 0.38 0.46 0.48 0.50 0.48 0.22 0.29 cp 41 | G3P2_ECOLI 0.33 0.45 0.48 0.50 0.52 0.32 0.41 cp 42 | G6PI_ECOLI 0.30 0.37 0.48 0.50 0.59 0.41 0.49 cp 43 | GCVA_ECOLI 0.40 0.50 0.48 0.50 0.45 0.39 0.47 cp 44 | GLNA_ECOLI 0.28 0.38 0.48 0.50 0.50 0.33 0.42 cp 45 | GLPD_ECOLI 0.61 0.45 0.48 0.50 0.48 0.35 0.41 cp 46 | GLYA_ECOLI 0.17 0.38 0.48 0.50 0.45 0.42 0.50 cp 47 | GSHR_ECOLI 0.44 0.35 0.48 0.50 0.55 0.55 0.61 cp 48 | GT_ECOLI 0.43 0.40 0.48 0.50 0.39 0.28 0.39 cp 49 | HEM6_ECOLI 0.42 0.35 0.48 0.50 0.58 0.15 0.27 cp 50 | HEMN_ECOLI 0.23 0.33 0.48 0.50 0.43 0.33 0.43 cp 51 | HPRT_ECOLI 0.37 0.52 0.48 0.50 0.42 0.42 0.36 cp 52 | IF1_ECOLI 0.29 0.30 0.48 0.50 0.45 0.03 0.17 cp 53 | IF2_ECOLI 0.22 0.36 0.48 0.50 0.35 0.39 0.47 cp 54 | ILVY_ECOLI 0.23 0.58 0.48 0.50 0.37 0.53 0.59 cp 55 | IPYR_ECOLI 0.47 0.47 0.48 0.50 0.22 
0.16 0.26 cp 56 | KAD_ECOLI 0.54 0.47 0.48 0.50 0.28 0.33 0.42 cp 57 | KDSA_ECOLI 0.51 0.37 0.48 0.50 0.35 0.36 0.45 cp 58 | LEU3_ECOLI 0.40 0.35 0.48 0.50 0.45 0.33 0.42 cp 59 | LON_ECOLI 0.44 0.34 0.48 0.50 0.30 0.33 0.43 cp 60 | LPLA_ECOLI 0.42 0.38 0.48 0.50 0.54 0.34 0.43 cp 61 | LYSR_ECOLI 0.44 0.56 0.48 0.50 0.50 0.46 0.54 cp 62 | MALQ_ECOLI 0.52 0.36 0.48 0.50 0.41 0.28 0.38 cp 63 | MALZ_ECOLI 0.36 0.41 0.48 0.50 0.48 0.47 0.54 cp 64 | MASY_ECOLI 0.18 0.30 0.48 0.50 0.46 0.24 0.35 cp 65 | METB_ECOLI 0.47 0.29 0.48 0.50 0.51 0.33 0.43 cp 66 | METC_ECOLI 0.24 0.43 0.48 0.50 0.54 0.52 0.59 cp 67 | METK_ECOLI 0.25 0.37 0.48 0.50 0.41 0.33 0.42 cp 68 | METR_ECOLI 0.52 0.57 0.48 0.50 0.42 0.47 0.54 cp 69 | METX_ECOLI 0.25 0.37 0.48 0.50 0.43 0.26 0.36 cp 70 | MURF_ECOLI 0.35 0.48 0.48 0.50 0.56 0.40 0.48 cp 71 | NADA_ECOLI 0.26 0.26 0.48 0.50 0.34 0.25 0.35 cp 72 | NFRC_ECOLI 0.44 0.51 0.48 0.50 0.47 0.26 0.36 cp 73 | NHAR_ECOLI 0.37 0.50 0.48 0.50 0.42 0.36 0.45 cp 74 | NIRD_ECOLI 0.44 0.42 0.48 0.50 0.42 0.25 0.20 cp 75 | OMPR_ECOLI 0.24 0.43 0.48 0.50 0.37 0.28 0.38 cp 76 | OTC1_ECOLI 0.42 0.30 0.48 0.50 0.48 0.26 0.36 cp 77 | OTC2_ECOLI 0.48 0.42 0.48 0.50 0.45 0.25 0.35 cp 78 | PEPE_ECOLI 0.41 0.48 0.48 0.50 0.51 0.44 0.51 cp 79 | PFLA_ECOLI 0.44 0.28 0.48 0.50 0.43 0.27 0.37 cp 80 | PFLB_ECOLI 0.29 0.41 0.48 0.50 0.48 0.38 0.46 cp 81 | PGK_ECOLI 0.34 0.28 0.48 0.50 0.41 0.35 0.44 cp 82 | PHOB_ECOLI 0.41 0.43 0.48 0.50 0.45 0.31 0.41 cp 83 | PHOH_ECOLI 0.29 0.47 0.48 0.50 0.41 0.23 0.34 cp 84 | PMBA_ECOLI 0.34 0.55 0.48 0.50 0.58 0.31 0.41 cp 85 | PNP_ECOLI 0.36 0.56 0.48 0.50 0.43 0.45 0.53 cp 86 | PROB_ECOLI 0.40 0.46 0.48 0.50 0.52 0.49 0.56 cp 87 | PT1A_ECOLI 0.50 0.49 0.48 0.50 0.49 0.46 0.53 cp 88 | PT1_ECOLI 0.52 0.44 0.48 0.50 0.37 0.36 0.42 cp 89 | PTCA_ECOLI 0.50 0.51 0.48 0.50 0.27 0.23 0.34 cp 90 | PTCB_ECOLI 0.53 0.42 0.48 0.50 0.16 0.29 0.39 cp 91 | PTFA_ECOLI 0.34 0.46 0.48 0.50 0.52 0.35 0.44 cp 92 | PTGA_ECOLI 0.40 0.42 0.48 0.50 0.37 0.27 0.27 cp 93 | PTHA_ECOLI 0.41 0.43 0.48 0.50 0.50 0.24 0.25 cp 94 | PTHP_ECOLI 0.30 0.45 0.48 0.50 0.36 0.21 0.32 cp 95 | PTKA_ECOLI 0.31 0.47 0.48 0.50 0.29 0.28 0.39 cp 96 | PTKB_ECOLI 0.64 0.76 0.48 0.50 0.45 0.35 0.38 cp 97 | PTNA_ECOLI 0.35 0.37 0.48 0.50 0.30 0.34 0.43 cp 98 | PTWB_ECOLI 0.57 0.54 0.48 0.50 0.37 0.28 0.33 cp 99 | PTWX_ECOLI 0.65 0.55 0.48 0.50 0.34 0.37 0.28 cp 100 | RHAR_ECOLI 0.51 0.46 0.48 0.50 0.58 0.31 0.41 cp 101 | RHAS_ECOLI 0.38 0.40 0.48 0.50 0.63 0.25 0.35 cp 102 | RIMI_ECOLI 0.24 0.57 0.48 0.50 0.63 0.34 0.43 cp 103 | RIMJ_ECOLI 0.38 0.26 0.48 0.50 0.54 0.16 0.28 cp 104 | RIML_ECOLI 0.33 0.47 0.48 0.50 0.53 0.18 0.29 cp 105 | RNB_ECOLI 0.24 0.34 0.48 0.50 0.38 0.30 0.40 cp 106 | RNC_ECOLI 0.26 0.50 0.48 0.50 0.44 0.32 0.41 cp 107 | RND_ECOLI 0.44 0.49 0.48 0.50 0.39 0.38 0.40 cp 108 | RNE_ECOLI 0.43 0.32 0.48 0.50 0.33 0.45 0.52 cp 109 | SERC_ECOLI 0.49 0.43 0.48 0.50 0.49 0.30 0.40 cp 110 | SLYD_ECOLI 0.47 0.28 0.48 0.50 0.56 0.20 0.25 cp 111 | SOXS_ECOLI 0.32 0.33 0.48 0.50 0.60 0.06 0.20 cp 112 | SYA_ECOLI 0.34 0.35 0.48 0.50 0.51 0.49 0.56 cp 113 | SYC_ECOLI 0.35 0.34 0.48 0.50 0.46 0.30 0.27 cp 114 | SYD_ECOLI 0.38 0.30 0.48 0.50 0.43 0.29 0.39 cp 115 | SYE_ECOLI 0.38 0.44 0.48 0.50 0.43 0.20 0.31 cp 116 | SYFA_ECOLI 0.41 0.51 0.48 0.50 0.58 0.20 0.31 cp 117 | SYFB_ECOLI 0.34 0.42 0.48 0.50 0.41 0.34 0.43 cp 118 | SYGA_ECOLI 0.51 0.49 0.48 0.50 0.53 0.14 0.26 cp 119 | SYGB_ECOLI 0.25 0.51 0.48 0.50 0.37 0.42 0.50 cp 120 | SYH_ECOLI 0.29 0.28 0.48 0.50 0.50 0.42 0.50 cp 121 | SYI_ECOLI 0.25 
0.26 0.48 0.50 0.39 0.32 0.42 cp 122 | SYK1_ECOLI 0.24 0.41 0.48 0.50 0.49 0.23 0.34 cp 123 | SYK2_ECOLI 0.17 0.39 0.48 0.50 0.53 0.30 0.39 cp 124 | SYL_ECOLI 0.04 0.31 0.48 0.50 0.41 0.29 0.39 cp 125 | SYM_ECOLI 0.61 0.36 0.48 0.50 0.49 0.35 0.44 cp 126 | SYP_ECOLI 0.34 0.51 0.48 0.50 0.44 0.37 0.46 cp 127 | SYQ_ECOLI 0.28 0.33 0.48 0.50 0.45 0.22 0.33 cp 128 | SYR_ECOLI 0.40 0.46 0.48 0.50 0.42 0.35 0.44 cp 129 | SYS_ECOLI 0.23 0.34 0.48 0.50 0.43 0.26 0.37 cp 130 | SYT_ECOLI 0.37 0.44 0.48 0.50 0.42 0.39 0.47 cp 131 | SYV_ECOLI 0.00 0.38 0.48 0.50 0.42 0.48 0.55 cp 132 | SYW_ECOLI 0.39 0.31 0.48 0.50 0.38 0.34 0.43 cp 133 | SYY_ECOLI 0.30 0.44 0.48 0.50 0.49 0.22 0.33 cp 134 | THGA_ECOLI 0.27 0.30 0.48 0.50 0.71 0.28 0.39 cp 135 | THIK_ECOLI 0.17 0.52 0.48 0.50 0.49 0.37 0.46 cp 136 | TYRB_ECOLI 0.36 0.42 0.48 0.50 0.53 0.32 0.41 cp 137 | UBIC_ECOLI 0.30 0.37 0.48 0.50 0.43 0.18 0.30 cp 138 | UGPQ_ECOLI 0.26 0.40 0.48 0.50 0.36 0.26 0.37 cp 139 | USPA_ECOLI 0.40 0.41 0.48 0.50 0.55 0.22 0.33 cp 140 | UVRB_ECOLI 0.22 0.34 0.48 0.50 0.42 0.29 0.39 cp 141 | UVRC_ECOLI 0.44 0.35 0.48 0.50 0.44 0.52 0.59 cp 142 | XGPT_ECOLI 0.27 0.42 0.48 0.50 0.37 0.38 0.43 cp 143 | XYLA_ECOLI 0.16 0.43 0.48 0.50 0.54 0.27 0.37 cp 144 | EMRA_ECOLI 0.06 0.61 0.48 0.50 0.49 0.92 0.37 im 145 | AAS_ECOLI 0.44 0.52 0.48 0.50 0.43 0.47 0.54 im 146 | AMPE_ECOLI 0.63 0.47 0.48 0.50 0.51 0.82 0.84 im 147 | ARAE_ECOLI 0.23 0.48 0.48 0.50 0.59 0.88 0.89 im 148 | ARAH_ECOLI 0.34 0.49 0.48 0.50 0.58 0.85 0.80 im 149 | AROP_ECOLI 0.43 0.40 0.48 0.50 0.58 0.75 0.78 im 150 | ATKB_ECOLI 0.46 0.61 0.48 0.50 0.48 0.86 0.87 im 151 | ATP6_ECOLI 0.27 0.35 0.48 0.50 0.51 0.77 0.79 im 152 | BETT_ECOLI 0.52 0.39 0.48 0.50 0.65 0.71 0.73 im 153 | CODB_ECOLI 0.29 0.47 0.48 0.50 0.71 0.65 0.69 im 154 | CYDA_ECOLI 0.55 0.47 0.48 0.50 0.57 0.78 0.80 im 155 | CYOC_ECOLI 0.12 0.67 0.48 0.50 0.74 0.58 0.63 im 156 | CYOD_ECOLI 0.40 0.50 0.48 0.50 0.65 0.82 0.84 im 157 | DCTA_ECOLI 0.73 0.36 0.48 0.50 0.53 0.91 0.92 im 158 | DHG_ECOLI 0.84 0.44 0.48 0.50 0.48 0.71 0.74 im 159 | DHSC_ECOLI 0.48 0.45 0.48 0.50 0.60 0.78 0.80 im 160 | DHSD_ECOLI 0.54 0.49 0.48 0.50 0.40 0.87 0.88 im 161 | DPPC_ECOLI 0.48 0.41 0.48 0.50 0.51 0.90 0.88 im 162 | DSBB_ECOLI 0.50 0.66 0.48 0.50 0.31 0.92 0.92 im 163 | ENVZ_ECOLI 0.72 0.46 0.48 0.50 0.51 0.66 0.70 im 164 | EXBB_ECOLI 0.47 0.55 0.48 0.50 0.58 0.71 0.75 im 165 | FRDC_ECOLI 0.33 0.56 0.48 0.50 0.33 0.78 0.80 im 166 | FRDD_ECOLI 0.64 0.58 0.48 0.50 0.48 0.78 0.73 im 167 | FTSW_ECOLI 0.54 0.57 0.48 0.50 0.56 0.81 0.83 im 168 | GABP_ECOLI 0.47 0.59 0.48 0.50 0.52 0.76 0.79 im 169 | GALP_ECOLI 0.63 0.50 0.48 0.50 0.59 0.85 0.86 im 170 | GLNP_ECOLI 0.49 0.42 0.48 0.50 0.53 0.79 0.81 im 171 | GLPT_ECOLI 0.31 0.50 0.48 0.50 0.57 0.84 0.85 im 172 | GLTP_ECOLI 0.74 0.44 0.48 0.50 0.55 0.88 0.89 im 173 | KDGL_ECOLI 0.33 0.45 0.48 0.50 0.45 0.88 0.89 im 174 | KGTP_ECOLI 0.45 0.40 0.48 0.50 0.61 0.74 0.77 im 175 | LACY_ECOLI 0.71 0.40 0.48 0.50 0.71 0.70 0.74 im 176 | LGT_ECOLI 0.50 0.37 0.48 0.50 0.66 0.64 0.69 im 177 | LLDP_ECOLI 0.66 0.53 0.48 0.50 0.59 0.66 0.66 im 178 | LNT_ECOLI 0.60 0.61 0.48 0.50 0.54 0.67 0.71 im 179 | LSPA_ECOLI 0.83 0.37 0.48 0.50 0.61 0.71 0.74 im 180 | LYSP_ECOLI 0.34 0.51 0.48 0.50 0.67 0.90 0.90 im 181 | MALF_ECOLI 0.63 0.54 0.48 0.50 0.65 0.79 0.81 im 182 | MALG_ECOLI 0.70 0.40 0.48 0.50 0.56 0.86 0.83 im 183 | MCP3_ECOLI 0.60 0.50 1.00 0.50 0.54 0.77 0.80 im 184 | MSBB_ECOLI 0.16 0.51 0.48 0.50 0.33 0.39 0.48 im 185 | MTR_ECOLI 0.74 0.70 0.48 0.50 0.66 0.65 0.69 im 186 | NANT_ECOLI 
0.20 0.46 0.48 0.50 0.57 0.78 0.81 im 187 | NHAA_ECOLI 0.89 0.55 0.48 0.50 0.51 0.72 0.76 im 188 | NHAB_ECOLI 0.70 0.46 0.48 0.50 0.56 0.78 0.73 im 189 | PHEP_ECOLI 0.12 0.43 0.48 0.50 0.63 0.70 0.74 im 190 | PHOR_ECOLI 0.61 0.52 0.48 0.50 0.54 0.67 0.52 im 191 | PNTA_ECOLI 0.33 0.37 0.48 0.50 0.46 0.65 0.69 im 192 | POTE_ECOLI 0.63 0.65 0.48 0.50 0.66 0.67 0.71 im 193 | PROP_ECOLI 0.41 0.51 0.48 0.50 0.53 0.75 0.78 im 194 | PSTA_ECOLI 0.34 0.67 0.48 0.50 0.52 0.76 0.79 im 195 | PSTC_ECOLI 0.58 0.34 0.48 0.50 0.56 0.87 0.81 im 196 | PTAA_ECOLI 0.59 0.56 0.48 0.50 0.55 0.80 0.82 im 197 | PTBA_ECOLI 0.51 0.40 0.48 0.50 0.57 0.62 0.67 im 198 | PTCC_ECOLI 0.50 0.57 0.48 0.50 0.71 0.61 0.66 im 199 | PTDA_ECOLI 0.60 0.46 0.48 0.50 0.45 0.81 0.83 im 200 | PTFB_ECOLI 0.37 0.47 0.48 0.50 0.39 0.76 0.79 im 201 | PTGB_ECOLI 0.58 0.55 0.48 0.50 0.57 0.70 0.74 im 202 | PTHB_ECOLI 0.36 0.47 0.48 0.50 0.51 0.69 0.72 im 203 | PTMA_ECOLI 0.39 0.41 0.48 0.50 0.52 0.72 0.75 im 204 | PTOA_ECOLI 0.35 0.51 0.48 0.50 0.61 0.71 0.74 im 205 | PTTB_ECOLI 0.31 0.44 0.48 0.50 0.50 0.79 0.82 im 206 | RODA_ECOLI 0.61 0.66 0.48 0.50 0.46 0.87 0.88 im 207 | SECE_ECOLI 0.48 0.49 0.48 0.50 0.52 0.77 0.71 im 208 | SECF_ECOLI 0.11 0.50 0.48 0.50 0.58 0.72 0.68 im 209 | SECY_ECOLI 0.31 0.36 0.48 0.50 0.58 0.94 0.94 im 210 | TNAB_ECOLI 0.68 0.51 0.48 0.50 0.71 0.75 0.78 im 211 | XYLE_ECOLI 0.69 0.39 0.48 0.50 0.57 0.76 0.79 im 212 | YCEE_ECOLI 0.52 0.54 0.48 0.50 0.62 0.76 0.79 im 213 | EXBD_ECOLI 0.46 0.59 0.48 0.50 0.36 0.76 0.23 im 214 | FTSL_ECOLI 0.36 0.45 0.48 0.50 0.38 0.79 0.17 im 215 | FTSN_ECOLI 0.00 0.51 0.48 0.50 0.35 0.67 0.44 im 216 | FTSQ_ECOLI 0.10 0.49 0.48 0.50 0.41 0.67 0.21 im 217 | MOTB_ECOLI 0.30 0.51 0.48 0.50 0.42 0.61 0.34 im 218 | TOLA_ECOLI 0.61 0.47 0.48 0.50 0.00 0.80 0.32 im 219 | TOLQ_ECOLI 0.63 0.75 0.48 0.50 0.64 0.73 0.66 im 220 | EMRB_ECOLI 0.71 0.52 0.48 0.50 0.64 1.00 0.99 im 221 | ATKC_ECOLI 0.85 0.53 0.48 0.50 0.53 0.52 0.35 imS 222 | NFRB_ECOLI 0.63 0.49 0.48 0.50 0.54 0.76 0.79 imS 223 | NLPA_ECOLI 0.75 0.55 1.00 1.00 0.40 0.47 0.30 imL 224 | CYOA_ECOLI 0.70 0.39 1.00 0.50 0.51 0.82 0.84 imL 225 | ATKA_ECOLI 0.72 0.42 0.48 0.50 0.65 0.77 0.79 imU 226 | BCR_ECOLI 0.79 0.41 0.48 0.50 0.66 0.81 0.83 imU 227 | CADB_ECOLI 0.83 0.48 0.48 0.50 0.65 0.76 0.79 imU 228 | CAIT_ECOLI 0.69 0.43 0.48 0.50 0.59 0.74 0.77 imU 229 | CPXA_ECOLI 0.79 0.36 0.48 0.50 0.46 0.82 0.70 imU 230 | CRED_ECOLI 0.78 0.33 0.48 0.50 0.57 0.77 0.79 imU 231 | CYDB_ECOLI 0.75 0.37 0.48 0.50 0.64 0.70 0.74 imU 232 | CYOB_ECOLI 0.59 0.29 0.48 0.50 0.64 0.75 0.77 imU 233 | CYOE_ECOLI 0.67 0.37 0.48 0.50 0.54 0.64 0.68 imU 234 | DMSC_ECOLI 0.66 0.48 0.48 0.50 0.54 0.70 0.74 imU 235 | DPPB_ECOLI 0.64 0.46 0.48 0.50 0.48 0.73 0.76 imU 236 | DSBD_ECOLI 0.76 0.71 0.48 0.50 0.50 0.71 0.75 imU 237 | FEPD_ECOLI 0.84 0.49 0.48 0.50 0.55 0.78 0.74 imU 238 | FEPG_ECOLI 0.77 0.55 0.48 0.50 0.51 0.78 0.74 imU 239 | FTSH_ECOLI 0.81 0.44 0.48 0.50 0.42 0.67 0.68 imU 240 | GLTS_ECOLI 0.58 0.60 0.48 0.50 0.59 0.73 0.76 imU 241 | KEFC_ECOLI 0.63 0.42 0.48 0.50 0.48 0.77 0.80 imU 242 | KUP_ECOLI 0.62 0.42 0.48 0.50 0.58 0.79 0.81 imU 243 | MCP1_ECOLI 0.86 0.39 0.48 0.50 0.59 0.89 0.90 imU 244 | MCP2_ECOLI 0.81 0.53 0.48 0.50 0.57 0.87 0.88 imU 245 | MCP4_ECOLI 0.87 0.49 0.48 0.50 0.61 0.76 0.79 imU 246 | MELB_ECOLI 0.47 0.46 0.48 0.50 0.62 0.74 0.77 imU 247 | MOTA_ECOLI 0.76 0.41 0.48 0.50 0.50 0.59 0.62 imU 248 | NUPC_ECOLI 0.70 0.53 0.48 0.50 0.70 0.86 0.87 imU 249 | NUPG_ECOLI 0.64 0.45 0.48 0.50 0.67 0.61 0.66 imU 250 | PNTB_ECOLI 0.81 
0.52 0.48 0.50 0.57 0.78 0.80 imU 251 | PTKC_ECOLI 0.73 0.26 0.48 0.50 0.57 0.75 0.78 imU 252 | RHAT_ECOLI 0.49 0.61 1.00 0.50 0.56 0.71 0.74 imU 253 | SECD_ECOLI 0.88 0.42 0.48 0.50 0.52 0.73 0.75 imU 254 | SECG_ECOLI 0.84 0.54 0.48 0.50 0.75 0.92 0.70 imU 255 | TEHA_ECOLI 0.63 0.51 0.48 0.50 0.64 0.72 0.76 imU 256 | TYRP_ECOLI 0.86 0.55 0.48 0.50 0.63 0.81 0.83 imU 257 | UHPB_ECOLI 0.79 0.54 0.48 0.50 0.50 0.66 0.68 imU 258 | TONB_ECOLI 0.57 0.38 0.48 0.50 0.06 0.49 0.33 imU 259 | LEP_ECOLI 0.78 0.44 0.48 0.50 0.45 0.73 0.68 imU 260 | FADL_ECOLI 0.78 0.68 0.48 0.50 0.83 0.40 0.29 om 261 | FHUA_ECOLI 0.63 0.69 0.48 0.50 0.65 0.41 0.28 om 262 | LAMB_ECOLI 0.67 0.88 0.48 0.50 0.73 0.50 0.25 om 263 | NFRA_ECOLI 0.61 0.75 0.48 0.50 0.51 0.33 0.33 om 264 | NMPC_ECOLI 0.67 0.84 0.48 0.50 0.74 0.54 0.37 om 265 | OMPA_ECOLI 0.74 0.90 0.48 0.50 0.57 0.53 0.29 om 266 | OMPC_ECOLI 0.73 0.84 0.48 0.50 0.86 0.58 0.29 om 267 | OMPF_ECOLI 0.75 0.76 0.48 0.50 0.83 0.57 0.30 om 268 | OMPX_ECOLI 0.77 0.57 0.48 0.50 0.88 0.53 0.20 om 269 | PHOE_ECOLI 0.74 0.78 0.48 0.50 0.75 0.54 0.15 om 270 | TSX_ECOLI 0.68 0.76 0.48 0.50 0.84 0.45 0.27 om 271 | BTUB_ECOLI 0.56 0.68 0.48 0.50 0.77 0.36 0.45 om 272 | CIRA_ECOLI 0.65 0.51 0.48 0.50 0.66 0.54 0.33 om 273 | FECA_ECOLI 0.52 0.81 0.48 0.50 0.72 0.38 0.38 om 274 | FEPA_ECOLI 0.64 0.57 0.48 0.50 0.70 0.33 0.26 om 275 | FHUE_ECOLI 0.60 0.76 1.00 0.50 0.77 0.59 0.52 om 276 | OMPP_ECOLI 0.69 0.59 0.48 0.50 0.77 0.39 0.21 om 277 | OMPT_ECOLI 0.63 0.49 0.48 0.50 0.79 0.45 0.28 om 278 | TOLC_ECOLI 0.71 0.71 0.48 0.50 0.68 0.43 0.36 om 279 | PA1_ECOLI 0.68 0.63 0.48 0.50 0.73 0.40 0.30 om 280 | MULI_ECOLI 0.77 0.57 1.00 0.50 0.37 0.54 0.01 omL 281 | NLPB_ECOLI 0.66 0.49 1.00 0.50 0.54 0.56 0.36 omL 282 | NLPE_ECOLI 0.71 0.46 1.00 0.50 0.52 0.59 0.30 omL 283 | PAL_ECOLI 0.67 0.55 1.00 0.50 0.66 0.58 0.16 omL 284 | SLP_ECOLI 0.68 0.49 1.00 0.50 0.62 0.55 0.28 omL 285 | AGP_ECOLI 0.74 0.49 0.48 0.50 0.42 0.54 0.36 pp 286 | AMY1_ECOLI 0.70 0.61 0.48 0.50 0.56 0.52 0.43 pp 287 | ARAF_ECOLI 0.66 0.86 0.48 0.50 0.34 0.41 0.36 pp 288 | ASG2_ECOLI 0.73 0.78 0.48 0.50 0.58 0.51 0.31 pp 289 | BGLX_ECOLI 0.65 0.57 0.48 0.50 0.47 0.47 0.51 pp 290 | C562_ECOLI 0.72 0.86 0.48 0.50 0.17 0.55 0.21 pp 291 | CN16_ECOLI 0.67 0.70 0.48 0.50 0.46 0.45 0.33 pp 292 | CYPH_ECOLI 0.67 0.81 0.48 0.50 0.54 0.49 0.23 pp 293 | CYSP_ECOLI 0.67 0.61 0.48 0.50 0.51 0.37 0.38 pp 294 | DGAL_ECOLI 0.63 1.00 0.48 0.50 0.35 0.51 0.49 pp 295 | DPPA_ECOLI 0.57 0.59 0.48 0.50 0.39 0.47 0.33 pp 296 | DSBA_ECOLI 0.71 0.71 0.48 0.50 0.40 0.54 0.39 pp 297 | DSBC_ECOLI 0.66 0.74 0.48 0.50 0.31 0.38 0.43 pp 298 | ECOT_ECOLI 0.67 0.81 0.48 0.50 0.25 0.42 0.25 pp 299 | ECPD_ECOLI 0.64 0.72 0.48 0.50 0.49 0.42 0.19 pp 300 | FECB_ECOLI 0.68 0.82 0.48 0.50 0.38 0.65 0.56 pp 301 | FECR_ECOLI 0.32 0.39 0.48 0.50 0.53 0.28 0.38 pp 302 | FEPB_ECOLI 0.70 0.64 0.48 0.50 0.47 0.51 0.47 pp 303 | FIMC_ECOLI 0.63 0.57 0.48 0.50 0.49 0.70 0.20 pp 304 | GGT_ECOLI 0.74 0.82 0.48 0.50 0.49 0.49 0.41 pp 305 | GLNH_ECOLI 0.63 0.86 0.48 0.50 0.39 0.47 0.34 pp 306 | GLPQ_ECOLI 0.63 0.83 0.48 0.50 0.40 0.39 0.19 pp 307 | HTRA_ECOLI 0.63 0.71 0.48 0.50 0.60 0.40 0.39 pp 308 | LIVJ_ECOLI 0.71 0.86 0.48 0.50 0.40 0.54 0.32 pp 309 | LIVK_ECOLI 0.68 0.78 0.48 0.50 0.43 0.44 0.42 pp 310 | MALE_ECOLI 0.64 0.84 0.48 0.50 0.37 0.45 0.40 pp 311 | MALM_ECOLI 0.74 0.47 0.48 0.50 0.50 0.57 0.42 pp 312 | MEPA_ECOLI 0.75 0.84 0.48 0.50 0.35 0.52 0.33 pp 313 | MODA_ECOLI 0.63 0.65 0.48 0.50 0.39 0.44 0.35 pp 314 | NRFA_ECOLI 0.69 0.67 0.48 0.50 0.30 0.39 
0.24 pp 315 | NRFF_ECOLI 0.70 0.71 0.48 0.50 0.42 0.84 0.85 pp 316 | OPPA_ECOLI 0.69 0.80 0.48 0.50 0.46 0.57 0.26 pp 317 | OSMY_ECOLI 0.64 0.66 0.48 0.50 0.41 0.39 0.20 pp 318 | POTD_ECOLI 0.63 0.80 0.48 0.50 0.46 0.31 0.29 pp 319 | POTF_ECOLI 0.66 0.71 0.48 0.50 0.41 0.50 0.35 pp 320 | PPA_ECOLI 0.69 0.59 0.48 0.50 0.46 0.44 0.52 pp 321 | PPB_ECOLI 0.68 0.67 0.48 0.50 0.49 0.40 0.34 pp 322 | PROX_ECOLI 0.64 0.78 0.48 0.50 0.50 0.36 0.38 pp 323 | PSTS_ECOLI 0.62 0.78 0.48 0.50 0.47 0.49 0.54 pp 324 | PTR_ECOLI 0.76 0.73 0.48 0.50 0.44 0.39 0.39 pp 325 | RBSB_ECOLI 0.64 0.81 0.48 0.50 0.37 0.39 0.44 pp 326 | SPEA_ECOLI 0.29 0.39 0.48 0.50 0.52 0.40 0.48 pp 327 | SUBI_ECOLI 0.62 0.83 0.48 0.50 0.46 0.36 0.40 pp 328 | TBPA_ECOLI 0.56 0.54 0.48 0.50 0.43 0.37 0.30 pp 329 | TESA_ECOLI 0.69 0.66 0.48 0.50 0.41 0.50 0.25 pp 330 | TOLB_ECOLI 0.69 0.65 0.48 0.50 0.63 0.48 0.41 pp 331 | TORA_ECOLI 0.43 0.59 0.48 0.50 0.52 0.49 0.56 pp 332 | TREA_ECOLI 0.74 0.56 0.48 0.50 0.47 0.68 0.30 pp 333 | UGPB_ECOLI 0.71 0.57 0.48 0.50 0.48 0.35 0.32 pp 334 | USHA_ECOLI 0.61 0.60 0.48 0.50 0.44 0.39 0.38 pp 335 | XYLF_ECOLI 0.59 0.61 0.48 0.50 0.42 0.42 0.37 pp 336 | YTFQ_ECOLI 0.74 0.74 0.48 0.50 0.31 0.53 0.52 pp 337 | -------------------------------------------------------------------------------- /data/glass.data: -------------------------------------------------------------------------------- 1 | 1,1.52101,13.64,4.49,1.10,71.78,0.06,8.75,0.00,0.00,1 2 | 2,1.51761,13.89,3.60,1.36,72.73,0.48,7.83,0.00,0.00,1 3 | 3,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.00,0.00,1 4 | 4,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.00,0.00,1 5 | 5,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.00,0.00,1 6 | 6,1.51596,12.79,3.61,1.62,72.97,0.64,8.07,0.00,0.26,1 7 | 7,1.51743,13.30,3.60,1.14,73.09,0.58,8.17,0.00,0.00,1 8 | 8,1.51756,13.15,3.61,1.05,73.24,0.57,8.24,0.00,0.00,1 9 | 9,1.51918,14.04,3.58,1.37,72.08,0.56,8.30,0.00,0.00,1 10 | 10,1.51755,13.00,3.60,1.36,72.99,0.57,8.40,0.00,0.11,1 11 | 11,1.51571,12.72,3.46,1.56,73.20,0.67,8.09,0.00,0.24,1 12 | 12,1.51763,12.80,3.66,1.27,73.01,0.60,8.56,0.00,0.00,1 13 | 13,1.51589,12.88,3.43,1.40,73.28,0.69,8.05,0.00,0.24,1 14 | 14,1.51748,12.86,3.56,1.27,73.21,0.54,8.38,0.00,0.17,1 15 | 15,1.51763,12.61,3.59,1.31,73.29,0.58,8.50,0.00,0.00,1 16 | 16,1.51761,12.81,3.54,1.23,73.24,0.58,8.39,0.00,0.00,1 17 | 17,1.51784,12.68,3.67,1.16,73.11,0.61,8.70,0.00,0.00,1 18 | 18,1.52196,14.36,3.85,0.89,71.36,0.15,9.15,0.00,0.00,1 19 | 19,1.51911,13.90,3.73,1.18,72.12,0.06,8.89,0.00,0.00,1 20 | 20,1.51735,13.02,3.54,1.69,72.73,0.54,8.44,0.00,0.07,1 21 | 21,1.51750,12.82,3.55,1.49,72.75,0.54,8.52,0.00,0.19,1 22 | 22,1.51966,14.77,3.75,0.29,72.02,0.03,9.00,0.00,0.00,1 23 | 23,1.51736,12.78,3.62,1.29,72.79,0.59,8.70,0.00,0.00,1 24 | 24,1.51751,12.81,3.57,1.35,73.02,0.62,8.59,0.00,0.00,1 25 | 25,1.51720,13.38,3.50,1.15,72.85,0.50,8.43,0.00,0.00,1 26 | 26,1.51764,12.98,3.54,1.21,73.00,0.65,8.53,0.00,0.00,1 27 | 27,1.51793,13.21,3.48,1.41,72.64,0.59,8.43,0.00,0.00,1 28 | 28,1.51721,12.87,3.48,1.33,73.04,0.56,8.43,0.00,0.00,1 29 | 29,1.51768,12.56,3.52,1.43,73.15,0.57,8.54,0.00,0.00,1 30 | 30,1.51784,13.08,3.49,1.28,72.86,0.60,8.49,0.00,0.00,1 31 | 31,1.51768,12.65,3.56,1.30,73.08,0.61,8.69,0.00,0.14,1 32 | 32,1.51747,12.84,3.50,1.14,73.27,0.56,8.55,0.00,0.00,1 33 | 33,1.51775,12.85,3.48,1.23,72.97,0.61,8.56,0.09,0.22,1 34 | 34,1.51753,12.57,3.47,1.38,73.39,0.60,8.55,0.00,0.06,1 35 | 35,1.51783,12.69,3.54,1.34,72.95,0.57,8.75,0.00,0.00,1 36 | 36,1.51567,13.29,3.45,1.21,72.74,0.56,8.57,0.00,0.00,1 37 | 
37,1.51909,13.89,3.53,1.32,71.81,0.51,8.78,0.11,0.00,1 38 | 38,1.51797,12.74,3.48,1.35,72.96,0.64,8.68,0.00,0.00,1 39 | 39,1.52213,14.21,3.82,0.47,71.77,0.11,9.57,0.00,0.00,1 40 | 40,1.52213,14.21,3.82,0.47,71.77,0.11,9.57,0.00,0.00,1 41 | 41,1.51793,12.79,3.50,1.12,73.03,0.64,8.77,0.00,0.00,1 42 | 42,1.51755,12.71,3.42,1.20,73.20,0.59,8.64,0.00,0.00,1 43 | 43,1.51779,13.21,3.39,1.33,72.76,0.59,8.59,0.00,0.00,1 44 | 44,1.52210,13.73,3.84,0.72,71.76,0.17,9.74,0.00,0.00,1 45 | 45,1.51786,12.73,3.43,1.19,72.95,0.62,8.76,0.00,0.30,1 46 | 46,1.51900,13.49,3.48,1.35,71.95,0.55,9.00,0.00,0.00,1 47 | 47,1.51869,13.19,3.37,1.18,72.72,0.57,8.83,0.00,0.16,1 48 | 48,1.52667,13.99,3.70,0.71,71.57,0.02,9.82,0.00,0.10,1 49 | 49,1.52223,13.21,3.77,0.79,71.99,0.13,10.02,0.00,0.00,1 50 | 50,1.51898,13.58,3.35,1.23,72.08,0.59,8.91,0.00,0.00,1 51 | 51,1.52320,13.72,3.72,0.51,71.75,0.09,10.06,0.00,0.16,1 52 | 52,1.51926,13.20,3.33,1.28,72.36,0.60,9.14,0.00,0.11,1 53 | 53,1.51808,13.43,2.87,1.19,72.84,0.55,9.03,0.00,0.00,1 54 | 54,1.51837,13.14,2.84,1.28,72.85,0.55,9.07,0.00,0.00,1 55 | 55,1.51778,13.21,2.81,1.29,72.98,0.51,9.02,0.00,0.09,1 56 | 56,1.51769,12.45,2.71,1.29,73.70,0.56,9.06,0.00,0.24,1 57 | 57,1.51215,12.99,3.47,1.12,72.98,0.62,8.35,0.00,0.31,1 58 | 58,1.51824,12.87,3.48,1.29,72.95,0.60,8.43,0.00,0.00,1 59 | 59,1.51754,13.48,3.74,1.17,72.99,0.59,8.03,0.00,0.00,1 60 | 60,1.51754,13.39,3.66,1.19,72.79,0.57,8.27,0.00,0.11,1 61 | 61,1.51905,13.60,3.62,1.11,72.64,0.14,8.76,0.00,0.00,1 62 | 62,1.51977,13.81,3.58,1.32,71.72,0.12,8.67,0.69,0.00,1 63 | 63,1.52172,13.51,3.86,0.88,71.79,0.23,9.54,0.00,0.11,1 64 | 64,1.52227,14.17,3.81,0.78,71.35,0.00,9.69,0.00,0.00,1 65 | 65,1.52172,13.48,3.74,0.90,72.01,0.18,9.61,0.00,0.07,1 66 | 66,1.52099,13.69,3.59,1.12,71.96,0.09,9.40,0.00,0.00,1 67 | 67,1.52152,13.05,3.65,0.87,72.22,0.19,9.85,0.00,0.17,1 68 | 68,1.52152,13.05,3.65,0.87,72.32,0.19,9.85,0.00,0.17,1 69 | 69,1.52152,13.12,3.58,0.90,72.20,0.23,9.82,0.00,0.16,1 70 | 70,1.52300,13.31,3.58,0.82,71.99,0.12,10.17,0.00,0.03,1 71 | 71,1.51574,14.86,3.67,1.74,71.87,0.16,7.36,0.00,0.12,2 72 | 72,1.51848,13.64,3.87,1.27,71.96,0.54,8.32,0.00,0.32,2 73 | 73,1.51593,13.09,3.59,1.52,73.10,0.67,7.83,0.00,0.00,2 74 | 74,1.51631,13.34,3.57,1.57,72.87,0.61,7.89,0.00,0.00,2 75 | 75,1.51596,13.02,3.56,1.54,73.11,0.72,7.90,0.00,0.00,2 76 | 76,1.51590,13.02,3.58,1.51,73.12,0.69,7.96,0.00,0.00,2 77 | 77,1.51645,13.44,3.61,1.54,72.39,0.66,8.03,0.00,0.00,2 78 | 78,1.51627,13.00,3.58,1.54,72.83,0.61,8.04,0.00,0.00,2 79 | 79,1.51613,13.92,3.52,1.25,72.88,0.37,7.94,0.00,0.14,2 80 | 80,1.51590,12.82,3.52,1.90,72.86,0.69,7.97,0.00,0.00,2 81 | 81,1.51592,12.86,3.52,2.12,72.66,0.69,7.97,0.00,0.00,2 82 | 82,1.51593,13.25,3.45,1.43,73.17,0.61,7.86,0.00,0.00,2 83 | 83,1.51646,13.41,3.55,1.25,72.81,0.68,8.10,0.00,0.00,2 84 | 84,1.51594,13.09,3.52,1.55,72.87,0.68,8.05,0.00,0.09,2 85 | 85,1.51409,14.25,3.09,2.08,72.28,1.10,7.08,0.00,0.00,2 86 | 86,1.51625,13.36,3.58,1.49,72.72,0.45,8.21,0.00,0.00,2 87 | 87,1.51569,13.24,3.49,1.47,73.25,0.38,8.03,0.00,0.00,2 88 | 88,1.51645,13.40,3.49,1.52,72.65,0.67,8.08,0.00,0.10,2 89 | 89,1.51618,13.01,3.50,1.48,72.89,0.60,8.12,0.00,0.00,2 90 | 90,1.51640,12.55,3.48,1.87,73.23,0.63,8.08,0.00,0.09,2 91 | 91,1.51841,12.93,3.74,1.11,72.28,0.64,8.96,0.00,0.22,2 92 | 92,1.51605,12.90,3.44,1.45,73.06,0.44,8.27,0.00,0.00,2 93 | 93,1.51588,13.12,3.41,1.58,73.26,0.07,8.39,0.00,0.19,2 94 | 94,1.51590,13.24,3.34,1.47,73.10,0.39,8.22,0.00,0.00,2 95 | 95,1.51629,12.71,3.33,1.49,73.28,0.67,8.24,0.00,0.00,2 96 | 
96,1.51860,13.36,3.43,1.43,72.26,0.51,8.60,0.00,0.00,2 97 | 97,1.51841,13.02,3.62,1.06,72.34,0.64,9.13,0.00,0.15,2 98 | 98,1.51743,12.20,3.25,1.16,73.55,0.62,8.90,0.00,0.24,2 99 | 99,1.51689,12.67,2.88,1.71,73.21,0.73,8.54,0.00,0.00,2 100 | 100,1.51811,12.96,2.96,1.43,72.92,0.60,8.79,0.14,0.00,2 101 | 101,1.51655,12.75,2.85,1.44,73.27,0.57,8.79,0.11,0.22,2 102 | 102,1.51730,12.35,2.72,1.63,72.87,0.70,9.23,0.00,0.00,2 103 | 103,1.51820,12.62,2.76,0.83,73.81,0.35,9.42,0.00,0.20,2 104 | 104,1.52725,13.80,3.15,0.66,70.57,0.08,11.64,0.00,0.00,2 105 | 105,1.52410,13.83,2.90,1.17,71.15,0.08,10.79,0.00,0.00,2 106 | 106,1.52475,11.45,0.00,1.88,72.19,0.81,13.24,0.00,0.34,2 107 | 107,1.53125,10.73,0.00,2.10,69.81,0.58,13.30,3.15,0.28,2 108 | 108,1.53393,12.30,0.00,1.00,70.16,0.12,16.19,0.00,0.24,2 109 | 109,1.52222,14.43,0.00,1.00,72.67,0.10,11.52,0.00,0.08,2 110 | 110,1.51818,13.72,0.00,0.56,74.45,0.00,10.99,0.00,0.00,2 111 | 111,1.52664,11.23,0.00,0.77,73.21,0.00,14.68,0.00,0.00,2 112 | 112,1.52739,11.02,0.00,0.75,73.08,0.00,14.96,0.00,0.00,2 113 | 113,1.52777,12.64,0.00,0.67,72.02,0.06,14.40,0.00,0.00,2 114 | 114,1.51892,13.46,3.83,1.26,72.55,0.57,8.21,0.00,0.14,2 115 | 115,1.51847,13.10,3.97,1.19,72.44,0.60,8.43,0.00,0.00,2 116 | 116,1.51846,13.41,3.89,1.33,72.38,0.51,8.28,0.00,0.00,2 117 | 117,1.51829,13.24,3.90,1.41,72.33,0.55,8.31,0.00,0.10,2 118 | 118,1.51708,13.72,3.68,1.81,72.06,0.64,7.88,0.00,0.00,2 119 | 119,1.51673,13.30,3.64,1.53,72.53,0.65,8.03,0.00,0.29,2 120 | 120,1.51652,13.56,3.57,1.47,72.45,0.64,7.96,0.00,0.00,2 121 | 121,1.51844,13.25,3.76,1.32,72.40,0.58,8.42,0.00,0.00,2 122 | 122,1.51663,12.93,3.54,1.62,72.96,0.64,8.03,0.00,0.21,2 123 | 123,1.51687,13.23,3.54,1.48,72.84,0.56,8.10,0.00,0.00,2 124 | 124,1.51707,13.48,3.48,1.71,72.52,0.62,7.99,0.00,0.00,2 125 | 125,1.52177,13.20,3.68,1.15,72.75,0.54,8.52,0.00,0.00,2 126 | 126,1.51872,12.93,3.66,1.56,72.51,0.58,8.55,0.00,0.12,2 127 | 127,1.51667,12.94,3.61,1.26,72.75,0.56,8.60,0.00,0.00,2 128 | 128,1.52081,13.78,2.28,1.43,71.99,0.49,9.85,0.00,0.17,2 129 | 129,1.52068,13.55,2.09,1.67,72.18,0.53,9.57,0.27,0.17,2 130 | 130,1.52020,13.98,1.35,1.63,71.76,0.39,10.56,0.00,0.18,2 131 | 131,1.52177,13.75,1.01,1.36,72.19,0.33,11.14,0.00,0.00,2 132 | 132,1.52614,13.70,0.00,1.36,71.24,0.19,13.44,0.00,0.10,2 133 | 133,1.51813,13.43,3.98,1.18,72.49,0.58,8.15,0.00,0.00,2 134 | 134,1.51800,13.71,3.93,1.54,71.81,0.54,8.21,0.00,0.15,2 135 | 135,1.51811,13.33,3.85,1.25,72.78,0.52,8.12,0.00,0.00,2 136 | 136,1.51789,13.19,3.90,1.30,72.33,0.55,8.44,0.00,0.28,2 137 | 137,1.51806,13.00,3.80,1.08,73.07,0.56,8.38,0.00,0.12,2 138 | 138,1.51711,12.89,3.62,1.57,72.96,0.61,8.11,0.00,0.00,2 139 | 139,1.51674,12.79,3.52,1.54,73.36,0.66,7.90,0.00,0.00,2 140 | 140,1.51674,12.87,3.56,1.64,73.14,0.65,7.99,0.00,0.00,2 141 | 141,1.51690,13.33,3.54,1.61,72.54,0.68,8.11,0.00,0.00,2 142 | 142,1.51851,13.20,3.63,1.07,72.83,0.57,8.41,0.09,0.17,2 143 | 143,1.51662,12.85,3.51,1.44,73.01,0.68,8.23,0.06,0.25,2 144 | 144,1.51709,13.00,3.47,1.79,72.72,0.66,8.18,0.00,0.00,2 145 | 145,1.51660,12.99,3.18,1.23,72.97,0.58,8.81,0.00,0.24,2 146 | 146,1.51839,12.85,3.67,1.24,72.57,0.62,8.68,0.00,0.35,2 147 | 147,1.51769,13.65,3.66,1.11,72.77,0.11,8.60,0.00,0.00,3 148 | 148,1.51610,13.33,3.53,1.34,72.67,0.56,8.33,0.00,0.00,3 149 | 149,1.51670,13.24,3.57,1.38,72.70,0.56,8.44,0.00,0.10,3 150 | 150,1.51643,12.16,3.52,1.35,72.89,0.57,8.53,0.00,0.00,3 151 | 151,1.51665,13.14,3.45,1.76,72.48,0.60,8.38,0.00,0.17,3 152 | 152,1.52127,14.32,3.90,0.83,71.50,0.00,9.49,0.00,0.00,3 153 | 
153,1.51779,13.64,3.65,0.65,73.00,0.06,8.93,0.00,0.00,3 154 | 154,1.51610,13.42,3.40,1.22,72.69,0.59,8.32,0.00,0.00,3 155 | 155,1.51694,12.86,3.58,1.31,72.61,0.61,8.79,0.00,0.00,3 156 | 156,1.51646,13.04,3.40,1.26,73.01,0.52,8.58,0.00,0.00,3 157 | 157,1.51655,13.41,3.39,1.28,72.64,0.52,8.65,0.00,0.00,3 158 | 158,1.52121,14.03,3.76,0.58,71.79,0.11,9.65,0.00,0.00,3 159 | 159,1.51776,13.53,3.41,1.52,72.04,0.58,8.79,0.00,0.00,3 160 | 160,1.51796,13.50,3.36,1.63,71.94,0.57,8.81,0.00,0.09,3 161 | 161,1.51832,13.33,3.34,1.54,72.14,0.56,8.99,0.00,0.00,3 162 | 162,1.51934,13.64,3.54,0.75,72.65,0.16,8.89,0.15,0.24,3 163 | 163,1.52211,14.19,3.78,0.91,71.36,0.23,9.14,0.00,0.37,3 164 | 164,1.51514,14.01,2.68,3.50,69.89,1.68,5.87,2.20,0.00,5 165 | 165,1.51915,12.73,1.85,1.86,72.69,0.60,10.09,0.00,0.00,5 166 | 166,1.52171,11.56,1.88,1.56,72.86,0.47,11.41,0.00,0.00,5 167 | 167,1.52151,11.03,1.71,1.56,73.44,0.58,11.62,0.00,0.00,5 168 | 168,1.51969,12.64,0.00,1.65,73.75,0.38,11.53,0.00,0.00,5 169 | 169,1.51666,12.86,0.00,1.83,73.88,0.97,10.17,0.00,0.00,5 170 | 170,1.51994,13.27,0.00,1.76,73.03,0.47,11.32,0.00,0.00,5 171 | 171,1.52369,13.44,0.00,1.58,72.22,0.32,12.24,0.00,0.00,5 172 | 172,1.51316,13.02,0.00,3.04,70.48,6.21,6.96,0.00,0.00,5 173 | 173,1.51321,13.00,0.00,3.02,70.70,6.21,6.93,0.00,0.00,5 174 | 174,1.52043,13.38,0.00,1.40,72.25,0.33,12.50,0.00,0.00,5 175 | 175,1.52058,12.85,1.61,2.17,72.18,0.76,9.70,0.24,0.51,5 176 | 176,1.52119,12.97,0.33,1.51,73.39,0.13,11.27,0.00,0.28,5 177 | 177,1.51905,14.00,2.39,1.56,72.37,0.00,9.57,0.00,0.00,6 178 | 178,1.51937,13.79,2.41,1.19,72.76,0.00,9.77,0.00,0.00,6 179 | 179,1.51829,14.46,2.24,1.62,72.38,0.00,9.26,0.00,0.00,6 180 | 180,1.51852,14.09,2.19,1.66,72.67,0.00,9.32,0.00,0.00,6 181 | 181,1.51299,14.40,1.74,1.54,74.55,0.00,7.59,0.00,0.00,6 182 | 182,1.51888,14.99,0.78,1.74,72.50,0.00,9.95,0.00,0.00,6 183 | 183,1.51916,14.15,0.00,2.09,72.74,0.00,10.88,0.00,0.00,6 184 | 184,1.51969,14.56,0.00,0.56,73.48,0.00,11.22,0.00,0.00,6 185 | 185,1.51115,17.38,0.00,0.34,75.41,0.00,6.65,0.00,0.00,6 186 | 186,1.51131,13.69,3.20,1.81,72.81,1.76,5.43,1.19,0.00,7 187 | 187,1.51838,14.32,3.26,2.22,71.25,1.46,5.79,1.63,0.00,7 188 | 188,1.52315,13.44,3.34,1.23,72.38,0.60,8.83,0.00,0.00,7 189 | 189,1.52247,14.86,2.20,2.06,70.26,0.76,9.76,0.00,0.00,7 190 | 190,1.52365,15.79,1.83,1.31,70.43,0.31,8.61,1.68,0.00,7 191 | 191,1.51613,13.88,1.78,1.79,73.10,0.00,8.67,0.76,0.00,7 192 | 192,1.51602,14.85,0.00,2.38,73.28,0.00,8.76,0.64,0.09,7 193 | 193,1.51623,14.20,0.00,2.79,73.46,0.04,9.04,0.40,0.09,7 194 | 194,1.51719,14.75,0.00,2.00,73.02,0.00,8.53,1.59,0.08,7 195 | 195,1.51683,14.56,0.00,1.98,73.29,0.00,8.52,1.57,0.07,7 196 | 196,1.51545,14.14,0.00,2.68,73.39,0.08,9.07,0.61,0.05,7 197 | 197,1.51556,13.87,0.00,2.54,73.23,0.14,9.41,0.81,0.01,7 198 | 198,1.51727,14.70,0.00,2.34,73.28,0.00,8.95,0.66,0.00,7 199 | 199,1.51531,14.38,0.00,2.66,73.10,0.04,9.08,0.64,0.00,7 200 | 200,1.51609,15.01,0.00,2.51,73.05,0.05,8.83,0.53,0.00,7 201 | 201,1.51508,15.15,0.00,2.25,73.50,0.00,8.34,0.63,0.00,7 202 | 202,1.51653,11.95,0.00,1.19,75.18,2.70,8.93,0.00,0.00,7 203 | 203,1.51514,14.85,0.00,2.42,73.72,0.00,8.39,0.56,0.00,7 204 | 204,1.51658,14.80,0.00,1.99,73.11,0.00,8.28,1.71,0.00,7 205 | 205,1.51617,14.95,0.00,2.27,73.30,0.00,8.71,0.67,0.00,7 206 | 206,1.51732,14.95,0.00,1.80,72.99,0.00,8.61,1.55,0.00,7 207 | 207,1.51645,14.94,0.00,1.87,73.11,0.00,8.67,1.38,0.00,7 208 | 208,1.51831,14.39,0.00,1.82,72.86,1.41,6.47,2.88,0.00,7 209 | 209,1.51640,14.37,0.00,2.74,72.85,0.00,9.45,0.54,0.00,7 210 | 
210,1.51623,14.14,0.00,2.88,72.61,0.08,9.18,1.06,0.00,7 211 | 211,1.51685,14.92,0.00,1.99,73.06,0.00,8.40,1.59,0.00,7 212 | 212,1.52065,14.36,0.00,2.02,73.42,0.00,8.44,1.64,0.00,7 213 | 213,1.51651,14.38,0.00,1.94,73.61,0.00,8.48,1.57,0.00,7 214 | 214,1.51711,14.23,0.00,2.08,73.36,0.00,8.62,1.67,0.00,7 215 | -------------------------------------------------------------------------------- /data/heart.dat: -------------------------------------------------------------------------------- 1 | 70.0 1.0 4.0 130.0 322.0 0.0 2.0 109.0 0.0 2.4 2.0 3.0 3.0 2 2 | 67.0 0.0 3.0 115.0 564.0 0.0 2.0 160.0 0.0 1.6 2.0 0.0 7.0 1 3 | 57.0 1.0 2.0 124.0 261.0 0.0 0.0 141.0 0.0 0.3 1.0 0.0 7.0 2 4 | 64.0 1.0 4.0 128.0 263.0 0.0 0.0 105.0 1.0 0.2 2.0 1.0 7.0 1 5 | 74.0 0.0 2.0 120.0 269.0 0.0 2.0 121.0 1.0 0.2 1.0 1.0 3.0 1 6 | 65.0 1.0 4.0 120.0 177.0 0.0 0.0 140.0 0.0 0.4 1.0 0.0 7.0 1 7 | 56.0 1.0 3.0 130.0 256.0 1.0 2.0 142.0 1.0 0.6 2.0 1.0 6.0 2 8 | 59.0 1.0 4.0 110.0 239.0 0.0 2.0 142.0 1.0 1.2 2.0 1.0 7.0 2 9 | 60.0 1.0 4.0 140.0 293.0 0.0 2.0 170.0 0.0 1.2 2.0 2.0 7.0 2 10 | 63.0 0.0 4.0 150.0 407.0 0.0 2.0 154.0 0.0 4.0 2.0 3.0 7.0 2 11 | 59.0 1.0 4.0 135.0 234.0 0.0 0.0 161.0 0.0 0.5 2.0 0.0 7.0 1 12 | 53.0 1.0 4.0 142.0 226.0 0.0 2.0 111.0 1.0 0.0 1.0 0.0 7.0 1 13 | 44.0 1.0 3.0 140.0 235.0 0.0 2.0 180.0 0.0 0.0 1.0 0.0 3.0 1 14 | 61.0 1.0 1.0 134.0 234.0 0.0 0.0 145.0 0.0 2.6 2.0 2.0 3.0 2 15 | 57.0 0.0 4.0 128.0 303.0 0.0 2.0 159.0 0.0 0.0 1.0 1.0 3.0 1 16 | 71.0 0.0 4.0 112.0 149.0 0.0 0.0 125.0 0.0 1.6 2.0 0.0 3.0 1 17 | 46.0 1.0 4.0 140.0 311.0 0.0 0.0 120.0 1.0 1.8 2.0 2.0 7.0 2 18 | 53.0 1.0 4.0 140.0 203.0 1.0 2.0 155.0 1.0 3.1 3.0 0.0 7.0 2 19 | 64.0 1.0 1.0 110.0 211.0 0.0 2.0 144.0 1.0 1.8 2.0 0.0 3.0 1 20 | 40.0 1.0 1.0 140.0 199.0 0.0 0.0 178.0 1.0 1.4 1.0 0.0 7.0 1 21 | 67.0 1.0 4.0 120.0 229.0 0.0 2.0 129.0 1.0 2.6 2.0 2.0 7.0 2 22 | 48.0 1.0 2.0 130.0 245.0 0.0 2.0 180.0 0.0 0.2 2.0 0.0 3.0 1 23 | 43.0 1.0 4.0 115.0 303.0 0.0 0.0 181.0 0.0 1.2 2.0 0.0 3.0 1 24 | 47.0 1.0 4.0 112.0 204.0 0.0 0.0 143.0 0.0 0.1 1.0 0.0 3.0 1 25 | 54.0 0.0 2.0 132.0 288.0 1.0 2.0 159.0 1.0 0.0 1.0 1.0 3.0 1 26 | 48.0 0.0 3.0 130.0 275.0 0.0 0.0 139.0 0.0 0.2 1.0 0.0 3.0 1 27 | 46.0 0.0 4.0 138.0 243.0 0.0 2.0 152.0 1.0 0.0 2.0 0.0 3.0 1 28 | 51.0 0.0 3.0 120.0 295.0 0.0 2.0 157.0 0.0 0.6 1.0 0.0 3.0 1 29 | 58.0 1.0 3.0 112.0 230.0 0.0 2.0 165.0 0.0 2.5 2.0 1.0 7.0 2 30 | 71.0 0.0 3.0 110.0 265.0 1.0 2.0 130.0 0.0 0.0 1.0 1.0 3.0 1 31 | 57.0 1.0 3.0 128.0 229.0 0.0 2.0 150.0 0.0 0.4 2.0 1.0 7.0 2 32 | 66.0 1.0 4.0 160.0 228.0 0.0 2.0 138.0 0.0 2.3 1.0 0.0 6.0 1 33 | 37.0 0.0 3.0 120.0 215.0 0.0 0.0 170.0 0.0 0.0 1.0 0.0 3.0 1 34 | 59.0 1.0 4.0 170.0 326.0 0.0 2.0 140.0 1.0 3.4 3.0 0.0 7.0 2 35 | 50.0 1.0 4.0 144.0 200.0 0.0 2.0 126.0 1.0 0.9 2.0 0.0 7.0 2 36 | 48.0 1.0 4.0 130.0 256.0 1.0 2.0 150.0 1.0 0.0 1.0 2.0 7.0 2 37 | 61.0 1.0 4.0 140.0 207.0 0.0 2.0 138.0 1.0 1.9 1.0 1.0 7.0 2 38 | 59.0 1.0 1.0 160.0 273.0 0.0 2.0 125.0 0.0 0.0 1.0 0.0 3.0 2 39 | 42.0 1.0 3.0 130.0 180.0 0.0 0.0 150.0 0.0 0.0 1.0 0.0 3.0 1 40 | 48.0 1.0 4.0 122.0 222.0 0.0 2.0 186.0 0.0 0.0 1.0 0.0 3.0 1 41 | 40.0 1.0 4.0 152.0 223.0 0.0 0.0 181.0 0.0 0.0 1.0 0.0 7.0 2 42 | 62.0 0.0 4.0 124.0 209.0 0.0 0.0 163.0 0.0 0.0 1.0 0.0 3.0 1 43 | 44.0 1.0 3.0 130.0 233.0 0.0 0.0 179.0 1.0 0.4 1.0 0.0 3.0 1 44 | 46.0 1.0 2.0 101.0 197.0 1.0 0.0 156.0 0.0 0.0 1.0 0.0 7.0 1 45 | 59.0 1.0 3.0 126.0 218.0 1.0 0.0 134.0 0.0 2.2 2.0 1.0 6.0 2 46 | 58.0 1.0 3.0 140.0 211.0 1.0 2.0 165.0 0.0 0.0 1.0 0.0 3.0 1 47 | 49.0 1.0 3.0 118.0 149.0 0.0 2.0 
126.0 0.0 0.8 1.0 3.0 3.0 2 48 | 44.0 1.0 4.0 110.0 197.0 0.0 2.0 177.0 0.0 0.0 1.0 1.0 3.0 2 49 | 66.0 1.0 2.0 160.0 246.0 0.0 0.0 120.0 1.0 0.0 2.0 3.0 6.0 2 50 | 65.0 0.0 4.0 150.0 225.0 0.0 2.0 114.0 0.0 1.0 2.0 3.0 7.0 2 51 | 42.0 1.0 4.0 136.0 315.0 0.0 0.0 125.0 1.0 1.8 2.0 0.0 6.0 2 52 | 52.0 1.0 2.0 128.0 205.0 1.0 0.0 184.0 0.0 0.0 1.0 0.0 3.0 1 53 | 65.0 0.0 3.0 140.0 417.0 1.0 2.0 157.0 0.0 0.8 1.0 1.0 3.0 1 54 | 63.0 0.0 2.0 140.0 195.0 0.0 0.0 179.0 0.0 0.0 1.0 2.0 3.0 1 55 | 45.0 0.0 2.0 130.0 234.0 0.0 2.0 175.0 0.0 0.6 2.0 0.0 3.0 1 56 | 41.0 0.0 2.0 105.0 198.0 0.0 0.0 168.0 0.0 0.0 1.0 1.0 3.0 1 57 | 61.0 1.0 4.0 138.0 166.0 0.0 2.0 125.0 1.0 3.6 2.0 1.0 3.0 2 58 | 60.0 0.0 3.0 120.0 178.0 1.0 0.0 96.0 0.0 0.0 1.0 0.0 3.0 1 59 | 59.0 0.0 4.0 174.0 249.0 0.0 0.0 143.0 1.0 0.0 2.0 0.0 3.0 2 60 | 62.0 1.0 2.0 120.0 281.0 0.0 2.0 103.0 0.0 1.4 2.0 1.0 7.0 2 61 | 57.0 1.0 3.0 150.0 126.0 1.0 0.0 173.0 0.0 0.2 1.0 1.0 7.0 1 62 | 51.0 0.0 4.0 130.0 305.0 0.0 0.0 142.0 1.0 1.2 2.0 0.0 7.0 2 63 | 44.0 1.0 3.0 120.0 226.0 0.0 0.0 169.0 0.0 0.0 1.0 0.0 3.0 1 64 | 60.0 0.0 1.0 150.0 240.0 0.0 0.0 171.0 0.0 0.9 1.0 0.0 3.0 1 65 | 63.0 1.0 1.0 145.0 233.0 1.0 2.0 150.0 0.0 2.3 3.0 0.0 6.0 1 66 | 57.0 1.0 4.0 150.0 276.0 0.0 2.0 112.0 1.0 0.6 2.0 1.0 6.0 2 67 | 51.0 1.0 4.0 140.0 261.0 0.0 2.0 186.0 1.0 0.0 1.0 0.0 3.0 1 68 | 58.0 0.0 2.0 136.0 319.0 1.0 2.0 152.0 0.0 0.0 1.0 2.0 3.0 2 69 | 44.0 0.0 3.0 118.0 242.0 0.0 0.0 149.0 0.0 0.3 2.0 1.0 3.0 1 70 | 47.0 1.0 3.0 108.0 243.0 0.0 0.0 152.0 0.0 0.0 1.0 0.0 3.0 2 71 | 61.0 1.0 4.0 120.0 260.0 0.0 0.0 140.0 1.0 3.6 2.0 1.0 7.0 2 72 | 57.0 0.0 4.0 120.0 354.0 0.0 0.0 163.0 1.0 0.6 1.0 0.0 3.0 1 73 | 70.0 1.0 2.0 156.0 245.0 0.0 2.0 143.0 0.0 0.0 1.0 0.0 3.0 1 74 | 76.0 0.0 3.0 140.0 197.0 0.0 1.0 116.0 0.0 1.1 2.0 0.0 3.0 1 75 | 67.0 0.0 4.0 106.0 223.0 0.0 0.0 142.0 0.0 0.3 1.0 2.0 3.0 1 76 | 45.0 1.0 4.0 142.0 309.0 0.0 2.0 147.0 1.0 0.0 2.0 3.0 7.0 2 77 | 45.0 1.0 4.0 104.0 208.0 0.0 2.0 148.0 1.0 3.0 2.0 0.0 3.0 1 78 | 39.0 0.0 3.0 94.0 199.0 0.0 0.0 179.0 0.0 0.0 1.0 0.0 3.0 1 79 | 42.0 0.0 3.0 120.0 209.0 0.0 0.0 173.0 0.0 0.0 2.0 0.0 3.0 1 80 | 56.0 1.0 2.0 120.0 236.0 0.0 0.0 178.0 0.0 0.8 1.0 0.0 3.0 1 81 | 58.0 1.0 4.0 146.0 218.0 0.0 0.0 105.0 0.0 2.0 2.0 1.0 7.0 2 82 | 35.0 1.0 4.0 120.0 198.0 0.0 0.0 130.0 1.0 1.6 2.0 0.0 7.0 2 83 | 58.0 1.0 4.0 150.0 270.0 0.0 2.0 111.0 1.0 0.8 1.0 0.0 7.0 2 84 | 41.0 1.0 3.0 130.0 214.0 0.0 2.0 168.0 0.0 2.0 2.0 0.0 3.0 1 85 | 57.0 1.0 4.0 110.0 201.0 0.0 0.0 126.0 1.0 1.5 2.0 0.0 6.0 1 86 | 42.0 1.0 1.0 148.0 244.0 0.0 2.0 178.0 0.0 0.8 1.0 2.0 3.0 1 87 | 62.0 1.0 2.0 128.0 208.0 1.0 2.0 140.0 0.0 0.0 1.0 0.0 3.0 1 88 | 59.0 1.0 1.0 178.0 270.0 0.0 2.0 145.0 0.0 4.2 3.0 0.0 7.0 1 89 | 41.0 0.0 2.0 126.0 306.0 0.0 0.0 163.0 0.0 0.0 1.0 0.0 3.0 1 90 | 50.0 1.0 4.0 150.0 243.0 0.0 2.0 128.0 0.0 2.6 2.0 0.0 7.0 2 91 | 59.0 1.0 2.0 140.0 221.0 0.0 0.0 164.0 1.0 0.0 1.0 0.0 3.0 1 92 | 61.0 0.0 4.0 130.0 330.0 0.0 2.0 169.0 0.0 0.0 1.0 0.0 3.0 2 93 | 54.0 1.0 4.0 124.0 266.0 0.0 2.0 109.0 1.0 2.2 2.0 1.0 7.0 2 94 | 54.0 1.0 4.0 110.0 206.0 0.0 2.0 108.0 1.0 0.0 2.0 1.0 3.0 2 95 | 52.0 1.0 4.0 125.0 212.0 0.0 0.0 168.0 0.0 1.0 1.0 2.0 7.0 2 96 | 47.0 1.0 4.0 110.0 275.0 0.0 2.0 118.0 1.0 1.0 2.0 1.0 3.0 2 97 | 66.0 1.0 4.0 120.0 302.0 0.0 2.0 151.0 0.0 0.4 2.0 0.0 3.0 1 98 | 58.0 1.0 4.0 100.0 234.0 0.0 0.0 156.0 0.0 0.1 1.0 1.0 7.0 2 99 | 64.0 0.0 3.0 140.0 313.0 0.0 0.0 133.0 0.0 0.2 1.0 0.0 7.0 1 100 | 50.0 0.0 2.0 120.0 244.0 0.0 0.0 162.0 0.0 1.1 1.0 0.0 3.0 1 101 | 44.0 0.0 3.0 108.0 
141.0 0.0 0.0 175.0 0.0 0.6 2.0 0.0 3.0 1 102 | 67.0 1.0 4.0 120.0 237.0 0.0 0.0 71.0 0.0 1.0 2.0 0.0 3.0 2 103 | 49.0 0.0 4.0 130.0 269.0 0.0 0.0 163.0 0.0 0.0 1.0 0.0 3.0 1 104 | 57.0 1.0 4.0 165.0 289.0 1.0 2.0 124.0 0.0 1.0 2.0 3.0 7.0 2 105 | 63.0 1.0 4.0 130.0 254.0 0.0 2.0 147.0 0.0 1.4 2.0 1.0 7.0 2 106 | 48.0 1.0 4.0 124.0 274.0 0.0 2.0 166.0 0.0 0.5 2.0 0.0 7.0 2 107 | 51.0 1.0 3.0 100.0 222.0 0.0 0.0 143.0 1.0 1.2 2.0 0.0 3.0 1 108 | 60.0 0.0 4.0 150.0 258.0 0.0 2.0 157.0 0.0 2.6 2.0 2.0 7.0 2 109 | 59.0 1.0 4.0 140.0 177.0 0.0 0.0 162.0 1.0 0.0 1.0 1.0 7.0 2 110 | 45.0 0.0 2.0 112.0 160.0 0.0 0.0 138.0 0.0 0.0 2.0 0.0 3.0 1 111 | 55.0 0.0 4.0 180.0 327.0 0.0 1.0 117.0 1.0 3.4 2.0 0.0 3.0 2 112 | 41.0 1.0 2.0 110.0 235.0 0.0 0.0 153.0 0.0 0.0 1.0 0.0 3.0 1 113 | 60.0 0.0 4.0 158.0 305.0 0.0 2.0 161.0 0.0 0.0 1.0 0.0 3.0 2 114 | 54.0 0.0 3.0 135.0 304.0 1.0 0.0 170.0 0.0 0.0 1.0 0.0 3.0 1 115 | 42.0 1.0 2.0 120.0 295.0 0.0 0.0 162.0 0.0 0.0 1.0 0.0 3.0 1 116 | 49.0 0.0 2.0 134.0 271.0 0.0 0.0 162.0 0.0 0.0 2.0 0.0 3.0 1 117 | 46.0 1.0 4.0 120.0 249.0 0.0 2.0 144.0 0.0 0.8 1.0 0.0 7.0 2 118 | 56.0 0.0 4.0 200.0 288.0 1.0 2.0 133.0 1.0 4.0 3.0 2.0 7.0 2 119 | 66.0 0.0 1.0 150.0 226.0 0.0 0.0 114.0 0.0 2.6 3.0 0.0 3.0 1 120 | 56.0 1.0 4.0 130.0 283.0 1.0 2.0 103.0 1.0 1.6 3.0 0.0 7.0 2 121 | 49.0 1.0 3.0 120.0 188.0 0.0 0.0 139.0 0.0 2.0 2.0 3.0 7.0 2 122 | 54.0 1.0 4.0 122.0 286.0 0.0 2.0 116.0 1.0 3.2 2.0 2.0 3.0 2 123 | 57.0 1.0 4.0 152.0 274.0 0.0 0.0 88.0 1.0 1.2 2.0 1.0 7.0 2 124 | 65.0 0.0 3.0 160.0 360.0 0.0 2.0 151.0 0.0 0.8 1.0 0.0 3.0 1 125 | 54.0 1.0 3.0 125.0 273.0 0.0 2.0 152.0 0.0 0.5 3.0 1.0 3.0 1 126 | 54.0 0.0 3.0 160.0 201.0 0.0 0.0 163.0 0.0 0.0 1.0 1.0 3.0 1 127 | 62.0 1.0 4.0 120.0 267.0 0.0 0.0 99.0 1.0 1.8 2.0 2.0 7.0 2 128 | 52.0 0.0 3.0 136.0 196.0 0.0 2.0 169.0 0.0 0.1 2.0 0.0 3.0 1 129 | 52.0 1.0 2.0 134.0 201.0 0.0 0.0 158.0 0.0 0.8 1.0 1.0 3.0 1 130 | 60.0 1.0 4.0 117.0 230.0 1.0 0.0 160.0 1.0 1.4 1.0 2.0 7.0 2 131 | 63.0 0.0 4.0 108.0 269.0 0.0 0.0 169.0 1.0 1.8 2.0 2.0 3.0 2 132 | 66.0 1.0 4.0 112.0 212.0 0.0 2.0 132.0 1.0 0.1 1.0 1.0 3.0 2 133 | 42.0 1.0 4.0 140.0 226.0 0.0 0.0 178.0 0.0 0.0 1.0 0.0 3.0 1 134 | 64.0 1.0 4.0 120.0 246.0 0.0 2.0 96.0 1.0 2.2 3.0 1.0 3.0 2 135 | 54.0 1.0 3.0 150.0 232.0 0.0 2.0 165.0 0.0 1.6 1.0 0.0 7.0 1 136 | 46.0 0.0 3.0 142.0 177.0 0.0 2.0 160.0 1.0 1.4 3.0 0.0 3.0 1 137 | 67.0 0.0 3.0 152.0 277.0 0.0 0.0 172.0 0.0 0.0 1.0 1.0 3.0 1 138 | 56.0 1.0 4.0 125.0 249.0 1.0 2.0 144.0 1.0 1.2 2.0 1.0 3.0 2 139 | 34.0 0.0 2.0 118.0 210.0 0.0 0.0 192.0 0.0 0.7 1.0 0.0 3.0 1 140 | 57.0 1.0 4.0 132.0 207.0 0.0 0.0 168.0 1.0 0.0 1.0 0.0 7.0 1 141 | 64.0 1.0 4.0 145.0 212.0 0.0 2.0 132.0 0.0 2.0 2.0 2.0 6.0 2 142 | 59.0 1.0 4.0 138.0 271.0 0.0 2.0 182.0 0.0 0.0 1.0 0.0 3.0 1 143 | 50.0 1.0 3.0 140.0 233.0 0.0 0.0 163.0 0.0 0.6 2.0 1.0 7.0 2 144 | 51.0 1.0 1.0 125.0 213.0 0.0 2.0 125.0 1.0 1.4 1.0 1.0 3.0 1 145 | 54.0 1.0 2.0 192.0 283.0 0.0 2.0 195.0 0.0 0.0 1.0 1.0 7.0 2 146 | 53.0 1.0 4.0 123.0 282.0 0.0 0.0 95.0 1.0 2.0 2.0 2.0 7.0 2 147 | 52.0 1.0 4.0 112.0 230.0 0.0 0.0 160.0 0.0 0.0 1.0 1.0 3.0 2 148 | 40.0 1.0 4.0 110.0 167.0 0.0 2.0 114.0 1.0 2.0 2.0 0.0 7.0 2 149 | 58.0 1.0 3.0 132.0 224.0 0.0 2.0 173.0 0.0 3.2 1.0 2.0 7.0 2 150 | 41.0 0.0 3.0 112.0 268.0 0.0 2.0 172.0 1.0 0.0 1.0 0.0 3.0 1 151 | 41.0 1.0 3.0 112.0 250.0 0.0 0.0 179.0 0.0 0.0 1.0 0.0 3.0 1 152 | 50.0 0.0 3.0 120.0 219.0 0.0 0.0 158.0 0.0 1.6 2.0 0.0 3.0 1 153 | 54.0 0.0 3.0 108.0 267.0 0.0 2.0 167.0 0.0 0.0 1.0 0.0 3.0 1 154 | 64.0 0.0 4.0 130.0 303.0 
0.0 0.0 122.0 0.0 2.0 2.0 2.0 3.0 1 155 | 51.0 0.0 3.0 130.0 256.0 0.0 2.0 149.0 0.0 0.5 1.0 0.0 3.0 1 156 | 46.0 0.0 2.0 105.0 204.0 0.0 0.0 172.0 0.0 0.0 1.0 0.0 3.0 1 157 | 55.0 1.0 4.0 140.0 217.0 0.0 0.0 111.0 1.0 5.6 3.0 0.0 7.0 2 158 | 45.0 1.0 2.0 128.0 308.0 0.0 2.0 170.0 0.0 0.0 1.0 0.0 3.0 1 159 | 56.0 1.0 1.0 120.0 193.0 0.0 2.0 162.0 0.0 1.9 2.0 0.0 7.0 1 160 | 66.0 0.0 4.0 178.0 228.0 1.0 0.0 165.0 1.0 1.0 2.0 2.0 7.0 2 161 | 38.0 1.0 1.0 120.0 231.0 0.0 0.0 182.0 1.0 3.8 2.0 0.0 7.0 2 162 | 62.0 0.0 4.0 150.0 244.0 0.0 0.0 154.0 1.0 1.4 2.0 0.0 3.0 2 163 | 55.0 1.0 2.0 130.0 262.0 0.0 0.0 155.0 0.0 0.0 1.0 0.0 3.0 1 164 | 58.0 1.0 4.0 128.0 259.0 0.0 2.0 130.0 1.0 3.0 2.0 2.0 7.0 2 165 | 43.0 1.0 4.0 110.0 211.0 0.0 0.0 161.0 0.0 0.0 1.0 0.0 7.0 1 166 | 64.0 0.0 4.0 180.0 325.0 0.0 0.0 154.0 1.0 0.0 1.0 0.0 3.0 1 167 | 50.0 0.0 4.0 110.0 254.0 0.0 2.0 159.0 0.0 0.0 1.0 0.0 3.0 1 168 | 53.0 1.0 3.0 130.0 197.0 1.0 2.0 152.0 0.0 1.2 3.0 0.0 3.0 1 169 | 45.0 0.0 4.0 138.0 236.0 0.0 2.0 152.0 1.0 0.2 2.0 0.0 3.0 1 170 | 65.0 1.0 1.0 138.0 282.0 1.0 2.0 174.0 0.0 1.4 2.0 1.0 3.0 2 171 | 69.0 1.0 1.0 160.0 234.0 1.0 2.0 131.0 0.0 0.1 2.0 1.0 3.0 1 172 | 69.0 1.0 3.0 140.0 254.0 0.0 2.0 146.0 0.0 2.0 2.0 3.0 7.0 2 173 | 67.0 1.0 4.0 100.0 299.0 0.0 2.0 125.0 1.0 0.9 2.0 2.0 3.0 2 174 | 68.0 0.0 3.0 120.0 211.0 0.0 2.0 115.0 0.0 1.5 2.0 0.0 3.0 1 175 | 34.0 1.0 1.0 118.0 182.0 0.0 2.0 174.0 0.0 0.0 1.0 0.0 3.0 1 176 | 62.0 0.0 4.0 138.0 294.0 1.0 0.0 106.0 0.0 1.9 2.0 3.0 3.0 2 177 | 51.0 1.0 4.0 140.0 298.0 0.0 0.0 122.0 1.0 4.2 2.0 3.0 7.0 2 178 | 46.0 1.0 3.0 150.0 231.0 0.0 0.0 147.0 0.0 3.6 2.0 0.0 3.0 2 179 | 67.0 1.0 4.0 125.0 254.0 1.0 0.0 163.0 0.0 0.2 2.0 2.0 7.0 2 180 | 50.0 1.0 3.0 129.0 196.0 0.0 0.0 163.0 0.0 0.0 1.0 0.0 3.0 1 181 | 42.0 1.0 3.0 120.0 240.0 1.0 0.0 194.0 0.0 0.8 3.0 0.0 7.0 1 182 | 56.0 0.0 4.0 134.0 409.0 0.0 2.0 150.0 1.0 1.9 2.0 2.0 7.0 2 183 | 41.0 1.0 4.0 110.0 172.0 0.0 2.0 158.0 0.0 0.0 1.0 0.0 7.0 2 184 | 42.0 0.0 4.0 102.0 265.0 0.0 2.0 122.0 0.0 0.6 2.0 0.0 3.0 1 185 | 53.0 1.0 3.0 130.0 246.0 1.0 2.0 173.0 0.0 0.0 1.0 3.0 3.0 1 186 | 43.0 1.0 3.0 130.0 315.0 0.0 0.0 162.0 0.0 1.9 1.0 1.0 3.0 1 187 | 56.0 1.0 4.0 132.0 184.0 0.0 2.0 105.0 1.0 2.1 2.0 1.0 6.0 2 188 | 52.0 1.0 4.0 108.0 233.0 1.0 0.0 147.0 0.0 0.1 1.0 3.0 7.0 1 189 | 62.0 0.0 4.0 140.0 394.0 0.0 2.0 157.0 0.0 1.2 2.0 0.0 3.0 1 190 | 70.0 1.0 3.0 160.0 269.0 0.0 0.0 112.0 1.0 2.9 2.0 1.0 7.0 2 191 | 54.0 1.0 4.0 140.0 239.0 0.0 0.0 160.0 0.0 1.2 1.0 0.0 3.0 1 192 | 70.0 1.0 4.0 145.0 174.0 0.0 0.0 125.0 1.0 2.6 3.0 0.0 7.0 2 193 | 54.0 1.0 2.0 108.0 309.0 0.0 0.0 156.0 0.0 0.0 1.0 0.0 7.0 1 194 | 35.0 1.0 4.0 126.0 282.0 0.0 2.0 156.0 1.0 0.0 1.0 0.0 7.0 2 195 | 48.0 1.0 3.0 124.0 255.0 1.0 0.0 175.0 0.0 0.0 1.0 2.0 3.0 1 196 | 55.0 0.0 2.0 135.0 250.0 0.0 2.0 161.0 0.0 1.4 2.0 0.0 3.0 1 197 | 58.0 0.0 4.0 100.0 248.0 0.0 2.0 122.0 0.0 1.0 2.0 0.0 3.0 1 198 | 54.0 0.0 3.0 110.0 214.0 0.0 0.0 158.0 0.0 1.6 2.0 0.0 3.0 1 199 | 69.0 0.0 1.0 140.0 239.0 0.0 0.0 151.0 0.0 1.8 1.0 2.0 3.0 1 200 | 77.0 1.0 4.0 125.0 304.0 0.0 2.0 162.0 1.0 0.0 1.0 3.0 3.0 2 201 | 68.0 1.0 3.0 118.0 277.0 0.0 0.0 151.0 0.0 1.0 1.0 1.0 7.0 1 202 | 58.0 1.0 4.0 125.0 300.0 0.0 2.0 171.0 0.0 0.0 1.0 2.0 7.0 2 203 | 60.0 1.0 4.0 125.0 258.0 0.0 2.0 141.0 1.0 2.8 2.0 1.0 7.0 2 204 | 51.0 1.0 4.0 140.0 299.0 0.0 0.0 173.0 1.0 1.6 1.0 0.0 7.0 2 205 | 55.0 1.0 4.0 160.0 289.0 0.0 2.0 145.0 1.0 0.8 2.0 1.0 7.0 2 206 | 52.0 1.0 1.0 152.0 298.0 1.0 0.0 178.0 0.0 1.2 2.0 0.0 7.0 1 207 | 60.0 0.0 3.0 102.0 318.0 0.0 
0.0 160.0 0.0 0.0 1.0 1.0 3.0 1 208 | 58.0 1.0 3.0 105.0 240.0 0.0 2.0 154.0 1.0 0.6 2.0 0.0 7.0 1 209 | 64.0 1.0 3.0 125.0 309.0 0.0 0.0 131.0 1.0 1.8 2.0 0.0 7.0 2 210 | 37.0 1.0 3.0 130.0 250.0 0.0 0.0 187.0 0.0 3.5 3.0 0.0 3.0 1 211 | 59.0 1.0 1.0 170.0 288.0 0.0 2.0 159.0 0.0 0.2 2.0 0.0 7.0 2 212 | 51.0 1.0 3.0 125.0 245.0 1.0 2.0 166.0 0.0 2.4 2.0 0.0 3.0 1 213 | 43.0 0.0 3.0 122.0 213.0 0.0 0.0 165.0 0.0 0.2 2.0 0.0 3.0 1 214 | 58.0 1.0 4.0 128.0 216.0 0.0 2.0 131.0 1.0 2.2 2.0 3.0 7.0 2 215 | 29.0 1.0 2.0 130.0 204.0 0.0 2.0 202.0 0.0 0.0 1.0 0.0 3.0 1 216 | 41.0 0.0 2.0 130.0 204.0 0.0 2.0 172.0 0.0 1.4 1.0 0.0 3.0 1 217 | 63.0 0.0 3.0 135.0 252.0 0.0 2.0 172.0 0.0 0.0 1.0 0.0 3.0 1 218 | 51.0 1.0 3.0 94.0 227.0 0.0 0.0 154.0 1.0 0.0 1.0 1.0 7.0 1 219 | 54.0 1.0 3.0 120.0 258.0 0.0 2.0 147.0 0.0 0.4 2.0 0.0 7.0 1 220 | 44.0 1.0 2.0 120.0 220.0 0.0 0.0 170.0 0.0 0.0 1.0 0.0 3.0 1 221 | 54.0 1.0 4.0 110.0 239.0 0.0 0.0 126.0 1.0 2.8 2.0 1.0 7.0 2 222 | 65.0 1.0 4.0 135.0 254.0 0.0 2.0 127.0 0.0 2.8 2.0 1.0 7.0 2 223 | 57.0 1.0 3.0 150.0 168.0 0.0 0.0 174.0 0.0 1.6 1.0 0.0 3.0 1 224 | 63.0 1.0 4.0 130.0 330.0 1.0 2.0 132.0 1.0 1.8 1.0 3.0 7.0 2 225 | 35.0 0.0 4.0 138.0 183.0 0.0 0.0 182.0 0.0 1.4 1.0 0.0 3.0 1 226 | 41.0 1.0 2.0 135.0 203.0 0.0 0.0 132.0 0.0 0.0 2.0 0.0 6.0 1 227 | 62.0 0.0 3.0 130.0 263.0 0.0 0.0 97.0 0.0 1.2 2.0 1.0 7.0 2 228 | 43.0 0.0 4.0 132.0 341.0 1.0 2.0 136.0 1.0 3.0 2.0 0.0 7.0 2 229 | 58.0 0.0 1.0 150.0 283.0 1.0 2.0 162.0 0.0 1.0 1.0 0.0 3.0 1 230 | 52.0 1.0 1.0 118.0 186.0 0.0 2.0 190.0 0.0 0.0 2.0 0.0 6.0 1 231 | 61.0 0.0 4.0 145.0 307.0 0.0 2.0 146.0 1.0 1.0 2.0 0.0 7.0 2 232 | 39.0 1.0 4.0 118.0 219.0 0.0 0.0 140.0 0.0 1.2 2.0 0.0 7.0 2 233 | 45.0 1.0 4.0 115.0 260.0 0.0 2.0 185.0 0.0 0.0 1.0 0.0 3.0 1 234 | 52.0 1.0 4.0 128.0 255.0 0.0 0.0 161.0 1.0 0.0 1.0 1.0 7.0 2 235 | 62.0 1.0 3.0 130.0 231.0 0.0 0.0 146.0 0.0 1.8 2.0 3.0 7.0 1 236 | 62.0 0.0 4.0 160.0 164.0 0.0 2.0 145.0 0.0 6.2 3.0 3.0 7.0 2 237 | 53.0 0.0 4.0 138.0 234.0 0.0 2.0 160.0 0.0 0.0 1.0 0.0 3.0 1 238 | 43.0 1.0 4.0 120.0 177.0 0.0 2.0 120.0 1.0 2.5 2.0 0.0 7.0 2 239 | 47.0 1.0 3.0 138.0 257.0 0.0 2.0 156.0 0.0 0.0 1.0 0.0 3.0 1 240 | 52.0 1.0 2.0 120.0 325.0 0.0 0.0 172.0 0.0 0.2 1.0 0.0 3.0 1 241 | 68.0 1.0 3.0 180.0 274.0 1.0 2.0 150.0 1.0 1.6 2.0 0.0 7.0 2 242 | 39.0 1.0 3.0 140.0 321.0 0.0 2.0 182.0 0.0 0.0 1.0 0.0 3.0 1 243 | 53.0 0.0 4.0 130.0 264.0 0.0 2.0 143.0 0.0 0.4 2.0 0.0 3.0 1 244 | 62.0 0.0 4.0 140.0 268.0 0.0 2.0 160.0 0.0 3.6 3.0 2.0 3.0 2 245 | 51.0 0.0 3.0 140.0 308.0 0.0 2.0 142.0 0.0 1.5 1.0 1.0 3.0 1 246 | 60.0 1.0 4.0 130.0 253.0 0.0 0.0 144.0 1.0 1.4 1.0 1.0 7.0 2 247 | 65.0 1.0 4.0 110.0 248.0 0.0 2.0 158.0 0.0 0.6 1.0 2.0 6.0 2 248 | 65.0 0.0 3.0 155.0 269.0 0.0 0.0 148.0 0.0 0.8 1.0 0.0 3.0 1 249 | 60.0 1.0 3.0 140.0 185.0 0.0 2.0 155.0 0.0 3.0 2.0 0.0 3.0 2 250 | 60.0 1.0 4.0 145.0 282.0 0.0 2.0 142.0 1.0 2.8 2.0 2.0 7.0 2 251 | 54.0 1.0 4.0 120.0 188.0 0.0 0.0 113.0 0.0 1.4 2.0 1.0 7.0 2 252 | 44.0 1.0 2.0 130.0 219.0 0.0 2.0 188.0 0.0 0.0 1.0 0.0 3.0 1 253 | 44.0 1.0 4.0 112.0 290.0 0.0 2.0 153.0 0.0 0.0 1.0 1.0 3.0 2 254 | 51.0 1.0 3.0 110.0 175.0 0.0 0.0 123.0 0.0 0.6 1.0 0.0 3.0 1 255 | 59.0 1.0 3.0 150.0 212.0 1.0 0.0 157.0 0.0 1.6 1.0 0.0 3.0 1 256 | 71.0 0.0 2.0 160.0 302.0 0.0 0.0 162.0 0.0 0.4 1.0 2.0 3.0 1 257 | 61.0 1.0 3.0 150.0 243.0 1.0 0.0 137.0 1.0 1.0 2.0 0.0 3.0 1 258 | 55.0 1.0 4.0 132.0 353.0 0.0 0.0 132.0 1.0 1.2 2.0 1.0 7.0 2 259 | 64.0 1.0 3.0 140.0 335.0 0.0 0.0 158.0 0.0 0.0 1.0 0.0 3.0 2 260 | 43.0 1.0 4.0 150.0 247.0 0.0 0.0 
171.0 0.0 1.5 1.0 0.0 3.0 1 261 | 58.0 0.0 3.0 120.0 340.0 0.0 0.0 172.0 0.0 0.0 1.0 0.0 3.0 1 262 | 60.0 1.0 4.0 130.0 206.0 0.0 2.0 132.0 1.0 2.4 2.0 2.0 7.0 2 263 | 58.0 1.0 2.0 120.0 284.0 0.0 2.0 160.0 0.0 1.8 2.0 0.0 3.0 2 264 | 49.0 1.0 2.0 130.0 266.0 0.0 0.0 171.0 0.0 0.6 1.0 0.0 3.0 1 265 | 48.0 1.0 2.0 110.0 229.0 0.0 0.0 168.0 0.0 1.0 3.0 0.0 7.0 2 266 | 52.0 1.0 3.0 172.0 199.0 1.0 0.0 162.0 0.0 0.5 1.0 0.0 7.0 1 267 | 44.0 1.0 2.0 120.0 263.0 0.0 0.0 173.0 0.0 0.0 1.0 0.0 7.0 1 268 | 56.0 0.0 2.0 140.0 294.0 0.0 2.0 153.0 0.0 1.3 2.0 0.0 3.0 1 269 | 57.0 1.0 4.0 140.0 192.0 0.0 0.0 148.0 0.0 0.4 2.0 0.0 6.0 1 270 | 67.0 1.0 4.0 160.0 286.0 0.0 2.0 108.0 1.0 1.5 2.0 3.0 3.0 2 271 | -------------------------------------------------------------------------------- /data/labor.arff: -------------------------------------------------------------------------------- 1 | % Date: Tue, 15 Nov 88 15:44:08 EST 2 | % From: stan 3 | % To: aha@ICS.UCI.EDU 4 | % 5 | % 1. Title: Final settlements in labor negotitions in Canadian industry 6 | % 7 | % 2. Source Information 8 | % -- Creators: Collective Barganing Review, montly publication, 9 | % Labour Canada, Industrial Relations Information Service, 10 | % Ottawa, Ontario, K1A 0J2, Canada, (819) 997-3117 11 | % The data includes all collective agreements reached 12 | % in the business and personal services sector for locals 13 | % with at least 500 members (teachers, nurses, university 14 | % staff, police, etc) in Canada in 87 and first quarter of 88. 15 | % -- Donor: Stan Matwin, Computer Science Dept, University of Ottawa, 16 | % 34 Somerset East, K1N 9B4, (stan@uotcsi2.bitnet) 17 | % -- Date: November 1988 18 | % 19 | % 3. Past Usage: 20 | % -- testing concept learning software, in particular 21 | % an experimental method to learn two-tiered concept descriptions. 22 | % The data was used to learn the description of an acceptable 23 | % and unacceptable contract. 24 | % The unacceptable contracts were either obtained by interviewing 25 | % experts, or by inventing near misses. 26 | % Examples of use are described in: 27 | % Bergadano, F., Matwin, S., Michalski, R., 28 | % Zhang, J., Measuring Quality of Concept Descriptions, 29 | % Procs. of the 3rd European Working Sessions on Learning, 30 | % Glasgow, October 1988. 31 | % Bergadano, F., Matwin, S., Michalski, R., Zhang, J., 32 | % Representing and Acquiring Imprecise and Context-dependent 33 | % Concepts in Knowledge-based Systems, Procs. of ISMIS'88, 34 | % North Holland, 1988. 35 | % 4. Relevant Information: 36 | % -- data was used to test 2tier approach with learning 37 | % from positive and negative examples 38 | % 39 | % 5. Number of Instances: 57 40 | % 41 | % 6. Number of Attributes: 16 42 | % 43 | % 7. Attribute Information: 44 | % 1. dur: duration of agreement 45 | % [1..7] 46 | % 2 wage1.wage : wage increase in first year of contract 47 | % [2.0 .. 7.0] 48 | % 3 wage2.wage : wage increase in second year of contract 49 | % [2.0 .. 7.0] 50 | % 4 wage3.wage : wage increase in third year of contract 51 | % [2.0 .. 7.0] 52 | % 5 cola : cost of living allowance 53 | % [none, tcf, tc] 54 | % 6 hours.hrs : number of working hours during week 55 | % [35 .. 40] 56 | % 7 pension : employer contributions to pension plan 57 | % [none, ret_allw, empl_contr] 58 | % 8 stby_pay : standby pay 59 | % [2 .. 25] 60 | % 9 shift_diff : shift differencial : supplement for work on II and III shift 61 | % [1 .. 
25] 62 | % 10 educ_allw.boolean : education allowance 63 | % [true false] 64 | % 11 holidays : number of statutory holidays 65 | % [9 .. 15] 66 | % 12 vacation : number of paid vacation days 67 | % [ba, avg, gnr] 68 | % 13 lngtrm_disabil.boolean : 69 | % employer's help during employee longterm disabil 70 | % ity [true , false] 71 | % 14 dntl_ins : employers contribution towards the dental plan 72 | % [none, half, full] 73 | % 15 bereavement.boolean : employer's financial contribution towards the 74 | % covering the costs of bereavement 75 | % [true , false] 76 | % 16 empl_hplan : employer's contribution towards the health plan 77 | % [none, half, full] 78 | % 79 | % 8. Missing Attribute Values: None 80 | % 81 | % 9. Class Distribution: 82 | % 83 | % 10. Exceptions from format instructions: no commas between attribute values. 84 | % 85 | % 86 | @relation labor 87 | @attribute 'duration' real 88 | @attribute 'wage-increase-first-year' real 89 | @attribute 'wage-increase-second-year' real 90 | @attribute 'wage-increase-third-year' real 91 | @attribute 'cost-of-living-adjustment' {'none','tcf','tc'} 92 | @attribute 'working-hours' real 93 | @attribute 'pension' {'none','ret_allw','empl_contr'} 94 | @attribute 'standby-pay' real 95 | @attribute 'shift-differential' real 96 | @attribute 'education-allowance' {'yes','no'} 97 | @attribute 'statutory-holidays' real 98 | @attribute 'vacation' {'below_average','average','generous'} 99 | @attribute 'longterm-disability-assistance' {'yes','no'} 100 | @attribute 'contribution-to-dental-plan' {'none','half','full'} 101 | @attribute 'bereavement-assistance' {'yes','no'} 102 | @attribute 'contribution-to-health-plan' {'none','half','full'} 103 | @attribute 'class' {'bad','good'} 104 | @data 105 | 1,5,?,?,?,40,?,?,2,?,11,'average',?,?,'yes',?,'good' 106 | 2,4.5,5.8,?,?,35,'ret_allw',?,?,'yes',11,'below_average',?,'full',?,'full','good' 107 | ?,?,?,?,?,38,'empl_contr',?,5,?,11,'generous','yes','half','yes','half','good' 108 | 3,3.7,4,5,'tc',?,?,?,?,'yes',?,?,?,?,'yes',?,'good' 109 | 3,4.5,4.5,5,?,40,?,?,?,?,12,'average',?,'half','yes','half','good' 110 | 2,2,2.5,?,?,35,?,?,6,'yes',12,'average',?,?,?,?,'good' 111 | 3,4,5,5,'tc',?,'empl_contr',?,?,?,12,'generous','yes','none','yes','half','good' 112 | 3,6.9,4.8,2.3,?,40,?,?,3,?,12,'below_average',?,?,?,?,'good' 113 | 2,3,7,?,?,38,?,12,25,'yes',11,'below_average','yes','half','yes',?,'good' 114 | 1,5.7,?,?,'none',40,'empl_contr',?,4,?,11,'generous','yes','full',?,?,'good' 115 | 3,3.5,4,4.6,'none',36,?,?,3,?,13,'generous',?,?,'yes','full','good' 116 | 2,6.4,6.4,?,?,38,?,?,4,?,15,?,?,'full',?,?,'good' 117 | 2,3.5,4,?,'none',40,?,?,2,'no',10,'below_average','no','half',?,'half','bad' 118 | 3,3.5,4,5.1,'tcf',37,?,?,4,?,13,'generous',?,'full','yes','full','good' 119 | 1,3,?,?,'none',36,?,?,10,'no',11,'generous',?,?,?,?,'good' 120 | 2,4.5,4,?,'none',37,'empl_contr',?,?,?,11,'average',?,'full','yes',?,'good' 121 | 1,2.8,?,?,?,35,?,?,2,?,12,'below_average',?,?,?,?,'good' 122 | 1,2.1,?,?,'tc',40,'ret_allw',2,3,'no',9,'below_average','yes','half',?,'none','bad' 123 | 1,2,?,?,'none',38,'none',?,?,'yes',11,'average','no','none','no','none','bad' 124 | 2,4,5,?,'tcf',35,?,13,5,?,15,'generous',?,?,?,?,'good' 125 | 2,4.3,4.4,?,?,38,?,?,4,?,12,'generous',?,'full',?,'full','good' 126 | 2,2.5,3,?,?,40,'none',?,?,?,11,'below_average',?,?,?,?,'bad' 127 | 3,3.5,4,4.6,'tcf',27,?,?,?,?,?,?,?,?,?,?,'good' 128 | 2,4.5,4,?,?,40,?,?,4,?,10,'generous',?,'half',?,'full','good' 129 | 1,6,?,?,?,38,?,8,3,?,9,'generous',?,?,?,?,'good' 130 | 
3,2,2,2,'none',40,'none',?,?,?,10,'below_average',?,'half','yes','full','bad' 131 | 2,4.5,4.5,?,'tcf',?,?,?,?,'yes',10,'below_average','yes','none',?,'half','good' 132 | 2,3,3,?,'none',33,?,?,?,'yes',12,'generous',?,?,'yes','full','good' 133 | 2,5,4,?,'none',37,?,?,5,'no',11,'below_average','yes','full','yes','full','good' 134 | 3,2,2.5,?,?,35,'none',?,?,?,10,'average',?,?,'yes','full','bad' 135 | 3,4.5,4.5,5,'none',40,?,?,?,'no',11,'average',?,'half',?,?,'good' 136 | 3,3,2,2.5,'tc',40,'none',?,5,'no',10,'below_average','yes','half','yes','full','bad' 137 | 2,2.5,2.5,?,?,38,'empl_contr',?,?,?,10,'average',?,?,?,?,'bad' 138 | 2,4,5,?,'none',40,'none',?,3,'no',10,'below_average','no','none',?,'none','bad' 139 | 3,2,2.5,2.1,'tc',40,'none',2,1,'no',10,'below_average','no','half','yes','full','bad' 140 | 2,2,2,?,'none',40,'none',?,?,'no',11,'average','yes','none','yes','full','bad' 141 | 1,2,?,?,'tc',40,'ret_allw',4,0,'no',11,'generous','no','none','no','none','bad' 142 | 1,2.8,?,?,'none',38,'empl_contr',2,3,'no',9,'below_average','yes','half',?,'none','bad' 143 | 3,2,2.5,2,?,37,'empl_contr',?,?,?,10,'average',?,?,'yes','none','bad' 144 | 2,4.5,4,?,'none',40,?,?,4,?,12,'average','yes','full','yes','half','good' 145 | 1,4,?,?,'none',?,'none',?,?,'yes',11,'average','no','none','no','none','bad' 146 | 2,2,3,?,'none',38,'empl_contr',?,?,'yes',12,'generous','yes','none','yes','full','bad' 147 | 2,2.5,2.5,?,'tc',39,'empl_contr',?,?,?,12,'average',?,?,'yes',?,'bad' 148 | 2,2.5,3,?,'tcf',40,'none',?,?,?,11,'below_average',?,?,'yes',?,'bad' 149 | 2,4,4,?,'none',40,'none',?,3,?,10,'below_average','no','none',?,'none','bad' 150 | 2,4.5,4,?,?,40,?,?,2,'no',10,'below_average','no','half',?,'half','bad' 151 | 2,4.5,4,?,'none',40,?,?,5,?,11,'average',?,'full','yes','full','good' 152 | 2,4.6,4.6,?,'tcf',38,?,?,?,?,?,?,'yes','half',?,'half','good' 153 | 2,5,4.5,?,'none',38,?,14,5,?,11,'below_average','yes',?,?,'full','good' 154 | 2,5.7,4.5,?,'none',40,'ret_allw',?,?,?,11,'average','yes','full','yes','full','good' 155 | 2,7,5.3,?,?,?,?,?,?,?,11,?,'yes','full',?,?,'good' 156 | 3,2,3,?,'tcf',?,'empl_contr',?,?,'yes',?,?,'yes','half','yes',?,'good' 157 | 3,3.5,4,4.5,'tcf',35,?,?,?,?,13,'generous',?,?,'yes','full','good' 158 | 3,4,3.5,?,'none',40,'empl_contr',?,6,?,11,'average','yes','full',?,'full','good' 159 | 3,5,4.4,?,'none',38,'empl_contr',10,6,?,11,'generous','yes',?,?,'full','good' 160 | 3,5,5,5,?,40,?,?,?,?,12,'average',?,'half','yes','half','good' 161 | 3,6,6,4,?,35,?,?,14,?,9,'generous','yes','full','yes','full','good' 162 | % 163 | % 164 | % 165 | -------------------------------------------------------------------------------- /data/load_all_datasets.py: -------------------------------------------------------------------------------- 1 | """ 2 | Uses Python introspection to call all functions in `data.load_datasets` 3 | 4 | Written by Gilles Vandewiele in commission of IDLab - INTEC from Ghent University.
5 | """ 6 | 7 | import data.load_datasets 8 | from inspect import getmembers, isfunction 9 | 10 | 11 | def load_all_datasets(): 12 | """ 13 | Uses Python introspection to call all functions in `data.load_datasets` 14 | 15 | **Returns** 16 | ----------- 17 | a list of loaded datasets 18 | """ 19 | datasets = [] 20 | for o in getmembers(data.load_datasets): 21 | if isfunction(o[1]): 22 | df, feature_cols, label_col, name = o[1]() 23 | datasets.append({'dataframe': df, 'feature_cols': feature_cols, 'label_col': label_col, 'name': name}) 24 | 25 | return datasets -------------------------------------------------------------------------------- /data/load_datasets.py: -------------------------------------------------------------------------------- 1 | """Contains dataset loading functions. If you want the test script to include a new dataset, a new function must 2 | be written in this module that returns a pandas DataFrame, the feature column names, the label column name and the 3 | dataset name. 4 | 5 | Written by Gilles Vandewiele in commission of IDLab - INTEC from Ghent University. 6 | """ 7 | 8 | from sklearn import datasets 9 | 10 | import pandas as pd 11 | import numpy as np 12 | import os 13 | 14 | 15 | # def load_wine(): 16 | # columns = ['Class', 'Alcohol', 'Acid', 'Ash', 'Alcalinity', 'Magnesium', 'Phenols', 'Flavanoids', 'Nonflavanoids', 17 | # 'Proanthocyanins', 'Color', 'Hue', 'Diluted', 'Proline'] 18 | # features = ['Alcohol', 'Acid', 'Ash', 'Alcalinity', 'Magnesium', 'Phenols', 'Flavanoids', 'Nonflavanoids', 19 | # 'Proanthocyanins', 'Color', 'Hue', 'Diluted', 'Proline'] 20 | # df = pd.read_csv(os.path.join(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1]), 'wine.data')) 21 | # df.columns = columns 22 | # df['Class'] = np.subtract(df['Class'], 1) 23 | # 24 | # return df, features, 'Class', 'wine' 25 | # 26 | # 27 | # def load_cars(): 28 | # columns = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'Class'] 29 | # features = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety'] 30 | # df = pd.read_csv(os.path.join(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1]), 'car.data')) 31 | # df.columns = columns 32 | # df = df.reindex(np.random.permutation(df.index)).reset_index(drop=1) 33 | # 34 | # mapping_buy_maint = {'low': 0, 'med': 1, 'high': 2, 'vhigh': 3} 35 | # mapping_doors = {'2': 0, '3': 1, '4': 2, '5more': 3} 36 | # mapping_persons = {'2': 0, '4': 1, 'more': 2} 37 | # mapping_lug = {'small': 0, 'med': 1, 'big': 2} 38 | # mapping_safety = {'low': 0, 'med': 1, 'high': 2} 39 | # mapping_class = {'unacc': 0, 'acc': 1, 'good': 2, 'vgood': 3} 40 | # 41 | # df['maint'] = df['maint'].map(mapping_buy_maint) 42 | # df['buying'] = df['buying'].map(mapping_buy_maint) 43 | # df['doors'] = df['doors'].map(mapping_doors) 44 | # df['persons'] = df['persons'].map(mapping_persons) 45 | # df['lug_boot'] = df['lug_boot'].map(mapping_lug) 46 | # df['safety'] = df['safety'].map(mapping_safety) 47 | # df['Class'] = df['Class'].map(mapping_class).astype(int) 48 | # 49 | # return df, features, 'Class', 'cars' 50 | # 51 | # 52 | # def load_wisconsin_breast_cancer(): 53 | # columns = ['ID', 'ClumpThickness', 'CellSizeUniform', 'CellShapeUniform', 'MargAdhesion', 'EpithCellSize', 'BareNuclei', 54 | # 'BlandChromatin', 'NormalNuclei', 'Mitoses', 'Class'] 55 | # features = ['ClumpThickness', 'CellSizeUniform', 'CellShapeUniform', 'MargAdhesion', 'EpithCellSize', 'BareNuclei', 56 | # 'BlandChromatin', 'NormalNuclei', 'Mitoses'] 57 | # df =
pd.read_csv(os.path.join(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1]), 'breast-cancer-wisconsin.data')) 58 | # df.columns = columns 59 | # df['Class'] = np.subtract(np.divide(df['Class'], 2), 1) 60 | # df = df.drop('ID', axis=1).reset_index(drop=True) 61 | # df['BareNuclei'] = df['BareNuclei'].replace('?', int(np.mean(df['BareNuclei'][df['BareNuclei'] != '?'].map(int)))) 62 | # df = df.applymap(int) 63 | # 64 | # return df, features, 'Class', 'wisconsinBreast' 65 | from sklearn.preprocessing import LabelEncoder 66 | 67 | 68 | # def load_heart(): 69 | # columns = ['age', 'sex', 'chest pain type', 'resting blood pressure', 'serum cholestoral', 'fasting blood sugar', \ 70 | # 'resting electrocardio', 'max heartrate', 'exercise induced', 'oldpeak', 'slope peak', \ 71 | # 'vessels', 'thal', 'Class'] 72 | # features = ['age', 'sex', 'chest pain type', 'resting blood pressure', 'serum cholestoral', 'fasting blood sugar', \ 73 | # 'resting electrocardio', 'max heartrate', 'exercise induced', 'oldpeak', 'slope peak', \ 74 | # 'vessels', 'thal'] 75 | # 76 | # columns_copy = [] 77 | # for column in columns: 78 | # column=column[:10] 79 | # columns_copy.append(column) 80 | # columns = columns_copy 81 | # 82 | # features_copy = [] 83 | # for feature in features: 84 | # feature=feature[:10] 85 | # features_copy.append(feature) 86 | # features=features_copy 87 | # 88 | # df = pd.read_csv(os.path.join(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1]), 'heart.dat'), sep=' ') 89 | # df.columns = columns 90 | # df['Class'] = np.subtract(df['Class'], 1) 91 | # return df, features, 'Class', 'heart' 92 | 93 | 94 | # def load_glass(): 95 | # columns = ['id', 'RI', 'Na', 'Mg', 'Al', 'Si', 'K', 'Ca', 'Ba', 'Fe', 'Class'] 96 | # features = ['RI', 'Na', 'Mg', 'Al', 'Si', 'K', 'Ca', 'Ba', 'Fe'] 97 | # df = pd.read_csv(os.path.join(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1]), 'glass.data')) 98 | # df.columns = columns 99 | # df = df.drop('id', axis=1).reset_index(drop=True) 100 | # df['Class'] = np.subtract(df['Class'], 1) 101 | # df = df[df['Class'] != 3] 102 | # df['Class'] = df['Class'].map({0:0, 1:1, 2:2, 4: 3, 5: 4, 6: 5}).astype(int) 103 | # return df, features, 'Class', 'glass' 104 | # 105 | # 106 | # def load_austra(): 107 | # columns = ['X1','X2','X3','X4','X5','X6','X7','X8','X9','X10','X11','X12','X13','X14','Class'] 108 | # features = ['X1','X2','X3','X4','X5','X6','X7','X8','X9','X10','X11','X12','X13','X14'] 109 | # df = pd.read_csv(os.path.join(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1]), 'austra.data')) 110 | # df.columns = columns 111 | # df['Class'] = df['Class'].map({'y0': 0, 'y1': 1}).astype(int) 112 | # return df, features, 'Class', 'austra' 113 | # 114 | # 115 | # def load_led7(): 116 | # columns = ['X1','X2','X3','X4','X5','X6','X7','Class'] 117 | # features = ['X1','X2','X3','X4','X5','X6','X7'] 118 | # df = pd.read_csv(os.path.join(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1]), 'led7.data')) 119 | # df.columns = columns 120 | # df['Class'] = df['Class'].map({'y0': 0, 'y1': 1, 'y2': 2, 'y3': 3, 'y4': 4, 'y5': 5, 'y6': 6, 121 | # 'y7': 7, 'y8': 8, 'y9': 9}).astype(int) 122 | # df = df[df['Class'] < 8] 123 | # return df, features, 'Class', 'led7' 124 | # 125 | # 126 | # def load_lymph(): 127 | # columns = ['X1','X2','X3','X4','X5','X6','X7','X8','X9','X10','X11','X12','X13','X14','X15','X16','X17','X18','Class'] 128 | # features = ['X1','X2','X3','X4','X5','X6','X7','X8','X9','X10','X11','X12','X13','X14','X15','X16','X17','X18'] 
129 | # df = pd.read_csv(os.path.join(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1]), 'lymph.data')) 130 | # df.columns = columns 131 | # df = df[df['Class'] != 'y1'] 132 | # df = df[df['Class'] != 'y4'] 133 | # df['Class'] = df['Class'].map({'y2': 0, 'y3': 1}).astype(int) 134 | # return df, features, 'Class', 'lymph' 135 | # 136 | # 137 | # def load_pima(): 138 | # columns = ['X1','X2','X3','X4','X5','X6','X7','X8','Class'] 139 | # features = ['X1','X2','X3','X4','X5','X6','X7','X8'] 140 | # df = pd.read_csv(os.path.join(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1]), 'pima.data')) 141 | # df.columns = columns 142 | # df['Class'] = df['Class'].map({'y0': 0, 'y1': 1}).astype(int) 143 | # return df, features, 'Class', 'pima' 144 | # 145 | # 146 | # def load_vehicle(): 147 | # columns = ['X1','X2','X3','X4','X5','X6','X7','X8','X9','X10','X11','X12','X13','X14','X15','X16','X17','X18','Class'] 148 | # features = ['X1','X2','X3','X4','X5','X6','X7','X8','X9','X10','X11','X12','X13','X14','X15','X16','X17','X18'] 149 | # df = pd.read_csv(os.path.join(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1]), 'vehicle.data')) 150 | # df.columns = columns 151 | # df['Class'] = df['Class'].map({'y1': 0, 'y2': 1, 'y3': 2, 'y4': 3}).astype(int) 152 | # return df, features, 'Class', 'vehicle' 153 | # 154 | # 155 | # def load_iris(): 156 | # iris = datasets.load_iris() 157 | # df = pd.DataFrame(iris.data) 158 | # features = ["SepalLength", "SepalWidth", "PetalLength", "PetalWidth"] 159 | # df.columns = features 160 | # df['Class'] = iris.target 161 | # 162 | # return df, features, 'Class', 'iris' 163 | # 164 | # 165 | # def load_ecoli(): 166 | # columns = ['name', 'mcg', 'gvh', 'lip', 'chg', 'aac', 'alm1', 'alm2', 'Class'] 167 | # features = ['mcg', 'gvh', 'lip', 'chg', 'aac', 'alm1', 'alm2'] 168 | # df = pd.read_csv(os.path.join(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1]), 'ecoli.data'), delim_whitespace=True, header=0) 169 | # df.columns = columns 170 | # df = df.drop('name', axis=1).reset_index(drop=True) 171 | # mapping_class = {'cp': 0, 'im': 1, 'pp': 2, 'imU': 3, 'om': 4, 'omL': 5, 'imL': 6, 'imS': 7} 172 | # df['Class'] = df['Class'].map(mapping_class).astype(int) 173 | # df = df[df['Class'] < 5] 174 | # return df, features, 'Class', 'ecoli' 175 | # 176 | # 177 | # def load_yeast(): 178 | # columns = ['name', 'mcg', 'gvh', 'alm', 'mit', 'erl', 'pox', 'vac', 'nuc', 'Class'] 179 | # features = ['mcg', 'gvh', 'alm', 'mit', 'erl', 'pox', 'vac', 'nuc'] 180 | # df = pd.read_csv(os.path.join(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1]), 'yeast.data'), delim_whitespace=True, header=0) 181 | # df.columns = columns 182 | # df = df.drop('name', axis=1).reset_index(drop=True) 183 | # mapping_class = {'CYT': 0, 'NUC': 1, 'MIT': 2, 'ME3': 3, 'ME2': 4, 'ME1': 5, 'EXC': 6, 'VAC': 7, 'POX': 8, 'ERL': 9} 184 | # df['Class'] = df['Class'].map(mapping_class) 185 | # df = df[df['Class'] < 8] 186 | # return df, features, 'Class', 'yeast' 187 | # 188 | # 189 | # def load_waveform(): 190 | # columns = ['X1','X2','X3','X4','X5','X6','X7','X8','X9','X10','X11','X12','X13','X14','X15','X16','X17','X18','X19','X20','X21','Class'] 191 | # features = ['X1','X2','X3','X4','X5','X6','X7','X8','X9','X10','X11','X12','X13','X14','X15','X16','X17','X18','X19','X20','X21'] 192 | # df = pd.read_csv(os.path.join(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1]), 'waveform.data')) 193 | # df.columns = columns 194 | # df['Class'] = df['Class'].map({'y0': 0, 'y1': 1, 
'y2': 2}).astype(int) 195 | # return df, features, 'Class', 'waveform' 196 | # 197 | # 198 | # def load_magic(): 199 | # columns = ['fLength', 'fWidth', 'fSize', 'fConc', 'fConc1', 'fAsym', 'fM3Long', 'fM3Trans', 'fAlpha', 'fDist', 'Class'] 200 | # features = ['fLength', 'fWidth', 'fSize', 'fConc', 'fConc1', 'fAsym', 'fM3Long', 'fM3Trans', 'fAlpha', 'fDist'] 201 | # df = pd.read_csv(os.path.join(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1]), 'magic04.data')) 202 | # df.columns = columns 203 | # for feature in features: 204 | # if np.min(df[feature]) < 0: 205 | # df[feature] += np.min(df[feature]) * (-1) 206 | # mapping_class = {'g': 0, 'h': 1} 207 | # df['Class'] = df['Class'].map(mapping_class).astype(int) 208 | # return df, features, 'Class', 'magic' 209 | # 210 | # 211 | # def load_shuttle(): 212 | # columns = ['feature1', 'feature2', 'feature3', 'feature4', 'feature5', 'feature6', 'feature7', 'feature8', 213 | # 'feature9', 'Class'] 214 | # features = ['feature1', 'feature2', 'feature3', 'feature4', 'feature5', 'feature6', 'feature7', 'feature8', 215 | # 'feature9'] 216 | # 217 | # df = pd.read_csv(os.path.join(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1]), 'shuttle.tst'), sep=' ') 218 | # df.columns = columns 219 | # for feature in features: 220 | # if np.min(df[feature]) < 0: 221 | # df[feature] += np.min(df[feature]) * (-1) 222 | # df = df[df['Class'] < 6] 223 | # df['Class'] = np.subtract(df['Class'], 1) 224 | # df = df.reset_index(drop=True) 225 | # 226 | # return df, features, 'Class', 'shuttle' 227 | # 228 | # 229 | # def load_shuttle_full(): 230 | # columns = ['feature1', 'feature2', 'feature3', 'feature4', 'feature5', 'feature6', 'feature7', 'feature8', 231 | # 'feature9', 'Class'] 232 | # features = ['feature1', 'feature2', 'feature3', 'feature4', 'feature5', 'feature6', 'feature7', 'feature8', 233 | # 'feature9'] 234 | # 235 | # df = pd.read_csv(os.path.join(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1]), 'shuttle_full.trn'), sep=' ') 236 | # df.columns = columns 237 | # for feature in features: 238 | # if np.min(df[feature]) < 0: 239 | # df[feature] += np.min(df[feature]) * (-1) 240 | # df = df[df['Class'] < 6] 241 | # df['Class'] = np.subtract(df['Class'], 1) 242 | # df = df.reset_index(drop=True) 243 | # 244 | # return df, features, 'Class', 'shuttleFull' 245 | # 246 | # 247 | # def load_nursery(): 248 | # columns = ['parents', 'has_nurs', 'form', 'children', 'housing', 'finance', 'social', 'health', 'Class'] 249 | # features = ['parents', 'has_nurs', 'form', 'children', 'housing', 'finance', 'social', 'health'] 250 | # 251 | # mapping_parents = {'usual': 0, 'pretentious': 1, 'great_pret': 2} 252 | # mapping_has_nurs = {'proper': 0, 'less_proper': 1, 'improper': 2, 'critical': 3, 'very_crit': 4} 253 | # mapping_form = {'complete': 0, 'completed': 1, 'incomplete': 2, 'foster': 3} 254 | # mapping_housing = {'convenient': 0, 'less_conv': 1, 'critical': 2} 255 | # mapping_finance = {'convenient': 0, 'inconv': 1} 256 | # mapping_social = {'nonprob': 0, 'slightly_prob': 1, 'problematic': 2} 257 | # mapping_health = {'recommended': 0, 'priority': 1, 'not_recom': 2} 258 | # mapping_class = {'not_recom': 1, 'recommend': 0, 'very_recom': 2, 'priority': 3, 'spec_prior': 4} 259 | # 260 | # df = pd.read_csv(os.path.join(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1]), 'nursery.data'), sep=',') 261 | # df = df.dropna() 262 | # df.columns = columns 263 | # 264 | # df['parents'] = df['parents'].map(mapping_parents) 265 | # df['has_nurs'] = 
df['has_nurs'].map(mapping_has_nurs) 266 | # df['form'] = df['form'].map(mapping_form) 267 | # df['children'] = df['children'].map(lambda x: 4 if x == 'more' else int(x)) 268 | # df['housing'] = df['housing'].map(mapping_housing) 269 | # df['finance'] = df['finance'].map(mapping_finance) 270 | # df['social'] = df['social'].map(mapping_social) 271 | # df['health'] = df['health'].map(mapping_health) 272 | # df['Class'] = df['Class'].map(mapping_class) 273 | # 274 | # df = df[df['Class'] != 0] 275 | # df['Class'] = np.subtract(df['Class'], 1) 276 | # df = df.reset_index(drop=True) 277 | # 278 | # return df, features, 'Class', 'nursery' 279 | 280 | # def load_aa_gent(): 281 | # label_col = 'RPE' 282 | # # 'H5060', 'H6070', 'Variabele A', 283 | # feature_cols = ['S1', 'S2', 'S3', 'S4', 'S5', 'H7080', 'H8090', 'H90100', 'H5060', 'H6070', 'Idnummer', 284 | # 'Aantal sprints', 'Gemiddelde snelheid (m/s)', 'Totaal tijd (s)', 'Totaal afstand (m)',# 'Variabele A', 'Variabele B', 285 | # 'Temperature', 'Humidity', 'Windspeed', 'Visibility', 'Weather Type', 'Variabele B']#, 'overall', 'phy', 'pac'] 286 | # #, 'ID', 'temperature', 'humidity', 'windspeed', 'visibility', 'weather_type'] 287 | # cols = feature_cols + [label_col] + ['Datum'] 288 | # df = pd.read_csv('aa_gent_with_player_features.csv') 289 | # df = df[cols] 290 | # df['Snelheid'] = df['Gemiddelde snelheid (m/s)'] # Kan evt weggelaten worden? 291 | # df['Variabele B'] = df['Variabele B'].fillna(df['Variabele B'].mean()) 292 | # df['Tijd'] = df['Totaal tijd (s)'] 293 | # df['Afstand'] = df['Totaal afstand (m)'] 294 | # df = df.drop(['Gemiddelde snelheid (m/s)', 'Totaal tijd (s)', 'Totaal afstand (m)'], axis=1) 295 | # print df.head(5) 296 | # df[label_col] = df[label_col].map({1: 2, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 9}).astype(int) 297 | # df = df.drop(['Datum'], axis=1) 298 | # # df = df.drop(['temperature', 'humidity', 'windspeed', 'visibility', 'weather_type'], axis=1) 299 | # df = pd.get_dummies(df, columns=['Idnummer']) 300 | # #df = pd.get_dummies(df, columns=['Weather Type']) 301 | # feature_cols = list(df.columns) 302 | # feature_cols.remove('RPE') 303 | # print feature_cols 304 | # return df, feature_cols, label_col, 'AA Gent' 305 | # df = pd.read_csv('data/aa_gent.csv', sep=";") 306 | # 307 | # label_col = 'RPE' 308 | # feature_cols = ['S1', 'S2', 'S3', 'S4', 'S5', 'HF-zone 80-90', 309 | # 'HF-zone 70-80', 'HF-zone 90-100', 'Aantal sprints', 310 | # 'Gem v', 'Tijd (s)', 'Afstand'] 311 | # print df[label_col].value_counts() 312 | # df = df[feature_cols + [label_col]] 313 | # df[label_col] = df[label_col].map({1: 2, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 9}).astype(int) 314 | # return df, feature_cols, label_col, 'AA Gent' 315 | 316 | 317 | def load_migbase(): 318 | migbase = pd.read_csv(os.path.join(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1]), 319 | 'migbase_noise20.csv'), sep=',') 320 | if 'Unnamed: 0' in migbase.columns: 321 | migbase = migbase.drop('Unnamed: 0', axis=1) 322 | #encoders = {} 323 | col_mapping = {} 324 | for col in migbase: 325 | # encoders[col] = LabelEncoder() 326 | # migbase[col] = encoders[col].fit_transform(migbase[col]) 327 | col_mapping[col] = col[:10] 328 | 329 | migbase = migbase.rename(index=str, columns=col_mapping) 330 | 331 | feature_cols = list(migbase.columns) 332 | feature_cols.remove('CLASS') 333 | 334 | return migbase, feature_cols, 'CLASS', 'migbase' 335 | -------------------------------------------------------------------------------- /data/lymph.data: 
-------------------------------------------------------------------------------- 1 | X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,X13,X14,X15,X16,X17,X18,Y 2 | 3,2,1,1,2,2,1,2,1,2,2,4,3,5,2,2,2,4,y3 3 | 2,1,1,1,1,1,1,2,1,3,2,2,2,8,3,2,2,5,y3 4 | 2,2,1,1,1,1,1,2,1,3,2,2,2,8,3,1,2,5,y3 5 | 4,2,1,1,1,2,1,2,1,4,3,3,3,7,3,2,2,3,y3 6 | 2,2,2,1,2,2,2,2,1,3,3,4,4,5,3,2,2,5,y3 7 | 2,2,1,1,1,2,1,2,1,4,3,4,4,8,3,2,2,7,y3 8 | 4,1,1,1,1,1,1,2,1,4,2,2,4,7,3,2,2,7,y3 9 | 4,2,1,1,2,2,1,2,1,4,3,4,3,4,2,2,2,2,y2 10 | 3,1,1,1,1,1,2,1,1,2,2,4,3,5,2,1,2,1,y2 11 | 4,1,1,1,1,1,1,2,1,3,3,2,2,4,2,2,2,2,y2 12 | 2,2,1,1,1,1,1,2,1,2,2,2,3,8,3,1,2,1,y2 13 | 3,2,1,1,1,2,1,1,1,2,2,4,3,4,1,2,2,2,y2 14 | 2,2,1,1,2,2,1,1,1,2,2,4,2,8,3,2,2,1,y2 15 | 2,2,2,1,2,2,1,2,1,3,2,4,3,5,1,2,2,3,y2 16 | 2,2,1,1,2,2,1,1,1,3,3,4,3,4,3,1,1,1,y2 17 | 3,1,1,1,1,1,1,1,1,2,2,2,2,5,1,1,2,2,y3 18 | 3,1,1,1,1,2,1,2,1,4,2,4,4,2,3,2,2,3,y3 19 | 4,1,1,1,1,2,1,2,1,4,3,4,2,5,3,2,2,2,y3 20 | 2,1,1,1,1,1,1,2,1,3,2,3,3,8,3,2,2,3,y3 21 | 2,1,1,1,1,1,1,2,1,2,2,4,2,8,3,2,2,3,y3 22 | 3,1,1,1,1,1,1,2,2,1,2,2,2,8,3,1,2,8,y3 23 | 3,2,2,1,2,1,1,2,1,2,2,3,3,8,3,1,2,1,y2 24 | 4,2,1,1,2,2,1,1,1,1,3,3,3,3,3,2,2,3,y2 25 | 4,2,1,1,2,2,1,2,1,2,3,2,3,2,3,2,2,4,y2 26 | 2,1,1,1,1,1,1,1,1,1,1,2,2,3,1,2,2,1,y2 27 | 2,2,1,1,1,1,1,2,1,4,3,4,2,8,2,1,2,4,y2 28 | 2,1,1,1,1,1,1,1,1,1,1,1,1,3,1,2,2,1,y2 29 | 3,2,2,1,1,2,1,1,1,2,3,3,3,5,2,1,2,2,y2 30 | 3,2,1,1,1,2,1,2,1,2,2,2,2,1,3,1,1,1,y2 31 | 2,1,1,1,1,1,1,1,1,2,3,2,2,8,1,2,1,1,y2 32 | 2,1,1,1,1,1,1,2,1,2,2,3,3,5,3,1,1,2,y3 33 | 2,2,1,1,1,2,1,2,1,3,3,4,2,8,3,2,2,2,y3 34 | 4,1,1,1,1,1,1,2,1,4,2,4,2,8,3,2,2,6,y3 35 | 3,1,1,1,1,1,1,2,1,4,3,3,4,5,3,2,2,3,y3 36 | 2,1,1,1,1,2,1,2,1,3,3,4,2,8,3,2,2,3,y3 37 | 2,1,1,1,1,1,1,2,1,3,2,2,2,6,3,2,2,6,y3 38 | 2,1,1,1,1,1,1,1,1,2,2,2,3,8,2,1,2,1,y2 39 | 4,2,1,1,1,1,1,1,1,4,3,3,3,4,2,2,1,1,y2 40 | 2,2,1,1,1,2,1,2,1,2,3,3,3,5,3,2,2,1,y2 41 | 4,2,2,1,1,2,1,2,1,2,2,3,3,8,3,2,2,2,y2 42 | 3,2,2,2,2,2,1,2,1,2,3,3,3,4,3,2,2,7,y2 43 | 3,2,2,1,2,2,1,2,1,2,3,3,4,2,2,2,1,1,y2 44 | 2,2,1,1,1,1,1,2,1,2,3,3,3,5,3,1,2,1,y2 45 | 2,2,1,1,1,2,1,2,1,2,3,3,3,5,3,2,2,2,y2 46 | 1,1,1,1,1,2,1,2,1,2,2,1,1,2,1,1,1,2,y1 47 | 3,2,1,1,1,1,1,2,1,3,2,2,4,8,3,2,2,3,y3 48 | 4,1,1,1,1,1,1,2,1,2,2,3,3,3,3,2,1,1,y3 49 | 3,2,2,2,2,2,2,2,1,4,3,3,4,8,3,2,2,7,y3 50 | 2,1,1,1,1,1,1,2,1,3,2,4,4,4,3,2,2,5,y3 51 | 4,1,1,1,1,2,1,2,1,4,2,2,4,7,3,2,2,2,y3 52 | 2,1,1,1,1,1,1,2,1,2,2,2,2,8,2,2,2,1,y3 53 | 2,2,2,1,2,2,1,1,1,2,2,3,3,4,2,1,2,1,y2 54 | 2,2,2,1,2,2,1,2,1,3,3,3,3,8,3,1,2,2,y2 55 | 4,2,1,1,2,2,1,2,1,3,3,4,3,4,3,2,2,2,y2 56 | 3,1,1,1,1,1,1,1,1,2,3,3,3,4,3,1,2,2,y2 57 | 3,2,1,1,1,2,1,2,1,2,3,3,3,2,2,2,2,3,y2 58 | 2,2,2,1,1,1,1,2,1,2,2,4,3,8,2,2,2,2,y2 59 | 2,2,1,1,1,1,1,2,1,3,3,3,3,3,2,1,2,1,y2 60 | 2,2,1,1,1,1,1,2,1,3,3,3,3,2,2,2,1,1,y2 61 | 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,y1 62 | 3,1,1,1,1,2,1,2,1,4,3,4,2,4,3,2,2,6,y3 63 | 2,1,1,1,1,2,1,2,1,2,2,2,2,8,2,1,2,2,y3 64 | 2,1,1,1,1,1,1,1,1,2,3,2,3,3,2,2,1,1,y3 65 | 2,1,1,1,1,1,1,2,1,3,2,4,2,8,3,2,2,4,y3 66 | 3,2,1,1,1,2,1,2,1,2,2,2,4,8,3,1,2,3,y3 67 | 4,1,1,1,1,1,1,2,1,3,2,2,4,3,2,2,1,1,y3 68 | 2,2,2,1,2,2,1,2,1,3,3,3,3,8,3,2,2,2,y2 69 | 2,2,1,1,1,2,1,2,1,2,3,2,2,4,2,1,1,2,y2 70 | 2,1,1,1,1,1,1,1,1,1,2,2,3,3,1,2,2,1,y2 71 | 4,2,1,1,1,2,1,1,1,3,3,4,3,8,3,2,2,2,y2 72 | 4,2,1,1,2,1,1,2,1,3,3,3,3,2,2,2,2,3,y2 73 | 2,2,2,1,2,2,1,2,1,3,3,4,3,4,3,2,2,6,y2 74 | 3,2,1,1,1,1,1,1,1,2,2,3,3,5,1,1,1,1,y2 75 | 3,2,1,1,1,2,1,2,1,3,3,3,3,4,3,2,2,1,y2 76 | 3,2,2,2,2,2,2,2,3,1,1,2,2,8,1,2,2,4,y4 77 | 2,1,1,1,1,1,1,2,1,3,3,2,3,4,3,2,2,2,y3 78 | 2,1,1,1,1,1,1,2,1,2,2,4,4,8,2,1,2,2,y3 79 | 
3,1,1,1,1,2,1,2,1,2,2,4,3,8,2,2,2,1,y3 80 | 4,1,1,1,2,1,2,2,1,3,2,3,4,5,3,2,2,7,y3 81 | 2,1,1,1,1,2,1,2,1,2,2,3,3,3,3,1,2,1,y3 82 | 2,2,1,1,1,2,1,2,1,3,3,3,3,8,3,2,2,4,y3 83 | 2,2,1,1,1,1,1,1,1,2,3,3,3,3,3,1,2,1,y2 84 | 3,2,1,1,1,1,1,2,1,2,2,2,2,5,1,1,1,1,y2 85 | 4,2,2,1,1,1,1,2,1,2,2,2,3,3,2,2,2,2,y2 86 | 3,2,1,1,2,2,1,2,1,3,3,2,3,4,2,2,2,2,y2 87 | 2,2,1,1,1,1,1,1,1,1,2,2,3,3,1,1,1,1,y2 88 | 4,2,1,1,1,2,1,1,1,2,2,3,3,5,2,1,2,1,y2 89 | 2,1,1,1,1,1,1,2,1,2,2,2,2,4,1,2,1,2,y2 90 | 4,1,1,1,1,1,1,1,2,1,3,4,2,8,1,2,2,1,y2 91 | 3,1,1,1,2,2,2,1,3,1,1,4,2,5,3,1,2,4,y4 92 | 3,1,1,1,1,1,1,2,1,2,2,4,4,2,3,2,1,1,y3 93 | 3,1,1,1,1,2,1,2,1,3,3,4,4,4,3,1,2,6,y3 94 | 3,1,1,1,1,1,1,2,1,2,2,4,2,4,3,2,2,3,y3 95 | 3,1,1,1,1,2,1,2,1,4,2,2,2,4,3,2,2,7,y3 96 | 4,2,1,1,2,2,1,2,1,3,3,4,2,5,3,2,2,3,y3 97 | 2,1,1,1,1,2,1,2,1,2,2,2,4,8,1,2,2,2,y3 98 | 2,2,1,1,1,1,1,2,1,2,2,3,3,2,3,2,2,2,y2 99 | 4,2,2,1,2,2,1,2,1,3,3,4,3,8,2,2,2,3,y2 100 | 2,1,1,1,1,1,1,2,1,3,3,3,3,6,3,1,2,4,y2 101 | 3,1,1,1,1,1,1,1,1,2,2,4,3,5,1,2,2,1,y2 102 | 2,2,1,1,1,2,1,1,1,2,2,2,3,5,2,1,2,2,y2 103 | 2,2,1,1,1,1,1,1,1,2,2,3,3,5,2,2,2,1,y2 104 | 2,2,2,2,2,2,1,1,1,2,2,4,3,8,2,2,2,3,y2 105 | 3,2,2,1,2,2,1,2,1,3,2,2,3,4,1,2,2,1,y2 106 | 3,2,2,2,2,2,2,1,2,2,2,4,2,4,3,2,2,7,y4 107 | 2,1,1,1,1,2,1,2,1,3,3,2,2,4,3,2,2,6,y3 108 | 4,1,1,1,1,2,1,2,1,3,3,4,2,4,3,2,2,4,y3 109 | 4,1,1,1,1,1,1,2,1,3,2,4,4,8,3,2,2,1,y3 110 | 4,2,1,1,1,1,1,2,1,2,2,2,4,8,1,1,2,2,y3 111 | 4,1,1,1,2,2,1,2,1,3,3,3,4,5,3,2,2,4,y3 112 | 2,1,1,1,1,1,1,2,1,4,3,4,4,5,3,2,2,5,y3 113 | 2,1,1,1,1,1,1,1,1,2,2,2,2,8,1,1,1,1,y2 114 | 2,2,1,1,1,1,1,1,1,2,3,3,3,2,2,2,2,1,y2 115 | 3,2,1,1,2,2,1,1,1,2,2,4,3,2,1,2,2,3,y2 116 | 2,2,1,1,1,2,1,2,1,2,2,3,3,4,2,1,2,1,y2 117 | 3,2,1,1,1,1,1,2,1,2,3,3,3,5,2,2,1,1,y2 118 | 2,1,1,1,1,1,1,2,1,2,2,4,2,2,1,2,2,1,y2 119 | 3,1,1,1,1,1,1,2,1,2,2,2,3,2,1,2,2,1,y2 120 | 3,1,1,1,2,1,1,2,1,2,3,3,3,5,3,1,1,1,y2 121 | 3,1,1,1,2,2,2,1,3,1,1,2,1,5,3,1,1,7,y4 122 | 2,1,1,1,1,2,1,2,1,3,2,2,2,4,3,1,2,5,y3 123 | 2,2,1,1,1,1,1,2,1,4,3,4,2,7,3,2,2,5,y3 124 | 2,2,2,1,2,2,2,2,1,4,2,2,2,4,3,2,2,6,y3 125 | 2,1,1,1,1,2,1,1,1,2,2,2,2,3,1,1,1,1,y3 126 | 2,1,1,1,1,1,1,2,1,2,2,2,1,7,1,2,2,2,y3 127 | 4,2,2,1,2,2,1,2,1,1,2,2,1,3,1,2,2,2,y3 128 | 4,2,1,1,1,2,1,1,1,2,3,2,3,2,2,1,1,1,y2 129 | 2,2,2,1,1,2,1,1,1,2,3,2,3,3,2,1,1,1,y2 130 | 2,2,1,1,1,1,1,2,1,2,3,2,3,8,2,1,1,1,y2 131 | 4,2,1,1,1,1,1,2,1,2,2,2,2,3,2,1,1,1,y2 132 | 2,1,1,1,1,1,1,1,1,2,3,3,3,8,3,1,1,1,y2 133 | 3,2,1,1,1,1,1,1,1,2,2,3,3,3,3,1,1,2,y2 134 | 4,2,1,1,1,1,1,2,1,2,2,2,3,3,2,2,2,1,y2 135 | 2,2,1,1,1,1,1,2,1,2,3,3,3,5,2,1,2,1,y2 136 | 2,2,1,1,1,2,1,2,1,3,3,3,4,8,3,2,2,2,y3 137 | 4,2,2,2,2,2,2,2,1,4,3,4,4,7,3,2,2,8,y3 138 | 3,1,1,1,1,1,1,1,1,2,2,4,2,8,2,2,2,1,y3 139 | 3,1,1,1,1,2,1,1,1,3,2,3,3,8,3,2,2,2,y3 140 | 4,2,2,2,2,2,1,2,1,4,3,4,4,7,3,2,2,6,y3 141 | 4,1,1,1,1,2,1,2,1,4,3,4,3,3,3,2,2,5,y3 142 | 2,1,1,1,1,1,1,1,1,1,2,4,3,8,2,2,2,2,y2 143 | 3,1,1,1,1,1,1,2,1,3,2,2,2,8,1,1,1,1,y2 144 | 3,2,1,1,1,1,1,2,1,2,3,3,3,8,2,2,2,1,y2 145 | 3,2,2,1,1,2,1,1,1,3,2,3,3,4,3,1,2,2,y2 146 | 3,2,1,1,1,2,1,2,1,3,2,4,3,4,2,2,2,2,y2 147 | 3,2,2,1,2,2,1,1,1,3,3,4,3,4,2,2,2,3,y2 148 | 3,2,1,1,1,2,1,1,1,2,3,4,2,4,1,1,1,1,y2 149 | 2,2,1,1,1,2,1,2,1,2,2,4,4,5,3,2,2,1,y2 150 | -------------------------------------------------------------------------------- /data/wine.data: -------------------------------------------------------------------------------- 1 | 1,14.23,1.71,2.43,15.6,127,2.8,3.06,.28,2.29,5.64,1.04,3.92,1065 2 | 1,13.2,1.78,2.14,11.2,100,2.65,2.76,.26,1.28,4.38,1.05,3.4,1050 3 | 
1,13.16,2.36,2.67,18.6,101,2.8,3.24,.3,2.81,5.68,1.03,3.17,1185 4 | 1,14.37,1.95,2.5,16.8,113,3.85,3.49,.24,2.18,7.8,.86,3.45,1480 5 | 1,13.24,2.59,2.87,21,118,2.8,2.69,.39,1.82,4.32,1.04,2.93,735 6 | 1,14.2,1.76,2.45,15.2,112,3.27,3.39,.34,1.97,6.75,1.05,2.85,1450 7 | 1,14.39,1.87,2.45,14.6,96,2.5,2.52,.3,1.98,5.25,1.02,3.58,1290 8 | 1,14.06,2.15,2.61,17.6,121,2.6,2.51,.31,1.25,5.05,1.06,3.58,1295 9 | 1,14.83,1.64,2.17,14,97,2.8,2.98,.29,1.98,5.2,1.08,2.85,1045 10 | 1,13.86,1.35,2.27,16,98,2.98,3.15,.22,1.85,7.22,1.01,3.55,1045 11 | 1,14.1,2.16,2.3,18,105,2.95,3.32,.22,2.38,5.75,1.25,3.17,1510 12 | 1,14.12,1.48,2.32,16.8,95,2.2,2.43,.26,1.57,5,1.17,2.82,1280 13 | 1,13.75,1.73,2.41,16,89,2.6,2.76,.29,1.81,5.6,1.15,2.9,1320 14 | 1,14.75,1.73,2.39,11.4,91,3.1,3.69,.43,2.81,5.4,1.25,2.73,1150 15 | 1,14.38,1.87,2.38,12,102,3.3,3.64,.29,2.96,7.5,1.2,3,1547 16 | 1,13.63,1.81,2.7,17.2,112,2.85,2.91,.3,1.46,7.3,1.28,2.88,1310 17 | 1,14.3,1.92,2.72,20,120,2.8,3.14,.33,1.97,6.2,1.07,2.65,1280 18 | 1,13.83,1.57,2.62,20,115,2.95,3.4,.4,1.72,6.6,1.13,2.57,1130 19 | 1,14.19,1.59,2.48,16.5,108,3.3,3.93,.32,1.86,8.7,1.23,2.82,1680 20 | 1,13.64,3.1,2.56,15.2,116,2.7,3.03,.17,1.66,5.1,.96,3.36,845 21 | 1,14.06,1.63,2.28,16,126,3,3.17,.24,2.1,5.65,1.09,3.71,780 22 | 1,12.93,3.8,2.65,18.6,102,2.41,2.41,.25,1.98,4.5,1.03,3.52,770 23 | 1,13.71,1.86,2.36,16.6,101,2.61,2.88,.27,1.69,3.8,1.11,4,1035 24 | 1,12.85,1.6,2.52,17.8,95,2.48,2.37,.26,1.46,3.93,1.09,3.63,1015 25 | 1,13.5,1.81,2.61,20,96,2.53,2.61,.28,1.66,3.52,1.12,3.82,845 26 | 1,13.05,2.05,3.22,25,124,2.63,2.68,.47,1.92,3.58,1.13,3.2,830 27 | 1,13.39,1.77,2.62,16.1,93,2.85,2.94,.34,1.45,4.8,.92,3.22,1195 28 | 1,13.3,1.72,2.14,17,94,2.4,2.19,.27,1.35,3.95,1.02,2.77,1285 29 | 1,13.87,1.9,2.8,19.4,107,2.95,2.97,.37,1.76,4.5,1.25,3.4,915 30 | 1,14.02,1.68,2.21,16,96,2.65,2.33,.26,1.98,4.7,1.04,3.59,1035 31 | 1,13.73,1.5,2.7,22.5,101,3,3.25,.29,2.38,5.7,1.19,2.71,1285 32 | 1,13.58,1.66,2.36,19.1,106,2.86,3.19,.22,1.95,6.9,1.09,2.88,1515 33 | 1,13.68,1.83,2.36,17.2,104,2.42,2.69,.42,1.97,3.84,1.23,2.87,990 34 | 1,13.76,1.53,2.7,19.5,132,2.95,2.74,.5,1.35,5.4,1.25,3,1235 35 | 1,13.51,1.8,2.65,19,110,2.35,2.53,.29,1.54,4.2,1.1,2.87,1095 36 | 1,13.48,1.81,2.41,20.5,100,2.7,2.98,.26,1.86,5.1,1.04,3.47,920 37 | 1,13.28,1.64,2.84,15.5,110,2.6,2.68,.34,1.36,4.6,1.09,2.78,880 38 | 1,13.05,1.65,2.55,18,98,2.45,2.43,.29,1.44,4.25,1.12,2.51,1105 39 | 1,13.07,1.5,2.1,15.5,98,2.4,2.64,.28,1.37,3.7,1.18,2.69,1020 40 | 1,14.22,3.99,2.51,13.2,128,3,3.04,.2,2.08,5.1,.89,3.53,760 41 | 1,13.56,1.71,2.31,16.2,117,3.15,3.29,.34,2.34,6.13,.95,3.38,795 42 | 1,13.41,3.84,2.12,18.8,90,2.45,2.68,.27,1.48,4.28,.91,3,1035 43 | 1,13.88,1.89,2.59,15,101,3.25,3.56,.17,1.7,5.43,.88,3.56,1095 44 | 1,13.24,3.98,2.29,17.5,103,2.64,2.63,.32,1.66,4.36,.82,3,680 45 | 1,13.05,1.77,2.1,17,107,3,3,.28,2.03,5.04,.88,3.35,885 46 | 1,14.21,4.04,2.44,18.9,111,2.85,2.65,.3,1.25,5.24,.87,3.33,1080 47 | 1,14.38,3.59,2.28,16,102,3.25,3.17,.27,2.19,4.9,1.04,3.44,1065 48 | 1,13.9,1.68,2.12,16,101,3.1,3.39,.21,2.14,6.1,.91,3.33,985 49 | 1,14.1,2.02,2.4,18.8,103,2.75,2.92,.32,2.38,6.2,1.07,2.75,1060 50 | 1,13.94,1.73,2.27,17.4,108,2.88,3.54,.32,2.08,8.90,1.12,3.1,1260 51 | 1,13.05,1.73,2.04,12.4,92,2.72,3.27,.17,2.91,7.2,1.12,2.91,1150 52 | 1,13.83,1.65,2.6,17.2,94,2.45,2.99,.22,2.29,5.6,1.24,3.37,1265 53 | 1,13.82,1.75,2.42,14,111,3.88,3.74,.32,1.87,7.05,1.01,3.26,1190 54 | 1,13.77,1.9,2.68,17.1,115,3,2.79,.39,1.68,6.3,1.13,2.93,1375 55 | 1,13.74,1.67,2.25,16.4,118,2.6,2.9,.21,1.62,5.85,.92,3.2,1060 56 | 
1,13.56,1.73,2.46,20.5,116,2.96,2.78,.2,2.45,6.25,.98,3.03,1120 57 | 1,14.22,1.7,2.3,16.3,118,3.2,3,.26,2.03,6.38,.94,3.31,970 58 | 1,13.29,1.97,2.68,16.8,102,3,3.23,.31,1.66,6,1.07,2.84,1270 59 | 1,13.72,1.43,2.5,16.7,108,3.4,3.67,.19,2.04,6.8,.89,2.87,1285 60 | 2,12.37,.94,1.36,10.6,88,1.98,.57,.28,.42,1.95,1.05,1.82,520 61 | 2,12.33,1.1,2.28,16,101,2.05,1.09,.63,.41,3.27,1.25,1.67,680 62 | 2,12.64,1.36,2.02,16.8,100,2.02,1.41,.53,.62,5.75,.98,1.59,450 63 | 2,13.67,1.25,1.92,18,94,2.1,1.79,.32,.73,3.8,1.23,2.46,630 64 | 2,12.37,1.13,2.16,19,87,3.5,3.1,.19,1.87,4.45,1.22,2.87,420 65 | 2,12.17,1.45,2.53,19,104,1.89,1.75,.45,1.03,2.95,1.45,2.23,355 66 | 2,12.37,1.21,2.56,18.1,98,2.42,2.65,.37,2.08,4.6,1.19,2.3,678 67 | 2,13.11,1.01,1.7,15,78,2.98,3.18,.26,2.28,5.3,1.12,3.18,502 68 | 2,12.37,1.17,1.92,19.6,78,2.11,2,.27,1.04,4.68,1.12,3.48,510 69 | 2,13.34,.94,2.36,17,110,2.53,1.3,.55,.42,3.17,1.02,1.93,750 70 | 2,12.21,1.19,1.75,16.8,151,1.85,1.28,.14,2.5,2.85,1.28,3.07,718 71 | 2,12.29,1.61,2.21,20.4,103,1.1,1.02,.37,1.46,3.05,.906,1.82,870 72 | 2,13.86,1.51,2.67,25,86,2.95,2.86,.21,1.87,3.38,1.36,3.16,410 73 | 2,13.49,1.66,2.24,24,87,1.88,1.84,.27,1.03,3.74,.98,2.78,472 74 | 2,12.99,1.67,2.6,30,139,3.3,2.89,.21,1.96,3.35,1.31,3.5,985 75 | 2,11.96,1.09,2.3,21,101,3.38,2.14,.13,1.65,3.21,.99,3.13,886 76 | 2,11.66,1.88,1.92,16,97,1.61,1.57,.34,1.15,3.8,1.23,2.14,428 77 | 2,13.03,.9,1.71,16,86,1.95,2.03,.24,1.46,4.6,1.19,2.48,392 78 | 2,11.84,2.89,2.23,18,112,1.72,1.32,.43,.95,2.65,.96,2.52,500 79 | 2,12.33,.99,1.95,14.8,136,1.9,1.85,.35,2.76,3.4,1.06,2.31,750 80 | 2,12.7,3.87,2.4,23,101,2.83,2.55,.43,1.95,2.57,1.19,3.13,463 81 | 2,12,.92,2,19,86,2.42,2.26,.3,1.43,2.5,1.38,3.12,278 82 | 2,12.72,1.81,2.2,18.8,86,2.2,2.53,.26,1.77,3.9,1.16,3.14,714 83 | 2,12.08,1.13,2.51,24,78,2,1.58,.4,1.4,2.2,1.31,2.72,630 84 | 2,13.05,3.86,2.32,22.5,85,1.65,1.59,.61,1.62,4.8,.84,2.01,515 85 | 2,11.84,.89,2.58,18,94,2.2,2.21,.22,2.35,3.05,.79,3.08,520 86 | 2,12.67,.98,2.24,18,99,2.2,1.94,.3,1.46,2.62,1.23,3.16,450 87 | 2,12.16,1.61,2.31,22.8,90,1.78,1.69,.43,1.56,2.45,1.33,2.26,495 88 | 2,11.65,1.67,2.62,26,88,1.92,1.61,.4,1.34,2.6,1.36,3.21,562 89 | 2,11.64,2.06,2.46,21.6,84,1.95,1.69,.48,1.35,2.8,1,2.75,680 90 | 2,12.08,1.33,2.3,23.6,70,2.2,1.59,.42,1.38,1.74,1.07,3.21,625 91 | 2,12.08,1.83,2.32,18.5,81,1.6,1.5,.52,1.64,2.4,1.08,2.27,480 92 | 2,12,1.51,2.42,22,86,1.45,1.25,.5,1.63,3.6,1.05,2.65,450 93 | 2,12.69,1.53,2.26,20.7,80,1.38,1.46,.58,1.62,3.05,.96,2.06,495 94 | 2,12.29,2.83,2.22,18,88,2.45,2.25,.25,1.99,2.15,1.15,3.3,290 95 | 2,11.62,1.99,2.28,18,98,3.02,2.26,.17,1.35,3.25,1.16,2.96,345 96 | 2,12.47,1.52,2.2,19,162,2.5,2.27,.32,3.28,2.6,1.16,2.63,937 97 | 2,11.81,2.12,2.74,21.5,134,1.6,.99,.14,1.56,2.5,.95,2.26,625 98 | 2,12.29,1.41,1.98,16,85,2.55,2.5,.29,1.77,2.9,1.23,2.74,428 99 | 2,12.37,1.07,2.1,18.5,88,3.52,3.75,.24,1.95,4.5,1.04,2.77,660 100 | 2,12.29,3.17,2.21,18,88,2.85,2.99,.45,2.81,2.3,1.42,2.83,406 101 | 2,12.08,2.08,1.7,17.5,97,2.23,2.17,.26,1.4,3.3,1.27,2.96,710 102 | 2,12.6,1.34,1.9,18.5,88,1.45,1.36,.29,1.35,2.45,1.04,2.77,562 103 | 2,12.34,2.45,2.46,21,98,2.56,2.11,.34,1.31,2.8,.8,3.38,438 104 | 2,11.82,1.72,1.88,19.5,86,2.5,1.64,.37,1.42,2.06,.94,2.44,415 105 | 2,12.51,1.73,1.98,20.5,85,2.2,1.92,.32,1.48,2.94,1.04,3.57,672 106 | 2,12.42,2.55,2.27,22,90,1.68,1.84,.66,1.42,2.7,.86,3.3,315 107 | 2,12.25,1.73,2.12,19,80,1.65,2.03,.37,1.63,3.4,1,3.17,510 108 | 2,12.72,1.75,2.28,22.5,84,1.38,1.76,.48,1.63,3.3,.88,2.42,488 109 | 2,12.22,1.29,1.94,19,92,2.36,2.04,.39,2.08,2.7,.86,3.02,312 
110 | 2,11.61,1.35,2.7,20,94,2.74,2.92,.29,2.49,2.65,.96,3.26,680 111 | 2,11.46,3.74,1.82,19.5,107,3.18,2.58,.24,3.58,2.9,.75,2.81,562 112 | 2,12.52,2.43,2.17,21,88,2.55,2.27,.26,1.22,2,.9,2.78,325 113 | 2,11.76,2.68,2.92,20,103,1.75,2.03,.6,1.05,3.8,1.23,2.5,607 114 | 2,11.41,.74,2.5,21,88,2.48,2.01,.42,1.44,3.08,1.1,2.31,434 115 | 2,12.08,1.39,2.5,22.5,84,2.56,2.29,.43,1.04,2.9,.93,3.19,385 116 | 2,11.03,1.51,2.2,21.5,85,2.46,2.17,.52,2.01,1.9,1.71,2.87,407 117 | 2,11.82,1.47,1.99,20.8,86,1.98,1.6,.3,1.53,1.95,.95,3.33,495 118 | 2,12.42,1.61,2.19,22.5,108,2,2.09,.34,1.61,2.06,1.06,2.96,345 119 | 2,12.77,3.43,1.98,16,80,1.63,1.25,.43,.83,3.4,.7,2.12,372 120 | 2,12,3.43,2,19,87,2,1.64,.37,1.87,1.28,.93,3.05,564 121 | 2,11.45,2.4,2.42,20,96,2.9,2.79,.32,1.83,3.25,.8,3.39,625 122 | 2,11.56,2.05,3.23,28.5,119,3.18,5.08,.47,1.87,6,.93,3.69,465 123 | 2,12.42,4.43,2.73,26.5,102,2.2,2.13,.43,1.71,2.08,.92,3.12,365 124 | 2,13.05,5.8,2.13,21.5,86,2.62,2.65,.3,2.01,2.6,.73,3.1,380 125 | 2,11.87,4.31,2.39,21,82,2.86,3.03,.21,2.91,2.8,.75,3.64,380 126 | 2,12.07,2.16,2.17,21,85,2.6,2.65,.37,1.35,2.76,.86,3.28,378 127 | 2,12.43,1.53,2.29,21.5,86,2.74,3.15,.39,1.77,3.94,.69,2.84,352 128 | 2,11.79,2.13,2.78,28.5,92,2.13,2.24,.58,1.76,3,.97,2.44,466 129 | 2,12.37,1.63,2.3,24.5,88,2.22,2.45,.4,1.9,2.12,.89,2.78,342 130 | 2,12.04,4.3,2.38,22,80,2.1,1.75,.42,1.35,2.6,.79,2.57,580 131 | 3,12.86,1.35,2.32,18,122,1.51,1.25,.21,.94,4.1,.76,1.29,630 132 | 3,12.88,2.99,2.4,20,104,1.3,1.22,.24,.83,5.4,.74,1.42,530 133 | 3,12.81,2.31,2.4,24,98,1.15,1.09,.27,.83,5.7,.66,1.36,560 134 | 3,12.7,3.55,2.36,21.5,106,1.7,1.2,.17,.84,5,.78,1.29,600 135 | 3,12.51,1.24,2.25,17.5,85,2,.58,.6,1.25,5.45,.75,1.51,650 136 | 3,12.6,2.46,2.2,18.5,94,1.62,.66,.63,.94,7.1,.73,1.58,695 137 | 3,12.25,4.72,2.54,21,89,1.38,.47,.53,.8,3.85,.75,1.27,720 138 | 3,12.53,5.51,2.64,25,96,1.79,.6,.63,1.1,5,.82,1.69,515 139 | 3,13.49,3.59,2.19,19.5,88,1.62,.48,.58,.88,5.7,.81,1.82,580 140 | 3,12.84,2.96,2.61,24,101,2.32,.6,.53,.81,4.92,.89,2.15,590 141 | 3,12.93,2.81,2.7,21,96,1.54,.5,.53,.75,4.6,.77,2.31,600 142 | 3,13.36,2.56,2.35,20,89,1.4,.5,.37,.64,5.6,.7,2.47,780 143 | 3,13.52,3.17,2.72,23.5,97,1.55,.52,.5,.55,4.35,.89,2.06,520 144 | 3,13.62,4.95,2.35,20,92,2,.8,.47,1.02,4.4,.91,2.05,550 145 | 3,12.25,3.88,2.2,18.5,112,1.38,.78,.29,1.14,8.21,.65,2,855 146 | 3,13.16,3.57,2.15,21,102,1.5,.55,.43,1.3,4,.6,1.68,830 147 | 3,13.88,5.04,2.23,20,80,.98,.34,.4,.68,4.9,.58,1.33,415 148 | 3,12.87,4.61,2.48,21.5,86,1.7,.65,.47,.86,7.65,.54,1.86,625 149 | 3,13.32,3.24,2.38,21.5,92,1.93,.76,.45,1.25,8.42,.55,1.62,650 150 | 3,13.08,3.9,2.36,21.5,113,1.41,1.39,.34,1.14,9.40,.57,1.33,550 151 | 3,13.5,3.12,2.62,24,123,1.4,1.57,.22,1.25,8.60,.59,1.3,500 152 | 3,12.79,2.67,2.48,22,112,1.48,1.36,.24,1.26,10.8,.48,1.47,480 153 | 3,13.11,1.9,2.75,25.5,116,2.2,1.28,.26,1.56,7.1,.61,1.33,425 154 | 3,13.23,3.3,2.28,18.5,98,1.8,.83,.61,1.87,10.52,.56,1.51,675 155 | 3,12.58,1.29,2.1,20,103,1.48,.58,.53,1.4,7.6,.58,1.55,640 156 | 3,13.17,5.19,2.32,22,93,1.74,.63,.61,1.55,7.9,.6,1.48,725 157 | 3,13.84,4.12,2.38,19.5,89,1.8,.83,.48,1.56,9.01,.57,1.64,480 158 | 3,12.45,3.03,2.64,27,97,1.9,.58,.63,1.14,7.5,.67,1.73,880 159 | 3,14.34,1.68,2.7,25,98,2.8,1.31,.53,2.7,13,.57,1.96,660 160 | 3,13.48,1.67,2.64,22.5,89,2.6,1.1,.52,2.29,11.75,.57,1.78,620 161 | 3,12.36,3.83,2.38,21,88,2.3,.92,.5,1.04,7.65,.56,1.58,520 162 | 3,13.69,3.26,2.54,20,107,1.83,.56,.5,.8,5.88,.96,1.82,680 163 | 3,12.85,3.27,2.58,22,106,1.65,.6,.6,.96,5.58,.87,2.11,570 164 | 
3,12.96,3.45,2.35,18.5,106,1.39,.7,.4,.94,5.28,.68,1.75,675 165 | 3,13.78,2.76,2.3,22,90,1.35,.68,.41,1.03,9.58,.7,1.68,615 166 | 3,13.73,4.36,2.26,22.5,88,1.28,.47,.52,1.15,6.62,.78,1.75,520 167 | 3,13.45,3.7,2.6,23,111,1.7,.92,.43,1.46,10.68,.85,1.56,695 168 | 3,12.82,3.37,2.3,19.5,88,1.48,.66,.4,.97,10.26,.72,1.75,685 169 | 3,13.58,2.58,2.69,24.5,105,1.55,.84,.39,1.54,8.66,.74,1.8,750 170 | 3,13.4,4.6,2.86,25,112,1.98,.96,.27,1.11,8.5,.67,1.92,630 171 | 3,12.2,3.03,2.32,19,96,1.25,.49,.4,.73,5.5,.66,1.83,510 172 | 3,12.77,2.39,2.28,19.5,86,1.39,.51,.48,.64,9.899999,.57,1.63,470 173 | 3,14.16,2.51,2.48,20,91,1.68,.7,.44,1.24,9.7,.62,1.71,660 174 | 3,13.71,5.65,2.45,20.5,95,1.68,.61,.52,1.06,7.7,.64,1.74,740 175 | 3,13.4,3.91,2.48,23,102,1.8,.75,.43,1.41,7.3,.7,1.56,750 176 | 3,13.27,4.28,2.26,20,120,1.59,.69,.43,1.35,10.2,.59,1.56,835 177 | 3,13.17,2.59,2.37,20,120,1.65,.68,.53,1.46,9.3,.6,1.62,840 178 | 3,14.13,4.1,2.74,24.5,96,2.05,.76,.56,1.35,9.2,.61,1.6,560 179 | -------------------------------------------------------------------------------- /evolving_tree.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/predict-idlab/GENESIM/44925de91ae408fea30ea8ece3688f9e42d4f040/evolving_tree.gif -------------------------------------------------------------------------------- /example.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is an example script that applies k-fold cross-validation on all datasets that have a load function in 3 | `data.load_datasets`, for all implemented tree constructors, ensemble techniques and GENESIM. In the end, 4 | the confusion matrices will be stored at `output/dataset_name_CVk.png` and the average model complexity and 5 | computational time required by each of the algorithms will be printed out. 6 | 7 | Written by Gilles Vandewiele, commissioned by IDLab - INTEC of Ghent University. 
8 | """ 9 | 10 | 11 | import time 12 | 13 | from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis 14 | from sklearn.grid_search import GridSearchCV 15 | from sklearn.metrics import confusion_matrix 16 | from sklearn.cross_validation import StratifiedKFold, KFold 17 | 18 | import matplotlib.pyplot as plt 19 | import numpy as np 20 | from sklearn.neural_network import MLPClassifier 21 | 22 | import constructors.ISM 23 | from constructors.ensemble import RFClassification, XGBClassification, bootstrap 24 | from constructors.genesim import GENESIM 25 | from constructors.inTrees import inTreesClassifier 26 | from constructors.treeconstructor import QUESTConstructor, GUIDEConstructor, C45Constructor, CARTConstructor 27 | from data.load_all_datasets import load_all_datasets 28 | from decisiontree import DecisionTree 29 | 30 | if __name__ == "__main__": 31 | 32 | algorithms = {QUESTConstructor().get_name(): QUESTConstructor(), 33 | GUIDEConstructor().get_name(): GUIDEConstructor(), 34 | CARTConstructor().get_name(): CARTConstructor(), 35 | C45Constructor().get_name(): C45Constructor(), 36 | RFClassification().get_name(): RFClassification(), 37 | XGBClassification().get_name(): XGBClassification() 38 | } 39 | genesim = GENESIM() 40 | inTrees_clf = inTreesClassifier() 41 | 42 | NR_FOLDS = 5 43 | for dataset in load_all_datasets(): 44 | df = dataset['dataframe'] 45 | label_col = dataset['label_col'] 46 | feature_cols = dataset['feature_cols'] 47 | 48 | conf_matrices, avg_nodes, times = {}, {}, {} 49 | 50 | for algorithm in algorithms: 51 | conf_matrices[algorithm] = [] 52 | avg_nodes[algorithm] = [] 53 | times[algorithm] = [] 54 | conf_matrices['GENESIM'], avg_nodes['GENESIM'], times['GENESIM'] = [], [], [] 55 | conf_matrices['ISM'], avg_nodes['ISM'], times['ISM'] = [], [], [] 56 | conf_matrices['inTrees'], avg_nodes['inTrees'], times['inTrees'] = [], [], [] 57 | 58 | skf = StratifiedKFold(df[label_col], n_folds=NR_FOLDS, shuffle=True, random_state=None) 59 | 60 | for fold, (train_idx, test_idx) in enumerate(skf): 61 | print 'Fold', fold+1, '/', NR_FOLDS, 'for dataset', dataset['name'] 62 | train = df.iloc[train_idx, :].reset_index(drop=True) 63 | X_train = train.drop(label_col, axis=1) 64 | y_train = train[label_col] 65 | test = df.iloc[test_idx, :].reset_index(drop=True) 66 | X_test = test.drop(label_col, axis=1) 67 | y_test = test[label_col] 68 | 69 | for algorithm in algorithms: 70 | print algorithm 71 | start = time.time() 72 | clf = algorithms[algorithm].construct_classifier(train, feature_cols, label_col) 73 | end = time.time() 74 | times[algorithm].append(end-start) 75 | predictions = clf.evaluate_multiple(X_test).astype(int) 76 | conf_matrix = confusion_matrix(y_test, predictions) 77 | print conf_matrix 78 | diagonal_sum = sum( 79 | [conf_matrix[i][i] for i in range(len(conf_matrix))]) 80 | norm_diagonal_sum = sum( 81 | [float(conf_matrix[i][i]) / float(sum(conf_matrix[i])) for i in 82 | range(len(conf_matrix))]) 83 | total_count = np.sum(conf_matrix) 84 | print 'Accuracy:', float(diagonal_sum) / float(total_count) 85 | print 'Balanced accuracy:', float(norm_diagonal_sum) / float(conf_matrix.shape[0]) 86 | conf_matrices[algorithm].append(confusion_matrix(y_test, predictions)) 87 | if type(clf) is DecisionTree: 88 | avg_nodes[algorithm].append(clf.count_nodes()) 89 | else: 90 | avg_nodes[algorithm].append(clf.nr_clf) 91 | 92 | _constructors = [CARTConstructor(), QUESTConstructor(), GUIDEConstructor()] 93 | 94 | print 'inTrees' 95 | start = 
time.time() 96 | orl = inTrees_clf.construct_rule_list(train, label_col, _constructors, nr_bootstraps=25) 97 | end = time.time() 98 | times['inTrees'].append(end-start) 99 | predictions = orl.evaluate_multiple(X_test).astype(int) 100 | conf_matrices['inTrees'].append(confusion_matrix(y_test, predictions)) 101 | conf_matrix = confusion_matrix(y_test, predictions) 102 | print conf_matrix 103 | diagonal_sum = sum( 104 | [conf_matrix[i][i] for i in range(len(conf_matrix))]) 105 | norm_diagonal_sum = sum( 106 | [float(conf_matrix[i][i]) / float(sum(conf_matrix[i])) for i in 107 | range(len(conf_matrix))]) 108 | total_count = np.sum(conf_matrix) 109 | correct = 0 110 | for i in range(len(conf_matrix)): 111 | correct += conf_matrix[i][i] + conf_matrix[i][max(i - 1, 0)] * ((i - 1) >= 0) + \ 112 | conf_matrix[i][min(i + 1, len(conf_matrix[i]) - 1)] * ((i + 1) <= len(conf_matrix[i]) - 1) 113 | # print 'Accuracy [-1, +1]:', float(correct) / float(total_count) 114 | print 'Accuracy:', float(diagonal_sum) / float(total_count) 115 | print 'Balanced accuracy:', float(norm_diagonal_sum) / float(conf_matrix.shape[0]) 116 | avg_nodes['inTrees'].append(len(orl.rule_list)) 117 | 118 | print 'ISM' 119 | start = time.time() 120 | ism_tree = constructors.ISM.ism(bootstrap(train, label_col, _constructors, boosting=True, nr_classifiers=5), 121 | train, label_col, min_nr_samples=1, calc_fracs_from_ensemble=False) 122 | ism_pruned = ism_tree.cost_complexity_pruning(X_train, y_train, 'ism', ism_constructors=_constructors, 123 | ism_calc_fracs=False, n_folds=3, ism_nr_classifiers=5, 124 | ism_boosting=True) 125 | end = time.time() 126 | times['ISM'].append(end - start) 127 | predictions = ism_pruned.evaluate_multiple(X_test).astype(int) 128 | conf_matrices['ISM'].append(confusion_matrix(y_test, predictions)) 129 | avg_nodes['ISM'].append(ism_pruned.count_nodes()) 130 | conf_matrix = confusion_matrix(y_test, predictions) 131 | print conf_matrix 132 | diagonal_sum = sum( 133 | [conf_matrix[i][i] for i in range(len(conf_matrix))]) 134 | norm_diagonal_sum = sum( 135 | [float(conf_matrix[i][i]) / float(sum(conf_matrix[i])) for i in 136 | range(len(conf_matrix))]) 137 | total_count = np.sum(conf_matrix) 138 | correct = 0 139 | for i in range(len(conf_matrix)): 140 | correct += conf_matrix[i][i] + conf_matrix[i][max(i - 1, 0)] * ((i - 1) >= 0) + \ 141 | conf_matrix[i][min(i + 1, len(conf_matrix[i]) - 1)] * ((i + 1) <= len(conf_matrix[i]) - 1) 142 | # print 'Accuracy [-1, +1]:', float(correct) / float(total_count) 143 | print 'Accuracy:', float(diagonal_sum) / float(total_count) 144 | print 'Balanced accuracy:', float(norm_diagonal_sum) / float(conf_matrix.shape[0]) 145 | 146 | 147 | print 'GENESIM' 148 | # train_gen = train.rename(columns={'Class': 'cat'}) 149 | start = time.time() 150 | genetic = genesim.genetic_algorithm(train, label_col, _constructors, seed=None, num_iterations=40, 151 | num_crossovers=15, population_size=250, val_fraction=0.4, prune=True, 152 | max_samples=3, tournament_size=15, nr_bootstraps=40) 153 | end = time.time() 154 | times['GENESIM'].append(end - start) 155 | predictions = genetic.evaluate_multiple(X_test).astype(int) 156 | conf_matrices['GENESIM'].append(confusion_matrix(y_test, predictions)) 157 | conf_matrix = confusion_matrix(y_test, predictions) 158 | print conf_matrix 159 | diagonal_sum = sum( 160 | [conf_matrix[i][i] for i in range(len(conf_matrix))]) 161 | norm_diagonal_sum = sum( 162 | [float(conf_matrix[i][i]) / 
float(sum(conf_matrix[i])) for i in 163 | range(len(conf_matrix))]) 164 | total_count = np.sum(conf_matrix) 165 | correct = 0 166 | for i in range(len(conf_matrix)): 167 | correct += conf_matrix[i][i] + conf_matrix[i][max(i - 1, 0)] * ((i - 1) >= 0) + \ 168 | conf_matrix[i][min(i + 1, len(conf_matrix[i]) - 1)] * ((i + 1) <= len(conf_matrix[i]) - 1) 169 | # print 'Accuracy [-1, +1]:', float(correct) / float(total_count) 170 | print 'Accuracy:', float(diagonal_sum) / float(total_count) 171 | print 'Balanced accuracy:', float(norm_diagonal_sum) / float(conf_matrix.shape[0]) 172 | avg_nodes['GENESIM'].append(genetic.count_nodes()) 173 | 174 | print times 175 | print avg_nodes 176 | 177 | fig = plt.figure() 178 | fig.suptitle('Accuracy on ' + dataset['name'] + ' dataset using ' + str(NR_FOLDS) + ' folds', fontsize=20) 179 | counter = 0 180 | conf_matrices_mean = {} 181 | for key in conf_matrices: 182 | conf_matrices_mean[key] = np.zeros(conf_matrices[key][0].shape) 183 | for i in range(len(conf_matrices[key])): 184 | conf_matrices_mean[key] = np.add(conf_matrices_mean[key], conf_matrices[key][i]) 185 | cm_normalized = np.around( 186 | conf_matrices_mean[key].astype('float') / conf_matrices_mean[key].sum(axis=1)[:, 187 | np.newaxis], 4) 188 | 189 | diagonal_sum = sum( 190 | [conf_matrices_mean[key][i][i] for i in range(len(conf_matrices_mean[key]))]) 191 | norm_diagonal_sum = sum( 192 | [conf_matrices_mean[key][i][i]/sum(conf_matrices_mean[key][i]) for i in range(len(conf_matrices_mean[key]))]) 193 | total_count = np.sum(conf_matrices_mean[key]) 194 | print key 195 | print conf_matrices_mean[key] 196 | correct = 0 197 | for i in range(len(conf_matrices_mean[key])): 198 | correct += conf_matrices_mean[key][i][i] + conf_matrices_mean[key][i][max(i - 1, 0)] * ((i - 1) >= 0) + \ 199 | conf_matrices_mean[key][i][min(i + 1, len(conf_matrices_mean[key][i]) - 1)] * ((i + 1) <= len(conf_matrices_mean[key][i]) - 1) 200 | print 'Accuracy [-1, +1]:', float(correct) / float(total_count) 201 | print 'Accuracy:', float(diagonal_sum) / float(total_count) 202 | print 'Balanced accuracy:', float(norm_diagonal_sum) / conf_matrices_mean[key].shape[0] 203 | 204 | ax = fig.add_subplot(2, np.math.ceil(len(conf_matrices) / 2.0), counter + 1) 205 | cax = ax.matshow(cm_normalized, cmap=plt.cm.Blues, vmin=0.0, vmax=1.0) 206 | ax.set_title(key + '(' + str(sum(avg_nodes[key])/len(avg_nodes[key])) + ')', y=1.08) 207 | for (j, i), label in np.ndenumerate(cm_normalized): 208 | ax.text(i, j, label, ha='center', va='center') 209 | if counter == len(conf_matrices) - 1: 210 | fig.colorbar(cax, fraction=0.046, pad=0.04) 211 | counter += 1 212 | F = plt.gcf() 213 | Size = F.get_size_inches() 214 | F.set_size_inches(Size[0] * 2, Size[1] * 1.75, forward=True) 215 | plt.savefig('output/' + dataset['name'] + '_CV'+str(NR_FOLDS)+'.png', bbox_inches='tight') 216 | plt.show() -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Install some python packages 4 | pip install pandas 5 | pip install numpy 6 | pip install sklearn 7 | pip install matplotlib 8 | pip install -U imbalanced-learn 9 | pip install orange 10 | pip install graphviz 11 | pip install xgboost 12 | pip install rpy2 13 | pip install pylatex 14 | 15 | # For bayesian optimization: download source and install it 16 | git clone https://github.com/fmfn/BayesianOptimization.git 17 | 
cd BayesianOptimization 18 | sudo python setup.py install 19 | cd .. 20 | 21 | # Special care needed for C45Learner from Orange 22 | wget http://www.rulequest.com/Personal/c4.5r8.tar.gz 23 | tar -xvzf c4.5r8.tar.gz 24 | cd R8/Src 25 | wget https://raw.githubusercontent.com/biolab/orange/master/Orange/orng/buildC45.py 26 | wget https://raw.githubusercontent.com/biolab/orange/master/Orange/orng/ensemble.c 27 | sudo python buildC45.py 28 | cd .. 29 | cd .. 30 | 31 | # Install some R packages 32 | wget https://cran.r-project.org/src/contrib/randomForest_4.6-12.tar.gz 33 | tar -xvzf randomForest_4.6-12.tar.gz 34 | sudo R -e 'install.packages("'$(pwd)'/randomForest", repos=NULL, type="source")' 35 | wget https://cran.r-project.org/src/contrib/inTrees_1.1.tar.gz 36 | tar -xvzf inTrees_1.1.tar.gz 37 | sudo R -e 'install.packages("'$(pwd)'/inTrees", repos=NULL, type="source")' 38 | 39 | 40 | # sudo cp matplotlibrc /users/givdwiel/.local/lib/python2.7/site-packages/matplotlib/mpl-data/matplotlibrc 41 | 42 | 43 | 44 | --------------------------------------------------------------------------------
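
For quick reference, below is a minimal usage sketch that exercises the pieces shipped in this repository in isolation: it loads one dataset through a loader from `data.load_datasets` (every loader returns the same `(dataframe, feature_cols, label_col, name)` tuple, as `load_migbase` above shows), trains a single tree constructor and evaluates it on held-out data, mirroring one iteration of the per-fold loop in `example.py`. The calls to `construct_classifier`, `evaluate_multiple` and `confusion_matrix` are taken from `example.py`; the fixed seed and the 75/25 random split are illustrative assumptions (`example.py` itself uses `StratifiedKFold`), and the sketch assumes the Python 2.7 environment set up by the Dockerfile or install.sh.

import numpy as np
from sklearn.metrics import confusion_matrix

from constructors.treeconstructor import C45Constructor
from data.load_datasets import load_migbase

# Each loader returns (dataframe, feature_cols, label_col, dataset_name).
df, feature_cols, label_col, name = load_migbase()

# Illustrative 75/25 random split; example.py uses stratified k-fold CV instead.
np.random.seed(1337)
mask = np.random.rand(len(df)) < 0.75
train = df[mask].reset_index(drop=True)
test = df[~mask].reset_index(drop=True)

# Fit one constructor and predict on the held-out samples, as in example.py.
clf = C45Constructor().construct_classifier(train, feature_cols, label_col)
predictions = clf.evaluate_multiple(test.drop(label_col, axis=1)).astype(int)

# Accuracy is the sum of the confusion-matrix diagonal over the total count.
conf_matrix = confusion_matrix(test[label_col], predictions)
print 'Accuracy of C4.5 on', name, ':', float(np.trace(conf_matrix)) / float(np.sum(conf_matrix))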