├── .gitignore
├── Dockerfile
├── LICENSE
├── README.md
├── RFTest.py
├── __init__.py
├── constructors
│   ├── ISM.py
│   ├── __init__.py
│   ├── ensemble.py
│   ├── genesim.py
│   ├── guide
│   ├── inTrees.py
│   ├── quest
│   └── treeconstructor.py
├── data
│   ├── __init__.py
│   ├── austra.data
│   ├── breast-cancer-wisconsin.data
│   ├── car.data
│   ├── ecoli.data
│   ├── glass.data
│   ├── heart.dat
│   ├── labor.arff
│   ├── led7.data
│   ├── load_all_datasets.py
│   ├── load_datasets.py
│   ├── lymph.data
│   ├── magic04.data
│   ├── migbase.csv
│   ├── migbase_noise20.csv
│   ├── migbase_noise33.csv
│   ├── migbase_noise50.csv
│   ├── nursery.data
│   ├── pima.data
│   ├── reduced_migbase.csv
│   ├── shuttle.tst
│   ├── shuttle_full.trn
│   ├── vehicle.data
│   ├── waveform.data
│   ├── wine.data
│   └── yeast.data
├── decisiontree.py
├── doc
│   ├── constructors
│   │   ├── ISM.m.html
│   │   ├── ensemble.m.html
│   │   ├── genesim.m.html
│   │   ├── inTrees.m.html
│   │   ├── index.html
│   │   └── treeconstructor.m.html
│   ├── data
│   │   ├── index.html
│   │   ├── load_all_datasets.m.html
│   │   └── load_datasets.m.html
│   ├── decisiontree.m.html
│   ├── example.m.html
│   └── index.html
├── evolving_tree.gif
├── example.py
└── install.sh
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | *.pyc
3 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # Use an official Python runtime as a base image
2 | FROM python:2.7-slim
3 |
4 | # Installing some command line tools, required to build all dependencies
5 | RUN apt-get update
6 | RUN apt-get -y install libc-dev
7 | RUN apt-get -y install gcc
8 | RUN apt-get -y install g++
9 | RUN apt-get -y install git
10 | RUN apt-get -y install wget
11 | RUN apt-get -y install gfortran
12 | RUN apt-get -y install build-essential
13 | RUN apt-get -y install r-base
14 | RUN apt-get -y install libcurl4-openssl-dev
15 | RUN apt-get -y install tk
16 | RUN apt-get -y install libcurl4-gnutls-dev
17 | RUN apt-get -y install libssl-dev
18 |
19 | # Clone the repo, set working dir
20 | RUN git clone https://github.com/GillesVandewiele/GENESIM-1
21 | WORKDIR /GENESIM-1
22 |
23 | # Install the required python libraries
24 | RUN pip install pandas
25 | RUN pip install numpy
26 | RUN pip install sklearn
27 | RUN pip install matplotlib
28 | RUN pip install -U imbalanced-learn
29 | RUN pip install graphviz
30 | RUN pip install xgboost
31 | RUN pip install rpy2
32 | RUN pip install pylatex
33 | RUN pip install orange
34 | RUN pip install bayesian-optimization
35 |
36 | # Install R 3.3.2
37 | RUN wget https://cran.rstudio.com/src/base/R-3/R-3.3.2.tar.gz
38 | RUN tar -xvzf R-3.3.2.tar.gz
39 | RUN cd R-3.3.2 && ./configure --with-readline=no --with-x=no && make && make install
40 |
41 | # Special care needed for C45Learner from Orange
42 | RUN wget http://www.rulequest.com/Personal/c4.5r8.tar.gz
43 | RUN tar -xvzf c4.5r8.tar.gz
44 | RUN cd R8/Src && wget https://raw.githubusercontent.com/biolab/orange/master/Orange/orng/buildC45.py && wget https://raw.githubusercontent.com/biolab/orange/master/Orange/orng/ensemble.c && python buildC45.py
45 |
46 | # Install some R packages
47 | RUN wget https://cran.r-project.org/src/contrib/randomForest_4.6-12.tar.gz
48 | RUN tar -xvzf randomForest_4.6-12.tar.gz
49 | RUN R -e 'install.packages("'$(pwd)'/randomForest", repos=NULL, type="source")'
50 | RUN wget https://cran.r-project.org/src/contrib/inTrees_1.1.tar.gz
51 | RUN tar -xvzf inTrees_1.1.tar.gz
52 | RUN R -e 'install.packages("devtools", repos="http://cran.us.r-project.org")'
53 | RUN R -e 'library(devtools); install("'$(pwd)'/inTrees", dependencies=TRUE)'
54 |
55 | CMD ["python", "example.py"]
56 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2016 Ghent University and iMinds vzw with offices at Technologiepark 15, 9052 Ghent, Belgium.
2 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software for non-commercial educational and research use, including without limitation the rights to use, copy, modify, merge, publish, distribute and/or sublicense copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
3 | 1. The above copyright notice and this permission notice shall be included in all copies of the Software.
4 | 2. Permission is restricted to non-commercial educational and research use: the use of the Software is allowed for teaching purposes and academic research. Usage by non-academic parties is allowed in a strict research environment only. The use of the results of the research for commercial purposes or inclusion in commercial activities requires the permission of iMinds vzw.
5 | 3. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
6 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # GENESIM: GENetic Extraction of a Single, Interpretable Model
2 |
3 |
4 |
5 |
6 |
7 | This repository contains an innovative algorithm that constructs an ensemble using well-known decision tree induction algorithms such as CART, C4.5, QUEST and GUIDE, combined with bagging and boosting. Then, this ensemble is converted to a single, interpretable decision tree in a genetic fashion. For a certain number of iterations, random pairs of decision trees are merged together by first converting them to sets of k-dimensional hyperplanes and then calculating the intersection of these two sets (a classic problem from computational geometry). Moreover, in each iteration, an individual is mutated with a certain probability. After these iterations, the accuracy on a validation set is measured for each of the decision trees in the population and the one with the highest accuracy (and the lowest number of nodes in case of a tie) is returned. `example.py` contains run code for all implemented algorithms and reports their average predictive performance, computational complexity and model complexity on a number of datasets.
8 |
9 | ## Dependencies
10 |
11 | An `install.sh` script is provided that installs all required dependencies.
12 |
13 | ## Documentation
14 |
15 | A nice-looking documentation page is available in the `doc/` directory. Download the complete directory and open `index.html`.
16 |
17 | ## Decision Tree Induction Algorithm Wrappers
18 |
19 | Wrappers are written around [Orange C4.5](http://docs.orange.biolab.si/2/reference/rst/Orange.classification.tree.html#Orange.classification.tree.C45Learner), [sklearn CART](http://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html), [GUIDE](https://www.stat.wisc.edu/~loh/guide.html) and [QUEST](https://www.stat.wisc.edu/~loh/quest.html). The returned object is a `DecisionTree`, which can be found in `decisiontree.py`. Different methods are available on this decision tree: classify new, unknown samples; visualise the tree; export it to string, JSON and DOT; etc.
20 |
21 | ## Ensemble Technique Wrappers
22 |
23 | Wrappers are written around the well-known state-of-the-art ensemble techniques [XGBoost](http://xgboost.readthedocs.io/en/latest/python/python_intro.html) and [Random Forests](http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html).
24 |
25 | ## Similar techniques
26 |
27 | A wrapper written around the R package [inTrees](https://arxiv.org/abs/1408.5456) and an implementation of [ISM](https://lirias.kuleuven.be/handle/123456789/146229) can be found in the constructors package.
28 |
29 | ## New dataset
30 |
31 | A new dataset can easily be plugged into the benchmark. For this, a `load_dataset()` function must be written in `load_datasets.py` (a sketch follows below).
32 |
33 | ## Contact
34 |
35 | You can contact me at givdwiel.vandewiele at ugent.be for any questions or proposals, or if you wish to contribute.
36 |
37 | ## Referring
38 |
39 | Please cite this work when you use it, with a reference to this GitHub repository or to the following (yet unpublished) paper:
40 |
41 | ```
42 | @article{vandewiele2016genesim,
43 | title={GENESIM: genetic extraction of a single, interpretable model},
44 | author={Vandewiele, Gilles and Janssens, Olivier and Ongenae, Femke and De Turck, Filip and Van Hoecke, Sofie},
45 | journal={arXiv preprint arXiv:1611.05722},
46 | year={2016}
47 | }
48 | ```
49 |
50 |
--------------------------------------------------------------------------------
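A sketch of the `load_dataset()` hook described in the "New dataset" section above may help: the following hypothetical loader follows the dataset dict format consumed by `RFTest.py` (`name`, `dataframe`, `label_col`, `feature_cols`). The file name `my_dataset.csv` and its `Class` column are assumptions for illustration.

```python
import os

import pandas as pd


def load_my_dataset():
    """Hypothetical loader for data/load_datasets.py, returning the dict
    format consumed by RFTest.py (name, dataframe, label_col, feature_cols)."""
    # Assumption: a CSV file with a 'Class' label column next to this script
    df = pd.read_csv(os.path.join(os.path.dirname(__file__), 'my_dataset.csv'))
    feature_cols = [col for col in df.columns if col != 'Class']
    return {'name': 'my_dataset', 'dataframe': df,
            'label_col': 'Class', 'feature_cols': feature_cols}
```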
/RFTest.py:
--------------------------------------------------------------------------------
1 | from sklearn.cross_validation import StratifiedKFold
2 | from sklearn.metrics import confusion_matrix
3 |
4 | from constructors.ensemble import RFClassification
5 | from data.load_all_datasets import load_all_datasets
6 |
7 | import numpy as np
8 |
9 | from decisiontree import DecisionTree
10 |
11 | # from refined_rf import RefinedRandomForest  # unused import; the refined_rf module is not included in this repository
12 |
13 | rf = RFClassification()
14 |
15 | NR_FOLDS = 5
16 |
17 |
18 | def _convert_to_tree(dt, features):
19 | """Convert a sklearn object to a `decisiontree.decisiontree` object"""
20 | n_nodes = dt.tree_.node_count
21 | children_left = dt.tree_.children_left
22 | children_right = dt.tree_.children_right
23 | feature = dt.tree_.feature
24 | threshold = dt.tree_.threshold
25 | classes = dt.classes_
26 |
27 | # The tree structure can be traversed to compute various properties such
28 | # as the depth of each node and whether or not it is a leaf.
29 | node_depth = np.zeros(shape=n_nodes)
30 | decision_trees = [None] * n_nodes
31 | for i in range(n_nodes):
32 | decision_trees[i] = DecisionTree()
33 | is_leaves = np.zeros(shape=n_nodes, dtype=bool)
34 | stack = [(0, -1)] # seed is the root node id and its parent depth
35 | while len(stack) > 0:
36 | node_id, parent_depth = stack.pop()
37 | node_depth[node_id] = parent_depth + 1
38 |
39 | # If we have a test node
40 | if children_left[node_id] != children_right[node_id]:
41 | stack.append((children_left[node_id], parent_depth + 1))
42 | stack.append((children_right[node_id], parent_depth + 1))
43 | else:
44 | is_leaves[node_id] = True
45 |
46 | for i in range(n_nodes):
47 |
48 | if children_left[i] > 0:
49 | decision_trees[i].left = decision_trees[children_left[i]]
50 |
51 | if children_right[i] > 0:
52 | decision_trees[i].right = decision_trees[children_right[i]]
53 |
54 | if is_leaves[i]:
55 | decision_trees[i].label = dt.classes_[np.argmax(dt.tree_.value[i][0])]
56 | decision_trees[i].value = None
57 | else:
58 | decision_trees[i].label = features[feature[i]]
59 | decision_trees[i].value = threshold[i]
60 |
61 | return decision_trees[0]
62 |
63 |
64 | for dataset in load_all_datasets():
65 | df = dataset['dataframe']
66 | label_col = dataset['label_col']
67 | feature_cols = dataset['feature_cols']
68 |
69 | skf = StratifiedKFold(df[label_col], n_folds=NR_FOLDS, shuffle=True, random_state=1337)
70 |
71 | for fold, (train_idx, test_idx) in enumerate(skf):
72 | print 'Fold', fold+1, '/', NR_FOLDS, 'for dataset', dataset['name']
73 | train = df.iloc[train_idx, :].reset_index(drop=True)
74 | X_train = train.drop(label_col, axis=1)
75 | y_train = train[label_col]
76 | test = df.iloc[test_idx, :].reset_index(drop=True)
77 | X_test = test.drop(label_col, axis=1)
78 | y_test = test[label_col]
79 |
80 | rf.construct_classifier(train, feature_cols, label_col)
81 |
82 | for estimator in rf.clf.estimators_:
83 | print estimator.tree_
84 | print _convert_to_tree(estimator, feature_cols)
85 |
86 | predictions = rf.evaluate_multiple(X_test).astype(int)
87 | conf_matrix = confusion_matrix(y_test, predictions)
88 | print conf_matrix
89 | diagonal_sum = sum(
90 | [conf_matrix[i][i] for i in range(len(conf_matrix))])
91 | norm_diagonal_sum = sum(
92 | [float(conf_matrix[i][i]) / float(sum(conf_matrix[i])) for i in
93 | range(len(conf_matrix))])
94 | total_count = np.sum(conf_matrix)
95 | print 'Accuracy:', float(diagonal_sum) / float(total_count)
96 | print 'Balanced accuracy:', float(norm_diagonal_sum) / float(conf_matrix.shape[0])
--------------------------------------------------------------------------------
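For reference, the accuracy and balanced accuracy that `RFTest.py` derives from the confusion matrix can be restated compactly. A worked example on a small, assumed 2x2 confusion matrix:

```python
import numpy as np

# Assumed confusion matrix (rows = true class, columns = predicted class)
conf_matrix = np.array([[8, 2],
                        [1, 9]])

# Accuracy: correctly classified samples over all samples
accuracy = float(np.trace(conf_matrix)) / conf_matrix.sum()   # 17/20 = 0.85

# Balanced accuracy: per-class recall, averaged over the classes
recalls = conf_matrix.diagonal().astype(float) / conf_matrix.sum(axis=1)
balanced_accuracy = recalls.mean()                            # (0.8 + 0.9)/2 = 0.85
```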
/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/predict-idlab/GENESIM/44925de91ae408fea30ea8ece3688f9e42d4f040/__init__.py
--------------------------------------------------------------------------------
/constructors/ISM.py:
--------------------------------------------------------------------------------
1 | """
2 | Interpretable Single Model
3 | --------------------------
4 |
5 | Merges different decision trees in an ensemble together in a single, interpretable decision tree
6 |
7 | Written by Gilles Vandewiele in commission of IDLab - INTEC from University Ghent.
8 |
9 | Reference:
10 | Van Assche, Anneleen, and Hendrik Blockeel.
11 | "Seeing the forest through the trees: Learning a comprehensible model from an ensemble."
12 | European Conference on Machine Learning. Springer Berlin Heidelberg, 2007.
13 | """
14 |
15 | from collections import Counter
16 | import math
17 |
18 | import numpy as np
19 |
20 | import decisiontree
21 |
22 |
23 | def _extract_tests(tree, _tests=set()):
24 | """
25 | Given a decision tree, extract all tests from the nodes
26 |
27 | :param tree: the decision tree to extract tests from (decisiontree.py)
28 |     :param _tests: recursive accumulator; the mutable default is shared between calls, so callers always pass a fresh set()
29 | :return: a set of possible tests (feature_label <= threshold_value); each entry is a tuple (label, value)
30 | """
31 | if tree.value is not None:
32 | _tests.add((tree.label, tree.value))
33 | _extract_tests(tree.left, _tests)
34 | _extract_tests(tree.right, _tests)
35 | return _tests
36 |
37 |
38 | def _calculate_entropy(probabilities):
39 | """
40 | Calculate the entropy of given probabilities
41 |
42 | :param probabilities: a list of floats between [0, 1] (sum(probabilities) must be 1)
43 | :return: the entropy
44 | """
45 | return sum([-prob * np.log(prob)/np.log(2) if prob != 0 else 0 for prob in probabilities])
46 |
47 |
48 | def _get_most_occurring_class(data, class_label):
49 | """
50 | Get the most occurring class in a dataframe of data
51 |
52 | :param data: a pandas dataframe
53 | :param class_label: the column of the class labels
54 | :return: the most occurring class
55 | """
56 | return Counter(data[class_label].values.tolist()).most_common(1)[0][0]
57 |
58 |
59 | def _calculate_prob(tree, label, value, prior_tests, negate=False):
60 | """
61 | Estimate the probabilities from a decision tree by propagating down from the root to the leaves
62 |
63 | :param tree: the decision tree to estimate the probabilities from
64 | :param label: the label of the test being evaluated
65 | :param value: the value of the test being evaluated
66 | :param prior_tests: tests that are already in the conjunctions
67 | :param negate: is it a negative or positive test
68 | :return: a vector of probabilities for each class
69 | """
70 | if tree.value is None: # If the value is None, we're at a leaf, return a vector of probabilities
71 | return np.divide(list(map(float, list(tree.class_probabilities.values()))), float(sum(list(tree.class_probabilities.values()))))
72 | else:
73 | if (tree.label, tree.value) in prior_tests:
74 | # The test in the current node is already in the conjunction, take the correct path
75 | if prior_tests[(tree.label, tree.value)]:
76 | return _calculate_prob(tree.left, label, value, prior_tests, negate)
77 | else:
78 | return _calculate_prob(tree.right, label, value, prior_tests, negate)
79 | elif not (tree.label == label and tree.value == value):
80 | # The test of current node is not yet in conjunction and is not the test we're looking for
81 | # Keep propagating (but add weights (estimate how many times the test succeeds/fails))!
82 | samples_sum = sum(list(tree.class_probabilities.values()))
83 | if samples_sum == 0:
84 | left_fraction = 1.0
85 | right_fraction = 1.0
86 | else:
87 | left_fraction = sum(list(tree.left.class_probabilities.values())) / samples_sum
88 | right_fraction = sum(list(tree.right.class_probabilities.values())) / samples_sum
89 |
90 | return np.add(left_fraction * _calculate_prob(tree.left, label, value, prior_tests, negate),
91 | right_fraction * _calculate_prob(tree.right, label, value, prior_tests, negate))
92 | elif not negate:
93 | # We found the test we are looking for
94 | # If negate is False, then it is a positive test and we take the left subtree
95 | return _calculate_prob(tree.left, label, value, prior_tests, negate)
96 | else:
97 | return _calculate_prob(tree.right, label, value, prior_tests, negate)
98 |
99 |
100 | def _calculate_prob_dict(tree, label, value, prior_tests, negate=False):
101 | """
102 | Wrapper around calculate_prob, so we know which probability belongs to which class
103 | """
104 | return dict(zip(tree.class_probabilities.keys(), _calculate_prob(tree, label, value, prior_tests, negate)))
105 |
106 |
107 | def ism(decision_trees, data, class_label, min_nr_samples=1, calc_fracs_from_ensemble=False):
108 | """
109 |     Return a single decision tree from an ensemble of decision trees, using the normalized information gain as
110 |     split criterion, estimated from the ensemble. This is a wrapper function around `constructors.ISM.build_dt_from_ensemble`,
111 |     which first calculates the required parameters for this method.
112 |
113 | **Params**
114 | ----------
115 | - `decision_trees` (list of `decisiontree.DecisionTree` objects): the ensemble of decision trees to be merged
116 |
117 | - `data` (pandas DataFrame): the data frame with training data
118 |
119 | - `class_label` (string): the column identifier for the column with class labels in the data
120 |
121 |     - `min_nr_samples` (int): pre-prune condition: stop splitting when the number of samples is smaller than or equal to this threshold
122 |
123 | - `calc_fracs_from_ensemble` (boolean): if `True`, the different probabilities are calculated using the ensemble. Else, the data is used
124 |
125 | **Returns**
126 | -----------
127 | a single decision tree based on the ensemble of decision trees
128 | """
129 | X = data.drop(class_label, axis=1).reset_index(drop=True)
130 | y = data[class_label].reset_index(drop=True)
131 |
132 | non_empty_decision_trees = []
133 | for tree in decision_trees:
134 | if tree.count_nodes() > 1: non_empty_decision_trees.append(tree)
135 | decision_trees = non_empty_decision_trees
136 |
137 | prior_entropy = 0
138 | tests = set()
139 | tests.clear()
140 | for dt in decision_trees:
141 | tests = tests | _extract_tests(dt, set())
142 | prior_entropy += _calculate_entropy(np.divide(list(dt.class_probabilities.values()),
143 | sum(dt.class_probabilities.values())))
144 | prior_entropy /= len(decision_trees)
145 |
146 | combined_dt = build_dt_from_ensemble(decision_trees, data, class_label, tests, prior_entropy, {}, min_nr_samples,
147 | calc_fracs_from_ensemble)
148 | combined_dt.populate_samples(X, y)
149 |
150 | return combined_dt
151 |
152 |
153 | def _add_reduce_by_key(A, B):
154 | """
155 | Reduces two dicts by key using add operator
156 |
157 | :param A: dict one
158 | :param B: dict two
159 |     :return: a new dict, containing the sum of the two values for keys present in both dicts, else just the single value
160 | """
161 | return {x: A.get(x, 0) + B.get(x, 0) for x in set(A).union(B)}
162 |
163 |
164 | def build_dt_from_ensemble(decision_trees, data, class_label, tests, prior_entropy, prior_tests={}, min_nr_samples=1,
165 | calc_fracs_from_ensemble=False):
166 | """
167 | Given an ensemble of decision trees, build a single decision tree using estimates from the ensemble
168 |
169 | **Params**
170 | ----------
171 | - `decision_trees` (list of `decisiontree.DecisionTree` objects): the ensemble of decision trees to be merged
172 |
173 | - `data` (pandas DataFrame): the data frame with training data
174 |
175 | - `class_label` (string): the column identifier for the column with class labels in the data
176 |
177 | - `tests` (set of tuples): all possible tests (extracted from the ensemble)
178 |
179 | - `prior_entropy` (float): recursive parameter to calculate information gain
180 |
181 |     - `prior_tests` (dict): maps each test already picked for our final decision tree to `True` (left/positive branch) or `False` (right/negative branch)
182 |
183 |     - `min_nr_samples` (int): pre-prune condition: stop splitting when the number of samples is smaller than or equal to this threshold
184 |
185 | - `calc_fracs_from_ensemble` (boolean): if `True`, the different probabilities are calculated using the ensemble. Else, the data is used
186 |
187 | **Returns**
188 | -----------
189 | a single decision tree, calculated using information from the ensemble
190 | """
191 | # Pre-pruning conditions:
192 | # - if the length of data is <= min_nr_samples
193 | # - when we have no tests left
194 | # - when there is only 1 unique class in the data left
195 | # print len(data), len(tests), np.unique(data[class_label].values)
196 | if len(data) > min_nr_samples and len(tests) > 0 and len(np.unique(data[class_label].values)) > 1:
197 | max_ig = 0
198 | best_pos_data, best_neg_data, best_pos_entropy, best_neg_entropy = [None]*4
199 | best_dt = decisiontree.DecisionTree()
200 | # Find the test that results in the maximum information gain
201 | for test in tests:
202 | pos_avg_probs, neg_avg_probs, pos_fraction, neg_fraction = {}, {}, 0.0, 0.0
203 | for dt in decision_trees:
204 | pos_prob_dict = _calculate_prob_dict(dt, test[0], test[1], prior_tests, False)
205 | neg_prob_dict = _calculate_prob_dict(dt, test[0], test[1], prior_tests, True)
206 |
207 | if not any(math.isnan(x) for x in pos_prob_dict.values()) and not any(math.isnan(x) for x in neg_prob_dict.values()):
208 |                     pos_avg_probs = _add_reduce_by_key(pos_avg_probs, pos_prob_dict)
209 |                     neg_avg_probs = _add_reduce_by_key(neg_avg_probs, neg_prob_dict)
210 |
211 | if calc_fracs_from_ensemble and len(data) > 0:
212 | pos_fraction += float(len(dt.data[dt.data[test[0]] <= test[1]]))/len(dt.data)
213 | neg_fraction += float(len(dt.data[dt.data[test[0]] > test[1]]))/len(dt.data)
214 |
215 | for key in pos_avg_probs:
216 | pos_avg_probs[key] /= len(decision_trees)
217 | for key in neg_avg_probs:
218 | neg_avg_probs[key] /= len(decision_trees)
219 |
220 | if calc_fracs_from_ensemble:
221 | pos_fraction /= float(len(decision_trees))
222 | neg_fraction /= float(len(decision_trees))
223 |
224 |             pos_entropy = _calculate_entropy(list(pos_avg_probs.values()))
225 |             neg_entropy = _calculate_entropy(list(neg_avg_probs.values()))
226 |
227 | pos_data = data[data[test[0]] <= test[1]].copy()
228 | neg_data = data[data[test[0]] > test[1]].copy()
229 |
230 | if not calc_fracs_from_ensemble:
231 | pos_fraction = float(len(pos_data)) / float(len(data))
232 | neg_fraction = float(len(neg_data)) / float(len(data))
233 |
234 | weighted_entropy = pos_fraction * pos_entropy + neg_fraction * neg_entropy
235 | information_gain = prior_entropy - weighted_entropy
236 |
237 | if information_gain > max_ig and len(pos_data) > 0 and len(neg_data) > 0:
238 | max_ig, best_dt.label, best_dt.value = information_gain, test[0], test[1]
239 | best_pos_data, best_neg_data, best_pos_entropy, best_neg_entropy = pos_data, neg_data, pos_entropy, neg_entropy
240 |
241 | # print max_ig
242 | if max_ig == 0: # If we can't find a test that results in an information gain, we can pre-prune
243 | return decisiontree.DecisionTree(value=None, label=_get_most_occurring_class(data, class_label))
244 |
245 | # Update some variables and do recursive calls
246 | left_prior_tests = prior_tests.copy()
247 | left_prior_tests.update({(best_dt.label, best_dt.value): True})
248 | new_tests = tests.copy()
249 | new_tests.remove((best_dt.label, best_dt.value))
250 |         best_dt.left = build_dt_from_ensemble(decision_trees, best_pos_data, class_label, new_tests,
251 |                                               best_pos_entropy, left_prior_tests, min_nr_samples, calc_fracs_from_ensemble)
252 |
253 | right_prior_tests = prior_tests.copy()
254 | right_prior_tests.update({(best_dt.label, best_dt.value): False})
255 |         best_dt.right = build_dt_from_ensemble(decision_trees, best_neg_data, class_label, new_tests,
256 |                                                best_neg_entropy, right_prior_tests, min_nr_samples, calc_fracs_from_ensemble)
257 |
258 | return best_dt
259 | else:
260 | return decisiontree.DecisionTree(value=None, label=_get_most_occurring_class(data, class_label))
--------------------------------------------------------------------------------
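A minimal usage sketch for `ism`, assuming a pandas DataFrame `train` whose class labels live in a `'Class'` column (file and column names are placeholders): an ensemble is grown with `constructors.ensemble.bootstrap` and then merged into a single tree.

```python
import pandas as pd

from constructors.ISM import ism
from constructors.ensemble import bootstrap
from constructors.treeconstructor import CARTConstructor

train = pd.read_csv('train.csv')  # hypothetical training data with a 'Class' column

# Grow an ensemble: 3 bootstrap samples per constructor, plus AdaBoost models
trees = bootstrap(train, 'Class', [CARTConstructor()], nr_classifiers=3, boosting=True)

# Merge the ensemble into one interpretable decisiontree.DecisionTree
merged = ism(trees, train, 'Class', min_nr_samples=1)

# The merged tree supports the usual DecisionTree methods
predictions = merged.evaluate_multiple(train.drop('Class', axis=1))
```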
/constructors/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Contains implementations for different classifiers: decision tree induction algorithms, ensemble techniques and
3 | GENESIM: GENetic Extraction of a Single, Interpretable Model
4 |
5 | Written by Gilles Vandewiele in commission of IDLab - INTEC from University Ghent.
6 | """
--------------------------------------------------------------------------------
/constructors/ensemble.py:
--------------------------------------------------------------------------------
1 | """
2 | Contains wrappers around well-known ensemble techniques: Random Forest and XGBoost.
3 |
4 | Written by Gilles Vandewiele in commission of IDLab - INTEC from University Ghent.
5 | """
6 |
7 | import time
8 | from bayes_opt import BayesianOptimization
9 | from sklearn.cross_validation import cross_val_score
10 | from sklearn.ensemble import AdaBoostClassifier
11 | from xgboost import XGBClassifier
12 | from sklearn.ensemble import RandomForestClassifier
13 | import numpy as np
14 | import decisiontree
15 |
16 |
17 | class EnsembleConstructor(object):
18 | """This class is an interface for all tree induction algorithms."""
19 |
20 | def __init__(self):
21 | """In the init method, all hyper-parameters should be set."""
22 | self.clf = None
23 |
24 | def get_name(self):
25 | """Get the name of the induction algorithm implemented."""
26 | raise NotImplementedError("This method needs to be implemented")
27 |
28 | def construct_classifier(self, train, features, label_col):
29 | """Construct an ensemble classifier.
30 |
31 | **Params**
32 | ----------
33 | - `train` (pandas DataFrame) - a `Dataframe` containing all the training data
34 |
35 | - `features` (pandas Series or list) - the names of the feature columns
36 |
37 | - `label_col` (string) - the name of the class label column
38 |
39 | **Returns**
40 | -----------
41 | an ensemble classifier
42 | """
43 | raise NotImplementedError("This method needs to be implemented")
44 |
45 | def evaluate_multiple(self, feature_vectors):
46 | """Evaluate multiple samples
47 |
48 | **Params**
49 | ----------
50 | - `feature_vectors` (pandas DataFrame) - a `Dataframe` containing all the feature vectors
51 |
52 | **Returns**
53 | -----------
54 | a list of predicted class labels
55 |
56 | """
57 | return self.clf.predict(feature_vectors)
58 |
59 |
60 | class XGBClassification(EnsembleConstructor):
61 |
62 | def get_name(self):
63 | return 'XGBoost'
64 |
65 | def __init__(self):
66 | super(XGBClassification, self).__init__()
67 | self.nr_clf = 0
68 | self.time = 0
69 |
70 | def construct_classifier(self, train, features, label_col):
71 | data = train[features]
72 | target = train[label_col]
73 |
74 | def xgbcv(nr_classifiers, learning_rate, max_depth, min_child_weight, subsample, colsample_bytree, gamma,
75 | reg_lambda):
76 | nr_classifiers = int(nr_classifiers)
77 | max_depth = int(max_depth)
78 | min_child_weight = int(min_child_weight)
79 | return cross_val_score(XGBClassifier(learning_rate=learning_rate, n_estimators=nr_classifiers,
80 | gamma=gamma, subsample=subsample, colsample_bytree=colsample_bytree,
81 | nthread=1, scale_pos_weight=1, reg_lambda=reg_lambda,
82 | min_child_weight=min_child_weight, max_depth=max_depth),
83 | data, target, 'accuracy', cv=5).mean()
84 |
85 | params = {
86 | 'nr_classifiers': (50, 1000),
87 | 'learning_rate': (0.01, 0.3),
88 | 'max_depth': (5, 10),
89 | 'min_child_weight': (2, 10),
90 | 'subsample': (0.7, 0.8),
91 | 'colsample_bytree': (0.5, 0.99),
92 | 'gamma': (0.01, 1.),
93 | 'reg_lambda': (0, 1)
94 | }
95 |
96 | xgbBO = BayesianOptimization(xgbcv, params, verbose=0)
97 | xgbBO.maximize(init_points=10, n_iter=20, n_restarts_optimizer=50)
98 | # xgbBO.maximize(init_points=1, n_iter=1, n_restarts_optimizer=100)
99 |
100 | best_params = xgbBO.res['max']['max_params']
101 |
102 | best_nr_classifiers = int(best_params['nr_classifiers'])
103 | self.nr_clf = best_nr_classifiers
104 | best_max_depth = int(best_params['max_depth'])
105 | best_min_child_weight = int(best_params['min_child_weight'])
106 | best_colsample_bytree = best_params['colsample_bytree']
107 | best_subsample = best_params['subsample']
108 | best_reg_lambda = best_params['reg_lambda']
109 | best_learning_rate = best_params['learning_rate']
110 | best_gamma = best_params['gamma']
111 |
112 | print(best_nr_classifiers)
113 |
114 | self.clf = XGBClassifier(learning_rate=best_learning_rate, n_estimators=best_nr_classifiers,
115 | gamma=best_gamma, subsample=best_subsample, colsample_bytree=best_colsample_bytree,
116 | nthread=1, scale_pos_weight=1, reg_lambda=best_reg_lambda,
117 | min_child_weight=best_min_child_weight, max_depth=best_max_depth)
118 | start = time.time()
119 | self.clf.fit(data, target)
120 | self.time = time.time() - start
121 |
122 | return self
123 |
124 | def evaluate_multiple(self, feature_vectors):
125 | return self.clf.predict(feature_vectors)
126 |
127 |
128 | class RFClassification(EnsembleConstructor):
129 |
130 | def get_name(self):
131 | return 'RF'
132 |
133 | def __init__(self):
134 | super(RFClassification, self).__init__()
135 | self.nr_clf = 0
136 | self.time = 0
137 |
138 | def construct_classifier(self, train, features, label_col):
139 | data = train[features]
140 | target = train[label_col]
141 |
142 | def rfcv(nr_classifiers, max_depth, min_samples_leaf, bootstrap, criterion, max_features):
143 | nr_classifiers = int(nr_classifiers)
144 | max_depth = int(max_depth)
145 | min_samples_leaf = int(min_samples_leaf)
146 | if np.round(bootstrap):
147 | bootstrap = True
148 | else:
149 | bootstrap = False
150 | if np.round(criterion):
151 | criterion = 'gini'
152 | else:
153 | criterion = 'entropy'
154 | if np.round(max_features):
155 | max_features = None
156 | else:
157 | max_features = 1.0
158 |
159 | return cross_val_score(RandomForestClassifier(n_estimators=nr_classifiers, max_depth=max_depth,
160 | min_samples_leaf=min_samples_leaf, bootstrap=bootstrap,
161 | criterion=criterion, max_features=max_features),
162 | data, target, 'accuracy', cv=5).mean()
163 |
164 | params = {
165 | 'nr_classifiers': (10, 1000),
166 | 'max_depth': (5, 10),
167 | 'min_samples_leaf': (2, 10),
168 | 'bootstrap': (0, 1),
169 | 'criterion': (0, 1),
170 | 'max_features': (0, 1)
171 | }
172 |
173 | rfBO = BayesianOptimization(rfcv, params, verbose=0)
174 | rfBO.maximize(init_points=10, n_iter=20, n_restarts_optimizer=50)
175 | # rfBO.maximize(init_points=1, n_iter=1, n_restarts_optimizer=50)
176 |
177 | best_params = rfBO.res['max']['max_params']
178 |
179 | best_nr_classifiers = int(best_params['nr_classifiers'])
180 | self.nr_clf = best_nr_classifiers
181 | best_max_depth = int(best_params['max_depth'])
182 | best_min_samples_leaf = int(best_params['min_samples_leaf'])
183 | best_bootstrap = best_params['bootstrap']
184 | best_criterion = best_params['criterion']
185 | best_max_features = best_params['max_features']
186 |
187 | if np.round(best_bootstrap):
188 | best_bootstrap = True
189 | else:
190 | best_bootstrap = False
191 | if np.round(best_criterion):
192 | best_criterion = 'gini'
193 | else:
194 | best_criterion = 'entropy'
195 | if np.round(best_max_features):
196 | best_max_features = None
197 | else:
198 | best_max_features = 1.0
199 |
200 | self.clf = RandomForestClassifier(n_estimators=best_nr_classifiers, max_depth=best_max_depth,
201 | min_samples_leaf=best_min_samples_leaf, bootstrap=best_bootstrap,
202 | criterion=best_criterion, max_features=best_max_features)
203 | start = time.time()
204 | self.clf.fit(data, target)
205 |
206 | self.time = time.time() - start
207 |
208 | return self
209 |
210 | def evaluate_multiple(self, feature_vectors):
211 | return self.clf.predict(feature_vectors)
212 |
213 |
214 | def bootstrap(data, class_label, tree_constructors, bootstrap_features=False, nr_classifiers=3, boosting=True):
215 | """
216 | Bootstrapping ensemble technique
217 |
218 | **Params**
219 | ----------
220 | - `data` (DataFrame): containing all the data to be bootstrapped
221 |
222 | - `class_label` (string): the column in the dataframe that contains the target variables
223 |
224 | - `tree_constructors` (list): the induction algorithms (`constructors.treeconstructor.TreeConstructor`) used
225 |
226 | - `bootstrap_features` (boolean): if `True`, then apply bootstrapping to the features as well
227 |
228 |     - `nr_classifiers` (int): how many bootstrap samples (and thus trees) are created per `tree_constructor`
229 | 
230 |     - `boosting` (boolean): if `True`, then models are created with AdaBoost as well
231 |
232 | **Returns**
233 | -----------
234 | a vector of fitted classifiers, converted to DecisionTree (`decisiontree.DecisionTree`)
235 | """
236 |
237 | def _convert_to_tree(classifier, features):
238 | n_nodes = classifier.tree_.node_count
239 | children_left = classifier.tree_.children_left
240 | children_right = classifier.tree_.children_right
241 | feature = classifier.tree_.feature
242 | threshold = classifier.tree_.threshold
243 | classes = classifier.classes_
244 |
245 | # The tree structure can be traversed to compute various properties such
246 | # as the depth of each node and whether or not it is a leaf.
247 | node_depth = np.zeros(shape=n_nodes)
248 | decision_trees = [None] * n_nodes
249 | for i in range(n_nodes):
250 | decision_trees[i] = decisiontree.DecisionTree()
251 | is_leaves = np.zeros(shape=n_nodes, dtype=bool)
252 | stack = [(0, -1)] # seed is the root node id and its parent depth
253 | while len(stack) > 0:
254 | node_id, parent_depth = stack.pop()
255 | node_depth[node_id] = parent_depth + 1
256 |
257 | # If we have a test node
258 | if children_left[node_id] != children_right[node_id]:
259 | stack.append((children_left[node_id], parent_depth + 1))
260 | stack.append((children_right[node_id], parent_depth + 1))
261 | else:
262 | is_leaves[node_id] = True
263 |
264 | for i in range(n_nodes):
265 | if children_left[i] > 0:
266 | decision_trees[i].left = decision_trees[children_left[i]]
267 |
268 | if children_right[i] > 0:
269 | decision_trees[i].right = decision_trees[children_right[i]]
270 |
271 | if is_leaves[i]:
272 | decision_trees[i].label = classes[np.argmax(classifier.tree_.value[i][0])]
273 | decision_trees[i].value = None
274 | else:
275 | decision_trees[i].label = features[feature[i]]
276 | decision_trees[i].value = threshold[i]
277 | return decision_trees[0]
278 |
279 | idx = np.random.randint(0, len(data), (nr_classifiers, len(data)))
280 | decision_trees = []
281 |
282 | if boosting:
283 | ada = AdaBoostClassifier(base_estimator=None, n_estimators=nr_classifiers, learning_rate=0.25, random_state=1337)
284 | X_train = data.drop(class_label, axis=1).reset_index(drop=True)
285 | y_train = data[class_label].reset_index(drop=True)
286 | ada.fit(X_train, y_train)
287 | for estimator in ada.estimators_:
288 | dt = _convert_to_tree(estimator, X_train.columns)
289 | dt.data = data
290 | dt.populate_samples(X_train, y_train)
291 | decision_trees.append(dt)
292 |
293 | for indices in idx:
294 | if bootstrap_features:
295 | features = list(set(np.random.randint(0, len(data.columns), (1, len(data.columns))).tolist()[0]))
296 | X_bootstrap = data.iloc[indices, features].reset_index(drop=True)
297 | if class_label in X_bootstrap.columns:
298 | X_bootstrap = X_bootstrap.drop(class_label, axis=1)
299 | y_bootstrap = data.iloc[indices][class_label].reset_index(drop=True)
300 | else:
301 | X_bootstrap = data.iloc[indices, :].drop(class_label, axis=1).reset_index(drop=True)
302 | y_bootstrap = data.iloc[indices][class_label].reset_index(drop=True)
303 |
304 | X = data.drop(class_label, axis=1).reset_index(drop=True)
305 | y = data[class_label].reset_index(drop=True)
306 | train_bootstrap = X_bootstrap.copy()
307 | train_bootstrap[y_bootstrap.name] = y_bootstrap
308 |
309 | for tree_constructor in tree_constructors:
310 | tree = tree_constructor.construct_classifier(train_bootstrap, X_bootstrap.columns, y_bootstrap.name)
311 | # print 'Number of nodes in stub:', tree_constructor.get_name(), count_nodes(tree)
312 | # print tree_constructor.get_name(), tree.count_nodes()
313 | tree.data = data.iloc[indices, :].reset_index(drop=True)
314 | tree.populate_samples(X, y)
315 | decision_trees.append(tree)
316 |
317 | return decision_trees
318 |
--------------------------------------------------------------------------------
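A short usage sketch for the wrappers above (file and column names are assumptions). Both wrappers tune their hyper-parameters internally with Bayesian optimization, so only the data has to be supplied:

```python
import pandas as pd

from constructors.ensemble import RFClassification

train = pd.read_csv('train.csv')  # hypothetical data with a 'Class' label column
test = pd.read_csv('test.csv')
feature_cols = [c for c in train.columns if c != 'Class']

rf = RFClassification()
rf.construct_classifier(train, feature_cols, 'Class')   # tunes and fits internally
predictions = rf.evaluate_multiple(test[feature_cols])  # predicted class labels
print(rf.nr_clf)  # ensemble size chosen by the optimizer
print(rf.time)    # wall-clock fitting time in seconds
```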
/constructors/guide:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/predict-idlab/GENESIM/44925de91ae408fea30ea8ece3688f9e42d4f040/constructors/guide
--------------------------------------------------------------------------------
/constructors/inTrees.py:
--------------------------------------------------------------------------------
1 | """
2 | inTrees / STEL
3 | --------------
4 |
5 | Merges different decision trees in an ensemble together in an ordered rule list
6 |
7 | Written by Gilles Vandewiele in commission of IDLab - INTEC from University Ghent.
8 |
9 | Reference:
10 | Houtao Deng
11 | "Interpreting Tree Ensembles with inTrees"
12 | """
13 |
14 | import sys
15 | import re
16 |
17 | import numpy as np
18 | import pandas as pd
19 | import rpy2
20 | from rpy2.robjects import pandas2ri
21 | pandas2ri.activate()
22 | import rpy2.robjects as ro
23 |
24 | from rpy2.robjects.packages import importr
25 |
26 | from constructors import ensemble
27 |
28 | sys.path.append('../')
29 |
30 | def convert_to_r_posixct(obj):
31 | """
32 | Convert DatetimeIndex or np.datetime array to R POSIXct using
33 | m8[s] format.
34 |
35 | Parameters
36 | ----------
37 | obj : source pandas object (one of [DatetimeIndex, np.datetime])
38 |
39 | Returns
40 | -------
41 | An R POSIXct vector (rpy2.robjects.vectors.POSIXct)
42 |
43 | """
44 | import time
45 | from rpy2.rinterface import StrSexpVector
46 |
47 | # convert m8[ns] to m8[s]
48 |     vals = ro.vectors.FloatSexpVector(obj.values.view('i8') / 1E9)
49 |     as_posixct = ro.baseenv.get('as.POSIXct')
50 | origin = StrSexpVector([time.strftime("%Y-%m-%d",
51 | time.gmtime(0)), ])
52 |
53 | # We will be sending ints as UTC
54 | tz = obj.tz.zone if hasattr(
55 | obj, 'tz') and hasattr(obj.tz, 'zone') else 'UTC'
56 | tz = StrSexpVector([tz])
57 | utc_tz = StrSexpVector(['UTC'])
58 |
59 | posixct = as_posixct(vals, origin=origin, tz=utc_tz)
60 | posixct.do_slot_assign('tzone', tz)
61 | return posixct
62 |
63 |
64 | class Condition:
65 | """
66 | Class which represents one part of the rule (which can be seen as a conjunction of conditions)
67 | """
68 | def __init__(self, feature, test, value):
69 | self.feature = feature
70 | '''The feature on which the test is performed'''
71 | self.test = test
72 | '''What kind of test is done. Must be either `==`, `>` or `<=`'''
73 | self.value = value
74 | '''The threshold value'''
75 |
76 | def evaluate(self, feature_vector):
77 | """Create a prediction for a sample (using its feature vector)
78 |
79 | **Params**
80 | ----------
81 | - `feature_vector` (pandas Series or dict) - the sample to evaluate, must be a `pandas Series` object or a
82 | `dict`. It is important that the attribute keys in the sample are the same as the labels occuring in the rules.
83 |
84 | **Returns**
85 | -----------
86 |         `True` if `feature_vector[self.feature] <test> self.value` holds, where `<test>` is equal to `==`, `>` or `<=`
87 | """
88 | if self.value is None:
89 | return True
90 | elif self.test == '==':
91 | return feature_vector[self.feature] == self.value
92 | elif self.test == '>':
93 | return feature_vector[self.feature] > self.value
94 | else:
95 | return feature_vector[self.feature] <= self.value
96 |
97 |
98 | class Rule:
99 | """
100 | Class which represents a rule, which is a conjunction of conditions
101 | """
102 | def __init__(self, index, conditions, prediction):
103 | self.index = index
104 | '''The index of this rule in a rule list (which is traversed sequentially until a match is found).'''
105 | self.rules = conditions
106 | '''A list of `constructors.inTrees.Condition`'''
107 | self.prediction = prediction
108 | '''This is returned when a sample fully complies to the rule (`True` for all conditions)'''
109 |
110 | def evaluate(self, feature_vector):
111 | """Create a prediction for a sample (using its feature vector)
112 |
113 | **Params**
114 | ----------
115 | - `feature_vector` (pandas Series or dict) - the sample to evaluate, must be a `pandas Series` object or a
116 | `dict`. It is important that the attribute keys in the sample are the same as the labels occuring in the rules.
117 |
118 | **Returns**
119 | -----------
120 |         a tuple: `(True, self.prediction)` if every condition in the conjunction holds, else `(False, -1)`
121 | """
122 | for rule in self.rules:
123 | if not rule.evaluate(feature_vector): return False, -1
124 | return True, self.prediction
125 |
126 |
127 | class OrderedRuleList:
128 | """
129 | Class which represents a list of rules. To make a prediction, the list is traversed and when a rule is found where
130 | the sample complies to, its prediction is returned.
131 | """
132 | def __init__(self, rule_list):
133 | self.rule_list = rule_list
134 | '''A list of `constructors.inTrees.Rule`'''
135 |
136 | def _evaluate(self, feature_vector):
137 | for ruleset in sorted(self.rule_list, key=lambda x: x.index): # Sort to make sure they are evaluated in order
138 | rule_evaluation_result, rule_evaluation_pred = ruleset.evaluate(feature_vector)
139 | if rule_evaluation_result: return rule_evaluation_pred
140 | return None
141 |
142 | def print_rules(self):
143 | """Print the rules"""
144 | for rule_set in self.rule_list:
145 | print '*' + ' & '.join([str(rule.feature)+' '+str(rule.test)+' '+str(rule.value) for rule in rule_set.rules]), '==>', rule_set.prediction
146 |
147 | def evaluate_multiple(self, feature_vectors):
148 | """Wrapper method to evaluate multiple vectors at once (just a for loop where evaluate is called)
149 |
150 | **Params**
151 | ----------
152 | - `feature_vectors` (pandas DataFrame or list of dicts) - the samples to evaluate
153 |
154 | **Returns**
155 | -----------
156 |         a numpy array of predicted class labels (one per feature vector)
157 | """
158 | results = []
159 |
160 | for _index, feature_vector in feature_vectors.iterrows():
161 | results.append(self._evaluate(feature_vector))
162 |
163 | return np.asarray(results)
164 |
165 |
166 | class inTreesClassifier:
167 |
168 | def __init__(self):
169 | pass
170 |
171 | def _convert_to_r_dataframe(self, df, strings_as_factors=False):
172 | """
173 | Convert a pandas DataFrame to a R data.frame.
174 |
175 | Parameters
176 | ----------
177 | df: The DataFrame being converted
178 | strings_as_factors: Whether to turn strings into R factors (default: False)
179 |
180 | Returns
181 | -------
182 | A R data.frame
183 |
184 | """
185 |
186 | import rpy2.rlike.container as rlc
187 |
188 | columns = rlc.OrdDict()
189 |
190 | # FIXME: This doesn't handle MultiIndex
191 |
192 | for column in df:
193 | value = df[column]
194 | value_type = value.dtype.type
195 |
196 | if value_type == np.datetime64:
197 | value = convert_to_r_posixct(value)
198 | else:
199 | value = [item if pd.notnull(item) else rpy2.rinterface.NA_Integer#com.NA_TYPES[value_type]
200 | for item in value]
201 |
202 | value = rpy2.robjects.vectors.FloatVector(value)#com.VECTOR_TYPES[value_type](value)
203 |
204 | if not strings_as_factors:
205 | I = ro.baseenv.get("I")
206 | value = I(value)
207 |
208 | columns[column] = value
209 |
210 | r_dataframe = ro.DataFrame(columns)
211 | del columns
212 |
213 | r_dataframe.rownames = ro.StrVector(list(df.index))
214 | r_dataframe.colnames = list(df.columns)
215 |
216 | return r_dataframe
217 |
218 | def _tree_to_R_object(self, tree, feature_mapping):
219 | node_mapping = {}
220 | nodes = tree._get_nodes()
221 | nodes.extend(tree._get_leaves())
222 | for i, node in enumerate(nodes):
223 | node_mapping[node] = i+1
224 | vectors = []
225 | for node in nodes:
226 | if node.value is not None:
227 | vectors.append([node_mapping[node], node_mapping[node.left], node_mapping[node.right],
228 | feature_mapping[node.label], node.value, 1, 0])
229 | else:
230 | vectors.append([node_mapping[node], 0, 0, 0, 0.0, -1, node.label])
231 |
232 | df = pd.DataFrame(vectors)
233 | df.columns = ['id', 'left daughter', 'right daughter', 'split var', 'split point', 'status', 'prediction']
234 | df = df.set_index('id')
235 | df.index.name = None
236 |
237 | return self._convert_to_r_dataframe(df)
238 |
239 | def construct_rule_list(self, train_df, label_col, tree_constructors, nr_bootstraps=3):
240 | """ Construct an `constructors.inTrees.OrderedRuleList` from an ensemble of decision trees
241 |
242 | **Params**
243 | ----------
244 | - `train_df` (pandas DataFrame) - the training data
245 |
246 | - `label_col` (string) - the column identifier for the class labels
247 |
248 | - `tree_constructors` (`constructors.treeconstructor.TreeConstructor`) - the decision tree induction algorithms used to create an ensemble with
249 |
250 |         - `nr_bootstraps` (int) - how many times bootstrapping is applied for each TreeConstructor; the size of the ensemble will be equal to
251 |         |tree_constructors|*nr_bootstraps
252 |
253 | **Returns**
254 | -----------
255 | an OrderedRuleList
256 | """
257 | y_train = train_df[label_col]
258 | X_train = train_df.copy()
259 | X_train = X_train.drop(label_col, axis=1)
260 |
261 | importr('randomForest')
262 | importr('inTrees')
263 |
264 | ro.globalenv["X"] = pandas2ri.py2ri(X_train)
265 | ro.globalenv["target"] = ro.FactorVector(y_train.values.tolist())
266 |
267 | feature_mapping = {}
268 | feature_mapping_reverse = {}
269 | for i, feature in enumerate(X_train.columns):
270 | feature_mapping[feature] = i + 1
271 | feature_mapping_reverse[i + 1] = feature
272 |
273 | treeList = []
274 | for tree in ensemble.bootstrap(train_df, label_col, tree_constructors, nr_classifiers=nr_bootstraps):
275 | if tree.count_nodes() > 1: treeList.append(self._tree_to_R_object(tree, feature_mapping))
276 |
277 | ro.globalenv["treeList"] = ro.Vector([len(treeList), ro.Vector(treeList)])
278 | ro.r('names(treeList) <- c("ntree", "list")')
279 |
280 | rules = ro.r('buildLearner(getRuleMetric(extractRules(treeList, X), X, target), X, target)')
281 |         rules = list(rules)
282 |         conditions = rules[int(0.6*len(rules)):int(0.8*len(rules))]
283 |         predictions = rules[int(0.8*len(rules)):]
284 |
285 | # Create a OrderedRuleList
286 | rulesets = []
287 | for idx, (condition, prediction) in enumerate(zip(conditions, predictions)):
288 | # Split each condition in Rules to form a RuleSet
289 | rulelist = []
290 | condition_split = [x.lstrip().rstrip() for x in condition.split('&')]
291 | for rule in condition_split:
292 | feature = feature_mapping_reverse[int(re.findall(r',[0-9]+]', rule)[0][1:-1])]
293 |
294 | lte = re.findall(r'<=', rule)
295 | gt = re.findall(r'>', rule)
296 | eq = re.findall(r'==', rule)
297 | cond = lte[0] if len(lte) else (gt[0] if len(gt) else eq[0])
298 |
299 | extract_value = re.findall(r'[=>]-?[0-9\.]+', rule)
300 | if len(extract_value):
301 | value = float(re.findall(r'[=>]-?[0-9\.]+', rule)[0][1:])
302 | else:
303 | feature = 'True'
304 | value = None
305 |
306 | rulelist.append(Condition(feature, cond, value))
307 | rulesets.append(Rule(idx, rulelist, prediction))
308 |
309 | return OrderedRuleList(rulesets)
--------------------------------------------------------------------------------
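A usage sketch for `inTreesClassifier` (data file and label column are assumptions; R with the randomForest and inTrees packages must be installed, as in the Dockerfile). The rule list is built from an ensemble of |tree_constructors| * nr_bootstraps trees and can then be printed or used for prediction:

```python
import pandas as pd

from constructors.inTrees import inTreesClassifier
from constructors.treeconstructor import C45Constructor, CARTConstructor

train = pd.read_csv('train.csv')  # hypothetical data with a 'Class' label column

intrees = inTreesClassifier()
rule_list = intrees.construct_rule_list(train, 'Class',
                                        [CARTConstructor(), C45Constructor()],
                                        nr_bootstraps=3)

rule_list.print_rules()  # inspect the ordered rule list
predictions = rule_list.evaluate_multiple(train.drop('Class', axis=1))
```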
/constructors/quest:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/predict-idlab/GENESIM/44925de91ae408fea30ea8ece3688f9e42d4f040/constructors/quest
--------------------------------------------------------------------------------
/constructors/treeconstructor.py:
--------------------------------------------------------------------------------
1 | """
2 | Contains wrappers around well-known decision tree induction algorithms: C4.5, CART, QUEST and GUIDE.
3 |
4 | Written by Gilles Vandewiele in commission of IDLab - INTEC from University Ghent.
5 | """
6 |
7 | import pandas as pd
8 | import numpy as np
9 | from sklearn.cross_validation import StratifiedKFold
10 | from sklearn.metrics import accuracy_score
11 | from sklearn.tree import DecisionTreeClassifier
12 |
13 | import Orange
14 | import operator
15 | import os
16 | import time
17 | import subprocess
18 |
19 | import decisiontree
20 |
21 |
22 | class TreeConstructor(object):
23 | """This class is an interface for all tree induction algorithms."""
24 |
25 | def __init__(self):
26 | """In the init method, all hyper-parameters should be set."""
27 | pass
28 |
29 | def get_name(self):
30 | """Get the name of the induction algorithm implemented."""
31 | raise NotImplementedError("This method needs to be implemented")
32 |
33 | def construct_classifier(self, train, features, label_col):
34 | """Construct a `decisiontree.DecisionTree` object from the given training data
35 |
36 | **Params**
37 | ----------
38 | - `train` (pandas DataFrame) - a `Dataframe` containing all the training data
39 |
40 | - `features` (list) - the names of the feature columns
41 |
42 | - `label_col` (string) - the name of the class label column
43 |
44 | **Returns**
45 | -----------
46 | a DecisionTree object
47 | """
48 | raise NotImplementedError("This method needs to be implemented")
49 |
50 |
51 | #
52 | def _series2descriptor(d, discrete=False):
53 | if d.dtype is np.dtype("float"):
54 | return Orange.feature.Continuous(str(d.name))
55 | elif d.dtype is np.dtype("int"):
56 | return Orange.feature.Continuous(str(d.name), number_of_decimals=0)
57 | else:
58 | t = d.unique()
59 | if discrete or len(t) < len(d) / 2:
60 | t.sort()
61 | return Orange.feature.Discrete(str(d.name), values=list(t.astype("str")))
62 | else:
63 | return Orange.feature.String(str(d.name))
64 |
65 |
66 | def _df2domain(df):
67 | featurelist = [_series2descriptor(df.iloc[:, col]) for col in xrange(len(df.columns))]
68 | return Orange.data.Domain(featurelist)
69 |
70 |
71 | def _df2table(df):
72 | # It seems they are using native python object/lists internally for Orange.data types (?)
73 | # And I didn't find a constructor suitable for pandas.DataFrame since it may carry
74 | # multiple dtypes
75 | # --> the best approximate is Orange.data.Table.__init__(domain, numpy.ndarray),
76 | # --> but the dtype of numpy array can only be "int" and "float"
77 | # --> * refer to src/orange/lib_kernel.cpp 3059:
78 | # --> * if (((*vi)->varType != TValue::INTVAR) && ((*vi)->varType != TValue::FLOATVAR))
79 | # --> Documents never mentioned >_<
80 | # So we use numpy constructor for those int/float columns, python list constructor for other
81 |
82 | tdomain = _df2domain(df)
83 | ttables = [_series2table(df.iloc[:, i], tdomain[i]) for i in xrange(len(df.columns))]
84 | return Orange.data.Table(ttables)
85 |
86 |
87 | def _series2table(series, variable):
88 | if series.dtype is np.dtype("int") or series.dtype is np.dtype("float"):
89 | # Use numpy
90 | # Table._init__(Domain, numpy.ndarray)
91 | return Orange.data.Table(Orange.data.Domain(variable), series.values[:, np.newaxis])
92 | else:
93 | # Build instance list
94 | # Table.__init__(Domain, list_of_instances)
95 | tdomain = Orange.data.Domain(variable)
96 | tinsts = [Orange.data.Instance(tdomain, [i]) for i in series]
97 | return Orange.data.Table(tdomain, tinsts)
98 | # 5x performance
99 |
100 |
101 | def _column2df(col):
102 | if type(col.domain[0]) is Orange.feature.Continuous:
103 | return (col.domain[0].name, pd.Series(col.to_numpy()[0].flatten()))
104 | else:
105 | tmp = pd.Series(np.array(list(col)).flatten()) # type(tmp) -> np.array( dtype=list (Orange.data.Value) )
106 | tmp = tmp.apply(lambda x: str(x[0]))
107 | return (col.domain[0].name, tmp)
108 |
109 |
110 | def _table2df(tab):
111 | # Orange.data.Table().to_numpy() cannot handle strings
112 | # So we must build the array column by column,
113 | # When it comes to strings, python list is used
114 | series = [_column2df(tab.select(i)) for i in xrange(len(tab.domain))]
115 | series_name = [i[0] for i in series] # To keep the order of variables unchanged
116 | series_data = dict(series)
117 | return pd.DataFrame(series_data, columns=series_name)
118 |
119 | #
120 |
121 |
122 | class C45Constructor(TreeConstructor):
123 | """This class contains an implementation of C4.5, written by Quinlan. It uses an extern library
124 | for this called [Orange](http://docs.orange.biolab.si/2/reference/rst/Orange.classification.tree.html#Orange.classification.tree.C45Learner)."""
125 |
126 | def __init__(self, gain_ratio=False, cf=0.15):
127 | super(C45Constructor, self).__init__()
128 | self.gain_ratio = gain_ratio
129 | '''boolean value that indicates if either gain ratio or information gain is used as split metric'''
130 | self.cf = cf
131 | '''pruning confidence level: the lower this value, the more pruning will be done'''
132 |
133 | def get_name(self):
134 | return "C4.5"
135 |
136 | def construct_classifier(self, train, features, label_col, param_opt=True):
137 | training_feature_vectors = train[features].copy()
138 | labels = train[label_col].copy()
139 | if param_opt:
140 | optimal_clf = C45Constructor.get_best_c45_classifier(train, label_col,
141 | StratifiedKFold(train[label_col], n_folds=3,
142 | shuffle=True, random_state=None))
143 | self.cf = optimal_clf.cf
144 |
145 | # First call df2table on the feature table
146 | orange_feature_table = _df2table(training_feature_vectors)
147 |
148 | # Convert classes to strings and call df2table
149 | orange_labels_table = _df2table(pd.DataFrame(labels.map(str)))
150 |
151 | # Merge two tables
152 | orange_table = Orange.data.Table([orange_feature_table, orange_labels_table])
153 |
154 | return self._orange_dt_to_my_dt(Orange.classification.tree.C45Learner(orange_table, gain_ratio=self.gain_ratio,
155 | cf=self.cf, min_objs=2, subset=False).tree)
156 |
157 | def _orange_dt_to_my_dt(self, orange_dt_root):
158 | # Check if leaf
159 | if orange_dt_root.node_type == Orange.classification.tree.C45Node.Leaf:
160 | return decisiontree.DecisionTree(left=None, right=None, label=str(int(orange_dt_root.leaf)), data=None, value=None)
161 | else:
162 | dt = decisiontree.DecisionTree(label=orange_dt_root.tested.name, data=None, value=orange_dt_root.cut)
163 | dt.left = self._orange_dt_to_my_dt(orange_dt_root.branch[0])
164 | dt.right = self._orange_dt_to_my_dt(orange_dt_root.branch[1])
165 | return dt
166 |
167 | @staticmethod
168 | def get_best_c45_classifier(train, label_col, skf_tune):
169 | """Returns a `treeconstructor.C45Constructor` with optimized hyper-parameters using
170 | [Grid Search](https://en.wikipedia.org/wiki/Hyperparameter_optimization#Grid_search)
171 |
172 | **Params**
173 | ----------
174 | - `train` (pandas DataFrame) - `a pandas Dataframe` with all training data
175 |
176 | - `label_col` (string) - the column identifier for the label in the `train` Dataframe
177 |
178 | - `skf_tune` (`sklearn.cross_validation.StratifiedKFold`) - cross-validation object to tune parameters
179 |
180 | **Returns**
181 | -----------
182 | a C45Constructor with optimized hyper-parameters
183 | """
184 | c45 = C45Constructor()
185 | cfs = np.arange(0.05, 1.05, 0.05)
186 | cfs_errors = {}
187 | for cf in cfs: cfs_errors[cf] = []
188 |
189 | for train_tune_idx, val_tune_idx in skf_tune:
190 | train_tune = train.iloc[train_tune_idx, :]
191 | X_train_tune = train_tune.drop(label_col, axis=1)
192 | y_train_tune = train_tune[label_col]
193 | val_tune = train.iloc[val_tune_idx, :]
194 | X_val_tune = val_tune.drop(label_col, axis=1)
195 | y_val_tune = val_tune[label_col]
196 | for cf in cfs:
197 | c45.cf = cf
198 | tree = c45.construct_classifier(train_tune, X_train_tune.columns, label_col, param_opt=False)
199 | predictions = tree.evaluate_multiple(X_val_tune).astype(int)
200 | cfs_errors[cf].append(1 - accuracy_score(predictions, y_val_tune, normalize=True))
201 |
202 | for cf in cfs:
203 | cfs_errors[cf] = np.mean(cfs_errors[cf])
204 |
205 | c45.cf = min(cfs_errors.items(), key=operator.itemgetter(1))[0]
206 | return c45
207 |
208 |
209 | class CARTConstructor(TreeConstructor):
210 | """This class contains an implementation of CART, written by Breiman. It uses an extern library
211 | for this called [sklearn](http://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html)."""
212 |
213 | def __init__(self, criterion='gini', min_samples_leaf=1, min_samples_split=2, max_depth=10):
214 | super(CARTConstructor, self).__init__()
215 |         self.min_samples_leaf = min_samples_leaf
216 |         '''pre-prune condition: a split is only allowed if it leaves at least this many samples in each of
217 |         the two partitions'''
218 |         self.min_samples_split = min_samples_split
219 |         '''pre-prune condition: when the number of samples in the current node is lower than this threshold, then stop'''
220 | self.max_depth = max_depth
221 | '''pre-prune condition: when a depth equal to this parameter is reached, then stop'''
222 | self.criterion = criterion
223 |         '''defines which split criterion to use: either `gini` or `entropy`'''
224 |
225 | def get_name(self):
226 | return "CART"
227 |
228 | def construct_classifier(self, train, features, label_col, param_opt=True):
229 | training_feature_vectors = train[features]
230 | labels = train[label_col]
231 | train = training_feature_vectors.copy()
232 | label_col = labels.name
233 | train[label_col] = labels
234 | if param_opt:
235 | optimal_clf = CARTConstructor.get_best_cart_classifier(train, label_col,
236 | StratifiedKFold(train[label_col], n_folds=3,
237 | shuffle=True, random_state=None))
238 | self.max_depth = optimal_clf.max_depth
239 | self.min_samples_split = optimal_clf.min_samples_split
240 |
241 | self.features = list(training_feature_vectors.columns)
242 |
243 | self.y = labels.values
244 | self.X = training_feature_vectors[self.features]
245 |
246 |
247 | self.dt = DecisionTreeClassifier(criterion=self.criterion, min_samples_leaf=self.min_samples_leaf,
248 | min_samples_split=self.min_samples_split, max_depth=self.max_depth)
249 | self.dt.fit(self.X, self.y)
250 |
251 | return self._convert_to_tree()
252 |
253 | def _convert_to_tree(self):
254 | """Convert a sklearn object to a `decisiontree.decisiontree` object"""
255 | n_nodes = self.dt.tree_.node_count
256 | children_left = self.dt.tree_.children_left
257 | children_right = self.dt.tree_.children_right
258 | feature = self.dt.tree_.feature
259 | threshold = self.dt.tree_.threshold
260 | classes = self.dt.classes_
261 |
262 | # The tree structure can be traversed to compute various properties such
263 | # as the depth of each node and whether or not it is a leaf.
264 | node_depth = np.zeros(shape=n_nodes)
265 | decision_trees = [None] * n_nodes
266 | for i in range(n_nodes):
267 | decision_trees[i] = decisiontree.DecisionTree()
268 | is_leaves = np.zeros(shape=n_nodes, dtype=bool)
269 | stack = [(0, -1)] # seed is the root node id and its parent depth
270 | while len(stack) > 0:
271 | node_id, parent_depth = stack.pop()
272 | node_depth[node_id] = parent_depth + 1
273 |
274 | # If we have a test node
275 | if children_left[node_id] != children_right[node_id]:
276 | stack.append((children_left[node_id], parent_depth + 1))
277 | stack.append((children_right[node_id], parent_depth + 1))
278 | else:
279 | is_leaves[node_id] = True
280 |
281 | for i in range(n_nodes):
282 |
283 | if children_left[i] > 0:
284 | decision_trees[i].left = decision_trees[children_left[i]]
285 |
286 | if children_right[i] > 0:
287 | decision_trees[i].right = decision_trees[children_right[i]]
288 |
289 | if is_leaves[i]:
290 | decision_trees[i].label = self.dt.classes_[np.argmax(self.dt.tree_.value[i][0])]
291 | decision_trees[i].value = None
292 | else:
293 | decision_trees[i].label = self.features[feature[i]]
294 | decision_trees[i].value = threshold[i]
295 |
296 | return decision_trees[0]
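    | 
    |     # Note on the sklearn internals used above: `tree_` stores the tree in
    |     # parallel arrays indexed by node id. For a tree with a single split on
    |     # one feature, children_left == [1, -1, -1] and children_right == [2, -1, -1],
    |     # so a node is a leaf exactly when its left and right child entries
    |     # coincide (-1), which is the test performed in the stack-based traversal.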
297 |
298 | @staticmethod
299 | def get_best_cart_classifier(train, label_col, skf_tune):
300 | """Returns a `treeconstructor.CARTConstructor` with optimized hyper-parameters using
301 | [Grid Search](https://en.wikipedia.org/wiki/Hyperparameter_optimization#Grid_search)
302 |
303 | **Params**
304 | ----------
305 |         - `train` (pandas DataFrame) - a pandas DataFrame with all training data
306 |
307 | - `label_col` (string) - the column identifier for the label in the `train` Dataframe
308 |
309 | - `skf_tune` (`sklearn.cross_validation.StratifiedKFold`) - cross-validation object to tune parameters
310 |
311 | **Returns**
312 | -----------
313 | a CARTConstructor with optimized hyper-parameters
314 | """
315 | cart = CARTConstructor()
316 | max_depths = np.arange(1,21,2)
317 | max_depths = np.append(max_depths, None)
318 | min_samples_splits = np.arange(2,20,1)
319 |
320 | errors = {}
321 | for max_depth in max_depths:
322 | for min_samples_split in min_samples_splits:
323 | errors[(max_depth, min_samples_split)] = []
324 |
325 | for train_tune_idx, val_tune_idx in skf_tune:
326 | train_tune = train.iloc[train_tune_idx, :]
327 | X_train_tune = train_tune.drop(label_col, axis=1)
328 | y_train_tune = train_tune[label_col]
329 | val_tune = train.iloc[val_tune_idx, :]
330 | X_val_tune = val_tune.drop(label_col, axis=1)
331 | y_val_tune = val_tune[label_col]
332 | for max_depth in max_depths:
333 | for min_samples_split in min_samples_splits:
334 | cart.max_depth = max_depth
335 | cart.min_samples_split = min_samples_split
336 | tree = cart.construct_classifier(train_tune, X_train_tune.columns, label_col, param_opt=False)
337 | predictions = tree.evaluate_multiple(X_val_tune).astype(int)
338 |                     errors[(max_depth, min_samples_split)].append(1 - accuracy_score(predictions, y_val_tune, normalize=True))
339 |
340 |
341 | for max_depth in max_depths:
342 | for min_samples_split in min_samples_splits:
343 | errors[(max_depth, min_samples_split)] = np.mean(errors[(max_depth, min_samples_split)])
344 |
345 | best_params = min(errors.items(), key=operator.itemgetter(1))[0]
346 | cart.max_depth = best_params[0]
347 | cart.min_samples_split = best_params[1]
348 |
349 | return cart
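    | 
    |     # A minimal usage sketch (hypothetical names: `df` is assumed to be a
    |     # pandas DataFrame with the labels in column 'class'; param_opt=True
    |     # triggers the grid search above on 3 stratified folds):
    |     #
    |     #   cart = CARTConstructor(criterion='entropy')
    |     #   tree = cart.construct_classifier(df, [c for c in df.columns if c != 'class'],
    |     #                                    'class', param_opt=True)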
350 |
351 |
352 | class QUESTConstructor(TreeConstructor):
353 | """This class contains a wrapper around an implementation of [QUEST](http://www.stat.wisc.edu/~loh/quest.html),
354 | written by Loh."""
355 |
356 | def __init__(self):
357 | super(QUESTConstructor, self).__init__()
358 |
359 | def get_name(self):
360 | return "QUEST"
361 |
362 | def construct_classifier(self, train, features, label_col):
363 | training_feature_vectors = train[features]
364 | labels = train[label_col]
365 | self._create_desc_and_data_file(training_feature_vectors, labels)
366 | input = open("in.txt", "w")
367 |         output = open('out.txt', 'w')
368 | p = subprocess.Popen(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1])+'/quest > log.txt', stdin=subprocess.PIPE, shell=True)
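    |         # The writes below answer QUEST's interactive prompts (the exact
    |         # prompt sequence depends on the QUEST build); the intent is to have
    |         # QUEST generate a driver file, in.txt, for a default classification
    |         # run based on dsc.txt.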
369 | p.stdin.write("2\n")
370 | p.stdin.write("in.txt\n")
371 | p.stdin.write("1\n")
372 | p.stdin.write("out.txt\n")
373 | p.stdin.write("1\n")
374 | p.stdin.write("dsc.txt\n")
375 | p.stdin.write("1\n")
376 | p.stdin.write("\n")
377 | p.wait()
378 | input.close()
379 | output.close()
380 |
381 | while not os.path.exists('in.txt'):
382 | time.sleep(1)
383 | p = subprocess.Popen(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1])+'/quest < in.txt > log.txt', stdin=subprocess.PIPE, shell=True)
384 | p.wait()
385 |
386 |         output = open('out.txt', 'r')
387 | lines = output.readlines()
388 | output.close()
389 |
390 | start_index, end_index, counter = 0, 0, 0
391 | for line in lines:
392 | if line == ' Classification tree:\n':
393 | start_index = counter+2
394 | if line == ' Information for each node:\n':
395 | end_index = counter-1
396 | counter += 1
397 | tree = self._decision_tree_from_text(lines[start_index:end_index])
398 |
399 | self._remove_files()
400 |
401 | return tree
402 |
403 | def _decision_tree_from_text(self, lines):
404 | dt = decisiontree.DecisionTree()
405 |
406 | if '<=' in lines[0] or '>' in lines[0]:
407 | # Intermediate node
408 | node_name = lines[0].split(':')[0].lstrip()
409 | label, value = lines[0].split(':')[1].split('<=')
410 | label = ' '.join(label.lstrip().rstrip().split('.'))
411 | value = value.lstrip().split()[0]
412 | dt.label = label
413 | dt.value = float(value)
414 | dt.left = self._decision_tree_from_text(lines[1:])
415 | counter = 1
416 | while lines[counter].split(':')[0].lstrip() != node_name: counter+=1
417 | dt.right = self._decision_tree_from_text(lines[counter + 1:])
418 | else:
419 | # Terminal node
420 |             dt.label = int(float(lines[0].split(':')[1].lstrip()))
421 |
422 | return dt
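    | 
    |     # The parser above assumes QUEST prints one node per line in the form
    |     # '<node id>: <feature> <= <value>' for splits and '<node id>: <class>'
    |     # for leaves, e.g. (illustrative only):
    |     #
    |     #   1:  feature.1 <= 0.800
    |     #    2:  1
    |     #   1:  feature.1 > 0.800
    |     #    3:  2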
423 |
424 | def _create_desc_and_data_file(self, training_feature_vectors, labels):
425 | dsc = open("dsc.txt", "w")
426 | data = open("data.txt", "w")
427 |
428 | dsc.write("data.txt\n")
429 | dsc.write("\"?\"\n")
430 | dsc.write("column, var, type\n")
431 | count = 1
432 | for col in training_feature_vectors.columns:
433 | dsc.write(str(count) + ' \"' + str(col) + '\" n\n')
434 | count += 1
435 | dsc.write(str(count) + ' ' + str(labels.name) + ' d')
436 |
437 | for i in range(len(training_feature_vectors)):
438 | sample = training_feature_vectors.iloc[i,:]
439 | for col in training_feature_vectors.columns:
440 | data.write(str(sample[col]) + ' ')
441 | if i != len(training_feature_vectors)-1:
442 | data.write(str(labels[i])+'\n')
443 | else:
444 | data.write(str(labels[i]))
445 |
446 | data.close()
447 | dsc.close()
448 |
449 | def _remove_files(self):
450 | os.remove('data.txt')
451 | os.remove('in.txt')
452 | os.remove('dsc.txt')
453 | os.remove('out.txt')
454 | os.remove('log.txt')
455 |
456 |
457 | class GUIDEConstructor(TreeConstructor):
458 | """This class contains a wrapper around an implementation of [GUIDE](http://www.stat.wisc.edu/~loh/guide.html),
459 | written by Loh."""
460 |
461 | def __init__(self):
462 | super(GUIDEConstructor, self).__init__()
463 |
464 | def get_name(self):
465 | return "GUIDE"
466 |
467 | def construct_classifier(self, train, features, label_col):
468 | training_feature_vectors = train[features]
469 | labels = train[label_col]
470 | self._create_desc_and_data_file(training_feature_vectors, labels)
471 | input = open("in.txt", "w")
472 |         output = open('out.txt', 'w')
473 | p = subprocess.Popen(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1])+'/guide > log.txt', stdin=subprocess.PIPE, shell=True)
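    |         # As with QUEST, the writes below answer GUIDE's interactive prompts
    |         # (the exact sequence depends on the GUIDE version); they request a
    |         # default single-tree classification run described by dsc.txt.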
474 | p.stdin.write("1\n")
475 | p.stdin.write("in.txt\n")
476 | p.stdin.write("1\n")
477 | p.stdin.write("1\n")
478 | p.stdin.write("out.txt\n")
479 | p.stdin.write("1\n")
480 | p.stdin.write("1\n")
481 | p.stdin.write("1\n")
482 | p.stdin.write("2\n")
483 | p.stdin.write("1\n")
484 | p.stdin.write("3\n")
485 | p.stdin.write("1\n")
486 | p.stdin.write('dsc.txt\n')
487 | p.stdin.write("\n")
488 | p.stdin.write("\n")
489 | p.stdin.write("\n")
490 | p.stdin.write("1\n")
491 | p.stdin.write("1\n")
492 | p.stdin.write("\n")
493 | p.stdin.write("\n")
494 | p.stdin.write("\n")
495 | p.stdin.write("2\n")
496 | p.stdin.write("1\n")
497 | p.stdin.write("1\n")
498 | p.stdin.write("1\n")
499 | p.stdin.write("1\n")
500 | p.stdin.write("\n")
501 | p.wait()
502 | input.close()
503 | output.close()
504 |
505 | while not os.path.exists('in.txt'):
506 | time.sleep(1)
507 | p = subprocess.Popen(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1])+'/guide < in.txt > log.txt', shell=True)
508 | p.wait()
509 |
510 |         output = open('out.txt', 'r')
511 | lines = output.readlines()
512 | output.close()
513 |
514 | start_index, end_index, counter = 0, 0, 0
515 | for line in lines:
516 | if line == ' Classification tree:\n':
517 | start_index = counter+2
518 | if line == ' ***************************************************************\n':
519 | end_index = counter-1
520 | counter += 1
521 | tree = self._decision_tree_from_text(lines[start_index:end_index])
522 |
523 |         # self._remove_files()
524 |
525 | # tree.visualise('GUIDE')
526 | return tree
527 |
528 | def _decision_tree_from_text(self, lines):
529 |
530 | dt = decisiontree.DecisionTree()
531 |
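    |         # NOTE: only binary numeric splits of the form 'feature <= value'
    |         # are reconstructed here; a categorical '=' split would make the
    |         # split('<=') below fail and would need separate handling.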
532 | if '<=' in lines[0] or '>' in lines[0] or '=' in lines[0]:
533 | # Intermediate node
534 | node_name = lines[0].split(':')[0].lstrip()
535 | # print(lines[0])
536 | label, value = lines[0].split(':')[1].split('<=')
537 | label = ' '.join(label.lstrip().rstrip().split('.'))
538 | value = value.lstrip().split()[0]
539 | dt.label = label
540 | dt.value = float(value)
541 | dt.left = self._decision_tree_from_text(lines[1:])
542 | counter = 1
543 | while lines[counter].split(':')[0].lstrip() != node_name: counter+=1
544 | dt.right = self._decision_tree_from_text(lines[counter + 1:])
545 | else:
546 | # Terminal node
547 | # print lines[0]
548 | dt.label = int(lines[0].split(':')[1].lstrip().split('.')[0])
549 |
550 | return dt
551 |
552 | def _create_desc_and_data_file(self, training_feature_vectors, labels):
553 | dsc = open("dsc.txt", "w")
554 | data = open("data.txt", "w")
555 | dsc.write("data.txt\n")
556 | dsc.write("\"?\"\n")
557 | dsc.write("1\n")
558 | count = 1
559 | for col in training_feature_vectors.columns:
560 | dsc.write(str(count) + ' \"' + str(col) + '\" n\n')
561 | count += 1
562 | dsc.write(str(count) + ' ' + str(labels.name) + ' d')
563 |
564 | for i in range(len(training_feature_vectors)):
565 | sample = training_feature_vectors.iloc[i,:]
566 | for col in training_feature_vectors.columns:
567 | data.write(str(sample[col]) + ' ')
568 | if i != len(training_feature_vectors)-1:
569 | data.write(str(labels[i])+'\n')
570 | else:
571 | data.write(str(labels[i]))
572 |
573 | data.close()
574 | dsc.close()
575 |
576 | def _remove_files(self):
577 | os.remove('data.txt')
578 | os.remove('in.txt')
579 | os.remove('dsc.txt')
580 | os.remove('out.txt')
581 | os.remove('log.txt')
--------------------------------------------------------------------------------
/data/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Contains the data files and two Python files responsible for loading them easily. In `data.load_datasets`,
3 | a load function must be written for each dataset. In `data.load_all_datasets`, Python introspection is used to
4 | load all datasets that have such a load function.
5 |
6 | Written by Gilles Vandewiele in commission of IDLab - INTEC from Ghent University.
7 | """
--------------------------------------------------------------------------------
/data/breast-cancer-wisconsin.data:
--------------------------------------------------------------------------------
1 | 1000025,5,1,1,1,2,1,3,1,1,2
2 | 1002945,5,4,4,5,7,10,3,2,1,2
3 | 1015425,3,1,1,1,2,2,3,1,1,2
4 | 1016277,6,8,8,1,3,4,3,7,1,2
5 | 1017023,4,1,1,3,2,1,3,1,1,2
6 | 1017122,8,10,10,8,7,10,9,7,1,4
7 | 1018099,1,1,1,1,2,10,3,1,1,2
8 | 1018561,2,1,2,1,2,1,3,1,1,2
9 | 1033078,2,1,1,1,2,1,1,1,5,2
10 | 1033078,4,2,1,1,2,1,2,1,1,2
11 | 1035283,1,1,1,1,1,1,3,1,1,2
12 | 1036172,2,1,1,1,2,1,2,1,1,2
13 | 1041801,5,3,3,3,2,3,4,4,1,4
14 | 1043999,1,1,1,1,2,3,3,1,1,2
15 | 1044572,8,7,5,10,7,9,5,5,4,4
16 | 1047630,7,4,6,4,6,1,4,3,1,4
17 | 1048672,4,1,1,1,2,1,2,1,1,2
18 | 1049815,4,1,1,1,2,1,3,1,1,2
19 | 1050670,10,7,7,6,4,10,4,1,2,4
20 | 1050718,6,1,1,1,2,1,3,1,1,2
21 | 1054590,7,3,2,10,5,10,5,4,4,4
22 | 1054593,10,5,5,3,6,7,7,10,1,4
23 | 1056784,3,1,1,1,2,1,2,1,1,2
24 | 1057013,8,4,5,1,2,?,7,3,1,4
25 | 1059552,1,1,1,1,2,1,3,1,1,2
26 | 1065726,5,2,3,4,2,7,3,6,1,4
27 | 1066373,3,2,1,1,1,1,2,1,1,2
28 | 1066979,5,1,1,1,2,1,2,1,1,2
29 | 1067444,2,1,1,1,2,1,2,1,1,2
30 | 1070935,1,1,3,1,2,1,1,1,1,2
31 | 1070935,3,1,1,1,1,1,2,1,1,2
32 | 1071760,2,1,1,1,2,1,3,1,1,2
33 | 1072179,10,7,7,3,8,5,7,4,3,4
34 | 1074610,2,1,1,2,2,1,3,1,1,2
35 | 1075123,3,1,2,1,2,1,2,1,1,2
36 | 1079304,2,1,1,1,2,1,2,1,1,2
37 | 1080185,10,10,10,8,6,1,8,9,1,4
38 | 1081791,6,2,1,1,1,1,7,1,1,2
39 | 1084584,5,4,4,9,2,10,5,6,1,4
40 | 1091262,2,5,3,3,6,7,7,5,1,4
41 | 1096800,6,6,6,9,6,?,7,8,1,2
42 | 1099510,10,4,3,1,3,3,6,5,2,4
43 | 1100524,6,10,10,2,8,10,7,3,3,4
44 | 1102573,5,6,5,6,10,1,3,1,1,4
45 | 1103608,10,10,10,4,8,1,8,10,1,4
46 | 1103722,1,1,1,1,2,1,2,1,2,2
47 | 1105257,3,7,7,4,4,9,4,8,1,4
48 | 1105524,1,1,1,1,2,1,2,1,1,2
49 | 1106095,4,1,1,3,2,1,3,1,1,2
50 | 1106829,7,8,7,2,4,8,3,8,2,4
51 | 1108370,9,5,8,1,2,3,2,1,5,4
52 | 1108449,5,3,3,4,2,4,3,4,1,4
53 | 1110102,10,3,6,2,3,5,4,10,2,4
54 | 1110503,5,5,5,8,10,8,7,3,7,4
55 | 1110524,10,5,5,6,8,8,7,1,1,4
56 | 1111249,10,6,6,3,4,5,3,6,1,4
57 | 1112209,8,10,10,1,3,6,3,9,1,4
58 | 1113038,8,2,4,1,5,1,5,4,4,4
59 | 1113483,5,2,3,1,6,10,5,1,1,4
60 | 1113906,9,5,5,2,2,2,5,1,1,4
61 | 1115282,5,3,5,5,3,3,4,10,1,4
62 | 1115293,1,1,1,1,2,2,2,1,1,2
63 | 1116116,9,10,10,1,10,8,3,3,1,4
64 | 1116132,6,3,4,1,5,2,3,9,1,4
65 | 1116192,1,1,1,1,2,1,2,1,1,2
66 | 1116998,10,4,2,1,3,2,4,3,10,4
67 | 1117152,4,1,1,1,2,1,3,1,1,2
68 | 1118039,5,3,4,1,8,10,4,9,1,4
69 | 1120559,8,3,8,3,4,9,8,9,8,4
70 | 1121732,1,1,1,1,2,1,3,2,1,2
71 | 1121919,5,1,3,1,2,1,2,1,1,2
72 | 1123061,6,10,2,8,10,2,7,8,10,4
73 | 1124651,1,3,3,2,2,1,7,2,1,2
74 | 1125035,9,4,5,10,6,10,4,8,1,4
75 | 1126417,10,6,4,1,3,4,3,2,3,4
76 | 1131294,1,1,2,1,2,2,4,2,1,2
77 | 1132347,1,1,4,1,2,1,2,1,1,2
78 | 1133041,5,3,1,2,2,1,2,1,1,2
79 | 1133136,3,1,1,1,2,3,3,1,1,2
80 | 1136142,2,1,1,1,3,1,2,1,1,2
81 | 1137156,2,2,2,1,1,1,7,1,1,2
82 | 1143978,4,1,1,2,2,1,2,1,1,2
83 | 1143978,5,2,1,1,2,1,3,1,1,2
84 | 1147044,3,1,1,1,2,2,7,1,1,2
85 | 1147699,3,5,7,8,8,9,7,10,7,4
86 | 1147748,5,10,6,1,10,4,4,10,10,4
87 | 1148278,3,3,6,4,5,8,4,4,1,4
88 | 1148873,3,6,6,6,5,10,6,8,3,4
89 | 1152331,4,1,1,1,2,1,3,1,1,2
90 | 1155546,2,1,1,2,3,1,2,1,1,2
91 | 1156272,1,1,1,1,2,1,3,1,1,2
92 | 1156948,3,1,1,2,2,1,1,1,1,2
93 | 1157734,4,1,1,1,2,1,3,1,1,2
94 | 1158247,1,1,1,1,2,1,2,1,1,2
95 | 1160476,2,1,1,1,2,1,3,1,1,2
96 | 1164066,1,1,1,1,2,1,3,1,1,2
97 | 1165297,2,1,1,2,2,1,1,1,1,2
98 | 1165790,5,1,1,1,2,1,3,1,1,2
99 | 1165926,9,6,9,2,10,6,2,9,10,4
100 | 1166630,7,5,6,10,5,10,7,9,4,4
101 | 1166654,10,3,5,1,10,5,3,10,2,4
102 | 1167439,2,3,4,4,2,5,2,5,1,4
103 | 1167471,4,1,2,1,2,1,3,1,1,2
104 | 1168359,8,2,3,1,6,3,7,1,1,4
105 | 1168736,10,10,10,10,10,1,8,8,8,4
106 | 1169049,7,3,4,4,3,3,3,2,7,4
107 | 1170419,10,10,10,8,2,10,4,1,1,4
108 | 1170420,1,6,8,10,8,10,5,7,1,4
109 | 1171710,1,1,1,1,2,1,2,3,1,2
110 | 1171710,6,5,4,4,3,9,7,8,3,4
111 | 1171795,1,3,1,2,2,2,5,3,2,2
112 | 1171845,8,6,4,3,5,9,3,1,1,4
113 | 1172152,10,3,3,10,2,10,7,3,3,4
114 | 1173216,10,10,10,3,10,8,8,1,1,4
115 | 1173235,3,3,2,1,2,3,3,1,1,2
116 | 1173347,1,1,1,1,2,5,1,1,1,2
117 | 1173347,8,3,3,1,2,2,3,2,1,2
118 | 1173509,4,5,5,10,4,10,7,5,8,4
119 | 1173514,1,1,1,1,4,3,1,1,1,2
120 | 1173681,3,2,1,1,2,2,3,1,1,2
121 | 1174057,1,1,2,2,2,1,3,1,1,2
122 | 1174057,4,2,1,1,2,2,3,1,1,2
123 | 1174131,10,10,10,2,10,10,5,3,3,4
124 | 1174428,5,3,5,1,8,10,5,3,1,4
125 | 1175937,5,4,6,7,9,7,8,10,1,4
126 | 1176406,1,1,1,1,2,1,2,1,1,2
127 | 1176881,7,5,3,7,4,10,7,5,5,4
128 | 1177027,3,1,1,1,2,1,3,1,1,2
129 | 1177399,8,3,5,4,5,10,1,6,2,4
130 | 1177512,1,1,1,1,10,1,1,1,1,2
131 | 1178580,5,1,3,1,2,1,2,1,1,2
132 | 1179818,2,1,1,1,2,1,3,1,1,2
133 | 1180194,5,10,8,10,8,10,3,6,3,4
134 | 1180523,3,1,1,1,2,1,2,2,1,2
135 | 1180831,3,1,1,1,3,1,2,1,1,2
136 | 1181356,5,1,1,1,2,2,3,3,1,2
137 | 1182404,4,1,1,1,2,1,2,1,1,2
138 | 1182410,3,1,1,1,2,1,1,1,1,2
139 | 1183240,4,1,2,1,2,1,2,1,1,2
140 | 1183246,1,1,1,1,1,?,2,1,1,2
141 | 1183516,3,1,1,1,2,1,1,1,1,2
142 | 1183911,2,1,1,1,2,1,1,1,1,2
143 | 1183983,9,5,5,4,4,5,4,3,3,4
144 | 1184184,1,1,1,1,2,5,1,1,1,2
145 | 1184241,2,1,1,1,2,1,2,1,1,2
146 | 1184840,1,1,3,1,2,?,2,1,1,2
147 | 1185609,3,4,5,2,6,8,4,1,1,4
148 | 1185610,1,1,1,1,3,2,2,1,1,2
149 | 1187457,3,1,1,3,8,1,5,8,1,2
150 | 1187805,8,8,7,4,10,10,7,8,7,4
151 | 1188472,1,1,1,1,1,1,3,1,1,2
152 | 1189266,7,2,4,1,6,10,5,4,3,4
153 | 1189286,10,10,8,6,4,5,8,10,1,4
154 | 1190394,4,1,1,1,2,3,1,1,1,2
155 | 1190485,1,1,1,1,2,1,1,1,1,2
156 | 1192325,5,5,5,6,3,10,3,1,1,4
157 | 1193091,1,2,2,1,2,1,2,1,1,2
158 | 1193210,2,1,1,1,2,1,3,1,1,2
159 | 1193683,1,1,2,1,3,?,1,1,1,2
160 | 1196295,9,9,10,3,6,10,7,10,6,4
161 | 1196915,10,7,7,4,5,10,5,7,2,4
162 | 1197080,4,1,1,1,2,1,3,2,1,2
163 | 1197270,3,1,1,1,2,1,3,1,1,2
164 | 1197440,1,1,1,2,1,3,1,1,7,2
165 | 1197510,5,1,1,1,2,?,3,1,1,2
166 | 1197979,4,1,1,1,2,2,3,2,1,2
167 | 1197993,5,6,7,8,8,10,3,10,3,4
168 | 1198128,10,8,10,10,6,1,3,1,10,4
169 | 1198641,3,1,1,1,2,1,3,1,1,2
170 | 1199219,1,1,1,2,1,1,1,1,1,2
171 | 1199731,3,1,1,1,2,1,1,1,1,2
172 | 1199983,1,1,1,1,2,1,3,1,1,2
173 | 1200772,1,1,1,1,2,1,2,1,1,2
174 | 1200847,6,10,10,10,8,10,10,10,7,4
175 | 1200892,8,6,5,4,3,10,6,1,1,4
176 | 1200952,5,8,7,7,10,10,5,7,1,4
177 | 1201834,2,1,1,1,2,1,3,1,1,2
178 | 1201936,5,10,10,3,8,1,5,10,3,4
179 | 1202125,4,1,1,1,2,1,3,1,1,2
180 | 1202812,5,3,3,3,6,10,3,1,1,4
181 | 1203096,1,1,1,1,1,1,3,1,1,2
182 | 1204242,1,1,1,1,2,1,1,1,1,2
183 | 1204898,6,1,1,1,2,1,3,1,1,2
184 | 1205138,5,8,8,8,5,10,7,8,1,4
185 | 1205579,8,7,6,4,4,10,5,1,1,4
186 | 1206089,2,1,1,1,1,1,3,1,1,2
187 | 1206695,1,5,8,6,5,8,7,10,1,4
188 | 1206841,10,5,6,10,6,10,7,7,10,4
189 | 1207986,5,8,4,10,5,8,9,10,1,4
190 | 1208301,1,2,3,1,2,1,3,1,1,2
191 | 1210963,10,10,10,8,6,8,7,10,1,4
192 | 1211202,7,5,10,10,10,10,4,10,3,4
193 | 1212232,5,1,1,1,2,1,2,1,1,2
194 | 1212251,1,1,1,1,2,1,3,1,1,2
195 | 1212422,3,1,1,1,2,1,3,1,1,2
196 | 1212422,4,1,1,1,2,1,3,1,1,2
197 | 1213375,8,4,4,5,4,7,7,8,2,2
198 | 1213383,5,1,1,4,2,1,3,1,1,2
199 | 1214092,1,1,1,1,2,1,1,1,1,2
200 | 1214556,3,1,1,1,2,1,2,1,1,2
201 | 1214966,9,7,7,5,5,10,7,8,3,4
202 | 1216694,10,8,8,4,10,10,8,1,1,4
203 | 1216947,1,1,1,1,2,1,3,1,1,2
204 | 1217051,5,1,1,1,2,1,3,1,1,2
205 | 1217264,1,1,1,1,2,1,3,1,1,2
206 | 1218105,5,10,10,9,6,10,7,10,5,4
207 | 1218741,10,10,9,3,7,5,3,5,1,4
208 | 1218860,1,1,1,1,1,1,3,1,1,2
209 | 1218860,1,1,1,1,1,1,3,1,1,2
210 | 1219406,5,1,1,1,1,1,3,1,1,2
211 | 1219525,8,10,10,10,5,10,8,10,6,4
212 | 1219859,8,10,8,8,4,8,7,7,1,4
213 | 1220330,1,1,1,1,2,1,3,1,1,2
214 | 1221863,10,10,10,10,7,10,7,10,4,4
215 | 1222047,10,10,10,10,3,10,10,6,1,4
216 | 1222936,8,7,8,7,5,5,5,10,2,4
217 | 1223282,1,1,1,1,2,1,2,1,1,2
218 | 1223426,1,1,1,1,2,1,3,1,1,2
219 | 1223793,6,10,7,7,6,4,8,10,2,4
220 | 1223967,6,1,3,1,2,1,3,1,1,2
221 | 1224329,1,1,1,2,2,1,3,1,1,2
222 | 1225799,10,6,4,3,10,10,9,10,1,4
223 | 1226012,4,1,1,3,1,5,2,1,1,4
224 | 1226612,7,5,6,3,3,8,7,4,1,4
225 | 1227210,10,5,5,6,3,10,7,9,2,4
226 | 1227244,1,1,1,1,2,1,2,1,1,2
227 | 1227481,10,5,7,4,4,10,8,9,1,4
228 | 1228152,8,9,9,5,3,5,7,7,1,4
229 | 1228311,1,1,1,1,1,1,3,1,1,2
230 | 1230175,10,10,10,3,10,10,9,10,1,4
231 | 1230688,7,4,7,4,3,7,7,6,1,4
232 | 1231387,6,8,7,5,6,8,8,9,2,4
233 | 1231706,8,4,6,3,3,1,4,3,1,2
234 | 1232225,10,4,5,5,5,10,4,1,1,4
235 | 1236043,3,3,2,1,3,1,3,6,1,2
236 | 1241232,3,1,4,1,2,?,3,1,1,2
237 | 1241559,10,8,8,2,8,10,4,8,10,4
238 | 1241679,9,8,8,5,6,2,4,10,4,4
239 | 1242364,8,10,10,8,6,9,3,10,10,4
240 | 1243256,10,4,3,2,3,10,5,3,2,4
241 | 1270479,5,1,3,3,2,2,2,3,1,2
242 | 1276091,3,1,1,3,1,1,3,1,1,2
243 | 1277018,2,1,1,1,2,1,3,1,1,2
244 | 128059,1,1,1,1,2,5,5,1,1,2
245 | 1285531,1,1,1,1,2,1,3,1,1,2
246 | 1287775,5,1,1,2,2,2,3,1,1,2
247 | 144888,8,10,10,8,5,10,7,8,1,4
248 | 145447,8,4,4,1,2,9,3,3,1,4
249 | 167528,4,1,1,1,2,1,3,6,1,2
250 | 169356,3,1,1,1,2,?,3,1,1,2
251 | 183913,1,2,2,1,2,1,1,1,1,2
252 | 191250,10,4,4,10,2,10,5,3,3,4
253 | 1017023,6,3,3,5,3,10,3,5,3,2
254 | 1100524,6,10,10,2,8,10,7,3,3,4
255 | 1116116,9,10,10,1,10,8,3,3,1,4
256 | 1168736,5,6,6,2,4,10,3,6,1,4
257 | 1182404,3,1,1,1,2,1,1,1,1,2
258 | 1182404,3,1,1,1,2,1,2,1,1,2
259 | 1198641,3,1,1,1,2,1,3,1,1,2
260 | 242970,5,7,7,1,5,8,3,4,1,2
261 | 255644,10,5,8,10,3,10,5,1,3,4
262 | 263538,5,10,10,6,10,10,10,6,5,4
263 | 274137,8,8,9,4,5,10,7,8,1,4
264 | 303213,10,4,4,10,6,10,5,5,1,4
265 | 314428,7,9,4,10,10,3,5,3,3,4
266 | 1182404,5,1,4,1,2,1,3,2,1,2
267 | 1198641,10,10,6,3,3,10,4,3,2,4
268 | 320675,3,3,5,2,3,10,7,1,1,4
269 | 324427,10,8,8,2,3,4,8,7,8,4
270 | 385103,1,1,1,1,2,1,3,1,1,2
271 | 390840,8,4,7,1,3,10,3,9,2,4
272 | 411453,5,1,1,1,2,1,3,1,1,2
273 | 320675,3,3,5,2,3,10,7,1,1,4
274 | 428903,7,2,4,1,3,4,3,3,1,4
275 | 431495,3,1,1,1,2,1,3,2,1,2
276 | 432809,3,1,3,1,2,?,2,1,1,2
277 | 434518,3,1,1,1,2,1,2,1,1,2
278 | 452264,1,1,1,1,2,1,2,1,1,2
279 | 456282,1,1,1,1,2,1,3,1,1,2
280 | 476903,10,5,7,3,3,7,3,3,8,4
281 | 486283,3,1,1,1,2,1,3,1,1,2
282 | 486662,2,1,1,2,2,1,3,1,1,2
283 | 488173,1,4,3,10,4,10,5,6,1,4
284 | 492268,10,4,6,1,2,10,5,3,1,4
285 | 508234,7,4,5,10,2,10,3,8,2,4
286 | 527363,8,10,10,10,8,10,10,7,3,4
287 | 529329,10,10,10,10,10,10,4,10,10,4
288 | 535331,3,1,1,1,3,1,2,1,1,2
289 | 543558,6,1,3,1,4,5,5,10,1,4
290 | 555977,5,6,6,8,6,10,4,10,4,4
291 | 560680,1,1,1,1,2,1,1,1,1,2
292 | 561477,1,1,1,1,2,1,3,1,1,2
293 | 563649,8,8,8,1,2,?,6,10,1,4
294 | 601265,10,4,4,6,2,10,2,3,1,4
295 | 606140,1,1,1,1,2,?,2,1,1,2
296 | 606722,5,5,7,8,6,10,7,4,1,4
297 | 616240,5,3,4,3,4,5,4,7,1,2
298 | 61634,5,4,3,1,2,?,2,3,1,2
299 | 625201,8,2,1,1,5,1,1,1,1,2
300 | 63375,9,1,2,6,4,10,7,7,2,4
301 | 635844,8,4,10,5,4,4,7,10,1,4
302 | 636130,1,1,1,1,2,1,3,1,1,2
303 | 640744,10,10,10,7,9,10,7,10,10,4
304 | 646904,1,1,1,1,2,1,3,1,1,2
305 | 653777,8,3,4,9,3,10,3,3,1,4
306 | 659642,10,8,4,4,4,10,3,10,4,4
307 | 666090,1,1,1,1,2,1,3,1,1,2
308 | 666942,1,1,1,1,2,1,3,1,1,2
309 | 667204,7,8,7,6,4,3,8,8,4,4
310 | 673637,3,1,1,1,2,5,5,1,1,2
311 | 684955,2,1,1,1,3,1,2,1,1,2
312 | 688033,1,1,1,1,2,1,1,1,1,2
313 | 691628,8,6,4,10,10,1,3,5,1,4
314 | 693702,1,1,1,1,2,1,1,1,1,2
315 | 704097,1,1,1,1,1,1,2,1,1,2
316 | 704168,4,6,5,6,7,?,4,9,1,2
317 | 706426,5,5,5,2,5,10,4,3,1,4
318 | 709287,6,8,7,8,6,8,8,9,1,4
319 | 718641,1,1,1,1,5,1,3,1,1,2
320 | 721482,4,4,4,4,6,5,7,3,1,2
321 | 730881,7,6,3,2,5,10,7,4,6,4
322 | 733639,3,1,1,1,2,?,3,1,1,2
323 | 733639,3,1,1,1,2,1,3,1,1,2
324 | 733823,5,4,6,10,2,10,4,1,1,4
325 | 740492,1,1,1,1,2,1,3,1,1,2
326 | 743348,3,2,2,1,2,1,2,3,1,2
327 | 752904,10,1,1,1,2,10,5,4,1,4
328 | 756136,1,1,1,1,2,1,2,1,1,2
329 | 760001,8,10,3,2,6,4,3,10,1,4
330 | 760239,10,4,6,4,5,10,7,1,1,4
331 | 76389,10,4,7,2,2,8,6,1,1,4
332 | 764974,5,1,1,1,2,1,3,1,2,2
333 | 770066,5,2,2,2,2,1,2,2,1,2
334 | 785208,5,4,6,6,4,10,4,3,1,4
335 | 785615,8,6,7,3,3,10,3,4,2,4
336 | 792744,1,1,1,1,2,1,1,1,1,2
337 | 797327,6,5,5,8,4,10,3,4,1,4
338 | 798429,1,1,1,1,2,1,3,1,1,2
339 | 704097,1,1,1,1,1,1,2,1,1,2
340 | 806423,8,5,5,5,2,10,4,3,1,4
341 | 809912,10,3,3,1,2,10,7,6,1,4
342 | 810104,1,1,1,1,2,1,3,1,1,2
343 | 814265,2,1,1,1,2,1,1,1,1,2
344 | 814911,1,1,1,1,2,1,1,1,1,2
345 | 822829,7,6,4,8,10,10,9,5,3,4
346 | 826923,1,1,1,1,2,1,1,1,1,2
347 | 830690,5,2,2,2,3,1,1,3,1,2
348 | 831268,1,1,1,1,1,1,1,3,1,2
349 | 832226,3,4,4,10,5,1,3,3,1,4
350 | 832567,4,2,3,5,3,8,7,6,1,4
351 | 836433,5,1,1,3,2,1,1,1,1,2
352 | 837082,2,1,1,1,2,1,3,1,1,2
353 | 846832,3,4,5,3,7,3,4,6,1,2
354 | 850831,2,7,10,10,7,10,4,9,4,4
355 | 855524,1,1,1,1,2,1,2,1,1,2
356 | 857774,4,1,1,1,3,1,2,2,1,2
357 | 859164,5,3,3,1,3,3,3,3,3,4
358 | 859350,8,10,10,7,10,10,7,3,8,4
359 | 866325,8,10,5,3,8,4,4,10,3,4
360 | 873549,10,3,5,4,3,7,3,5,3,4
361 | 877291,6,10,10,10,10,10,8,10,10,4
362 | 877943,3,10,3,10,6,10,5,1,4,4
363 | 888169,3,2,2,1,4,3,2,1,1,2
364 | 888523,4,4,4,2,2,3,2,1,1,2
365 | 896404,2,1,1,1,2,1,3,1,1,2
366 | 897172,2,1,1,1,2,1,2,1,1,2
367 | 95719,6,10,10,10,8,10,7,10,7,4
368 | 160296,5,8,8,10,5,10,8,10,3,4
369 | 342245,1,1,3,1,2,1,1,1,1,2
370 | 428598,1,1,3,1,1,1,2,1,1,2
371 | 492561,4,3,2,1,3,1,2,1,1,2
372 | 493452,1,1,3,1,2,1,1,1,1,2
373 | 493452,4,1,2,1,2,1,2,1,1,2
374 | 521441,5,1,1,2,2,1,2,1,1,2
375 | 560680,3,1,2,1,2,1,2,1,1,2
376 | 636437,1,1,1,1,2,1,1,1,1,2
377 | 640712,1,1,1,1,2,1,2,1,1,2
378 | 654244,1,1,1,1,1,1,2,1,1,2
379 | 657753,3,1,1,4,3,1,2,2,1,2
380 | 685977,5,3,4,1,4,1,3,1,1,2
381 | 805448,1,1,1,1,2,1,1,1,1,2
382 | 846423,10,6,3,6,4,10,7,8,4,4
383 | 1002504,3,2,2,2,2,1,3,2,1,2
384 | 1022257,2,1,1,1,2,1,1,1,1,2
385 | 1026122,2,1,1,1,2,1,1,1,1,2
386 | 1071084,3,3,2,2,3,1,1,2,3,2
387 | 1080233,7,6,6,3,2,10,7,1,1,4
388 | 1114570,5,3,3,2,3,1,3,1,1,2
389 | 1114570,2,1,1,1,2,1,2,2,1,2
390 | 1116715,5,1,1,1,3,2,2,2,1,2
391 | 1131411,1,1,1,2,2,1,2,1,1,2
392 | 1151734,10,8,7,4,3,10,7,9,1,4
393 | 1156017,3,1,1,1,2,1,2,1,1,2
394 | 1158247,1,1,1,1,1,1,1,1,1,2
395 | 1158405,1,2,3,1,2,1,2,1,1,2
396 | 1168278,3,1,1,1,2,1,2,1,1,2
397 | 1176187,3,1,1,1,2,1,3,1,1,2
398 | 1196263,4,1,1,1,2,1,1,1,1,2
399 | 1196475,3,2,1,1,2,1,2,2,1,2
400 | 1206314,1,2,3,1,2,1,1,1,1,2
401 | 1211265,3,10,8,7,6,9,9,3,8,4
402 | 1213784,3,1,1,1,2,1,1,1,1,2
403 | 1223003,5,3,3,1,2,1,2,1,1,2
404 | 1223306,3,1,1,1,2,4,1,1,1,2
405 | 1223543,1,2,1,3,2,1,1,2,1,2
406 | 1229929,1,1,1,1,2,1,2,1,1,2
407 | 1231853,4,2,2,1,2,1,2,1,1,2
408 | 1234554,1,1,1,1,2,1,2,1,1,2
409 | 1236837,2,3,2,2,2,2,3,1,1,2
410 | 1237674,3,1,2,1,2,1,2,1,1,2
411 | 1238021,1,1,1,1,2,1,2,1,1,2
412 | 1238464,1,1,1,1,1,?,2,1,1,2
413 | 1238633,10,10,10,6,8,4,8,5,1,4
414 | 1238915,5,1,2,1,2,1,3,1,1,2
415 | 1238948,8,5,6,2,3,10,6,6,1,4
416 | 1239232,3,3,2,6,3,3,3,5,1,2
417 | 1239347,8,7,8,5,10,10,7,2,1,4
418 | 1239967,1,1,1,1,2,1,2,1,1,2
419 | 1240337,5,2,2,2,2,2,3,2,2,2
420 | 1253505,2,3,1,1,5,1,1,1,1,2
421 | 1255384,3,2,2,3,2,3,3,1,1,2
422 | 1257200,10,10,10,7,10,10,8,2,1,4
423 | 1257648,4,3,3,1,2,1,3,3,1,2
424 | 1257815,5,1,3,1,2,1,2,1,1,2
425 | 1257938,3,1,1,1,2,1,1,1,1,2
426 | 1258549,9,10,10,10,10,10,10,10,1,4
427 | 1258556,5,3,6,1,2,1,1,1,1,2
428 | 1266154,8,7,8,2,4,2,5,10,1,4
429 | 1272039,1,1,1,1,2,1,2,1,1,2
430 | 1276091,2,1,1,1,2,1,2,1,1,2
431 | 1276091,1,3,1,1,2,1,2,2,1,2
432 | 1276091,5,1,1,3,4,1,3,2,1,2
433 | 1277629,5,1,1,1,2,1,2,2,1,2
434 | 1293439,3,2,2,3,2,1,1,1,1,2
435 | 1293439,6,9,7,5,5,8,4,2,1,2
436 | 1294562,10,8,10,1,3,10,5,1,1,4
437 | 1295186,10,10,10,1,6,1,2,8,1,4
438 | 527337,4,1,1,1,2,1,1,1,1,2
439 | 558538,4,1,3,3,2,1,1,1,1,2
440 | 566509,5,1,1,1,2,1,1,1,1,2
441 | 608157,10,4,3,10,4,10,10,1,1,4
442 | 677910,5,2,2,4,2,4,1,1,1,2
443 | 734111,1,1,1,3,2,3,1,1,1,2
444 | 734111,1,1,1,1,2,2,1,1,1,2
445 | 780555,5,1,1,6,3,1,2,1,1,2
446 | 827627,2,1,1,1,2,1,1,1,1,2
447 | 1049837,1,1,1,1,2,1,1,1,1,2
448 | 1058849,5,1,1,1,2,1,1,1,1,2
449 | 1182404,1,1,1,1,1,1,1,1,1,2
450 | 1193544,5,7,9,8,6,10,8,10,1,4
451 | 1201870,4,1,1,3,1,1,2,1,1,2
452 | 1202253,5,1,1,1,2,1,1,1,1,2
453 | 1227081,3,1,1,3,2,1,1,1,1,2
454 | 1230994,4,5,5,8,6,10,10,7,1,4
455 | 1238410,2,3,1,1,3,1,1,1,1,2
456 | 1246562,10,2,2,1,2,6,1,1,2,4
457 | 1257470,10,6,5,8,5,10,8,6,1,4
458 | 1259008,8,8,9,6,6,3,10,10,1,4
459 | 1266124,5,1,2,1,2,1,1,1,1,2
460 | 1267898,5,1,3,1,2,1,1,1,1,2
461 | 1268313,5,1,1,3,2,1,1,1,1,2
462 | 1268804,3,1,1,1,2,5,1,1,1,2
463 | 1276091,6,1,1,3,2,1,1,1,1,2
464 | 1280258,4,1,1,1,2,1,1,2,1,2
465 | 1293966,4,1,1,1,2,1,1,1,1,2
466 | 1296572,10,9,8,7,6,4,7,10,3,4
467 | 1298416,10,6,6,2,4,10,9,7,1,4
468 | 1299596,6,6,6,5,4,10,7,6,2,4
469 | 1105524,4,1,1,1,2,1,1,1,1,2
470 | 1181685,1,1,2,1,2,1,2,1,1,2
471 | 1211594,3,1,1,1,1,1,2,1,1,2
472 | 1238777,6,1,1,3,2,1,1,1,1,2
473 | 1257608,6,1,1,1,1,1,1,1,1,2
474 | 1269574,4,1,1,1,2,1,1,1,1,2
475 | 1277145,5,1,1,1,2,1,1,1,1,2
476 | 1287282,3,1,1,1,2,1,1,1,1,2
477 | 1296025,4,1,2,1,2,1,1,1,1,2
478 | 1296263,4,1,1,1,2,1,1,1,1,2
479 | 1296593,5,2,1,1,2,1,1,1,1,2
480 | 1299161,4,8,7,10,4,10,7,5,1,4
481 | 1301945,5,1,1,1,1,1,1,1,1,2
482 | 1302428,5,3,2,4,2,1,1,1,1,2
483 | 1318169,9,10,10,10,10,5,10,10,10,4
484 | 474162,8,7,8,5,5,10,9,10,1,4
485 | 787451,5,1,2,1,2,1,1,1,1,2
486 | 1002025,1,1,1,3,1,3,1,1,1,2
487 | 1070522,3,1,1,1,1,1,2,1,1,2
488 | 1073960,10,10,10,10,6,10,8,1,5,4
489 | 1076352,3,6,4,10,3,3,3,4,1,4
490 | 1084139,6,3,2,1,3,4,4,1,1,4
491 | 1115293,1,1,1,1,2,1,1,1,1,2
492 | 1119189,5,8,9,4,3,10,7,1,1,4
493 | 1133991,4,1,1,1,1,1,2,1,1,2
494 | 1142706,5,10,10,10,6,10,6,5,2,4
495 | 1155967,5,1,2,10,4,5,2,1,1,2
496 | 1170945,3,1,1,1,1,1,2,1,1,2
497 | 1181567,1,1,1,1,1,1,1,1,1,2
498 | 1182404,4,2,1,1,2,1,1,1,1,2
499 | 1204558,4,1,1,1,2,1,2,1,1,2
500 | 1217952,4,1,1,1,2,1,2,1,1,2
501 | 1224565,6,1,1,1,2,1,3,1,1,2
502 | 1238186,4,1,1,1,2,1,2,1,1,2
503 | 1253917,4,1,1,2,2,1,2,1,1,2
504 | 1265899,4,1,1,1,2,1,3,1,1,2
505 | 1268766,1,1,1,1,2,1,1,1,1,2
506 | 1277268,3,3,1,1,2,1,1,1,1,2
507 | 1286943,8,10,10,10,7,5,4,8,7,4
508 | 1295508,1,1,1,1,2,4,1,1,1,2
509 | 1297327,5,1,1,1,2,1,1,1,1,2
510 | 1297522,2,1,1,1,2,1,1,1,1,2
511 | 1298360,1,1,1,1,2,1,1,1,1,2
512 | 1299924,5,1,1,1,2,1,2,1,1,2
513 | 1299994,5,1,1,1,2,1,1,1,1,2
514 | 1304595,3,1,1,1,1,1,2,1,1,2
515 | 1306282,6,6,7,10,3,10,8,10,2,4
516 | 1313325,4,10,4,7,3,10,9,10,1,4
517 | 1320077,1,1,1,1,1,1,1,1,1,2
518 | 1320077,1,1,1,1,1,1,2,1,1,2
519 | 1320304,3,1,2,2,2,1,1,1,1,2
520 | 1330439,4,7,8,3,4,10,9,1,1,4
521 | 333093,1,1,1,1,3,1,1,1,1,2
522 | 369565,4,1,1,1,3,1,1,1,1,2
523 | 412300,10,4,5,4,3,5,7,3,1,4
524 | 672113,7,5,6,10,4,10,5,3,1,4
525 | 749653,3,1,1,1,2,1,2,1,1,2
526 | 769612,3,1,1,2,2,1,1,1,1,2
527 | 769612,4,1,1,1,2,1,1,1,1,2
528 | 798429,4,1,1,1,2,1,3,1,1,2
529 | 807657,6,1,3,2,2,1,1,1,1,2
530 | 8233704,4,1,1,1,1,1,2,1,1,2
531 | 837480,7,4,4,3,4,10,6,9,1,4
532 | 867392,4,2,2,1,2,1,2,1,1,2
533 | 869828,1,1,1,1,1,1,3,1,1,2
534 | 1043068,3,1,1,1,2,1,2,1,1,2
535 | 1056171,2,1,1,1,2,1,2,1,1,2
536 | 1061990,1,1,3,2,2,1,3,1,1,2
537 | 1113061,5,1,1,1,2,1,3,1,1,2
538 | 1116192,5,1,2,1,2,1,3,1,1,2
539 | 1135090,4,1,1,1,2,1,2,1,1,2
540 | 1145420,6,1,1,1,2,1,2,1,1,2
541 | 1158157,5,1,1,1,2,2,2,1,1,2
542 | 1171578,3,1,1,1,2,1,1,1,1,2
543 | 1174841,5,3,1,1,2,1,1,1,1,2
544 | 1184586,4,1,1,1,2,1,2,1,1,2
545 | 1186936,2,1,3,2,2,1,2,1,1,2
546 | 1197527,5,1,1,1,2,1,2,1,1,2
547 | 1222464,6,10,10,10,4,10,7,10,1,4
548 | 1240603,2,1,1,1,1,1,1,1,1,2
549 | 1240603,3,1,1,1,1,1,1,1,1,2
550 | 1241035,7,8,3,7,4,5,7,8,2,4
551 | 1287971,3,1,1,1,2,1,2,1,1,2
552 | 1289391,1,1,1,1,2,1,3,1,1,2
553 | 1299924,3,2,2,2,2,1,4,2,1,2
554 | 1306339,4,4,2,1,2,5,2,1,2,2
555 | 1313658,3,1,1,1,2,1,1,1,1,2
556 | 1313982,4,3,1,1,2,1,4,8,1,2
557 | 1321264,5,2,2,2,1,1,2,1,1,2
558 | 1321321,5,1,1,3,2,1,1,1,1,2
559 | 1321348,2,1,1,1,2,1,2,1,1,2
560 | 1321931,5,1,1,1,2,1,2,1,1,2
561 | 1321942,5,1,1,1,2,1,3,1,1,2
562 | 1321942,5,1,1,1,2,1,3,1,1,2
563 | 1328331,1,1,1,1,2,1,3,1,1,2
564 | 1328755,3,1,1,1,2,1,2,1,1,2
565 | 1331405,4,1,1,1,2,1,3,2,1,2
566 | 1331412,5,7,10,10,5,10,10,10,1,4
567 | 1333104,3,1,2,1,2,1,3,1,1,2
568 | 1334071,4,1,1,1,2,3,2,1,1,2
569 | 1343068,8,4,4,1,6,10,2,5,2,4
570 | 1343374,10,10,8,10,6,5,10,3,1,4
571 | 1344121,8,10,4,4,8,10,8,2,1,4
572 | 142932,7,6,10,5,3,10,9,10,2,4
573 | 183936,3,1,1,1,2,1,2,1,1,2
574 | 324382,1,1,1,1,2,1,2,1,1,2
575 | 378275,10,9,7,3,4,2,7,7,1,4
576 | 385103,5,1,2,1,2,1,3,1,1,2
577 | 690557,5,1,1,1,2,1,2,1,1,2
578 | 695091,1,1,1,1,2,1,2,1,1,2
579 | 695219,1,1,1,1,2,1,2,1,1,2
580 | 824249,1,1,1,1,2,1,3,1,1,2
581 | 871549,5,1,2,1,2,1,2,1,1,2
582 | 878358,5,7,10,6,5,10,7,5,1,4
583 | 1107684,6,10,5,5,4,10,6,10,1,4
584 | 1115762,3,1,1,1,2,1,1,1,1,2
585 | 1217717,5,1,1,6,3,1,1,1,1,2
586 | 1239420,1,1,1,1,2,1,1,1,1,2
587 | 1254538,8,10,10,10,6,10,10,10,1,4
588 | 1261751,5,1,1,1,2,1,2,2,1,2
589 | 1268275,9,8,8,9,6,3,4,1,1,4
590 | 1272166,5,1,1,1,2,1,1,1,1,2
591 | 1294261,4,10,8,5,4,1,10,1,1,4
592 | 1295529,2,5,7,6,4,10,7,6,1,4
593 | 1298484,10,3,4,5,3,10,4,1,1,4
594 | 1311875,5,1,2,1,2,1,1,1,1,2
595 | 1315506,4,8,6,3,4,10,7,1,1,4
596 | 1320141,5,1,1,1,2,1,2,1,1,2
597 | 1325309,4,1,2,1,2,1,2,1,1,2
598 | 1333063,5,1,3,1,2,1,3,1,1,2
599 | 1333495,3,1,1,1,2,1,2,1,1,2
600 | 1334659,5,2,4,1,1,1,1,1,1,2
601 | 1336798,3,1,1,1,2,1,2,1,1,2
602 | 1344449,1,1,1,1,1,1,2,1,1,2
603 | 1350568,4,1,1,1,2,1,2,1,1,2
604 | 1352663,5,4,6,8,4,1,8,10,1,4
605 | 188336,5,3,2,8,5,10,8,1,2,4
606 | 352431,10,5,10,3,5,8,7,8,3,4
607 | 353098,4,1,1,2,2,1,1,1,1,2
608 | 411453,1,1,1,1,2,1,1,1,1,2
609 | 557583,5,10,10,10,10,10,10,1,1,4
610 | 636375,5,1,1,1,2,1,1,1,1,2
611 | 736150,10,4,3,10,3,10,7,1,2,4
612 | 803531,5,10,10,10,5,2,8,5,1,4
613 | 822829,8,10,10,10,6,10,10,10,10,4
614 | 1016634,2,3,1,1,2,1,2,1,1,2
615 | 1031608,2,1,1,1,1,1,2,1,1,2
616 | 1041043,4,1,3,1,2,1,2,1,1,2
617 | 1042252,3,1,1,1,2,1,2,1,1,2
618 | 1057067,1,1,1,1,1,?,1,1,1,2
619 | 1061990,4,1,1,1,2,1,2,1,1,2
620 | 1073836,5,1,1,1,2,1,2,1,1,2
621 | 1083817,3,1,1,1,2,1,2,1,1,2
622 | 1096352,6,3,3,3,3,2,6,1,1,2
623 | 1140597,7,1,2,3,2,1,2,1,1,2
624 | 1149548,1,1,1,1,2,1,1,1,1,2
625 | 1174009,5,1,1,2,1,1,2,1,1,2
626 | 1183596,3,1,3,1,3,4,1,1,1,2
627 | 1190386,4,6,6,5,7,6,7,7,3,4
628 | 1190546,2,1,1,1,2,5,1,1,1,2
629 | 1213273,2,1,1,1,2,1,1,1,1,2
630 | 1218982,4,1,1,1,2,1,1,1,1,2
631 | 1225382,6,2,3,1,2,1,1,1,1,2
632 | 1235807,5,1,1,1,2,1,2,1,1,2
633 | 1238777,1,1,1,1,2,1,1,1,1,2
634 | 1253955,8,7,4,4,5,3,5,10,1,4
635 | 1257366,3,1,1,1,2,1,1,1,1,2
636 | 1260659,3,1,4,1,2,1,1,1,1,2
637 | 1268952,10,10,7,8,7,1,10,10,3,4
638 | 1275807,4,2,4,3,2,2,2,1,1,2
639 | 1277792,4,1,1,1,2,1,1,1,1,2
640 | 1277792,5,1,1,3,2,1,1,1,1,2
641 | 1285722,4,1,1,3,2,1,1,1,1,2
642 | 1288608,3,1,1,1,2,1,2,1,1,2
643 | 1290203,3,1,1,1,2,1,2,1,1,2
644 | 1294413,1,1,1,1,2,1,1,1,1,2
645 | 1299596,2,1,1,1,2,1,1,1,1,2
646 | 1303489,3,1,1,1,2,1,2,1,1,2
647 | 1311033,1,2,2,1,2,1,1,1,1,2
648 | 1311108,1,1,1,3,2,1,1,1,1,2
649 | 1315807,5,10,10,10,10,2,10,10,10,4
650 | 1318671,3,1,1,1,2,1,2,1,1,2
651 | 1319609,3,1,1,2,3,4,1,1,1,2
652 | 1323477,1,2,1,3,2,1,2,1,1,2
653 | 1324572,5,1,1,1,2,1,2,2,1,2
654 | 1324681,4,1,1,1,2,1,2,1,1,2
655 | 1325159,3,1,1,1,2,1,3,1,1,2
656 | 1326892,3,1,1,1,2,1,2,1,1,2
657 | 1330361,5,1,1,1,2,1,2,1,1,2
658 | 1333877,5,4,5,1,8,1,3,6,1,2
659 | 1334015,7,8,8,7,3,10,7,2,3,4
660 | 1334667,1,1,1,1,2,1,1,1,1,2
661 | 1339781,1,1,1,1,2,1,2,1,1,2
662 | 1339781,4,1,1,1,2,1,3,1,1,2
663 | 13454352,1,1,3,1,2,1,2,1,1,2
664 | 1345452,1,1,3,1,2,1,2,1,1,2
665 | 1345593,3,1,1,3,2,1,2,1,1,2
666 | 1347749,1,1,1,1,2,1,1,1,1,2
667 | 1347943,5,2,2,2,2,1,1,1,2,2
668 | 1348851,3,1,1,1,2,1,3,1,1,2
669 | 1350319,5,7,4,1,6,1,7,10,3,4
670 | 1350423,5,10,10,8,5,5,7,10,1,4
671 | 1352848,3,10,7,8,5,8,7,4,1,4
672 | 1353092,3,2,1,2,2,1,3,1,1,2
673 | 1354840,2,1,1,1,2,1,3,1,1,2
674 | 1354840,5,3,2,1,3,1,1,1,1,2
675 | 1355260,1,1,1,1,2,1,2,1,1,2
676 | 1365075,4,1,4,1,2,1,1,1,1,2
677 | 1365328,1,1,2,1,2,1,2,1,1,2
678 | 1368267,5,1,1,1,2,1,1,1,1,2
679 | 1368273,1,1,1,1,2,1,1,1,1,2
680 | 1368882,2,1,1,1,2,1,1,1,1,2
681 | 1369821,10,10,10,10,5,10,10,10,7,4
682 | 1371026,5,10,10,10,4,10,5,6,3,4
683 | 1371920,5,1,1,1,2,1,3,2,1,2
684 | 466906,1,1,1,1,2,1,1,1,1,2
685 | 466906,1,1,1,1,2,1,1,1,1,2
686 | 534555,1,1,1,1,2,1,1,1,1,2
687 | 536708,1,1,1,1,2,1,1,1,1,2
688 | 566346,3,1,1,1,2,1,2,3,1,2
689 | 603148,4,1,1,1,2,1,1,1,1,2
690 | 654546,1,1,1,1,2,1,1,1,8,2
691 | 654546,1,1,1,3,2,1,1,1,1,2
692 | 695091,5,10,10,5,4,5,4,4,1,4
693 | 714039,3,1,1,1,2,1,1,1,1,2
694 | 763235,3,1,1,1,2,1,2,1,2,2
695 | 776715,3,1,1,1,3,2,1,1,1,2
696 | 841769,2,1,1,1,2,1,1,1,1,2
697 | 888820,5,10,10,3,7,3,8,10,2,4
698 | 897471,4,8,6,4,3,4,10,6,1,4
699 | 897471,4,8,8,5,4,5,10,4,1,4
700 |
--------------------------------------------------------------------------------
/data/ecoli.data:
--------------------------------------------------------------------------------
1 | AAT_ECOLI 0.49 0.29 0.48 0.50 0.56 0.24 0.35 cp
2 | ACEA_ECOLI 0.07 0.40 0.48 0.50 0.54 0.35 0.44 cp
3 | ACEK_ECOLI 0.56 0.40 0.48 0.50 0.49 0.37 0.46 cp
4 | ACKA_ECOLI 0.59 0.49 0.48 0.50 0.52 0.45 0.36 cp
5 | ADI_ECOLI 0.23 0.32 0.48 0.50 0.55 0.25 0.35 cp
6 | ALKH_ECOLI 0.67 0.39 0.48 0.50 0.36 0.38 0.46 cp
7 | AMPD_ECOLI 0.29 0.28 0.48 0.50 0.44 0.23 0.34 cp
8 | AMY2_ECOLI 0.21 0.34 0.48 0.50 0.51 0.28 0.39 cp
9 | APT_ECOLI 0.20 0.44 0.48 0.50 0.46 0.51 0.57 cp
10 | ARAC_ECOLI 0.42 0.40 0.48 0.50 0.56 0.18 0.30 cp
11 | ASG1_ECOLI 0.42 0.24 0.48 0.50 0.57 0.27 0.37 cp
12 | BTUR_ECOLI 0.25 0.48 0.48 0.50 0.44 0.17 0.29 cp
13 | CAFA_ECOLI 0.39 0.32 0.48 0.50 0.46 0.24 0.35 cp
14 | CAIB_ECOLI 0.51 0.50 0.48 0.50 0.46 0.32 0.35 cp
15 | CFA_ECOLI 0.22 0.43 0.48 0.50 0.48 0.16 0.28 cp
16 | CHEA_ECOLI 0.25 0.40 0.48 0.50 0.46 0.44 0.52 cp
17 | CHEB_ECOLI 0.34 0.45 0.48 0.50 0.38 0.24 0.35 cp
18 | CHEW_ECOLI 0.44 0.27 0.48 0.50 0.55 0.52 0.58 cp
19 | CHEY_ECOLI 0.23 0.40 0.48 0.50 0.39 0.28 0.38 cp
20 | CHEZ_ECOLI 0.41 0.57 0.48 0.50 0.39 0.21 0.32 cp
21 | CRL_ECOLI 0.40 0.45 0.48 0.50 0.38 0.22 0.00 cp
22 | CSPA_ECOLI 0.31 0.23 0.48 0.50 0.73 0.05 0.14 cp
23 | CYNR_ECOLI 0.51 0.54 0.48 0.50 0.41 0.34 0.43 cp
24 | CYPB_ECOLI 0.30 0.16 0.48 0.50 0.56 0.11 0.23 cp
25 | CYPC_ECOLI 0.36 0.39 0.48 0.50 0.48 0.22 0.23 cp
26 | CYSB_ECOLI 0.29 0.37 0.48 0.50 0.48 0.44 0.52 cp
27 | CYSE_ECOLI 0.25 0.40 0.48 0.50 0.47 0.33 0.42 cp
28 | DAPD_ECOLI 0.21 0.51 0.48 0.50 0.50 0.32 0.41 cp
29 | DCP_ECOLI 0.43 0.37 0.48 0.50 0.53 0.35 0.44 cp
30 | DDLA_ECOLI 0.43 0.39 0.48 0.50 0.47 0.31 0.41 cp
31 | DDLB_ECOLI 0.53 0.38 0.48 0.50 0.44 0.26 0.36 cp
32 | DEOC_ECOLI 0.34 0.33 0.48 0.50 0.38 0.35 0.44 cp
33 | DLDH_ECOLI 0.56 0.51 0.48 0.50 0.34 0.37 0.46 cp
34 | EFG_ECOLI 0.40 0.29 0.48 0.50 0.42 0.35 0.44 cp
35 | EFTS_ECOLI 0.24 0.35 0.48 0.50 0.31 0.19 0.31 cp
36 | EFTU_ECOLI 0.36 0.54 0.48 0.50 0.41 0.38 0.46 cp
37 | ENO_ECOLI 0.29 0.52 0.48 0.50 0.42 0.29 0.39 cp
38 | FABB_ECOLI 0.65 0.47 0.48 0.50 0.59 0.30 0.40 cp
39 | FES_ECOLI 0.32 0.42 0.48 0.50 0.35 0.28 0.38 cp
40 | G3P1_ECOLI 0.38 0.46 0.48 0.50 0.48 0.22 0.29 cp
41 | G3P2_ECOLI 0.33 0.45 0.48 0.50 0.52 0.32 0.41 cp
42 | G6PI_ECOLI 0.30 0.37 0.48 0.50 0.59 0.41 0.49 cp
43 | GCVA_ECOLI 0.40 0.50 0.48 0.50 0.45 0.39 0.47 cp
44 | GLNA_ECOLI 0.28 0.38 0.48 0.50 0.50 0.33 0.42 cp
45 | GLPD_ECOLI 0.61 0.45 0.48 0.50 0.48 0.35 0.41 cp
46 | GLYA_ECOLI 0.17 0.38 0.48 0.50 0.45 0.42 0.50 cp
47 | GSHR_ECOLI 0.44 0.35 0.48 0.50 0.55 0.55 0.61 cp
48 | GT_ECOLI 0.43 0.40 0.48 0.50 0.39 0.28 0.39 cp
49 | HEM6_ECOLI 0.42 0.35 0.48 0.50 0.58 0.15 0.27 cp
50 | HEMN_ECOLI 0.23 0.33 0.48 0.50 0.43 0.33 0.43 cp
51 | HPRT_ECOLI 0.37 0.52 0.48 0.50 0.42 0.42 0.36 cp
52 | IF1_ECOLI 0.29 0.30 0.48 0.50 0.45 0.03 0.17 cp
53 | IF2_ECOLI 0.22 0.36 0.48 0.50 0.35 0.39 0.47 cp
54 | ILVY_ECOLI 0.23 0.58 0.48 0.50 0.37 0.53 0.59 cp
55 | IPYR_ECOLI 0.47 0.47 0.48 0.50 0.22 0.16 0.26 cp
56 | KAD_ECOLI 0.54 0.47 0.48 0.50 0.28 0.33 0.42 cp
57 | KDSA_ECOLI 0.51 0.37 0.48 0.50 0.35 0.36 0.45 cp
58 | LEU3_ECOLI 0.40 0.35 0.48 0.50 0.45 0.33 0.42 cp
59 | LON_ECOLI 0.44 0.34 0.48 0.50 0.30 0.33 0.43 cp
60 | LPLA_ECOLI 0.42 0.38 0.48 0.50 0.54 0.34 0.43 cp
61 | LYSR_ECOLI 0.44 0.56 0.48 0.50 0.50 0.46 0.54 cp
62 | MALQ_ECOLI 0.52 0.36 0.48 0.50 0.41 0.28 0.38 cp
63 | MALZ_ECOLI 0.36 0.41 0.48 0.50 0.48 0.47 0.54 cp
64 | MASY_ECOLI 0.18 0.30 0.48 0.50 0.46 0.24 0.35 cp
65 | METB_ECOLI 0.47 0.29 0.48 0.50 0.51 0.33 0.43 cp
66 | METC_ECOLI 0.24 0.43 0.48 0.50 0.54 0.52 0.59 cp
67 | METK_ECOLI 0.25 0.37 0.48 0.50 0.41 0.33 0.42 cp
68 | METR_ECOLI 0.52 0.57 0.48 0.50 0.42 0.47 0.54 cp
69 | METX_ECOLI 0.25 0.37 0.48 0.50 0.43 0.26 0.36 cp
70 | MURF_ECOLI 0.35 0.48 0.48 0.50 0.56 0.40 0.48 cp
71 | NADA_ECOLI 0.26 0.26 0.48 0.50 0.34 0.25 0.35 cp
72 | NFRC_ECOLI 0.44 0.51 0.48 0.50 0.47 0.26 0.36 cp
73 | NHAR_ECOLI 0.37 0.50 0.48 0.50 0.42 0.36 0.45 cp
74 | NIRD_ECOLI 0.44 0.42 0.48 0.50 0.42 0.25 0.20 cp
75 | OMPR_ECOLI 0.24 0.43 0.48 0.50 0.37 0.28 0.38 cp
76 | OTC1_ECOLI 0.42 0.30 0.48 0.50 0.48 0.26 0.36 cp
77 | OTC2_ECOLI 0.48 0.42 0.48 0.50 0.45 0.25 0.35 cp
78 | PEPE_ECOLI 0.41 0.48 0.48 0.50 0.51 0.44 0.51 cp
79 | PFLA_ECOLI 0.44 0.28 0.48 0.50 0.43 0.27 0.37 cp
80 | PFLB_ECOLI 0.29 0.41 0.48 0.50 0.48 0.38 0.46 cp
81 | PGK_ECOLI 0.34 0.28 0.48 0.50 0.41 0.35 0.44 cp
82 | PHOB_ECOLI 0.41 0.43 0.48 0.50 0.45 0.31 0.41 cp
83 | PHOH_ECOLI 0.29 0.47 0.48 0.50 0.41 0.23 0.34 cp
84 | PMBA_ECOLI 0.34 0.55 0.48 0.50 0.58 0.31 0.41 cp
85 | PNP_ECOLI 0.36 0.56 0.48 0.50 0.43 0.45 0.53 cp
86 | PROB_ECOLI 0.40 0.46 0.48 0.50 0.52 0.49 0.56 cp
87 | PT1A_ECOLI 0.50 0.49 0.48 0.50 0.49 0.46 0.53 cp
88 | PT1_ECOLI 0.52 0.44 0.48 0.50 0.37 0.36 0.42 cp
89 | PTCA_ECOLI 0.50 0.51 0.48 0.50 0.27 0.23 0.34 cp
90 | PTCB_ECOLI 0.53 0.42 0.48 0.50 0.16 0.29 0.39 cp
91 | PTFA_ECOLI 0.34 0.46 0.48 0.50 0.52 0.35 0.44 cp
92 | PTGA_ECOLI 0.40 0.42 0.48 0.50 0.37 0.27 0.27 cp
93 | PTHA_ECOLI 0.41 0.43 0.48 0.50 0.50 0.24 0.25 cp
94 | PTHP_ECOLI 0.30 0.45 0.48 0.50 0.36 0.21 0.32 cp
95 | PTKA_ECOLI 0.31 0.47 0.48 0.50 0.29 0.28 0.39 cp
96 | PTKB_ECOLI 0.64 0.76 0.48 0.50 0.45 0.35 0.38 cp
97 | PTNA_ECOLI 0.35 0.37 0.48 0.50 0.30 0.34 0.43 cp
98 | PTWB_ECOLI 0.57 0.54 0.48 0.50 0.37 0.28 0.33 cp
99 | PTWX_ECOLI 0.65 0.55 0.48 0.50 0.34 0.37 0.28 cp
100 | RHAR_ECOLI 0.51 0.46 0.48 0.50 0.58 0.31 0.41 cp
101 | RHAS_ECOLI 0.38 0.40 0.48 0.50 0.63 0.25 0.35 cp
102 | RIMI_ECOLI 0.24 0.57 0.48 0.50 0.63 0.34 0.43 cp
103 | RIMJ_ECOLI 0.38 0.26 0.48 0.50 0.54 0.16 0.28 cp
104 | RIML_ECOLI 0.33 0.47 0.48 0.50 0.53 0.18 0.29 cp
105 | RNB_ECOLI 0.24 0.34 0.48 0.50 0.38 0.30 0.40 cp
106 | RNC_ECOLI 0.26 0.50 0.48 0.50 0.44 0.32 0.41 cp
107 | RND_ECOLI 0.44 0.49 0.48 0.50 0.39 0.38 0.40 cp
108 | RNE_ECOLI 0.43 0.32 0.48 0.50 0.33 0.45 0.52 cp
109 | SERC_ECOLI 0.49 0.43 0.48 0.50 0.49 0.30 0.40 cp
110 | SLYD_ECOLI 0.47 0.28 0.48 0.50 0.56 0.20 0.25 cp
111 | SOXS_ECOLI 0.32 0.33 0.48 0.50 0.60 0.06 0.20 cp
112 | SYA_ECOLI 0.34 0.35 0.48 0.50 0.51 0.49 0.56 cp
113 | SYC_ECOLI 0.35 0.34 0.48 0.50 0.46 0.30 0.27 cp
114 | SYD_ECOLI 0.38 0.30 0.48 0.50 0.43 0.29 0.39 cp
115 | SYE_ECOLI 0.38 0.44 0.48 0.50 0.43 0.20 0.31 cp
116 | SYFA_ECOLI 0.41 0.51 0.48 0.50 0.58 0.20 0.31 cp
117 | SYFB_ECOLI 0.34 0.42 0.48 0.50 0.41 0.34 0.43 cp
118 | SYGA_ECOLI 0.51 0.49 0.48 0.50 0.53 0.14 0.26 cp
119 | SYGB_ECOLI 0.25 0.51 0.48 0.50 0.37 0.42 0.50 cp
120 | SYH_ECOLI 0.29 0.28 0.48 0.50 0.50 0.42 0.50 cp
121 | SYI_ECOLI 0.25 0.26 0.48 0.50 0.39 0.32 0.42 cp
122 | SYK1_ECOLI 0.24 0.41 0.48 0.50 0.49 0.23 0.34 cp
123 | SYK2_ECOLI 0.17 0.39 0.48 0.50 0.53 0.30 0.39 cp
124 | SYL_ECOLI 0.04 0.31 0.48 0.50 0.41 0.29 0.39 cp
125 | SYM_ECOLI 0.61 0.36 0.48 0.50 0.49 0.35 0.44 cp
126 | SYP_ECOLI 0.34 0.51 0.48 0.50 0.44 0.37 0.46 cp
127 | SYQ_ECOLI 0.28 0.33 0.48 0.50 0.45 0.22 0.33 cp
128 | SYR_ECOLI 0.40 0.46 0.48 0.50 0.42 0.35 0.44 cp
129 | SYS_ECOLI 0.23 0.34 0.48 0.50 0.43 0.26 0.37 cp
130 | SYT_ECOLI 0.37 0.44 0.48 0.50 0.42 0.39 0.47 cp
131 | SYV_ECOLI 0.00 0.38 0.48 0.50 0.42 0.48 0.55 cp
132 | SYW_ECOLI 0.39 0.31 0.48 0.50 0.38 0.34 0.43 cp
133 | SYY_ECOLI 0.30 0.44 0.48 0.50 0.49 0.22 0.33 cp
134 | THGA_ECOLI 0.27 0.30 0.48 0.50 0.71 0.28 0.39 cp
135 | THIK_ECOLI 0.17 0.52 0.48 0.50 0.49 0.37 0.46 cp
136 | TYRB_ECOLI 0.36 0.42 0.48 0.50 0.53 0.32 0.41 cp
137 | UBIC_ECOLI 0.30 0.37 0.48 0.50 0.43 0.18 0.30 cp
138 | UGPQ_ECOLI 0.26 0.40 0.48 0.50 0.36 0.26 0.37 cp
139 | USPA_ECOLI 0.40 0.41 0.48 0.50 0.55 0.22 0.33 cp
140 | UVRB_ECOLI 0.22 0.34 0.48 0.50 0.42 0.29 0.39 cp
141 | UVRC_ECOLI 0.44 0.35 0.48 0.50 0.44 0.52 0.59 cp
142 | XGPT_ECOLI 0.27 0.42 0.48 0.50 0.37 0.38 0.43 cp
143 | XYLA_ECOLI 0.16 0.43 0.48 0.50 0.54 0.27 0.37 cp
144 | EMRA_ECOLI 0.06 0.61 0.48 0.50 0.49 0.92 0.37 im
145 | AAS_ECOLI 0.44 0.52 0.48 0.50 0.43 0.47 0.54 im
146 | AMPE_ECOLI 0.63 0.47 0.48 0.50 0.51 0.82 0.84 im
147 | ARAE_ECOLI 0.23 0.48 0.48 0.50 0.59 0.88 0.89 im
148 | ARAH_ECOLI 0.34 0.49 0.48 0.50 0.58 0.85 0.80 im
149 | AROP_ECOLI 0.43 0.40 0.48 0.50 0.58 0.75 0.78 im
150 | ATKB_ECOLI 0.46 0.61 0.48 0.50 0.48 0.86 0.87 im
151 | ATP6_ECOLI 0.27 0.35 0.48 0.50 0.51 0.77 0.79 im
152 | BETT_ECOLI 0.52 0.39 0.48 0.50 0.65 0.71 0.73 im
153 | CODB_ECOLI 0.29 0.47 0.48 0.50 0.71 0.65 0.69 im
154 | CYDA_ECOLI 0.55 0.47 0.48 0.50 0.57 0.78 0.80 im
155 | CYOC_ECOLI 0.12 0.67 0.48 0.50 0.74 0.58 0.63 im
156 | CYOD_ECOLI 0.40 0.50 0.48 0.50 0.65 0.82 0.84 im
157 | DCTA_ECOLI 0.73 0.36 0.48 0.50 0.53 0.91 0.92 im
158 | DHG_ECOLI 0.84 0.44 0.48 0.50 0.48 0.71 0.74 im
159 | DHSC_ECOLI 0.48 0.45 0.48 0.50 0.60 0.78 0.80 im
160 | DHSD_ECOLI 0.54 0.49 0.48 0.50 0.40 0.87 0.88 im
161 | DPPC_ECOLI 0.48 0.41 0.48 0.50 0.51 0.90 0.88 im
162 | DSBB_ECOLI 0.50 0.66 0.48 0.50 0.31 0.92 0.92 im
163 | ENVZ_ECOLI 0.72 0.46 0.48 0.50 0.51 0.66 0.70 im
164 | EXBB_ECOLI 0.47 0.55 0.48 0.50 0.58 0.71 0.75 im
165 | FRDC_ECOLI 0.33 0.56 0.48 0.50 0.33 0.78 0.80 im
166 | FRDD_ECOLI 0.64 0.58 0.48 0.50 0.48 0.78 0.73 im
167 | FTSW_ECOLI 0.54 0.57 0.48 0.50 0.56 0.81 0.83 im
168 | GABP_ECOLI 0.47 0.59 0.48 0.50 0.52 0.76 0.79 im
169 | GALP_ECOLI 0.63 0.50 0.48 0.50 0.59 0.85 0.86 im
170 | GLNP_ECOLI 0.49 0.42 0.48 0.50 0.53 0.79 0.81 im
171 | GLPT_ECOLI 0.31 0.50 0.48 0.50 0.57 0.84 0.85 im
172 | GLTP_ECOLI 0.74 0.44 0.48 0.50 0.55 0.88 0.89 im
173 | KDGL_ECOLI 0.33 0.45 0.48 0.50 0.45 0.88 0.89 im
174 | KGTP_ECOLI 0.45 0.40 0.48 0.50 0.61 0.74 0.77 im
175 | LACY_ECOLI 0.71 0.40 0.48 0.50 0.71 0.70 0.74 im
176 | LGT_ECOLI 0.50 0.37 0.48 0.50 0.66 0.64 0.69 im
177 | LLDP_ECOLI 0.66 0.53 0.48 0.50 0.59 0.66 0.66 im
178 | LNT_ECOLI 0.60 0.61 0.48 0.50 0.54 0.67 0.71 im
179 | LSPA_ECOLI 0.83 0.37 0.48 0.50 0.61 0.71 0.74 im
180 | LYSP_ECOLI 0.34 0.51 0.48 0.50 0.67 0.90 0.90 im
181 | MALF_ECOLI 0.63 0.54 0.48 0.50 0.65 0.79 0.81 im
182 | MALG_ECOLI 0.70 0.40 0.48 0.50 0.56 0.86 0.83 im
183 | MCP3_ECOLI 0.60 0.50 1.00 0.50 0.54 0.77 0.80 im
184 | MSBB_ECOLI 0.16 0.51 0.48 0.50 0.33 0.39 0.48 im
185 | MTR_ECOLI 0.74 0.70 0.48 0.50 0.66 0.65 0.69 im
186 | NANT_ECOLI 0.20 0.46 0.48 0.50 0.57 0.78 0.81 im
187 | NHAA_ECOLI 0.89 0.55 0.48 0.50 0.51 0.72 0.76 im
188 | NHAB_ECOLI 0.70 0.46 0.48 0.50 0.56 0.78 0.73 im
189 | PHEP_ECOLI 0.12 0.43 0.48 0.50 0.63 0.70 0.74 im
190 | PHOR_ECOLI 0.61 0.52 0.48 0.50 0.54 0.67 0.52 im
191 | PNTA_ECOLI 0.33 0.37 0.48 0.50 0.46 0.65 0.69 im
192 | POTE_ECOLI 0.63 0.65 0.48 0.50 0.66 0.67 0.71 im
193 | PROP_ECOLI 0.41 0.51 0.48 0.50 0.53 0.75 0.78 im
194 | PSTA_ECOLI 0.34 0.67 0.48 0.50 0.52 0.76 0.79 im
195 | PSTC_ECOLI 0.58 0.34 0.48 0.50 0.56 0.87 0.81 im
196 | PTAA_ECOLI 0.59 0.56 0.48 0.50 0.55 0.80 0.82 im
197 | PTBA_ECOLI 0.51 0.40 0.48 0.50 0.57 0.62 0.67 im
198 | PTCC_ECOLI 0.50 0.57 0.48 0.50 0.71 0.61 0.66 im
199 | PTDA_ECOLI 0.60 0.46 0.48 0.50 0.45 0.81 0.83 im
200 | PTFB_ECOLI 0.37 0.47 0.48 0.50 0.39 0.76 0.79 im
201 | PTGB_ECOLI 0.58 0.55 0.48 0.50 0.57 0.70 0.74 im
202 | PTHB_ECOLI 0.36 0.47 0.48 0.50 0.51 0.69 0.72 im
203 | PTMA_ECOLI 0.39 0.41 0.48 0.50 0.52 0.72 0.75 im
204 | PTOA_ECOLI 0.35 0.51 0.48 0.50 0.61 0.71 0.74 im
205 | PTTB_ECOLI 0.31 0.44 0.48 0.50 0.50 0.79 0.82 im
206 | RODA_ECOLI 0.61 0.66 0.48 0.50 0.46 0.87 0.88 im
207 | SECE_ECOLI 0.48 0.49 0.48 0.50 0.52 0.77 0.71 im
208 | SECF_ECOLI 0.11 0.50 0.48 0.50 0.58 0.72 0.68 im
209 | SECY_ECOLI 0.31 0.36 0.48 0.50 0.58 0.94 0.94 im
210 | TNAB_ECOLI 0.68 0.51 0.48 0.50 0.71 0.75 0.78 im
211 | XYLE_ECOLI 0.69 0.39 0.48 0.50 0.57 0.76 0.79 im
212 | YCEE_ECOLI 0.52 0.54 0.48 0.50 0.62 0.76 0.79 im
213 | EXBD_ECOLI 0.46 0.59 0.48 0.50 0.36 0.76 0.23 im
214 | FTSL_ECOLI 0.36 0.45 0.48 0.50 0.38 0.79 0.17 im
215 | FTSN_ECOLI 0.00 0.51 0.48 0.50 0.35 0.67 0.44 im
216 | FTSQ_ECOLI 0.10 0.49 0.48 0.50 0.41 0.67 0.21 im
217 | MOTB_ECOLI 0.30 0.51 0.48 0.50 0.42 0.61 0.34 im
218 | TOLA_ECOLI 0.61 0.47 0.48 0.50 0.00 0.80 0.32 im
219 | TOLQ_ECOLI 0.63 0.75 0.48 0.50 0.64 0.73 0.66 im
220 | EMRB_ECOLI 0.71 0.52 0.48 0.50 0.64 1.00 0.99 im
221 | ATKC_ECOLI 0.85 0.53 0.48 0.50 0.53 0.52 0.35 imS
222 | NFRB_ECOLI 0.63 0.49 0.48 0.50 0.54 0.76 0.79 imS
223 | NLPA_ECOLI 0.75 0.55 1.00 1.00 0.40 0.47 0.30 imL
224 | CYOA_ECOLI 0.70 0.39 1.00 0.50 0.51 0.82 0.84 imL
225 | ATKA_ECOLI 0.72 0.42 0.48 0.50 0.65 0.77 0.79 imU
226 | BCR_ECOLI 0.79 0.41 0.48 0.50 0.66 0.81 0.83 imU
227 | CADB_ECOLI 0.83 0.48 0.48 0.50 0.65 0.76 0.79 imU
228 | CAIT_ECOLI 0.69 0.43 0.48 0.50 0.59 0.74 0.77 imU
229 | CPXA_ECOLI 0.79 0.36 0.48 0.50 0.46 0.82 0.70 imU
230 | CRED_ECOLI 0.78 0.33 0.48 0.50 0.57 0.77 0.79 imU
231 | CYDB_ECOLI 0.75 0.37 0.48 0.50 0.64 0.70 0.74 imU
232 | CYOB_ECOLI 0.59 0.29 0.48 0.50 0.64 0.75 0.77 imU
233 | CYOE_ECOLI 0.67 0.37 0.48 0.50 0.54 0.64 0.68 imU
234 | DMSC_ECOLI 0.66 0.48 0.48 0.50 0.54 0.70 0.74 imU
235 | DPPB_ECOLI 0.64 0.46 0.48 0.50 0.48 0.73 0.76 imU
236 | DSBD_ECOLI 0.76 0.71 0.48 0.50 0.50 0.71 0.75 imU
237 | FEPD_ECOLI 0.84 0.49 0.48 0.50 0.55 0.78 0.74 imU
238 | FEPG_ECOLI 0.77 0.55 0.48 0.50 0.51 0.78 0.74 imU
239 | FTSH_ECOLI 0.81 0.44 0.48 0.50 0.42 0.67 0.68 imU
240 | GLTS_ECOLI 0.58 0.60 0.48 0.50 0.59 0.73 0.76 imU
241 | KEFC_ECOLI 0.63 0.42 0.48 0.50 0.48 0.77 0.80 imU
242 | KUP_ECOLI 0.62 0.42 0.48 0.50 0.58 0.79 0.81 imU
243 | MCP1_ECOLI 0.86 0.39 0.48 0.50 0.59 0.89 0.90 imU
244 | MCP2_ECOLI 0.81 0.53 0.48 0.50 0.57 0.87 0.88 imU
245 | MCP4_ECOLI 0.87 0.49 0.48 0.50 0.61 0.76 0.79 imU
246 | MELB_ECOLI 0.47 0.46 0.48 0.50 0.62 0.74 0.77 imU
247 | MOTA_ECOLI 0.76 0.41 0.48 0.50 0.50 0.59 0.62 imU
248 | NUPC_ECOLI 0.70 0.53 0.48 0.50 0.70 0.86 0.87 imU
249 | NUPG_ECOLI 0.64 0.45 0.48 0.50 0.67 0.61 0.66 imU
250 | PNTB_ECOLI 0.81 0.52 0.48 0.50 0.57 0.78 0.80 imU
251 | PTKC_ECOLI 0.73 0.26 0.48 0.50 0.57 0.75 0.78 imU
252 | RHAT_ECOLI 0.49 0.61 1.00 0.50 0.56 0.71 0.74 imU
253 | SECD_ECOLI 0.88 0.42 0.48 0.50 0.52 0.73 0.75 imU
254 | SECG_ECOLI 0.84 0.54 0.48 0.50 0.75 0.92 0.70 imU
255 | TEHA_ECOLI 0.63 0.51 0.48 0.50 0.64 0.72 0.76 imU
256 | TYRP_ECOLI 0.86 0.55 0.48 0.50 0.63 0.81 0.83 imU
257 | UHPB_ECOLI 0.79 0.54 0.48 0.50 0.50 0.66 0.68 imU
258 | TONB_ECOLI 0.57 0.38 0.48 0.50 0.06 0.49 0.33 imU
259 | LEP_ECOLI 0.78 0.44 0.48 0.50 0.45 0.73 0.68 imU
260 | FADL_ECOLI 0.78 0.68 0.48 0.50 0.83 0.40 0.29 om
261 | FHUA_ECOLI 0.63 0.69 0.48 0.50 0.65 0.41 0.28 om
262 | LAMB_ECOLI 0.67 0.88 0.48 0.50 0.73 0.50 0.25 om
263 | NFRA_ECOLI 0.61 0.75 0.48 0.50 0.51 0.33 0.33 om
264 | NMPC_ECOLI 0.67 0.84 0.48 0.50 0.74 0.54 0.37 om
265 | OMPA_ECOLI 0.74 0.90 0.48 0.50 0.57 0.53 0.29 om
266 | OMPC_ECOLI 0.73 0.84 0.48 0.50 0.86 0.58 0.29 om
267 | OMPF_ECOLI 0.75 0.76 0.48 0.50 0.83 0.57 0.30 om
268 | OMPX_ECOLI 0.77 0.57 0.48 0.50 0.88 0.53 0.20 om
269 | PHOE_ECOLI 0.74 0.78 0.48 0.50 0.75 0.54 0.15 om
270 | TSX_ECOLI 0.68 0.76 0.48 0.50 0.84 0.45 0.27 om
271 | BTUB_ECOLI 0.56 0.68 0.48 0.50 0.77 0.36 0.45 om
272 | CIRA_ECOLI 0.65 0.51 0.48 0.50 0.66 0.54 0.33 om
273 | FECA_ECOLI 0.52 0.81 0.48 0.50 0.72 0.38 0.38 om
274 | FEPA_ECOLI 0.64 0.57 0.48 0.50 0.70 0.33 0.26 om
275 | FHUE_ECOLI 0.60 0.76 1.00 0.50 0.77 0.59 0.52 om
276 | OMPP_ECOLI 0.69 0.59 0.48 0.50 0.77 0.39 0.21 om
277 | OMPT_ECOLI 0.63 0.49 0.48 0.50 0.79 0.45 0.28 om
278 | TOLC_ECOLI 0.71 0.71 0.48 0.50 0.68 0.43 0.36 om
279 | PA1_ECOLI 0.68 0.63 0.48 0.50 0.73 0.40 0.30 om
280 | MULI_ECOLI 0.77 0.57 1.00 0.50 0.37 0.54 0.01 omL
281 | NLPB_ECOLI 0.66 0.49 1.00 0.50 0.54 0.56 0.36 omL
282 | NLPE_ECOLI 0.71 0.46 1.00 0.50 0.52 0.59 0.30 omL
283 | PAL_ECOLI 0.67 0.55 1.00 0.50 0.66 0.58 0.16 omL
284 | SLP_ECOLI 0.68 0.49 1.00 0.50 0.62 0.55 0.28 omL
285 | AGP_ECOLI 0.74 0.49 0.48 0.50 0.42 0.54 0.36 pp
286 | AMY1_ECOLI 0.70 0.61 0.48 0.50 0.56 0.52 0.43 pp
287 | ARAF_ECOLI 0.66 0.86 0.48 0.50 0.34 0.41 0.36 pp
288 | ASG2_ECOLI 0.73 0.78 0.48 0.50 0.58 0.51 0.31 pp
289 | BGLX_ECOLI 0.65 0.57 0.48 0.50 0.47 0.47 0.51 pp
290 | C562_ECOLI 0.72 0.86 0.48 0.50 0.17 0.55 0.21 pp
291 | CN16_ECOLI 0.67 0.70 0.48 0.50 0.46 0.45 0.33 pp
292 | CYPH_ECOLI 0.67 0.81 0.48 0.50 0.54 0.49 0.23 pp
293 | CYSP_ECOLI 0.67 0.61 0.48 0.50 0.51 0.37 0.38 pp
294 | DGAL_ECOLI 0.63 1.00 0.48 0.50 0.35 0.51 0.49 pp
295 | DPPA_ECOLI 0.57 0.59 0.48 0.50 0.39 0.47 0.33 pp
296 | DSBA_ECOLI 0.71 0.71 0.48 0.50 0.40 0.54 0.39 pp
297 | DSBC_ECOLI 0.66 0.74 0.48 0.50 0.31 0.38 0.43 pp
298 | ECOT_ECOLI 0.67 0.81 0.48 0.50 0.25 0.42 0.25 pp
299 | ECPD_ECOLI 0.64 0.72 0.48 0.50 0.49 0.42 0.19 pp
300 | FECB_ECOLI 0.68 0.82 0.48 0.50 0.38 0.65 0.56 pp
301 | FECR_ECOLI 0.32 0.39 0.48 0.50 0.53 0.28 0.38 pp
302 | FEPB_ECOLI 0.70 0.64 0.48 0.50 0.47 0.51 0.47 pp
303 | FIMC_ECOLI 0.63 0.57 0.48 0.50 0.49 0.70 0.20 pp
304 | GGT_ECOLI 0.74 0.82 0.48 0.50 0.49 0.49 0.41 pp
305 | GLNH_ECOLI 0.63 0.86 0.48 0.50 0.39 0.47 0.34 pp
306 | GLPQ_ECOLI 0.63 0.83 0.48 0.50 0.40 0.39 0.19 pp
307 | HTRA_ECOLI 0.63 0.71 0.48 0.50 0.60 0.40 0.39 pp
308 | LIVJ_ECOLI 0.71 0.86 0.48 0.50 0.40 0.54 0.32 pp
309 | LIVK_ECOLI 0.68 0.78 0.48 0.50 0.43 0.44 0.42 pp
310 | MALE_ECOLI 0.64 0.84 0.48 0.50 0.37 0.45 0.40 pp
311 | MALM_ECOLI 0.74 0.47 0.48 0.50 0.50 0.57 0.42 pp
312 | MEPA_ECOLI 0.75 0.84 0.48 0.50 0.35 0.52 0.33 pp
313 | MODA_ECOLI 0.63 0.65 0.48 0.50 0.39 0.44 0.35 pp
314 | NRFA_ECOLI 0.69 0.67 0.48 0.50 0.30 0.39 0.24 pp
315 | NRFF_ECOLI 0.70 0.71 0.48 0.50 0.42 0.84 0.85 pp
316 | OPPA_ECOLI 0.69 0.80 0.48 0.50 0.46 0.57 0.26 pp
317 | OSMY_ECOLI 0.64 0.66 0.48 0.50 0.41 0.39 0.20 pp
318 | POTD_ECOLI 0.63 0.80 0.48 0.50 0.46 0.31 0.29 pp
319 | POTF_ECOLI 0.66 0.71 0.48 0.50 0.41 0.50 0.35 pp
320 | PPA_ECOLI 0.69 0.59 0.48 0.50 0.46 0.44 0.52 pp
321 | PPB_ECOLI 0.68 0.67 0.48 0.50 0.49 0.40 0.34 pp
322 | PROX_ECOLI 0.64 0.78 0.48 0.50 0.50 0.36 0.38 pp
323 | PSTS_ECOLI 0.62 0.78 0.48 0.50 0.47 0.49 0.54 pp
324 | PTR_ECOLI 0.76 0.73 0.48 0.50 0.44 0.39 0.39 pp
325 | RBSB_ECOLI 0.64 0.81 0.48 0.50 0.37 0.39 0.44 pp
326 | SPEA_ECOLI 0.29 0.39 0.48 0.50 0.52 0.40 0.48 pp
327 | SUBI_ECOLI 0.62 0.83 0.48 0.50 0.46 0.36 0.40 pp
328 | TBPA_ECOLI 0.56 0.54 0.48 0.50 0.43 0.37 0.30 pp
329 | TESA_ECOLI 0.69 0.66 0.48 0.50 0.41 0.50 0.25 pp
330 | TOLB_ECOLI 0.69 0.65 0.48 0.50 0.63 0.48 0.41 pp
331 | TORA_ECOLI 0.43 0.59 0.48 0.50 0.52 0.49 0.56 pp
332 | TREA_ECOLI 0.74 0.56 0.48 0.50 0.47 0.68 0.30 pp
333 | UGPB_ECOLI 0.71 0.57 0.48 0.50 0.48 0.35 0.32 pp
334 | USHA_ECOLI 0.61 0.60 0.48 0.50 0.44 0.39 0.38 pp
335 | XYLF_ECOLI 0.59 0.61 0.48 0.50 0.42 0.42 0.37 pp
336 | YTFQ_ECOLI 0.74 0.74 0.48 0.50 0.31 0.53 0.52 pp
337 |
--------------------------------------------------------------------------------
/data/glass.data:
--------------------------------------------------------------------------------
1 | 1,1.52101,13.64,4.49,1.10,71.78,0.06,8.75,0.00,0.00,1
2 | 2,1.51761,13.89,3.60,1.36,72.73,0.48,7.83,0.00,0.00,1
3 | 3,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.00,0.00,1
4 | 4,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.00,0.00,1
5 | 5,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.00,0.00,1
6 | 6,1.51596,12.79,3.61,1.62,72.97,0.64,8.07,0.00,0.26,1
7 | 7,1.51743,13.30,3.60,1.14,73.09,0.58,8.17,0.00,0.00,1
8 | 8,1.51756,13.15,3.61,1.05,73.24,0.57,8.24,0.00,0.00,1
9 | 9,1.51918,14.04,3.58,1.37,72.08,0.56,8.30,0.00,0.00,1
10 | 10,1.51755,13.00,3.60,1.36,72.99,0.57,8.40,0.00,0.11,1
11 | 11,1.51571,12.72,3.46,1.56,73.20,0.67,8.09,0.00,0.24,1
12 | 12,1.51763,12.80,3.66,1.27,73.01,0.60,8.56,0.00,0.00,1
13 | 13,1.51589,12.88,3.43,1.40,73.28,0.69,8.05,0.00,0.24,1
14 | 14,1.51748,12.86,3.56,1.27,73.21,0.54,8.38,0.00,0.17,1
15 | 15,1.51763,12.61,3.59,1.31,73.29,0.58,8.50,0.00,0.00,1
16 | 16,1.51761,12.81,3.54,1.23,73.24,0.58,8.39,0.00,0.00,1
17 | 17,1.51784,12.68,3.67,1.16,73.11,0.61,8.70,0.00,0.00,1
18 | 18,1.52196,14.36,3.85,0.89,71.36,0.15,9.15,0.00,0.00,1
19 | 19,1.51911,13.90,3.73,1.18,72.12,0.06,8.89,0.00,0.00,1
20 | 20,1.51735,13.02,3.54,1.69,72.73,0.54,8.44,0.00,0.07,1
21 | 21,1.51750,12.82,3.55,1.49,72.75,0.54,8.52,0.00,0.19,1
22 | 22,1.51966,14.77,3.75,0.29,72.02,0.03,9.00,0.00,0.00,1
23 | 23,1.51736,12.78,3.62,1.29,72.79,0.59,8.70,0.00,0.00,1
24 | 24,1.51751,12.81,3.57,1.35,73.02,0.62,8.59,0.00,0.00,1
25 | 25,1.51720,13.38,3.50,1.15,72.85,0.50,8.43,0.00,0.00,1
26 | 26,1.51764,12.98,3.54,1.21,73.00,0.65,8.53,0.00,0.00,1
27 | 27,1.51793,13.21,3.48,1.41,72.64,0.59,8.43,0.00,0.00,1
28 | 28,1.51721,12.87,3.48,1.33,73.04,0.56,8.43,0.00,0.00,1
29 | 29,1.51768,12.56,3.52,1.43,73.15,0.57,8.54,0.00,0.00,1
30 | 30,1.51784,13.08,3.49,1.28,72.86,0.60,8.49,0.00,0.00,1
31 | 31,1.51768,12.65,3.56,1.30,73.08,0.61,8.69,0.00,0.14,1
32 | 32,1.51747,12.84,3.50,1.14,73.27,0.56,8.55,0.00,0.00,1
33 | 33,1.51775,12.85,3.48,1.23,72.97,0.61,8.56,0.09,0.22,1
34 | 34,1.51753,12.57,3.47,1.38,73.39,0.60,8.55,0.00,0.06,1
35 | 35,1.51783,12.69,3.54,1.34,72.95,0.57,8.75,0.00,0.00,1
36 | 36,1.51567,13.29,3.45,1.21,72.74,0.56,8.57,0.00,0.00,1
37 | 37,1.51909,13.89,3.53,1.32,71.81,0.51,8.78,0.11,0.00,1
38 | 38,1.51797,12.74,3.48,1.35,72.96,0.64,8.68,0.00,0.00,1
39 | 39,1.52213,14.21,3.82,0.47,71.77,0.11,9.57,0.00,0.00,1
40 | 40,1.52213,14.21,3.82,0.47,71.77,0.11,9.57,0.00,0.00,1
41 | 41,1.51793,12.79,3.50,1.12,73.03,0.64,8.77,0.00,0.00,1
42 | 42,1.51755,12.71,3.42,1.20,73.20,0.59,8.64,0.00,0.00,1
43 | 43,1.51779,13.21,3.39,1.33,72.76,0.59,8.59,0.00,0.00,1
44 | 44,1.52210,13.73,3.84,0.72,71.76,0.17,9.74,0.00,0.00,1
45 | 45,1.51786,12.73,3.43,1.19,72.95,0.62,8.76,0.00,0.30,1
46 | 46,1.51900,13.49,3.48,1.35,71.95,0.55,9.00,0.00,0.00,1
47 | 47,1.51869,13.19,3.37,1.18,72.72,0.57,8.83,0.00,0.16,1
48 | 48,1.52667,13.99,3.70,0.71,71.57,0.02,9.82,0.00,0.10,1
49 | 49,1.52223,13.21,3.77,0.79,71.99,0.13,10.02,0.00,0.00,1
50 | 50,1.51898,13.58,3.35,1.23,72.08,0.59,8.91,0.00,0.00,1
51 | 51,1.52320,13.72,3.72,0.51,71.75,0.09,10.06,0.00,0.16,1
52 | 52,1.51926,13.20,3.33,1.28,72.36,0.60,9.14,0.00,0.11,1
53 | 53,1.51808,13.43,2.87,1.19,72.84,0.55,9.03,0.00,0.00,1
54 | 54,1.51837,13.14,2.84,1.28,72.85,0.55,9.07,0.00,0.00,1
55 | 55,1.51778,13.21,2.81,1.29,72.98,0.51,9.02,0.00,0.09,1
56 | 56,1.51769,12.45,2.71,1.29,73.70,0.56,9.06,0.00,0.24,1
57 | 57,1.51215,12.99,3.47,1.12,72.98,0.62,8.35,0.00,0.31,1
58 | 58,1.51824,12.87,3.48,1.29,72.95,0.60,8.43,0.00,0.00,1
59 | 59,1.51754,13.48,3.74,1.17,72.99,0.59,8.03,0.00,0.00,1
60 | 60,1.51754,13.39,3.66,1.19,72.79,0.57,8.27,0.00,0.11,1
61 | 61,1.51905,13.60,3.62,1.11,72.64,0.14,8.76,0.00,0.00,1
62 | 62,1.51977,13.81,3.58,1.32,71.72,0.12,8.67,0.69,0.00,1
63 | 63,1.52172,13.51,3.86,0.88,71.79,0.23,9.54,0.00,0.11,1
64 | 64,1.52227,14.17,3.81,0.78,71.35,0.00,9.69,0.00,0.00,1
65 | 65,1.52172,13.48,3.74,0.90,72.01,0.18,9.61,0.00,0.07,1
66 | 66,1.52099,13.69,3.59,1.12,71.96,0.09,9.40,0.00,0.00,1
67 | 67,1.52152,13.05,3.65,0.87,72.22,0.19,9.85,0.00,0.17,1
68 | 68,1.52152,13.05,3.65,0.87,72.32,0.19,9.85,0.00,0.17,1
69 | 69,1.52152,13.12,3.58,0.90,72.20,0.23,9.82,0.00,0.16,1
70 | 70,1.52300,13.31,3.58,0.82,71.99,0.12,10.17,0.00,0.03,1
71 | 71,1.51574,14.86,3.67,1.74,71.87,0.16,7.36,0.00,0.12,2
72 | 72,1.51848,13.64,3.87,1.27,71.96,0.54,8.32,0.00,0.32,2
73 | 73,1.51593,13.09,3.59,1.52,73.10,0.67,7.83,0.00,0.00,2
74 | 74,1.51631,13.34,3.57,1.57,72.87,0.61,7.89,0.00,0.00,2
75 | 75,1.51596,13.02,3.56,1.54,73.11,0.72,7.90,0.00,0.00,2
76 | 76,1.51590,13.02,3.58,1.51,73.12,0.69,7.96,0.00,0.00,2
77 | 77,1.51645,13.44,3.61,1.54,72.39,0.66,8.03,0.00,0.00,2
78 | 78,1.51627,13.00,3.58,1.54,72.83,0.61,8.04,0.00,0.00,2
79 | 79,1.51613,13.92,3.52,1.25,72.88,0.37,7.94,0.00,0.14,2
80 | 80,1.51590,12.82,3.52,1.90,72.86,0.69,7.97,0.00,0.00,2
81 | 81,1.51592,12.86,3.52,2.12,72.66,0.69,7.97,0.00,0.00,2
82 | 82,1.51593,13.25,3.45,1.43,73.17,0.61,7.86,0.00,0.00,2
83 | 83,1.51646,13.41,3.55,1.25,72.81,0.68,8.10,0.00,0.00,2
84 | 84,1.51594,13.09,3.52,1.55,72.87,0.68,8.05,0.00,0.09,2
85 | 85,1.51409,14.25,3.09,2.08,72.28,1.10,7.08,0.00,0.00,2
86 | 86,1.51625,13.36,3.58,1.49,72.72,0.45,8.21,0.00,0.00,2
87 | 87,1.51569,13.24,3.49,1.47,73.25,0.38,8.03,0.00,0.00,2
88 | 88,1.51645,13.40,3.49,1.52,72.65,0.67,8.08,0.00,0.10,2
89 | 89,1.51618,13.01,3.50,1.48,72.89,0.60,8.12,0.00,0.00,2
90 | 90,1.51640,12.55,3.48,1.87,73.23,0.63,8.08,0.00,0.09,2
91 | 91,1.51841,12.93,3.74,1.11,72.28,0.64,8.96,0.00,0.22,2
92 | 92,1.51605,12.90,3.44,1.45,73.06,0.44,8.27,0.00,0.00,2
93 | 93,1.51588,13.12,3.41,1.58,73.26,0.07,8.39,0.00,0.19,2
94 | 94,1.51590,13.24,3.34,1.47,73.10,0.39,8.22,0.00,0.00,2
95 | 95,1.51629,12.71,3.33,1.49,73.28,0.67,8.24,0.00,0.00,2
96 | 96,1.51860,13.36,3.43,1.43,72.26,0.51,8.60,0.00,0.00,2
97 | 97,1.51841,13.02,3.62,1.06,72.34,0.64,9.13,0.00,0.15,2
98 | 98,1.51743,12.20,3.25,1.16,73.55,0.62,8.90,0.00,0.24,2
99 | 99,1.51689,12.67,2.88,1.71,73.21,0.73,8.54,0.00,0.00,2
100 | 100,1.51811,12.96,2.96,1.43,72.92,0.60,8.79,0.14,0.00,2
101 | 101,1.51655,12.75,2.85,1.44,73.27,0.57,8.79,0.11,0.22,2
102 | 102,1.51730,12.35,2.72,1.63,72.87,0.70,9.23,0.00,0.00,2
103 | 103,1.51820,12.62,2.76,0.83,73.81,0.35,9.42,0.00,0.20,2
104 | 104,1.52725,13.80,3.15,0.66,70.57,0.08,11.64,0.00,0.00,2
105 | 105,1.52410,13.83,2.90,1.17,71.15,0.08,10.79,0.00,0.00,2
106 | 106,1.52475,11.45,0.00,1.88,72.19,0.81,13.24,0.00,0.34,2
107 | 107,1.53125,10.73,0.00,2.10,69.81,0.58,13.30,3.15,0.28,2
108 | 108,1.53393,12.30,0.00,1.00,70.16,0.12,16.19,0.00,0.24,2
109 | 109,1.52222,14.43,0.00,1.00,72.67,0.10,11.52,0.00,0.08,2
110 | 110,1.51818,13.72,0.00,0.56,74.45,0.00,10.99,0.00,0.00,2
111 | 111,1.52664,11.23,0.00,0.77,73.21,0.00,14.68,0.00,0.00,2
112 | 112,1.52739,11.02,0.00,0.75,73.08,0.00,14.96,0.00,0.00,2
113 | 113,1.52777,12.64,0.00,0.67,72.02,0.06,14.40,0.00,0.00,2
114 | 114,1.51892,13.46,3.83,1.26,72.55,0.57,8.21,0.00,0.14,2
115 | 115,1.51847,13.10,3.97,1.19,72.44,0.60,8.43,0.00,0.00,2
116 | 116,1.51846,13.41,3.89,1.33,72.38,0.51,8.28,0.00,0.00,2
117 | 117,1.51829,13.24,3.90,1.41,72.33,0.55,8.31,0.00,0.10,2
118 | 118,1.51708,13.72,3.68,1.81,72.06,0.64,7.88,0.00,0.00,2
119 | 119,1.51673,13.30,3.64,1.53,72.53,0.65,8.03,0.00,0.29,2
120 | 120,1.51652,13.56,3.57,1.47,72.45,0.64,7.96,0.00,0.00,2
121 | 121,1.51844,13.25,3.76,1.32,72.40,0.58,8.42,0.00,0.00,2
122 | 122,1.51663,12.93,3.54,1.62,72.96,0.64,8.03,0.00,0.21,2
123 | 123,1.51687,13.23,3.54,1.48,72.84,0.56,8.10,0.00,0.00,2
124 | 124,1.51707,13.48,3.48,1.71,72.52,0.62,7.99,0.00,0.00,2
125 | 125,1.52177,13.20,3.68,1.15,72.75,0.54,8.52,0.00,0.00,2
126 | 126,1.51872,12.93,3.66,1.56,72.51,0.58,8.55,0.00,0.12,2
127 | 127,1.51667,12.94,3.61,1.26,72.75,0.56,8.60,0.00,0.00,2
128 | 128,1.52081,13.78,2.28,1.43,71.99,0.49,9.85,0.00,0.17,2
129 | 129,1.52068,13.55,2.09,1.67,72.18,0.53,9.57,0.27,0.17,2
130 | 130,1.52020,13.98,1.35,1.63,71.76,0.39,10.56,0.00,0.18,2
131 | 131,1.52177,13.75,1.01,1.36,72.19,0.33,11.14,0.00,0.00,2
132 | 132,1.52614,13.70,0.00,1.36,71.24,0.19,13.44,0.00,0.10,2
133 | 133,1.51813,13.43,3.98,1.18,72.49,0.58,8.15,0.00,0.00,2
134 | 134,1.51800,13.71,3.93,1.54,71.81,0.54,8.21,0.00,0.15,2
135 | 135,1.51811,13.33,3.85,1.25,72.78,0.52,8.12,0.00,0.00,2
136 | 136,1.51789,13.19,3.90,1.30,72.33,0.55,8.44,0.00,0.28,2
137 | 137,1.51806,13.00,3.80,1.08,73.07,0.56,8.38,0.00,0.12,2
138 | 138,1.51711,12.89,3.62,1.57,72.96,0.61,8.11,0.00,0.00,2
139 | 139,1.51674,12.79,3.52,1.54,73.36,0.66,7.90,0.00,0.00,2
140 | 140,1.51674,12.87,3.56,1.64,73.14,0.65,7.99,0.00,0.00,2
141 | 141,1.51690,13.33,3.54,1.61,72.54,0.68,8.11,0.00,0.00,2
142 | 142,1.51851,13.20,3.63,1.07,72.83,0.57,8.41,0.09,0.17,2
143 | 143,1.51662,12.85,3.51,1.44,73.01,0.68,8.23,0.06,0.25,2
144 | 144,1.51709,13.00,3.47,1.79,72.72,0.66,8.18,0.00,0.00,2
145 | 145,1.51660,12.99,3.18,1.23,72.97,0.58,8.81,0.00,0.24,2
146 | 146,1.51839,12.85,3.67,1.24,72.57,0.62,8.68,0.00,0.35,2
147 | 147,1.51769,13.65,3.66,1.11,72.77,0.11,8.60,0.00,0.00,3
148 | 148,1.51610,13.33,3.53,1.34,72.67,0.56,8.33,0.00,0.00,3
149 | 149,1.51670,13.24,3.57,1.38,72.70,0.56,8.44,0.00,0.10,3
150 | 150,1.51643,12.16,3.52,1.35,72.89,0.57,8.53,0.00,0.00,3
151 | 151,1.51665,13.14,3.45,1.76,72.48,0.60,8.38,0.00,0.17,3
152 | 152,1.52127,14.32,3.90,0.83,71.50,0.00,9.49,0.00,0.00,3
153 | 153,1.51779,13.64,3.65,0.65,73.00,0.06,8.93,0.00,0.00,3
154 | 154,1.51610,13.42,3.40,1.22,72.69,0.59,8.32,0.00,0.00,3
155 | 155,1.51694,12.86,3.58,1.31,72.61,0.61,8.79,0.00,0.00,3
156 | 156,1.51646,13.04,3.40,1.26,73.01,0.52,8.58,0.00,0.00,3
157 | 157,1.51655,13.41,3.39,1.28,72.64,0.52,8.65,0.00,0.00,3
158 | 158,1.52121,14.03,3.76,0.58,71.79,0.11,9.65,0.00,0.00,3
159 | 159,1.51776,13.53,3.41,1.52,72.04,0.58,8.79,0.00,0.00,3
160 | 160,1.51796,13.50,3.36,1.63,71.94,0.57,8.81,0.00,0.09,3
161 | 161,1.51832,13.33,3.34,1.54,72.14,0.56,8.99,0.00,0.00,3
162 | 162,1.51934,13.64,3.54,0.75,72.65,0.16,8.89,0.15,0.24,3
163 | 163,1.52211,14.19,3.78,0.91,71.36,0.23,9.14,0.00,0.37,3
164 | 164,1.51514,14.01,2.68,3.50,69.89,1.68,5.87,2.20,0.00,5
165 | 165,1.51915,12.73,1.85,1.86,72.69,0.60,10.09,0.00,0.00,5
166 | 166,1.52171,11.56,1.88,1.56,72.86,0.47,11.41,0.00,0.00,5
167 | 167,1.52151,11.03,1.71,1.56,73.44,0.58,11.62,0.00,0.00,5
168 | 168,1.51969,12.64,0.00,1.65,73.75,0.38,11.53,0.00,0.00,5
169 | 169,1.51666,12.86,0.00,1.83,73.88,0.97,10.17,0.00,0.00,5
170 | 170,1.51994,13.27,0.00,1.76,73.03,0.47,11.32,0.00,0.00,5
171 | 171,1.52369,13.44,0.00,1.58,72.22,0.32,12.24,0.00,0.00,5
172 | 172,1.51316,13.02,0.00,3.04,70.48,6.21,6.96,0.00,0.00,5
173 | 173,1.51321,13.00,0.00,3.02,70.70,6.21,6.93,0.00,0.00,5
174 | 174,1.52043,13.38,0.00,1.40,72.25,0.33,12.50,0.00,0.00,5
175 | 175,1.52058,12.85,1.61,2.17,72.18,0.76,9.70,0.24,0.51,5
176 | 176,1.52119,12.97,0.33,1.51,73.39,0.13,11.27,0.00,0.28,5
177 | 177,1.51905,14.00,2.39,1.56,72.37,0.00,9.57,0.00,0.00,6
178 | 178,1.51937,13.79,2.41,1.19,72.76,0.00,9.77,0.00,0.00,6
179 | 179,1.51829,14.46,2.24,1.62,72.38,0.00,9.26,0.00,0.00,6
180 | 180,1.51852,14.09,2.19,1.66,72.67,0.00,9.32,0.00,0.00,6
181 | 181,1.51299,14.40,1.74,1.54,74.55,0.00,7.59,0.00,0.00,6
182 | 182,1.51888,14.99,0.78,1.74,72.50,0.00,9.95,0.00,0.00,6
183 | 183,1.51916,14.15,0.00,2.09,72.74,0.00,10.88,0.00,0.00,6
184 | 184,1.51969,14.56,0.00,0.56,73.48,0.00,11.22,0.00,0.00,6
185 | 185,1.51115,17.38,0.00,0.34,75.41,0.00,6.65,0.00,0.00,6
186 | 186,1.51131,13.69,3.20,1.81,72.81,1.76,5.43,1.19,0.00,7
187 | 187,1.51838,14.32,3.26,2.22,71.25,1.46,5.79,1.63,0.00,7
188 | 188,1.52315,13.44,3.34,1.23,72.38,0.60,8.83,0.00,0.00,7
189 | 189,1.52247,14.86,2.20,2.06,70.26,0.76,9.76,0.00,0.00,7
190 | 190,1.52365,15.79,1.83,1.31,70.43,0.31,8.61,1.68,0.00,7
191 | 191,1.51613,13.88,1.78,1.79,73.10,0.00,8.67,0.76,0.00,7
192 | 192,1.51602,14.85,0.00,2.38,73.28,0.00,8.76,0.64,0.09,7
193 | 193,1.51623,14.20,0.00,2.79,73.46,0.04,9.04,0.40,0.09,7
194 | 194,1.51719,14.75,0.00,2.00,73.02,0.00,8.53,1.59,0.08,7
195 | 195,1.51683,14.56,0.00,1.98,73.29,0.00,8.52,1.57,0.07,7
196 | 196,1.51545,14.14,0.00,2.68,73.39,0.08,9.07,0.61,0.05,7
197 | 197,1.51556,13.87,0.00,2.54,73.23,0.14,9.41,0.81,0.01,7
198 | 198,1.51727,14.70,0.00,2.34,73.28,0.00,8.95,0.66,0.00,7
199 | 199,1.51531,14.38,0.00,2.66,73.10,0.04,9.08,0.64,0.00,7
200 | 200,1.51609,15.01,0.00,2.51,73.05,0.05,8.83,0.53,0.00,7
201 | 201,1.51508,15.15,0.00,2.25,73.50,0.00,8.34,0.63,0.00,7
202 | 202,1.51653,11.95,0.00,1.19,75.18,2.70,8.93,0.00,0.00,7
203 | 203,1.51514,14.85,0.00,2.42,73.72,0.00,8.39,0.56,0.00,7
204 | 204,1.51658,14.80,0.00,1.99,73.11,0.00,8.28,1.71,0.00,7
205 | 205,1.51617,14.95,0.00,2.27,73.30,0.00,8.71,0.67,0.00,7
206 | 206,1.51732,14.95,0.00,1.80,72.99,0.00,8.61,1.55,0.00,7
207 | 207,1.51645,14.94,0.00,1.87,73.11,0.00,8.67,1.38,0.00,7
208 | 208,1.51831,14.39,0.00,1.82,72.86,1.41,6.47,2.88,0.00,7
209 | 209,1.51640,14.37,0.00,2.74,72.85,0.00,9.45,0.54,0.00,7
210 | 210,1.51623,14.14,0.00,2.88,72.61,0.08,9.18,1.06,0.00,7
211 | 211,1.51685,14.92,0.00,1.99,73.06,0.00,8.40,1.59,0.00,7
212 | 212,1.52065,14.36,0.00,2.02,73.42,0.00,8.44,1.64,0.00,7
213 | 213,1.51651,14.38,0.00,1.94,73.61,0.00,8.48,1.57,0.00,7
214 | 214,1.51711,14.23,0.00,2.08,73.36,0.00,8.62,1.67,0.00,7
215 |
--------------------------------------------------------------------------------
/data/heart.dat:
--------------------------------------------------------------------------------
1 | 70.0 1.0 4.0 130.0 322.0 0.0 2.0 109.0 0.0 2.4 2.0 3.0 3.0 2
2 | 67.0 0.0 3.0 115.0 564.0 0.0 2.0 160.0 0.0 1.6 2.0 0.0 7.0 1
3 | 57.0 1.0 2.0 124.0 261.0 0.0 0.0 141.0 0.0 0.3 1.0 0.0 7.0 2
4 | 64.0 1.0 4.0 128.0 263.0 0.0 0.0 105.0 1.0 0.2 2.0 1.0 7.0 1
5 | 74.0 0.0 2.0 120.0 269.0 0.0 2.0 121.0 1.0 0.2 1.0 1.0 3.0 1
6 | 65.0 1.0 4.0 120.0 177.0 0.0 0.0 140.0 0.0 0.4 1.0 0.0 7.0 1
7 | 56.0 1.0 3.0 130.0 256.0 1.0 2.0 142.0 1.0 0.6 2.0 1.0 6.0 2
8 | 59.0 1.0 4.0 110.0 239.0 0.0 2.0 142.0 1.0 1.2 2.0 1.0 7.0 2
9 | 60.0 1.0 4.0 140.0 293.0 0.0 2.0 170.0 0.0 1.2 2.0 2.0 7.0 2
10 | 63.0 0.0 4.0 150.0 407.0 0.0 2.0 154.0 0.0 4.0 2.0 3.0 7.0 2
11 | 59.0 1.0 4.0 135.0 234.0 0.0 0.0 161.0 0.0 0.5 2.0 0.0 7.0 1
12 | 53.0 1.0 4.0 142.0 226.0 0.0 2.0 111.0 1.0 0.0 1.0 0.0 7.0 1
13 | 44.0 1.0 3.0 140.0 235.0 0.0 2.0 180.0 0.0 0.0 1.0 0.0 3.0 1
14 | 61.0 1.0 1.0 134.0 234.0 0.0 0.0 145.0 0.0 2.6 2.0 2.0 3.0 2
15 | 57.0 0.0 4.0 128.0 303.0 0.0 2.0 159.0 0.0 0.0 1.0 1.0 3.0 1
16 | 71.0 0.0 4.0 112.0 149.0 0.0 0.0 125.0 0.0 1.6 2.0 0.0 3.0 1
17 | 46.0 1.0 4.0 140.0 311.0 0.0 0.0 120.0 1.0 1.8 2.0 2.0 7.0 2
18 | 53.0 1.0 4.0 140.0 203.0 1.0 2.0 155.0 1.0 3.1 3.0 0.0 7.0 2
19 | 64.0 1.0 1.0 110.0 211.0 0.0 2.0 144.0 1.0 1.8 2.0 0.0 3.0 1
20 | 40.0 1.0 1.0 140.0 199.0 0.0 0.0 178.0 1.0 1.4 1.0 0.0 7.0 1
21 | 67.0 1.0 4.0 120.0 229.0 0.0 2.0 129.0 1.0 2.6 2.0 2.0 7.0 2
22 | 48.0 1.0 2.0 130.0 245.0 0.0 2.0 180.0 0.0 0.2 2.0 0.0 3.0 1
23 | 43.0 1.0 4.0 115.0 303.0 0.0 0.0 181.0 0.0 1.2 2.0 0.0 3.0 1
24 | 47.0 1.0 4.0 112.0 204.0 0.0 0.0 143.0 0.0 0.1 1.0 0.0 3.0 1
25 | 54.0 0.0 2.0 132.0 288.0 1.0 2.0 159.0 1.0 0.0 1.0 1.0 3.0 1
26 | 48.0 0.0 3.0 130.0 275.0 0.0 0.0 139.0 0.0 0.2 1.0 0.0 3.0 1
27 | 46.0 0.0 4.0 138.0 243.0 0.0 2.0 152.0 1.0 0.0 2.0 0.0 3.0 1
28 | 51.0 0.0 3.0 120.0 295.0 0.0 2.0 157.0 0.0 0.6 1.0 0.0 3.0 1
29 | 58.0 1.0 3.0 112.0 230.0 0.0 2.0 165.0 0.0 2.5 2.0 1.0 7.0 2
30 | 71.0 0.0 3.0 110.0 265.0 1.0 2.0 130.0 0.0 0.0 1.0 1.0 3.0 1
31 | 57.0 1.0 3.0 128.0 229.0 0.0 2.0 150.0 0.0 0.4 2.0 1.0 7.0 2
32 | 66.0 1.0 4.0 160.0 228.0 0.0 2.0 138.0 0.0 2.3 1.0 0.0 6.0 1
33 | 37.0 0.0 3.0 120.0 215.0 0.0 0.0 170.0 0.0 0.0 1.0 0.0 3.0 1
34 | 59.0 1.0 4.0 170.0 326.0 0.0 2.0 140.0 1.0 3.4 3.0 0.0 7.0 2
35 | 50.0 1.0 4.0 144.0 200.0 0.0 2.0 126.0 1.0 0.9 2.0 0.0 7.0 2
36 | 48.0 1.0 4.0 130.0 256.0 1.0 2.0 150.0 1.0 0.0 1.0 2.0 7.0 2
37 | 61.0 1.0 4.0 140.0 207.0 0.0 2.0 138.0 1.0 1.9 1.0 1.0 7.0 2
38 | 59.0 1.0 1.0 160.0 273.0 0.0 2.0 125.0 0.0 0.0 1.0 0.0 3.0 2
39 | 42.0 1.0 3.0 130.0 180.0 0.0 0.0 150.0 0.0 0.0 1.0 0.0 3.0 1
40 | 48.0 1.0 4.0 122.0 222.0 0.0 2.0 186.0 0.0 0.0 1.0 0.0 3.0 1
41 | 40.0 1.0 4.0 152.0 223.0 0.0 0.0 181.0 0.0 0.0 1.0 0.0 7.0 2
42 | 62.0 0.0 4.0 124.0 209.0 0.0 0.0 163.0 0.0 0.0 1.0 0.0 3.0 1
43 | 44.0 1.0 3.0 130.0 233.0 0.0 0.0 179.0 1.0 0.4 1.0 0.0 3.0 1
44 | 46.0 1.0 2.0 101.0 197.0 1.0 0.0 156.0 0.0 0.0 1.0 0.0 7.0 1
45 | 59.0 1.0 3.0 126.0 218.0 1.0 0.0 134.0 0.0 2.2 2.0 1.0 6.0 2
46 | 58.0 1.0 3.0 140.0 211.0 1.0 2.0 165.0 0.0 0.0 1.0 0.0 3.0 1
47 | 49.0 1.0 3.0 118.0 149.0 0.0 2.0 126.0 0.0 0.8 1.0 3.0 3.0 2
48 | 44.0 1.0 4.0 110.0 197.0 0.0 2.0 177.0 0.0 0.0 1.0 1.0 3.0 2
49 | 66.0 1.0 2.0 160.0 246.0 0.0 0.0 120.0 1.0 0.0 2.0 3.0 6.0 2
50 | 65.0 0.0 4.0 150.0 225.0 0.0 2.0 114.0 0.0 1.0 2.0 3.0 7.0 2
51 | 42.0 1.0 4.0 136.0 315.0 0.0 0.0 125.0 1.0 1.8 2.0 0.0 6.0 2
52 | 52.0 1.0 2.0 128.0 205.0 1.0 0.0 184.0 0.0 0.0 1.0 0.0 3.0 1
53 | 65.0 0.0 3.0 140.0 417.0 1.0 2.0 157.0 0.0 0.8 1.0 1.0 3.0 1
54 | 63.0 0.0 2.0 140.0 195.0 0.0 0.0 179.0 0.0 0.0 1.0 2.0 3.0 1
55 | 45.0 0.0 2.0 130.0 234.0 0.0 2.0 175.0 0.0 0.6 2.0 0.0 3.0 1
56 | 41.0 0.0 2.0 105.0 198.0 0.0 0.0 168.0 0.0 0.0 1.0 1.0 3.0 1
57 | 61.0 1.0 4.0 138.0 166.0 0.0 2.0 125.0 1.0 3.6 2.0 1.0 3.0 2
58 | 60.0 0.0 3.0 120.0 178.0 1.0 0.0 96.0 0.0 0.0 1.0 0.0 3.0 1
59 | 59.0 0.0 4.0 174.0 249.0 0.0 0.0 143.0 1.0 0.0 2.0 0.0 3.0 2
60 | 62.0 1.0 2.0 120.0 281.0 0.0 2.0 103.0 0.0 1.4 2.0 1.0 7.0 2
61 | 57.0 1.0 3.0 150.0 126.0 1.0 0.0 173.0 0.0 0.2 1.0 1.0 7.0 1
62 | 51.0 0.0 4.0 130.0 305.0 0.0 0.0 142.0 1.0 1.2 2.0 0.0 7.0 2
63 | 44.0 1.0 3.0 120.0 226.0 0.0 0.0 169.0 0.0 0.0 1.0 0.0 3.0 1
64 | 60.0 0.0 1.0 150.0 240.0 0.0 0.0 171.0 0.0 0.9 1.0 0.0 3.0 1
65 | 63.0 1.0 1.0 145.0 233.0 1.0 2.0 150.0 0.0 2.3 3.0 0.0 6.0 1
66 | 57.0 1.0 4.0 150.0 276.0 0.0 2.0 112.0 1.0 0.6 2.0 1.0 6.0 2
67 | 51.0 1.0 4.0 140.0 261.0 0.0 2.0 186.0 1.0 0.0 1.0 0.0 3.0 1
68 | 58.0 0.0 2.0 136.0 319.0 1.0 2.0 152.0 0.0 0.0 1.0 2.0 3.0 2
69 | 44.0 0.0 3.0 118.0 242.0 0.0 0.0 149.0 0.0 0.3 2.0 1.0 3.0 1
70 | 47.0 1.0 3.0 108.0 243.0 0.0 0.0 152.0 0.0 0.0 1.0 0.0 3.0 2
71 | 61.0 1.0 4.0 120.0 260.0 0.0 0.0 140.0 1.0 3.6 2.0 1.0 7.0 2
72 | 57.0 0.0 4.0 120.0 354.0 0.0 0.0 163.0 1.0 0.6 1.0 0.0 3.0 1
73 | 70.0 1.0 2.0 156.0 245.0 0.0 2.0 143.0 0.0 0.0 1.0 0.0 3.0 1
74 | 76.0 0.0 3.0 140.0 197.0 0.0 1.0 116.0 0.0 1.1 2.0 0.0 3.0 1
75 | 67.0 0.0 4.0 106.0 223.0 0.0 0.0 142.0 0.0 0.3 1.0 2.0 3.0 1
76 | 45.0 1.0 4.0 142.0 309.0 0.0 2.0 147.0 1.0 0.0 2.0 3.0 7.0 2
77 | 45.0 1.0 4.0 104.0 208.0 0.0 2.0 148.0 1.0 3.0 2.0 0.0 3.0 1
78 | 39.0 0.0 3.0 94.0 199.0 0.0 0.0 179.0 0.0 0.0 1.0 0.0 3.0 1
79 | 42.0 0.0 3.0 120.0 209.0 0.0 0.0 173.0 0.0 0.0 2.0 0.0 3.0 1
80 | 56.0 1.0 2.0 120.0 236.0 0.0 0.0 178.0 0.0 0.8 1.0 0.0 3.0 1
81 | 58.0 1.0 4.0 146.0 218.0 0.0 0.0 105.0 0.0 2.0 2.0 1.0 7.0 2
82 | 35.0 1.0 4.0 120.0 198.0 0.0 0.0 130.0 1.0 1.6 2.0 0.0 7.0 2
83 | 58.0 1.0 4.0 150.0 270.0 0.0 2.0 111.0 1.0 0.8 1.0 0.0 7.0 2
84 | 41.0 1.0 3.0 130.0 214.0 0.0 2.0 168.0 0.0 2.0 2.0 0.0 3.0 1
85 | 57.0 1.0 4.0 110.0 201.0 0.0 0.0 126.0 1.0 1.5 2.0 0.0 6.0 1
86 | 42.0 1.0 1.0 148.0 244.0 0.0 2.0 178.0 0.0 0.8 1.0 2.0 3.0 1
87 | 62.0 1.0 2.0 128.0 208.0 1.0 2.0 140.0 0.0 0.0 1.0 0.0 3.0 1
88 | 59.0 1.0 1.0 178.0 270.0 0.0 2.0 145.0 0.0 4.2 3.0 0.0 7.0 1
89 | 41.0 0.0 2.0 126.0 306.0 0.0 0.0 163.0 0.0 0.0 1.0 0.0 3.0 1
90 | 50.0 1.0 4.0 150.0 243.0 0.0 2.0 128.0 0.0 2.6 2.0 0.0 7.0 2
91 | 59.0 1.0 2.0 140.0 221.0 0.0 0.0 164.0 1.0 0.0 1.0 0.0 3.0 1
92 | 61.0 0.0 4.0 130.0 330.0 0.0 2.0 169.0 0.0 0.0 1.0 0.0 3.0 2
93 | 54.0 1.0 4.0 124.0 266.0 0.0 2.0 109.0 1.0 2.2 2.0 1.0 7.0 2
94 | 54.0 1.0 4.0 110.0 206.0 0.0 2.0 108.0 1.0 0.0 2.0 1.0 3.0 2
95 | 52.0 1.0 4.0 125.0 212.0 0.0 0.0 168.0 0.0 1.0 1.0 2.0 7.0 2
96 | 47.0 1.0 4.0 110.0 275.0 0.0 2.0 118.0 1.0 1.0 2.0 1.0 3.0 2
97 | 66.0 1.0 4.0 120.0 302.0 0.0 2.0 151.0 0.0 0.4 2.0 0.0 3.0 1
98 | 58.0 1.0 4.0 100.0 234.0 0.0 0.0 156.0 0.0 0.1 1.0 1.0 7.0 2
99 | 64.0 0.0 3.0 140.0 313.0 0.0 0.0 133.0 0.0 0.2 1.0 0.0 7.0 1
100 | 50.0 0.0 2.0 120.0 244.0 0.0 0.0 162.0 0.0 1.1 1.0 0.0 3.0 1
101 | 44.0 0.0 3.0 108.0 141.0 0.0 0.0 175.0 0.0 0.6 2.0 0.0 3.0 1
102 | 67.0 1.0 4.0 120.0 237.0 0.0 0.0 71.0 0.0 1.0 2.0 0.0 3.0 2
103 | 49.0 0.0 4.0 130.0 269.0 0.0 0.0 163.0 0.0 0.0 1.0 0.0 3.0 1
104 | 57.0 1.0 4.0 165.0 289.0 1.0 2.0 124.0 0.0 1.0 2.0 3.0 7.0 2
105 | 63.0 1.0 4.0 130.0 254.0 0.0 2.0 147.0 0.0 1.4 2.0 1.0 7.0 2
106 | 48.0 1.0 4.0 124.0 274.0 0.0 2.0 166.0 0.0 0.5 2.0 0.0 7.0 2
107 | 51.0 1.0 3.0 100.0 222.0 0.0 0.0 143.0 1.0 1.2 2.0 0.0 3.0 1
108 | 60.0 0.0 4.0 150.0 258.0 0.0 2.0 157.0 0.0 2.6 2.0 2.0 7.0 2
109 | 59.0 1.0 4.0 140.0 177.0 0.0 0.0 162.0 1.0 0.0 1.0 1.0 7.0 2
110 | 45.0 0.0 2.0 112.0 160.0 0.0 0.0 138.0 0.0 0.0 2.0 0.0 3.0 1
111 | 55.0 0.0 4.0 180.0 327.0 0.0 1.0 117.0 1.0 3.4 2.0 0.0 3.0 2
112 | 41.0 1.0 2.0 110.0 235.0 0.0 0.0 153.0 0.0 0.0 1.0 0.0 3.0 1
113 | 60.0 0.0 4.0 158.0 305.0 0.0 2.0 161.0 0.0 0.0 1.0 0.0 3.0 2
114 | 54.0 0.0 3.0 135.0 304.0 1.0 0.0 170.0 0.0 0.0 1.0 0.0 3.0 1
115 | 42.0 1.0 2.0 120.0 295.0 0.0 0.0 162.0 0.0 0.0 1.0 0.0 3.0 1
116 | 49.0 0.0 2.0 134.0 271.0 0.0 0.0 162.0 0.0 0.0 2.0 0.0 3.0 1
117 | 46.0 1.0 4.0 120.0 249.0 0.0 2.0 144.0 0.0 0.8 1.0 0.0 7.0 2
118 | 56.0 0.0 4.0 200.0 288.0 1.0 2.0 133.0 1.0 4.0 3.0 2.0 7.0 2
119 | 66.0 0.0 1.0 150.0 226.0 0.0 0.0 114.0 0.0 2.6 3.0 0.0 3.0 1
120 | 56.0 1.0 4.0 130.0 283.0 1.0 2.0 103.0 1.0 1.6 3.0 0.0 7.0 2
121 | 49.0 1.0 3.0 120.0 188.0 0.0 0.0 139.0 0.0 2.0 2.0 3.0 7.0 2
122 | 54.0 1.0 4.0 122.0 286.0 0.0 2.0 116.0 1.0 3.2 2.0 2.0 3.0 2
123 | 57.0 1.0 4.0 152.0 274.0 0.0 0.0 88.0 1.0 1.2 2.0 1.0 7.0 2
124 | 65.0 0.0 3.0 160.0 360.0 0.0 2.0 151.0 0.0 0.8 1.0 0.0 3.0 1
125 | 54.0 1.0 3.0 125.0 273.0 0.0 2.0 152.0 0.0 0.5 3.0 1.0 3.0 1
126 | 54.0 0.0 3.0 160.0 201.0 0.0 0.0 163.0 0.0 0.0 1.0 1.0 3.0 1
127 | 62.0 1.0 4.0 120.0 267.0 0.0 0.0 99.0 1.0 1.8 2.0 2.0 7.0 2
128 | 52.0 0.0 3.0 136.0 196.0 0.0 2.0 169.0 0.0 0.1 2.0 0.0 3.0 1
129 | 52.0 1.0 2.0 134.0 201.0 0.0 0.0 158.0 0.0 0.8 1.0 1.0 3.0 1
130 | 60.0 1.0 4.0 117.0 230.0 1.0 0.0 160.0 1.0 1.4 1.0 2.0 7.0 2
131 | 63.0 0.0 4.0 108.0 269.0 0.0 0.0 169.0 1.0 1.8 2.0 2.0 3.0 2
132 | 66.0 1.0 4.0 112.0 212.0 0.0 2.0 132.0 1.0 0.1 1.0 1.0 3.0 2
133 | 42.0 1.0 4.0 140.0 226.0 0.0 0.0 178.0 0.0 0.0 1.0 0.0 3.0 1
134 | 64.0 1.0 4.0 120.0 246.0 0.0 2.0 96.0 1.0 2.2 3.0 1.0 3.0 2
135 | 54.0 1.0 3.0 150.0 232.0 0.0 2.0 165.0 0.0 1.6 1.0 0.0 7.0 1
136 | 46.0 0.0 3.0 142.0 177.0 0.0 2.0 160.0 1.0 1.4 3.0 0.0 3.0 1
137 | 67.0 0.0 3.0 152.0 277.0 0.0 0.0 172.0 0.0 0.0 1.0 1.0 3.0 1
138 | 56.0 1.0 4.0 125.0 249.0 1.0 2.0 144.0 1.0 1.2 2.0 1.0 3.0 2
139 | 34.0 0.0 2.0 118.0 210.0 0.0 0.0 192.0 0.0 0.7 1.0 0.0 3.0 1
140 | 57.0 1.0 4.0 132.0 207.0 0.0 0.0 168.0 1.0 0.0 1.0 0.0 7.0 1
141 | 64.0 1.0 4.0 145.0 212.0 0.0 2.0 132.0 0.0 2.0 2.0 2.0 6.0 2
142 | 59.0 1.0 4.0 138.0 271.0 0.0 2.0 182.0 0.0 0.0 1.0 0.0 3.0 1
143 | 50.0 1.0 3.0 140.0 233.0 0.0 0.0 163.0 0.0 0.6 2.0 1.0 7.0 2
144 | 51.0 1.0 1.0 125.0 213.0 0.0 2.0 125.0 1.0 1.4 1.0 1.0 3.0 1
145 | 54.0 1.0 2.0 192.0 283.0 0.0 2.0 195.0 0.0 0.0 1.0 1.0 7.0 2
146 | 53.0 1.0 4.0 123.0 282.0 0.0 0.0 95.0 1.0 2.0 2.0 2.0 7.0 2
147 | 52.0 1.0 4.0 112.0 230.0 0.0 0.0 160.0 0.0 0.0 1.0 1.0 3.0 2
148 | 40.0 1.0 4.0 110.0 167.0 0.0 2.0 114.0 1.0 2.0 2.0 0.0 7.0 2
149 | 58.0 1.0 3.0 132.0 224.0 0.0 2.0 173.0 0.0 3.2 1.0 2.0 7.0 2
150 | 41.0 0.0 3.0 112.0 268.0 0.0 2.0 172.0 1.0 0.0 1.0 0.0 3.0 1
151 | 41.0 1.0 3.0 112.0 250.0 0.0 0.0 179.0 0.0 0.0 1.0 0.0 3.0 1
152 | 50.0 0.0 3.0 120.0 219.0 0.0 0.0 158.0 0.0 1.6 2.0 0.0 3.0 1
153 | 54.0 0.0 3.0 108.0 267.0 0.0 2.0 167.0 0.0 0.0 1.0 0.0 3.0 1
154 | 64.0 0.0 4.0 130.0 303.0 0.0 0.0 122.0 0.0 2.0 2.0 2.0 3.0 1
155 | 51.0 0.0 3.0 130.0 256.0 0.0 2.0 149.0 0.0 0.5 1.0 0.0 3.0 1
156 | 46.0 0.0 2.0 105.0 204.0 0.0 0.0 172.0 0.0 0.0 1.0 0.0 3.0 1
157 | 55.0 1.0 4.0 140.0 217.0 0.0 0.0 111.0 1.0 5.6 3.0 0.0 7.0 2
158 | 45.0 1.0 2.0 128.0 308.0 0.0 2.0 170.0 0.0 0.0 1.0 0.0 3.0 1
159 | 56.0 1.0 1.0 120.0 193.0 0.0 2.0 162.0 0.0 1.9 2.0 0.0 7.0 1
160 | 66.0 0.0 4.0 178.0 228.0 1.0 0.0 165.0 1.0 1.0 2.0 2.0 7.0 2
161 | 38.0 1.0 1.0 120.0 231.0 0.0 0.0 182.0 1.0 3.8 2.0 0.0 7.0 2
162 | 62.0 0.0 4.0 150.0 244.0 0.0 0.0 154.0 1.0 1.4 2.0 0.0 3.0 2
163 | 55.0 1.0 2.0 130.0 262.0 0.0 0.0 155.0 0.0 0.0 1.0 0.0 3.0 1
164 | 58.0 1.0 4.0 128.0 259.0 0.0 2.0 130.0 1.0 3.0 2.0 2.0 7.0 2
165 | 43.0 1.0 4.0 110.0 211.0 0.0 0.0 161.0 0.0 0.0 1.0 0.0 7.0 1
166 | 64.0 0.0 4.0 180.0 325.0 0.0 0.0 154.0 1.0 0.0 1.0 0.0 3.0 1
167 | 50.0 0.0 4.0 110.0 254.0 0.0 2.0 159.0 0.0 0.0 1.0 0.0 3.0 1
168 | 53.0 1.0 3.0 130.0 197.0 1.0 2.0 152.0 0.0 1.2 3.0 0.0 3.0 1
169 | 45.0 0.0 4.0 138.0 236.0 0.0 2.0 152.0 1.0 0.2 2.0 0.0 3.0 1
170 | 65.0 1.0 1.0 138.0 282.0 1.0 2.0 174.0 0.0 1.4 2.0 1.0 3.0 2
171 | 69.0 1.0 1.0 160.0 234.0 1.0 2.0 131.0 0.0 0.1 2.0 1.0 3.0 1
172 | 69.0 1.0 3.0 140.0 254.0 0.0 2.0 146.0 0.0 2.0 2.0 3.0 7.0 2
173 | 67.0 1.0 4.0 100.0 299.0 0.0 2.0 125.0 1.0 0.9 2.0 2.0 3.0 2
174 | 68.0 0.0 3.0 120.0 211.0 0.0 2.0 115.0 0.0 1.5 2.0 0.0 3.0 1
175 | 34.0 1.0 1.0 118.0 182.0 0.0 2.0 174.0 0.0 0.0 1.0 0.0 3.0 1
176 | 62.0 0.0 4.0 138.0 294.0 1.0 0.0 106.0 0.0 1.9 2.0 3.0 3.0 2
177 | 51.0 1.0 4.0 140.0 298.0 0.0 0.0 122.0 1.0 4.2 2.0 3.0 7.0 2
178 | 46.0 1.0 3.0 150.0 231.0 0.0 0.0 147.0 0.0 3.6 2.0 0.0 3.0 2
179 | 67.0 1.0 4.0 125.0 254.0 1.0 0.0 163.0 0.0 0.2 2.0 2.0 7.0 2
180 | 50.0 1.0 3.0 129.0 196.0 0.0 0.0 163.0 0.0 0.0 1.0 0.0 3.0 1
181 | 42.0 1.0 3.0 120.0 240.0 1.0 0.0 194.0 0.0 0.8 3.0 0.0 7.0 1
182 | 56.0 0.0 4.0 134.0 409.0 0.0 2.0 150.0 1.0 1.9 2.0 2.0 7.0 2
183 | 41.0 1.0 4.0 110.0 172.0 0.0 2.0 158.0 0.0 0.0 1.0 0.0 7.0 2
184 | 42.0 0.0 4.0 102.0 265.0 0.0 2.0 122.0 0.0 0.6 2.0 0.0 3.0 1
185 | 53.0 1.0 3.0 130.0 246.0 1.0 2.0 173.0 0.0 0.0 1.0 3.0 3.0 1
186 | 43.0 1.0 3.0 130.0 315.0 0.0 0.0 162.0 0.0 1.9 1.0 1.0 3.0 1
187 | 56.0 1.0 4.0 132.0 184.0 0.0 2.0 105.0 1.0 2.1 2.0 1.0 6.0 2
188 | 52.0 1.0 4.0 108.0 233.0 1.0 0.0 147.0 0.0 0.1 1.0 3.0 7.0 1
189 | 62.0 0.0 4.0 140.0 394.0 0.0 2.0 157.0 0.0 1.2 2.0 0.0 3.0 1
190 | 70.0 1.0 3.0 160.0 269.0 0.0 0.0 112.0 1.0 2.9 2.0 1.0 7.0 2
191 | 54.0 1.0 4.0 140.0 239.0 0.0 0.0 160.0 0.0 1.2 1.0 0.0 3.0 1
192 | 70.0 1.0 4.0 145.0 174.0 0.0 0.0 125.0 1.0 2.6 3.0 0.0 7.0 2
193 | 54.0 1.0 2.0 108.0 309.0 0.0 0.0 156.0 0.0 0.0 1.0 0.0 7.0 1
194 | 35.0 1.0 4.0 126.0 282.0 0.0 2.0 156.0 1.0 0.0 1.0 0.0 7.0 2
195 | 48.0 1.0 3.0 124.0 255.0 1.0 0.0 175.0 0.0 0.0 1.0 2.0 3.0 1
196 | 55.0 0.0 2.0 135.0 250.0 0.0 2.0 161.0 0.0 1.4 2.0 0.0 3.0 1
197 | 58.0 0.0 4.0 100.0 248.0 0.0 2.0 122.0 0.0 1.0 2.0 0.0 3.0 1
198 | 54.0 0.0 3.0 110.0 214.0 0.0 0.0 158.0 0.0 1.6 2.0 0.0 3.0 1
199 | 69.0 0.0 1.0 140.0 239.0 0.0 0.0 151.0 0.0 1.8 1.0 2.0 3.0 1
200 | 77.0 1.0 4.0 125.0 304.0 0.0 2.0 162.0 1.0 0.0 1.0 3.0 3.0 2
201 | 68.0 1.0 3.0 118.0 277.0 0.0 0.0 151.0 0.0 1.0 1.0 1.0 7.0 1
202 | 58.0 1.0 4.0 125.0 300.0 0.0 2.0 171.0 0.0 0.0 1.0 2.0 7.0 2
203 | 60.0 1.0 4.0 125.0 258.0 0.0 2.0 141.0 1.0 2.8 2.0 1.0 7.0 2
204 | 51.0 1.0 4.0 140.0 299.0 0.0 0.0 173.0 1.0 1.6 1.0 0.0 7.0 2
205 | 55.0 1.0 4.0 160.0 289.0 0.0 2.0 145.0 1.0 0.8 2.0 1.0 7.0 2
206 | 52.0 1.0 1.0 152.0 298.0 1.0 0.0 178.0 0.0 1.2 2.0 0.0 7.0 1
207 | 60.0 0.0 3.0 102.0 318.0 0.0 0.0 160.0 0.0 0.0 1.0 1.0 3.0 1
208 | 58.0 1.0 3.0 105.0 240.0 0.0 2.0 154.0 1.0 0.6 2.0 0.0 7.0 1
209 | 64.0 1.0 3.0 125.0 309.0 0.0 0.0 131.0 1.0 1.8 2.0 0.0 7.0 2
210 | 37.0 1.0 3.0 130.0 250.0 0.0 0.0 187.0 0.0 3.5 3.0 0.0 3.0 1
211 | 59.0 1.0 1.0 170.0 288.0 0.0 2.0 159.0 0.0 0.2 2.0 0.0 7.0 2
212 | 51.0 1.0 3.0 125.0 245.0 1.0 2.0 166.0 0.0 2.4 2.0 0.0 3.0 1
213 | 43.0 0.0 3.0 122.0 213.0 0.0 0.0 165.0 0.0 0.2 2.0 0.0 3.0 1
214 | 58.0 1.0 4.0 128.0 216.0 0.0 2.0 131.0 1.0 2.2 2.0 3.0 7.0 2
215 | 29.0 1.0 2.0 130.0 204.0 0.0 2.0 202.0 0.0 0.0 1.0 0.0 3.0 1
216 | 41.0 0.0 2.0 130.0 204.0 0.0 2.0 172.0 0.0 1.4 1.0 0.0 3.0 1
217 | 63.0 0.0 3.0 135.0 252.0 0.0 2.0 172.0 0.0 0.0 1.0 0.0 3.0 1
218 | 51.0 1.0 3.0 94.0 227.0 0.0 0.0 154.0 1.0 0.0 1.0 1.0 7.0 1
219 | 54.0 1.0 3.0 120.0 258.0 0.0 2.0 147.0 0.0 0.4 2.0 0.0 7.0 1
220 | 44.0 1.0 2.0 120.0 220.0 0.0 0.0 170.0 0.0 0.0 1.0 0.0 3.0 1
221 | 54.0 1.0 4.0 110.0 239.0 0.0 0.0 126.0 1.0 2.8 2.0 1.0 7.0 2
222 | 65.0 1.0 4.0 135.0 254.0 0.0 2.0 127.0 0.0 2.8 2.0 1.0 7.0 2
223 | 57.0 1.0 3.0 150.0 168.0 0.0 0.0 174.0 0.0 1.6 1.0 0.0 3.0 1
224 | 63.0 1.0 4.0 130.0 330.0 1.0 2.0 132.0 1.0 1.8 1.0 3.0 7.0 2
225 | 35.0 0.0 4.0 138.0 183.0 0.0 0.0 182.0 0.0 1.4 1.0 0.0 3.0 1
226 | 41.0 1.0 2.0 135.0 203.0 0.0 0.0 132.0 0.0 0.0 2.0 0.0 6.0 1
227 | 62.0 0.0 3.0 130.0 263.0 0.0 0.0 97.0 0.0 1.2 2.0 1.0 7.0 2
228 | 43.0 0.0 4.0 132.0 341.0 1.0 2.0 136.0 1.0 3.0 2.0 0.0 7.0 2
229 | 58.0 0.0 1.0 150.0 283.0 1.0 2.0 162.0 0.0 1.0 1.0 0.0 3.0 1
230 | 52.0 1.0 1.0 118.0 186.0 0.0 2.0 190.0 0.0 0.0 2.0 0.0 6.0 1
231 | 61.0 0.0 4.0 145.0 307.0 0.0 2.0 146.0 1.0 1.0 2.0 0.0 7.0 2
232 | 39.0 1.0 4.0 118.0 219.0 0.0 0.0 140.0 0.0 1.2 2.0 0.0 7.0 2
233 | 45.0 1.0 4.0 115.0 260.0 0.0 2.0 185.0 0.0 0.0 1.0 0.0 3.0 1
234 | 52.0 1.0 4.0 128.0 255.0 0.0 0.0 161.0 1.0 0.0 1.0 1.0 7.0 2
235 | 62.0 1.0 3.0 130.0 231.0 0.0 0.0 146.0 0.0 1.8 2.0 3.0 7.0 1
236 | 62.0 0.0 4.0 160.0 164.0 0.0 2.0 145.0 0.0 6.2 3.0 3.0 7.0 2
237 | 53.0 0.0 4.0 138.0 234.0 0.0 2.0 160.0 0.0 0.0 1.0 0.0 3.0 1
238 | 43.0 1.0 4.0 120.0 177.0 0.0 2.0 120.0 1.0 2.5 2.0 0.0 7.0 2
239 | 47.0 1.0 3.0 138.0 257.0 0.0 2.0 156.0 0.0 0.0 1.0 0.0 3.0 1
240 | 52.0 1.0 2.0 120.0 325.0 0.0 0.0 172.0 0.0 0.2 1.0 0.0 3.0 1
241 | 68.0 1.0 3.0 180.0 274.0 1.0 2.0 150.0 1.0 1.6 2.0 0.0 7.0 2
242 | 39.0 1.0 3.0 140.0 321.0 0.0 2.0 182.0 0.0 0.0 1.0 0.0 3.0 1
243 | 53.0 0.0 4.0 130.0 264.0 0.0 2.0 143.0 0.0 0.4 2.0 0.0 3.0 1
244 | 62.0 0.0 4.0 140.0 268.0 0.0 2.0 160.0 0.0 3.6 3.0 2.0 3.0 2
245 | 51.0 0.0 3.0 140.0 308.0 0.0 2.0 142.0 0.0 1.5 1.0 1.0 3.0 1
246 | 60.0 1.0 4.0 130.0 253.0 0.0 0.0 144.0 1.0 1.4 1.0 1.0 7.0 2
247 | 65.0 1.0 4.0 110.0 248.0 0.0 2.0 158.0 0.0 0.6 1.0 2.0 6.0 2
248 | 65.0 0.0 3.0 155.0 269.0 0.0 0.0 148.0 0.0 0.8 1.0 0.0 3.0 1
249 | 60.0 1.0 3.0 140.0 185.0 0.0 2.0 155.0 0.0 3.0 2.0 0.0 3.0 2
250 | 60.0 1.0 4.0 145.0 282.0 0.0 2.0 142.0 1.0 2.8 2.0 2.0 7.0 2
251 | 54.0 1.0 4.0 120.0 188.0 0.0 0.0 113.0 0.0 1.4 2.0 1.0 7.0 2
252 | 44.0 1.0 2.0 130.0 219.0 0.0 2.0 188.0 0.0 0.0 1.0 0.0 3.0 1
253 | 44.0 1.0 4.0 112.0 290.0 0.0 2.0 153.0 0.0 0.0 1.0 1.0 3.0 2
254 | 51.0 1.0 3.0 110.0 175.0 0.0 0.0 123.0 0.0 0.6 1.0 0.0 3.0 1
255 | 59.0 1.0 3.0 150.0 212.0 1.0 0.0 157.0 0.0 1.6 1.0 0.0 3.0 1
256 | 71.0 0.0 2.0 160.0 302.0 0.0 0.0 162.0 0.0 0.4 1.0 2.0 3.0 1
257 | 61.0 1.0 3.0 150.0 243.0 1.0 0.0 137.0 1.0 1.0 2.0 0.0 3.0 1
258 | 55.0 1.0 4.0 132.0 353.0 0.0 0.0 132.0 1.0 1.2 2.0 1.0 7.0 2
259 | 64.0 1.0 3.0 140.0 335.0 0.0 0.0 158.0 0.0 0.0 1.0 0.0 3.0 2
260 | 43.0 1.0 4.0 150.0 247.0 0.0 0.0 171.0 0.0 1.5 1.0 0.0 3.0 1
261 | 58.0 0.0 3.0 120.0 340.0 0.0 0.0 172.0 0.0 0.0 1.0 0.0 3.0 1
262 | 60.0 1.0 4.0 130.0 206.0 0.0 2.0 132.0 1.0 2.4 2.0 2.0 7.0 2
263 | 58.0 1.0 2.0 120.0 284.0 0.0 2.0 160.0 0.0 1.8 2.0 0.0 3.0 2
264 | 49.0 1.0 2.0 130.0 266.0 0.0 0.0 171.0 0.0 0.6 1.0 0.0 3.0 1
265 | 48.0 1.0 2.0 110.0 229.0 0.0 0.0 168.0 0.0 1.0 3.0 0.0 7.0 2
266 | 52.0 1.0 3.0 172.0 199.0 1.0 0.0 162.0 0.0 0.5 1.0 0.0 7.0 1
267 | 44.0 1.0 2.0 120.0 263.0 0.0 0.0 173.0 0.0 0.0 1.0 0.0 7.0 1
268 | 56.0 0.0 2.0 140.0 294.0 0.0 2.0 153.0 0.0 1.3 2.0 0.0 3.0 1
269 | 57.0 1.0 4.0 140.0 192.0 0.0 0.0 148.0 0.0 0.4 2.0 0.0 6.0 1
270 | 67.0 1.0 4.0 160.0 286.0 0.0 2.0 108.0 1.0 1.5 2.0 3.0 3.0 2
271 |
--------------------------------------------------------------------------------
/data/labor.arff:
--------------------------------------------------------------------------------
1 | % Date: Tue, 15 Nov 88 15:44:08 EST
2 | % From: stan
3 | % To: aha@ICS.UCI.EDU
4 | %
5 | % 1. Title: Final settlements in labor negotiations in Canadian industry
6 | %
7 | % 2. Source Information
8 | % -- Creators: Collective Bargaining Review, monthly publication,
9 | % Labour Canada, Industrial Relations Information Service,
10 | % Ottawa, Ontario, K1A 0J2, Canada, (819) 997-3117
11 | % The data includes all collective agreements reached
12 | % in the business and personal services sector for locals
13 | % with at least 500 members (teachers, nurses, university
14 | % staff, police, etc) in Canada in 87 and first quarter of 88.
15 | % -- Donor: Stan Matwin, Computer Science Dept, University of Ottawa,
16 | % 34 Somerset East, K1N 9B4, (stan@uotcsi2.bitnet)
17 | % -- Date: November 1988
18 | %
19 | % 3. Past Usage:
20 | % -- testing concept learning software, in particular
21 | % an experimental method to learn two-tiered concept descriptions.
22 | % The data was used to learn the description of an acceptable
23 | % and unacceptable contract.
24 | % The unacceptable contracts were either obtained by interviewing
25 | % experts, or by inventing near misses.
26 | % Examples of use are described in:
27 | % Bergadano, F., Matwin, S., Michalski, R.,
28 | % Zhang, J., Measuring Quality of Concept Descriptions,
29 | % Procs. of the 3rd European Working Sessions on Learning,
30 | % Glasgow, October 1988.
31 | % Bergadano, F., Matwin, S., Michalski, R., Zhang, J.,
32 | % Representing and Acquiring Imprecise and Context-dependent
33 | % Concepts in Knowledge-based Systems, Procs. of ISMIS'88,
34 | % North Holland, 1988.
35 | % 4. Relevant Information:
36 | % -- data was used to test 2tier approach with learning
37 | % from positive and negative examples
38 | %
39 | % 5. Number of Instances: 57
40 | %
41 | % 6. Number of Attributes: 16
42 | %
43 | % 7. Attribute Information:
44 | % 1. dur: duration of agreement
45 | % [1..7]
46 | % 2 wage1.wage : wage increase in first year of contract
47 | % [2.0 .. 7.0]
48 | % 3 wage2.wage : wage increase in second year of contract
49 | % [2.0 .. 7.0]
50 | % 4 wage3.wage : wage increase in third year of contract
51 | % [2.0 .. 7.0]
52 | % 5 cola : cost of living allowance
53 | % [none, tcf, tc]
54 | % 6 hours.hrs : number of working hours during week
55 | % [35 .. 40]
56 | % 7 pension : employer contributions to pension plan
57 | % [none, ret_allw, empl_contr]
58 | % 8 stby_pay : standby pay
59 | % [2 .. 25]
60 | % 9 shift_diff : shift differential : supplement for work on II and III shift
61 | % [1 .. 25]
62 | % 10 educ_allw.boolean : education allowance
63 | % [true false]
64 | % 11 holidays : number of statutory holidays
65 | % [9 .. 15]
66 | % 12 vacation : number of paid vacation days
67 | % [ba, avg, gnr]
68 | % 13 lngtrm_disabil.boolean :
69 | % employer's help during employee longterm disability
70 | % [true , false]
71 | % 14 dntl_ins : employers contribution towards the dental plan
72 | % [none, half, full]
73 | % 15 bereavement.boolean : employer's financial contribution towards the
74 | % covering the costs of bereavement
75 | % [true , false]
76 | % 16 empl_hplan : employer's contribution towards the health plan
77 | % [none, half, full]
78 | %
79 | % 8. Missing Attribute Values: None
80 | %
81 | % 9. Class Distribution:
82 | %
83 | % 10. Exceptions from format instructions: no commas between attribute values.
84 | %
85 | %
86 | @relation labor
87 | @attribute 'duration' real
88 | @attribute 'wage-increase-first-year' real
89 | @attribute 'wage-increase-second-year' real
90 | @attribute 'wage-increase-third-year' real
91 | @attribute 'cost-of-living-adjustment' {'none','tcf','tc'}
92 | @attribute 'working-hours' real
93 | @attribute 'pension' {'none','ret_allw','empl_contr'}
94 | @attribute 'standby-pay' real
95 | @attribute 'shift-differential' real
96 | @attribute 'education-allowance' {'yes','no'}
97 | @attribute 'statutory-holidays' real
98 | @attribute 'vacation' {'below_average','average','generous'}
99 | @attribute 'longterm-disability-assistance' {'yes','no'}
100 | @attribute 'contribution-to-dental-plan' {'none','half','full'}
101 | @attribute 'bereavement-assistance' {'yes','no'}
102 | @attribute 'contribution-to-health-plan' {'none','half','full'}
103 | @attribute 'class' {'bad','good'}
104 | @data
105 | 1,5,?,?,?,40,?,?,2,?,11,'average',?,?,'yes',?,'good'
106 | 2,4.5,5.8,?,?,35,'ret_allw',?,?,'yes',11,'below_average',?,'full',?,'full','good'
107 | ?,?,?,?,?,38,'empl_contr',?,5,?,11,'generous','yes','half','yes','half','good'
108 | 3,3.7,4,5,'tc',?,?,?,?,'yes',?,?,?,?,'yes',?,'good'
109 | 3,4.5,4.5,5,?,40,?,?,?,?,12,'average',?,'half','yes','half','good'
110 | 2,2,2.5,?,?,35,?,?,6,'yes',12,'average',?,?,?,?,'good'
111 | 3,4,5,5,'tc',?,'empl_contr',?,?,?,12,'generous','yes','none','yes','half','good'
112 | 3,6.9,4.8,2.3,?,40,?,?,3,?,12,'below_average',?,?,?,?,'good'
113 | 2,3,7,?,?,38,?,12,25,'yes',11,'below_average','yes','half','yes',?,'good'
114 | 1,5.7,?,?,'none',40,'empl_contr',?,4,?,11,'generous','yes','full',?,?,'good'
115 | 3,3.5,4,4.6,'none',36,?,?,3,?,13,'generous',?,?,'yes','full','good'
116 | 2,6.4,6.4,?,?,38,?,?,4,?,15,?,?,'full',?,?,'good'
117 | 2,3.5,4,?,'none',40,?,?,2,'no',10,'below_average','no','half',?,'half','bad'
118 | 3,3.5,4,5.1,'tcf',37,?,?,4,?,13,'generous',?,'full','yes','full','good'
119 | 1,3,?,?,'none',36,?,?,10,'no',11,'generous',?,?,?,?,'good'
120 | 2,4.5,4,?,'none',37,'empl_contr',?,?,?,11,'average',?,'full','yes',?,'good'
121 | 1,2.8,?,?,?,35,?,?,2,?,12,'below_average',?,?,?,?,'good'
122 | 1,2.1,?,?,'tc',40,'ret_allw',2,3,'no',9,'below_average','yes','half',?,'none','bad'
123 | 1,2,?,?,'none',38,'none',?,?,'yes',11,'average','no','none','no','none','bad'
124 | 2,4,5,?,'tcf',35,?,13,5,?,15,'generous',?,?,?,?,'good'
125 | 2,4.3,4.4,?,?,38,?,?,4,?,12,'generous',?,'full',?,'full','good'
126 | 2,2.5,3,?,?,40,'none',?,?,?,11,'below_average',?,?,?,?,'bad'
127 | 3,3.5,4,4.6,'tcf',27,?,?,?,?,?,?,?,?,?,?,'good'
128 | 2,4.5,4,?,?,40,?,?,4,?,10,'generous',?,'half',?,'full','good'
129 | 1,6,?,?,?,38,?,8,3,?,9,'generous',?,?,?,?,'good'
130 | 3,2,2,2,'none',40,'none',?,?,?,10,'below_average',?,'half','yes','full','bad'
131 | 2,4.5,4.5,?,'tcf',?,?,?,?,'yes',10,'below_average','yes','none',?,'half','good'
132 | 2,3,3,?,'none',33,?,?,?,'yes',12,'generous',?,?,'yes','full','good'
133 | 2,5,4,?,'none',37,?,?,5,'no',11,'below_average','yes','full','yes','full','good'
134 | 3,2,2.5,?,?,35,'none',?,?,?,10,'average',?,?,'yes','full','bad'
135 | 3,4.5,4.5,5,'none',40,?,?,?,'no',11,'average',?,'half',?,?,'good'
136 | 3,3,2,2.5,'tc',40,'none',?,5,'no',10,'below_average','yes','half','yes','full','bad'
137 | 2,2.5,2.5,?,?,38,'empl_contr',?,?,?,10,'average',?,?,?,?,'bad'
138 | 2,4,5,?,'none',40,'none',?,3,'no',10,'below_average','no','none',?,'none','bad'
139 | 3,2,2.5,2.1,'tc',40,'none',2,1,'no',10,'below_average','no','half','yes','full','bad'
140 | 2,2,2,?,'none',40,'none',?,?,'no',11,'average','yes','none','yes','full','bad'
141 | 1,2,?,?,'tc',40,'ret_allw',4,0,'no',11,'generous','no','none','no','none','bad'
142 | 1,2.8,?,?,'none',38,'empl_contr',2,3,'no',9,'below_average','yes','half',?,'none','bad'
143 | 3,2,2.5,2,?,37,'empl_contr',?,?,?,10,'average',?,?,'yes','none','bad'
144 | 2,4.5,4,?,'none',40,?,?,4,?,12,'average','yes','full','yes','half','good'
145 | 1,4,?,?,'none',?,'none',?,?,'yes',11,'average','no','none','no','none','bad'
146 | 2,2,3,?,'none',38,'empl_contr',?,?,'yes',12,'generous','yes','none','yes','full','bad'
147 | 2,2.5,2.5,?,'tc',39,'empl_contr',?,?,?,12,'average',?,?,'yes',?,'bad'
148 | 2,2.5,3,?,'tcf',40,'none',?,?,?,11,'below_average',?,?,'yes',?,'bad'
149 | 2,4,4,?,'none',40,'none',?,3,?,10,'below_average','no','none',?,'none','bad'
150 | 2,4.5,4,?,?,40,?,?,2,'no',10,'below_average','no','half',?,'half','bad'
151 | 2,4.5,4,?,'none',40,?,?,5,?,11,'average',?,'full','yes','full','good'
152 | 2,4.6,4.6,?,'tcf',38,?,?,?,?,?,?,'yes','half',?,'half','good'
153 | 2,5,4.5,?,'none',38,?,14,5,?,11,'below_average','yes',?,?,'full','good'
154 | 2,5.7,4.5,?,'none',40,'ret_allw',?,?,?,11,'average','yes','full','yes','full','good'
155 | 2,7,5.3,?,?,?,?,?,?,?,11,?,'yes','full',?,?,'good'
156 | 3,2,3,?,'tcf',?,'empl_contr',?,?,'yes',?,?,'yes','half','yes',?,'good'
157 | 3,3.5,4,4.5,'tcf',35,?,?,?,?,13,'generous',?,?,'yes','full','good'
158 | 3,4,3.5,?,'none',40,'empl_contr',?,6,?,11,'average','yes','full',?,'full','good'
159 | 3,5,4.4,?,'none',38,'empl_contr',10,6,?,11,'generous','yes',?,?,'full','good'
160 | 3,5,5,5,?,40,?,?,?,?,12,'average',?,'half','yes','half','good'
161 | 3,6,6,4,?,35,?,?,14,?,9,'generous','yes','full','yes','full','good'
162 | %
163 | %
164 | %
165 |
--------------------------------------------------------------------------------
/data/load_all_datasets.py:
--------------------------------------------------------------------------------
1 | """
2 | Uses Python introspection to call all functions in `data.load_datasets`.
3 |
4 | Written by Gilles Vandewiele, commissioned by IDLab - INTEC of Ghent University.
5 | """
6 |
7 | import data.load_datasets
8 | from inspect import getmembers, isfunction
9 |
10 |
11 | def load_all_datasets():
12 | """
13 |     Uses Python introspection to call all functions in `data.load_datasets`.
14 |
15 | **Returns**
16 | -----------
17 | a list of loaded datasets
18 | """
19 | datasets = []
20 |     for o in getmembers(data.load_datasets):  # iterate over (name, object) pairs
21 |         if isfunction(o[1]):  # every top-level function in the module is treated as a dataset loader
22 | df, feature_cols, label_col, name = o[1]()
23 | datasets.append({'dataframe': df, 'feature_cols': feature_cols, 'label_col': label_col, 'name': name})
24 |
25 | return datasets
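26 | 
27 | 
28 | # Example usage: a minimal sketch, not part of the original module. It assumes
29 | # the repository root is on the Python path so that `data.load_datasets`
30 | # resolves (e.g. run as `python -m data.load_all_datasets` from the repo root).
31 | if __name__ == '__main__':
32 |     for dataset in load_all_datasets():
33 |         # each entry is a dict with 'dataframe', 'feature_cols', 'label_col', 'name'
34 |         print dataset['name'], dataset['dataframe'].shape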
--------------------------------------------------------------------------------
/data/load_datasets.py:
--------------------------------------------------------------------------------
1 | """Contains data set loading functions. If you want the test script to include a new dataset, a new function must
2 | be written in this module that returns a pandas DataFrame, the feature column names, the label column name and the
3 | dataset name.
4 |
5 | Written by Gilles Vandewiele, commissioned by IDLab - INTEC of Ghent University.
6 | """
7 |
8 | from sklearn import datasets
9 |
10 | import pandas as pd
11 | import numpy as np
12 | import os
13 |
14 |
15 | # def load_wine():
16 | # columns = ['Class', 'Alcohol', 'Acid', 'Ash', 'Alcalinity', 'Magnesium', 'Phenols', 'Flavanoids', 'Nonflavanoids',
17 | # 'Proanthocyanins', 'Color', 'Hue', 'Diluted', 'Proline']
18 | # features = ['Alcohol', 'Acid', 'Ash', 'Alcalinity', 'Magnesium', 'Phenols', 'Flavanoids', 'Nonflavanoids',
19 | # 'Proanthocyanins', 'Color', 'Hue', 'Diluted', 'Proline']
20 | # df = pd.read_csv(os.path.join(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1]), 'wine.data'))
21 | # df.columns = columns
22 | # df['Class'] = np.subtract(df['Class'], 1)
23 | #
24 | # return df, features, 'Class', 'wine'
25 | #
26 | #
27 | # def load_cars():
28 | # columns = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'Class']
29 | # features = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety']
30 | # df = pd.read_csv(os.path.join(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1]), 'car.data'))
31 | # df.columns = columns
32 | # df = df.reindex(np.random.permutation(df.index)).reset_index(drop=1)
33 | #
34 | # mapping_buy_maint = {'low': 0, 'med': 1, 'high': 2, 'vhigh': 3}
35 | # mapping_doors = {'2': 0, '3': 1, '4': 2, '5more': 3}
36 | # mapping_persons = {'2': 0, '4': 1, 'more': 2}
37 | # mapping_lug = {'small': 0, 'med': 1, 'big': 2}
38 | # mapping_safety = {'low': 0, 'med': 1, 'high': 2}
39 | # mapping_class = {'unacc': 0, 'acc': 1, 'good': 2, 'vgood': 3}
40 | #
41 | # df['maint'] = df['maint'].map(mapping_buy_maint)
42 | # df['buying'] = df['buying'].map(mapping_buy_maint)
43 | # df['doors'] = df['doors'].map(mapping_doors)
44 | # df['persons'] = df['persons'].map(mapping_persons)
45 | # df['lug_boot'] = df['lug_boot'].map(mapping_lug)
46 | # df['safety'] = df['safety'].map(mapping_safety)
47 | # df['Class'] = df['Class'].map(mapping_class).astype(int)
48 | #
49 | # return df, features, 'Class', 'cars'
50 | #
51 | #
52 | # def load_wisconsin_breast_cancer():
53 | # columns = ['ID', 'ClumpThickness', 'CellSizeUniform', 'CellShapeUniform', 'MargAdhesion', 'EpithCellSize', 'BareNuclei',
54 | # 'BlandChromatin', 'NormalNuclei', 'Mitoses', 'Class']
55 | # features = ['ClumpThickness', 'CellSizeUniform', 'CellShapeUniform', 'MargAdhesion', 'EpithCellSize', 'BareNuclei',
56 | # 'BlandChromatin', 'NormalNuclei', 'Mitoses']
57 | # df = pd.read_csv(os.path.join(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1]), 'breast-cancer-wisconsin.data'))
58 | # df.columns = columns
59 | # df['Class'] = np.subtract(np.divide(df['Class'], 2), 1)
60 | # df = df.drop('ID', axis=1).reset_index(drop=True)
61 | # df['BareNuclei'] = df['BareNuclei'].replace('?', int(np.mean(df['BareNuclei'][df['BareNuclei'] != '?'].map(int))))
62 | # df = df.applymap(int)
63 | #
64 | # return df, features, 'Class', 'wisconsinBreast'
65 | from sklearn.preprocessing import LabelEncoder  # only used by the commented-out encoding lines in load_migbase
66 |
67 |
68 | # def load_heart():
69 | # columns = ['age', 'sex', 'chest pain type', 'resting blood pressure', 'serum cholestoral', 'fasting blood sugar', \
70 | # 'resting electrocardio', 'max heartrate', 'exercise induced', 'oldpeak', 'slope peak', \
71 | # 'vessels', 'thal', 'Class']
72 | # features = ['age', 'sex', 'chest pain type', 'resting blood pressure', 'serum cholestoral', 'fasting blood sugar', \
73 | # 'resting electrocardio', 'max heartrate', 'exercise induced', 'oldpeak', 'slope peak', \
74 | # 'vessels', 'thal']
75 | #
76 | # columns_copy = []
77 | # for column in columns:
78 | # column=column[:10]
79 | # columns_copy.append(column)
80 | # columns = columns_copy
81 | #
82 | # features_copy = []
83 | # for feature in features:
84 | # feature=feature[:10]
85 | # features_copy.append(feature)
86 | # features=features_copy
87 | #
88 | # df = pd.read_csv(os.path.join(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1]), 'heart.dat'), sep=' ')
89 | # df.columns = columns
90 | # df['Class'] = np.subtract(df['Class'], 1)
91 | # return df, features, 'Class', 'heart'
92 |
93 |
94 | # def load_glass():
95 | # columns = ['id', 'RI', 'Na', 'Mg', 'Al', 'Si', 'K', 'Ca', 'Ba', 'Fe', 'Class']
96 | # features = ['RI', 'Na', 'Mg', 'Al', 'Si', 'K', 'Ca', 'Ba', 'Fe']
97 | # df = pd.read_csv(os.path.join(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1]), 'glass.data'))
98 | # df.columns = columns
99 | # df = df.drop('id', axis=1).reset_index(drop=True)
100 | # df['Class'] = np.subtract(df['Class'], 1)
101 | # df = df[df['Class'] != 3]
102 | # df['Class'] = df['Class'].map({0:0, 1:1, 2:2, 4: 3, 5: 4, 6: 5}).astype(int)
103 | # return df, features, 'Class', 'glass'
104 | #
105 | #
106 | # def load_austra():
107 | # columns = ['X1','X2','X3','X4','X5','X6','X7','X8','X9','X10','X11','X12','X13','X14','Class']
108 | # features = ['X1','X2','X3','X4','X5','X6','X7','X8','X9','X10','X11','X12','X13','X14']
109 | # df = pd.read_csv(os.path.join(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1]), 'austra.data'))
110 | # df.columns = columns
111 | # df['Class'] = df['Class'].map({'y0': 0, 'y1': 1}).astype(int)
112 | # return df, features, 'Class', 'austra'
113 | #
114 | #
115 | # def load_led7():
116 | # columns = ['X1','X2','X3','X4','X5','X6','X7','Class']
117 | # features = ['X1','X2','X3','X4','X5','X6','X7']
118 | # df = pd.read_csv(os.path.join(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1]), 'led7.data'))
119 | # df.columns = columns
120 | # df['Class'] = df['Class'].map({'y0': 0, 'y1': 1, 'y2': 2, 'y3': 3, 'y4': 4, 'y5': 5, 'y6': 6,
121 | # 'y7': 7, 'y8': 8, 'y9': 9}).astype(int)
122 | # df = df[df['Class'] < 8]
123 | # return df, features, 'Class', 'led7'
124 | #
125 | #
126 | # def load_lymph():
127 | # columns = ['X1','X2','X3','X4','X5','X6','X7','X8','X9','X10','X11','X12','X13','X14','X15','X16','X17','X18','Class']
128 | # features = ['X1','X2','X3','X4','X5','X6','X7','X8','X9','X10','X11','X12','X13','X14','X15','X16','X17','X18']
129 | # df = pd.read_csv(os.path.join(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1]), 'lymph.data'))
130 | # df.columns = columns
131 | # df = df[df['Class'] != 'y1']
132 | # df = df[df['Class'] != 'y4']
133 | # df['Class'] = df['Class'].map({'y2': 0, 'y3': 1}).astype(int)
134 | # return df, features, 'Class', 'lymph'
135 | #
136 | #
137 | # def load_pima():
138 | # columns = ['X1','X2','X3','X4','X5','X6','X7','X8','Class']
139 | # features = ['X1','X2','X3','X4','X5','X6','X7','X8']
140 | # df = pd.read_csv(os.path.join(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1]), 'pima.data'))
141 | # df.columns = columns
142 | # df['Class'] = df['Class'].map({'y0': 0, 'y1': 1}).astype(int)
143 | # return df, features, 'Class', 'pima'
144 | #
145 | #
146 | # def load_vehicle():
147 | # columns = ['X1','X2','X3','X4','X5','X6','X7','X8','X9','X10','X11','X12','X13','X14','X15','X16','X17','X18','Class']
148 | # features = ['X1','X2','X3','X4','X5','X6','X7','X8','X9','X10','X11','X12','X13','X14','X15','X16','X17','X18']
149 | # df = pd.read_csv(os.path.join(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1]), 'vehicle.data'))
150 | # df.columns = columns
151 | # df['Class'] = df['Class'].map({'y1': 0, 'y2': 1, 'y3': 2, 'y4': 3}).astype(int)
152 | # return df, features, 'Class', 'vehicle'
153 | #
154 | #
155 | # def load_iris():
156 | # iris = datasets.load_iris()
157 | # df = pd.DataFrame(iris.data)
158 | # features = ["SepalLength", "SepalWidth", "PetalLength", "PetalWidth"]
159 | # df.columns = features
160 | # df['Class'] = iris.target
161 | #
162 | # return df, features, 'Class', 'iris'
163 | #
164 | #
165 | # def load_ecoli():
166 | # columns = ['name', 'mcg', 'gvh', 'lip', 'chg', 'aac', 'alm1', 'alm2', 'Class']
167 | # features = ['mcg', 'gvh', 'lip', 'chg', 'aac', 'alm1', 'alm2']
168 | # df = pd.read_csv(os.path.join(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1]), 'ecoli.data'), delim_whitespace=True, header=0)
169 | # df.columns = columns
170 | # df = df.drop('name', axis=1).reset_index(drop=True)
171 | # mapping_class = {'cp': 0, 'im': 1, 'pp': 2, 'imU': 3, 'om': 4, 'omL': 5, 'imL': 6, 'imS': 7}
172 | # df['Class'] = df['Class'].map(mapping_class).astype(int)
173 | # df = df[df['Class'] < 5]
174 | # return df, features, 'Class', 'ecoli'
175 | #
176 | #
177 | # def load_yeast():
178 | # columns = ['name', 'mcg', 'gvh', 'alm', 'mit', 'erl', 'pox', 'vac', 'nuc', 'Class']
179 | # features = ['mcg', 'gvh', 'alm', 'mit', 'erl', 'pox', 'vac', 'nuc']
180 | # df = pd.read_csv(os.path.join(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1]), 'yeast.data'), delim_whitespace=True, header=0)
181 | # df.columns = columns
182 | # df = df.drop('name', axis=1).reset_index(drop=True)
183 | # mapping_class = {'CYT': 0, 'NUC': 1, 'MIT': 2, 'ME3': 3, 'ME2': 4, 'ME1': 5, 'EXC': 6, 'VAC': 7, 'POX': 8, 'ERL': 9}
184 | # df['Class'] = df['Class'].map(mapping_class)
185 | # df = df[df['Class'] < 8]
186 | # return df, features, 'Class', 'yeast'
187 | #
188 | #
189 | # def load_waveform():
190 | # columns = ['X1','X2','X3','X4','X5','X6','X7','X8','X9','X10','X11','X12','X13','X14','X15','X16','X17','X18','X19','X20','X21','Class']
191 | # features = ['X1','X2','X3','X4','X5','X6','X7','X8','X9','X10','X11','X12','X13','X14','X15','X16','X17','X18','X19','X20','X21']
192 | # df = pd.read_csv(os.path.join(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1]), 'waveform.data'))
193 | # df.columns = columns
194 | # df['Class'] = df['Class'].map({'y0': 0, 'y1': 1, 'y2': 2}).astype(int)
195 | # return df, features, 'Class', 'waveform'
196 | #
197 | #
198 | # def load_magic():
199 | # columns = ['fLength', 'fWidth', 'fSize', 'fConc', 'fConc1', 'fAsym', 'fM3Long', 'fM3Trans', 'fAlpha', 'fDist', 'Class']
200 | # features = ['fLength', 'fWidth', 'fSize', 'fConc', 'fConc1', 'fAsym', 'fM3Long', 'fM3Trans', 'fAlpha', 'fDist']
201 | # df = pd.read_csv(os.path.join(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1]), 'magic04.data'))
202 | # df.columns = columns
203 | # for feature in features:
204 | # if np.min(df[feature]) < 0:
205 | # df[feature] += np.min(df[feature]) * (-1)
206 | # mapping_class = {'g': 0, 'h': 1}
207 | # df['Class'] = df['Class'].map(mapping_class).astype(int)
208 | # return df, features, 'Class', 'magic'
209 | #
210 | #
211 | # def load_shuttle():
212 | # columns = ['feature1', 'feature2', 'feature3', 'feature4', 'feature5', 'feature6', 'feature7', 'feature8',
213 | # 'feature9', 'Class']
214 | # features = ['feature1', 'feature2', 'feature3', 'feature4', 'feature5', 'feature6', 'feature7', 'feature8',
215 | # 'feature9']
216 | #
217 | # df = pd.read_csv(os.path.join(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1]), 'shuttle.tst'), sep=' ')
218 | # df.columns = columns
219 | # for feature in features:
220 | # if np.min(df[feature]) < 0:
221 | # df[feature] += np.min(df[feature]) * (-1)
222 | # df = df[df['Class'] < 6]
223 | # df['Class'] = np.subtract(df['Class'], 1)
224 | # df = df.reset_index(drop=True)
225 | #
226 | # return df, features, 'Class', 'shuttle'
227 | #
228 | #
229 | # def load_shuttle_full():
230 | # columns = ['feature1', 'feature2', 'feature3', 'feature4', 'feature5', 'feature6', 'feature7', 'feature8',
231 | # 'feature9', 'Class']
232 | # features = ['feature1', 'feature2', 'feature3', 'feature4', 'feature5', 'feature6', 'feature7', 'feature8',
233 | # 'feature9']
234 | #
235 | # df = pd.read_csv(os.path.join(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1]), 'shuttle_full.trn'), sep=' ')
236 | # df.columns = columns
237 | # for feature in features:
238 | # if np.min(df[feature]) < 0:
239 | # df[feature] += np.min(df[feature]) * (-1)
240 | # df = df[df['Class'] < 6]
241 | # df['Class'] = np.subtract(df['Class'], 1)
242 | # df = df.reset_index(drop=True)
243 | #
244 | # return df, features, 'Class', 'shuttleFull'
245 | #
246 | #
247 | # def load_nursery():
248 | # columns = ['parents', 'has_nurs', 'form', 'children', 'housing', 'finance', 'social', 'health', 'Class']
249 | # features = ['parents', 'has_nurs', 'form', 'children', 'housing', 'finance', 'social', 'health']
250 | #
251 | # mapping_parents = {'usual': 0, 'pretentious': 1, 'great_pret': 2}
252 | # mapping_has_nurs = {'proper': 0, 'less_proper': 1, 'improper': 2, 'critical': 3, 'very_crit': 4}
253 | # mapping_form = {'complete': 0, 'completed': 1, 'incomplete': 2, 'foster': 3}
254 | # mapping_housing = {'convenient': 0, 'less_conv': 1, 'critical': 2}
255 | # mapping_finance = {'convenient': 0, 'inconv': 1}
256 | # mapping_social = {'nonprob': 0, 'slightly_prob': 1, 'problematic': 2}
257 | # mapping_health = {'recommended': 0, 'priority': 1, 'not_recom': 2}
258 | # mapping_class = {'not_recom': 1, 'recommend': 0, 'very_recom': 2, 'priority': 3, 'spec_prior': 4}
259 | #
260 | # df = pd.read_csv(os.path.join(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1]), 'nursery.data'), sep=',')
261 | # df = df.dropna()
262 | # df.columns = columns
263 | #
264 | # df['parents'] = df['parents'].map(mapping_parents)
265 | # df['has_nurs'] = df['has_nurs'].map(mapping_has_nurs)
266 | # df['form'] = df['form'].map(mapping_form)
267 | # df['children'] = df['children'].map(lambda x: 4 if x == 'more' else int(x))
268 | # df['housing'] = df['housing'].map(mapping_housing)
269 | # df['finance'] = df['finance'].map(mapping_finance)
270 | # df['social'] = df['social'].map(mapping_social)
271 | # df['health'] = df['health'].map(mapping_health)
272 | # df['Class'] = df['Class'].map(mapping_class)
273 | #
274 | # df = df[df['Class'] != 0]
275 | # df['Class'] = np.subtract(df['Class'], 1)
276 | # df = df.reset_index(drop=True)
277 | #
278 | # return df, features, 'Class', 'nursery'
279 |
280 | # def load_aa_gent():
281 | # label_col = 'RPE'
282 | # # 'H5060', 'H6070', 'Variabele A',
283 | # feature_cols = ['S1', 'S2', 'S3', 'S4', 'S5', 'H7080', 'H8090', 'H90100', 'H5060', 'H6070', 'Idnummer',
284 | # 'Aantal sprints', 'Gemiddelde snelheid (m/s)', 'Totaal tijd (s)', 'Totaal afstand (m)',# 'Variabele A', 'Variabele B',
285 | # 'Temperature', 'Humidity', 'Windspeed', 'Visibility', 'Weather Type', 'Variabele B']#, 'overall', 'phy', 'pac']
286 | # #, 'ID', 'temperature', 'humidity', 'windspeed', 'visibility', 'weather_type']
287 | # cols = feature_cols + [label_col] + ['Datum']
288 | # df = pd.read_csv('aa_gent_with_player_features.csv')
289 | # df = df[cols]
290 | # df['Snelheid'] = df['Gemiddelde snelheid (m/s)'] # Could perhaps be omitted?
291 | # df['Variabele B'] = df['Variabele B'].fillna(df['Variabele B'].mean())
292 | # df['Tijd'] = df['Totaal tijd (s)']
293 | # df['Afstand'] = df['Totaal afstand (m)']
294 | # df = df.drop(['Gemiddelde snelheid (m/s)', 'Totaal tijd (s)', 'Totaal afstand (m)'], axis=1)
295 | # print df.head(5)
296 | # df[label_col] = df[label_col].map({1: 2, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 9}).astype(int)
297 | # df = df.drop(['Datum'], axis=1)
298 | # # df = df.drop(['temperature', 'humidity', 'windspeed', 'visibility', 'weather_type'], axis=1)
299 | # df = pd.get_dummies(df, columns=['Idnummer'])
300 | # #df = pd.get_dummies(df, columns=['Weather Type'])
301 | # feature_cols = list(df.columns)
302 | # feature_cols.remove('RPE')
303 | # print feature_cols
304 | # return df, feature_cols, label_col, 'AA Gent'
305 | # df = pd.read_csv('data/aa_gent.csv', sep=";")
306 | #
307 | # label_col = 'RPE'
308 | # feature_cols = ['S1', 'S2', 'S3', 'S4', 'S5', 'HF-zone 80-90',
309 | # 'HF-zone 70-80', 'HF-zone 90-100', 'Aantal sprints',
310 | # 'Gem v', 'Tijd (s)', 'Afstand']
311 | # print df[label_col].value_counts()
312 | # df = df[feature_cols + [label_col]]
313 | # df[label_col] = df[label_col].map({1: 2, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 9}).astype(int)
314 | # return df, feature_cols, label_col, 'AA Gent'
315 |
316 |
317 | def load_migbase():
318 |     # Load the migbase dataset (the migbase_noise20.csv variant)
319 |     migbase = pd.read_csv(os.path.join(os.sep.join(os.path.realpath(__file__).split(os.sep)[:-1]),
320 |                                        'migbase_noise20.csv'), sep=',')
321 |     # Drop the artificial index column, if present
322 |     if 'Unnamed: 0' in migbase.columns:
323 |         migbase = migbase.drop('Unnamed: 0', axis=1)
324 |     # Truncate every column name to at most 10 characters
325 |     col_mapping = {}
326 |     for col in migbase:
327 |         col_mapping[col] = col[:10]
328 |     migbase = migbase.rename(index=str, columns=col_mapping)
329 | 
330 |     feature_cols = list(migbase.columns)
331 |     feature_cols.remove('CLASS')
332 | 
333 |     return migbase, feature_cols, 'CLASS', 'migbase'
334 | 
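335 | # A minimal usage sketch (illustrative only; the unpacking simply mirrors the
336 | # return signature of the loaders in this module):
337 | #
338 | #   df, feature_cols, label_col, name = load_migbase()
339 | #   X, y = df[feature_cols], df[label_col]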
--------------------------------------------------------------------------------
/data/lymph.data:
--------------------------------------------------------------------------------
1 | X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,X13,X14,X15,X16,X17,X18,Y
2 | 3,2,1,1,2,2,1,2,1,2,2,4,3,5,2,2,2,4,y3
3 | 2,1,1,1,1,1,1,2,1,3,2,2,2,8,3,2,2,5,y3
4 | 2,2,1,1,1,1,1,2,1,3,2,2,2,8,3,1,2,5,y3
5 | 4,2,1,1,1,2,1,2,1,4,3,3,3,7,3,2,2,3,y3
6 | 2,2,2,1,2,2,2,2,1,3,3,4,4,5,3,2,2,5,y3
7 | 2,2,1,1,1,2,1,2,1,4,3,4,4,8,3,2,2,7,y3
8 | 4,1,1,1,1,1,1,2,1,4,2,2,4,7,3,2,2,7,y3
9 | 4,2,1,1,2,2,1,2,1,4,3,4,3,4,2,2,2,2,y2
10 | 3,1,1,1,1,1,2,1,1,2,2,4,3,5,2,1,2,1,y2
11 | 4,1,1,1,1,1,1,2,1,3,3,2,2,4,2,2,2,2,y2
12 | 2,2,1,1,1,1,1,2,1,2,2,2,3,8,3,1,2,1,y2
13 | 3,2,1,1,1,2,1,1,1,2,2,4,3,4,1,2,2,2,y2
14 | 2,2,1,1,2,2,1,1,1,2,2,4,2,8,3,2,2,1,y2
15 | 2,2,2,1,2,2,1,2,1,3,2,4,3,5,1,2,2,3,y2
16 | 2,2,1,1,2,2,1,1,1,3,3,4,3,4,3,1,1,1,y2
17 | 3,1,1,1,1,1,1,1,1,2,2,2,2,5,1,1,2,2,y3
18 | 3,1,1,1,1,2,1,2,1,4,2,4,4,2,3,2,2,3,y3
19 | 4,1,1,1,1,2,1,2,1,4,3,4,2,5,3,2,2,2,y3
20 | 2,1,1,1,1,1,1,2,1,3,2,3,3,8,3,2,2,3,y3
21 | 2,1,1,1,1,1,1,2,1,2,2,4,2,8,3,2,2,3,y3
22 | 3,1,1,1,1,1,1,2,2,1,2,2,2,8,3,1,2,8,y3
23 | 3,2,2,1,2,1,1,2,1,2,2,3,3,8,3,1,2,1,y2
24 | 4,2,1,1,2,2,1,1,1,1,3,3,3,3,3,2,2,3,y2
25 | 4,2,1,1,2,2,1,2,1,2,3,2,3,2,3,2,2,4,y2
26 | 2,1,1,1,1,1,1,1,1,1,1,2,2,3,1,2,2,1,y2
27 | 2,2,1,1,1,1,1,2,1,4,3,4,2,8,2,1,2,4,y2
28 | 2,1,1,1,1,1,1,1,1,1,1,1,1,3,1,2,2,1,y2
29 | 3,2,2,1,1,2,1,1,1,2,3,3,3,5,2,1,2,2,y2
30 | 3,2,1,1,1,2,1,2,1,2,2,2,2,1,3,1,1,1,y2
31 | 2,1,1,1,1,1,1,1,1,2,3,2,2,8,1,2,1,1,y2
32 | 2,1,1,1,1,1,1,2,1,2,2,3,3,5,3,1,1,2,y3
33 | 2,2,1,1,1,2,1,2,1,3,3,4,2,8,3,2,2,2,y3
34 | 4,1,1,1,1,1,1,2,1,4,2,4,2,8,3,2,2,6,y3
35 | 3,1,1,1,1,1,1,2,1,4,3,3,4,5,3,2,2,3,y3
36 | 2,1,1,1,1,2,1,2,1,3,3,4,2,8,3,2,2,3,y3
37 | 2,1,1,1,1,1,1,2,1,3,2,2,2,6,3,2,2,6,y3
38 | 2,1,1,1,1,1,1,1,1,2,2,2,3,8,2,1,2,1,y2
39 | 4,2,1,1,1,1,1,1,1,4,3,3,3,4,2,2,1,1,y2
40 | 2,2,1,1,1,2,1,2,1,2,3,3,3,5,3,2,2,1,y2
41 | 4,2,2,1,1,2,1,2,1,2,2,3,3,8,3,2,2,2,y2
42 | 3,2,2,2,2,2,1,2,1,2,3,3,3,4,3,2,2,7,y2
43 | 3,2,2,1,2,2,1,2,1,2,3,3,4,2,2,2,1,1,y2
44 | 2,2,1,1,1,1,1,2,1,2,3,3,3,5,3,1,2,1,y2
45 | 2,2,1,1,1,2,1,2,1,2,3,3,3,5,3,2,2,2,y2
46 | 1,1,1,1,1,2,1,2,1,2,2,1,1,2,1,1,1,2,y1
47 | 3,2,1,1,1,1,1,2,1,3,2,2,4,8,3,2,2,3,y3
48 | 4,1,1,1,1,1,1,2,1,2,2,3,3,3,3,2,1,1,y3
49 | 3,2,2,2,2,2,2,2,1,4,3,3,4,8,3,2,2,7,y3
50 | 2,1,1,1,1,1,1,2,1,3,2,4,4,4,3,2,2,5,y3
51 | 4,1,1,1,1,2,1,2,1,4,2,2,4,7,3,2,2,2,y3
52 | 2,1,1,1,1,1,1,2,1,2,2,2,2,8,2,2,2,1,y3
53 | 2,2,2,1,2,2,1,1,1,2,2,3,3,4,2,1,2,1,y2
54 | 2,2,2,1,2,2,1,2,1,3,3,3,3,8,3,1,2,2,y2
55 | 4,2,1,1,2,2,1,2,1,3,3,4,3,4,3,2,2,2,y2
56 | 3,1,1,1,1,1,1,1,1,2,3,3,3,4,3,1,2,2,y2
57 | 3,2,1,1,1,2,1,2,1,2,3,3,3,2,2,2,2,3,y2
58 | 2,2,2,1,1,1,1,2,1,2,2,4,3,8,2,2,2,2,y2
59 | 2,2,1,1,1,1,1,2,1,3,3,3,3,3,2,1,2,1,y2
60 | 2,2,1,1,1,1,1,2,1,3,3,3,3,2,2,2,1,1,y2
61 | 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,y1
62 | 3,1,1,1,1,2,1,2,1,4,3,4,2,4,3,2,2,6,y3
63 | 2,1,1,1,1,2,1,2,1,2,2,2,2,8,2,1,2,2,y3
64 | 2,1,1,1,1,1,1,1,1,2,3,2,3,3,2,2,1,1,y3
65 | 2,1,1,1,1,1,1,2,1,3,2,4,2,8,3,2,2,4,y3
66 | 3,2,1,1,1,2,1,2,1,2,2,2,4,8,3,1,2,3,y3
67 | 4,1,1,1,1,1,1,2,1,3,2,2,4,3,2,2,1,1,y3
68 | 2,2,2,1,2,2,1,2,1,3,3,3,3,8,3,2,2,2,y2
69 | 2,2,1,1,1,2,1,2,1,2,3,2,2,4,2,1,1,2,y2
70 | 2,1,1,1,1,1,1,1,1,1,2,2,3,3,1,2,2,1,y2
71 | 4,2,1,1,1,2,1,1,1,3,3,4,3,8,3,2,2,2,y2
72 | 4,2,1,1,2,1,1,2,1,3,3,3,3,2,2,2,2,3,y2
73 | 2,2,2,1,2,2,1,2,1,3,3,4,3,4,3,2,2,6,y2
74 | 3,2,1,1,1,1,1,1,1,2,2,3,3,5,1,1,1,1,y2
75 | 3,2,1,1,1,2,1,2,1,3,3,3,3,4,3,2,2,1,y2
76 | 3,2,2,2,2,2,2,2,3,1,1,2,2,8,1,2,2,4,y4
77 | 2,1,1,1,1,1,1,2,1,3,3,2,3,4,3,2,2,2,y3
78 | 2,1,1,1,1,1,1,2,1,2,2,4,4,8,2,1,2,2,y3
79 | 3,1,1,1,1,2,1,2,1,2,2,4,3,8,2,2,2,1,y3
80 | 4,1,1,1,2,1,2,2,1,3,2,3,4,5,3,2,2,7,y3
81 | 2,1,1,1,1,2,1,2,1,2,2,3,3,3,3,1,2,1,y3
82 | 2,2,1,1,1,2,1,2,1,3,3,3,3,8,3,2,2,4,y3
83 | 2,2,1,1,1,1,1,1,1,2,3,3,3,3,3,1,2,1,y2
84 | 3,2,1,1,1,1,1,2,1,2,2,2,2,5,1,1,1,1,y2
85 | 4,2,2,1,1,1,1,2,1,2,2,2,3,3,2,2,2,2,y2
86 | 3,2,1,1,2,2,1,2,1,3,3,2,3,4,2,2,2,2,y2
87 | 2,2,1,1,1,1,1,1,1,1,2,2,3,3,1,1,1,1,y2
88 | 4,2,1,1,1,2,1,1,1,2,2,3,3,5,2,1,2,1,y2
89 | 2,1,1,1,1,1,1,2,1,2,2,2,2,4,1,2,1,2,y2
90 | 4,1,1,1,1,1,1,1,2,1,3,4,2,8,1,2,2,1,y2
91 | 3,1,1,1,2,2,2,1,3,1,1,4,2,5,3,1,2,4,y4
92 | 3,1,1,1,1,1,1,2,1,2,2,4,4,2,3,2,1,1,y3
93 | 3,1,1,1,1,2,1,2,1,3,3,4,4,4,3,1,2,6,y3
94 | 3,1,1,1,1,1,1,2,1,2,2,4,2,4,3,2,2,3,y3
95 | 3,1,1,1,1,2,1,2,1,4,2,2,2,4,3,2,2,7,y3
96 | 4,2,1,1,2,2,1,2,1,3,3,4,2,5,3,2,2,3,y3
97 | 2,1,1,1,1,2,1,2,1,2,2,2,4,8,1,2,2,2,y3
98 | 2,2,1,1,1,1,1,2,1,2,2,3,3,2,3,2,2,2,y2
99 | 4,2,2,1,2,2,1,2,1,3,3,4,3,8,2,2,2,3,y2
100 | 2,1,1,1,1,1,1,2,1,3,3,3,3,6,3,1,2,4,y2
101 | 3,1,1,1,1,1,1,1,1,2,2,4,3,5,1,2,2,1,y2
102 | 2,2,1,1,1,2,1,1,1,2,2,2,3,5,2,1,2,2,y2
103 | 2,2,1,1,1,1,1,1,1,2,2,3,3,5,2,2,2,1,y2
104 | 2,2,2,2,2,2,1,1,1,2,2,4,3,8,2,2,2,3,y2
105 | 3,2,2,1,2,2,1,2,1,3,2,2,3,4,1,2,2,1,y2
106 | 3,2,2,2,2,2,2,1,2,2,2,4,2,4,3,2,2,7,y4
107 | 2,1,1,1,1,2,1,2,1,3,3,2,2,4,3,2,2,6,y3
108 | 4,1,1,1,1,2,1,2,1,3,3,4,2,4,3,2,2,4,y3
109 | 4,1,1,1,1,1,1,2,1,3,2,4,4,8,3,2,2,1,y3
110 | 4,2,1,1,1,1,1,2,1,2,2,2,4,8,1,1,2,2,y3
111 | 4,1,1,1,2,2,1,2,1,3,3,3,4,5,3,2,2,4,y3
112 | 2,1,1,1,1,1,1,2,1,4,3,4,4,5,3,2,2,5,y3
113 | 2,1,1,1,1,1,1,1,1,2,2,2,2,8,1,1,1,1,y2
114 | 2,2,1,1,1,1,1,1,1,2,3,3,3,2,2,2,2,1,y2
115 | 3,2,1,1,2,2,1,1,1,2,2,4,3,2,1,2,2,3,y2
116 | 2,2,1,1,1,2,1,2,1,2,2,3,3,4,2,1,2,1,y2
117 | 3,2,1,1,1,1,1,2,1,2,3,3,3,5,2,2,1,1,y2
118 | 2,1,1,1,1,1,1,2,1,2,2,4,2,2,1,2,2,1,y2
119 | 3,1,1,1,1,1,1,2,1,2,2,2,3,2,1,2,2,1,y2
120 | 3,1,1,1,2,1,1,2,1,2,3,3,3,5,3,1,1,1,y2
121 | 3,1,1,1,2,2,2,1,3,1,1,2,1,5,3,1,1,7,y4
122 | 2,1,1,1,1,2,1,2,1,3,2,2,2,4,3,1,2,5,y3
123 | 2,2,1,1,1,1,1,2,1,4,3,4,2,7,3,2,2,5,y3
124 | 2,2,2,1,2,2,2,2,1,4,2,2,2,4,3,2,2,6,y3
125 | 2,1,1,1,1,2,1,1,1,2,2,2,2,3,1,1,1,1,y3
126 | 2,1,1,1,1,1,1,2,1,2,2,2,1,7,1,2,2,2,y3
127 | 4,2,2,1,2,2,1,2,1,1,2,2,1,3,1,2,2,2,y3
128 | 4,2,1,1,1,2,1,1,1,2,3,2,3,2,2,1,1,1,y2
129 | 2,2,2,1,1,2,1,1,1,2,3,2,3,3,2,1,1,1,y2
130 | 2,2,1,1,1,1,1,2,1,2,3,2,3,8,2,1,1,1,y2
131 | 4,2,1,1,1,1,1,2,1,2,2,2,2,3,2,1,1,1,y2
132 | 2,1,1,1,1,1,1,1,1,2,3,3,3,8,3,1,1,1,y2
133 | 3,2,1,1,1,1,1,1,1,2,2,3,3,3,3,1,1,2,y2
134 | 4,2,1,1,1,1,1,2,1,2,2,2,3,3,2,2,2,1,y2
135 | 2,2,1,1,1,1,1,2,1,2,3,3,3,5,2,1,2,1,y2
136 | 2,2,1,1,1,2,1,2,1,3,3,3,4,8,3,2,2,2,y3
137 | 4,2,2,2,2,2,2,2,1,4,3,4,4,7,3,2,2,8,y3
138 | 3,1,1,1,1,1,1,1,1,2,2,4,2,8,2,2,2,1,y3
139 | 3,1,1,1,1,2,1,1,1,3,2,3,3,8,3,2,2,2,y3
140 | 4,2,2,2,2,2,1,2,1,4,3,4,4,7,3,2,2,6,y3
141 | 4,1,1,1,1,2,1,2,1,4,3,4,3,3,3,2,2,5,y3
142 | 2,1,1,1,1,1,1,1,1,1,2,4,3,8,2,2,2,2,y2
143 | 3,1,1,1,1,1,1,2,1,3,2,2,2,8,1,1,1,1,y2
144 | 3,2,1,1,1,1,1,2,1,2,3,3,3,8,2,2,2,1,y2
145 | 3,2,2,1,1,2,1,1,1,3,2,3,3,4,3,1,2,2,y2
146 | 3,2,1,1,1,2,1,2,1,3,2,4,3,4,2,2,2,2,y2
147 | 3,2,2,1,2,2,1,1,1,3,3,4,3,4,2,2,2,3,y2
148 | 3,2,1,1,1,2,1,1,1,2,3,4,2,4,1,1,1,1,y2
149 | 2,2,1,1,1,2,1,2,1,2,2,4,4,5,3,2,2,1,y2
150 |
--------------------------------------------------------------------------------
/data/wine.data:
--------------------------------------------------------------------------------
1 | 1,14.23,1.71,2.43,15.6,127,2.8,3.06,.28,2.29,5.64,1.04,3.92,1065
2 | 1,13.2,1.78,2.14,11.2,100,2.65,2.76,.26,1.28,4.38,1.05,3.4,1050
3 | 1,13.16,2.36,2.67,18.6,101,2.8,3.24,.3,2.81,5.68,1.03,3.17,1185
4 | 1,14.37,1.95,2.5,16.8,113,3.85,3.49,.24,2.18,7.8,.86,3.45,1480
5 | 1,13.24,2.59,2.87,21,118,2.8,2.69,.39,1.82,4.32,1.04,2.93,735
6 | 1,14.2,1.76,2.45,15.2,112,3.27,3.39,.34,1.97,6.75,1.05,2.85,1450
7 | 1,14.39,1.87,2.45,14.6,96,2.5,2.52,.3,1.98,5.25,1.02,3.58,1290
8 | 1,14.06,2.15,2.61,17.6,121,2.6,2.51,.31,1.25,5.05,1.06,3.58,1295
9 | 1,14.83,1.64,2.17,14,97,2.8,2.98,.29,1.98,5.2,1.08,2.85,1045
10 | 1,13.86,1.35,2.27,16,98,2.98,3.15,.22,1.85,7.22,1.01,3.55,1045
11 | 1,14.1,2.16,2.3,18,105,2.95,3.32,.22,2.38,5.75,1.25,3.17,1510
12 | 1,14.12,1.48,2.32,16.8,95,2.2,2.43,.26,1.57,5,1.17,2.82,1280
13 | 1,13.75,1.73,2.41,16,89,2.6,2.76,.29,1.81,5.6,1.15,2.9,1320
14 | 1,14.75,1.73,2.39,11.4,91,3.1,3.69,.43,2.81,5.4,1.25,2.73,1150
15 | 1,14.38,1.87,2.38,12,102,3.3,3.64,.29,2.96,7.5,1.2,3,1547
16 | 1,13.63,1.81,2.7,17.2,112,2.85,2.91,.3,1.46,7.3,1.28,2.88,1310
17 | 1,14.3,1.92,2.72,20,120,2.8,3.14,.33,1.97,6.2,1.07,2.65,1280
18 | 1,13.83,1.57,2.62,20,115,2.95,3.4,.4,1.72,6.6,1.13,2.57,1130
19 | 1,14.19,1.59,2.48,16.5,108,3.3,3.93,.32,1.86,8.7,1.23,2.82,1680
20 | 1,13.64,3.1,2.56,15.2,116,2.7,3.03,.17,1.66,5.1,.96,3.36,845
21 | 1,14.06,1.63,2.28,16,126,3,3.17,.24,2.1,5.65,1.09,3.71,780
22 | 1,12.93,3.8,2.65,18.6,102,2.41,2.41,.25,1.98,4.5,1.03,3.52,770
23 | 1,13.71,1.86,2.36,16.6,101,2.61,2.88,.27,1.69,3.8,1.11,4,1035
24 | 1,12.85,1.6,2.52,17.8,95,2.48,2.37,.26,1.46,3.93,1.09,3.63,1015
25 | 1,13.5,1.81,2.61,20,96,2.53,2.61,.28,1.66,3.52,1.12,3.82,845
26 | 1,13.05,2.05,3.22,25,124,2.63,2.68,.47,1.92,3.58,1.13,3.2,830
27 | 1,13.39,1.77,2.62,16.1,93,2.85,2.94,.34,1.45,4.8,.92,3.22,1195
28 | 1,13.3,1.72,2.14,17,94,2.4,2.19,.27,1.35,3.95,1.02,2.77,1285
29 | 1,13.87,1.9,2.8,19.4,107,2.95,2.97,.37,1.76,4.5,1.25,3.4,915
30 | 1,14.02,1.68,2.21,16,96,2.65,2.33,.26,1.98,4.7,1.04,3.59,1035
31 | 1,13.73,1.5,2.7,22.5,101,3,3.25,.29,2.38,5.7,1.19,2.71,1285
32 | 1,13.58,1.66,2.36,19.1,106,2.86,3.19,.22,1.95,6.9,1.09,2.88,1515
33 | 1,13.68,1.83,2.36,17.2,104,2.42,2.69,.42,1.97,3.84,1.23,2.87,990
34 | 1,13.76,1.53,2.7,19.5,132,2.95,2.74,.5,1.35,5.4,1.25,3,1235
35 | 1,13.51,1.8,2.65,19,110,2.35,2.53,.29,1.54,4.2,1.1,2.87,1095
36 | 1,13.48,1.81,2.41,20.5,100,2.7,2.98,.26,1.86,5.1,1.04,3.47,920
37 | 1,13.28,1.64,2.84,15.5,110,2.6,2.68,.34,1.36,4.6,1.09,2.78,880
38 | 1,13.05,1.65,2.55,18,98,2.45,2.43,.29,1.44,4.25,1.12,2.51,1105
39 | 1,13.07,1.5,2.1,15.5,98,2.4,2.64,.28,1.37,3.7,1.18,2.69,1020
40 | 1,14.22,3.99,2.51,13.2,128,3,3.04,.2,2.08,5.1,.89,3.53,760
41 | 1,13.56,1.71,2.31,16.2,117,3.15,3.29,.34,2.34,6.13,.95,3.38,795
42 | 1,13.41,3.84,2.12,18.8,90,2.45,2.68,.27,1.48,4.28,.91,3,1035
43 | 1,13.88,1.89,2.59,15,101,3.25,3.56,.17,1.7,5.43,.88,3.56,1095
44 | 1,13.24,3.98,2.29,17.5,103,2.64,2.63,.32,1.66,4.36,.82,3,680
45 | 1,13.05,1.77,2.1,17,107,3,3,.28,2.03,5.04,.88,3.35,885
46 | 1,14.21,4.04,2.44,18.9,111,2.85,2.65,.3,1.25,5.24,.87,3.33,1080
47 | 1,14.38,3.59,2.28,16,102,3.25,3.17,.27,2.19,4.9,1.04,3.44,1065
48 | 1,13.9,1.68,2.12,16,101,3.1,3.39,.21,2.14,6.1,.91,3.33,985
49 | 1,14.1,2.02,2.4,18.8,103,2.75,2.92,.32,2.38,6.2,1.07,2.75,1060
50 | 1,13.94,1.73,2.27,17.4,108,2.88,3.54,.32,2.08,8.90,1.12,3.1,1260
51 | 1,13.05,1.73,2.04,12.4,92,2.72,3.27,.17,2.91,7.2,1.12,2.91,1150
52 | 1,13.83,1.65,2.6,17.2,94,2.45,2.99,.22,2.29,5.6,1.24,3.37,1265
53 | 1,13.82,1.75,2.42,14,111,3.88,3.74,.32,1.87,7.05,1.01,3.26,1190
54 | 1,13.77,1.9,2.68,17.1,115,3,2.79,.39,1.68,6.3,1.13,2.93,1375
55 | 1,13.74,1.67,2.25,16.4,118,2.6,2.9,.21,1.62,5.85,.92,3.2,1060
56 | 1,13.56,1.73,2.46,20.5,116,2.96,2.78,.2,2.45,6.25,.98,3.03,1120
57 | 1,14.22,1.7,2.3,16.3,118,3.2,3,.26,2.03,6.38,.94,3.31,970
58 | 1,13.29,1.97,2.68,16.8,102,3,3.23,.31,1.66,6,1.07,2.84,1270
59 | 1,13.72,1.43,2.5,16.7,108,3.4,3.67,.19,2.04,6.8,.89,2.87,1285
60 | 2,12.37,.94,1.36,10.6,88,1.98,.57,.28,.42,1.95,1.05,1.82,520
61 | 2,12.33,1.1,2.28,16,101,2.05,1.09,.63,.41,3.27,1.25,1.67,680
62 | 2,12.64,1.36,2.02,16.8,100,2.02,1.41,.53,.62,5.75,.98,1.59,450
63 | 2,13.67,1.25,1.92,18,94,2.1,1.79,.32,.73,3.8,1.23,2.46,630
64 | 2,12.37,1.13,2.16,19,87,3.5,3.1,.19,1.87,4.45,1.22,2.87,420
65 | 2,12.17,1.45,2.53,19,104,1.89,1.75,.45,1.03,2.95,1.45,2.23,355
66 | 2,12.37,1.21,2.56,18.1,98,2.42,2.65,.37,2.08,4.6,1.19,2.3,678
67 | 2,13.11,1.01,1.7,15,78,2.98,3.18,.26,2.28,5.3,1.12,3.18,502
68 | 2,12.37,1.17,1.92,19.6,78,2.11,2,.27,1.04,4.68,1.12,3.48,510
69 | 2,13.34,.94,2.36,17,110,2.53,1.3,.55,.42,3.17,1.02,1.93,750
70 | 2,12.21,1.19,1.75,16.8,151,1.85,1.28,.14,2.5,2.85,1.28,3.07,718
71 | 2,12.29,1.61,2.21,20.4,103,1.1,1.02,.37,1.46,3.05,.906,1.82,870
72 | 2,13.86,1.51,2.67,25,86,2.95,2.86,.21,1.87,3.38,1.36,3.16,410
73 | 2,13.49,1.66,2.24,24,87,1.88,1.84,.27,1.03,3.74,.98,2.78,472
74 | 2,12.99,1.67,2.6,30,139,3.3,2.89,.21,1.96,3.35,1.31,3.5,985
75 | 2,11.96,1.09,2.3,21,101,3.38,2.14,.13,1.65,3.21,.99,3.13,886
76 | 2,11.66,1.88,1.92,16,97,1.61,1.57,.34,1.15,3.8,1.23,2.14,428
77 | 2,13.03,.9,1.71,16,86,1.95,2.03,.24,1.46,4.6,1.19,2.48,392
78 | 2,11.84,2.89,2.23,18,112,1.72,1.32,.43,.95,2.65,.96,2.52,500
79 | 2,12.33,.99,1.95,14.8,136,1.9,1.85,.35,2.76,3.4,1.06,2.31,750
80 | 2,12.7,3.87,2.4,23,101,2.83,2.55,.43,1.95,2.57,1.19,3.13,463
81 | 2,12,.92,2,19,86,2.42,2.26,.3,1.43,2.5,1.38,3.12,278
82 | 2,12.72,1.81,2.2,18.8,86,2.2,2.53,.26,1.77,3.9,1.16,3.14,714
83 | 2,12.08,1.13,2.51,24,78,2,1.58,.4,1.4,2.2,1.31,2.72,630
84 | 2,13.05,3.86,2.32,22.5,85,1.65,1.59,.61,1.62,4.8,.84,2.01,515
85 | 2,11.84,.89,2.58,18,94,2.2,2.21,.22,2.35,3.05,.79,3.08,520
86 | 2,12.67,.98,2.24,18,99,2.2,1.94,.3,1.46,2.62,1.23,3.16,450
87 | 2,12.16,1.61,2.31,22.8,90,1.78,1.69,.43,1.56,2.45,1.33,2.26,495
88 | 2,11.65,1.67,2.62,26,88,1.92,1.61,.4,1.34,2.6,1.36,3.21,562
89 | 2,11.64,2.06,2.46,21.6,84,1.95,1.69,.48,1.35,2.8,1,2.75,680
90 | 2,12.08,1.33,2.3,23.6,70,2.2,1.59,.42,1.38,1.74,1.07,3.21,625
91 | 2,12.08,1.83,2.32,18.5,81,1.6,1.5,.52,1.64,2.4,1.08,2.27,480
92 | 2,12,1.51,2.42,22,86,1.45,1.25,.5,1.63,3.6,1.05,2.65,450
93 | 2,12.69,1.53,2.26,20.7,80,1.38,1.46,.58,1.62,3.05,.96,2.06,495
94 | 2,12.29,2.83,2.22,18,88,2.45,2.25,.25,1.99,2.15,1.15,3.3,290
95 | 2,11.62,1.99,2.28,18,98,3.02,2.26,.17,1.35,3.25,1.16,2.96,345
96 | 2,12.47,1.52,2.2,19,162,2.5,2.27,.32,3.28,2.6,1.16,2.63,937
97 | 2,11.81,2.12,2.74,21.5,134,1.6,.99,.14,1.56,2.5,.95,2.26,625
98 | 2,12.29,1.41,1.98,16,85,2.55,2.5,.29,1.77,2.9,1.23,2.74,428
99 | 2,12.37,1.07,2.1,18.5,88,3.52,3.75,.24,1.95,4.5,1.04,2.77,660
100 | 2,12.29,3.17,2.21,18,88,2.85,2.99,.45,2.81,2.3,1.42,2.83,406
101 | 2,12.08,2.08,1.7,17.5,97,2.23,2.17,.26,1.4,3.3,1.27,2.96,710
102 | 2,12.6,1.34,1.9,18.5,88,1.45,1.36,.29,1.35,2.45,1.04,2.77,562
103 | 2,12.34,2.45,2.46,21,98,2.56,2.11,.34,1.31,2.8,.8,3.38,438
104 | 2,11.82,1.72,1.88,19.5,86,2.5,1.64,.37,1.42,2.06,.94,2.44,415
105 | 2,12.51,1.73,1.98,20.5,85,2.2,1.92,.32,1.48,2.94,1.04,3.57,672
106 | 2,12.42,2.55,2.27,22,90,1.68,1.84,.66,1.42,2.7,.86,3.3,315
107 | 2,12.25,1.73,2.12,19,80,1.65,2.03,.37,1.63,3.4,1,3.17,510
108 | 2,12.72,1.75,2.28,22.5,84,1.38,1.76,.48,1.63,3.3,.88,2.42,488
109 | 2,12.22,1.29,1.94,19,92,2.36,2.04,.39,2.08,2.7,.86,3.02,312
110 | 2,11.61,1.35,2.7,20,94,2.74,2.92,.29,2.49,2.65,.96,3.26,680
111 | 2,11.46,3.74,1.82,19.5,107,3.18,2.58,.24,3.58,2.9,.75,2.81,562
112 | 2,12.52,2.43,2.17,21,88,2.55,2.27,.26,1.22,2,.9,2.78,325
113 | 2,11.76,2.68,2.92,20,103,1.75,2.03,.6,1.05,3.8,1.23,2.5,607
114 | 2,11.41,.74,2.5,21,88,2.48,2.01,.42,1.44,3.08,1.1,2.31,434
115 | 2,12.08,1.39,2.5,22.5,84,2.56,2.29,.43,1.04,2.9,.93,3.19,385
116 | 2,11.03,1.51,2.2,21.5,85,2.46,2.17,.52,2.01,1.9,1.71,2.87,407
117 | 2,11.82,1.47,1.99,20.8,86,1.98,1.6,.3,1.53,1.95,.95,3.33,495
118 | 2,12.42,1.61,2.19,22.5,108,2,2.09,.34,1.61,2.06,1.06,2.96,345
119 | 2,12.77,3.43,1.98,16,80,1.63,1.25,.43,.83,3.4,.7,2.12,372
120 | 2,12,3.43,2,19,87,2,1.64,.37,1.87,1.28,.93,3.05,564
121 | 2,11.45,2.4,2.42,20,96,2.9,2.79,.32,1.83,3.25,.8,3.39,625
122 | 2,11.56,2.05,3.23,28.5,119,3.18,5.08,.47,1.87,6,.93,3.69,465
123 | 2,12.42,4.43,2.73,26.5,102,2.2,2.13,.43,1.71,2.08,.92,3.12,365
124 | 2,13.05,5.8,2.13,21.5,86,2.62,2.65,.3,2.01,2.6,.73,3.1,380
125 | 2,11.87,4.31,2.39,21,82,2.86,3.03,.21,2.91,2.8,.75,3.64,380
126 | 2,12.07,2.16,2.17,21,85,2.6,2.65,.37,1.35,2.76,.86,3.28,378
127 | 2,12.43,1.53,2.29,21.5,86,2.74,3.15,.39,1.77,3.94,.69,2.84,352
128 | 2,11.79,2.13,2.78,28.5,92,2.13,2.24,.58,1.76,3,.97,2.44,466
129 | 2,12.37,1.63,2.3,24.5,88,2.22,2.45,.4,1.9,2.12,.89,2.78,342
130 | 2,12.04,4.3,2.38,22,80,2.1,1.75,.42,1.35,2.6,.79,2.57,580
131 | 3,12.86,1.35,2.32,18,122,1.51,1.25,.21,.94,4.1,.76,1.29,630
132 | 3,12.88,2.99,2.4,20,104,1.3,1.22,.24,.83,5.4,.74,1.42,530
133 | 3,12.81,2.31,2.4,24,98,1.15,1.09,.27,.83,5.7,.66,1.36,560
134 | 3,12.7,3.55,2.36,21.5,106,1.7,1.2,.17,.84,5,.78,1.29,600
135 | 3,12.51,1.24,2.25,17.5,85,2,.58,.6,1.25,5.45,.75,1.51,650
136 | 3,12.6,2.46,2.2,18.5,94,1.62,.66,.63,.94,7.1,.73,1.58,695
137 | 3,12.25,4.72,2.54,21,89,1.38,.47,.53,.8,3.85,.75,1.27,720
138 | 3,12.53,5.51,2.64,25,96,1.79,.6,.63,1.1,5,.82,1.69,515
139 | 3,13.49,3.59,2.19,19.5,88,1.62,.48,.58,.88,5.7,.81,1.82,580
140 | 3,12.84,2.96,2.61,24,101,2.32,.6,.53,.81,4.92,.89,2.15,590
141 | 3,12.93,2.81,2.7,21,96,1.54,.5,.53,.75,4.6,.77,2.31,600
142 | 3,13.36,2.56,2.35,20,89,1.4,.5,.37,.64,5.6,.7,2.47,780
143 | 3,13.52,3.17,2.72,23.5,97,1.55,.52,.5,.55,4.35,.89,2.06,520
144 | 3,13.62,4.95,2.35,20,92,2,.8,.47,1.02,4.4,.91,2.05,550
145 | 3,12.25,3.88,2.2,18.5,112,1.38,.78,.29,1.14,8.21,.65,2,855
146 | 3,13.16,3.57,2.15,21,102,1.5,.55,.43,1.3,4,.6,1.68,830
147 | 3,13.88,5.04,2.23,20,80,.98,.34,.4,.68,4.9,.58,1.33,415
148 | 3,12.87,4.61,2.48,21.5,86,1.7,.65,.47,.86,7.65,.54,1.86,625
149 | 3,13.32,3.24,2.38,21.5,92,1.93,.76,.45,1.25,8.42,.55,1.62,650
150 | 3,13.08,3.9,2.36,21.5,113,1.41,1.39,.34,1.14,9.40,.57,1.33,550
151 | 3,13.5,3.12,2.62,24,123,1.4,1.57,.22,1.25,8.60,.59,1.3,500
152 | 3,12.79,2.67,2.48,22,112,1.48,1.36,.24,1.26,10.8,.48,1.47,480
153 | 3,13.11,1.9,2.75,25.5,116,2.2,1.28,.26,1.56,7.1,.61,1.33,425
154 | 3,13.23,3.3,2.28,18.5,98,1.8,.83,.61,1.87,10.52,.56,1.51,675
155 | 3,12.58,1.29,2.1,20,103,1.48,.58,.53,1.4,7.6,.58,1.55,640
156 | 3,13.17,5.19,2.32,22,93,1.74,.63,.61,1.55,7.9,.6,1.48,725
157 | 3,13.84,4.12,2.38,19.5,89,1.8,.83,.48,1.56,9.01,.57,1.64,480
158 | 3,12.45,3.03,2.64,27,97,1.9,.58,.63,1.14,7.5,.67,1.73,880
159 | 3,14.34,1.68,2.7,25,98,2.8,1.31,.53,2.7,13,.57,1.96,660
160 | 3,13.48,1.67,2.64,22.5,89,2.6,1.1,.52,2.29,11.75,.57,1.78,620
161 | 3,12.36,3.83,2.38,21,88,2.3,.92,.5,1.04,7.65,.56,1.58,520
162 | 3,13.69,3.26,2.54,20,107,1.83,.56,.5,.8,5.88,.96,1.82,680
163 | 3,12.85,3.27,2.58,22,106,1.65,.6,.6,.96,5.58,.87,2.11,570
164 | 3,12.96,3.45,2.35,18.5,106,1.39,.7,.4,.94,5.28,.68,1.75,675
165 | 3,13.78,2.76,2.3,22,90,1.35,.68,.41,1.03,9.58,.7,1.68,615
166 | 3,13.73,4.36,2.26,22.5,88,1.28,.47,.52,1.15,6.62,.78,1.75,520
167 | 3,13.45,3.7,2.6,23,111,1.7,.92,.43,1.46,10.68,.85,1.56,695
168 | 3,12.82,3.37,2.3,19.5,88,1.48,.66,.4,.97,10.26,.72,1.75,685
169 | 3,13.58,2.58,2.69,24.5,105,1.55,.84,.39,1.54,8.66,.74,1.8,750
170 | 3,13.4,4.6,2.86,25,112,1.98,.96,.27,1.11,8.5,.67,1.92,630
171 | 3,12.2,3.03,2.32,19,96,1.25,.49,.4,.73,5.5,.66,1.83,510
172 | 3,12.77,2.39,2.28,19.5,86,1.39,.51,.48,.64,9.899999,.57,1.63,470
173 | 3,14.16,2.51,2.48,20,91,1.68,.7,.44,1.24,9.7,.62,1.71,660
174 | 3,13.71,5.65,2.45,20.5,95,1.68,.61,.52,1.06,7.7,.64,1.74,740
175 | 3,13.4,3.91,2.48,23,102,1.8,.75,.43,1.41,7.3,.7,1.56,750
176 | 3,13.27,4.28,2.26,20,120,1.59,.69,.43,1.35,10.2,.59,1.56,835
177 | 3,13.17,2.59,2.37,20,120,1.65,.68,.53,1.46,9.3,.6,1.62,840
178 | 3,14.13,4.1,2.74,24.5,96,2.05,.76,.56,1.35,9.2,.61,1.6,560
179 |
--------------------------------------------------------------------------------
/evolving_tree.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/predict-idlab/GENESIM/44925de91ae408fea30ea8ece3688f9e42d4f040/evolving_tree.gif
--------------------------------------------------------------------------------
/example.py:
--------------------------------------------------------------------------------
1 | """
2 | This is an example script that applies k-fold cross-validation to all datasets with a load function in
3 | `data.load_datasets`, for all implemented tree constructors, ensemble techniques and GENESIM. In the end,
4 | confusion matrices will be stored at path `output/dataset_name_CVk.png` and the average model complexity and
5 | computational time required for each algorithm will be printed out.
6 |
7 | Written by Gilles Vandewiele, commissioned by IDLab - INTEC, Ghent University.
8 | """
9 |
10 |
11 | import time
12 |
13 | from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
14 | from sklearn.grid_search import GridSearchCV
15 | from sklearn.metrics import confusion_matrix
16 | from sklearn.cross_validation import StratifiedKFold, KFold
17 |
18 | import matplotlib.pyplot as plt
19 | import numpy as np
20 | from sklearn.neural_network import MLPClassifier
21 |
22 | import constructors.ISM
23 | from constructors.ensemble import RFClassification, XGBClassification, bootstrap
24 | from constructors.genesim import GENESIM
25 | from constructors.inTrees import inTreesClassifier
26 | from constructors.treeconstructor import QUESTConstructor, GUIDEConstructor, C45Constructor, CARTConstructor
27 | from data.load_all_datasets import load_all_datasets
28 | from decisiontree import DecisionTree
29 |
30 | if __name__ == "__main__":
31 |
32 | algorithms = {QUESTConstructor().get_name(): QUESTConstructor(),
33 | GUIDEConstructor().get_name(): GUIDEConstructor(),
34 | CARTConstructor().get_name(): CARTConstructor(),
35 | C45Constructor().get_name(): C45Constructor(),
36 | RFClassification().get_name(): RFClassification(),
37 | XGBClassification().get_name(): XGBClassification()
38 | }
39 | genesim = GENESIM()
40 | inTrees_clf = inTreesClassifier()
41 |
42 | NR_FOLDS = 5
43 | for dataset in load_all_datasets():
44 | df = dataset['dataframe']
45 | label_col = dataset['label_col']
46 | feature_cols = dataset['feature_cols']
47 |
48 | conf_matrices, avg_nodes, times = {}, {}, {}
49 |
50 | for algorithm in algorithms:
51 | conf_matrices[algorithm] = []
52 | avg_nodes[algorithm] = []
53 | times[algorithm] = []
54 | conf_matrices['GENESIM'], avg_nodes['GENESIM'], times['GENESIM'] = [], [], []
55 | conf_matrices['ISM'], avg_nodes['ISM'], times['ISM'] = [], [], []
56 | conf_matrices['inTrees'], avg_nodes['inTrees'], times['inTrees'] = [], [], []
57 |
58 | skf = StratifiedKFold(df[label_col], n_folds=NR_FOLDS, shuffle=True, random_state=None)
59 |
60 | for fold, (train_idx, test_idx) in enumerate(skf):
61 | print 'Fold', fold+1, '/', NR_FOLDS, 'for dataset', dataset['name']
62 | train = df.iloc[train_idx, :].reset_index(drop=True)
63 | X_train = train.drop(label_col, axis=1)
64 | y_train = train[label_col]
65 | test = df.iloc[test_idx, :].reset_index(drop=True)
66 | X_test = test.drop(label_col, axis=1)
67 | y_test = test[label_col]
68 |
69 | for algorithm in algorithms:
70 | print algorithm
71 | start = time.time()
72 | clf = algorithms[algorithm].construct_classifier(train, feature_cols, label_col)
73 | end = time.time()
74 | times[algorithm].append(end-start)
75 | predictions = clf.evaluate_multiple(X_test).astype(int)
76 | conf_matrix = confusion_matrix(y_test, predictions)
77 | print conf_matrix
78 | diagonal_sum = sum(
79 | [conf_matrix[i][i] for i in range(len(conf_matrix))])
80 | norm_diagonal_sum = sum(
81 | [float(conf_matrix[i][i]) / float(sum(conf_matrix[i])) for i in
82 | range(len(conf_matrix))])
83 | total_count = np.sum(conf_matrix)
84 | print 'Accuracy:', float(diagonal_sum) / float(total_count)
85 | print 'Balanced accuracy:', float(norm_diagonal_sum) / float(conf_matrix.shape[0])
86 | conf_matrices[algorithm].append(confusion_matrix(y_test, predictions))
87 | if type(clf) is DecisionTree:
88 | avg_nodes[algorithm].append(clf.count_nodes())
89 | else:
90 | avg_nodes[algorithm].append(clf.nr_clf)
91 |
92 | _constructors = [CARTConstructor(), QUESTConstructor(), GUIDEConstructor()]
93 |
94 | print 'inTrees'
95 | start = time.time()
96 | orl = inTrees_clf.construct_rule_list(train, label_col, _constructors, nr_bootstraps=25)
97 | end = time.time()
98 | times['inTrees'].append(end-start)
99 | predictions = orl.evaluate_multiple(X_test).astype(int)
100 | conf_matrices['inTrees'].append(confusion_matrix(y_test, predictions))
101 | conf_matrix = confusion_matrix(y_test, predictions)
102 | print conf_matrix
103 | diagonal_sum = sum(
104 | [conf_matrix[i][i] for i in range(len(conf_matrix))])
105 | norm_diagonal_sum = sum(
106 | [float(conf_matrix[i][i]) / float(sum(conf_matrix[i])) for i in
107 | range(len(conf_matrix))])
108 | total_count = np.sum(conf_matrix)
109 | correct = 0
110 | for i in range(len(conf_matrix)):
111 | correct += conf_matrix[i][i] + conf_matrix[i][max(i - 1, 0)] * ((i - 1) >= 0) + \
112 | conf_matrix[i][min(i + 1, len(conf_matrix[i]) - 1)] * ((i + 1) <= len(conf_matrix[i]) - 1)
113 | # print 'Accuracy [-1, +1]:', float(correct) / float(total_count)
114 | print 'Accuracy:', float(diagonal_sum) / float(total_count)
115 | print 'Balanced accuracy:', float(norm_diagonal_sum) / float(conf_matrix.shape[0])
116 | avg_nodes['inTrees'].append(len(orl.rule_list))
117 |
118 | print 'ISM'
119 | start = time.time()
120 | ism_tree = constructors.ISM.ism(bootstrap(train, label_col, _constructors, boosting=True, nr_classifiers=5),
121 | train, label_col, min_nr_samples=1, calc_fracs_from_ensemble=False)
122 | ism_pruned = ism_tree.cost_complexity_pruning(X_train, y_train, 'ism', ism_constructors=_constructors,
123 | ism_calc_fracs=False, n_folds=3, ism_nr_classifiers=5,
124 | ism_boosting=True)
125 | end = time.time()
126 | times['ISM'].append(end - start)
127 | predictions = ism_pruned.evaluate_multiple(X_test).astype(int)
128 | conf_matrices['ISM'].append(confusion_matrix(y_test, predictions))
129 | avg_nodes['ISM'].append(ism_pruned.count_nodes())
130 | conf_matrix = confusion_matrix(y_test, predictions)
131 | print conf_matrix
132 | diagonal_sum = sum(
133 | [conf_matrix[i][i] for i in range(len(conf_matrix))])
134 | norm_diagonal_sum = sum(
135 | [float(conf_matrix[i][i]) / float(sum(conf_matrix[i])) for i in
136 | range(len(conf_matrix))])
137 | total_count = np.sum(conf_matrix)
138 | correct = 0
139 | for i in range(len(conf_matrix)):
140 | correct += conf_matrix[i][i] + conf_matrix[i][max(i - 1, 0)] * ((i - 1) >= 0) + \
141 | conf_matrix[i][min(i + 1, len(conf_matrix[i]) - 1)] * ((i + 1) <= len(conf_matrix[i]) - 1)
142 | # print 'Accuracy [-1, +1]:', float(correct) / float(total_count)
143 | print 'Accuracy:', float(diagonal_sum) / float(total_count)
144 | print 'Balanced accuracy:', float(norm_diagonal_sum) / float(conf_matrix.shape[0])
145 | 
146 |
147 | print 'GENESIM'
148 | # train_gen = train.rename(columns={'Class': 'cat'})
149 | start = time.time()
150 | genetic = genesim.genetic_algorithm(train, label_col, _constructors, seed=None, num_iterations=40,
151 | num_crossovers=15, population_size=250, val_fraction=0.4, prune=True,
152 | max_samples=3, tournament_size=15, nr_bootstraps=40)
153 | end = time.time()
154 | times['GENESIM'].append(end - start)
155 | predictions = genetic.evaluate_multiple(X_test).astype(int)
156 | conf_matrices['GENESIM'].append(confusion_matrix(y_test, predictions))
157 | conf_matrix = confusion_matrix(y_test, predictions)
158 | print conf_matrix
159 | diagonal_sum = sum(
160 | [conf_matrix[i][i] for i in range(len(conf_matrix))])
161 | norm_diagonal_sum = sum(
162 | [float(conf_matrix[i][i]) / float(sum(conf_matrix[i])) for i in
163 | range(len(conf_matrix))])
164 | total_count = np.sum(conf_matrix)
165 | correct = 0
166 | for i in range(len(conf_matrix)):
167 | correct += conf_matrix[i][i] + conf_matrix[i][max(i - 1, 0)] * ((i - 1) >= 0) + \
168 | conf_matrix[i][min(i + 1, len(conf_matrix[i]) - 1)] * ((i + 1) <= len(conf_matrix[i]) - 1)
169 | # print 'Accuracy [-1, +1]:', float(correct) / float(total_count)
170 | print 'Accuracy:', float(diagonal_sum) / float(total_count)
171 | print 'Balanced accuracy:', float(norm_diagonal_sum) / float(conf_matrix.shape[0])
172 | avg_nodes['GENESIM'].append(genetic.count_nodes())
173 |
174 | print times
175 | print avg_nodes
176 |
177 | fig = plt.figure()
178 | fig.suptitle('Accuracy on ' + dataset['name'] + ' dataset using ' + str(NR_FOLDS) + ' folds', fontsize=20)
179 | counter = 0
180 | conf_matrices_mean = {}
181 | for key in conf_matrices:
182 | conf_matrices_mean[key] = np.zeros(conf_matrices[key][0].shape)
183 | for i in range(len(conf_matrices[key])):
184 | conf_matrices_mean[key] = np.add(conf_matrices_mean[key], conf_matrices[key][i])
185 | cm_normalized = np.around(
186 | conf_matrices_mean[key].astype('float') / conf_matrices_mean[key].sum(axis=1)[:,
187 | np.newaxis], 4)
188 |
189 | diagonal_sum = sum(
190 | [conf_matrices_mean[key][i][i] for i in range(len(conf_matrices_mean[key]))])
191 | norm_diagonal_sum = sum(
192 | [conf_matrices_mean[key][i][i]/sum(conf_matrices_mean[key][i]) for i in range(len(conf_matrices_mean[key]))])
193 | total_count = np.sum(conf_matrices_mean[key])
194 | print key
195 | print conf_matrices_mean[key]
196 | correct = 0
197 | for i in range(len(conf_matrices_mean[key])):
198 | correct += conf_matrices_mean[key][i][i] + conf_matrices_mean[key][i][max(i - 1, 0)] * ((i - 1) >= 0) + \
199 | conf_matrices_mean[key][i][min(i + 1, len(conf_matrices_mean[key][i]) - 1)] * ((i + 1) <= len(conf_matrices_mean[key][i]) - 1)
200 | print 'Accuracy [-1, +1]:', float(correct) / float(total_count)
201 | print 'Accuracy:', float(diagonal_sum) / float(total_count)
202 | print 'Balanced accuracy:', float(norm_diagonal_sum) / conf_matrices_mean[key].shape[0]
203 |
204 | ax = fig.add_subplot(2, np.math.ceil(len(conf_matrices) / 2.0), counter + 1)
205 | cax = ax.matshow(cm_normalized, cmap=plt.cm.Blues, vmin=0.0, vmax=1.0)
206 |             ax.set_title(key + ' (' + str(float(sum(avg_nodes[key])) / len(avg_nodes[key])) + ')', y=1.08)
207 | for (j, i), label in np.ndenumerate(cm_normalized):
208 | ax.text(i, j, label, ha='center', va='center')
209 | if counter == len(conf_matrices) - 1:
210 | fig.colorbar(cax, fraction=0.046, pad=0.04)
211 | counter += 1
212 | F = plt.gcf()
213 | Size = F.get_size_inches()
214 | F.set_size_inches(Size[0] * 2, Size[1] * 1.75, forward=True)
215 |         # Save the figure before calling plt.show(); saving afterwards would write a blank image
216 |         plt.savefig('output/' + dataset['name'] + '_CV'+str(NR_FOLDS)+'.png', bbox_inches='tight')
217 |         plt.show()
218 |
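219 | # The accuracy/balanced-accuracy computations above are repeated for every
220 | # algorithm. A sketch of an equivalent helper (hypothetical; not wired into the
221 | # script, and assuming a square numpy confusion matrix):
222 | #
223 | # def report_metrics(conf_matrix):
224 | #     # overall accuracy: trace (correct predictions) over the total count
225 | #     print 'Accuracy:', float(np.trace(conf_matrix)) / np.sum(conf_matrix)
226 | #     # balanced accuracy: mean of the per-class recalls
227 | #     recalls = conf_matrix.diagonal().astype(float) / conf_matrix.sum(axis=1)
228 | #     print 'Balanced accuracy:', recalls.mean()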
--------------------------------------------------------------------------------
/install.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Install some python packages
4 | pip install pandas
5 | pip install numpy
6 | pip install sklearn
7 | pip install matplotlib
8 | pip install -U imbalanced-learn
9 | pip install orange
10 | pip install graphviz
11 | pip install xgboost
12 | pip install rpy2
13 | pip install pylatex
14 |
15 | # For bayesian optimization: download source and install it
16 | git clone https://github.com/fmfn/BayesianOptimization.git
17 | cd BayesianOptimization
18 | sudo python setup.py install
19 | cd ..
20 |
21 | # Special care needed for C45Learner from Orange
22 | wget http://www.rulequest.com/Personal/c4.5r8.tar.gz
23 | tar -xvzf c4.5r8.tar.gz
24 | cd R8/Src
25 | wget https://raw.githubusercontent.com/biolab/orange/master/Orange/orng/buildC45.py
26 | wget https://raw.githubusercontent.com/biolab/orange/master/Orange/orng/ensemble.c
27 | sudo python buildC45.py
28 | cd ..
29 | cd ..
30 |
31 | # Install some R packages
32 | wget https://cran.r-project.org/src/contrib/randomForest_4.6-12.tar.gz
33 | tar -xvzf randomForest_4.6-12.tar.gz
34 | sudo R -e 'install.packages("'$(pwd)'/randomForest", repos=NULL, type="source")'
35 | wget https://cran.r-project.org/src/contrib/inTrees_1.1.tar.gz
36 | tar -xvzf inTrees_1.1.tar.gz
37 | sudo R -e 'install.packages("'$(pwd)'/inTrees", repos=NULL, type="source")'
38 |
39 |
40 | # sudo cp matplotlibrc /users/givdwiel/.local/lib/python2.7/site-packages/matplotlib/mpl-data/matplotlibrc
41 |
42 |
43 |
44 |
--------------------------------------------------------------------------------