├── .gitignore ├── README.md ├── data ├── groc.arules.tsv ├── groc.arules.xml ├── iris.glm.tsv ├── iris.glm.xml ├── iris.hc.tsv ├── iris.hc.xml ├── iris.kmeans.tsv ├── iris.kmeans.xml ├── iris.lm_p.tsv ├── iris.lm_p.xml ├── iris.multinom.tsv ├── iris.multinom.xml ├── iris.nn.tsv ├── iris.nn.xml ├── iris.rf.tsv ├── iris.rf.xml ├── iris.rpart.tsv ├── iris.rpart.xml ├── iris.svm.tsv ├── iris.svm.xml ├── orders.tsv ├── sample.rf.xml └── sample.tsv ├── pom.xml └── src ├── main └── java │ └── pattern │ ├── Classifier.java │ ├── ClassifierFunction.java │ ├── ClassifierSplitFunction.java │ ├── PMML.java │ ├── PatternException.java │ ├── Schema.java │ ├── XPathReader.java │ ├── datafield │ ├── CategoricalDataField.java │ ├── ContinuousDataField.java │ ├── DataField.java │ ├── DataFieldFactory.java │ └── PortableBitSet.java │ ├── model │ ├── MiningModel.java │ ├── Model.java │ ├── clust │ │ ├── ClusteringModel.java │ │ └── Exemplar.java │ ├── glm │ │ ├── GeneralizedRegressionModel.java │ │ ├── LinkFunction.java │ │ ├── PCell.java │ │ ├── PPCell.java │ │ ├── PPMatrix.java │ │ └── ParamMatrix.java │ ├── lm │ │ └── RegressionModel.java │ └── tree │ │ ├── Context.java │ │ ├── Edge.java │ │ ├── Tree.java │ │ ├── TreeModel.java │ │ └── Vertex.java │ └── predictor │ ├── CategoricalPredictor.java │ ├── NumericPredictor.java │ ├── Predictor.java │ └── PredictorFactory.java ├── py ├── gen_orders.py └── rf_eval.py ├── r ├── pmml_models.R ├── rattle_pmml.R └── rf_pmml.R └── test └── java └── pattern └── model ├── KMeansTest.java ├── ModelTest.java └── RandomForestTest.java /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | dot/ 3 | out/ 4 | misc/ 5 | .gradle 6 | Rplots.pdf 7 | *.class 8 | *.log 9 | *~ 10 | 11 | # IntelliJ Files # 12 | *.ipr 13 | *.iml 14 | *.iws 15 | 16 | # Package Files # 17 | *.jar 18 | *.war 19 | *.ear 20 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # Storm.pattern 2 | 3 | This project is based on the cascading.pattern project, the pattern sub-project for http://Cascading.org/, which uses flows as 4 | containers for machine learning models, importing [PMML](http://en.wikipedia.org/wiki/Predictive_Model_Markup_Language) model descriptions from _R_, _SAS_, _Weka_, _RapidMiner_, _KNIME_, 5 | _SQL Server_, etc. 6 | 7 | All credit to Chris and Paco for the excellent work! 8 | 9 | Current support for PMML includes: 10 | 11 | * [Random Forest](http://en.wikipedia.org/wiki/Random_forest) in [PMML 4.0+](http://www.dmg.org/v4-0-1/MultipleModels.html) exported from [R/Rattle](http://cran.r-project.org/web/packages/rattle/index.html) 12 | * [Linear Regression](http://en.wikipedia.org/wiki/Linear_regression) in [PMML 1.1+](http://www.dmg.org/v1-1/generalregression.html) 13 | * [Hierarchical Clustering](http://en.wikipedia.org/wiki/Hierarchical_clustering) and [K-Means Clustering](http://en.wikipedia.org/wiki/K-means_clustering) in [PMML 2.0+](http://www.dmg.org/v2-0/ClusteringModel.html) 14 | * [Logistic Regression](http://en.wikipedia.org/wiki/Logistic_regression) in [PMML 4.0.1+](http://www.dmg.org/v4-0-1/Regression.html) 15 | 16 | 17 | Use in Storm Topology 18 | --------------------- 19 | 20 | First include the clojars repo in your POM (or project.clj or sbt or wherever): 21 | 22 | 23 | 24 | clojars.org 25 | http://clojars.org/repo 26 | 27 | 28 | 29 | And then add the dependency: 30 | 31 | 32 | com.github.quintona 33 | storm-pattern 34 | 0.0.2-SNAPSHOT 35 | 36 | 37 | I have created a very simple Trident topology to illustrate the usage; it is available [here](https://github.com/quintona/pattern-demo-topology). 
At a high level, this is all that is required: 38 | 39 | topology.newStream("valueStream", spout) 40 | .each(new Fields(fields), new ClassifierFunction(pmml_file), 41 | new Fields("prediction")) 42 | .each(new Fields("prediction"), new PrintlnFunction(), 43 | new Fields()); 44 | 45 | You simply need to create the Classifier function and pass in the model. 46 | 47 | ## Build Instructions (if you are extending storm-pattern) 48 | ------------------ 49 | To build and then run its unit tests: 50 | 51 | mvn clean install 52 | 53 | The following scripts generate a baseline (model+data) for the _Random 54 | Forest_ algorithm. This baseline includes a reference data set -- 55 | 1000 independent variables, 500 rows of simulated ecommerce orders -- 56 | plus a predictive model in PMML: 57 | 58 | ./src/py/gen_orders.py 500 1000 > orders.tsv 59 | R --vanilla < ./src/r/rf_pmml.R > model.log 60 | 61 | This will generate `huge.rf.xml` as the PMML export for a Random 62 | Forest classifier plus `huge.tsv` as a baseline data set for 63 | regression testing. 64 | 65 | Example Models 66 | -------------- 67 | Check the `src/r/rattle_pmml.R` script for examples of predictive 68 | models which are created in R, then exported using _Rattle_. 69 | These examples use the popular 70 | [Iris](http://en.wikipedia.org/wiki/Iris_flower_data_set) data set. 71 | 72 | * random forest (rf) 73 | * linear regression (lm) 74 | * hierarchical clustering (hclust) 75 | * k-means clustering (kmeans) 76 | * logistic regression (glm) 77 | * multinomial model (multinom) 78 | * single hidden-layer neural network (nnet) 79 | * support vector machine (ksvm) 80 | * recursive partition classification tree (rpart) 81 | * association rules 82 | 83 | To execute the R script: 84 | 85 | R --vanilla < src/r/rattle_pmml.R 86 | 87 | It is possible to extend PMML support for other kinds of modeling in R 88 | and other analytics platforms. 
Contact the developers to discuss this on 89 | the [cascading-user](https://groups.google.com/forum/?fromgroups#!forum/cascading-user) 90 | email forum. 91 | 92 | 93 | PMML Resources 94 | -------------- 95 | * [Data Mining Group](http://www.dmg.org/) XML standards and supported vendors 96 | * [PMML In Action](http://www.amazon.com/dp/1470003244) book 97 | * [PMML validator](http://www.zementis.com/pmml_tools.htm) 98 | -------------------------------------------------------------------------------- /data/groc.arules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 | 5 | 6 | 2013-01-25 10:01:55 7 |
8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 |
252 | -------------------------------------------------------------------------------- /data/iris.glm.tsv: -------------------------------------------------------------------------------- 1 | sepal_length sepal_width petal_length petal_width species predict 2 | 5.1 3.5 1.4 0.2 setosa 1 3 | 4.9 3 1.4 0.2 setosa 1 4 | 4.7 3.2 1.3 0.2 setosa 1 5 | 4.6 3.1 1.5 0.2 setosa 1 6 | 5 3.6 1.4 0.2 setosa 1 7 | 5.4 3.9 1.7 0.4 setosa 1 8 | 4.6 3.4 1.4 0.3 setosa 1 9 | 5 3.4 1.5 0.2 setosa 1 10 | 4.4 2.9 1.4 0.2 setosa 1 11 | 4.9 3.1 1.5 0.1 setosa 1 12 | 5.4 3.7 1.5 0.2 setosa 1 13 | 4.8 3.4 1.6 0.2 setosa 1 14 | 4.8 3 1.4 0.1 setosa 1 15 | 4.3 3 1.1 0.1 setosa 1 16 | 5.8 4 1.2 0.2 setosa 1 17 | 5.7 4.4 1.5 0.4 setosa 1 18 | 5.4 3.9 1.3 0.4 setosa 1 19 | 5.1 3.5 1.4 0.3 setosa 1 20 | 5.7 3.8 1.7 0.3 setosa 1 21 | 5.1 3.8 1.5 0.3 setosa 1 22 | 5.4 3.4 1.7 0.2 setosa 1 23 | 5.1 3.7 1.5 0.4 setosa 1 24 | 4.6 3.6 1 0.2 setosa 1 25 | 5.1 3.3 1.7 0.5 setosa 1 26 | 4.8 3.4 1.9 0.2 setosa 1 27 | 5 3 1.6 0.2 setosa 1 28 | 5 3.4 1.6 0.4 setosa 1 29 | 5.2 3.5 1.5 0.2 setosa 1 30 | 5.2 3.4 1.4 0.2 setosa 1 31 | 4.7 3.2 1.6 0.2 setosa 1 32 | 4.8 3.1 1.6 0.2 setosa 1 33 | 5.4 3.4 1.5 0.4 setosa 1 34 | 5.2 4.1 1.5 0.1 setosa 1 35 | 5.5 4.2 1.4 0.2 setosa 1 36 | 4.9 3.1 1.5 0.2 setosa 1 37 | 5 3.2 1.2 0.2 setosa 1 38 | 5.5 3.5 1.3 0.2 setosa 1 39 | 4.9 3.6 1.4 0.1 setosa 1 40 | 4.4 3 1.3 0.2 setosa 1 41 | 5.1 3.4 1.5 0.2 setosa 1 42 | 5 3.5 1.3 0.3 setosa 1 43 | 4.5 2.3 1.3 0.3 setosa 1 44 | 4.4 3.2 1.3 0.2 setosa 1 45 | 5 3.5 1.6 0.6 setosa 1 46 | 5.1 3.8 1.9 0.4 setosa 1 47 | 4.8 3 1.4 0.3 setosa 1 48 | 5.1 3.8 1.6 0.2 setosa 1 49 | 4.6 3.2 1.4 0.2 setosa 1 50 | 5.3 3.7 1.5 0.2 setosa 1 51 | 5 3.3 1.4 0.2 setosa 1 52 | 7 3.2 4.7 1.4 versicolor 0 53 | 6.4 3.2 4.5 1.5 versicolor 0 54 | 6.9 3.1 4.9 1.5 versicolor 0 55 | 5.5 2.3 4 1.3 versicolor 0 56 | 6.5 2.8 4.6 1.5 versicolor 0 57 | 5.7 2.8 4.5 1.3 versicolor 0 58 | 6.3 3.3 4.7 1.6 versicolor 0 59 | 4.9 2.4 3.3 1 versicolor 0 60 | 6.6 2.9 
4.6 1.3 versicolor 0 61 | 5.2 2.7 3.9 1.4 versicolor 0 62 | 5 2 3.5 1 versicolor 0 63 | 5.9 3 4.2 1.5 versicolor 0 64 | 6 2.2 4 1 versicolor 0 65 | 6.1 2.9 4.7 1.4 versicolor 0 66 | 5.6 2.9 3.6 1.3 versicolor 0 67 | 6.7 3.1 4.4 1.4 versicolor 0 68 | 5.6 3 4.5 1.5 versicolor 0 69 | 5.8 2.7 4.1 1 versicolor 0 70 | 6.2 2.2 4.5 1.5 versicolor 0 71 | 5.6 2.5 3.9 1.1 versicolor 0 72 | 5.9 3.2 4.8 1.8 versicolor 0 73 | 6.1 2.8 4 1.3 versicolor 0 74 | 6.3 2.5 4.9 1.5 versicolor 0 75 | 6.1 2.8 4.7 1.2 versicolor 0 76 | 6.4 2.9 4.3 1.3 versicolor 0 77 | 6.6 3 4.4 1.4 versicolor 0 78 | 6.8 2.8 4.8 1.4 versicolor 0 79 | 6.7 3 5 1.7 versicolor 0 80 | 6 2.9 4.5 1.5 versicolor 0 81 | 5.7 2.6 3.5 1 versicolor 0 82 | 5.5 2.4 3.8 1.1 versicolor 0 83 | 5.5 2.4 3.7 1 versicolor 0 84 | 5.8 2.7 3.9 1.2 versicolor 0 85 | 6 2.7 5.1 1.6 versicolor 0 86 | 5.4 3 4.5 1.5 versicolor 0 87 | 6 3.4 4.5 1.6 versicolor 0 88 | 6.7 3.1 4.7 1.5 versicolor 0 89 | 6.3 2.3 4.4 1.3 versicolor 0 90 | 5.6 3 4.1 1.3 versicolor 0 91 | 5.5 2.5 4 1.3 versicolor 0 92 | 5.5 2.6 4.4 1.2 versicolor 0 93 | 6.1 3 4.6 1.4 versicolor 0 94 | 5.8 2.6 4 1.2 versicolor 0 95 | 5 2.3 3.3 1 versicolor 0 96 | 5.6 2.7 4.2 1.3 versicolor 0 97 | 5.7 3 4.2 1.2 versicolor 0 98 | 5.7 2.9 4.2 1.3 versicolor 0 99 | 6.2 2.9 4.3 1.3 versicolor 0 100 | 5.1 2.5 3 1.1 versicolor 0 101 | 5.7 2.8 4.1 1.3 versicolor 0 102 | 6.3 3.3 6 2.5 virginica 0 103 | 5.8 2.7 5.1 1.9 virginica 0 104 | 7.1 3 5.9 2.1 virginica 0 105 | 6.3 2.9 5.6 1.8 virginica 0 106 | 6.5 3 5.8 2.2 virginica 0 107 | 7.6 3 6.6 2.1 virginica 0 108 | 4.9 2.5 4.5 1.7 virginica 0 109 | 7.3 2.9 6.3 1.8 virginica 0 110 | 6.7 2.5 5.8 1.8 virginica 0 111 | 7.2 3.6 6.1 2.5 virginica 0 112 | 6.5 3.2 5.1 2 virginica 0 113 | 6.4 2.7 5.3 1.9 virginica 0 114 | 6.8 3 5.5 2.1 virginica 0 115 | 5.7 2.5 5 2 virginica 0 116 | 5.8 2.8 5.1 2.4 virginica 0 117 | 6.4 3.2 5.3 2.3 virginica 0 118 | 6.5 3 5.5 1.8 virginica 0 119 | 7.7 3.8 6.7 2.2 virginica 0 120 | 7.7 2.6 6.9 2.3 virginica 0 121 | 6 
2.2 5 1.5 virginica 0 122 | 6.9 3.2 5.7 2.3 virginica 0 123 | 5.6 2.8 4.9 2 virginica 0 124 | 7.7 2.8 6.7 2 virginica 0 125 | 6.3 2.7 4.9 1.8 virginica 0 126 | 6.7 3.3 5.7 2.1 virginica 0 127 | 7.2 3.2 6 1.8 virginica 0 128 | 6.2 2.8 4.8 1.8 virginica 0 129 | 6.1 3 4.9 1.8 virginica 0 130 | 6.4 2.8 5.6 2.1 virginica 0 131 | 7.2 3 5.8 1.6 virginica 0 132 | 7.4 2.8 6.1 1.9 virginica 0 133 | 7.9 3.8 6.4 2 virginica 0 134 | 6.4 2.8 5.6 2.2 virginica 0 135 | 6.3 2.8 5.1 1.5 virginica 0 136 | 6.1 2.6 5.6 1.4 virginica 0 137 | 7.7 3 6.1 2.3 virginica 0 138 | 6.3 3.4 5.6 2.4 virginica 0 139 | 6.4 3.1 5.5 1.8 virginica 0 140 | 6 3 4.8 1.8 virginica 0 141 | 6.9 3.1 5.4 2.1 virginica 0 142 | 6.7 3.1 5.6 2.4 virginica 0 143 | 6.9 3.1 5.1 2.3 virginica 0 144 | 5.8 2.7 5.1 1.9 virginica 0 145 | 6.8 3.2 5.9 2.3 virginica 0 146 | 6.7 3.3 5.7 2.5 virginica 0 147 | 6.7 3 5.2 2.3 virginica 0 148 | 6.3 2.5 5 1.9 virginica 0 149 | 6.5 3 5.2 2 virginica 0 150 | 6.2 3.4 5.4 2.3 virginica 0 151 | 5.9 3 5.1 1.8 virginica 0 152 | -------------------------------------------------------------------------------- /data/iris.glm.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 | 5 | 6 | 2013-01-25 10:01:54 7 |
8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 |
52 | -------------------------------------------------------------------------------- /data/iris.hc.tsv: -------------------------------------------------------------------------------- 1 | sepal_length sepal_width petal_length petal_width species predict 2 | 5.1 3.5 1.4 0.2 setosa 1 3 | 4.9 3 1.4 0.2 setosa 1 4 | 4.7 3.2 1.3 0.2 setosa 1 5 | 4.6 3.1 1.5 0.2 setosa 1 6 | 5 3.6 1.4 0.2 setosa 1 7 | 5.4 3.9 1.7 0.4 setosa 1 8 | 4.6 3.4 1.4 0.3 setosa 1 9 | 5 3.4 1.5 0.2 setosa 1 10 | 4.4 2.9 1.4 0.2 setosa 1 11 | 4.9 3.1 1.5 0.1 setosa 1 12 | 5.4 3.7 1.5 0.2 setosa 1 13 | 4.8 3.4 1.6 0.2 setosa 1 14 | 4.8 3 1.4 0.1 setosa 1 15 | 4.3 3 1.1 0.1 setosa 1 16 | 5.8 4 1.2 0.2 setosa 1 17 | 5.7 4.4 1.5 0.4 setosa 1 18 | 5.4 3.9 1.3 0.4 setosa 1 19 | 5.1 3.5 1.4 0.3 setosa 1 20 | 5.7 3.8 1.7 0.3 setosa 1 21 | 5.1 3.8 1.5 0.3 setosa 1 22 | 5.4 3.4 1.7 0.2 setosa 1 23 | 5.1 3.7 1.5 0.4 setosa 1 24 | 4.6 3.6 1 0.2 setosa 1 25 | 5.1 3.3 1.7 0.5 setosa 1 26 | 4.8 3.4 1.9 0.2 setosa 1 27 | 5 3 1.6 0.2 setosa 1 28 | 5 3.4 1.6 0.4 setosa 1 29 | 5.2 3.5 1.5 0.2 setosa 1 30 | 5.2 3.4 1.4 0.2 setosa 1 31 | 4.7 3.2 1.6 0.2 setosa 1 32 | 4.8 3.1 1.6 0.2 setosa 1 33 | 5.4 3.4 1.5 0.4 setosa 1 34 | 5.2 4.1 1.5 0.1 setosa 1 35 | 5.5 4.2 1.4 0.2 setosa 1 36 | 4.9 3.1 1.5 0.2 setosa 1 37 | 5 3.2 1.2 0.2 setosa 1 38 | 5.5 3.5 1.3 0.2 setosa 1 39 | 4.9 3.6 1.4 0.1 setosa 1 40 | 4.4 3 1.3 0.2 setosa 1 41 | 5.1 3.4 1.5 0.2 setosa 1 42 | 5 3.5 1.3 0.3 setosa 1 43 | 4.5 2.3 1.3 0.3 setosa 1 44 | 4.4 3.2 1.3 0.2 setosa 1 45 | 5 3.5 1.6 0.6 setosa 1 46 | 5.1 3.8 1.9 0.4 setosa 1 47 | 4.8 3 1.4 0.3 setosa 1 48 | 5.1 3.8 1.6 0.2 setosa 1 49 | 4.6 3.2 1.4 0.2 setosa 1 50 | 5.3 3.7 1.5 0.2 setosa 1 51 | 5 3.3 1.4 0.2 setosa 1 52 | 7 3.2 4.7 1.4 versicolor 2 53 | 6.4 3.2 4.5 1.5 versicolor 2 54 | 6.9 3.1 4.9 1.5 versicolor 2 55 | 5.5 2.3 4 1.3 versicolor 2 56 | 6.5 2.8 4.6 1.5 versicolor 2 57 | 5.7 2.8 4.5 1.3 versicolor 2 58 | 6.3 3.3 4.7 1.6 versicolor 2 59 | 4.9 2.4 3.3 1 versicolor 2 60 | 6.6 2.9 4.6 
1.3 versicolor 2 61 | 5.2 2.7 3.9 1.4 versicolor 2 62 | 5 2 3.5 1 versicolor 2 63 | 5.9 3 4.2 1.5 versicolor 2 64 | 6 2.2 4 1 versicolor 2 65 | 6.1 2.9 4.7 1.4 versicolor 2 66 | 5.6 2.9 3.6 1.3 versicolor 2 67 | 6.7 3.1 4.4 1.4 versicolor 2 68 | 5.6 3 4.5 1.5 versicolor 2 69 | 5.8 2.7 4.1 1 versicolor 2 70 | 6.2 2.2 4.5 1.5 versicolor 2 71 | 5.6 2.5 3.9 1.1 versicolor 2 72 | 5.9 3.2 4.8 1.8 versicolor 2 73 | 6.1 2.8 4 1.3 versicolor 2 74 | 6.3 2.5 4.9 1.5 versicolor 2 75 | 6.1 2.8 4.7 1.2 versicolor 2 76 | 6.4 2.9 4.3 1.3 versicolor 2 77 | 6.6 3 4.4 1.4 versicolor 2 78 | 6.8 2.8 4.8 1.4 versicolor 2 79 | 6.7 3 5 1.7 versicolor 2 80 | 6 2.9 4.5 1.5 versicolor 2 81 | 5.7 2.6 3.5 1 versicolor 2 82 | 5.5 2.4 3.8 1.1 versicolor 2 83 | 5.5 2.4 3.7 1 versicolor 2 84 | 5.8 2.7 3.9 1.2 versicolor 2 85 | 6 2.7 5.1 1.6 versicolor 2 86 | 5.4 3 4.5 1.5 versicolor 2 87 | 6 3.4 4.5 1.6 versicolor 2 88 | 6.7 3.1 4.7 1.5 versicolor 2 89 | 6.3 2.3 4.4 1.3 versicolor 2 90 | 5.6 3 4.1 1.3 versicolor 2 91 | 5.5 2.5 4 1.3 versicolor 2 92 | 5.5 2.6 4.4 1.2 versicolor 2 93 | 6.1 3 4.6 1.4 versicolor 2 94 | 5.8 2.6 4 1.2 versicolor 2 95 | 5 2.3 3.3 1 versicolor 2 96 | 5.6 2.7 4.2 1.3 versicolor 2 97 | 5.7 3 4.2 1.2 versicolor 2 98 | 5.7 2.9 4.2 1.3 versicolor 2 99 | 6.2 2.9 4.3 1.3 versicolor 2 100 | 5.1 2.5 3 1.1 versicolor 2 101 | 5.7 2.8 4.1 1.3 versicolor 2 102 | 6.3 3.3 6 2.5 virginica 3 103 | 5.8 2.7 5.1 1.9 virginica 2 104 | 7.1 3 5.9 2.1 virginica 3 105 | 6.3 2.9 5.6 1.8 virginica 3 106 | 6.5 3 5.8 2.2 virginica 3 107 | 7.6 3 6.6 2.1 virginica 3 108 | 4.9 2.5 4.5 1.7 virginica 2 109 | 7.3 2.9 6.3 1.8 virginica 3 110 | 6.7 2.5 5.8 1.8 virginica 3 111 | 7.2 3.6 6.1 2.5 virginica 3 112 | 6.5 3.2 5.1 2 virginica 3 113 | 6.4 2.7 5.3 1.9 virginica 3 114 | 6.8 3 5.5 2.1 virginica 3 115 | 5.7 2.5 5 2 virginica 2 116 | 5.8 2.8 5.1 2.4 virginica 2 117 | 6.4 3.2 5.3 2.3 virginica 3 118 | 6.5 3 5.5 1.8 virginica 3 119 | 7.7 3.8 6.7 2.2 virginica 3 120 | 7.7 2.6 6.9 2.3 virginica 3 121 | 6 2.2 
5 1.5 virginica 2 122 | 6.9 3.2 5.7 2.3 virginica 3 123 | 5.6 2.8 4.9 2 virginica 2 124 | 7.7 2.8 6.7 2 virginica 3 125 | 6.3 2.7 4.9 1.8 virginica 2 126 | 6.7 3.3 5.7 2.1 virginica 3 127 | 7.2 3.2 6 1.8 virginica 3 128 | 6.2 2.8 4.8 1.8 virginica 2 129 | 6.1 3 4.9 1.8 virginica 2 130 | 6.4 2.8 5.6 2.1 virginica 3 131 | 7.2 3 5.8 1.6 virginica 3 132 | 7.4 2.8 6.1 1.9 virginica 3 133 | 7.9 3.8 6.4 2 virginica 3 134 | 6.4 2.8 5.6 2.2 virginica 3 135 | 6.3 2.8 5.1 1.5 virginica 2 136 | 6.1 2.6 5.6 1.4 virginica 3 137 | 7.7 3 6.1 2.3 virginica 3 138 | 6.3 3.4 5.6 2.4 virginica 3 139 | 6.4 3.1 5.5 1.8 virginica 3 140 | 6 3 4.8 1.8 virginica 2 141 | 6.9 3.1 5.4 2.1 virginica 3 142 | 6.7 3.1 5.6 2.4 virginica 3 143 | 6.9 3.1 5.1 2.3 virginica 3 144 | 5.8 2.7 5.1 1.9 virginica 2 145 | 6.8 3.2 5.9 2.3 virginica 3 146 | 6.7 3.3 5.7 2.5 virginica 3 147 | 6.7 3 5.2 2.3 virginica 3 148 | 6.3 2.5 5 1.9 virginica 2 149 | 6.5 3 5.2 2 virginica 3 150 | 6.2 3.4 5.4 2.3 virginica 3 151 | 5.9 3 5.1 1.8 virginica 2 152 | -------------------------------------------------------------------------------- /data/iris.hc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 | 5 | 6 | 2013-01-25 10:01:53 7 |
8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 5.006 3.428 1.462 0.246 30 | 31 | 32 | 5.9296875 2.7578125 4.4109375 1.4390625 33 | 34 | 35 | 6.85277777777778 3.075 5.78611111111111 2.09722222222222 36 | 37 | 38 |
39 | -------------------------------------------------------------------------------- /data/iris.kmeans.tsv: -------------------------------------------------------------------------------- 1 | sepal_length sepal_width petal_length petal_width species predict 2 | 5.1 3.5 1.4 0.2 setosa 1 3 | 4.9 3 1.4 0.2 setosa 1 4 | 4.7 3.2 1.3 0.2 setosa 1 5 | 4.6 3.1 1.5 0.2 setosa 1 6 | 5 3.6 1.4 0.2 setosa 1 7 | 5.4 3.9 1.7 0.4 setosa 1 8 | 4.6 3.4 1.4 0.3 setosa 1 9 | 5 3.4 1.5 0.2 setosa 1 10 | 4.4 2.9 1.4 0.2 setosa 1 11 | 4.9 3.1 1.5 0.1 setosa 1 12 | 5.4 3.7 1.5 0.2 setosa 1 13 | 4.8 3.4 1.6 0.2 setosa 1 14 | 4.8 3 1.4 0.1 setosa 1 15 | 4.3 3 1.1 0.1 setosa 1 16 | 5.8 4 1.2 0.2 setosa 1 17 | 5.7 4.4 1.5 0.4 setosa 1 18 | 5.4 3.9 1.3 0.4 setosa 1 19 | 5.1 3.5 1.4 0.3 setosa 1 20 | 5.7 3.8 1.7 0.3 setosa 1 21 | 5.1 3.8 1.5 0.3 setosa 1 22 | 5.4 3.4 1.7 0.2 setosa 1 23 | 5.1 3.7 1.5 0.4 setosa 1 24 | 4.6 3.6 1 0.2 setosa 1 25 | 5.1 3.3 1.7 0.5 setosa 1 26 | 4.8 3.4 1.9 0.2 setosa 1 27 | 5 3 1.6 0.2 setosa 1 28 | 5 3.4 1.6 0.4 setosa 1 29 | 5.2 3.5 1.5 0.2 setosa 1 30 | 5.2 3.4 1.4 0.2 setosa 1 31 | 4.7 3.2 1.6 0.2 setosa 1 32 | 4.8 3.1 1.6 0.2 setosa 1 33 | 5.4 3.4 1.5 0.4 setosa 1 34 | 5.2 4.1 1.5 0.1 setosa 1 35 | 5.5 4.2 1.4 0.2 setosa 1 36 | 4.9 3.1 1.5 0.2 setosa 1 37 | 5 3.2 1.2 0.2 setosa 1 38 | 5.5 3.5 1.3 0.2 setosa 1 39 | 4.9 3.6 1.4 0.1 setosa 1 40 | 4.4 3 1.3 0.2 setosa 1 41 | 5.1 3.4 1.5 0.2 setosa 1 42 | 5 3.5 1.3 0.3 setosa 1 43 | 4.5 2.3 1.3 0.3 setosa 1 44 | 4.4 3.2 1.3 0.2 setosa 1 45 | 5 3.5 1.6 0.6 setosa 1 46 | 5.1 3.8 1.9 0.4 setosa 1 47 | 4.8 3 1.4 0.3 setosa 1 48 | 5.1 3.8 1.6 0.2 setosa 1 49 | 4.6 3.2 1.4 0.2 setosa 1 50 | 5.3 3.7 1.5 0.2 setosa 1 51 | 5 3.3 1.4 0.2 setosa 1 52 | 7 3.2 4.7 1.4 versicolor 3 53 | 6.4 3.2 4.5 1.5 versicolor 3 54 | 6.9 3.1 4.9 1.5 versicolor 2 55 | 5.5 2.3 4 1.3 versicolor 3 56 | 6.5 2.8 4.6 1.5 versicolor 3 57 | 5.7 2.8 4.5 1.3 versicolor 3 58 | 6.3 3.3 4.7 1.6 versicolor 3 59 | 4.9 2.4 3.3 1 versicolor 3 60 | 6.6 2.9 
4.6 1.3 versicolor 3 61 | 5.2 2.7 3.9 1.4 versicolor 3 62 | 5 2 3.5 1 versicolor 3 63 | 5.9 3 4.2 1.5 versicolor 3 64 | 6 2.2 4 1 versicolor 3 65 | 6.1 2.9 4.7 1.4 versicolor 3 66 | 5.6 2.9 3.6 1.3 versicolor 3 67 | 6.7 3.1 4.4 1.4 versicolor 3 68 | 5.6 3 4.5 1.5 versicolor 3 69 | 5.8 2.7 4.1 1 versicolor 3 70 | 6.2 2.2 4.5 1.5 versicolor 3 71 | 5.6 2.5 3.9 1.1 versicolor 3 72 | 5.9 3.2 4.8 1.8 versicolor 3 73 | 6.1 2.8 4 1.3 versicolor 3 74 | 6.3 2.5 4.9 1.5 versicolor 3 75 | 6.1 2.8 4.7 1.2 versicolor 3 76 | 6.4 2.9 4.3 1.3 versicolor 3 77 | 6.6 3 4.4 1.4 versicolor 3 78 | 6.8 2.8 4.8 1.4 versicolor 3 79 | 6.7 3 5 1.7 versicolor 2 80 | 6 2.9 4.5 1.5 versicolor 3 81 | 5.7 2.6 3.5 1 versicolor 3 82 | 5.5 2.4 3.8 1.1 versicolor 3 83 | 5.5 2.4 3.7 1 versicolor 3 84 | 5.8 2.7 3.9 1.2 versicolor 3 85 | 6 2.7 5.1 1.6 versicolor 3 86 | 5.4 3 4.5 1.5 versicolor 3 87 | 6 3.4 4.5 1.6 versicolor 3 88 | 6.7 3.1 4.7 1.5 versicolor 3 89 | 6.3 2.3 4.4 1.3 versicolor 3 90 | 5.6 3 4.1 1.3 versicolor 3 91 | 5.5 2.5 4 1.3 versicolor 3 92 | 5.5 2.6 4.4 1.2 versicolor 3 93 | 6.1 3 4.6 1.4 versicolor 3 94 | 5.8 2.6 4 1.2 versicolor 3 95 | 5 2.3 3.3 1 versicolor 3 96 | 5.6 2.7 4.2 1.3 versicolor 3 97 | 5.7 3 4.2 1.2 versicolor 3 98 | 5.7 2.9 4.2 1.3 versicolor 3 99 | 6.2 2.9 4.3 1.3 versicolor 3 100 | 5.1 2.5 3 1.1 versicolor 3 101 | 5.7 2.8 4.1 1.3 versicolor 3 102 | 6.3 3.3 6 2.5 virginica 2 103 | 5.8 2.7 5.1 1.9 virginica 3 104 | 7.1 3 5.9 2.1 virginica 2 105 | 6.3 2.9 5.6 1.8 virginica 2 106 | 6.5 3 5.8 2.2 virginica 2 107 | 7.6 3 6.6 2.1 virginica 2 108 | 4.9 2.5 4.5 1.7 virginica 3 109 | 7.3 2.9 6.3 1.8 virginica 2 110 | 6.7 2.5 5.8 1.8 virginica 2 111 | 7.2 3.6 6.1 2.5 virginica 2 112 | 6.5 3.2 5.1 2 virginica 2 113 | 6.4 2.7 5.3 1.9 virginica 2 114 | 6.8 3 5.5 2.1 virginica 2 115 | 5.7 2.5 5 2 virginica 3 116 | 5.8 2.8 5.1 2.4 virginica 3 117 | 6.4 3.2 5.3 2.3 virginica 2 118 | 6.5 3 5.5 1.8 virginica 2 119 | 7.7 3.8 6.7 2.2 virginica 2 120 | 7.7 2.6 6.9 2.3 virginica 2 121 | 6 
2.2 5 1.5 virginica 3 122 | 6.9 3.2 5.7 2.3 virginica 2 123 | 5.6 2.8 4.9 2 virginica 3 124 | 7.7 2.8 6.7 2 virginica 2 125 | 6.3 2.7 4.9 1.8 virginica 3 126 | 6.7 3.3 5.7 2.1 virginica 2 127 | 7.2 3.2 6 1.8 virginica 2 128 | 6.2 2.8 4.8 1.8 virginica 3 129 | 6.1 3 4.9 1.8 virginica 3 130 | 6.4 2.8 5.6 2.1 virginica 2 131 | 7.2 3 5.8 1.6 virginica 2 132 | 7.4 2.8 6.1 1.9 virginica 2 133 | 7.9 3.8 6.4 2 virginica 2 134 | 6.4 2.8 5.6 2.2 virginica 2 135 | 6.3 2.8 5.1 1.5 virginica 3 136 | 6.1 2.6 5.6 1.4 virginica 2 137 | 7.7 3 6.1 2.3 virginica 2 138 | 6.3 3.4 5.6 2.4 virginica 2 139 | 6.4 3.1 5.5 1.8 virginica 2 140 | 6 3 4.8 1.8 virginica 3 141 | 6.9 3.1 5.4 2.1 virginica 2 142 | 6.7 3.1 5.6 2.4 virginica 2 143 | 6.9 3.1 5.1 2.3 virginica 2 144 | 5.8 2.7 5.1 1.9 virginica 3 145 | 6.8 3.2 5.9 2.3 virginica 2 146 | 6.7 3.3 5.7 2.5 virginica 2 147 | 6.7 3 5.2 2.3 virginica 2 148 | 6.3 2.5 5 1.9 virginica 3 149 | 6.5 3 5.2 2 virginica 2 150 | 6.2 3.4 5.4 2.3 virginica 2 151 | 5.9 3 5.1 1.8 virginica 3 152 | -------------------------------------------------------------------------------- /data/iris.kmeans.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 | 5 | 6 | 2013-01-25 10:01:53 7 |
8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 5.006 3.428 1.462 0.246 30 | 31 | 32 | 6.85 3.07368421052632 5.74210526315789 2.07105263157895 33 | 34 | 35 | 5.90161290322581 2.74838709677419 4.39354838709678 1.43387096774194 36 | 37 | 38 |
39 | -------------------------------------------------------------------------------- /data/iris.lm_p.tsv: -------------------------------------------------------------------------------- 1 | sepal_length sepal_width petal_length petal_width species predict 2 | 5.1 3.5 1.4 0.2 setosa 5.03682930924758 3 | 4.9 3 1.4 0.2 setosa 4.76958829322152 4 | 4.7 3.2 1.3 0.2 setosa 4.80738114334108 5 | 4.6 3.1 1.5 0.2 setosa 4.89214005271759 6 | 5 3.6 1.4 0.2 setosa 5.09027751245279 7 | 5.4 3.9 1.7 0.4 setosa 5.41495647572261 8 | 4.6 3.4 1.4 0.3 setosa 4.96189294843316 9 | 5 3.4 1.5 0.2 setosa 5.05248466233323 10 | 4.4 2.9 1.4 0.2 setosa 4.7161400900163 11 | 4.9 3.1 1.5 0.1 setosa 4.91362821032679 12 | 5.4 3.7 1.5 0.2 setosa 5.21282927194886 13 | 4.8 3.4 1.6 0.2 setosa 5.12158821862409 14 | 4.8 3 1.4 0.1 setosa 4.79107645083072 15 | 4.3 3 1.1 0.1 setosa 4.58376578195813 16 | 5.8 4 1.2 0.2 setosa 5.16586321269191 17 | 5.7 4.4 1.5 0.4 setosa 5.54399037916694 18 | 5.4 3.9 1.3 0.4 setosa 5.13854225055916 19 | 5.1 3.5 1.4 0.3 setosa 5.01534115163837 20 | 5.7 3.8 1.7 0.3 setosa 5.3829964301266 21 | 5.1 3.8 1.5 0.3 setosa 5.24478931754487 22 | 5.4 3.4 1.7 0.2 setosa 5.19069177491495 23 | 5.1 3.7 1.5 0.4 setosa 5.16985295673046 24 | 4.6 3.6 1 0.2 setosa 4.81386328728934 25 | 5.1 3.3 1.7 0.5 setosa 5.07277909888213 26 | 4.8 3.4 1.9 0.2 setosa 5.32889888749668 27 | 5 3 1.6 0.2 setosa 4.90779540580324 28 | 5 3.4 1.6 0.4 setosa 5.07861190340568 29 | 5.2 3.5 1.5 0.2 setosa 5.10593286553844 30 | 5.2 3.4 1.4 0.2 setosa 4.98338110604236 31 | 4.7 3.2 1.6 0.2 setosa 5.01469181221367 32 | 4.8 3.1 1.6 0.2 setosa 4.96124360900845 33 | 5.4 3.4 1.5 0.4 setosa 5.00950834711482 34 | 5.2 4.1 1.5 0.1 setosa 5.44811024237891 35 | 5.5 4.2 1.4 0.2 setosa 5.41096673168406 36 | 4.9 3.1 1.5 0.2 setosa 4.89214005271759 37 | 5 3.2 1.2 0.2 setosa 4.73827758705021 38 | 5.5 3.5 1.3 0.2 setosa 4.96772575295671 39 | 4.9 3.6 1.4 0.1 setosa 5.11176567006199 40 | 4.4 3 1.3 0.2 setosa 4.70048473693065 41 | 5.1 3.4 1.5 0.2 
setosa 5.05248466233323 42 | 5 3.5 1.3 0.3 setosa 4.94623759534751 43 | 4.5 2.3 1.3 0.3 setosa 4.30485915688497 44 | 4.4 3.2 1.3 0.2 setosa 4.80738114334108 45 | 5 3.5 1.6 0.6 setosa 5.08908379139249 46 | 5.1 3.8 1.9 0.4 setosa 5.49971538509912 47 | 4.8 3 1.4 0.3 setosa 4.74810013561231 48 | 5.1 3.8 1.6 0.2 setosa 5.33538103144494 49 | 4.6 3.2 1.4 0.2 setosa 4.87648469963194 50 | 5.3 3.7 1.5 0.2 setosa 5.21282927194886 51 | 5 3.3 1.4 0.2 setosa 4.92993290283715 52 | 7 3.2 4.7 1.4 versicolor 6.46753665223872 53 | 6.4 3.2 4.5 1.5 versicolor 6.30784138204779 54 | 6.9 3.1 4.9 1.5 versicolor 6.53080740400603 55 | 5.5 2.3 4 1.3 versicolor 5.52426608696498 56 | 6.5 2.8 4.6 1.5 versicolor 6.16315212551781 57 | 5.7 2.8 4.5 1.3 versicolor 6.13702488444535 58 | 6.3 3.3 4.7 1.6 versicolor 6.47800854022553 59 | 4.9 2.4 3.3 1 versicolor 5.15845386896176 60 | 6.6 2.9 4.6 1.3 versicolor 6.25957664394142 61 | 5.2 2.7 3.9 1.4 versicolor 5.64746718588576 62 | 5 2 3.5 1 versicolor 5.08286816872263 63 | 5.9 3 4.2 1.5 versicolor 5.99363430676478 64 | 6 2.2 4 1 versicolor 5.53528235658737 65 | 6.1 2.9 4.7 1.4 versicolor 6.30719204262309 66 | 5.6 2.9 3.6 1.3 versicolor 5.5685410810328 67 | 6.7 3.1 4.4 1.4 versicolor 6.20677778016092 68 | 5.6 3 4.5 1.5 versicolor 6.20094497563737 69 | 5.8 2.7 4.1 1 versicolor 5.87162692890429 70 | 6.2 2.2 4.5 1.5 versicolor 5.77335934999567 71 | 5.6 2.5 3.9 1.1 versicolor 5.60503525230294 72 | 5.9 3.2 4.8 1.8 versicolor 6.45068757809278 73 | 6.1 2.8 4 1.3 versicolor 5.79150710299104 74 | 6.3 2.5 4.9 1.5 versicolor 6.21011818477476 75 | 6.1 2.8 4.7 1.2 versicolor 6.29672015463628 76 | 6.4 2.9 4.3 1.3 versicolor 6.05226597506884 77 | 6.6 3 4.4 1.4 versicolor 6.15332957695571 78 | 6.8 2.8 4.8 1.4 versicolor 6.32284739570874 79 | 6.7 3 5 1.7 versicolor 6.50348644187328 80 | 6 2.9 4.5 1.5 versicolor 6.14749677243216 81 | 5.7 2.6 3.5 1 versicolor 5.40355738795391 82 | 5.5 2.4 3.8 1.1 versicolor 5.48248349280687 83 | 5.5 2.4 3.7 1 versicolor 5.43486809412521 84 | 
5.8 2.7 3.9 1.2 versicolor 5.69044350110416 85 | 6 2.7 5.1 1.6 versicolor 6.43373354615771 86 | 5.4 3 4.5 1.5 versicolor 6.20094497563737 87 | 6 3.4 4.5 1.6 versicolor 6.39324963084902 88 | 6.7 3.1 4.7 1.5 versicolor 6.39260029142431 89 | 6.3 2.3 4.4 1.3 versicolor 5.80068031212843 90 | 5.6 3 4.1 1.3 versicolor 5.96750706569232 91 | 5.5 2.5 4 1.3 versicolor 5.6311624933754 92 | 5.5 2.6 4.4 1.2 versicolor 5.98251307935327 93 | 6.1 3 4.6 1.4 versicolor 6.29153668953743 94 | 5.8 2.6 4 1.2 versicolor 5.70609885418982 95 | 5 2.3 3.3 1 versicolor 5.10500566575654 96 | 5.6 2.7 4.2 1.3 versicolor 5.87626601236755 97 | 5.7 3 4.2 1.2 versicolor 6.05809877959239 98 | 5.7 2.9 4.2 1.3 versicolor 5.98316241877798 99 | 6.2 2.9 4.3 1.3 versicolor 6.05226597506884 100 | 5.1 2.5 3 1.1 versicolor 4.98310324568518 101 | 5.7 2.8 4.1 1.3 versicolor 5.8606106592819 102 | 6.3 3.3 6 2.5 virginica 6.99577962517456 103 | 5.8 2.7 5.1 1.9 virginica 6.18208734498073 104 | 7.1 3 5.9 2.1 virginica 6.85228408970486 105 | 6.3 2.9 5.6 1.8 virginica 6.65598969045467 106 | 6.5 3 5.8 2.2 virginica 6.7616923758048 107 | 7.6 3 6.6 2.1 virginica 7.3360089837409 108 | 4.9 2.5 4.5 1.7 virginica 5.70354591604354 109 | 7.3 2.9 6.3 1.8 virginica 7.13971458449071 110 | 6.7 2.5 5.8 1.8 virginica 6.58040399021555 111 | 7.2 3.6 6.1 2.5 virginica 7.22522779108105 112 | 6.5 3.2 5.1 2 virginica 6.42784020339759 113 | 6.4 2.7 5.3 1.9 virginica 6.32029445756246 114 | 6.8 3 5.5 2.1 virginica 6.57586986454141 115 | 5.7 2.5 5 2 virginica 5.98459922467024 116 | 5.8 2.8 5.1 2.4 virginica 6.12809476013993 117 | 6.4 3.2 5.3 2.3 virginica 6.50158284315171 118 | 6.5 3 5.5 1.8 virginica 6.64033433736902 119 | 7.7 3.8 6.7 2.2 virginica 7.81121000806426 120 | 7.7 2.6 6.9 2.3 virginica 7.28655052457424 121 | 6 2.2 5 1.5 virginica 5.93169540310062 122 | 6.9 3.2 5.7 2.3 virginica 6.77799706831516 123 | 5.6 2.8 4.9 2 virginica 6.07584027799502 124 | 7.7 2.8 6.7 2 virginica 7.31970429123054 125 | 6.3 2.7 4.9 1.8 virginica 
6.06536839000821 126 | 6.7 3.3 5.7 2.1 virginica 6.87442158673877 127 | 7.2 3.2 6 1.8 virginica 7.09274852523376 128 | 6.2 2.8 4.8 1.8 virginica 6.04971303692256 129 | 6.1 3 4.9 1.8 virginica 6.22571299962384 130 | 6.4 2.8 5.6 2.1 virginica 6.53807701442185 131 | 7.2 3 5.8 1.6 virginica 6.89062132146001 132 | 7.4 2.8 6.1 1.9 virginica 6.92657111109457 133 | 7.9 3.8 6.4 2 virginica 7.64687565441008 134 | 6.4 2.8 5.6 2.2 virginica 6.51658885681265 135 | 6.3 2.8 5.1 1.5 virginica 6.32148817862275 136 | 6.1 2.6 5.6 1.4 virginica 6.58159771127584 137 | 7.7 3 6.1 2.3 virginica 6.94751488706819 138 | 6.3 3.4 5.6 2.4 virginica 6.79430176082552 139 | 6.4 3.1 5.5 1.8 virginica 6.69378254057423 140 | 6 3 4.8 1.8 virginica 6.15660944333298 141 | 6.9 3.1 5.4 2.1 virginica 6.56021451145576 142 | 6.7 3.1 5.6 2.4 virginica 6.63395715120988 143 | 6.9 3.1 5.1 2.3 virginica 6.30992752736477 144 | 5.8 2.7 5.1 1.9 virginica 6.18208734498073 145 | 6.8 3.2 5.9 2.3 virginica 6.91620418089689 146 | 6.7 3.3 5.7 2.5 virginica 6.78846895630197 147 | 6.7 3 5.2 2.3 virginica 6.32558288045042 148 | 6.3 2.5 5 1.9 virginica 6.00608738227944 149 | 6.5 3 5.2 2 virginica 6.39004735327803 150 | 6.2 3.4 5.4 2.3 virginica 6.677582805853 151 | 5.9 3 5.1 1.8 virginica 6.36392011220557 152 | -------------------------------------------------------------------------------- /data/iris.lm_p.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 | 5 | 6 | 2013-01-25 10:01:53 7 |
8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 |
37 | -------------------------------------------------------------------------------- /data/iris.multinom.tsv: -------------------------------------------------------------------------------- 1 | sepal_length sepal_width petal_length petal_width species predict 2 | 5.1 3.5 1.4 0.2 setosa setosa 3 | 4.9 3 1.4 0.2 setosa setosa 4 | 4.7 3.2 1.3 0.2 setosa setosa 5 | 4.6 3.1 1.5 0.2 setosa setosa 6 | 5 3.6 1.4 0.2 setosa setosa 7 | 5.4 3.9 1.7 0.4 setosa setosa 8 | 4.6 3.4 1.4 0.3 setosa setosa 9 | 5 3.4 1.5 0.2 setosa setosa 10 | 4.4 2.9 1.4 0.2 setosa setosa 11 | 4.9 3.1 1.5 0.1 setosa setosa 12 | 5.4 3.7 1.5 0.2 setosa setosa 13 | 4.8 3.4 1.6 0.2 setosa setosa 14 | 4.8 3 1.4 0.1 setosa setosa 15 | 4.3 3 1.1 0.1 setosa setosa 16 | 5.8 4 1.2 0.2 setosa setosa 17 | 5.7 4.4 1.5 0.4 setosa setosa 18 | 5.4 3.9 1.3 0.4 setosa setosa 19 | 5.1 3.5 1.4 0.3 setosa setosa 20 | 5.7 3.8 1.7 0.3 setosa setosa 21 | 5.1 3.8 1.5 0.3 setosa setosa 22 | 5.4 3.4 1.7 0.2 setosa setosa 23 | 5.1 3.7 1.5 0.4 setosa setosa 24 | 4.6 3.6 1 0.2 setosa setosa 25 | 5.1 3.3 1.7 0.5 setosa setosa 26 | 4.8 3.4 1.9 0.2 setosa setosa 27 | 5 3 1.6 0.2 setosa setosa 28 | 5 3.4 1.6 0.4 setosa setosa 29 | 5.2 3.5 1.5 0.2 setosa setosa 30 | 5.2 3.4 1.4 0.2 setosa setosa 31 | 4.7 3.2 1.6 0.2 setosa setosa 32 | 4.8 3.1 1.6 0.2 setosa setosa 33 | 5.4 3.4 1.5 0.4 setosa setosa 34 | 5.2 4.1 1.5 0.1 setosa setosa 35 | 5.5 4.2 1.4 0.2 setosa setosa 36 | 4.9 3.1 1.5 0.2 setosa setosa 37 | 5 3.2 1.2 0.2 setosa setosa 38 | 5.5 3.5 1.3 0.2 setosa setosa 39 | 4.9 3.6 1.4 0.1 setosa setosa 40 | 4.4 3 1.3 0.2 setosa setosa 41 | 5.1 3.4 1.5 0.2 setosa setosa 42 | 5 3.5 1.3 0.3 setosa setosa 43 | 4.5 2.3 1.3 0.3 setosa setosa 44 | 4.4 3.2 1.3 0.2 setosa setosa 45 | 5 3.5 1.6 0.6 setosa setosa 46 | 5.1 3.8 1.9 0.4 setosa setosa 47 | 4.8 3 1.4 0.3 setosa setosa 48 | 5.1 3.8 1.6 0.2 setosa setosa 49 | 4.6 3.2 1.4 0.2 setosa setosa 50 | 5.3 3.7 1.5 0.2 setosa setosa 51 | 5 3.3 1.4 0.2 setosa setosa 52 | 7 3.2 4.7 1.4 
versicolor versicolor 53 | 6.4 3.2 4.5 1.5 versicolor versicolor 54 | 6.9 3.1 4.9 1.5 versicolor versicolor 55 | 5.5 2.3 4 1.3 versicolor versicolor 56 | 6.5 2.8 4.6 1.5 versicolor versicolor 57 | 5.7 2.8 4.5 1.3 versicolor versicolor 58 | 6.3 3.3 4.7 1.6 versicolor versicolor 59 | 4.9 2.4 3.3 1 versicolor versicolor 60 | 6.6 2.9 4.6 1.3 versicolor versicolor 61 | 5.2 2.7 3.9 1.4 versicolor versicolor 62 | 5 2 3.5 1 versicolor versicolor 63 | 5.9 3 4.2 1.5 versicolor versicolor 64 | 6 2.2 4 1 versicolor versicolor 65 | 6.1 2.9 4.7 1.4 versicolor versicolor 66 | 5.6 2.9 3.6 1.3 versicolor versicolor 67 | 6.7 3.1 4.4 1.4 versicolor versicolor 68 | 5.6 3 4.5 1.5 versicolor versicolor 69 | 5.8 2.7 4.1 1 versicolor versicolor 70 | 6.2 2.2 4.5 1.5 versicolor versicolor 71 | 5.6 2.5 3.9 1.1 versicolor versicolor 72 | 5.9 3.2 4.8 1.8 versicolor virginica 73 | 6.1 2.8 4 1.3 versicolor versicolor 74 | 6.3 2.5 4.9 1.5 versicolor versicolor 75 | 6.1 2.8 4.7 1.2 versicolor versicolor 76 | 6.4 2.9 4.3 1.3 versicolor versicolor 77 | 6.6 3 4.4 1.4 versicolor versicolor 78 | 6.8 2.8 4.8 1.4 versicolor versicolor 79 | 6.7 3 5 1.7 versicolor versicolor 80 | 6 2.9 4.5 1.5 versicolor versicolor 81 | 5.7 2.6 3.5 1 versicolor versicolor 82 | 5.5 2.4 3.8 1.1 versicolor versicolor 83 | 5.5 2.4 3.7 1 versicolor versicolor 84 | 5.8 2.7 3.9 1.2 versicolor versicolor 85 | 6 2.7 5.1 1.6 versicolor virginica 86 | 5.4 3 4.5 1.5 versicolor versicolor 87 | 6 3.4 4.5 1.6 versicolor versicolor 88 | 6.7 3.1 4.7 1.5 versicolor versicolor 89 | 6.3 2.3 4.4 1.3 versicolor versicolor 90 | 5.6 3 4.1 1.3 versicolor versicolor 91 | 5.5 2.5 4 1.3 versicolor versicolor 92 | 5.5 2.6 4.4 1.2 versicolor versicolor 93 | 6.1 3 4.6 1.4 versicolor versicolor 94 | 5.8 2.6 4 1.2 versicolor versicolor 95 | 5 2.3 3.3 1 versicolor versicolor 96 | 5.6 2.7 4.2 1.3 versicolor versicolor 97 | 5.7 3 4.2 1.2 versicolor versicolor 98 | 5.7 2.9 4.2 1.3 versicolor versicolor 99 | 6.2 2.9 4.3 1.3 versicolor versicolor 100 | 5.1 2.5 
3 1.1 versicolor versicolor 101 | 5.7 2.8 4.1 1.3 versicolor versicolor 102 | 6.3 3.3 6 2.5 virginica virginica 103 | 5.8 2.7 5.1 1.9 virginica virginica 104 | 7.1 3 5.9 2.1 virginica virginica 105 | 6.3 2.9 5.6 1.8 virginica virginica 106 | 6.5 3 5.8 2.2 virginica virginica 107 | 7.6 3 6.6 2.1 virginica virginica 108 | 4.9 2.5 4.5 1.7 virginica virginica 109 | 7.3 2.9 6.3 1.8 virginica virginica 110 | 6.7 2.5 5.8 1.8 virginica virginica 111 | 7.2 3.6 6.1 2.5 virginica virginica 112 | 6.5 3.2 5.1 2 virginica virginica 113 | 6.4 2.7 5.3 1.9 virginica virginica 114 | 6.8 3 5.5 2.1 virginica virginica 115 | 5.7 2.5 5 2 virginica virginica 116 | 5.8 2.8 5.1 2.4 virginica virginica 117 | 6.4 3.2 5.3 2.3 virginica virginica 118 | 6.5 3 5.5 1.8 virginica virginica 119 | 7.7 3.8 6.7 2.2 virginica virginica 120 | 7.7 2.6 6.9 2.3 virginica virginica 121 | 6 2.2 5 1.5 virginica virginica 122 | 6.9 3.2 5.7 2.3 virginica virginica 123 | 5.6 2.8 4.9 2 virginica virginica 124 | 7.7 2.8 6.7 2 virginica virginica 125 | 6.3 2.7 4.9 1.8 virginica virginica 126 | 6.7 3.3 5.7 2.1 virginica virginica 127 | 7.2 3.2 6 1.8 virginica virginica 128 | 6.2 2.8 4.8 1.8 virginica virginica 129 | 6.1 3 4.9 1.8 virginica virginica 130 | 6.4 2.8 5.6 2.1 virginica virginica 131 | 7.2 3 5.8 1.6 virginica virginica 132 | 7.4 2.8 6.1 1.9 virginica virginica 133 | 7.9 3.8 6.4 2 virginica virginica 134 | 6.4 2.8 5.6 2.2 virginica virginica 135 | 6.3 2.8 5.1 1.5 virginica virginica 136 | 6.1 2.6 5.6 1.4 virginica virginica 137 | 7.7 3 6.1 2.3 virginica virginica 138 | 6.3 3.4 5.6 2.4 virginica virginica 139 | 6.4 3.1 5.5 1.8 virginica virginica 140 | 6 3 4.8 1.8 virginica virginica 141 | 6.9 3.1 5.4 2.1 virginica virginica 142 | 6.7 3.1 5.6 2.4 virginica virginica 143 | 6.9 3.1 5.1 2.3 virginica virginica 144 | 5.8 2.7 5.1 1.9 virginica virginica 145 | 6.8 3.2 5.9 2.3 virginica virginica 146 | 6.7 3.3 5.7 2.5 virginica virginica 147 | 6.7 3 5.2 2.3 virginica virginica 148 | 6.3 2.5 5 1.9 virginica 
virginica 149 | 6.5 3 5.2 2 virginica virginica 150 | 6.2 3.4 5.4 2.3 virginica virginica 151 | 5.9 3 5.1 1.8 virginica virginica 152 | -------------------------------------------------------------------------------- /data/iris.multinom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 | 5 | 6 | 2013-01-25 10:01:53 7 |
8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 |
48 | -------------------------------------------------------------------------------- /data/iris.nn.tsv: -------------------------------------------------------------------------------- 1 | Sepal.L. Sepal.W. Petal.L. Petal.W. species predict 2 | 5.1 3.5 1.4 0.2 setosa setosa 3 | 4.9 3 1.4 0.2 setosa setosa 4 | 4.7 3.2 1.3 0.2 setosa setosa 5 | 4.6 3.1 1.5 0.2 setosa setosa 6 | 5 3.6 1.4 0.2 setosa setosa 7 | 5.4 3.9 1.7 0.4 setosa setosa 8 | 4.6 3.4 1.4 0.3 setosa setosa 9 | 5 3.4 1.5 0.2 setosa setosa 10 | 4.4 2.9 1.4 0.2 setosa setosa 11 | 4.9 3.1 1.5 0.1 setosa setosa 12 | 5.4 3.7 1.5 0.2 setosa setosa 13 | 4.8 3.4 1.6 0.2 setosa setosa 14 | 4.8 3 1.4 0.1 setosa setosa 15 | 4.3 3 1.1 0.1 setosa setosa 16 | 5.8 4 1.2 0.2 setosa setosa 17 | 5.7 4.4 1.5 0.4 setosa setosa 18 | 5.4 3.9 1.3 0.4 setosa setosa 19 | 5.1 3.5 1.4 0.3 setosa setosa 20 | 5.7 3.8 1.7 0.3 setosa setosa 21 | 5.1 3.8 1.5 0.3 setosa setosa 22 | 5.4 3.4 1.7 0.2 setosa setosa 23 | 5.1 3.7 1.5 0.4 setosa setosa 24 | 4.6 3.6 1 0.2 setosa setosa 25 | 5.1 3.3 1.7 0.5 setosa setosa 26 | 4.8 3.4 1.9 0.2 setosa setosa 27 | 5 3 1.6 0.2 setosa setosa 28 | 5 3.4 1.6 0.4 setosa setosa 29 | 5.2 3.5 1.5 0.2 setosa setosa 30 | 5.2 3.4 1.4 0.2 setosa setosa 31 | 4.7 3.2 1.6 0.2 setosa setosa 32 | 4.8 3.1 1.6 0.2 setosa setosa 33 | 5.4 3.4 1.5 0.4 setosa setosa 34 | 5.2 4.1 1.5 0.1 setosa setosa 35 | 5.5 4.2 1.4 0.2 setosa setosa 36 | 4.9 3.1 1.5 0.2 setosa setosa 37 | 5 3.2 1.2 0.2 setosa setosa 38 | 5.5 3.5 1.3 0.2 setosa setosa 39 | 4.9 3.6 1.4 0.1 setosa setosa 40 | 4.4 3 1.3 0.2 setosa setosa 41 | 5.1 3.4 1.5 0.2 setosa setosa 42 | 5 3.5 1.3 0.3 setosa setosa 43 | 4.5 2.3 1.3 0.3 setosa setosa 44 | 4.4 3.2 1.3 0.2 setosa setosa 45 | 5 3.5 1.6 0.6 setosa setosa 46 | 5.1 3.8 1.9 0.4 setosa setosa 47 | 4.8 3 1.4 0.3 setosa setosa 48 | 5.1 3.8 1.6 0.2 setosa setosa 49 | 4.6 3.2 1.4 0.2 setosa setosa 50 | 5.3 3.7 1.5 0.2 setosa setosa 51 | 5 3.3 1.4 0.2 setosa setosa 52 | 7 3.2 4.7 1.4 versicolor versicolor 53 | 
6.4 3.2 4.5 1.5 versicolor versicolor 54 | 6.9 3.1 4.9 1.5 versicolor versicolor 55 | 5.5 2.3 4 1.3 versicolor versicolor 56 | 6.5 2.8 4.6 1.5 versicolor versicolor 57 | 5.7 2.8 4.5 1.3 versicolor versicolor 58 | 6.3 3.3 4.7 1.6 versicolor versicolor 59 | 4.9 2.4 3.3 1 versicolor versicolor 60 | 6.6 2.9 4.6 1.3 versicolor versicolor 61 | 5.2 2.7 3.9 1.4 versicolor versicolor 62 | 5 2 3.5 1 versicolor versicolor 63 | 5.9 3 4.2 1.5 versicolor versicolor 64 | 6 2.2 4 1 versicolor versicolor 65 | 6.1 2.9 4.7 1.4 versicolor versicolor 66 | 5.6 2.9 3.6 1.3 versicolor versicolor 67 | 6.7 3.1 4.4 1.4 versicolor versicolor 68 | 5.6 3 4.5 1.5 versicolor versicolor 69 | 5.8 2.7 4.1 1 versicolor versicolor 70 | 6.2 2.2 4.5 1.5 versicolor versicolor 71 | 5.6 2.5 3.9 1.1 versicolor versicolor 72 | 5.9 3.2 4.8 1.8 versicolor virginica 73 | 6.1 2.8 4 1.3 versicolor versicolor 74 | 6.3 2.5 4.9 1.5 versicolor virginica 75 | 6.1 2.8 4.7 1.2 versicolor versicolor 76 | 6.4 2.9 4.3 1.3 versicolor versicolor 77 | 6.6 3 4.4 1.4 versicolor versicolor 78 | 6.8 2.8 4.8 1.4 versicolor versicolor 79 | 6.7 3 5 1.7 versicolor versicolor 80 | 6 2.9 4.5 1.5 versicolor versicolor 81 | 5.7 2.6 3.5 1 versicolor versicolor 82 | 5.5 2.4 3.8 1.1 versicolor versicolor 83 | 5.5 2.4 3.7 1 versicolor versicolor 84 | 5.8 2.7 3.9 1.2 versicolor versicolor 85 | 6 2.7 5.1 1.6 versicolor virginica 86 | 5.4 3 4.5 1.5 versicolor versicolor 87 | 6 3.4 4.5 1.6 versicolor versicolor 88 | 6.7 3.1 4.7 1.5 versicolor versicolor 89 | 6.3 2.3 4.4 1.3 versicolor versicolor 90 | 5.6 3 4.1 1.3 versicolor versicolor 91 | 5.5 2.5 4 1.3 versicolor versicolor 92 | 5.5 2.6 4.4 1.2 versicolor versicolor 93 | 6.1 3 4.6 1.4 versicolor versicolor 94 | 5.8 2.6 4 1.2 versicolor versicolor 95 | 5 2.3 3.3 1 versicolor versicolor 96 | 5.6 2.7 4.2 1.3 versicolor versicolor 97 | 5.7 3 4.2 1.2 versicolor versicolor 98 | 5.7 2.9 4.2 1.3 versicolor versicolor 99 | 6.2 2.9 4.3 1.3 versicolor versicolor 100 | 5.1 2.5 3 1.1 versicolor versicolor 
101 | 5.7 2.8 4.1 1.3 versicolor versicolor 102 | 6.3 3.3 6 2.5 virginica virginica 103 | 5.8 2.7 5.1 1.9 virginica virginica 104 | 7.1 3 5.9 2.1 virginica virginica 105 | 6.3 2.9 5.6 1.8 virginica virginica 106 | 6.5 3 5.8 2.2 virginica virginica 107 | 7.6 3 6.6 2.1 virginica virginica 108 | 4.9 2.5 4.5 1.7 virginica virginica 109 | 7.3 2.9 6.3 1.8 virginica virginica 110 | 6.7 2.5 5.8 1.8 virginica virginica 111 | 7.2 3.6 6.1 2.5 virginica virginica 112 | 6.5 3.2 5.1 2 virginica virginica 113 | 6.4 2.7 5.3 1.9 virginica virginica 114 | 6.8 3 5.5 2.1 virginica virginica 115 | 5.7 2.5 5 2 virginica virginica 116 | 5.8 2.8 5.1 2.4 virginica virginica 117 | 6.4 3.2 5.3 2.3 virginica virginica 118 | 6.5 3 5.5 1.8 virginica virginica 119 | 7.7 3.8 6.7 2.2 virginica virginica 120 | 7.7 2.6 6.9 2.3 virginica virginica 121 | 6 2.2 5 1.5 virginica virginica 122 | 6.9 3.2 5.7 2.3 virginica virginica 123 | 5.6 2.8 4.9 2 virginica virginica 124 | 7.7 2.8 6.7 2 virginica virginica 125 | 6.3 2.7 4.9 1.8 virginica virginica 126 | 6.7 3.3 5.7 2.1 virginica virginica 127 | 7.2 3.2 6 1.8 virginica virginica 128 | 6.2 2.8 4.8 1.8 virginica virginica 129 | 6.1 3 4.9 1.8 virginica virginica 130 | 6.4 2.8 5.6 2.1 virginica virginica 131 | 7.2 3 5.8 1.6 virginica virginica 132 | 7.4 2.8 6.1 1.9 virginica virginica 133 | 7.9 3.8 6.4 2 virginica virginica 134 | 6.4 2.8 5.6 2.2 virginica virginica 135 | 6.3 2.8 5.1 1.5 virginica virginica 136 | 6.1 2.6 5.6 1.4 virginica virginica 137 | 7.7 3 6.1 2.3 virginica virginica 138 | 6.3 3.4 5.6 2.4 virginica virginica 139 | 6.4 3.1 5.5 1.8 virginica virginica 140 | 6 3 4.8 1.8 virginica virginica 141 | 6.9 3.1 5.4 2.1 virginica virginica 142 | 6.7 3.1 5.6 2.4 virginica virginica 143 | 6.9 3.1 5.1 2.3 virginica virginica 144 | 5.8 2.7 5.1 1.9 virginica virginica 145 | 6.8 3.2 5.9 2.3 virginica virginica 146 | 6.7 3.3 5.7 2.5 virginica virginica 147 | 6.7 3 5.2 2.3 virginica virginica 148 | 6.3 2.5 5 1.9 virginica virginica 149 | 6.5 3 5.2 2 
virginica virginica 150 | 6.2 3.4 5.4 2.3 virginica virginica 151 | 5.9 3 5.1 1.8 virginica virginica 152 | -------------------------------------------------------------------------------- /data/iris.nn.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 | 5 | 6 | 2013-01-25 10:01:53 7 |
8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 |
102 | -------------------------------------------------------------------------------- /data/iris.rf.tsv: -------------------------------------------------------------------------------- 1 | sepal_length sepal_width petal_length petal_width species predict 2 | 5.1 3.5 1.4 0.2 setosa setosa 3 | 4.9 3 1.4 0.2 setosa setosa 4 | 4.7 3.2 1.3 0.2 setosa setosa 5 | 4.6 3.1 1.5 0.2 setosa setosa 6 | 5 3.6 1.4 0.2 setosa setosa 7 | 5.4 3.9 1.7 0.4 setosa setosa 8 | 4.6 3.4 1.4 0.3 setosa setosa 9 | 5 3.4 1.5 0.2 setosa setosa 10 | 4.4 2.9 1.4 0.2 setosa setosa 11 | 4.9 3.1 1.5 0.1 setosa setosa 12 | 5.4 3.7 1.5 0.2 setosa setosa 13 | 4.8 3.4 1.6 0.2 setosa setosa 14 | 4.8 3 1.4 0.1 setosa setosa 15 | 4.3 3 1.1 0.1 setosa setosa 16 | 5.8 4 1.2 0.2 setosa setosa 17 | 5.7 4.4 1.5 0.4 setosa setosa 18 | 5.4 3.9 1.3 0.4 setosa setosa 19 | 5.1 3.5 1.4 0.3 setosa setosa 20 | 5.7 3.8 1.7 0.3 setosa setosa 21 | 5.1 3.8 1.5 0.3 setosa setosa 22 | 5.4 3.4 1.7 0.2 setosa setosa 23 | 5.1 3.7 1.5 0.4 setosa setosa 24 | 4.6 3.6 1 0.2 setosa setosa 25 | 5.1 3.3 1.7 0.5 setosa setosa 26 | 4.8 3.4 1.9 0.2 setosa setosa 27 | 5 3 1.6 0.2 setosa setosa 28 | 5 3.4 1.6 0.4 setosa setosa 29 | 5.2 3.5 1.5 0.2 setosa setosa 30 | 5.2 3.4 1.4 0.2 setosa setosa 31 | 4.7 3.2 1.6 0.2 setosa setosa 32 | 4.8 3.1 1.6 0.2 setosa setosa 33 | 5.4 3.4 1.5 0.4 setosa setosa 34 | 5.2 4.1 1.5 0.1 setosa setosa 35 | 5.5 4.2 1.4 0.2 setosa setosa 36 | 4.9 3.1 1.5 0.2 setosa setosa 37 | 5 3.2 1.2 0.2 setosa setosa 38 | 5.5 3.5 1.3 0.2 setosa setosa 39 | 4.9 3.6 1.4 0.1 setosa setosa 40 | 4.4 3 1.3 0.2 setosa setosa 41 | 5.1 3.4 1.5 0.2 setosa setosa 42 | 5 3.5 1.3 0.3 setosa setosa 43 | 4.5 2.3 1.3 0.3 setosa setosa 44 | 4.4 3.2 1.3 0.2 setosa setosa 45 | 5 3.5 1.6 0.6 setosa setosa 46 | 5.1 3.8 1.9 0.4 setosa setosa 47 | 4.8 3 1.4 0.3 setosa setosa 48 | 5.1 3.8 1.6 0.2 setosa setosa 49 | 4.6 3.2 1.4 0.2 setosa setosa 50 | 5.3 3.7 1.5 0.2 setosa setosa 51 | 5 3.3 1.4 0.2 setosa setosa 52 | 7 3.2 4.7 1.4 versicolor 
versicolor 53 | 6.4 3.2 4.5 1.5 versicolor versicolor 54 | 6.9 3.1 4.9 1.5 versicolor versicolor 55 | 5.5 2.3 4 1.3 versicolor versicolor 56 | 6.5 2.8 4.6 1.5 versicolor versicolor 57 | 5.7 2.8 4.5 1.3 versicolor versicolor 58 | 6.3 3.3 4.7 1.6 versicolor versicolor 59 | 4.9 2.4 3.3 1 versicolor versicolor 60 | 6.6 2.9 4.6 1.3 versicolor versicolor 61 | 5.2 2.7 3.9 1.4 versicolor versicolor 62 | 5 2 3.5 1 versicolor versicolor 63 | 5.9 3 4.2 1.5 versicolor versicolor 64 | 6 2.2 4 1 versicolor versicolor 65 | 6.1 2.9 4.7 1.4 versicolor versicolor 66 | 5.6 2.9 3.6 1.3 versicolor versicolor 67 | 6.7 3.1 4.4 1.4 versicolor versicolor 68 | 5.6 3 4.5 1.5 versicolor versicolor 69 | 5.8 2.7 4.1 1 versicolor versicolor 70 | 6.2 2.2 4.5 1.5 versicolor versicolor 71 | 5.6 2.5 3.9 1.1 versicolor versicolor 72 | 5.9 3.2 4.8 1.8 versicolor virginica 73 | 6.1 2.8 4 1.3 versicolor versicolor 74 | 6.3 2.5 4.9 1.5 versicolor versicolor 75 | 6.1 2.8 4.7 1.2 versicolor versicolor 76 | 6.4 2.9 4.3 1.3 versicolor versicolor 77 | 6.6 3 4.4 1.4 versicolor versicolor 78 | 6.8 2.8 4.8 1.4 versicolor versicolor 79 | 6.7 3 5 1.7 versicolor versicolor 80 | 6 2.9 4.5 1.5 versicolor versicolor 81 | 5.7 2.6 3.5 1 versicolor versicolor 82 | 5.5 2.4 3.8 1.1 versicolor versicolor 83 | 5.5 2.4 3.7 1 versicolor versicolor 84 | 5.8 2.7 3.9 1.2 versicolor versicolor 85 | 6 2.7 5.1 1.6 versicolor virginica 86 | 5.4 3 4.5 1.5 versicolor versicolor 87 | 6 3.4 4.5 1.6 versicolor versicolor 88 | 6.7 3.1 4.7 1.5 versicolor versicolor 89 | 6.3 2.3 4.4 1.3 versicolor versicolor 90 | 5.6 3 4.1 1.3 versicolor versicolor 91 | 5.5 2.5 4 1.3 versicolor versicolor 92 | 5.5 2.6 4.4 1.2 versicolor versicolor 93 | 6.1 3 4.6 1.4 versicolor versicolor 94 | 5.8 2.6 4 1.2 versicolor versicolor 95 | 5 2.3 3.3 1 versicolor versicolor 96 | 5.6 2.7 4.2 1.3 versicolor versicolor 97 | 5.7 3 4.2 1.2 versicolor versicolor 98 | 5.7 2.9 4.2 1.3 versicolor versicolor 99 | 6.2 2.9 4.3 1.3 versicolor versicolor 100 | 5.1 2.5 3 1.1 
versicolor versicolor 101 | 5.7 2.8 4.1 1.3 versicolor versicolor 102 | 6.3 3.3 6 2.5 virginica virginica 103 | 5.8 2.7 5.1 1.9 virginica virginica 104 | 7.1 3 5.9 2.1 virginica virginica 105 | 6.3 2.9 5.6 1.8 virginica virginica 106 | 6.5 3 5.8 2.2 virginica virginica 107 | 7.6 3 6.6 2.1 virginica virginica 108 | 4.9 2.5 4.5 1.7 virginica versicolor 109 | 7.3 2.9 6.3 1.8 virginica virginica 110 | 6.7 2.5 5.8 1.8 virginica virginica 111 | 7.2 3.6 6.1 2.5 virginica virginica 112 | 6.5 3.2 5.1 2 virginica virginica 113 | 6.4 2.7 5.3 1.9 virginica virginica 114 | 6.8 3 5.5 2.1 virginica virginica 115 | 5.7 2.5 5 2 virginica virginica 116 | 5.8 2.8 5.1 2.4 virginica virginica 117 | 6.4 3.2 5.3 2.3 virginica virginica 118 | 6.5 3 5.5 1.8 virginica virginica 119 | 7.7 3.8 6.7 2.2 virginica virginica 120 | 7.7 2.6 6.9 2.3 virginica virginica 121 | 6 2.2 5 1.5 virginica virginica 122 | 6.9 3.2 5.7 2.3 virginica virginica 123 | 5.6 2.8 4.9 2 virginica virginica 124 | 7.7 2.8 6.7 2 virginica virginica 125 | 6.3 2.7 4.9 1.8 virginica virginica 126 | 6.7 3.3 5.7 2.1 virginica virginica 127 | 7.2 3.2 6 1.8 virginica virginica 128 | 6.2 2.8 4.8 1.8 virginica virginica 129 | 6.1 3 4.9 1.8 virginica virginica 130 | 6.4 2.8 5.6 2.1 virginica virginica 131 | 7.2 3 5.8 1.6 virginica virginica 132 | 7.4 2.8 6.1 1.9 virginica virginica 133 | 7.9 3.8 6.4 2 virginica virginica 134 | 6.4 2.8 5.6 2.2 virginica virginica 135 | 6.3 2.8 5.1 1.5 virginica virginica 136 | 6.1 2.6 5.6 1.4 virginica virginica 137 | 7.7 3 6.1 2.3 virginica virginica 138 | 6.3 3.4 5.6 2.4 virginica virginica 139 | 6.4 3.1 5.5 1.8 virginica virginica 140 | 6 3 4.8 1.8 virginica virginica 141 | 6.9 3.1 5.4 2.1 virginica virginica 142 | 6.7 3.1 5.6 2.4 virginica virginica 143 | 6.9 3.1 5.1 2.3 virginica virginica 144 | 5.8 2.7 5.1 1.9 virginica virginica 145 | 6.8 3.2 5.9 2.3 virginica virginica 146 | 6.7 3.3 5.7 2.5 virginica virginica 147 | 6.7 3 5.2 2.3 virginica virginica 148 | 6.3 2.5 5 1.9 virginica virginica 
149 | 6.5 3 5.2 2 virginica virginica 150 | 6.2 3.4 5.4 2.3 virginica virginica 151 | 5.9 3 5.1 1.8 virginica virginica 152 | -------------------------------------------------------------------------------- /data/iris.rpart.tsv: -------------------------------------------------------------------------------- 1 | sepal_length sepal_width petal_length petal_width species predict 2 | 5.1 3.5 1.4 0.2 setosa setosa 3 | 4.9 3 1.4 0.2 setosa setosa 4 | 4.7 3.2 1.3 0.2 setosa setosa 5 | 4.6 3.1 1.5 0.2 setosa setosa 6 | 5 3.6 1.4 0.2 setosa setosa 7 | 5.4 3.9 1.7 0.4 setosa setosa 8 | 4.6 3.4 1.4 0.3 setosa setosa 9 | 5 3.4 1.5 0.2 setosa setosa 10 | 4.4 2.9 1.4 0.2 setosa setosa 11 | 4.9 3.1 1.5 0.1 setosa setosa 12 | 5.4 3.7 1.5 0.2 setosa setosa 13 | 4.8 3.4 1.6 0.2 setosa setosa 14 | 4.8 3 1.4 0.1 setosa setosa 15 | 4.3 3 1.1 0.1 setosa setosa 16 | 5.8 4 1.2 0.2 setosa setosa 17 | 5.7 4.4 1.5 0.4 setosa setosa 18 | 5.4 3.9 1.3 0.4 setosa setosa 19 | 5.1 3.5 1.4 0.3 setosa setosa 20 | 5.7 3.8 1.7 0.3 setosa setosa 21 | 5.1 3.8 1.5 0.3 setosa setosa 22 | 5.4 3.4 1.7 0.2 setosa setosa 23 | 5.1 3.7 1.5 0.4 setosa setosa 24 | 4.6 3.6 1 0.2 setosa setosa 25 | 5.1 3.3 1.7 0.5 setosa setosa 26 | 4.8 3.4 1.9 0.2 setosa setosa 27 | 5 3 1.6 0.2 setosa setosa 28 | 5 3.4 1.6 0.4 setosa setosa 29 | 5.2 3.5 1.5 0.2 setosa setosa 30 | 5.2 3.4 1.4 0.2 setosa setosa 31 | 4.7 3.2 1.6 0.2 setosa setosa 32 | 4.8 3.1 1.6 0.2 setosa setosa 33 | 5.4 3.4 1.5 0.4 setosa setosa 34 | 5.2 4.1 1.5 0.1 setosa setosa 35 | 5.5 4.2 1.4 0.2 setosa setosa 36 | 4.9 3.1 1.5 0.2 setosa setosa 37 | 5 3.2 1.2 0.2 setosa setosa 38 | 5.5 3.5 1.3 0.2 setosa setosa 39 | 4.9 3.6 1.4 0.1 setosa setosa 40 | 4.4 3 1.3 0.2 setosa setosa 41 | 5.1 3.4 1.5 0.2 setosa setosa 42 | 5 3.5 1.3 0.3 setosa setosa 43 | 4.5 2.3 1.3 0.3 setosa setosa 44 | 4.4 3.2 1.3 0.2 setosa setosa 45 | 5 3.5 1.6 0.6 setosa setosa 46 | 5.1 3.8 1.9 0.4 setosa setosa 47 | 4.8 3 1.4 0.3 setosa setosa 48 | 5.1 3.8 1.6 0.2 setosa setosa 49 | 4.6 3.2 
1.4 0.2 setosa setosa 50 | 5.3 3.7 1.5 0.2 setosa setosa 51 | 5 3.3 1.4 0.2 setosa setosa 52 | 7 3.2 4.7 1.4 versicolor versicolor 53 | 6.4 3.2 4.5 1.5 versicolor versicolor 54 | 6.9 3.1 4.9 1.5 versicolor virginica 55 | 5.5 2.3 4 1.3 versicolor versicolor 56 | 6.5 2.8 4.6 1.5 versicolor versicolor 57 | 5.7 2.8 4.5 1.3 versicolor versicolor 58 | 6.3 3.3 4.7 1.6 versicolor versicolor 59 | 4.9 2.4 3.3 1 versicolor versicolor 60 | 6.6 2.9 4.6 1.3 versicolor versicolor 61 | 5.2 2.7 3.9 1.4 versicolor versicolor 62 | 5 2 3.5 1 versicolor versicolor 63 | 5.9 3 4.2 1.5 versicolor versicolor 64 | 6 2.2 4 1 versicolor versicolor 65 | 6.1 2.9 4.7 1.4 versicolor versicolor 66 | 5.6 2.9 3.6 1.3 versicolor versicolor 67 | 6.7 3.1 4.4 1.4 versicolor versicolor 68 | 5.6 3 4.5 1.5 versicolor versicolor 69 | 5.8 2.7 4.1 1 versicolor versicolor 70 | 6.2 2.2 4.5 1.5 versicolor versicolor 71 | 5.6 2.5 3.9 1.1 versicolor versicolor 72 | 5.9 3.2 4.8 1.8 versicolor virginica 73 | 6.1 2.8 4 1.3 versicolor versicolor 74 | 6.3 2.5 4.9 1.5 versicolor virginica 75 | 6.1 2.8 4.7 1.2 versicolor versicolor 76 | 6.4 2.9 4.3 1.3 versicolor versicolor 77 | 6.6 3 4.4 1.4 versicolor versicolor 78 | 6.8 2.8 4.8 1.4 versicolor virginica 79 | 6.7 3 5 1.7 versicolor virginica 80 | 6 2.9 4.5 1.5 versicolor versicolor 81 | 5.7 2.6 3.5 1 versicolor versicolor 82 | 5.5 2.4 3.8 1.1 versicolor versicolor 83 | 5.5 2.4 3.7 1 versicolor versicolor 84 | 5.8 2.7 3.9 1.2 versicolor versicolor 85 | 6 2.7 5.1 1.6 versicolor virginica 86 | 5.4 3 4.5 1.5 versicolor versicolor 87 | 6 3.4 4.5 1.6 versicolor versicolor 88 | 6.7 3.1 4.7 1.5 versicolor versicolor 89 | 6.3 2.3 4.4 1.3 versicolor versicolor 90 | 5.6 3 4.1 1.3 versicolor versicolor 91 | 5.5 2.5 4 1.3 versicolor versicolor 92 | 5.5 2.6 4.4 1.2 versicolor versicolor 93 | 6.1 3 4.6 1.4 versicolor versicolor 94 | 5.8 2.6 4 1.2 versicolor versicolor 95 | 5 2.3 3.3 1 versicolor versicolor 96 | 5.6 2.7 4.2 1.3 versicolor versicolor 97 | 5.7 3 4.2 1.2 versicolor 
versicolor 98 | 5.7 2.9 4.2 1.3 versicolor versicolor 99 | 6.2 2.9 4.3 1.3 versicolor versicolor 100 | 5.1 2.5 3 1.1 versicolor versicolor 101 | 5.7 2.8 4.1 1.3 versicolor versicolor 102 | 6.3 3.3 6 2.5 virginica virginica 103 | 5.8 2.7 5.1 1.9 virginica virginica 104 | 7.1 3 5.9 2.1 virginica virginica 105 | 6.3 2.9 5.6 1.8 virginica virginica 106 | 6.5 3 5.8 2.2 virginica virginica 107 | 7.6 3 6.6 2.1 virginica virginica 108 | 4.9 2.5 4.5 1.7 virginica versicolor 109 | 7.3 2.9 6.3 1.8 virginica virginica 110 | 6.7 2.5 5.8 1.8 virginica virginica 111 | 7.2 3.6 6.1 2.5 virginica virginica 112 | 6.5 3.2 5.1 2 virginica virginica 113 | 6.4 2.7 5.3 1.9 virginica virginica 114 | 6.8 3 5.5 2.1 virginica virginica 115 | 5.7 2.5 5 2 virginica virginica 116 | 5.8 2.8 5.1 2.4 virginica virginica 117 | 6.4 3.2 5.3 2.3 virginica virginica 118 | 6.5 3 5.5 1.8 virginica virginica 119 | 7.7 3.8 6.7 2.2 virginica virginica 120 | 7.7 2.6 6.9 2.3 virginica virginica 121 | 6 2.2 5 1.5 virginica virginica 122 | 6.9 3.2 5.7 2.3 virginica virginica 123 | 5.6 2.8 4.9 2 virginica virginica 124 | 7.7 2.8 6.7 2 virginica virginica 125 | 6.3 2.7 4.9 1.8 virginica virginica 126 | 6.7 3.3 5.7 2.1 virginica virginica 127 | 7.2 3.2 6 1.8 virginica virginica 128 | 6.2 2.8 4.8 1.8 virginica virginica 129 | 6.1 3 4.9 1.8 virginica virginica 130 | 6.4 2.8 5.6 2.1 virginica virginica 131 | 7.2 3 5.8 1.6 virginica virginica 132 | 7.4 2.8 6.1 1.9 virginica virginica 133 | 7.9 3.8 6.4 2 virginica virginica 134 | 6.4 2.8 5.6 2.2 virginica virginica 135 | 6.3 2.8 5.1 1.5 virginica virginica 136 | 6.1 2.6 5.6 1.4 virginica virginica 137 | 7.7 3 6.1 2.3 virginica virginica 138 | 6.3 3.4 5.6 2.4 virginica virginica 139 | 6.4 3.1 5.5 1.8 virginica virginica 140 | 6 3 4.8 1.8 virginica virginica 141 | 6.9 3.1 5.4 2.1 virginica virginica 142 | 6.7 3.1 5.6 2.4 virginica virginica 143 | 6.9 3.1 5.1 2.3 virginica virginica 144 | 5.8 2.7 5.1 1.9 virginica virginica 145 | 6.8 3.2 5.9 2.3 virginica virginica 146 | 
6.7 3.3 5.7 2.5 virginica virginica 147 | 6.7 3 5.2 2.3 virginica virginica 148 | 6.3 2.5 5 1.9 virginica virginica 149 | 6.5 3 5.2 2 virginica virginica 150 | 6.2 3.4 5.4 2.3 virginica virginica 151 | 5.9 3 5.1 1.8 virginica virginica 152 | -------------------------------------------------------------------------------- /data/iris.rpart.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 | 5 | 6 | 2013-01-25 10:01:53 7 |
8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 |
85 | -------------------------------------------------------------------------------- /data/iris.svm.tsv: -------------------------------------------------------------------------------- 1 | sepal_length sepal_width petal_length petal_width species predict 2 | 5.1 3.5 1.4 0.2 setosa setosa 3 | 4.9 3 1.4 0.2 setosa setosa 4 | 4.7 3.2 1.3 0.2 setosa setosa 5 | 4.6 3.1 1.5 0.2 setosa setosa 6 | 5 3.6 1.4 0.2 setosa setosa 7 | 5.4 3.9 1.7 0.4 setosa setosa 8 | 4.6 3.4 1.4 0.3 setosa setosa 9 | 5 3.4 1.5 0.2 setosa setosa 10 | 4.4 2.9 1.4 0.2 setosa setosa 11 | 4.9 3.1 1.5 0.1 setosa setosa 12 | 5.4 3.7 1.5 0.2 setosa setosa 13 | 4.8 3.4 1.6 0.2 setosa setosa 14 | 4.8 3 1.4 0.1 setosa setosa 15 | 4.3 3 1.1 0.1 setosa setosa 16 | 5.8 4 1.2 0.2 setosa setosa 17 | 5.7 4.4 1.5 0.4 setosa setosa 18 | 5.4 3.9 1.3 0.4 setosa setosa 19 | 5.1 3.5 1.4 0.3 setosa setosa 20 | 5.7 3.8 1.7 0.3 setosa setosa 21 | 5.1 3.8 1.5 0.3 setosa setosa 22 | 5.4 3.4 1.7 0.2 setosa setosa 23 | 5.1 3.7 1.5 0.4 setosa setosa 24 | 4.6 3.6 1 0.2 setosa setosa 25 | 5.1 3.3 1.7 0.5 setosa setosa 26 | 4.8 3.4 1.9 0.2 setosa setosa 27 | 5 3 1.6 0.2 setosa setosa 28 | 5 3.4 1.6 0.4 setosa setosa 29 | 5.2 3.5 1.5 0.2 setosa setosa 30 | 5.2 3.4 1.4 0.2 setosa setosa 31 | 4.7 3.2 1.6 0.2 setosa setosa 32 | 4.8 3.1 1.6 0.2 setosa setosa 33 | 5.4 3.4 1.5 0.4 setosa setosa 34 | 5.2 4.1 1.5 0.1 setosa setosa 35 | 5.5 4.2 1.4 0.2 setosa setosa 36 | 4.9 3.1 1.5 0.2 setosa setosa 37 | 5 3.2 1.2 0.2 setosa setosa 38 | 5.5 3.5 1.3 0.2 setosa setosa 39 | 4.9 3.6 1.4 0.1 setosa setosa 40 | 4.4 3 1.3 0.2 setosa setosa 41 | 5.1 3.4 1.5 0.2 setosa setosa 42 | 5 3.5 1.3 0.3 setosa setosa 43 | 4.5 2.3 1.3 0.3 setosa setosa 44 | 4.4 3.2 1.3 0.2 setosa setosa 45 | 5 3.5 1.6 0.6 setosa setosa 46 | 5.1 3.8 1.9 0.4 setosa setosa 47 | 4.8 3 1.4 0.3 setosa setosa 48 | 5.1 3.8 1.6 0.2 setosa setosa 49 | 4.6 3.2 1.4 0.2 setosa setosa 50 | 5.3 3.7 1.5 0.2 setosa setosa 51 | 5 3.3 1.4 0.2 setosa setosa 52 | 7 3.2 4.7 1.4 versicolor 
versicolor 53 | 6.4 3.2 4.5 1.5 versicolor versicolor 54 | 6.9 3.1 4.9 1.5 versicolor versicolor 55 | 5.5 2.3 4 1.3 versicolor versicolor 56 | 6.5 2.8 4.6 1.5 versicolor versicolor 57 | 5.7 2.8 4.5 1.3 versicolor versicolor 58 | 6.3 3.3 4.7 1.6 versicolor versicolor 59 | 4.9 2.4 3.3 1 versicolor versicolor 60 | 6.6 2.9 4.6 1.3 versicolor versicolor 61 | 5.2 2.7 3.9 1.4 versicolor versicolor 62 | 5 2 3.5 1 versicolor versicolor 63 | 5.9 3 4.2 1.5 versicolor versicolor 64 | 6 2.2 4 1 versicolor versicolor 65 | 6.1 2.9 4.7 1.4 versicolor versicolor 66 | 5.6 2.9 3.6 1.3 versicolor versicolor 67 | 6.7 3.1 4.4 1.4 versicolor versicolor 68 | 5.6 3 4.5 1.5 versicolor versicolor 69 | 5.8 2.7 4.1 1 versicolor versicolor 70 | 6.2 2.2 4.5 1.5 versicolor versicolor 71 | 5.6 2.5 3.9 1.1 versicolor versicolor 72 | 5.9 3.2 4.8 1.8 versicolor versicolor 73 | 6.1 2.8 4 1.3 versicolor versicolor 74 | 6.3 2.5 4.9 1.5 versicolor versicolor 75 | 6.1 2.8 4.7 1.2 versicolor versicolor 76 | 6.4 2.9 4.3 1.3 versicolor versicolor 77 | 6.6 3 4.4 1.4 versicolor versicolor 78 | 6.8 2.8 4.8 1.4 versicolor versicolor 79 | 6.7 3 5 1.7 versicolor versicolor 80 | 6 2.9 4.5 1.5 versicolor versicolor 81 | 5.7 2.6 3.5 1 versicolor versicolor 82 | 5.5 2.4 3.8 1.1 versicolor versicolor 83 | 5.5 2.4 3.7 1 versicolor versicolor 84 | 5.8 2.7 3.9 1.2 versicolor versicolor 85 | 6 2.7 5.1 1.6 versicolor virginica 86 | 5.4 3 4.5 1.5 versicolor versicolor 87 | 6 3.4 4.5 1.6 versicolor versicolor 88 | 6.7 3.1 4.7 1.5 versicolor versicolor 89 | 6.3 2.3 4.4 1.3 versicolor versicolor 90 | 5.6 3 4.1 1.3 versicolor versicolor 91 | 5.5 2.5 4 1.3 versicolor versicolor 92 | 5.5 2.6 4.4 1.2 versicolor versicolor 93 | 6.1 3 4.6 1.4 versicolor versicolor 94 | 5.8 2.6 4 1.2 versicolor versicolor 95 | 5 2.3 3.3 1 versicolor versicolor 96 | 5.6 2.7 4.2 1.3 versicolor versicolor 97 | 5.7 3 4.2 1.2 versicolor versicolor 98 | 5.7 2.9 4.2 1.3 versicolor versicolor 99 | 6.2 2.9 4.3 1.3 versicolor versicolor 100 | 5.1 2.5 3 1.1 
versicolor versicolor 101 | 5.7 2.8 4.1 1.3 versicolor versicolor 102 | 6.3 3.3 6 2.5 virginica virginica 103 | 5.8 2.7 5.1 1.9 virginica virginica 104 | 7.1 3 5.9 2.1 virginica virginica 105 | 6.3 2.9 5.6 1.8 virginica virginica 106 | 6.5 3 5.8 2.2 virginica virginica 107 | 7.6 3 6.6 2.1 virginica virginica 108 | 4.9 2.5 4.5 1.7 virginica versicolor 109 | 7.3 2.9 6.3 1.8 virginica virginica 110 | 6.7 2.5 5.8 1.8 virginica virginica 111 | 7.2 3.6 6.1 2.5 virginica virginica 112 | 6.5 3.2 5.1 2 virginica virginica 113 | 6.4 2.7 5.3 1.9 virginica virginica 114 | 6.8 3 5.5 2.1 virginica virginica 115 | 5.7 2.5 5 2 virginica virginica 116 | 5.8 2.8 5.1 2.4 virginica virginica 117 | 6.4 3.2 5.3 2.3 virginica virginica 118 | 6.5 3 5.5 1.8 virginica virginica 119 | 7.7 3.8 6.7 2.2 virginica virginica 120 | 7.7 2.6 6.9 2.3 virginica virginica 121 | 6 2.2 5 1.5 virginica versicolor 122 | 6.9 3.2 5.7 2.3 virginica virginica 123 | 5.6 2.8 4.9 2 virginica virginica 124 | 7.7 2.8 6.7 2 virginica virginica 125 | 6.3 2.7 4.9 1.8 virginica virginica 126 | 6.7 3.3 5.7 2.1 virginica virginica 127 | 7.2 3.2 6 1.8 virginica virginica 128 | 6.2 2.8 4.8 1.8 virginica virginica 129 | 6.1 3 4.9 1.8 virginica virginica 130 | 6.4 2.8 5.6 2.1 virginica virginica 131 | 7.2 3 5.8 1.6 virginica virginica 132 | 7.4 2.8 6.1 1.9 virginica virginica 133 | 7.9 3.8 6.4 2 virginica virginica 134 | 6.4 2.8 5.6 2.2 virginica virginica 135 | 6.3 2.8 5.1 1.5 virginica versicolor 136 | 6.1 2.6 5.6 1.4 virginica virginica 137 | 7.7 3 6.1 2.3 virginica virginica 138 | 6.3 3.4 5.6 2.4 virginica virginica 139 | 6.4 3.1 5.5 1.8 virginica virginica 140 | 6 3 4.8 1.8 virginica virginica 141 | 6.9 3.1 5.4 2.1 virginica virginica 142 | 6.7 3.1 5.6 2.4 virginica virginica 143 | 6.9 3.1 5.1 2.3 virginica virginica 144 | 5.8 2.7 5.1 1.9 virginica virginica 145 | 6.8 3.2 5.9 2.3 virginica virginica 146 | 6.7 3.3 5.7 2.5 virginica virginica 147 | 6.7 3 5.2 2.3 virginica virginica 148 | 6.3 2.5 5 1.9 virginica virginica 
149 | 6.5 3 5.2 2 virginica virginica 150 | 6.2 3.4 5.4 2.3 virginica virginica 151 | 5.9 3 5.1 1.8 virginica virginica 152 | -------------------------------------------------------------------------------- /data/orders.tsv: -------------------------------------------------------------------------------- 1 | label var0 var1 var2 order_id 2 | 1 0 1 0 6f8e1014 3 | 0 0 0 1 6f8ea22e 4 | 1 0 1 0 6f8ea435 5 | 0 0 0 1 6f8ea5e1 6 | 1 0 1 0 6f8ea785 7 | 1 0 1 0 6f8ea91e 8 | 0 1 0 0 6f8eaaba 9 | 1 0 1 0 6f8eac54 10 | 0 1 1 0 6f8eade3 11 | 0 1 0 0 6f8eaf87 12 | 0 0 1 0 6f8eb119 13 | 1 0 1 0 6f8eb2e3 14 | 1 1 1 0 6f8eb45e 15 | 1 0 1 0 6f8eb5e6 16 | 1 0 1 0 6f8eb761 17 | 0 0 0 1 6f8eb8d4 18 | 0 0 0 1 6f8eba59 19 | 0 0 1 0 6f8ebbd4 20 | 1 0 1 0 6f8ebd5c 21 | 1 0 1 0 6f8ebec5 22 | 0 1 0 0 6f8ec04a 23 | 0 0 0 1 6f8ec1bd 24 | 1 0 1 0 6f8ec319 25 | 1 0 1 0 6f8ec480 26 | 1 1 0 0 6f8ec5d7 27 | 1 1 1 0 6f8ec72b 28 | 0 0 0 1 6f8ec887 29 | 0 0 0 1 6f8eca05 30 | 1 1 1 0 6f8ecb94 31 | 1 0 1 0 6f8ecd0f 32 | 0 1 0 0 6f8ece82 33 | 1 1 1 0 6f8ecfd7 34 | 1 0 1 0 6f8ed135 35 | 1 1 1 0 6f8ed27d 36 | 0 0 0 1 6f8ed3c7 37 | 0 0 0 1 6f8ed511 38 | 0 0 0 1 6f8ed663 39 | 0 0 0 1 6f8ed7eb 40 | 1 0 1 0 6f8ed940 41 | 0 1 0 0 6f8eda8a 42 | 0 1 0 0 6f8edbd1 43 | 1 1 1 0 6f8edd1e 44 | 1 1 1 0 6f8ede66 45 | 0 1 0 0 6f8edfee 46 | 0 1 0 0 6f8ee161 47 | 1 1 0 0 6f8ee2bd 48 | 0 0 1 0 6f8ee43a 49 | 1 0 1 0 6f8ee5ca 50 | 0 0 1 0 6f8ee763 51 | 1 1 1 0 6f8ee8f5 52 | 0 1 0 0 6f8eea78 53 | 1 1 1 0 6f8eebf5 54 | 1 0 1 0 6f8eed85 55 | 1 0 1 0 6f8eef1e 56 | 1 0 1 0 6f8ef0b8 57 | 1 0 1 0 6f8ef254 58 | 0 0 0 1 6f8ef3e3 59 | 1 0 0 1 6f8ef57d 60 | 1 0 1 0 6f8ef717 61 | 0 0 1 0 6f8ef89e 62 | 0 1 0 0 6f8efa23 63 | 1 0 1 0 6f8efbb3 64 | 0 0 0 1 6f8efd4f 65 | 0 1 0 0 6f8efef0 66 | 0 1 0 0 6f8f00f0 67 | 1 1 1 0 6f8f02dc 68 | 1 1 1 0 6f8f0468 69 | 1 1 1 0 6f8f0602 70 | 1 1 0 0 6f8f079e 71 | 1 1 1 0 6f8f0940 72 | 1 1 1 0 6f8f0ae6 73 | 1 1 1 0 6f8f0c8a 74 | 1 1 1 0 6f8f0e0f 75 | 1 1 1 0 6f8f0f8c 76 | 1 0 0 1 6f8f1126 77 | 0 1 0 0 
6f8f12a1 78 | 1 1 1 0 6f8f13f5 79 | 0 0 0 1 6f8f154a 80 | 0 0 0 1 6f8f1694 81 | 1 1 1 0 6f8f17e6 82 | 1 0 1 0 6f8f1933 83 | 1 0 1 0 6f8f1a7a 84 | 1 0 1 0 6f8f1bc5 85 | 1 0 1 0 6f8f1d0f 86 | 0 0 0 1 6f8f1e63 87 | 1 0 1 0 6f8f1fae 88 | 0 0 0 1 6f8f20f8 89 | 0 1 0 0 6f8f2242 90 | 1 1 1 0 6f8f2394 91 | 1 1 0 0 6f8f24e1 92 | 1 1 1 0 6f8f2628 93 | 0 0 1 0 6f8f2775 94 | 1 0 1 0 6f8f28c7 95 | 1 1 1 0 6f8f2a11 96 | 0 0 0 1 6f8f2b66 97 | 0 0 0 1 6f8f2cb0 98 | 1 1 1 0 6f8f2e02 99 | 1 0 1 0 6f8f2f4f 100 | 1 1 1 0 6f8f30a1 101 | 0 1 0 0 6f8f31ee 102 | 1 0 1 0 6f8f3335 103 | 1 1 1 0 6f8f3480 104 | 1 0 1 0 6f8f35d4 105 | 0 0 0 1 6f8f371c 106 | 0 0 0 1 6f8f387a 107 | 0 0 0 1 6f8f39c7 108 | 1 0 1 0 6f8f3b0f 109 | 0 1 0 0 6f8f3c63 110 | 1 1 1 0 6f8f3dae 111 | 1 0 0 1 6f8f3ef8 112 | 0 1 0 0 6f8f404c 113 | 1 1 1 0 6f8f4197 114 | 1 1 1 0 6f8f42e1 115 | 1 0 1 0 6f8f4435 116 | 1 1 1 0 6f8f457d 117 | 1 0 1 0 6f8f46c7 118 | 1 0 0 1 6f8f4811 119 | 0 0 0 1 6f8f4970 120 | 0 1 0 0 6f8f4aba 121 | 1 1 1 0 6f8f4c35 122 | 1 1 1 0 6f8f4da8 123 | 0 0 0 1 6f8f4f23 124 | 1 1 1 0 6f8f5094 125 | 0 0 0 1 6f8f5207 126 | 0 0 0 1 6f8f537a 127 | 0 0 0 1 6f8f54eb 128 | 1 1 1 0 6f8f5640 129 | 1 1 1 0 6f8f57b3 130 | 1 1 1 0 6f8f592e 131 | 0 0 0 1 6f8f5a9e 132 | 0 1 1 0 6f8f5c19 133 | 1 0 1 0 6f8f5df3 134 | 1 0 1 0 6f8f5f78 135 | 1 0 1 0 6f8f6111 136 | 0 1 1 0 6f8f628f 137 | 1 1 1 0 6f8f6433 138 | 0 0 0 1 6f8f6607 139 | 1 0 0 1 6f8f677a 140 | 1 0 1 0 6f8f690a 141 | 1 0 1 0 6f8f6a8f 142 | 0 0 0 1 6f8f6c0c 143 | 1 1 1 0 6f8f6d7d 144 | 1 1 1 0 6f8f6ee6 145 | 0 0 0 1 6f8f7045 146 | 0 0 1 0 6f8f71b5 147 | 0 1 1 0 6f8f731e 148 | 0 1 0 0 6f8f747d 149 | 1 1 1 0 6f8f75cf 150 | 1 0 1 0 6f8f7719 151 | 1 1 1 0 6f8f786e 152 | 1 0 1 0 6f8f79ae 153 | 0 1 0 0 6f8f7b02 154 | 0 0 0 1 6f8f7c4c 155 | 0 1 0 0 6f8f7d97 156 | 1 0 1 0 6f8f7ede 157 | 0 0 0 1 6f8f802b 158 | 0 1 0 0 6f8f817d 159 | 1 0 1 0 6f8f82c7 160 | 1 1 1 0 6f8f8411 161 | 1 0 1 0 6f8f855c 162 | 0 0 0 1 6f8f86a6 163 | 0 1 0 0 6f8f87ee 164 | 0 1 0 0 6f8f8930 165 | 1 1 1 
0 6f8f8a82 166 | 1 1 0 0 6f8f8bcf 167 | 1 0 0 1 6f8f8d17 168 | 1 0 1 0 6f8f8e6b 169 | 0 0 0 1 6f8f8fb5 170 | 0 0 0 1 6f8f9100 171 | 0 1 0 0 6f8f924a 172 | 1 1 0 0 6f8f9391 173 | 1 1 1 0 6f8f94e8 174 | 0 1 0 0 6f8f9630 175 | 1 0 1 0 6f8f977d 176 | 0 0 0 1 6f8f98c5 177 | 1 0 1 0 6f8f9a19 178 | 0 1 0 0 6f8f9b63 179 | 0 0 0 1 6f8f9cb8 180 | 1 1 0 0 6f8f9e02 181 | 1 1 1 0 6f8f9f57 182 | 1 1 1 0 6f8fa09e 183 | 0 1 0 0 6f8fa1eb 184 | 1 1 1 0 6f8fa333 185 | 0 1 0 0 6f8fa480 186 | 0 0 0 1 6f8fa5d1 187 | 0 1 0 0 6f8fa71c 188 | 1 1 1 0 6f8fa866 189 | 0 1 0 0 6f8fa9ba 190 | 0 0 0 1 6f8fab05 191 | 1 1 1 0 6f8fac6b 192 | 1 0 1 0 6f8fade6 193 | 1 0 1 0 6f8faf5c 194 | 1 1 1 0 6f8fb0d7 195 | 0 1 0 0 6f8fb23d 196 | 1 1 1 0 6f8fb3b0 197 | 0 0 1 0 6f8fb52b 198 | 1 0 1 0 6f8fb6a8 199 | 1 0 1 0 6f8fb819 200 | 0 0 0 1 6f8fb978 201 | 1 1 1 0 6f8fbafd 202 | -------------------------------------------------------------------------------- /data/sample.rf.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 | 5 | 6 | 2013-01-25 10:01:08 7 |
8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 |
80 | -------------------------------------------------------------------------------- /data/sample.tsv: -------------------------------------------------------------------------------- 1 | label var0 var1 var2 order_id predict 2 | 1 0 1 0 6f8e1014 1 3 | 0 0 0 1 6f8ea22e 0 4 | 1 0 1 0 6f8ea435 1 5 | 0 0 0 1 6f8ea5e1 0 6 | 1 0 1 0 6f8ea785 1 7 | 1 0 1 0 6f8ea91e 1 8 | 0 1 0 0 6f8eaaba 0 9 | 1 0 1 0 6f8eac54 1 10 | 0 1 1 0 6f8eade3 1 11 | 0 1 0 0 6f8eaf87 0 12 | 0 0 1 0 6f8eb119 1 13 | 1 0 1 0 6f8eb2e3 1 14 | 1 1 1 0 6f8eb45e 1 15 | 1 0 1 0 6f8eb5e6 1 16 | 1 0 1 0 6f8eb761 1 17 | 0 0 0 1 6f8eb8d4 0 18 | 0 0 0 1 6f8eba59 0 19 | 0 0 1 0 6f8ebbd4 1 20 | 1 0 1 0 6f8ebd5c 1 21 | 1 0 1 0 6f8ebec5 1 22 | 0 1 0 0 6f8ec04a 0 23 | 0 0 0 1 6f8ec1bd 0 24 | 1 0 1 0 6f8ec319 1 25 | 1 0 1 0 6f8ec480 1 26 | 1 1 0 0 6f8ec5d7 0 27 | 1 1 1 0 6f8ec72b 1 28 | 0 0 0 1 6f8ec887 0 29 | 0 0 0 1 6f8eca05 0 30 | 1 1 1 0 6f8ecb94 1 31 | 1 0 1 0 6f8ecd0f 1 32 | 0 1 0 0 6f8ece82 0 33 | 1 1 1 0 6f8ecfd7 1 34 | 1 0 1 0 6f8ed135 1 35 | 1 1 1 0 6f8ed27d 1 36 | 0 0 0 1 6f8ed3c7 0 37 | 0 0 0 1 6f8ed511 0 38 | 0 0 0 1 6f8ed663 0 39 | 0 0 0 1 6f8ed7eb 0 40 | 1 0 1 0 6f8ed940 1 41 | 0 1 0 0 6f8eda8a 0 42 | 0 1 0 0 6f8edbd1 0 43 | 1 1 1 0 6f8edd1e 1 44 | 1 1 1 0 6f8ede66 1 45 | 0 1 0 0 6f8edfee 0 46 | 0 1 0 0 6f8ee161 0 47 | 1 1 0 0 6f8ee2bd 0 48 | 0 0 1 0 6f8ee43a 1 49 | 1 0 1 0 6f8ee5ca 1 50 | 0 0 1 0 6f8ee763 1 51 | 1 1 1 0 6f8ee8f5 1 52 | 0 1 0 0 6f8eea78 0 53 | 1 1 1 0 6f8eebf5 1 54 | 1 0 1 0 6f8eed85 1 55 | 1 0 1 0 6f8eef1e 1 56 | 1 0 1 0 6f8ef0b8 1 57 | 1 0 1 0 6f8ef254 1 58 | 0 0 0 1 6f8ef3e3 0 59 | 1 0 0 1 6f8ef57d 0 60 | 1 0 1 0 6f8ef717 1 61 | 0 0 1 0 6f8ef89e 1 62 | 0 1 0 0 6f8efa23 0 63 | 1 0 1 0 6f8efbb3 1 64 | 0 0 0 1 6f8efd4f 0 65 | 0 1 0 0 6f8efef0 0 66 | 0 1 0 0 6f8f00f0 0 67 | 1 1 1 0 6f8f02dc 1 68 | 1 1 1 0 6f8f0468 1 69 | 1 1 1 0 6f8f0602 1 70 | 1 1 0 0 6f8f079e 0 71 | 1 1 1 0 6f8f0940 1 72 | 1 1 1 0 6f8f0ae6 1 73 | 1 1 1 0 6f8f0c8a 1 74 | 1 1 1 0 6f8f0e0f 1 75 | 1 1 1 0 6f8f0f8c 1 76 | 
1 0 0 1 6f8f1126 0 77 | 0 1 0 0 6f8f12a1 0 78 | 1 1 1 0 6f8f13f5 1 79 | 0 0 0 1 6f8f154a 0 80 | 0 0 0 1 6f8f1694 0 81 | 1 1 1 0 6f8f17e6 1 82 | 1 0 1 0 6f8f1933 1 83 | 1 0 1 0 6f8f1a7a 1 84 | 1 0 1 0 6f8f1bc5 1 85 | 1 0 1 0 6f8f1d0f 1 86 | 0 0 0 1 6f8f1e63 0 87 | 1 0 1 0 6f8f1fae 1 88 | 0 0 0 1 6f8f20f8 0 89 | 0 1 0 0 6f8f2242 0 90 | 1 1 1 0 6f8f2394 1 91 | 1 1 0 0 6f8f24e1 0 92 | 1 1 1 0 6f8f2628 1 93 | 0 0 1 0 6f8f2775 1 94 | 1 0 1 0 6f8f28c7 1 95 | 1 1 1 0 6f8f2a11 1 96 | 0 0 0 1 6f8f2b66 0 97 | 0 0 0 1 6f8f2cb0 0 98 | 1 1 1 0 6f8f2e02 1 99 | 1 0 1 0 6f8f2f4f 1 100 | 1 1 1 0 6f8f30a1 1 101 | 0 1 0 0 6f8f31ee 0 102 | 1 0 1 0 6f8f3335 1 103 | 1 1 1 0 6f8f3480 1 104 | 1 0 1 0 6f8f35d4 1 105 | 0 0 0 1 6f8f371c 0 106 | 0 0 0 1 6f8f387a 0 107 | 0 0 0 1 6f8f39c7 0 108 | 1 0 1 0 6f8f3b0f 1 109 | 0 1 0 0 6f8f3c63 0 110 | 1 1 1 0 6f8f3dae 1 111 | 1 0 0 1 6f8f3ef8 0 112 | 0 1 0 0 6f8f404c 0 113 | 1 1 1 0 6f8f4197 1 114 | 1 1 1 0 6f8f42e1 1 115 | 1 0 1 0 6f8f4435 1 116 | 1 1 1 0 6f8f457d 1 117 | 1 0 1 0 6f8f46c7 1 118 | 1 0 0 1 6f8f4811 0 119 | 0 0 0 1 6f8f4970 0 120 | 0 1 0 0 6f8f4aba 0 121 | 1 1 1 0 6f8f4c35 1 122 | 1 1 1 0 6f8f4da8 1 123 | 0 0 0 1 6f8f4f23 0 124 | 1 1 1 0 6f8f5094 1 125 | 0 0 0 1 6f8f5207 0 126 | 0 0 0 1 6f8f537a 0 127 | 0 0 0 1 6f8f54eb 0 128 | 1 1 1 0 6f8f5640 1 129 | 1 1 1 0 6f8f57b3 1 130 | 1 1 1 0 6f8f592e 1 131 | 0 0 0 1 6f8f5a9e 0 132 | 0 1 1 0 6f8f5c19 1 133 | 1 0 1 0 6f8f5df3 1 134 | 1 0 1 0 6f8f5f78 1 135 | 1 0 1 0 6f8f6111 1 136 | 0 1 1 0 6f8f628f 1 137 | 1 1 1 0 6f8f6433 1 138 | 0 0 0 1 6f8f6607 0 139 | 1 0 0 1 6f8f677a 0 140 | 1 0 1 0 6f8f690a 1 141 | 1 0 1 0 6f8f6a8f 1 142 | 0 0 0 1 6f8f6c0c 0 143 | 1 1 1 0 6f8f6d7d 1 144 | 1 1 1 0 6f8f6ee6 1 145 | 0 0 0 1 6f8f7045 0 146 | 0 0 1 0 6f8f71b5 1 147 | 0 1 1 0 6f8f731e 1 148 | 0 1 0 0 6f8f747d 0 149 | 1 1 1 0 6f8f75cf 1 150 | 1 0 1 0 6f8f7719 1 151 | 1 1 1 0 6f8f786e 1 152 | 1 0 1 0 6f8f79ae 1 153 | 0 1 0 0 6f8f7b02 0 154 | 0 0 0 1 6f8f7c4c 0 155 | 0 1 0 0 6f8f7d97 0 156 | 1 0 1 0 6f8f7ede 1 157 
| 0 0 0 1 6f8f802b 0 158 | 0 1 0 0 6f8f817d 0 159 | 1 0 1 0 6f8f82c7 1 160 | 1 1 1 0 6f8f8411 1 161 | 1 0 1 0 6f8f855c 1 162 | 0 0 0 1 6f8f86a6 0 163 | 0 1 0 0 6f8f87ee 0 164 | 0 1 0 0 6f8f8930 0 165 | 1 1 1 0 6f8f8a82 1 166 | 1 1 0 0 6f8f8bcf 0 167 | 1 0 0 1 6f8f8d17 0 168 | 1 0 1 0 6f8f8e6b 1 169 | 0 0 0 1 6f8f8fb5 0 170 | 0 0 0 1 6f8f9100 0 171 | 0 1 0 0 6f8f924a 0 172 | 1 1 0 0 6f8f9391 0 173 | 1 1 1 0 6f8f94e8 1 174 | 0 1 0 0 6f8f9630 0 175 | 1 0 1 0 6f8f977d 1 176 | 0 0 0 1 6f8f98c5 0 177 | 1 0 1 0 6f8f9a19 1 178 | 0 1 0 0 6f8f9b63 0 179 | 0 0 0 1 6f8f9cb8 0 180 | 1 1 0 0 6f8f9e02 0 181 | 1 1 1 0 6f8f9f57 1 182 | 1 1 1 0 6f8fa09e 1 183 | 0 1 0 0 6f8fa1eb 0 184 | 1 1 1 0 6f8fa333 1 185 | 0 1 0 0 6f8fa480 0 186 | 0 0 0 1 6f8fa5d1 0 187 | 0 1 0 0 6f8fa71c 0 188 | 1 1 1 0 6f8fa866 1 189 | 0 1 0 0 6f8fa9ba 0 190 | 0 0 0 1 6f8fab05 0 191 | 1 1 1 0 6f8fac6b 1 192 | 1 0 1 0 6f8fade6 1 193 | 1 0 1 0 6f8faf5c 1 194 | 1 1 1 0 6f8fb0d7 1 195 | 0 1 0 0 6f8fb23d 0 196 | 1 1 1 0 6f8fb3b0 1 197 | 0 0 1 0 6f8fb52b 1 198 | 1 0 1 0 6f8fb6a8 1 199 | 1 0 1 0 6f8fb819 1 200 | 0 0 0 1 6f8fb978 0 201 | 1 1 1 0 6f8fbafd 1 202 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | com.github.quintona 7 | storm-pattern 8 | 0.0.2-SNAPSHOT 9 | 10 | 11 | 12 | clojars.org 13 | http://clojars.org/repo 14 | 15 | 16 | 17 | 18 | 19 | clojars 20 | Clojars repository 21 | https://clojars.org/repo 22 | 23 | 24 | 25 | 26 | 27 | net.sf.jopt-simple 28 | jopt-simple 29 | 4.3 30 | compile 31 | 32 | 33 | org.codehaus.janino 34 | janino 35 | 2.6.1 36 | 37 | 38 | org.slf4j 39 | slf4j-api 40 | 1.6.1 41 | 42 | 43 | junit 44 | junit 45 | 4.8 46 | test 47 | 48 | 49 | log4j 50 | log4j 51 | 1.2.16 52 | runtime 53 | 54 | 55 | storm 56 | storm 57 | 0.9.0-wip16 58 | 59 | provided 60 | 61 | 62 | slf4j-api 63 | org.slf4j 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 
org.apache.maven.plugins 73 | maven-compiler-plugin 74 | 2.5.1 75 | 76 | 1.6 77 | 1.6 78 | 79 | 80 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /src/main/java/pattern/Classifier.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2013 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.concurrentinc.com/ 5 | */ 6 | 7 | package pattern; 8 | 9 | import java.io.BufferedReader; 10 | import java.io.File; 11 | import java.io.FileInputStream; 12 | import java.io.IOException; 13 | import java.io.InputStreamReader; 14 | import java.io.Reader; 15 | import java.io.Serializable; 16 | import java.io.StringReader; 17 | 18 | import org.slf4j.Logger; 19 | import org.slf4j.LoggerFactory; 20 | 21 | import pattern.model.MiningModel; 22 | import pattern.model.Model; 23 | import pattern.model.clust.ClusteringModel; 24 | import pattern.model.glm.GeneralizedRegressionModel; 25 | import pattern.model.lm.RegressionModel; 26 | import pattern.model.tree.TreeModel; 27 | import storm.trident.tuple.TridentTuple; 28 | 29 | public class Classifier implements Serializable { 30 | /** Field LOG */ 31 | private static final Logger LOG = LoggerFactory.getLogger(Classifier.class); 32 | 33 | public Model model; 34 | 35 | /** 36 | * Construct a Classifier by parsing the PMML file, verifying the model 37 | * type, and building an appropriate Model. 
38 | * 39 | * @param pmmlUri 40 | * XML source for the PMML description 41 | * @throws PatternException 42 | */ 43 | public Classifier(String pmmlUri) throws PatternException { 44 | try { 45 | PMML pmml = new PMML(getSourceReader(pmmlUri)); 46 | 47 | if (PMML.Models.MINING.equals(pmml.model_type)) 48 | model = new MiningModel(pmml); 49 | else if (PMML.Models.TREE.equals(pmml.model_type)) 50 | model = new TreeModel(pmml); 51 | else if (PMML.Models.REGRESSION.equals(pmml.model_type)) 52 | model = new RegressionModel(pmml); 53 | else if (PMML.Models.CLUSTERING.equals(pmml.model_type)) 54 | model = new ClusteringModel(pmml); 55 | else if (PMML.Models.GENERALIZED_REGRESSION.equals(pmml.model_type)) 56 | model = new GeneralizedRegressionModel(pmml); 57 | else 58 | throw new PatternException("unsupported model type: " 59 | + pmml.model_type.name()); 60 | } catch (IOException exception) { 61 | LOG.error("could not read PMML file", exception); 62 | throw new PatternException(" could not read PMML file", exception); 63 | } 64 | } 65 | 66 | /** 67 | * Construct a Reader by reading the PMML file into a string buffer first. 68 | * This default implementation expects a file on the local disk. 69 | * 70 | * @param file 71 | * XML source for the PMML description 72 | * @return Reader 73 | * @throws IOException 74 | */ 75 | public Reader getSourceReader(String filePath) throws IOException { 76 | File file = new File(filePath); 77 | BufferedReader reader = new BufferedReader(new InputStreamReader( 78 | new FileInputStream(file))); 79 | String line = null; 80 | StringBuilder stringBuilder = new StringBuilder(); 81 | String ls = System.getProperty("line.separator"); 82 | while ((line = reader.readLine()) != null) { 83 | stringBuilder.append(line); 84 | stringBuilder.append(ls); 85 | } 86 | reader.close(); 87 | return new StringReader(stringBuilder.toString()); 88 | } 89 | 90 | /** 91 | * Prepare to classify with this model. 
Called immediately before the 92 | * enclosing Operation instance is put into play processing Tuples. 93 | */ 94 | public void prepare() { 95 | model.prepare(); 96 | } 97 | 98 | /** 99 | * Classify an input tuple, returning the predicted label. 100 | * 101 | * @param values 102 | * tuple values 103 | * @param fields 104 | * tuple fields 105 | * @return String 106 | * @throws PatternException 107 | */ 108 | public String classifyTuple(TridentTuple values) throws PatternException { 109 | return model.classifyTuple(values); 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /src/main/java/pattern/ClassifierFunction.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2013 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.concurrentinc.com/ 5 | */ 6 | 7 | package pattern; 8 | 9 | import java.util.Map; 10 | 11 | import storm.trident.operation.BaseFunction; 12 | import storm.trident.operation.TridentCollector; 13 | import storm.trident.operation.TridentOperationContext; 14 | import storm.trident.tuple.TridentTuple; 15 | import backtype.storm.tuple.Fields; 16 | import backtype.storm.tuple.Values; 17 | 18 | public class ClassifierFunction extends BaseFunction { 19 | public Classifier classifier; 20 | 21 | private String pmmlPath; 22 | 23 | /** 24 | * @param pmmlPath 25 | * PMML file 26 | */ 27 | public ClassifierFunction(String pmmlPath) { 28 | this.pmmlPath = pmmlPath; 29 | } 30 | 31 | /** 32 | * @param flowProcess 33 | * @param operationCall 34 | */ 35 | @Override 36 | public void prepare(Map conf, TridentOperationContext context) { 37 | 38 | this.classifier = new Classifier(pmmlPath); 39 | classifier.prepare(); 40 | } 41 | 42 | /** 43 | * @param flowProcess 44 | * @param functionCall 45 | */ 46 | @Override 47 | public void execute(TridentTuple tuple, TridentCollector collector) { 48 | 49 | String label = 
classifier.classifyTuple(tuple); 50 | collector.emit(new Values(label)); 51 | } 52 | 53 | /** 54 | * Returns a Fields data structure naming the input tuple fields. 55 | * 56 | * @return Fields 57 | */ 58 | public Fields getInputFields() { 59 | return classifier.model.schema.getInputFields(); 60 | } 61 | 62 | /** 63 | * Returns a String naming the predictor tuple fields. 64 | * 65 | * @return 66 | */ 67 | public String getPredictor() { 68 | return classifier.model.schema.label_field.name; 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/main/java/pattern/ClassifierSplitFunction.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2013 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.concurrentinc.com/ 5 | */ 6 | 7 | package pattern; 8 | 9 | import java.util.HashMap; 10 | import java.util.Map; 11 | 12 | import org.slf4j.Logger; 13 | import org.slf4j.LoggerFactory; 14 | 15 | import storm.trident.operation.BaseFunction; 16 | import storm.trident.operation.TridentCollector; 17 | import storm.trident.operation.TridentOperationContext; 18 | import storm.trident.tuple.TridentTuple; 19 | import backtype.storm.tuple.Values; 20 | 21 | public class ClassifierSplitFunction extends BaseFunction { 22 | /** Field LOG */ 23 | private static final Logger LOG = LoggerFactory 24 | .getLogger(ClassifierSplitFunction.class); 25 | 26 | public Map classifierMap; 27 | public String splitField; 28 | Map pmmlMap; 29 | 30 | public ClassifierSplitFunction(String splitField, 31 | Map pmmlMap) { 32 | this.pmmlMap = pmmlMap; 33 | this.splitField = splitField; 34 | 35 | } 36 | 37 | @Override 38 | public void prepare(Map conf, TridentOperationContext context) { 39 | classifierMap = new HashMap(); 40 | 41 | for (Map.Entry entry : pmmlMap.entrySet()) { 42 | String splitId = entry.getKey(); 43 | String pmmlPath = entry.getValue(); 44 | 
Classifier classifier = new Classifier(pmmlPath); 45 | classifierMap.put(splitId, classifier); 46 | classifier.prepare(); 47 | } 48 | } 49 | 50 | @Override 51 | public void execute(TridentTuple tuple, TridentCollector collector) { 52 | String splitId = tuple.getStringByField(splitField); 53 | Classifier classifier = classifierMap.get(splitId); 54 | 55 | if (classifier != null) { 56 | String label = classifier.classifyTuple(tuple); 57 | collector.emit(new Values(label)); 58 | } else { 59 | String message = String.format( 60 | "unknown experimental split ID [ %s ]", splitId); 61 | LOG.error(message); 62 | throw new PatternException(message); 63 | } 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /src/main/java/pattern/PMML.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2013 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.concurrentinc.com/ 5 | */ 6 | 7 | package pattern; 8 | 9 | import java.io.Reader; 10 | 11 | import javax.xml.xpath.XPathConstants; 12 | 13 | import org.slf4j.Logger; 14 | import org.slf4j.LoggerFactory; 15 | import org.w3c.dom.Element; 16 | import org.w3c.dom.NodeList; 17 | 18 | public class PMML { 19 | /** Field LOG */ 20 | private static final Logger LOG = LoggerFactory.getLogger(PMML.class); 21 | 22 | protected XPathReader reader; 23 | protected Schema schema = new Schema(); 24 | 25 | /** Implemented model types */ 26 | public enum Models { 27 | UNKNOWN, MINING, TREE, REGRESSION, CLUSTERING, GENERALIZED_REGRESSION 28 | }; 29 | 30 | public Models model_type = Models.UNKNOWN; 31 | public String version; 32 | 33 | /** 34 | * Parse the PMML description, using XPath. 
35 | * 36 | * @param pmmlSource 37 | * XML source for the PMML description 38 | * @throws PatternException 39 | */ 40 | public PMML(Reader pmmlSource) throws PatternException { 41 | reader = new XPathReader(pmmlSource); 42 | 43 | version = ((Element) getNodeList("/PMML").item(0)) 44 | .getAttribute("version"); 45 | model_type = parseModelType(); 46 | 47 | LOG.debug(String.format("Model: %s", model_type.name())); 48 | LOG.debug(String.format("Version: %s", version)); 49 | 50 | schema.parseDictionary(this, 51 | getNodeList("/PMML/DataDictionary/DataField")); 52 | } 53 | 54 | /** 55 | * Parse the model type. 56 | * 57 | * @return Models 58 | */ 59 | public Models parseModelType() { 60 | Models model_type = Models.UNKNOWN; 61 | 62 | if (reader.read("/PMML/MiningModel", XPathConstants.NODE) != null) 63 | return Models.MINING; 64 | else if (reader.read("/PMML/TreeModel", XPathConstants.NODE) != null) 65 | return Models.TREE; 66 | else if (reader.read("/PMML/RegressionModel", XPathConstants.NODE) != null) 67 | return Models.REGRESSION; 68 | else if (reader.read("/PMML/ClusteringModel", XPathConstants.NODE) != null) 69 | return Models.CLUSTERING; 70 | else if (reader.read("/PMML/GeneralRegressionModel", 71 | XPathConstants.NODE) != null) 72 | return Models.GENERALIZED_REGRESSION; 73 | 74 | return model_type; 75 | } 76 | 77 | /** 78 | * Getter for the PMML data dictionary. 79 | * 80 | * @return Schema 81 | */ 82 | public Schema getSchema() { 83 | return schema; 84 | } 85 | 86 | /** 87 | * Getter for the XML document reader. 88 | * 89 | * @return XPathReader 90 | */ 91 | public XPathReader getReader() { 92 | return reader; 93 | } 94 | 95 | /** 96 | * Extract an XML node list based on an XPath expression. 
/**
 * Unchecked exception raised when a PMML description cannot be read,
 * parsed, or applied.
 */
public class PatternException extends RuntimeException {
    /**
     * Create an exception with neither a message nor a cause.
     */
    public PatternException() {
        super();
    }

    /**
     * Create an exception carrying a description of the failure.
     *
     * @param message
     *            description of the failure
     */
    public PatternException(String message) {
        super(message);
    }

    /**
     * Create an exception carrying a description and the underlying cause.
     *
     * @param message
     *            description of the failure
     * @param cause
     *            the exception that triggered this one
     */
    public PatternException(String message, Throwable cause) {
        super(message, cause);
    }
}
3 | * 4 | * Project and contact information: http://www.concurrentinc.com/ 5 | */ 6 | 7 | package pattern; 8 | 9 | import java.io.Serializable; 10 | import java.util.HashMap; 11 | import java.util.Iterator; 12 | import java.util.LinkedHashMap; 13 | import java.util.Map; 14 | 15 | import org.slf4j.Logger; 16 | import org.slf4j.LoggerFactory; 17 | import org.w3c.dom.Element; 18 | import org.w3c.dom.Node; 19 | import org.w3c.dom.NodeList; 20 | 21 | import pattern.datafield.DataField; 22 | import pattern.datafield.DataFieldFactory; 23 | import storm.trident.tuple.TridentTuple; 24 | import backtype.storm.tuple.Fields; 25 | 26 | public class Schema extends LinkedHashMap implements 27 | Serializable { 28 | /** Field LOG */ 29 | private static final Logger LOG = LoggerFactory.getLogger(Schema.class); 30 | 31 | /** 32 | * Field label_field - metadata of the label to be produced by the 33 | * classifier 34 | */ 35 | public DataField label_field; 36 | 37 | /** 38 | * Parse the data dictionary from PMML. 39 | * 40 | * @param pmml 41 | * PMML model 42 | * @param node_list 43 | * list of DataField nodes in the DataDictionary. 44 | * @throws PatternException 45 | */ 46 | public void parseDictionary(PMML pmml, NodeList node_list) 47 | throws PatternException { 48 | for (int i = 0; i < node_list.getLength(); i++) { 49 | Node node = node_list.item(i); 50 | 51 | if (node.getNodeType() == Node.ELEMENT_NODE) { 52 | String name = ((Element) node).getAttribute("name"); 53 | String op_type = ((Element) node).getAttribute("optype"); 54 | String data_type = ((Element) node).getAttribute("dataType"); 55 | 56 | if (!containsKey(name)) { 57 | DataField df = DataFieldFactory.getDataField( 58 | pmml.getReader(), node, name, op_type, data_type); 59 | put(name, df); 60 | LOG.debug("PMML add DataField: " + df); 61 | } 62 | } 63 | } 64 | } 65 | 66 | /** 67 | * Determine the active tuple fields for the input schema. 68 | * 69 | * @param node_list 70 | * list of DataField nodes in the DataDictionary. 
71 | * @throws PatternException 72 | */ 73 | public void parseMiningSchema(NodeList node_list) throws PatternException { 74 | for (int i = 0; i < node_list.getLength(); i++) { 75 | Node node = node_list.item(i); 76 | 77 | if (node.getNodeType() == Node.ELEMENT_NODE) { 78 | String name = ((Element) node).getAttribute("name"); 79 | String usage_type = ((Element) node).getAttribute("usageType"); 80 | 81 | LOG.debug(String.format("DataField: %s:%s", name, usage_type)); 82 | 83 | if (containsKey(name)) { 84 | if ("predicted".equals(usage_type)) 85 | label_field = remove(name); 86 | else if (!"active".equals(usage_type)) 87 | remove(name); 88 | } else { 89 | String message = String 90 | .format("unknown DataField referenced in PMML [ %s ]", 91 | name); 92 | LOG.error(message); 93 | throw new PatternException(message); 94 | } 95 | } 96 | } 97 | } 98 | 99 | /** 100 | * Returns a Fields data structure naming the input tuple fields. 101 | * 102 | * @return Fields 103 | */ 104 | public Fields getInputFields() { 105 | return new Fields(getParamNames()); 106 | } 107 | 108 | /** 109 | * Returns the expected name for each field in the Tuple, to be used as 110 | * Janino parameters. 111 | * 112 | * @return String[] 113 | */ 114 | public String[] getParamNames() { 115 | return keySet().toArray(new String[0]); 116 | } 117 | 118 | /** 119 | * Returns the expected class for each field in the Tuple, to be used as 120 | * Janino parameters. 121 | * 122 | * @return Class[] 123 | */ 124 | public Class[] getParamTypes() { 125 | Class[] param_types = new Class[size()]; 126 | Iterator iter = values().iterator(); 127 | 128 | for (int i = 0; i < size(); i++) { 129 | DataField df = iter.next(); 130 | param_types[i] = df.getClassType(); 131 | } 132 | 133 | return param_types; 134 | } 135 | 136 | /** 137 | * Convert values for the fields in the Tuple, in a form that Janino 138 | * expects. 
139 | * 140 | * @param values 141 | * @param param_values 142 | * @throws PatternException 143 | */ 144 | public void setParamValues(TridentTuple values, Object[] param_values) 145 | throws PatternException { 146 | Iterator iter = values().iterator(); 147 | 148 | for (int i = 0; i < size(); i++) { 149 | DataField df = iter.next(); 150 | param_values[i] = df.getValue(values, i); 151 | } 152 | } 153 | 154 | /** 155 | * Returns a Map of names/values for each field in the Tuple. 156 | * 157 | * @param values 158 | * @return Map 159 | * @throws PatternException 160 | */ 161 | public Map getParamMap(TridentTuple values) 162 | throws PatternException { 163 | HashMap param_map = new HashMap(); 164 | Iterator iter = values().iterator(); 165 | 166 | for (int i = 0; i < size(); i++) { 167 | DataField df = iter.next(); 168 | param_map.put(df.name, df.getValue(values, i)); 169 | } 170 | 171 | return param_map; 172 | } 173 | } 174 | -------------------------------------------------------------------------------- /src/main/java/pattern/XPathReader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2013 Concurrent, Inc. All Rights Reserved. 
3 | * 4 | * Project and contact information: http://www.concurrentinc.com/ 5 | */ 6 | 7 | package pattern; 8 | 9 | import java.io.IOException; 10 | import java.io.Reader; 11 | 12 | import javax.xml.namespace.QName; 13 | import javax.xml.parsers.DocumentBuilderFactory; 14 | import javax.xml.parsers.ParserConfigurationException; 15 | import javax.xml.xpath.XPath; 16 | import javax.xml.xpath.XPathExpression; 17 | import javax.xml.xpath.XPathExpressionException; 18 | import javax.xml.xpath.XPathFactory; 19 | 20 | import org.slf4j.Logger; 21 | import org.slf4j.LoggerFactory; 22 | import org.w3c.dom.Document; 23 | import org.xml.sax.InputSource; 24 | import org.xml.sax.SAXException; 25 | 26 | public class XPathReader { 27 | /** Field LOG */ 28 | private static final Logger LOG = LoggerFactory 29 | .getLogger(XPathReader.class); 30 | 31 | private Document xmlDocument; 32 | private XPath xPath; 33 | 34 | /** 35 | * Set up to read from the XML source. 36 | * 37 | * @param xmlSource 38 | * Reader for the XML 39 | */ 40 | public XPathReader(Reader xmlSource) { 41 | try { 42 | xmlDocument = DocumentBuilderFactory.newInstance() 43 | .newDocumentBuilder().parse(new InputSource(xmlSource)); 44 | xPath = XPathFactory.newInstance().newXPath(); 45 | } catch (IOException exception) { 46 | LOG.error("could not read PMML file", exception); 47 | throw new PatternException(" could not read PMML file", exception); 48 | } catch (SAXException exception) { 49 | LOG.error("could not parse PMML file", exception); 50 | throw new PatternException(" could not parse PMML file", exception); 51 | } catch (ParserConfigurationException exception) { 52 | LOG.error("could not configure parser for PMML file", exception); 53 | throw new PatternException( 54 | " could not configure parser for PMML file", exception); 55 | } 56 | } 57 | 58 | /** 59 | * @param expression 60 | * @param returnType 61 | * @return 62 | */ 63 | public Object read(String expression, QName returnType) { 64 | Object result = null; 65 
| 66 | try { 67 | XPathExpression xPathExpression = xPath.compile(expression); 68 | result = xPathExpression.evaluate(xmlDocument, returnType); 69 | } catch (XPathExpressionException exception) { 70 | String message = String 71 | .format("could not evaluate XPath [ %s ] from doc root", 72 | expression); 73 | LOG.error(message, exception); 74 | throw new PatternException(message, exception); 75 | } finally { 76 | return result; 77 | } 78 | } 79 | 80 | /** 81 | * @param item 82 | * @param expression 83 | * @param returnType 84 | * @return 85 | */ 86 | public Object read(Object item, String expression, QName returnType) { 87 | Object result = null; 88 | 89 | try { 90 | XPathExpression xPathExpression = xPath.compile(expression); 91 | result = xPathExpression.evaluate(item, returnType); 92 | } catch (XPathExpressionException exception) { 93 | String message = String.format( 94 | "could not evaluate XPath [ %s ] from %s", expression, 95 | item.toString()); 96 | LOG.error(message, exception); 97 | throw new PatternException(message, exception); 98 | } finally { 99 | return result; 100 | } 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /src/main/java/pattern/datafield/CategoricalDataField.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2013 Concurrent, Inc. All Rights Reserved. 
3 | * 4 | * Project and contact information: http://www.concurrentinc.com/ 5 | */ 6 | 7 | package pattern.datafield; 8 | 9 | import java.util.ArrayList; 10 | 11 | import javax.xml.xpath.XPathConstants; 12 | 13 | import org.slf4j.Logger; 14 | import org.slf4j.LoggerFactory; 15 | import org.w3c.dom.Element; 16 | import org.w3c.dom.Node; 17 | import org.w3c.dom.NodeList; 18 | 19 | import pattern.PatternException; 20 | import pattern.XPathReader; 21 | import storm.trident.tuple.TridentTuple; 22 | 23 | public class CategoricalDataField extends DataField { 24 | /** Field LOG */ 25 | private static final Logger LOG = LoggerFactory 26 | .getLogger(CategoricalDataField.class); 27 | 28 | public ArrayList categories = new ArrayList(); 29 | 30 | /** 31 | * @param name 32 | * @param op_type 33 | * @param data_type 34 | */ 35 | public CategoricalDataField(String name, String op_type, String data_type) { 36 | this.name = name; 37 | this.op_type = op_type; 38 | this.data_type = data_type; 39 | } 40 | 41 | /** 42 | * @param reader 43 | * @param node 44 | */ 45 | @Override 46 | public void parse(XPathReader reader, Node node) { 47 | String node_expr = "./Value"; 48 | NodeList node_list = (NodeList) reader.read(node, node_expr, 49 | XPathConstants.NODESET); 50 | 51 | for (int i = 0; i < node_list.getLength(); i++) { 52 | Node child = node_list.item(i); 53 | 54 | if (child.getNodeType() == Node.ELEMENT_NODE) { 55 | String value = ((Element) child).getAttribute("value"); 56 | LOG.debug(String.format("PMML categorical value: %s", value)); 57 | categories.add(value); 58 | } 59 | } 60 | } 61 | 62 | /** 63 | * @param reader 64 | * @param node 65 | * @return String 66 | * @throws PatternException 67 | */ 68 | public String getEval(XPathReader reader, Element node) 69 | throws PatternException { 70 | String operator = node.getAttribute("booleanOperator"); 71 | String eval = null; 72 | 73 | String expr = "./Array[1]"; 74 | NodeList node_list = (NodeList) reader.read(node, expr, 75 | 
XPathConstants.NODESET); 76 | Element child = (Element) node_list.item(0); 77 | 78 | PortableBitSet bits = new PortableBitSet(categories.size()); 79 | String value = child.getTextContent(); 80 | 81 | value = value.substring(1, value.length() - 1); 82 | 83 | for (String s : value.split("\\\"\\s+\\\"")) 84 | bits.set(categories.indexOf(s)); 85 | 86 | if (operator.equals("isIn")) 87 | eval = String.format( 88 | "pattern.datafield.PortableBitSet.isIn( \"%s\", %s )", 89 | bits.toString(), name); 90 | else 91 | throw new PatternException("unknown operator: " + operator); 92 | 93 | return eval; 94 | } 95 | 96 | /** @return Class */ 97 | public Class getClassType() { 98 | return int.class; 99 | } 100 | 101 | /** 102 | * @param values 103 | * @param i 104 | * @return Object 105 | */ 106 | public Object getValue(TridentTuple values, int i) { 107 | String field_value = values.getString(i); 108 | int bit_index = categories.indexOf(field_value); 109 | 110 | LOG.debug(String.format("%s @ %d | %s", field_value, bit_index, 111 | categories)); 112 | return bit_index; 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /src/main/java/pattern/datafield/ContinuousDataField.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2013 Concurrent, Inc. All Rights Reserved. 
3 | * 4 | * Project and contact information: http://www.concurrentinc.com/ 5 | */ 6 | 7 | package pattern.datafield; 8 | 9 | import org.slf4j.Logger; 10 | import org.slf4j.LoggerFactory; 11 | import org.w3c.dom.Element; 12 | 13 | import pattern.PatternException; 14 | import pattern.XPathReader; 15 | import storm.trident.tuple.TridentTuple; 16 | 17 | public class ContinuousDataField extends DataField { 18 | /** Field LOG */ 19 | private static final Logger LOG = LoggerFactory 20 | .getLogger(ContinuousDataField.class); 21 | 22 | /** 23 | * @param name 24 | * @param op_type 25 | * @param data_type 26 | */ 27 | public ContinuousDataField(String name, String op_type, String data_type) { 28 | this.name = name; 29 | this.op_type = op_type; 30 | this.data_type = data_type; 31 | } 32 | 33 | /** 34 | * @param reader 35 | * @param node 36 | * @return String 37 | * @throws PatternException 38 | */ 39 | public String getEval(XPathReader reader, Element node) 40 | throws PatternException { 41 | String operator = node.getAttribute("operator"); 42 | String value = node.getAttribute("value"); 43 | String eval = null; 44 | 45 | if (operator.equals("equal")) 46 | eval = name + " == " + value; 47 | else if (operator.equals("notEqual")) 48 | eval = name + " != " + value; 49 | else if (operator.equals("lessThan")) 50 | eval = name + " < " + value; 51 | else if (operator.equals("lessOrEqual")) 52 | eval = name + " <= " + value; 53 | else if (operator.equals("greaterThan")) 54 | eval = name + " > " + value; 55 | else if (operator.equals("greaterOrEqual")) 56 | eval = name + " >= " + value; 57 | else 58 | throw new PatternException("unknown operator: " + operator); 59 | 60 | return eval; 61 | } 62 | 63 | /** @return Class */ 64 | public Class getClassType() { 65 | return double.class; 66 | } 67 | 68 | /** 69 | * @param values 70 | * @param i 71 | * @return Object 72 | * @throws PatternException 73 | */ 74 | public Object getValue(TridentTuple values, int i) throws PatternException { 75 | 
try { 76 | return values.getDouble(i); 77 | } catch (NumberFormatException exception) { 78 | LOG.error("tuple format is bad", exception); 79 | throw new PatternException("tuple format is bad", exception); 80 | } 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /src/main/java/pattern/datafield/DataField.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2013 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.concurrentinc.com/ 5 | */ 6 | 7 | package pattern.datafield; 8 | 9 | import java.io.Serializable; 10 | 11 | import org.w3c.dom.Element; 12 | import org.w3c.dom.Node; 13 | 14 | import pattern.PatternException; 15 | import pattern.XPathReader; 16 | import storm.trident.tuple.TridentTuple; 17 | 18 | public abstract class DataField implements Serializable { 19 | public String name; 20 | public String op_type; 21 | public String data_type; 22 | 23 | /** 24 | * Does nothing. Override this method if a DataField subclass needs to parse 25 | * additional info from PMML. 
26 | * 27 | * @param reader 28 | * @param node 29 | */ 30 | public void parse(XPathReader reader, Node node) { 31 | } 32 | 33 | /** 34 | * @param reader 35 | * @param node 36 | * @return String 37 | * @throws PatternException 38 | */ 39 | public abstract String getEval(XPathReader reader, Element node) 40 | throws PatternException; 41 | 42 | /** @return */ 43 | public abstract Class getClassType(); 44 | 45 | /** 46 | * @return Object 47 | * @throws PatternException 48 | */ 49 | public abstract Object getValue(TridentTuple values, int i) 50 | throws PatternException; 51 | 52 | /** @return Object */ 53 | @Override 54 | public String toString() { 55 | return name + ":" + op_type + ":" + data_type; 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/main/java/pattern/datafield/DataFieldFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2013 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.concurrentinc.com/ 5 | */ 6 | 7 | package pattern.datafield; 8 | 9 | import org.slf4j.Logger; 10 | import org.slf4j.LoggerFactory; 11 | import org.w3c.dom.Node; 12 | 13 | import pattern.PatternException; 14 | import pattern.XPathReader; 15 | 16 | public class DataFieldFactory { 17 | /** Field LOG */ 18 | private static final Logger LOG = LoggerFactory 19 | .getLogger(DataFieldFactory.class); 20 | 21 | /** 22 | * Create the appropriate DataField object based on parsing the data 23 | * dictionary in a PMML file. 
24 | * 25 | * @param reader 26 | * @param node 27 | * @param name 28 | * @param op_type 29 | * @param data_type 30 | * @return DataField 31 | * @throws PatternException 32 | */ 33 | public static DataField getDataField(XPathReader reader, Node node, 34 | String name, String op_type, String data_type) 35 | throws PatternException { 36 | DataField df = null; 37 | 38 | if ("continuous".equals(op_type) && "double".equals(data_type)) 39 | df = new ContinuousDataField(name, op_type, data_type); 40 | else if ("categorical".equals(op_type) && "string".equals(data_type)) { 41 | df = new CategoricalDataField(name, op_type, data_type); 42 | df.parse(reader, node); 43 | } else { 44 | String message = String.format( 45 | "unsupported DataField type %s / %s: ", op_type, data_type); 46 | LOG.error(message); 47 | throw new PatternException(message); 48 | } 49 | 50 | return df; 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/main/java/pattern/datafield/PortableBitSet.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2013 Concurrent, Inc. All Rights Reserved. 
/**
 * A BitSet that remembers the size it was created with, so it can be
 * written out as, and rebuilt from, a fixed-width string of '0' and '1'
 * characters (character i is bit i).
 */
public class PortableBitSet extends BitSet implements Serializable {
    /** Number of bit positions rendered by {@link #toString()}. */
    protected long max_size = 0L;

    /**
     * Creates an empty set of the given width.
     *
     * @param nbits
     *            number of bit positions
     */
    public PortableBitSet(int nbits) {
        super(nbits);
        max_size = nbits;
    }

    /**
     * Rebuilds a set from its string form: bit i is set exactly when
     * character i is '1'.
     *
     * @param bit_str
     *            string of '0' / '1' characters
     */
    public PortableBitSet(String bit_str) {
        super(bit_str.length());
        max_size = bit_str.length();

        for (int i = 0; i < max_size; i++)
            if (bit_str.charAt(i) == '1')
                set(i);
    }

    /**
     * Tests whether bit i is set in the given bit string.
     *
     * Reads the character directly instead of materializing a whole
     * PortableBitSet for every lookup; the answer is the same because the
     * string constructor sets bit i exactly when character i is '1'.
     *
     * @param bit_str
     *            string of '0' / '1' characters
     * @param i
     *            bit position to test
     * @return boolean true when i is inside the string and that character
     *         is '1'; false for any out-of-range index
     */
    public static boolean isIn(String bit_str, int i) {
        return (i >= 0) && (i < bit_str.length())
                && (bit_str.charAt(i) == '1');
    }

    /** @return String the bits as '0' / '1' characters, max_size wide */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder((int) max_size);

        for (int i = 0; i < max_size; i++)
            sb.append(get(i) ? '1' : '0');

        return sb.toString();
    }
}
3 | * 4 | * Project and contact information: http://www.concurrentinc.com/ 5 | */ 6 | 7 | package pattern.model; 8 | 9 | import java.io.Serializable; 10 | import java.util.ArrayList; 11 | import java.util.HashMap; 12 | import java.util.List; 13 | import java.util.Map; 14 | 15 | import org.slf4j.Logger; 16 | import org.slf4j.LoggerFactory; 17 | import org.w3c.dom.Node; 18 | import org.w3c.dom.NodeList; 19 | 20 | import pattern.PMML; 21 | import pattern.PatternException; 22 | import pattern.model.tree.Context; 23 | import pattern.model.tree.TreeModel; 24 | import storm.trident.tuple.TridentTuple; 25 | 26 | public class MiningModel extends Model implements Serializable { 27 | /** Field LOG */ 28 | private static final Logger LOG = LoggerFactory 29 | .getLogger(MiningModel.class); 30 | 31 | public Context context = null; 32 | public List segments = new ArrayList(); 33 | public Map votes = new HashMap(); 34 | 35 | /** 36 | * @param pmml 37 | * PMML model 38 | * @throws PatternException 39 | */ 40 | public MiningModel(PMML pmml) throws PatternException { 41 | this.schema = pmml.getSchema(); 42 | this.context = new Context(); 43 | 44 | schema.parseMiningSchema(pmml 45 | .getNodeList("/PMML/MiningModel/MiningSchema/MiningField")); 46 | 47 | String expr = "/PMML/MiningModel/Segmentation/Segment"; 48 | NodeList node_list = pmml.getNodeList(expr); 49 | 50 | for (int i = 0; i < node_list.getLength(); i++) { 51 | Node node = node_list.item(i); 52 | 53 | if (node.getNodeType() == Node.ELEMENT_NODE) { 54 | TreeModel tree_model = new TreeModel(pmml, context, node); 55 | segments.add(tree_model); 56 | } 57 | } 58 | } 59 | 60 | /** 61 | * Prepare to classify with this model. Called immediately before the 62 | * enclosing Operation instance is put into play processing Tuples. 63 | */ 64 | @Override 65 | public void prepare() { 66 | context.prepare(schema); 67 | } 68 | 69 | /** 70 | * Classify an input tuple, returning the predicted label. 
71 | * 72 | * 73 | * @param values 74 | * tuple values 75 | * @param fields 76 | * field names 77 | * @return String 78 | * @throws PatternException 79 | */ 80 | @Override 81 | public String classifyTuple(TridentTuple values) throws PatternException { 82 | Boolean[] pred_eval = context.evalPredicates(schema, values); 83 | String label = null; 84 | Integer winning_vote = 0; 85 | 86 | votes.clear(); 87 | 88 | // tally the vote for each tree in the forest 89 | 90 | for (Model model : segments) { 91 | label = ((TreeModel) model).tree.traverse(pred_eval); 92 | 93 | if (!votes.containsKey(label)) 94 | winning_vote = 1; 95 | else 96 | winning_vote = votes.get(label) + 1; 97 | 98 | votes.put(label, winning_vote); 99 | } 100 | 101 | // determine the winning label 102 | 103 | for (String key : votes.keySet()) { 104 | if (votes.get(key) > winning_vote) { 105 | label = key; 106 | winning_vote = votes.get(key); 107 | } 108 | } 109 | 110 | return label; 111 | } 112 | 113 | /** @return String */ 114 | @Override 115 | public String toString() { 116 | StringBuilder buf = new StringBuilder(); 117 | 118 | if (schema != null) { 119 | buf.append(schema); 120 | buf.append("\n"); 121 | buf.append("---------"); 122 | buf.append("\n"); 123 | } 124 | 125 | if (context != null) { 126 | buf.append(context); 127 | buf.append("\n"); 128 | buf.append("---------"); 129 | buf.append("\n"); 130 | } 131 | 132 | buf.append("segments: "); 133 | buf.append(segments); 134 | buf.append("---------"); 135 | buf.append("\n"); 136 | 137 | for (Model model : segments) { 138 | buf.append(((TreeModel) model).tree); 139 | buf.append("\n"); 140 | } 141 | 142 | buf.append("---------"); 143 | buf.append("\n"); 144 | buf.append("votes: "); 145 | buf.append(votes); 146 | 147 | return buf.toString(); 148 | } 149 | } 150 | -------------------------------------------------------------------------------- /src/main/java/pattern/model/Model.java: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2013 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.concurrentinc.com/ 5 | */ 6 | 7 | package pattern.model; 8 | 9 | import java.io.Serializable; 10 | 11 | import pattern.PatternException; 12 | import pattern.Schema; 13 | import storm.trident.tuple.TridentTuple; 14 | 15 | public abstract class Model implements Serializable { 16 | public Schema schema = null; 17 | 18 | /** 19 | * Prepare to classify with this model. Called immediately before the 20 | * enclosing Operation instance is put into play processing Tuples. 21 | */ 22 | public abstract void prepare(); 23 | 24 | /** 25 | * Classify an input tuple, returning the predicted label. 26 | * 27 | * @param values 28 | * tuple values 29 | * @param fields 30 | * tuple fields 31 | * @return String 32 | * @throws PatternException 33 | */ 34 | public abstract String classifyTuple(TridentTuple values) 35 | throws PatternException; 36 | } 37 | -------------------------------------------------------------------------------- /src/main/java/pattern/model/clust/ClusteringModel.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2013 Concurrent, Inc. All Rights Reserved. 
3 | * 4 | * Project and contact information: http://www.concurrentinc.com/ 5 | */ 6 | 7 | package pattern.model.clust; 8 | 9 | import java.io.Serializable; 10 | import java.util.ArrayList; 11 | import java.util.List; 12 | import java.util.Map; 13 | 14 | import org.slf4j.Logger; 15 | import org.slf4j.LoggerFactory; 16 | import org.w3c.dom.Element; 17 | import org.w3c.dom.Node; 18 | import org.w3c.dom.NodeList; 19 | 20 | import pattern.PMML; 21 | import pattern.PatternException; 22 | import pattern.model.Model; 23 | import storm.trident.tuple.TridentTuple; 24 | 25 | public class ClusteringModel extends Model implements Serializable { 26 | /** Field LOG */ 27 | private static final Logger LOG = LoggerFactory 28 | .getLogger(ClusteringModel.class); 29 | 30 | public List exemplars = new ArrayList(); 31 | 32 | /** 33 | * Constructor for a ClusteringModel as a standalone classifier (PMML 34 | * versions 1-3). 35 | * 36 | * @param pmml 37 | * PMML model 38 | * @throws PatternException 39 | */ 40 | public ClusteringModel(PMML pmml) throws PatternException { 41 | schema = pmml.getSchema(); 42 | schema.parseMiningSchema(pmml 43 | .getNodeList("/PMML/ClusteringModel/MiningSchema/MiningField")); 44 | 45 | String node_expr = "/PMML/ClusteringModel/Cluster"; 46 | NodeList node_list = pmml.getNodeList(node_expr); 47 | 48 | for (int i = 0; i < node_list.getLength(); i++) { 49 | Node node = node_list.item(i); 50 | 51 | if (node.getNodeType() == Node.ELEMENT_NODE) { 52 | Element node_elem = (Element) node; 53 | 54 | if ("Cluster".equals(node_elem.getNodeName())) 55 | exemplars.add(new Exemplar(node_elem)); 56 | } 57 | } 58 | } 59 | 60 | /** 61 | * Prepare to classify with this model. Called immediately before the 62 | * enclosing Operation instance is put into play processing Tuples. 63 | */ 64 | @Override 65 | public void prepare() { 66 | // not needed 67 | } 68 | 69 | /** 70 | * Classify an input tuple, returning the predicted label. 
71 | * 72 | * @param values 73 | * tuple values 74 | * @param fields 75 | * field names 76 | * @return String 77 | * @throws PatternException 78 | */ 79 | @Override 80 | public String classifyTuple(TridentTuple values) throws PatternException { 81 | Map param_map = schema.getParamMap(values); 82 | String[] param_names = schema.getParamNames(); 83 | Double[] param_values = new Double[param_names.length]; 84 | 85 | for (int i = 0; i < param_names.length; i++) 86 | param_values[i] = (Double) param_map.get(param_names[i]); 87 | 88 | Exemplar best_clust = null; 89 | double best_dist = 0.0; 90 | 91 | for (Exemplar clust : exemplars) { 92 | double distance = clust.calcDistance(param_values); 93 | 94 | if ((best_clust == null) || (distance < best_dist)) { 95 | best_clust = clust; 96 | best_dist = distance; 97 | } 98 | } 99 | 100 | return best_clust.name; 101 | } 102 | 103 | /** @return String */ 104 | @Override 105 | public String toString() { 106 | StringBuilder buf = new StringBuilder(); 107 | 108 | if (schema != null) { 109 | buf.append(schema); 110 | buf.append("\n"); 111 | buf.append("---------"); 112 | buf.append("\n"); 113 | buf.append(exemplars); 114 | buf.append("---------"); 115 | buf.append("\n"); 116 | } 117 | 118 | return buf.toString(); 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /src/main/java/pattern/model/clust/Exemplar.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2013 Concurrent, Inc. All Rights Reserved. 
3 | * 4 | * Project and contact information: http://www.concurrentinc.com/ 5 | */ 6 | 7 | package pattern.model.clust; 8 | 9 | import java.io.Serializable; 10 | import java.util.ArrayList; 11 | import java.util.List; 12 | 13 | import org.slf4j.Logger; 14 | import org.slf4j.LoggerFactory; 15 | import org.w3c.dom.Element; 16 | import org.w3c.dom.Node; 17 | import org.w3c.dom.NodeList; 18 | 19 | import pattern.PatternException; 20 | 21 | public class Exemplar implements Serializable { 22 | /** Field LOG */ 23 | private static final Logger LOG = LoggerFactory.getLogger(Exemplar.class); 24 | 25 | public List points = new ArrayList(); 26 | public String name; 27 | 28 | /** 29 | * @param cluster_node 30 | * node representing a cluster center 31 | * @throws PatternException 32 | */ 33 | public Exemplar(Element cluster_node) throws PatternException { 34 | // 35 | // 5.006 3.428 1.462 0.246 36 | name = cluster_node.getAttribute("name"); 37 | 38 | NodeList child_nodes = cluster_node.getChildNodes(); 39 | 40 | for (int j = 0; j < child_nodes.getLength(); j++) { 41 | Node child_node = child_nodes.item(j); 42 | 43 | if (child_node.getNodeType() == Node.ELEMENT_NODE) { 44 | Element child_elem = (Element) child_node; 45 | 46 | if ("Array".equals(child_elem.getNodeName())) { 47 | int n = Integer.valueOf(child_elem.getAttribute("n")); 48 | String type = child_elem.getAttribute("type"); 49 | String text = child_elem.getTextContent(); 50 | 51 | for (String val : text.split("\\s+")) 52 | points.add(Double.valueOf(val)); 53 | 54 | if (points.size() != n) { 55 | String message = String 56 | .format("expected %d data points in PMML for cluster %s [ %s ]", 57 | n, name, text); 58 | LOG.error(message); 59 | throw new PatternException(message); 60 | } 61 | } 62 | } 63 | } 64 | } 65 | 66 | /** 67 | * Calculate the distance from this cluster for the given tuple. 
68 | * 69 | * @param param_values 70 | * array of tuple values 71 | * @return double 72 | */ 73 | public double calcDistance(Double[] param_values) { 74 | double sum_sq = 0.0; 75 | 76 | for (int i = 0; i < param_values.length; i++) 77 | sum_sq += Math.pow(param_values[i] - points.get(i), 2.0); 78 | 79 | return Math.sqrt(sum_sq); 80 | } 81 | 82 | /** @return String */ 83 | @Override 84 | public String toString() { 85 | return String.format("Exemplar: %s %s", name, points.toString()); 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /src/main/java/pattern/model/glm/GeneralizedRegressionModel.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2013 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.concurrentinc.com/ 5 | * 6 | * @author girish.kathalagiri 7 | */ 8 | 9 | package pattern.model.glm; 10 | 11 | import java.io.Serializable; 12 | import java.util.ArrayList; 13 | import java.util.HashSet; 14 | 15 | import javax.xml.xpath.XPathConstants; 16 | 17 | import org.slf4j.Logger; 18 | import org.slf4j.LoggerFactory; 19 | import org.w3c.dom.Element; 20 | import org.w3c.dom.Node; 21 | import org.w3c.dom.NodeList; 22 | 23 | import pattern.PMML; 24 | import pattern.PatternException; 25 | import pattern.model.Model; 26 | import storm.trident.tuple.TridentTuple; 27 | 28 | public class GeneralizedRegressionModel extends Model implements Serializable { 29 | /** LOGGER */ 30 | private static final Logger LOG = LoggerFactory 31 | .getLogger(GeneralizedRegressionModel.class); 32 | 33 | PPMatrix ppmatrix = new PPMatrix(); 34 | ParamMatrix paramMatrix = new ParamMatrix(); 35 | HashSet covariate = new HashSet(); 36 | HashSet factors = new HashSet(); 37 | HashSet parameterList = new HashSet(); 38 | LinkFunction linkFunction; 39 | 40 | /** 41 | * Constructor for a General Regression Model as a standalone classifier 42 | * 
(PMML versions 4.1). 43 | * 44 | * @param pmml 45 | * PMML model 46 | * @throws pattern.PatternException 47 | */ 48 | public GeneralizedRegressionModel(PMML pmml) throws PatternException { 49 | schema = pmml.getSchema(); 50 | schema.parseMiningSchema(pmml 51 | .getNodeList("/PMML/GeneralRegressionModel/MiningSchema/MiningField")); 52 | 53 | ppmatrix.parsePPCell(pmml 54 | .getNodeList("/PMML/GeneralRegressionModel/PPMatrix/PPCell")); 55 | LOG.debug(ppmatrix.toString()); 56 | 57 | paramMatrix.parsePCell(pmml 58 | .getNodeList("/PMML/GeneralRegressionModel/ParamMatrix/PCell")); 59 | LOG.debug(paramMatrix.toString()); 60 | 61 | String node_expr = "/PMML/GeneralRegressionModel/ParameterList/Parameter"; 62 | NodeList child_nodes = pmml.getNodeList(node_expr); 63 | // NodeList child_nodes = model_node.getChildNodes(); 64 | 65 | for (int i = 0; i < child_nodes.getLength(); i++) { 66 | Node child = child_nodes.item(i); 67 | 68 | if (child.getNodeType() == Node.ELEMENT_NODE) { 69 | String name = ((Element) child).getAttribute("name"); 70 | parameterList.add(name); 71 | } 72 | } 73 | 74 | String node_expr_covariate = "/PMML/GeneralRegressionModel/CovariateList/Predictor"; 75 | NodeList child_nodes_covariate = pmml.getNodeList(node_expr_covariate); 76 | 77 | for (int i = 0; i < child_nodes_covariate.getLength(); i++) { 78 | Node child = child_nodes_covariate.item(i); 79 | 80 | if (child.getNodeType() == Node.ELEMENT_NODE) { 81 | String name = ((Element) child).getAttribute("name"); 82 | covariate.add(name); 83 | } 84 | } 85 | 86 | String node_expr_factors = "/PMML/GeneralRegressionModel/FactorList/Predictor"; 87 | NodeList child_nodes_factors = pmml.getNodeList(node_expr_factors); 88 | 89 | for (int i = 0; i < child_nodes_factors.getLength(); i++) { 90 | Node child = child_nodes_factors.item(i); 91 | 92 | if (child.getNodeType() == Node.ELEMENT_NODE) { 93 | String name = ((Element) child).getAttribute("name"); 94 | factors.add(name); 95 | } 96 | } 97 | 98 | String node = 
"/PMML/GeneralRegressionModel/@linkFunction"; 99 | String linkFunctionStr = pmml.getReader() 100 | .read(node, XPathConstants.STRING).toString(); 101 | 102 | linkFunction = LinkFunction.getFunction(linkFunctionStr); 103 | } 104 | 105 | /** 106 | * Prepare to classify with this model. Called immediately before the 107 | * enclosing Operation instance is put into play processing Tuples. 108 | */ 109 | @Override 110 | public void prepare() { 111 | // not needed 112 | } 113 | 114 | /** 115 | * Classify an input tuple, returning the predicted label. TODO: Currently 116 | * handling only logit and Covariate. 117 | * 118 | * @param values 119 | * tuple values 120 | * @param fields 121 | * tuple fields 122 | * @return String 123 | * @throws pattern.PatternException 124 | */ 125 | @Override 126 | public String classifyTuple(TridentTuple values) throws PatternException { 127 | // TODO: Currently handling only logit and Covariate. 128 | double result = 0.0; 129 | 130 | for (String param : paramMatrix.keySet()) { 131 | // if PPMatrix has the parameter 132 | if (ppmatrix.containsKey(param)) { 133 | // get the Betas from the paramMatrix for param 134 | ArrayList pCells = paramMatrix.get(param); 135 | // TODO : Handling the targetCategory 136 | PCell pCell = pCells.get(0); 137 | Double beta = Double.parseDouble(pCell.getBeta()); 138 | 139 | // get the corresponding PPCells to get the predictor name 140 | ArrayList ppCells = ppmatrix.get(param); 141 | double paramResult = 1.0; 142 | 143 | for (PPCell pc : ppCells) { 144 | int power = Integer.parseInt(pc.getValue()); 145 | String data = values 146 | .getStringByField(pc.getPredictorName()); 147 | 148 | if (data != null) { 149 | // if in factor list 150 | if (factors.contains(param)) { 151 | if (pc.getValue().equals(data)) 152 | paramResult *= 1.0; 153 | else 154 | paramResult *= 0.0; 155 | } else // Covariate list 156 | { 157 | paramResult *= Math.pow(Double.parseDouble(data), 158 | power); 159 | } 160 | } else 161 | throw new 
PatternException( 162 | "XML and tuple fields mismatch"); 163 | } 164 | 165 | result += paramResult * beta; 166 | } else { 167 | ArrayList pCells = paramMatrix.get(param); 168 | 169 | // TODO: handling the targetCategory 170 | PCell pCell = pCells.get(0); 171 | result += Double.parseDouble(pCell.getBeta()); 172 | } 173 | } 174 | 175 | String linkResult = linkFunction.calc(result); 176 | LOG.debug("result: " + linkResult); 177 | 178 | // apply the appropriate LinkFunction 179 | return linkResult; 180 | } 181 | 182 | /** @return String */ 183 | @Override 184 | public String toString() { 185 | StringBuilder buf = new StringBuilder(); 186 | buf.append("GLM"); 187 | return buf.toString(); 188 | } 189 | } 190 | -------------------------------------------------------------------------------- /src/main/java/pattern/model/glm/LinkFunction.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2013 Concurrent, Inc. All Rights Reserved. 
/**
 * Enum for the LinkFunction in GLM. Each constant maps a linear-predictor
 * value to a result rendered with Double.toString:
 * <ul>
 * <li>none: always NaN</li>
 * <li>logit: 1 / (1 + exp(-x))</li>
 * <li>cloglog: 1 - exp(-exp(x))</li>
 * <li>loglog: exp(-exp(-x))</li>
 * <li>cauchit: 0.5 + atan(x) / pi</li>
 * </ul>
 */
public enum LinkFunction {
    NONE("none") {
        @Override
        public String calc(double value) {
            return Double.toString(Double.NaN);
        }
    },

    LOGIT("logit") {
        @Override
        public String calc(double value) {
            return Double.toString(1.0 / (1.0 + Math.exp(-value)));
        }
    },

    CLOGLOG("cloglog") {
        @Override
        public String calc(double value) {
            return Double.toString(1.0 - Math.exp(-Math.exp(value)));
        }
    },

    LOGLOG("loglog") {
        @Override
        public String calc(double value) {
            return Double.toString(Math.exp(-Math.exp(-value)));
        }
    },

    CAUCHIT("cauchit") {
        @Override
        public String calc(double value) {
            return Double.toString(0.5 + (1.0 / Math.PI) * Math.atan(value));
        }
    };

    /** Name of the function as it appears in PMML. */
    public String function;

    private LinkFunction(String function) {
        this.function = function;
    }

    /**
     * Returns the LinkFunction whose name equals the given string.
     *
     * The name is compared literally. The previous String.matches call
     * treated the caller's string as a regular expression, so input such as
     * "l.git" selected LOGIT and input such as "(" raised a
     * PatternSyntaxException.
     *
     * @param functionName
     *            String name to look up; may be null
     * @return LinkFunction the matching constant, or NONE when nothing
     *         matches
     */
    public static LinkFunction getFunction(String functionName) {
        for (LinkFunction lf : values())
            if (lf.function.equals(functionName))
                return lf;

        return LinkFunction.NONE;
    }

    /**
     * Applies this function to the given value.
     *
     * @param value
     *            input value
     * @return String the result formatted with Double.toString
     */
    public abstract String calc(double value);
}
/**
 * This Class represents PCell inside ParamMatrix: a parameter name with its
 * beta and df, all kept as the raw strings they were set with.
 */
public class PCell implements Serializable {
    private String parameterName;
    private String beta;
    private String df;

    public String getParameterName() {
        return this.parameterName;
    }

    public void setParameterName(String parameterName) {
        this.parameterName = parameterName;
    }

    public String getBeta() {
        return this.beta;
    }

    public void setBeta(String beta) {
        this.beta = beta;
    }

    public String getDf() {
        return this.df;
    }

    public void setDf(String df) {
        this.df = df;
    }

    /** @return String "parameterName = ..,df = ..,beta = .." */
    public String toString() {
        return "parameterName = " + parameterName
                + ",df = " + df
                + ",beta = " + beta;
    }
}
/**
 * This Class represents PPCell inside PPMatrix: it links a parameter name
 * to a predictor name and a value, all kept as raw strings.
 */
public class PPCell implements Serializable {
    private String parameterName;
    private String predictorName;
    private String value;

    public String getParameterName() {
        return this.parameterName;
    }

    public void setParameterName(String parameterName) {
        this.parameterName = parameterName;
    }

    public String getPredictorName() {
        return this.predictorName;
    }

    public void setPredictorName(String predictorName) {
        this.predictorName = predictorName;
    }

    public String getValue() {
        return this.value;
    }

    public void setValue(String value) {
        this.value = value;
    }

    /** @return String "parameterName = ..,predictorName = ..,value = .." */
    public String toString() {
        return "parameterName = " + parameterName
                + ",predictorName = " + predictorName
                + ",value = " + value;
    }
}
3 | * 4 | * Project and contact information: http://www.concurrentinc.com/ 5 | * 6 | * @author girish.kathalagiri 7 | */ 8 | 9 | package pattern.model.glm; 10 | 11 | import java.io.Serializable; 12 | import java.util.ArrayList; 13 | import java.util.LinkedHashMap; 14 | 15 | import org.slf4j.Logger; 16 | import org.slf4j.LoggerFactory; 17 | import org.w3c.dom.Element; 18 | import org.w3c.dom.Node; 19 | import org.w3c.dom.NodeList; 20 | 21 | import pattern.PatternException; 22 | 23 | /** 24 | * This Class represents PPMatrix 25 | */ 26 | public class PPMatrix extends LinkedHashMap> 27 | implements Serializable { 28 | private static final Logger LOG = LoggerFactory.getLogger(PPMatrix.class); 29 | 30 | /** 31 | * Parse the data dictionary from PMML. 32 | * 33 | * @param node_list 34 | * list of DataField nodes in the DataDictionary. 35 | * @throws pattern.PatternException 36 | */ 37 | public void parsePPCell(NodeList node_list) throws PatternException { 38 | for (int i = 0; i < node_list.getLength(); i++) { 39 | Node node = node_list.item(i); 40 | 41 | if (node.getNodeType() == Node.ELEMENT_NODE) { 42 | String name = ((Element) node).getAttribute("parameterName"); 43 | String predictorName = ((Element) node) 44 | .getAttribute("predictorName"); 45 | String value = ((Element) node).getAttribute("value"); 46 | 47 | if (!containsKey(name)) { 48 | ArrayList arrPPCell = new ArrayList(); 49 | PPCell ppCell = new PPCell(); 50 | ppCell.setParameterName(name); 51 | ppCell.setPredictorName(predictorName); 52 | ppCell.setValue(value); 53 | arrPPCell.add(ppCell); 54 | put(name, arrPPCell); 55 | LOG.debug("PMML add DataField: " + arrPPCell.toString()); 56 | } else { 57 | PPCell ppCell = new PPCell(); 58 | ppCell.setParameterName(name); 59 | ppCell.setPredictorName(predictorName); 60 | ppCell.setValue(value); 61 | ArrayList arrPPCell = get(name); 62 | arrPPCell.add(ppCell); 63 | put(name, arrPPCell); 64 | LOG.debug("PMML add DataField: " + arrPPCell.toString()); 65 | } 66 | } 67 | } 
68 | } 69 | } 70 | -------------------------------------------------------------------------------- /src/main/java/pattern/model/glm/ParamMatrix.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2013 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.concurrentinc.com/ 5 | * 6 | * @author girish.kathalagiri 7 | */ 8 | 9 | package pattern.model.glm; 10 | 11 | import java.io.Serializable; 12 | import java.util.ArrayList; 13 | import java.util.LinkedHashMap; 14 | 15 | import org.slf4j.Logger; 16 | import org.slf4j.LoggerFactory; 17 | import org.w3c.dom.Element; 18 | import org.w3c.dom.Node; 19 | import org.w3c.dom.NodeList; 20 | 21 | import pattern.PatternException; 22 | 23 | /** 24 | * This Class represents ParamMatrix 25 | */ 26 | public class ParamMatrix extends LinkedHashMap> 27 | implements Serializable { 28 | private static final Logger LOG = LoggerFactory.getLogger(PPMatrix.class); 29 | 30 | /** 31 | * Parse the data dictionary from PMML. 32 | * 33 | * @param node_list 34 | * list of DataField nodes in the DataDictionary. 
35 | * @throws pattern.PatternException 36 | */ 37 | public void parsePCell(NodeList node_list) throws PatternException { 38 | for (int i = 0; i < node_list.getLength(); i++) { 39 | Node node = node_list.item(i); 40 | 41 | if (node.getNodeType() == Node.ELEMENT_NODE) { 42 | String name = ((Element) node).getAttribute("parameterName"); 43 | String predictorName = ((Element) node).getAttribute("beta"); 44 | String df = ((Element) node).getAttribute("df"); 45 | LOG.info(name); 46 | 47 | if (!containsKey(name)) { 48 | ArrayList arrPCell; 49 | arrPCell = new ArrayList(); 50 | PCell pCell = new PCell(); 51 | pCell.setParameterName(name); 52 | pCell.setBeta(predictorName); 53 | pCell.setDf(df); 54 | arrPCell.add(pCell); 55 | put(name, arrPCell); 56 | LOG.debug("PMML add DataField: " + arrPCell.toString()); 57 | } else { 58 | PCell pCell = new PCell(); 59 | pCell.setParameterName(name); 60 | pCell.setBeta(predictorName); 61 | pCell.setDf(df); 62 | ArrayList arrPCell = get(name); 63 | arrPCell.add(pCell); 64 | put(name, arrPCell); 65 | LOG.debug("PMML add DataField: " + arrPCell.toString()); 66 | } 67 | } 68 | } 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/main/java/pattern/model/lm/RegressionModel.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2013 Concurrent, Inc. All Rights Reserved. 
3 | * 4 | * Project and contact information: http://www.concurrentinc.com/ 5 | */ 6 | 7 | package pattern.model.lm; 8 | 9 | import java.io.Serializable; 10 | import java.util.ArrayList; 11 | import java.util.List; 12 | import java.util.Map; 13 | 14 | import org.slf4j.Logger; 15 | import org.slf4j.LoggerFactory; 16 | import org.w3c.dom.Element; 17 | import org.w3c.dom.Node; 18 | import org.w3c.dom.NodeList; 19 | 20 | import pattern.PMML; 21 | import pattern.PatternException; 22 | import pattern.model.Model; 23 | import pattern.predictor.Predictor; 24 | import pattern.predictor.PredictorFactory; 25 | import storm.trident.tuple.TridentTuple; 26 | 27 | public class RegressionModel extends Model implements Serializable { 28 | /** Field LOG */ 29 | private static final Logger LOG = LoggerFactory 30 | .getLogger(RegressionModel.class); 31 | 32 | public Double intercept = 0.0; 33 | public List predictors = new ArrayList(); 34 | 35 | /** 36 | * Constructor for a RegressionModel as a standalone classifier (PMML 37 | * versions 1-3). 
38 | * 39 | * @param pmml 40 | * PMML model 41 | * @throws PatternException 42 | */ 43 | public RegressionModel(PMML pmml) throws PatternException { 44 | schema = pmml.getSchema(); 45 | schema.parseMiningSchema(pmml 46 | .getNodeList("/PMML/RegressionModel/MiningSchema/MiningField")); 47 | 48 | String node_expr = "/PMML/RegressionModel/RegressionTable[1]"; 49 | Element model_node = (Element) pmml.getNodeList(node_expr).item(0); 50 | 51 | intercept = Double.valueOf(model_node.getAttribute("intercept")); 52 | LOG.debug("Intercept: " + intercept); 53 | 54 | NodeList child_nodes = model_node.getChildNodes(); 55 | 56 | for (int i = 0; i < child_nodes.getLength(); i++) { 57 | Node child = child_nodes.item(i); 58 | 59 | if (child.getNodeType() == Node.ELEMENT_NODE) { 60 | Predictor pred = PredictorFactory.getPredictor(schema, 61 | (Element) child); 62 | predictors.add(pred); 63 | LOG.debug(pred.toString()); 64 | } 65 | } 66 | } 67 | 68 | /** 69 | * Prepare to classify with this model. Called immediately before the 70 | * enclosing Operation instance is put into play processing Tuples. 71 | */ 72 | @Override 73 | public void prepare() { 74 | // not needed 75 | } 76 | 77 | /** 78 | * Classify an input tuple, returning the predicted label. 
79 | * 80 | * 81 | * @param values 82 | * tuple values 83 | * @param fields 84 | * field names 85 | * @return String 86 | * @throws PatternException 87 | */ 88 | @Override 89 | public String classifyTuple(TridentTuple values) throws PatternException { 90 | Map param_map = schema.getParamMap(values); 91 | double result = intercept; 92 | 93 | for (Predictor pred : predictors) { 94 | double term = pred.calcTerm(param_map); 95 | result += term; 96 | } 97 | 98 | LOG.debug("result: " + result); 99 | 100 | return Double.toString(result); 101 | } 102 | 103 | /** @return String */ 104 | @Override 105 | public String toString() { 106 | StringBuilder buf = new StringBuilder(); 107 | 108 | if (schema != null) { 109 | buf.append(schema); 110 | buf.append("\n"); 111 | buf.append("---------"); 112 | buf.append("\n"); 113 | buf.append(predictors); 114 | buf.append("---------"); 115 | buf.append("\n"); 116 | } 117 | 118 | return buf.toString(); 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /src/main/java/pattern/model/tree/Context.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2013 Concurrent, Inc. All Rights Reserved. 
3 | * 4 | * Project and contact information: http://www.concurrentinc.com/ 5 | */ 6 | 7 | package pattern.model.tree; 8 | 9 | import java.io.Serializable; 10 | import java.lang.reflect.InvocationTargetException; 11 | import java.util.ArrayList; 12 | import java.util.List; 13 | 14 | import org.codehaus.commons.compiler.CompileException; 15 | import org.codehaus.janino.ExpressionEvaluator; 16 | import org.slf4j.Logger; 17 | import org.slf4j.LoggerFactory; 18 | import org.w3c.dom.Element; 19 | 20 | import pattern.PatternException; 21 | import pattern.Schema; 22 | import pattern.XPathReader; 23 | import storm.trident.tuple.TridentTuple; 24 | 25 | public class Context implements Serializable { 26 | /** Field LOG */ 27 | private static final Logger LOG = LoggerFactory.getLogger(Context.class); 28 | 29 | public List predicates = new ArrayList(); 30 | public List> variables = new ArrayList>(); 31 | 32 | protected Boolean[] pred_eval; 33 | protected Object[] param_values; 34 | protected ExpressionEvaluator[] ee_list; 35 | 36 | /** 37 | * Make a predicate representing the decision point for a vertext in the 38 | * tree, plus the indicies for the variables it requires. Return an ID for 39 | * the predicate. 
40 | * 41 | * @param schema 42 | * model schema 43 | * @param reader 44 | * XML reader 45 | * @param node 46 | * predicate node in the XML 47 | * @param params 48 | * parameter names 49 | * @return Integer 50 | * @throws PatternException 51 | */ 52 | public Integer makePredicate(Schema schema, XPathReader reader, 53 | Element node, List params) throws PatternException { 54 | String field = node.getAttribute("field"); 55 | String eval = schema.get(field).getEval(reader, node); 56 | ArrayList pred_vars = new ArrayList(); 57 | LOG.debug("eval: " + eval + " | " + params.toString()); 58 | 59 | for (String s : eval.split("[^\\w\\_]")) { 60 | s = s.trim(); 61 | 62 | if (s.length() > 0) { 63 | int var_index = params.indexOf(s); 64 | 65 | if (var_index >= 0) 66 | pred_vars.add(var_index); 67 | LOG.debug("param: " + s + " ? " + var_index + " | " 68 | + pred_vars.toString()); 69 | } 70 | } 71 | 72 | if (!predicates.contains(eval)) { 73 | predicates.add(eval); 74 | variables.add(pred_vars); 75 | LOG.debug("pred: " + eval + " ? " + predicates.toString()); 76 | } 77 | 78 | Integer predicate_id = predicates.indexOf(eval); 79 | 80 | return predicate_id; 81 | } 82 | 83 | /** 84 | * Prepare to classify with this model. Called immediately before the 85 | * enclosing Operation instance is put into play processing Tuples. 
86 | * 87 | * @param schema 88 | * model schema 89 | */ 90 | public void prepare(Schema schema) { 91 | // handle the loop-invariant preparations here, 92 | // in lieu of incurring overhead for each tuple 93 | 94 | String[] param_names = schema.getParamNames(); 95 | Class[] param_types = schema.getParamTypes(); 96 | 97 | ee_list = new ExpressionEvaluator[predicates.size()]; 98 | 99 | for (int i = 0; i < predicates.size(); i++) 100 | try { 101 | ArrayList pred_vars = variables.get(i); 102 | String[] pred_param_names = new String[pred_vars.size()]; 103 | Class[] pred_param_types = new Class[pred_vars.size()]; 104 | int j = 0; 105 | 106 | for (Integer pv : pred_vars) { 107 | LOG.debug("pv: " + pv + " name: " + param_names[pv] 108 | + " type: " + param_types[pv]); 109 | pred_param_names[j] = param_names[pv]; 110 | pred_param_types[j++] = param_types[pv]; 111 | } 112 | 113 | LOG.debug("eval: " + predicates.get(i) + " param len: " 114 | + pred_vars.size() + " ? " + pred_vars); 115 | ee_list[i] = new ExpressionEvaluator(predicates.get(i), 116 | boolean.class, pred_param_names, pred_param_types, 117 | new Class[0], null); 118 | } catch (NullPointerException exception) { 119 | String message = String.format("predicate [ %s ] failed", 120 | predicates.get(i)); 121 | LOG.error(message, exception); 122 | throw new PatternException(message, exception); 123 | } catch (CompileException exception) { 124 | String message = String.format( 125 | "predicate [ %s ] did not compile", predicates.get(i)); 126 | LOG.error(message, exception); 127 | throw new PatternException(message, exception); 128 | } 129 | 130 | param_values = new Object[schema.size()]; 131 | pred_eval = new Boolean[predicates.size()]; 132 | } 133 | 134 | /** 135 | * Evaluate a tuple of input values to generate an array of predicate values 136 | * for the tree/forest. 
137 | * 138 | * @param schema 139 | * model schema 140 | * @param values 141 | * tuple values 142 | * @return Boolean[] 143 | * @throws PatternException 144 | */ 145 | public Boolean[] evalPredicates(Schema schema, TridentTuple values) 146 | throws PatternException { 147 | schema.setParamValues(values, param_values); 148 | 149 | for (int i = 0; i < predicates.size(); i++) 150 | try { 151 | ArrayList pred_vars = variables.get(i); 152 | Object[] pred_param_values = new Object[pred_vars.size()]; 153 | int j = 0; 154 | 155 | for (Integer pv : pred_vars) { 156 | LOG.debug("pv: " + pv + " value: " + param_values[pv]); 157 | pred_param_values[j++] = param_values[pv]; 158 | } 159 | 160 | pred_eval[i] = new Boolean(ee_list[i].evaluate( 161 | pred_param_values).toString()); 162 | } catch (InvocationTargetException exception) { 163 | String message = String.format( 164 | "predicate [ %s ] did not evaluate", predicates.get(i)); 165 | LOG.error(message, exception); 166 | throw new PatternException(message, exception); 167 | } 168 | 169 | return pred_eval; 170 | } 171 | 172 | /** @return String */ 173 | @Override 174 | public String toString() { 175 | StringBuilder buf = new StringBuilder(); 176 | 177 | for (String predicate : predicates) { 178 | buf.append("expr[ " + predicates.indexOf(predicate) + " ]: " 179 | + predicate); 180 | buf.append("\n"); 181 | } 182 | 183 | return buf.toString(); 184 | } 185 | } 186 | -------------------------------------------------------------------------------- /src/main/java/pattern/model/tree/Edge.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2013 Concurrent, Inc. All Rights Reserved. 
3 | * 4 | * Project and contact information: http://www.concurrentinc.com/ 5 | */ 6 | 7 | package pattern.model.tree; 8 | 9 | import org.jgrapht.graph.DefaultEdge; 10 | 11 | public class Edge extends DefaultEdge { 12 | public Integer predicate_id = null; 13 | 14 | /** @param predicate_id */ 15 | public void setPredicateId(Integer predicate_id) { 16 | this.predicate_id = predicate_id; 17 | } 18 | 19 | /** @return */ 20 | public Integer getPredicateId() { 21 | return predicate_id; 22 | } 23 | 24 | /** @return */ 25 | @Override 26 | public String toString() { 27 | String base = super.toString(); 28 | 29 | return base + ":" + predicate_id; 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/main/java/pattern/model/tree/Tree.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2013 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.concurrentinc.com/ 5 | */ 6 | 7 | package pattern.model.tree; 8 | 9 | import java.io.Serializable; 10 | 11 | import org.jgrapht.DirectedGraph; 12 | import org.jgrapht.graph.DefaultDirectedGraph; 13 | import org.slf4j.Logger; 14 | import org.slf4j.LoggerFactory; 15 | 16 | public class Tree implements Serializable { 17 | /** Field LOG */ 18 | private static final Logger LOG = LoggerFactory.getLogger(Tree.class); 19 | 20 | public String tree_name; 21 | public Vertex root; 22 | public DirectedGraph graph = new DefaultDirectedGraph( 23 | Edge.class); 24 | 25 | /** @param id */ 26 | public Tree(String id) { 27 | tree_name = "tree_" + id; 28 | 29 | if (LOG.isDebugEnabled()) 30 | LOG.debug(tree_name); 31 | } 32 | 33 | /** @param root */ 34 | public void setRoot(Vertex root) { 35 | this.root = root; 36 | } 37 | 38 | /** @return */ 39 | public Vertex getRoot() { 40 | return root; 41 | } 42 | 43 | /** @return */ 44 | public DirectedGraph getGraph() { 45 | return graph; 46 | } 47 | 48 | /** 49 | 
* @param pred_eval 50 | * @return 51 | */ 52 | public String traverse(Boolean[] pred_eval) { 53 | return traverseVertex(root, pred_eval); 54 | } 55 | 56 | /** 57 | * @param vertex 58 | * @param pred_eval 59 | * @return 60 | */ 61 | protected String traverseVertex(Vertex vertex, Boolean[] pred_eval) { 62 | String score = vertex.getScore(); 63 | 64 | if (score != null) { 65 | if (LOG.isDebugEnabled()) 66 | LOG.debug(" then " + score); 67 | 68 | return score; 69 | } 70 | 71 | for (Edge edge : graph.outgoingEdgesOf(vertex)) { 72 | if (LOG.isDebugEnabled()) { 73 | LOG.debug(edge.toString()); 74 | LOG.debug(" if pred_eval[ " + edge.getPredicateId() + " ]:" 75 | + pred_eval[edge.getPredicateId()]); 76 | } 77 | 78 | if (pred_eval[edge.getPredicateId()]) { 79 | score = traverseVertex(graph.getEdgeTarget(edge), pred_eval); 80 | 81 | if (score != null) { 82 | return score; 83 | } 84 | } 85 | } 86 | 87 | return null; 88 | } 89 | 90 | /** @return */ 91 | @Override 92 | public String toString() { 93 | return tree_name + ": " + graph; 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /src/main/java/pattern/model/tree/TreeModel.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2013 Concurrent, Inc. All Rights Reserved. 
3 | * 4 | * Project and contact information: http://www.concurrentinc.com/ 5 | */ 6 | 7 | package pattern.model.tree; 8 | 9 | import java.io.Serializable; 10 | import java.util.ArrayList; 11 | import java.util.List; 12 | 13 | import javax.xml.xpath.XPathConstants; 14 | 15 | import org.jgrapht.DirectedGraph; 16 | import org.slf4j.Logger; 17 | import org.slf4j.LoggerFactory; 18 | import org.w3c.dom.Element; 19 | import org.w3c.dom.Node; 20 | import org.w3c.dom.NodeList; 21 | 22 | import pattern.PMML; 23 | import pattern.PatternException; 24 | import pattern.Schema; 25 | import pattern.model.Model; 26 | import storm.trident.tuple.TridentTuple; 27 | 28 | public class TreeModel extends Model implements Serializable { 29 | /** Field LOG */ 30 | private static final Logger LOG = LoggerFactory.getLogger(TreeModel.class); 31 | 32 | public Context context = null; 33 | public Tree tree; 34 | 35 | /** 36 | * Constructor for a TreeModel as a standalone classifier (PMML versions 37 | * 1-3). 38 | * 39 | * @param pmml 40 | * PMML model 41 | * @throws PatternException 42 | */ 43 | public TreeModel(PMML pmml) throws PatternException { 44 | schema = pmml.getSchema(); 45 | context = new Context(); 46 | 47 | schema.parseMiningSchema(pmml 48 | .getNodeList("/PMML/TreeModel/MiningSchema/MiningField")); 49 | tree = new Tree("default"); 50 | 51 | String node_expr = "./TreeModel/Node[1]"; 52 | NodeList root_node = pmml.getNodeList(node_expr); 53 | 54 | buildTree(pmml, context, (Element) root_node.item(0), tree); 55 | } 56 | 57 | /** 58 | * Constructor for a TreeModel as part of an ensemble (PMML verion 4+), such 59 | * as in Random Forest. 
60 | * 61 | * @param pmml 62 | * PMML model 63 | * @param context 64 | * tree context 65 | * @param parent 66 | * parent node in the XML 67 | * @throws PatternException 68 | */ 69 | public TreeModel(PMML pmml, Context context, Node parent) 70 | throws PatternException { 71 | String id = ((Element) parent).getAttribute("id"); 72 | tree = new Tree(id); 73 | 74 | String node_expr = "./TreeModel/Node[1]"; 75 | NodeList root_node = (NodeList) pmml.getReader().read(parent, 76 | node_expr, XPathConstants.NODESET); 77 | 78 | buildTree(pmml, context, (Element) root_node.item(0), tree); 79 | } 80 | 81 | /** 82 | * Prepare to classify with this model. Called immediately before the 83 | * enclosing Operation instance is put into play processing Tuples. 84 | */ 85 | @Override 86 | public void prepare() { 87 | context.prepare(schema); 88 | } 89 | 90 | /** 91 | * Classify an input tuple, returning the predicted label. 92 | * 93 | * 94 | * @param values 95 | * tuple values 96 | * @param fields 97 | * @return String 98 | * @throws PatternException 99 | */ 100 | @Override 101 | public String classifyTuple(TridentTuple values) throws PatternException { 102 | // TODO 103 | return "null"; 104 | } 105 | 106 | /** 107 | * Generate a serializable graph representation for a tree. 
108 | * 109 | * @param pmml 110 | * PMML model 111 | * @param shared_context 112 | * tree context 113 | * @param root 114 | * root node in the XML 115 | * @param tree 116 | * serializable tree structure 117 | * @throws PatternException 118 | */ 119 | public void buildTree(PMML pmml, Context shared_context, Element root, 120 | Tree tree) throws PatternException { 121 | Vertex vertex = makeVertex(root, tree.getGraph()); 122 | tree.setRoot(vertex); 123 | 124 | buildNode(pmml, shared_context, root, vertex, tree.getGraph()); 125 | } 126 | 127 | /** 128 | * @param pmml 129 | * PMML model 130 | * @param shared_context 131 | * tree context 132 | * @param node 133 | * predicate node in the XML 134 | * @param vertex 135 | * tree vertex 136 | * @param graph 137 | * serializable graph 138 | * @throws PatternException 139 | */ 140 | protected void buildNode(PMML pmml, Context shared_context, Element node, 141 | Vertex vertex, DirectedGraph graph) 142 | throws PatternException { 143 | // build a list of parameters from which the predicate will be evaluated 144 | 145 | Schema schema = pmml.getSchema(); 146 | String[] param_names = schema.getParamNames(); 147 | List params = new ArrayList(); 148 | 149 | for (int i = 0; i < param_names.length; i++) 150 | params.add(param_names[i]); 151 | 152 | // walk the node list to construct serializable predicates 153 | 154 | NodeList child_nodes = node.getChildNodes(); 155 | 156 | for (int i = 0; i < child_nodes.getLength(); i++) { 157 | Node child = child_nodes.item(i); 158 | 159 | if (child.getNodeType() == Node.ELEMENT_NODE) { 160 | if ("SimplePredicate".equals(child.getNodeName()) 161 | || "SimpleSetPredicate".equals(child.getNodeName())) { 162 | Integer predicate_id = shared_context.makePredicate(schema, 163 | pmml.getReader(), (Element) child, params); 164 | 165 | if (node.hasAttribute("score")) { 166 | String score = (node).getAttribute("score"); 167 | vertex.setScore(score); 168 | } 169 | 170 | for (Edge e : graph.edgesOf(vertex)) 171 
| e.setPredicateId(predicate_id); 172 | } else if ("Node".equals(child.getNodeName())) { 173 | Vertex child_vertex = makeVertex((Element) child, graph); 174 | Edge edge = graph.addEdge(vertex, child_vertex); 175 | 176 | buildNode(pmml, shared_context, (Element) child, 177 | child_vertex, graph); 178 | } 179 | } 180 | } 181 | } 182 | 183 | /** 184 | * @param node 185 | * predicate node in the XML 186 | * @param graph 187 | * serializable graph 188 | * @return Vertex 189 | */ 190 | protected Vertex makeVertex(Element node, DirectedGraph graph) { 191 | String id = (node).getAttribute("id"); 192 | Vertex vertex = new Vertex(id); 193 | graph.addVertex(vertex); 194 | 195 | return vertex; 196 | } 197 | 198 | /** @return String */ 199 | @Override 200 | public String toString() { 201 | StringBuilder buf = new StringBuilder(); 202 | 203 | if (schema != null) { 204 | buf.append(schema); 205 | buf.append("\n"); 206 | buf.append("---------"); 207 | buf.append("\n"); 208 | } 209 | 210 | if (context != null) { 211 | buf.append(context); 212 | buf.append("\n"); 213 | buf.append("---------"); 214 | buf.append("\n"); 215 | } 216 | 217 | buf.append(tree); 218 | buf.append(tree.getRoot()); 219 | 220 | for (Edge edge : tree.getGraph().edgeSet()) 221 | buf.append(edge); 222 | 223 | buf.append("\n"); 224 | 225 | return buf.toString(); 226 | } 227 | } 228 | -------------------------------------------------------------------------------- /src/main/java/pattern/model/tree/Vertex.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2013 Concurrent, Inc. All Rights Reserved. 
3 | * 4 | * Project and contact information: http://www.concurrentinc.com/ 5 | */ 6 | 7 | package pattern.model.tree; 8 | 9 | import java.io.Serializable; 10 | 11 | public class Vertex implements Serializable { 12 | public String id; 13 | public String score = null; 14 | 15 | /** 16 | * @param id 17 | * vertex ID 18 | */ 19 | public Vertex(String id) { 20 | this.id = id; 21 | } 22 | 23 | /** 24 | * @param score 25 | * evaluated model score 26 | */ 27 | public void setScore(String score) { 28 | this.score = score; 29 | } 30 | 31 | /** @return String */ 32 | public String getScore() { 33 | return score; 34 | } 35 | 36 | /** @return String */ 37 | @Override 38 | public String toString() { 39 | if (score != null) 40 | return id + ":" + score; 41 | else 42 | return id; 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/pattern/predictor/CategoricalPredictor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2013 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.concurrentinc.com/ 5 | */ 6 | 7 | package pattern.predictor; 8 | 9 | import java.util.Map; 10 | 11 | import org.slf4j.Logger; 12 | import org.slf4j.LoggerFactory; 13 | 14 | public class CategoricalPredictor extends Predictor { 15 | /** Field LOG */ 16 | private static final Logger LOG = LoggerFactory 17 | .getLogger(CategoricalPredictor.class); 18 | 19 | public Integer value; 20 | 21 | /** 22 | * @param name 23 | * name of the DataField used by this term 24 | * @param coefficient 25 | * coefficient for the term 26 | * @param value 27 | * value for the category 28 | */ 29 | public CategoricalPredictor(String name, Double coefficient, Integer value) { 30 | this.name = name; 31 | this.coefficient = coefficient; 32 | this.value = value; 33 | } 34 | 35 | /** 36 | * Calculate the value for the term based on this Predictor. 
37 | * 38 | * @param param_map 39 | * tuples names/values 40 | * @return double 41 | */ 42 | @Override 43 | public double calcTerm(Map param_map) { 44 | double result = 0.0; 45 | int cat = (Integer) param_map.get(name); 46 | 47 | if (value == cat) 48 | result = coefficient; 49 | 50 | LOG.debug(String.format("calc: %s, %d, %d, %e", name, value, cat, 51 | result)); 52 | 53 | return result; 54 | } 55 | 56 | /** @return String */ 57 | @Override 58 | public String toString() { 59 | return String.format("CategoricalPredictor: %s, %d, %e", name, value, 60 | coefficient); 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /src/main/java/pattern/predictor/NumericPredictor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2013 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.concurrentinc.com/ 5 | */ 6 | 7 | package pattern.predictor; 8 | 9 | import java.util.Map; 10 | 11 | import org.slf4j.Logger; 12 | import org.slf4j.LoggerFactory; 13 | 14 | public class NumericPredictor extends Predictor { 15 | /** Field LOG */ 16 | private static final Logger LOG = LoggerFactory 17 | .getLogger(NumericPredictor.class); 18 | 19 | public Double exponent; 20 | 21 | /** 22 | * @param name 23 | * name of the DataField used by this term 24 | * @param coefficient 25 | * coefficient for the term 26 | * @param exponent 27 | * exponent for the term 28 | */ 29 | public NumericPredictor(String name, Double coefficient, Double exponent) { 30 | this.name = name; 31 | this.coefficient = coefficient; 32 | this.exponent = exponent; 33 | } 34 | 35 | /** 36 | * Calculate the value for the term based on this Predictor. 
37 | * 38 | * @param param_map 39 | * tuples names/values 40 | * @return double 41 | */ 42 | @Override 43 | public double calcTerm(Map param_map) { 44 | double value = (Double) param_map.get(name); 45 | double result = Math.pow(value, exponent) * coefficient; 46 | 47 | LOG.debug(String.format("calc: %s, %e, %e, %e, %e", name, value, 48 | exponent, coefficient, result)); 49 | 50 | return result; 51 | } 52 | 53 | /** @return String */ 54 | @Override 55 | public String toString() { 56 | return String.format("NumericPredictor: %s, %e, %e", name, exponent, 57 | coefficient); 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /src/main/java/pattern/predictor/Predictor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2013 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.concurrentinc.com/ 5 | */ 6 | 7 | package pattern.predictor; 8 | 9 | import java.io.Serializable; 10 | import java.util.Map; 11 | 12 | public abstract class Predictor implements Serializable { 13 | public String name; 14 | public Double coefficient; 15 | 16 | /** 17 | * Calculate the value for the term based on this Predictor. 18 | * 19 | * @param param_map 20 | * tuples names/values 21 | * @return double 22 | */ 23 | public abstract double calcTerm(Map param_map); 24 | } 25 | -------------------------------------------------------------------------------- /src/main/java/pattern/predictor/PredictorFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2013 Concurrent, Inc. All Rights Reserved. 
*
* Project and contact information: http://www.concurrentinc.com/
*/

package pattern.predictor;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Element;

import pattern.PatternException;
import pattern.Schema;
import pattern.datafield.CategoricalDataField;

public class PredictorFactory {
    /** Field LOG */
    private static final Logger LOG = LoggerFactory
            .getLogger(PredictorFactory.class);

    /**
     * Create the appropriate Predictor object based on parsing the predictor
     * terms (IV) of a regression model.
     *
     * @param schema
     *            model schema
     * @param node
     *            predictor node in the XML; a NumericPredictor supplies
     *            "name", "coefficient" and optionally "exponent", a
     *            CategoricalPredictor supplies "name", "value" and
     *            "coefficient"
     * @return Predictor
     * @throws PatternException
     *             if the element is neither a NumericPredictor nor a
     *             CategoricalPredictor
     */
    public static Predictor getPredictor(Schema schema, Element node)
            throws PatternException {
        String name = node.getAttribute("name");
        String node_name = node.getNodeName();

        if ("NumericPredictor".equals(node_name)) {
            // Element.getAttribute() returns "" - never null - for a missing
            // attribute, so test for content before parsing; an element
            // without "exponent" keeps the default of 1.0
            Double exponent = Double.valueOf(1.0);
            String exponent_text = node.getAttribute("exponent");

            if (exponent_text != null && exponent_text.trim().length() > 0) {
                exponent = Double.valueOf(exponent_text);
            }

            Double coefficient = Double.valueOf(node
                    .getAttribute("coefficient"));

            return new NumericPredictor(name, coefficient, exponent);
        }

        if ("CategoricalPredictor".equals(node_name)) {
            CategoricalDataField df = (CategoricalDataField) schema.get(name);
            // NOTE(review): presumably categories is a List, so an unknown
            // category value yields -1 here rather than an error - confirm
            Integer value = df.categories.indexOf(node.getAttribute("value"));
            Double coefficient = Double.valueOf(node
                    .getAttribute("coefficient"));

            return new CategoricalPredictor(name, coefficient, value);
        }

        String message = String.format("unsupported Predictor type %s",
                node_name);
        LOG.error(message);
        throw new PatternException(message);
    }
}
-------------------------------------------------------------------------------- /src/py/gen_orders.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | import random 5 | import sys 6 | import uuid 7 | 8 | 9 | debug = False # True 10 | 11 | CUSTOMER_SEGMENTS = ( 12 | [0.2, ["0", random.gauss, 0.25, 0.75, "%0.2f"]], 13 | [0.8, ["0", random.gauss, 1.5, 0.25, "%0.2f"]], 14 | [0.9, ["1", random.gauss, 0.6, 0.2, "%0.2f"]], 15 | [1.0, ["1", random.gauss, 0.75, 0.2, "%0.2f"]] 16 | ) 17 | 18 | def gen_row (segments, num_col): 19 | coin_flip = random.random() 20 | 21 | for prob, rand_var in segments: 22 | if debug: 23 | print coin_flip, prob 24 | 25 | if coin_flip <= prob: 26 | (label, dist, mean, sigma, format) = rand_var 27 | order_id = str(uuid.uuid1()).split("-")[0] 28 | return [label] + map(lambda x: format % dist(mean, sigma), range(0, num_col)) + [order_id] 29 | 30 | 31 | if __name__ == '__main__': 32 | num_row = int(sys.argv[1]) 33 | num_col = int(sys.argv[2]) 34 | 35 | print "\t".join(["label"] + map(lambda x: "v" + str(x), range(0, num_col)) + ["order_id"]) 36 | 37 | for i in range(0, num_row): 38 | print "\t".join(gen_row(CUSTOMER_SEGMENTS, num_col)) 39 | -------------------------------------------------------------------------------- /src/py/rf_eval.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | from lxml import etree 5 | import errno 6 | import sys 7 | from xml.dom.minidom import parse, parseString 8 | 9 | 10 | ###################################################################### 11 | ## global definitions 12 | 13 | debug = False # True 14 | 15 | ns = { 16 | 'xsi': 'http://www.w3.org/2001/XMLSchema-instance' 17 | } 18 | 19 | textOps = { 20 | "lessOrEqual": "<=", 21 | "greaterThan": ">" 22 | } 23 | 24 | predicates = [] 25 | 26 | 27 | 28 | def resolve_tree (segment, dv_name, 
iv_list): 29 | tree_model = segment.getElementsByTagName("TreeModel")[0] 30 | tree_id = segment.attributes["id"].value 31 | def_name = "tree_" + tree_id 32 | 33 | print "def " + def_name + " ():" 34 | 35 | node = tree_model.getElementsByTagName("Node")[0] 36 | resolve_node(tree_id, node, 0, []) 37 | 38 | return def_name 39 | 40 | 41 | def resolve_node (tree_id, node, depth, trail): 42 | indent = get_indent(depth) 43 | 44 | try: 45 | score = node.attributes["score"].value 46 | except KeyError: 47 | score = None 48 | 49 | if debug: 50 | print indent, "node", node.attributes["id"].value, score 51 | 52 | expr_ind = None 53 | 54 | for child_node in node.childNodes: 55 | if child_node.nodeName == "SimplePredicate": 56 | expr_ind, predicate = compose_predicate(child_node) 57 | print indent, predicate 58 | 59 | elif child_node.nodeName == "Node": 60 | resolve_node(tree_id, child_node, depth + 1, trail + [expr_ind]) 61 | 62 | if score: 63 | indent = get_indent(depth + 1) 64 | print indent, " ".join(["return", score]) 65 | print indent, "#", tree_id, score, filter(lambda x: x >= 0, trail + [expr_ind]) 66 | print 67 | 68 | 69 | def get_indent (depth): 70 | return " ".join(map(lambda x: " ", range(0, depth))) 71 | 72 | 73 | def compose_predicate (predicate): 74 | field = predicate.attributes["field"].value 75 | operator = predicate.attributes["operator"].value 76 | value = predicate.attributes["value"].value 77 | 78 | # 79 | 80 | expression = " ".join([field, textOps[operator], value]) 81 | 82 | if expression not in predicates: 83 | predicates.append(expression) 84 | 85 | expr_ind = predicates.index(expression) 86 | condition = " ".join(["if", "expr[", str(expr_ind), "]:"]) 87 | 88 | return expr_ind, condition 89 | 90 | 91 | if __name__ == "__main__": 92 | file_model = sys.argv[1] 93 | dom = parse(file_model) 94 | 95 | ## generate code for the preamble 96 | 97 | print "import sys" 98 | print 99 | 100 | ## determine the data dictionary 101 | 102 | data_dict = 
dom.getElementsByTagName("DataDictionary")[0] 103 | 104 | for data_field in data_dict.getElementsByTagName("DataField"): 105 | # 106 | 107 | if debug: 108 | print data_field.attributes["name"].value 109 | 110 | mining_model = dom.getElementsByTagName("MiningModel")[0] 111 | 112 | if debug: 113 | print mining_model.attributes["modelName"].value 114 | 115 | ## determine the input schema 116 | 117 | dv_name = None 118 | iv_list = [] 119 | mining_schema = mining_model.getElementsByTagName("MiningSchema")[0] 120 | 121 | for mining_field in mining_schema.getElementsByTagName("MiningField"): 122 | # 123 | 124 | if mining_field.attributes["usageType"].value == "predicted": 125 | dv_name = mining_field.attributes["name"].value 126 | elif mining_field.attributes["usageType"].value == "active": 127 | iv_list.append(mining_field.attributes["name"].value) 128 | 129 | ## generate code for each tree 130 | 131 | def_list = [] 132 | segmentation = mining_model.getElementsByTagName("Segmentation")[0] 133 | 134 | for segment in segmentation.getElementsByTagName("Segment"): 135 | def_name = resolve_tree(segment, dv_name, iv_list) 136 | def_list.append(def_name) 137 | 138 | ## classify each input tuple 139 | 140 | indent = get_indent(1) 141 | input_tuple = "( " + ", ".join([dv_name] + iv_list) + " )" 142 | 143 | print "status = { '00': 'TN', '11': 'TP', '10': 'FN', '01': 'FP' }" 144 | print "confuse = { 'TN': 0, 'TP': 0, 'FN': 0, 'FP': 0 }" 145 | print "count = -1" 146 | print 147 | print "for line in sys.stdin:" 148 | print indent, "count += 1" 149 | 150 | print 151 | print indent, "if count > 0:" 152 | indent = get_indent(2) 153 | 154 | print indent, input_tuple + " = map(lambda x: float(x), line.strip().split('\\t')[0:" + str(len(iv_list) + 1) + "] )" 155 | print indent, "vote = []" 156 | print indent, "expr = []" 157 | 158 | for expression in predicates: 159 | print indent, "expr.append( " + expression + " )" 160 | 161 | input_tuple = "( " + ", ".join(iv_list) + " )" 162 | 163 | 
## Train a small RandomForest classifier on the "orders" data, report its
## accuracy on a holdout split, then export the scored data (TSV) and the
## model (PMML) into dat_folder.

## uncomment the following two lines to install the required libraries
#install.packages("pmml")
#install.packages("randomForest")

library(pmml)
library(randomForest)

## load the "baseline" reference data

dat_folder <- './data'
data <- read.table(file=paste(dat_folder, "orders.tsv", sep="/"), sep="\t", quote="", na.strings="NULL", header=TRUE, encoding="UTF8")

dim(data)
head(data)

## split data into test and train sets:
## the first split_ratio share of the rows is held out, the rest is used for training

set.seed(71)
split_ratio <- 2/10
split <- round(dim(data)[1] * split_ratio)

data_tests <- data[1:split,]
dim(data_tests)
print(table(data_tests[,"label"]))

data_train <- data[(split + 1):dim(data)[1],]

## drop "order_id" so the "label ~ ." formula does not pick it up as a predictor;
## a logical mask is used because negative indexing with an empty match
## (no such column) would silently drop every column
data_train <- data_train[, colnames(data_train) != "order_id", drop=FALSE]
dim(data_train)

## train a RandomForest model

f <- as.formula("as.factor(label) ~ .")
fit <- randomForest(f, data_train, ntree=2)

print(fit$importance)
print(fit)

## test the model on the holdout test set (rows that were not used for training)

holdout <- predict(fit, data_tests)
confuse <- table(pred = holdout, true = data_tests[,"label"])
print(confuse)

## score every row, train and holdout alike, for the exported reference data

data$predict <- predict(fit, data)

## export predicted labels to TSV

write.table(data, file=paste(dat_folder, "sample.tsv", sep="/"), quote=FALSE, sep="\t", row.names=FALSE)

## export RF model to PMML

saveXML(pmml(fit), file=paste(dat_folder, "sample.rf.xml", sep="/"))
(www.concurrentinc.com)" 23 | 24 | ## split data into test and train sets 25 | 26 | data(iris) 27 | iris_full <- iris 28 | colnames(iris_full) <- c("sepal_length", "sepal_width", "petal_length", "petal_width", "species") 29 | 30 | idx <- sample(150, 100) 31 | iris_train <- iris_full[idx,] 32 | iris_test <- iris_full[-idx,] 33 | 34 | 35 | ## train a Random Forest model 36 | ## example: http://mkseo.pe.kr/stats/?p=220 37 | print("model: Random Forest") 38 | 39 | f <- as.formula("as.factor(species) ~ .") 40 | fit <- randomForest(f, data=iris_train, proximity=TRUE, ntree=50) 41 | 42 | print(fit$importance) 43 | print(fit) 44 | print(table(iris_test$species, predict(fit, iris_test, type="class"))) 45 | 46 | plot(fit, log="y", main="Random Forest") 47 | varImpPlot(fit) 48 | MDSplot(fit, iris_full$species) 49 | 50 | out <- iris_full 51 | out$predict <- predict(fit, out, type="class") 52 | 53 | write.table(out, file=paste(dat_folder, "iris.rf.tsv", sep="/"), quote=FALSE, sep="\t", row.names=FALSE) 54 | saveXML(pmml(fit, copyright=COPY), file=paste(dat_folder, "iris.rf.xml", sep="/")) 55 | 56 | 57 | ## train a Linear Regression predictive model 58 | ## example: http://www2.warwick.ac.uk/fac/sci/moac/people/students/peter_cock/r/iris_lm/ 59 | print("model: Linear Regression - predictive model") 60 | 61 | f <- as.formula("sepal_length ~ .") 62 | fit <- lm(f, data=iris_train) 63 | 64 | print(summary(fit)) 65 | print(table(round(iris_test$sepal_length), round(predict(fit, iris_test)))) 66 | 67 | op <- par(mfrow = c(3, 2)) 68 | plot(predict(fit), main="Linear Regression") 69 | plot(iris_full$petal_length, iris_full$petal_width, pch=21, bg=c("red", "green3", "blue")[unclass(iris_full$species)], main="Edgar Anderson's Iris Data", xlab="petal length", ylab="petal width") 70 | plot(fit) 71 | par(op) 72 | 73 | out <- iris_full 74 | out$predict <- predict(fit, out) 75 | 76 | write.table(out, file=paste(dat_folder, "iris.lm_p.tsv", sep="/"), quote=FALSE, sep="\t", row.names=FALSE) 77 | 
saveXML(pmml(fit, copyright=COPY), file=paste(dat_folder, "iris.lm_p.xml", sep="/")) 78 | 79 | 80 | ## train a Recursive Partition classification tree 81 | ## example: http://www.r-bloggers.com/example-9-10-more-regression-trees-and-recursive-partitioning-with-partykit/ 82 | print("model: Recursive Partition") 83 | 84 | f <- as.formula("species ~ .") 85 | fit <- rpart(f, data=iris_train) 86 | 87 | print(fit) 88 | print(summary(fit)) 89 | print(table(iris_test$species, predict(fit, iris_test, type="class"))) 90 | 91 | op <- par(mfrow = c(2, 1)) 92 | prp(fit, extra=1, uniform=F, branch=1, yesno=F, border.col=0, xsep="/", main="Recursive Partition") 93 | par(op) 94 | 95 | out <- iris_full 96 | out$predict <- predict(fit, out, type="class") 97 | 98 | write.table(out, file=paste(dat_folder, "iris.rpart.tsv", sep="/"), quote=FALSE, sep="\t", row.names=FALSE) 99 | saveXML(pmml(fit, copyright=COPY), file=paste(dat_folder, "iris.rpart.xml", sep="/")) 100 | 101 | 102 | ## train a single hidden-layer Neural Network 103 | ## example: http://statisticsr.blogspot.com/2008/10/notes-for-nnet.html 104 | print("model: Neural Network") 105 | 106 | samp <- c(sample(1:50,25), sample(51:100,25), sample(101:150,25)) 107 | 108 | ird <- data.frame(rbind(iris3[,,1], iris3[,,2], iris3[,,3]), 109 | species = factor(c(rep("setosa",50), rep("versicolor", 50), rep("virginica", 50)))) 110 | 111 | f <- as.formula("species ~ .") 112 | fit <- nnet(f, data=ird, subset=samp, size=2, rang=0.1, decay=5e-4, maxit=200) 113 | 114 | print(fit) 115 | print(summary(fit)) 116 | print(table(ird$species[-samp], predict(fit, ird[-samp,], type = "class"))) 117 | 118 | out <- ird 119 | out$predict <- predict(fit, ird, type="class") 120 | 121 | write.table(out, file=paste(dat_folder, "iris.nn.tsv", sep="/"), quote=FALSE, sep="\t", row.names=FALSE) 122 | saveXML(pmml(fit, copyright=COPY), file=paste(dat_folder, "iris.nn.xml", sep="/")) 123 | 124 | 125 | ## train a Multinomial Regression model 126 | ## example: 
## train a Hierarchical Clustering model
## example: http://mkseo.pe.kr/stats/?p=15
print("model: Hierarchical Clustering")

## numeric feature matrix: every column except the 5th (species)
i = as.matrix(iris_full[,-5])
fit <- hclust(dist(i), method = "average")

## cut the dendrogram into 3 clusters once and reuse the assignment below
kls = cutree(fit, 3)

## per-cluster mean of every feature column, passed to pmml() as the centers
initial <- tapply(i, list(rep(kls, ncol(i)), col(i)), mean)
dimnames(initial) <- list(NULL, dimnames(i)[[2]])

print(fit)
print(table(iris_full$species, kls))

op <- par(mfrow = c(1, 1))
## plclust() is defunct in current R; the plot() method for hclust objects replaces it
plot(fit, main="Hierarchical Clustering")
par(op)

out <- iris_full
out$predict <- kls

write.table(out, file=paste(dat_folder, "iris.hc.tsv", sep="/"), quote=FALSE, sep="\t", row.names=FALSE)
saveXML(pmml(fit, data=iris, centers=initial, copyright=COPY), file=paste(dat_folder, "iris.hc.xml", sep="/"))
## Train a small RandomForest classifier on "orders.tsv" from the current
## directory, report its accuracy on a holdout split, then export the scored
## data (TSV) and the model (PMML) next to the input.

## uncomment the following two lines to install the required libraries
#install.packages("pmml")
#install.packages("randomForest")

library(pmml)
library(randomForest)

## load the "baseline" reference data

dat_folder <- '.'
data <- read.table(file=paste(dat_folder, "orders.tsv", sep="/"), sep="\t", quote="", na.strings="NULL", header=TRUE, encoding="UTF8")

dim(data)
head(data)

## split data into test and train sets:
## the first split_ratio share of the rows is held out, the rest is used for training

set.seed(71)
split_ratio <- 2/10
split <- round(dim(data)[1] * split_ratio)

data_tests <- data[1:split,]
dim(data_tests)
print(table(data_tests[,"label"]))

data_train <- data[(split + 1):dim(data)[1],]

## drop "order_id" so the "label ~ ." formula does not pick it up as a predictor;
## a logical mask is used because negative indexing with an empty match
## (no such column) would silently drop every column
data_train <- data_train[, colnames(data_train) != "order_id", drop=FALSE]
dim(data_train)

## train a RandomForest model

f <- as.formula("as.factor(label) ~ .")
fit <- randomForest(f, data_train, ntree=2)

print(fit$importance)
print(fit)

## test the model on the holdout test set (rows that were not used for training)

holdout <- predict(fit, data_tests)
confuse <- table(pred = holdout, true = data_tests[,"label"])
print(confuse)

## score every row, train and holdout alike, for the exported reference data

data$predict <- predict(fit, data)

## export predicted labels to TSV

write.table(data, file=paste(dat_folder, "huge.tsv", sep="/"), quote=FALSE, sep="\t", row.names=FALSE)

## export RF model to PMML

saveXML(pmml(fit), file=paste(dat_folder, "huge.rf.xml", sep="/"))
3 | * 4 | * Project and contact information: http://www.concurrentinc.com/ 5 | */ 6 | 7 | package pattern.model; 8 | 9 | import org.junit.Test; 10 | import org.slf4j.Logger; 11 | import org.slf4j.LoggerFactory; 12 | 13 | import pattern.Classifier; 14 | 15 | public class KMeansTest extends ModelTest { 16 | /** Field LOG */ 17 | private static final Logger LOG = LoggerFactory.getLogger(KMeansTest.class); 18 | 19 | /** 20 | * evaluate sample model + data from temp files 21 | * 22 | * @throws Exception 23 | */ 24 | @Test 25 | public void testMain() throws Exception { 26 | String pmml_file = makeFile("km_test", ".xml", pmml_text); 27 | String data_file = makeFile("km_test", ".tsv", data_text); 28 | 29 | Classifier classifier = new Classifier(pmml_file); 30 | eval_data(data_file, classifier); 31 | } 32 | 33 | protected String pmml_text = "
2013-01-10 18:44:35
6.85 3.07368421052632 5.74210526315789 2.071052631578955.006 3.428 1.462 0.2465.90161290322581 2.74838709677419 4.39354838709678 1.43387096774194
"; 34 | 35 | protected String data_text = "sepal_length\tsepal_width\tpetal_length\tpetal_width\tpredict\n5.1\t3.5\t1.4\t0.2\t2"; 36 | } 37 | -------------------------------------------------------------------------------- /src/test/java/pattern/model/ModelTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2013 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.concurrentinc.com/ 5 | */ 6 | 7 | package pattern.model; 8 | 9 | import static org.junit.Assert.assertEquals; 10 | import static org.junit.Assert.fail; 11 | 12 | import java.io.BufferedReader; 13 | import java.io.File; 14 | import java.io.FileOutputStream; 15 | import java.io.FileReader; 16 | import java.io.IOException; 17 | import java.io.PrintStream; 18 | import java.util.LinkedList; 19 | import java.util.List; 20 | 21 | import org.slf4j.Logger; 22 | import org.slf4j.LoggerFactory; 23 | 24 | import pattern.Classifier; 25 | import pattern.PatternException; 26 | import storm.trident.testing.MockTridentTuple; 27 | import storm.trident.tuple.TridentTuple; 28 | 29 | public class ModelTest { 30 | /** Field LOG */ 31 | private static final Logger LOG = LoggerFactory.getLogger(ModelTest.class); 32 | 33 | /** 34 | * Create a temporary text file, used for: a PMML model source, reference 35 | * input data. 
36 | * 37 | * @param base 38 | * base path in the file system 39 | * @param suffix 40 | * file suffix 41 | * @param text 42 | * text to write into the file 43 | * @return String 44 | */ 45 | protected String makeFile(String base, String suffix, String text) 46 | throws IOException { 47 | String filename = null; 48 | PrintStream out = null; 49 | 50 | try { 51 | File file = File.createTempFile(base, suffix); 52 | file.deleteOnExit(); 53 | 54 | filename = file.getCanonicalFile().toString(); 55 | 56 | if (LOG.isDebugEnabled()) 57 | LOG.debug("file: {}", filename); 58 | 59 | out = new PrintStream(new FileOutputStream(file)); 60 | out.print(text); 61 | } catch (IOException exception) { 62 | LOG.error("could not create temp file", exception); 63 | fail("cannot set up test environment"); 64 | } finally { 65 | if (out != null) { 66 | out.flush(); 67 | out.close(); 68 | } 69 | 70 | return filename; 71 | } 72 | } 73 | 74 | /** 75 | * For each tuple in the reference data -- assuming that the last field is a 76 | * predicted "label" -- present the input tuple to the model and compare the 77 | * resulting label vs. predicted as a regression test. 78 | * 79 | * @param data_file 80 | * input data for the regression test 81 | * @param classifier 82 | * Classifier object based on the PMML model 83 | * @throws IOException 84 | * @throws PatternException 85 | */ 86 | protected void eval_data(String data_file, Classifier classifier) 87 | throws IOException, PatternException { 88 | FileReader fr = new FileReader(data_file); 89 | BufferedReader br = new BufferedReader(fr); 90 | String line; 91 | int count = 0; 92 | 93 | while ((line = br.readLine()) != null) { 94 | if (count++ > 0) { 95 | // for each tuple in the reference data, assuming that the 96 | // predicted "label" is in the last field... 
97 | 98 | String[] test_vector = line.split("\\t"); 99 | String predicted = test_vector[test_vector.length - 1]; 100 | 101 | int i = 1; 102 | List tempList = new LinkedList(); 103 | for (String key : classifier.model.schema.keySet()) 104 | tempList.add(Double.parseDouble(test_vector[i++])); 105 | 106 | TridentTuple values = new MockTridentTuple(new LinkedList( 107 | classifier.model.schema.keySet()), tempList); 108 | 109 | // compare classifier label vs. predicted 110 | 111 | classifier.prepare(); 112 | 113 | String label = classifier.classifyTuple(values); 114 | LOG.debug(values.toString() + " predicted: " + predicted 115 | + " score: " + label); 116 | 117 | if (!predicted.equals(label)) { 118 | StringBuilder sb = new StringBuilder(); 119 | 120 | sb.append( 121 | String.format( 122 | "regression: classifier label [ %s ] does not match predicted [ %s ]\n", 123 | label, predicted)).append(line); 124 | 125 | fail(sb.toString()); 126 | } 127 | 128 | assertEquals("Label", predicted, label); 129 | } 130 | } 131 | 132 | fr.close(); 133 | } 134 | } 135 | --------------------------------------------------------------------------------