├── .gitignore
├── ExploreModels
├── modeltables.RData
├── server.R
├── shinyFunctions.R
└── ui.R
├── LICENSE
├── PrepareSummaries
└── prepForShiny.R
├── README.md
├── ScoreModels
├── processArffs.py
└── runOnEC2.bash
├── ScoreModelsR
└── .gitignore
└── data
├── modelres.csv.gz
├── problemDescr.csv
└── processArffs_log.txt
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | *~
3 | *.pem
4 | *.pem.txt
5 | *.dcf
6 |
7 |
--------------------------------------------------------------------------------
/ExploreModels/modeltables.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/ExploreModels/49e68c6101739d06eb3a3ab5b6a6fad337ca9bf0/ExploreModels/modeltables.RData
--------------------------------------------------------------------------------
/ExploreModels/server.R:
--------------------------------------------------------------------------------
1 | #AccuracyCounts
2 |
3 | #
4 | # from directory above this one:
5 | # library(shiny)
6 | # runApp("ExploreModels")
7 | #
8 | options(rgl.useNULL=TRUE)
9 | library(shiny)
10 | source("shinyFunctions.R")
11 |
12 |
# Server logic for the model explorer.
#
# Every output is derived from the sidebar inputs (minrows, maxrows, nclasses,
# ratio) through the helpers defined in shinyFunctions.R.
shinyServer(function(input, output) {

  # Data sets matching the sidebar filters: list(sets = <names>, nsets = <count>)
  dataSets <- reactive(
    getDataSets(input$minrows, input$maxrows, input$nclasses, input$ratio)
  )

  # Number of data sets on which each model is maximally accurate
  accTable <- reactive(
    getAccuracyCounts(dataSets()$sets)
  )

  # Average accuracy of each model over the selected data sets
  avgAccTable <- reactive(
    calculateAccuracy(dataSets()$sets)
  )

  # Model-to-model distance matrix over the selected data sets
  dmatrix <- reactive(
    getDistanceMatrix(dataSets()$sets)
  )

  # Average Accuracy tab
  output$plotlabelAA <- renderText(paste("Average model accuracy over sets with",
                                         input$minrows, "to", input$maxrows, "rows"))

  output$accPlot <- renderPlot({
    title <- paste(dataSets()$nsets, "sets in total")
    plotAccuracy(avgAccTable(), title)
  })

  # Accuracy counts tab
  output$plotlabelAC <- renderText(paste("How many times is each model maximally accurate for data sets of size",
                                         input$minrows, "to", input$maxrows, "rows?"))

  output$countPlot <- renderPlot({
    title <- paste(dataSets()$nsets, "sets in total")
    plotAccCounts(accTable(), title)
  })

  # Dendrogram and 3d plots share one label text. The text is computed once
  # and exposed under two output ids: ui.R binds the dendrogram tab to
  # "plotlabelDend" and the 3d tab to "plotlabelDent". Previously only the
  # first id was defined, so the 3d tab's label never rendered.
  distLabel <- reactive(paste("Distances between models, over data sets of size",
                              input$minrows, "to", input$maxrows, "rows"))

  output$plotlabelDend <- renderText(distLabel())
  output$plotlabelDent <- renderText(distLabel())

  output$plotDend <- renderPlot({
    title <- paste("Model groupings:", dataSets()$nsets, "sets in total")
    plotModelDends(dmatrix(), title)
  })

  # 3d plots
  output$plotDists <- renderWebGL({
    plotModelDists(dmatrix())
  })

})
68 |
--------------------------------------------------------------------------------
/ExploreModels/shinyFunctions.R:
--------------------------------------------------------------------------------
1 | #AccuracyCounts
2 |
3 | options(gsubfn.engine="R")
4 | library(sqldf)
5 | library(ggplot2)
6 | library(rgl)
7 |
8 | load("modeltables.RData")
9 |
# Constants derived once, at load time, from the PROBLEM_DESC table
# (presumably supplied by modeltables.RData -- confirm).
MAXROWS <- max(PROBLEM_DESC[["nRows"]])      # largest data set size
MAXN <- max(PROBLEM_DESC[["nTargets"]])      # largest number of target classes
DS_RATIO <- PROBLEM_DESC[["nVars"]] / PROBLEM_DESC[["nRows"]]  # variables per row
14 |
15 |
# Average accuracy of each model over a set of problem files.
#
# problemfilelist: problem file names to include
# modellist:       model names to include
# accmat:          table with Problem_File_Name, Model_Name and isRight columns
#
# Returns one row per model (Model_Name, accuracy), ordered by Model_Name.
calculateAccuracy <- function(problemfilelist, modellist = ALL_BUT_GROUND, accmat = ACCURACY_MATRIX) {
  keep <- accmat[["Problem_File_Name"]] %in% problemfilelist &
    accmat[["Model_Name"]] %in% modellist
  # the query below refers to this local data frame by its name, "amat"
  amat <- accmat[keep, , drop = FALSE]

  query <- paste("select Model_Name, avg(isRight) accuracy",
                 "from amat",
                 "group by Model_Name",
                 "order by Model_Name",
                 sep = "\n")
  sqldf(query)
}
23 |
24 |
# Horizontal bar chart of model accuracies; the most accurate model(s) are
# drawn in dark blue, the rest in dark gray.
#
# acc_table: data frame with Model_Name and accuracy columns
# title:     plot title
#
# Returns a ggplot object.
plotAccuracy <- function(acc_table, title = "Model accuracies") {
  # Build the flag column with rep() instead of a scalar assignment so that a
  # zero-row table (no data sets match the current filters) does not raise
  # "replacement has 1 row, data has 0".
  top <- if (nrow(acc_table) > 0) max(acc_table$accuracy) else NA_real_
  acc_table$isbest <- rep("no", nrow(acc_table))
  acc_table$isbest[which(acc_table$accuracy >= top)] <- "yes"  # ties all count as best
  acc_table$accstring <- format(acc_table$accuracy, digits = 3)

  ggplot(acc_table, aes(x = Model_Name, y = accuracy, fill = isbest)) +
    geom_bar(stat = "identity") +
    scale_fill_manual(values = c("no" = "darkgray", "yes" = "darkblue")) +
    geom_text(aes(y = -0.05, label = accstring)) +  # value printed left of the axis
    coord_flip() +
    guides(fill = "none") +  # guides(fill = FALSE) is deprecated in ggplot2
    ggtitle(title)
}
37 |
38 |
39 | # test range - 10 000 - 100 000
# Count, for each model, the number of problem files on which it reaches the
# highest accuracy (ties give a win to every tied model).
#
# problemfilelist: problem file names to include
# modellist:       model names to include
# accmat:          table with Problem_File_Name, Model_Name and isRight columns
#
# Returns a data frame (Model_Name, nwins); models in modellist that never win
# are appended with nwins = 0.
getAccuracyCounts <- function(problemfilelist, modellist = ALL_BUT_GROUND, accmat = ACCURACY_MATRIX) {
  keep <- accmat[["Problem_File_Name"]] %in% problemfilelist &
    accmat[["Model_Name"]] %in% modellist
  # amat, by_problem and arg_max are referenced by name inside the SQL below
  amat <- accmat[keep, , drop = FALSE]

  # accuracy of every model on every problem
  by_problem <- sqldf(paste("select Problem_File_Name,",
                            "Model_Name,",
                            "avg(isRight) accuracy",
                            "from amat",
                            "group by Problem_File_Name, Model_Name",
                            sep = "\n"))

  # best accuracy achieved on each problem
  arg_max <- sqldf(paste("select Problem_File_Name,",
                         "max(accuracy) maxacc",
                         "from by_problem",
                         "group by Problem_File_Name",
                         sep = "\n"))

  # number of problems on which each model attains that best accuracy
  winners <- sqldf(paste("select bp.Model_Name Model_Name,",
                         "count(bp.Problem_File_Name) nwins",
                         "from by_problem bp,",
                         "arg_max am",
                         "where bp.Problem_File_Name = am.Problem_File_Name",
                         "and bp.accuracy = am.maxacc",
                         "group by bp.Model_Name",
                         "order by bp.Model_Name",
                         sep = "\n"))

  never_won <- setdiff(modellist, winners$Model_Name)
  if (length(never_won) == 0) {
    return(winners)
  }
  rbind(winners, data.frame(Model_Name = never_won, nwins = 0))
}
70 |
# Horizontal bar chart of win counts per model; the model(s) with the most
# wins are drawn in dark blue, the rest in dark gray.
#
# most_acc_table: data frame with Model_Name and nwins columns
# title:          plot title
#
# Returns a ggplot object.
plotAccCounts <- function(most_acc_table, title = 'Accuracy Counts') {
  # rep() rather than a scalar assignment: a zero-row table must not raise
  # "replacement has 1 row, data has 0".
  top <- if (nrow(most_acc_table) > 0) max(most_acc_table$nwins) else NA_real_
  most_acc_table$isbest <- rep("no", nrow(most_acc_table))
  most_acc_table$isbest[which(most_acc_table$nwins >= top)] <- "yes"  # ties all count

  ggplot(most_acc_table, aes(x = Model_Name, y = nwins, fill = isbest)) +
    geom_bar(stat = "identity") +
    scale_fill_manual(values = c("no" = "darkgray", "yes" = "darkblue")) +
    geom_text(aes(y = -1, label = nwins)) +  # count printed left of the axis
    coord_flip() +
    guides(fill = "none") +  # guides(fill = FALSE) is deprecated in ggplot2
    ggtitle(title)
}
82 |
83 |
# Build a model-by-model distance matrix over a set of problem files.
#
# problemfilelist: problem file names to include
# modellist:       model names to include (both sides of each pair)
# distmat:         table with Problem_File_Name, modelName1, modelName2, sqDist
#
# Squared distances are summed per model pair across the selected files and
# the square root of each sum becomes the matrix entry.
getDistanceMatrix <- function(problemfilelist, modellist = ALLMODELS, distmat = DISTANCE_TABLE) {
  keep <- distmat[["Problem_File_Name"]] %in% problemfilelist &
    distmat[["modelName1"]] %in% modellist &
    distmat[["modelName2"]] %in% modellist
  # the query below refers to this local data frame by its name, "dmat"
  dmat <- distmat[keep, , drop = FALSE]

  pairSums <- sqldf(paste("select modelName1,",
                          "modelName2,",
                          "sum(sqDist) sqDist",
                          "from dmat",
                          "group by modelName1, modelName2",
                          sep = "\n"))

  crossTab <- xtabs(sqrt(sqDist) ~ modelName1 + modelName2, data = pairSums)
  as.matrix(crossTab)
}
95 |
# Summed squared distance of each model from the 'ground truth' rows.
#
# problemfilelist: problem file names to include
# modellist:       model names to include
# distmat:         table with Problem_File_Name, modelName1, modelName2, sqDist
#
# Returns a data frame (modelName, sqDist) ordered by model name.
distFromTruth <- function(problemfilelist, modellist = ALL_BUT_GROUND, distmat = DISTANCE_TABLE) {
  keep <- distmat[["Problem_File_Name"]] %in% problemfilelist &
    distmat[["modelName1"]] %in% 'ground truth' &
    distmat[["modelName2"]] %in% modellist
  # the query below refers to this local data frame by its name, "dmat"
  dmat <- distmat[keep, , drop = FALSE]

  fromTruth <- sqldf(paste("select modelName1 ground,",
                           "modelName2 modelName,",
                           "sum(sqDist) sqDist",
                           "from dmat",
                           "group by modelName1, modelName2",
                           "order by modelName2",
                           sep = "\n"))

  # the first column only ever says 'ground truth', so leave it out
  fromTruth[-1]
}
108 |
109 |
# Horizontal bar chart of each model's distance from ground truth; the closest
# model is drawn in dark blue, the rest in dark gray.
#
# dist_table: data frame with modelName and sqDist columns
# title:      plot title
#
# Returns a ggplot object.
plotDistFromTruth <- function(dist_table, title = "Model distance from ground truth") {
  # rep() rather than a scalar assignment: a zero-row table must not raise
  # "replacement has 1 row, data has 0".
  closest <- which.min(dist_table$sqDist)  # first minimum; integer(0) if empty
  dist_table$isbest <- rep("no", nrow(dist_table))
  dist_table$isbest[closest] <- "yes"
  dist_table$dist <- sqrt(dist_table$sqDist)
  dist_table$diststr <- format(dist_table$dist, digits = 3)

  ggplot(dist_table, aes(x = modelName, y = dist, fill = isbest)) +
    geom_bar(stat = "identity") +
    scale_fill_manual(values = c("no" = "darkgray", "yes" = "darkblue")) +
    geom_text(aes(y = -1.5, label = diststr)) +  # value printed left of the axis
    coord_flip() +
    guides(fill = "none") +  # guides(fill = FALSE) is deprecated in ggplot2
    ggtitle(title)
}
123 |
# 3d scatter plot of the models, embedded with classical multidimensional
# scaling and translated so that 'ground truth' sits at the origin. A gray
# line joins every model to the ground-truth point.
#
# dmatrix: square distance matrix whose row names include 'ground truth'
#
# Draws on the current rgl device.
plotModelDists <- function(dmatrix) {
  plt3 <- cmdscale(dmatrix, k = 3)

  # Locate the ground-truth row BEFORE the row names are replaced by display
  # labels; looking it up afterwards fails silently (no connector lines) when
  # the display label differs from 'ground truth'.
  originrow <- match('ground truth', rownames(plt3))
  if (is.na(originrow)) {
    stop("distance matrix has no 'ground truth' row", call. = FALSE)
  }

  # set ground truth to the origin
  plt3 <- sweep(plt3, 2, plt3[originrow, ])
  if (sum(colSums(plt3)) < 0) plt3 <- -plt3  # reflect it so that origin is bottom left

  # NOTE(review): LABELMAP is assumed to map model names to display labels -- confirm
  rownames(plt3) <- LABELMAP[rownames(plt3)]

  plot3d(plt3, size = 5, col = "blue", xlab = "X", ylab = "Y", zlab = "Z")
  # seq_len()/setdiff() instead of 1:n and negative indexing: both are safe
  # for degenerate sizes
  for (irow in setdiff(seq_len(nrow(plt3)), originrow)) {
    plot3d(plt3[c(irow, originrow), ], add = TRUE, type = "l", col = "gray")
  }
  text3d(plt3, texts = rownames(plt3), adj = c(0, 0))
}
145 |
# Draw a hierarchical-clustering dendrogram of the models.
#
# dmatrix: square matrix of model-to-model distances
# title:   main title of the plot
plotModelDends <- function(dmatrix, title = "Model Groupings") {
  plot(hclust(as.dist(dmatrix)),
       main = title, axes = FALSE, sub = '', xlab = '', ylab = '')
}
150 |
# nclasslist = subset of c("2", "3:5", "6:10", "11:MAXN")
# ratiolist = subset of c("c(0, 0.01)", "c(0.01, 0.03)", "c(0.03, 0.1)", "c(0.1, Inf)")

# Turn one class-count spec ("2", "3:5", "11:MAXN") into the class counts it
# covers. "a:b" is an ascending range and is empty when a > b, so "11:MAXN"
# matches nothing (rather than counting down) when MAXN is below 11.
.parseClassSpec <- function(spec) {
  if (!grepl("^([0-9]+|MAXN)(:([0-9]+|MAXN))?$", spec)) {
    stop("unrecognized class-count specification: ", spec, call. = FALSE)
  }
  bounds <- vapply(strsplit(spec, ":", fixed = TRUE)[[1]],
                   function(tok) if (tok == "MAXN") as.numeric(MAXN) else as.numeric(tok),
                   numeric(1), USE.NAMES = FALSE)
  lo <- bounds[1]
  hi <- bounds[length(bounds)]
  if (lo > hi) {
    return(numeric(0))
  }
  seq(lo, hi)
}

# Turn one ratio spec of the form "c(lo, hi)" (hi may be Inf) into c(lo, hi).
.parseRatioSpec <- function(spec) {
  num <- "(?:[0-9]+\\.?[0-9]*|\\.[0-9]+)(?:[eE][-+]?[0-9]+)?"
  pattern <- paste0("^c\\(\\s*(", num, ")\\s*,\\s*(", num, "|Inf)\\s*\\)$")
  if (!grepl(pattern, spec, perl = TRUE)) {
    stop("unrecognized ratio specification: ", spec, call. = FALSE)
  }
  as.numeric(c(sub(pattern, "\\1", spec, perl = TRUE),
               sub(pattern, "\\2", spec, perl = TRUE)))
}

# Select the data sets in PROBLEM_DESC that satisfy all sidebar filters.
#
# minrows, maxrows: inclusive bounds on nRows; NULL or NA (e.g. a cleared
#                   numeric input) means "no bound on that side"
# nclasslist:       class-count specs (see above); a set matches if its
#                   nTargets is covered by any of them
# ratiolist:        ratio specs (see above); a set matches if its nVars/nRows
#                   ratio lies in any half-open interval (lo, hi]
#
# Returns list(sets = <matching Problem_File_Name values>, nsets = <count>).
#
# The specs arrive from the browser, so they are parsed with strict patterns
# instead of eval(parse(text = ...)), which would run arbitrary client code.
getDataSets <- function(minrows, maxrows, nclasslist, ratiolist) {
  if (is.null(minrows) || (length(minrows) == 1 && is.na(minrows))) minrows <- -Inf
  if (is.null(maxrows) || (length(maxrows) == 1 && is.na(maxrows))) maxrows <- Inf

  filterRows <- minrows <= PROBLEM_DESC$nRows & PROBLEM_DESC$nRows <= maxrows

  classnumlist <- unlist(lapply(nclasslist, .parseClassSpec))
  filterN <- PROBLEM_DESC$nTargets %in% classnumlist

  # start from all-FALSE and OR in each selected interval
  filterR <- logical(nrow(PROBLEM_DESC))
  for (spec in ratiolist) {
    interval <- .parseRatioSpec(spec)
    filterR <- filterR | (interval[1] < DS_RATIO & DS_RATIO <= interval[2])
  }

  filter <- filterRows & filterN & filterR

  list(sets = PROBLEM_DESC$Problem_File_Name[filter], nsets = sum(filter))
}
174 |
175 |
176 | #-------------------------
177 | # print("global results")
178 | #
179 | #
180 | # print("model accuracy")
181 | # print(calculateAccuracy(ALLFILES))
182 | #
183 | #
184 | # print("model distances from ground truth")
185 | # print(distFromTruth(ALLFILES))
186 | #
187 | #
188 | # print("how many times did each model achieve best score?")
189 | # print(getAccuracyCounts(ALLFILES))
190 | #
191 | # print("model distances")
192 | # plotModelDists(getDistanceMatrix(ALLFILES))
193 | # plotModelDends(getDistanceMatrix(ALLFILES), "Model grouping, all sets")
194 | #
195 |
--------------------------------------------------------------------------------
/ExploreModels/ui.R:
--------------------------------------------------------------------------------
1 | # AccuracyCounts
2 | options(rgl.useNULL=TRUE)
3 | library(shiny)
4 | library(shinyRGL)
5 | source("shinyFunctions.R")
6 |
# Note shown next to the label on every tab
urlNote = 'See The Win-Vector blog for details.'


# Define the UI for the model-exploration application: a sidebar of data-set
# filters and a main panel with one tab per view.
shinyUI(fluidPage(

  # Application title
  titlePanel("Explore Model Behavior"),

  # Sidebar: filters on data set size, number of target classes and shape
  sidebarLayout(
    sidebarPanel(
      numericInput("minrows",
                   "Minimum data set size:",
                   0, # initial value
                   min = 0,
                   max = MAXROWS,
                   step = 100),

      numericInput("maxrows",
                   "Maximum data set size:",
                   MAXROWS, # initial value
                   min = 0,
                   max = MAXROWS,
                   step = 100),

      hr(),

      # the values are the class-count specs consumed by getDataSets()
      checkboxGroupInput("nclasses", "Number of target classes (check all that apply)",
                         c("Two-Class" = "2",
                           "3-5 classes" = "3:5",
                           "6-10 classes" = "6:10",
                           "more than 10 classes" = "11:MAXN"),
                         selected = c("2", "3:5", "6:10", "11:MAXN")), # all of them

      hr(),

      # the values are the ratio specs consumed by getDataSets()
      checkboxGroupInput("ratio", "Data set shape (variables vs. rows)", # intervals ( ..]
                         c("Narrow (Less than 1 variable per 100 rows)" = "c(0, 0.01)",
                           "Moderate (1 to 3 variables per 100 rows)" = "c(0.01, 0.03)",
                           "Wide (3 to 10 variables per 100 rows)" = "c(0.03, 0.1)",
                           "Very wide (More than 10 variables per 100 rows)" = "c(0.1, Inf)"),
                         selected = c("c(0, 0.01)", "c(0.01, 0.03)", "c(0.03, 0.1)", "c(0.1, Inf)")) # all of them
    ),

    # One tab per view: average accuracy, accuracy counts, dendrogram, 3d distances
    mainPanel (
      tabsetPanel (
        tabPanel("Average Accuracy", list(textOutput("plotlabelAA"),HTML(urlNote)), plotOutput("accPlot")),
        tabPanel("Accuracy Counts", list(textOutput("plotlabelAC"),HTML(urlNote)), plotOutput("countPlot")),
        tabPanel("Model Dendrogram", list(textOutput("plotlabelDend"),HTML(urlNote)), plotOutput("plotDend")),
        # NOTE(review): this label's output id is "plotlabelDent", not
        # "plotlabelDend"; it renders only if server.R defines an output
        # with this exact id.
        tabPanel("Model Distances (3d)", list(textOutput("plotlabelDent"),HTML(urlNote)), webGLOutput("plotDists"))
      )
    )

  ) # end sidebarLayout
)
)
66 |
67 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc.
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 | Preamble
9 |
10 | The GNU General Public License is a free, copyleft license for
11 | software and other kinds of works.
12 |
13 | The licenses for most software and other practical works are designed
14 | to take away your freedom to share and change the works. By contrast,
15 | the GNU General Public License is intended to guarantee your freedom to
16 | share and change all versions of a program--to make sure it remains free
17 | software for all its users. We, the Free Software Foundation, use the
18 | GNU General Public License for most of our software; it applies also to
19 | any other work released this way by its authors. You can apply it to
20 | your programs, too.
21 |
22 | When we speak of free software, we are referring to freedom, not
23 | price. Our General Public Licenses are designed to make sure that you
24 | have the freedom to distribute copies of free software (and charge for
25 | them if you wish), that you receive source code or can get it if you
26 | want it, that you can change the software or use pieces of it in new
27 | free programs, and that you know you can do these things.
28 |
29 | To protect your rights, we need to prevent others from denying you
30 | these rights or asking you to surrender the rights. Therefore, you have
31 | certain responsibilities if you distribute copies of the software, or if
32 | you modify it: responsibilities to respect the freedom of others.
33 |
34 | For example, if you distribute copies of such a program, whether
35 | gratis or for a fee, you must pass on to the recipients the same
36 | freedoms that you received. You must make sure that they, too, receive
37 | or can get the source code. And you must show them these terms so they
38 | know their rights.
39 |
40 | Developers that use the GNU GPL protect your rights with two steps:
41 | (1) assert copyright on the software, and (2) offer you this License
42 | giving you legal permission to copy, distribute and/or modify it.
43 |
44 | For the developers' and authors' protection, the GPL clearly explains
45 | that there is no warranty for this free software. For both users' and
46 | authors' sake, the GPL requires that modified versions be marked as
47 | changed, so that their problems will not be attributed erroneously to
48 | authors of previous versions.
49 |
50 | Some devices are designed to deny users access to install or run
51 | modified versions of the software inside them, although the manufacturer
52 | can do so. This is fundamentally incompatible with the aim of
53 | protecting users' freedom to change the software. The systematic
54 | pattern of such abuse occurs in the area of products for individuals to
55 | use, which is precisely where it is most unacceptable. Therefore, we
56 | have designed this version of the GPL to prohibit the practice for those
57 | products. If such problems arise substantially in other domains, we
58 | stand ready to extend this provision to those domains in future versions
59 | of the GPL, as needed to protect the freedom of users.
60 |
61 | Finally, every program is threatened constantly by software patents.
62 | States should not allow patents to restrict development and use of
63 | software on general-purpose computers, but in those that do, we wish to
64 | avoid the special danger that patents applied to a free program could
65 | make it effectively proprietary. To prevent this, the GPL assures that
66 | patents cannot be used to render the program non-free.
67 |
68 | The precise terms and conditions for copying, distribution and
69 | modification follow.
70 |
71 | TERMS AND CONDITIONS
72 |
73 | 0. Definitions.
74 |
75 | "This License" refers to version 3 of the GNU General Public License.
76 |
77 | "Copyright" also means copyright-like laws that apply to other kinds of
78 | works, such as semiconductor masks.
79 |
80 | "The Program" refers to any copyrightable work licensed under this
81 | License. Each licensee is addressed as "you". "Licensees" and
82 | "recipients" may be individuals or organizations.
83 |
84 | To "modify" a work means to copy from or adapt all or part of the work
85 | in a fashion requiring copyright permission, other than the making of an
86 | exact copy. The resulting work is called a "modified version" of the
87 | earlier work or a work "based on" the earlier work.
88 |
89 | A "covered work" means either the unmodified Program or a work based
90 | on the Program.
91 |
92 | To "propagate" a work means to do anything with it that, without
93 | permission, would make you directly or secondarily liable for
94 | infringement under applicable copyright law, except executing it on a
95 | computer or modifying a private copy. Propagation includes copying,
96 | distribution (with or without modification), making available to the
97 | public, and in some countries other activities as well.
98 |
99 | To "convey" a work means any kind of propagation that enables other
100 | parties to make or receive copies. Mere interaction with a user through
101 | a computer network, with no transfer of a copy, is not conveying.
102 |
103 | An interactive user interface displays "Appropriate Legal Notices"
104 | to the extent that it includes a convenient and prominently visible
105 | feature that (1) displays an appropriate copyright notice, and (2)
106 | tells the user that there is no warranty for the work (except to the
107 | extent that warranties are provided), that licensees may convey the
108 | work under this License, and how to view a copy of this License. If
109 | the interface presents a list of user commands or options, such as a
110 | menu, a prominent item in the list meets this criterion.
111 |
112 | 1. Source Code.
113 |
114 | The "source code" for a work means the preferred form of the work
115 | for making modifications to it. "Object code" means any non-source
116 | form of a work.
117 |
118 | A "Standard Interface" means an interface that either is an official
119 | standard defined by a recognized standards body, or, in the case of
120 | interfaces specified for a particular programming language, one that
121 | is widely used among developers working in that language.
122 |
123 | The "System Libraries" of an executable work include anything, other
124 | than the work as a whole, that (a) is included in the normal form of
125 | packaging a Major Component, but which is not part of that Major
126 | Component, and (b) serves only to enable use of the work with that
127 | Major Component, or to implement a Standard Interface for which an
128 | implementation is available to the public in source code form. A
129 | "Major Component", in this context, means a major essential component
130 | (kernel, window system, and so on) of the specific operating system
131 | (if any) on which the executable work runs, or a compiler used to
132 | produce the work, or an object code interpreter used to run it.
133 |
134 | The "Corresponding Source" for a work in object code form means all
135 | the source code needed to generate, install, and (for an executable
136 | work) run the object code and to modify the work, including scripts to
137 | control those activities. However, it does not include the work's
138 | System Libraries, or general-purpose tools or generally available free
139 | programs which are used unmodified in performing those activities but
140 | which are not part of the work. For example, Corresponding Source
141 | includes interface definition files associated with source files for
142 | the work, and the source code for shared libraries and dynamically
143 | linked subprograms that the work is specifically designed to require,
144 | such as by intimate data communication or control flow between those
145 | subprograms and other parts of the work.
146 |
147 | The Corresponding Source need not include anything that users
148 | can regenerate automatically from other parts of the Corresponding
149 | Source.
150 |
151 | The Corresponding Source for a work in source code form is that
152 | same work.
153 |
154 | 2. Basic Permissions.
155 |
156 | All rights granted under this License are granted for the term of
157 | copyright on the Program, and are irrevocable provided the stated
158 | conditions are met. This License explicitly affirms your unlimited
159 | permission to run the unmodified Program. The output from running a
160 | covered work is covered by this License only if the output, given its
161 | content, constitutes a covered work. This License acknowledges your
162 | rights of fair use or other equivalent, as provided by copyright law.
163 |
164 | You may make, run and propagate covered works that you do not
165 | convey, without conditions so long as your license otherwise remains
166 | in force. You may convey covered works to others for the sole purpose
167 | of having them make modifications exclusively for you, or provide you
168 | with facilities for running those works, provided that you comply with
169 | the terms of this License in conveying all material for which you do
170 | not control copyright. Those thus making or running the covered works
171 | for you must do so exclusively on your behalf, under your direction
172 | and control, on terms that prohibit them from making any copies of
173 | your copyrighted material outside their relationship with you.
174 |
175 | Conveying under any other circumstances is permitted solely under
176 | the conditions stated below. Sublicensing is not allowed; section 10
177 | makes it unnecessary.
178 |
179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180 |
181 | No covered work shall be deemed part of an effective technological
182 | measure under any applicable law fulfilling obligations under article
183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184 | similar laws prohibiting or restricting circumvention of such
185 | measures.
186 |
187 | When you convey a covered work, you waive any legal power to forbid
188 | circumvention of technological measures to the extent such circumvention
189 | is effected by exercising rights under this License with respect to
190 | the covered work, and you disclaim any intention to limit operation or
191 | modification of the work as a means of enforcing, against the work's
192 | users, your or third parties' legal rights to forbid circumvention of
193 | technological measures.
194 |
195 | 4. Conveying Verbatim Copies.
196 |
197 | You may convey verbatim copies of the Program's source code as you
198 | receive it, in any medium, provided that you conspicuously and
199 | appropriately publish on each copy an appropriate copyright notice;
200 | keep intact all notices stating that this License and any
201 | non-permissive terms added in accord with section 7 apply to the code;
202 | keep intact all notices of the absence of any warranty; and give all
203 | recipients a copy of this License along with the Program.
204 |
205 | You may charge any price or no price for each copy that you convey,
206 | and you may offer support or warranty protection for a fee.
207 |
208 | 5. Conveying Modified Source Versions.
209 |
210 | You may convey a work based on the Program, or the modifications to
211 | produce it from the Program, in the form of source code under the
212 | terms of section 4, provided that you also meet all of these conditions:
213 |
214 | a) The work must carry prominent notices stating that you modified
215 | it, and giving a relevant date.
216 |
217 | b) The work must carry prominent notices stating that it is
218 | released under this License and any conditions added under section
219 | 7. This requirement modifies the requirement in section 4 to
220 | "keep intact all notices".
221 |
222 | c) You must license the entire work, as a whole, under this
223 | License to anyone who comes into possession of a copy. This
224 | License will therefore apply, along with any applicable section 7
225 | additional terms, to the whole of the work, and all its parts,
226 | regardless of how they are packaged. This License gives no
227 | permission to license the work in any other way, but it does not
228 | invalidate such permission if you have separately received it.
229 |
230 | d) If the work has interactive user interfaces, each must display
231 | Appropriate Legal Notices; however, if the Program has interactive
232 | interfaces that do not display Appropriate Legal Notices, your
233 | work need not make them do so.
234 |
235 | A compilation of a covered work with other separate and independent
236 | works, which are not by their nature extensions of the covered work,
237 | and which are not combined with it such as to form a larger program,
238 | in or on a volume of a storage or distribution medium, is called an
239 | "aggregate" if the compilation and its resulting copyright are not
240 | used to limit the access or legal rights of the compilation's users
241 | beyond what the individual works permit. Inclusion of a covered work
242 | in an aggregate does not cause this License to apply to the other
243 | parts of the aggregate.
244 |
245 | 6. Conveying Non-Source Forms.
246 |
247 | You may convey a covered work in object code form under the terms
248 | of sections 4 and 5, provided that you also convey the
249 | machine-readable Corresponding Source under the terms of this License,
250 | in one of these ways:
251 |
252 | a) Convey the object code in, or embodied in, a physical product
253 | (including a physical distribution medium), accompanied by the
254 | Corresponding Source fixed on a durable physical medium
255 | customarily used for software interchange.
256 |
257 | b) Convey the object code in, or embodied in, a physical product
258 | (including a physical distribution medium), accompanied by a
259 | written offer, valid for at least three years and valid for as
260 | long as you offer spare parts or customer support for that product
261 | model, to give anyone who possesses the object code either (1) a
262 | copy of the Corresponding Source for all the software in the
263 | product that is covered by this License, on a durable physical
264 | medium customarily used for software interchange, for a price no
265 | more than your reasonable cost of physically performing this
266 | conveying of source, or (2) access to copy the
267 | Corresponding Source from a network server at no charge.
268 |
269 | c) Convey individual copies of the object code with a copy of the
270 | written offer to provide the Corresponding Source. This
271 | alternative is allowed only occasionally and noncommercially, and
272 | only if you received the object code with such an offer, in accord
273 | with subsection 6b.
274 |
275 | d) Convey the object code by offering access from a designated
276 | place (gratis or for a charge), and offer equivalent access to the
277 | Corresponding Source in the same way through the same place at no
278 | further charge. You need not require recipients to copy the
279 | Corresponding Source along with the object code. If the place to
280 | copy the object code is a network server, the Corresponding Source
281 | may be on a different server (operated by you or a third party)
282 | that supports equivalent copying facilities, provided you maintain
283 | clear directions next to the object code saying where to find the
284 | Corresponding Source. Regardless of what server hosts the
285 | Corresponding Source, you remain obligated to ensure that it is
286 | available for as long as needed to satisfy these requirements.
287 |
288 | e) Convey the object code using peer-to-peer transmission, provided
289 | you inform other peers where the object code and Corresponding
290 | Source of the work are being offered to the general public at no
291 | charge under subsection 6d.
292 |
293 | A separable portion of the object code, whose source code is excluded
294 | from the Corresponding Source as a System Library, need not be
295 | included in conveying the object code work.
296 |
297 | A "User Product" is either (1) a "consumer product", which means any
298 | tangible personal property which is normally used for personal, family,
299 | or household purposes, or (2) anything designed or sold for incorporation
300 | into a dwelling. In determining whether a product is a consumer product,
301 | doubtful cases shall be resolved in favor of coverage. For a particular
302 | product received by a particular user, "normally used" refers to a
303 | typical or common use of that class of product, regardless of the status
304 | of the particular user or of the way in which the particular user
305 | actually uses, or expects or is expected to use, the product. A product
306 | is a consumer product regardless of whether the product has substantial
307 | commercial, industrial or non-consumer uses, unless such uses represent
308 | the only significant mode of use of the product.
309 |
310 | "Installation Information" for a User Product means any methods,
311 | procedures, authorization keys, or other information required to install
312 | and execute modified versions of a covered work in that User Product from
313 | a modified version of its Corresponding Source. The information must
314 | suffice to ensure that the continued functioning of the modified object
315 | code is in no case prevented or interfered with solely because
316 | modification has been made.
317 |
318 | If you convey an object code work under this section in, or with, or
319 | specifically for use in, a User Product, and the conveying occurs as
320 | part of a transaction in which the right of possession and use of the
321 | User Product is transferred to the recipient in perpetuity or for a
322 | fixed term (regardless of how the transaction is characterized), the
323 | Corresponding Source conveyed under this section must be accompanied
324 | by the Installation Information. But this requirement does not apply
325 | if neither you nor any third party retains the ability to install
326 | modified object code on the User Product (for example, the work has
327 | been installed in ROM).
328 |
329 | The requirement to provide Installation Information does not include a
330 | requirement to continue to provide support service, warranty, or updates
331 | for a work that has been modified or installed by the recipient, or for
332 | the User Product in which it has been modified or installed. Access to a
333 | network may be denied when the modification itself materially and
334 | adversely affects the operation of the network or violates the rules and
335 | protocols for communication across the network.
336 |
337 | Corresponding Source conveyed, and Installation Information provided,
338 | in accord with this section must be in a format that is publicly
339 | documented (and with an implementation available to the public in
340 | source code form), and must require no special password or key for
341 | unpacking, reading or copying.
342 |
343 | 7. Additional Terms.
344 |
345 | "Additional permissions" are terms that supplement the terms of this
346 | License by making exceptions from one or more of its conditions.
347 | Additional permissions that are applicable to the entire Program shall
348 | be treated as though they were included in this License, to the extent
349 | that they are valid under applicable law. If additional permissions
350 | apply only to part of the Program, that part may be used separately
351 | under those permissions, but the entire Program remains governed by
352 | this License without regard to the additional permissions.
353 |
354 | When you convey a copy of a covered work, you may at your option
355 | remove any additional permissions from that copy, or from any part of
356 | it. (Additional permissions may be written to require their own
357 | removal in certain cases when you modify the work.) You may place
358 | additional permissions on material, added by you to a covered work,
359 | for which you have or can give appropriate copyright permission.
360 |
361 | Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 |
365 | a) Disclaiming warranty or limiting liability differently from the
366 | terms of sections 15 and 16 of this License; or
367 |
368 | b) Requiring preservation of specified reasonable legal notices or
369 | author attributions in that material or in the Appropriate Legal
370 | Notices displayed by works containing it; or
371 |
372 | c) Prohibiting misrepresentation of the origin of that material, or
373 | requiring that modified versions of such material be marked in
374 | reasonable ways as different from the original version; or
375 |
376 | d) Limiting the use for publicity purposes of names of licensors or
377 | authors of the material; or
378 |
379 | e) Declining to grant rights under trademark law for use of some
380 | trade names, trademarks, or service marks; or
381 |
382 | f) Requiring indemnification of licensors and authors of that
383 | material by anyone who conveys the material (or modified versions of
384 | it) with contractual assumptions of liability to the recipient, for
385 | any liability that these contractual assumptions directly impose on
386 | those licensors and authors.
387 |
388 | All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10. If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term. If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 |
398 | If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 |
403 | Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 |
407 | 8. Termination.
408 |
409 | You may not propagate or modify a covered work except as expressly
410 | provided under this License. Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 |
415 | However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 |
422 | Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 |
429 | Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License. If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 |
435 | 9. Acceptance Not Required for Having Copies.
436 |
437 | You are not required to accept this License in order to receive or
438 | run a copy of the Program. Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance. However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work. These actions infringe copyright if you do
443 | not accept this License. Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 |
446 | 10. Automatic Licensing of Downstream Recipients.
447 |
448 | Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License. You are not responsible
451 | for enforcing compliance by third parties with this License.
452 |
453 | An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations. If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 |
463 | You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License. For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 |
471 | 11. Patents.
472 |
473 | A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based. The
475 | work thus licensed is called the contributor's "contributor version".
476 |
477 | A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version. For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 |
487 | Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 |
492 | In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement). To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 |
499 | If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients. "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 |
513 | If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 |
521 | A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License. You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 |
536 | Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 |
540 | 12. No Surrender of Others' Freedom.
541 |
542 | If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License. If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all. For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 |
552 | 13. Use with the GNU Affero General Public License.
553 |
554 | Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work. The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 |
563 | 14. Revised Versions of this License.
564 |
565 | The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time. Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 |
570 | Each version is given a distinguishing version number. If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation. If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 |
579 | If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 |
584 | Later license versions may give you additional or different
585 | permissions. However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 |
589 | 15. Disclaimer of Warranty.
590 |
591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 |
600 | 16. Limitation of Liability.
601 |
602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 |
612 | 17. Interpretation of Sections 15 and 16.
613 |
614 | If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 |
621 | END OF TERMS AND CONDITIONS
622 |
623 | How to Apply These Terms to Your New Programs
624 |
625 | If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 |
629 | To do so, attach the following notices to the program. It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 |
634 | {one line to give the program's name and a brief idea of what it does.}
635 | Copyright (C) {year} {name of author}
636 |
637 | This program is free software: you can redistribute it and/or modify
638 | it under the terms of the GNU General Public License as published by
639 | the Free Software Foundation, either version 3 of the License, or
640 | (at your option) any later version.
641 |
642 | This program is distributed in the hope that it will be useful,
643 | but WITHOUT ANY WARRANTY; without even the implied warranty of
644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
645 | GNU General Public License for more details.
646 |
647 | You should have received a copy of the GNU General Public License
648 | along with this program. If not, see <http://www.gnu.org/licenses/>.
649 |
650 | Also add information on how to contact you by electronic and paper mail.
651 |
652 | If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 |
655 | {project} Copyright (C) {year} {fullname}
656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 | This is free software, and you are welcome to redistribute it
658 | under certain conditions; type `show c' for details.
659 |
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License. Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 |
664 | You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | <http://www.gnu.org/licenses/>.
668 |
669 | The GNU General Public License does not permit incorporating your program
670 | into proprietary programs. If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library. If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License. But first, please read
674 | <http://www.gnu.org/philosophy/why-not-lgpl.html>.
675 |
676 |
--------------------------------------------------------------------------------
/PrepareSummaries/prepForShiny.R:
--------------------------------------------------------------------------------
1 | # setwd("~/Documents/Projects/scratch.git/shapeOfML")
2 |
3 | options(gsubfn.engine="R")
4 | library(sqldf)
5 |
6 |
7 | #
8 | # Globals
9 | #
# Per-problem summary table written by the scoring step. Dots in the header
# are replaced by underscores so the columns can be named directly in SQL.
PROBLEM_DESC = read.table('data/problemDescr.csv', header=TRUE, sep=',', stringsAsFactors=TRUE)
colnames(PROBLEM_DESC) = gsub('\\.', '_', colnames(PROBLEM_DESC) )

# Row-level model output: one row per (problem, model, test row, class index).
MODEL_RESULTS = read.table('data/modelres.csv.gz', header=TRUE, sep=',', stringsAsFactors=TRUE)
colnames(MODEL_RESULTS) = gsub('\\.', '_', colnames(MODEL_RESULTS) )
# remove the RBM
# isRBM = grepl("(RBM)", MODEL_RESULTS$Model_Name)
# MODEL_RESULTS = subset(MODEL_RESULTS, !isRBM)
# MODEL_RESULTS$Model_Name = factor(MODEL_RESULTS$Model_Name)


# Distinct model names (a factor), in sorted order.
ALLMODELS = sort(unique(MODEL_RESULTS$Model_Name))

# Same names as a character vector, without the pseudo-model 'ground truth'.
ALL_BUT_GROUND = setdiff(ALLMODELS, c('ground truth'))

# Short display label for every model, as a character vector named by the
# full model name. The lookup is keyed by name rather than by position in
# ALLMODELS: the sorted position of the lower-case "ground truth" depends on
# the collation rules in effect, and extra models (e.g. the "(RBM)" variants)
# would shift or break a positional assignment. Models without a short label
# keep their full name.
LABELMAP = local({
  shortLabels = c("Decision Tree"="DT", "Gradient Boosting"="GB",
                  "ground truth"="ground truth", "KNN5"="KNN5",
                  "Logistic Regression"="LR", "Naive Bayes"="NB",
                  "Random Forest"="RF", "SVM"="SVM")
  modelNames = as.character(ALLMODELS)
  labels = unname(shortLabels[modelNames])
  unknown = is.na(labels)
  labels[unknown] = modelNames[unknown]
  names(labels) = modelNames
  labels
})

# Problem file name of every result row (not de-duplicated).
ALLFILES = MODEL_RESULTS$Problem_File_Name
29 |
30 |
31 | #
32 | # Functions
33 | #
34 |
35 | #
36 | # Just calculate the accuracy matrix once; we can use subsets of it later
37 | # [Problem_File_Name, Model_Name, Row_Number, PredictionIndex, Correct_Answer, isRight]
38 | #
# Build the accuracy matrix from the rows each model flagged as its top
# prediction (ArgMaxIndicator = 1).
#
# modelres: data frame of model results (defaults to MODEL_RESULTS); sqldf
#           resolves the table name `modelres` in this function's frame.
# Returns the selected rows, ordered by problem, model and row number, with
# an added numeric column isRight (1 when PredictionIndex equals
# Correct_Answer, 0 otherwise).
calculateAccuracyMatrix = function(modelres=MODEL_RESULTS) {
  topPredictions = sqldf("select * from modelres
                          where ArgMaxIndicator=1
                          group by Problem_File_Name, Model_Name, Row_Number
                          order by Problem_File_Name, Model_Name, Row_Number")
  hit = topPredictions$PredictionIndex == topPredictions$Correct_Answer
  topPredictions$isRight = as.numeric(hit)
  topPredictions
}

# Computed once up front; later steps take subsets of it.
ACCURACY_MATRIX = calculateAccuracyMatrix()
51 |
52 | #
53 | # calculate the matrix [Problem_File_Name, modelName1, modelName2, sqDist]
54 | #
55 | # NOTE: if I do a similar calculation with deviance, use SmoothedProbIndex column
56 | #
# Squared distance between every ordered pair of models on each problem.
#
# The result rows are self-joined on problem file, row number and class
# index, and the squared differences of ProbIndex are summed per
# (problem, model 1, model 2).
#
# modelres: data frame of model results (defaults to MODEL_RESULTS); sqldf
#           resolves the table name `modelres` in this function's frame.
# Returns a data frame with columns Problem_File_Name, modelName1,
# modelName2 and sqDist, ordered by those first three columns.
calculateDistanceTable = function(modelres=MODEL_RESULTS) {
  sqldf("select
           m1.Problem_File_Name Problem_File_Name,
           m1.Model_Name modelName1,
           m2.Model_Name modelName2,
           sum(square(m1.ProbIndex-m2.ProbIndex)) sqDist
         from
           modelres m1,
           modelres m2
         where
           m1.Problem_File_Name = m2.Problem_File_Name
           and m1.Row_Number = m2.Row_Number
           and m1.PredictionIndex = m2.PredictionIndex
         group by
           m1.Problem_File_Name,
           m1.Model_Name,
           m2.Model_Name
         order by m1.Problem_File_Name, m1.Model_Name, m2.Model_Name")
}

DISTANCE_TABLE = calculateDistanceTable()
79 |
# Persist every table the Shiny app loads into a single RData file.
# NOTE(review): the target directory "ShinyApps" must already exist; the app
# in this repository appears to live under "ExploreModels" -- confirm the path.
save(list=c("PROBLEM_DESC", "MODEL_RESULTS", "ALLMODELS", "ALL_BUT_GROUND",
            "LABELMAP", "ALLFILES", "ACCURACY_MATRIX", "DISTANCE_TABLE"),
     file="ShinyApps/modeltables.RData")
83 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | [code and data for "The Geometry of Classifiers"](http://www.win-vector.com/blog/2014/12/the-geometry-of-classifiers/)
3 |
--------------------------------------------------------------------------------
/ScoreModels/processArffs.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | # script from Win-Vector LLC http://www.win-vector.com/ GPL3 license
4 | # run the data from the paper below through scikit-learn
5 | # http://jmlr.csail.mit.edu/papers/v15/delgado14a.html
6 | # "Do we Need Hundreds of Classifiers to Solve Real World Classification Problems?" Manuel Fernandez-Delgado, Eva Cernadas, Senen Barro, Dinani Amorim; 15(Oct):3133-3181, 2014
7 | # see http://www.win-vector.com/blog/2014/12/a-comment-on-preparing-data-for-classifiers/ (and other blog articles) for some details
8 | # reads argv[1] which should be a gzipped tar file containing arff files ending in .arff
9 | # expects all input variables to be numeric and result variable to be categorical
10 | # produces model results in modelres.csv.gz and problem summaries in problemDescr.csv
11 | # R code to look at accuracy:
12 | # m <- read.table('modelres.csv.gz',header=TRUE,sep=',')
13 | # aggregate(ArgMaxIndicator~Problem.File.Name+Model.Name,data=subset(m, PredictionIndex== Correct.Answer),FUN=mean)
14 |
15 |
16 | import re
17 | import math
18 | import tarfile
19 | import random
20 | import numpy.random
21 | import csv
22 | import sys
23 | import time
24 | import os
25 | # liac-arff https://pypi.python.org/pypi/liac-arff 'pip install liac-arff'
26 | import arff
27 | import gzip
28 | import pandas
29 | import numpy
30 | import sklearn.svm
31 | import sklearn.ensemble
32 | import sklearn.tree
33 | import sklearn.naive_bayes
34 | import sklearn.linear_model
35 | import sklearn.neighbors
36 | import scipy.optimize
37 | import sklearn.neural_network
38 | import multiprocessing
39 | import contextlib
40 |
41 | print 'start',os.getpid(),time.strftime("%c")
42 | sys.stdout.flush()
43 |
44 |
45 | testTarget = 100
46 | testStrategyCut = 500
47 | doXForm = False
48 | maxXFormTrainSize = 5000
49 | maxCTrainSize = 200000
50 |
51 |
52 | randSeed = 2382085237
53 | random.seed(randSeed)
54 | numpy.random.seed(randSeed)
55 |
56 |
57 |
58 | # based on http://scikit-learn.org/stable/auto_examples/plot_rbm_logistic_classification.html#example-plot-rbm-logistic-classification-py
class RBMform:
    """Restricted Boltzmann machine feature transform.

    Columns are rescaled to [0,1] using the ranges seen in fit(); columns
    whose range is at most 1.0e-6 are dropped.  transform() returns the
    scaled columns followed by the BernoulliRBM outputs.
    """
    def __init__(self, random_state):
        # random_state is handed straight to sklearn's BernoulliRBM
        self.xform_ = sklearn.neural_network.BernoulliRBM(random_state=random_state)
    def scale_(self,x):
        """Scale the retained columns of x to [0,1], clipping values outside the fitted range.

        x: 2-d data indexable as x[i][j].
        Returns a list of rows (lists), one value per retained column.
        """
        # per-column (index, low, width), computed once instead of once per cell
        spans = [ (j, self.colrange_[j][0], 1.0*(self.colrange_[j][1]-self.colrange_[j][0])) for j in self.varIndices_ ]
        return [ [ min(1.0,max(0.0,(x[i][j]-lo)/width)) for (j,lo,width) in spans ] for i in range(len(x)) ]
    def fit(self, xTrain):
        """Record the column ranges of xTrain and fit the RBM on the scaled data.

        Returns self so calls can be chained.
        """
        # rescale to range [0,1]
        xTrain = numpy.array(xTrain)
        nCol = len(xTrain[0])
        self.colrange_ = [ [min(xTrain[:,j]),max(xTrain[:,j])] for j in range(nCol)]
        # keep only columns that actually vary (avoids dividing by a zero width)
        self.varIndices_ = [ j for j in range(nCol) if self.colrange_[j][1]>(self.colrange_[j][0]+1.0e-6) ]
        xScale = self.scale_(xTrain)
        self.xform_.fit(xScale)
        return self
    def transform(self,xTest):
        """Return, per row of xTest, the scaled columns followed by the RBM-transformed values."""
        nRow = len(xTest)
        # scale once and reuse the result for both halves of the output
        sc = self.scale_(xTest)
        xf = self.xform_.transform(sc)
        # return transformed plus original scaled columns
        return [ numpy.array(sc[i]).tolist() + numpy.array(xf[i]).tolist() for i in range(nRow) ]
82 |
83 |
# Model name -> factory.  Every factory accepts a random state; the Naive
# Bayes and KNN5 factories ignore it.
model_dict = {
    "SVM": (lambda rs: sklearn.svm.SVC(kernel='rbf',gamma=0.001,C=10.0,probability=True,random_state=rs)),
    "Random Forest": (lambda rs: sklearn.ensemble.RandomForestClassifier(n_estimators=100, max_depth=10, n_jobs=-1,random_state=rs)),
    "Gradient Boosting": (lambda rs: sklearn.ensemble.GradientBoostingClassifier(random_state=rs)),
    "Decision Tree": (lambda rs: sklearn.tree.DecisionTreeClassifier(random_state=rs)),
    "Naive Bayes": (lambda rs: sklearn.naive_bayes.GaussianNB()),
    "Logistic Regression": (lambda rs: sklearn.linear_model.LogisticRegression(random_state=rs)),
    "KNN5": (lambda rs: sklearn.neighbors.KNeighborsClassifier(n_neighbors=5)),
}
92 |
93 |
94 |
95 | tarFileName = sys.argv[1] # data.tar.gz
96 | doutfile = 'modelres.csv.gz'
97 | poutfile = 'problemDescr.csv'
98 | print 'reading:',tarFileName
99 | print 'writing data to:',doutfile
100 | print 'writing problem summaries to:',poutfile
101 | sys.stdout.flush()
102 | realTypes = set(['REAL', 'NUMERIC'])
103 |
# One-hot indicator of the first maximal entry.
# v: numeric sequence (must be non-empty, max() is taken over it)
# returns a numpy array of len(v) zeros with 1.0 at the first position whose
# value is >= max(v)
def argMaxV(v):
    indicator = numpy.zeros(len(v))
    top = max(v)
    firstHit = next((k for k, val in enumerate(v) if val >= top), None)
    if firstHit is not None:
        indicator[firstHit] = 1.0
    return indicator
115 |
# Smoothing step: lift every entry that is at or below epsilon up to epsilon
# and rescale the remaining entries of vIn so that the pegged entries plus
# the rescaled ones total 1.  Entries pushed down to epsilon or below by the
# rescaling are pegged on the next pass; the loop stops once nothing needs
# lifting, so a vector already clear of the floor comes back unchanged.
# vIn: numeric sequence of non-negative entries summing to more than epsilon*len(vIn)
# returns a numpy copy of vIn when nothing had to be lifted, otherwise a list
# example smoothElevate([0.5,0.39,0.11,0.0],0.1) -> [0.44943820224719105, 0.35056179775280905, 0.1, 0.1]
def smoothElevate(vIn,epsilon):
    current = numpy.array(vIn)
    size = len(current)
    while True:
        floorSet = set(k for k in range(size) if current[k]<=epsilon)
        shortfall = sum(epsilon - current[k] for k in floorSet)
        if shortfall<=0.0:
            return current
        # mass of the original entries that are not pegged to the floor
        freeMass = sum(vIn[k] for k in range(size) if k not in floorSet)
        rescale = (1.0 - len(floorSet)*epsilon)/freeMass
        current = [ epsilon if k in floorSet else rescale*vIn[k] for k in range(size) ]
132 |
133 | def processArff(name,reln):
134 | rname = reln['relation']
135 | attr = reln['attributes']
136 | print 'start\t',rname,os.getpid(),time.strftime("%c")
137 | sys.stdout.flush()
138 | data = reln['data']
139 | ncol = len(attr)
140 | # assuming unique non-numeric feature is category to be predicted
141 | features = [ i for i in range(ncol) if isinstance(attr[i][1],basestring) and attr[i][1] in realTypes ]
142 | outcomes = [ i for i in range(ncol) if not i in set(features) ]
143 | if (not len(outcomes)==1) or (len(features)<=0):
144 | print '\tskip (not in std form)',name,attr,os.getpid(),time.strftime("%c")
145 | sys.stdout.flush()
146 | return None
147 | outcome = outcomes[0]
148 | xvars = [ [ row[i] for i in features ] for row in data ]
149 | yvar = [ row[outcome] for row in data ]
150 | nrow = len(yvar)
151 | levels = sorted([ l for l in set(yvar) ])
152 | nlevels = len(levels)
153 | levelMap = dict([ (levels[j],j) for j in range(nlevels) ])
154 | prng = numpy.random.RandomState(randSeed)
155 | def scoreRows(testIndices):
156 | scoredRowsI = []
157 | trainIndices = [ i for i in range(nrow) if i not in set(testIndices) ]
158 | if len(trainIndices)>maxCTrainSize:
159 | trainIndices = sorted(prng.choice(trainIndices,size=maxCTrainSize,replace=False))
160 | xTrain = [ xvars[i] for i in trainIndices ]
161 | yTrain = [ yvar[i] for i in trainIndices ]
162 | nTrain = len(yTrain)
163 | xTest = [ xvars[i] for i in testIndices ]
164 | yTest = [ yvar[i] for i in testIndices ]
165 | nTest = len(yTest)
166 | trainLevels = sorted([ l for l in set(yTrain) ])
167 | if (nTrain>0) and (nTest>0) and (len(trainLevels)==nlevels):
168 | # naive non-parametric empirical idea:
169 | # no evidence any event can be rarer than 1/(nTrain+1)
170 | # the elevate/smoothing code needs the total weight
171 | # to sum to 1 and to have nlevels*epsilon < 1 so there is
172 | # weight to move around (so we add the 1/(1+nlevels) condition)
173 | epsilon = min(1.0/(1.0+nlevels),1.0/(1.0+nTrain))
174 | for i in range(nTest):
175 | ti = numpy.zeros(nlevels)
176 | ti[levelMap[yTest[i]]] = 1
177 | adjTruth = smoothElevate(ti,epsilon)
178 | for j in range(nlevels):
179 | scoredRowsI.append([name,'ground truth',testIndices[i],levelMap[yTest[i]],j,ti[j],ti[j],adjTruth[j]])
180 | def runModels(mSuffix,xTrainD,xTestD):
181 | for modelName, modelFactory in model_dict.iteritems():
182 | model = modelFactory(numpy.random.RandomState(randSeed))
183 | model.fit(xTrainD, yTrain)
184 | assert len(model.classes_)==nlevels
185 | for j in range(nlevels):
186 | assert levels[j]==model.classes_[j]
187 | pred = model.predict_proba(xTestD)
188 | for i in range(nTest):
189 | sawMax = False
190 | maxIndicator = argMaxV(pred[i])
191 | adjPred = smoothElevate(pred[i],epsilon)
192 | for j in range(nlevels):
193 | scoredRowsI.append([name,modelName+mSuffix,testIndices[i],levelMap[yTest[i]],j,pred[i][j],maxIndicator[j],adjPred[j]])
194 | runModels('',xTrain,xTest)
195 | if doXForm:
196 | xform = RBMform(numpy.random.RandomState(randSeed))
197 | if nTrain<=maxXFormTrainSize:
198 | xform.fit(xTrain)
199 | else:
200 | xIndices = sorted(prng.choice(range(nTrain),size=maxXFormTrainSize,replace=False))
201 | xform.fit([xTrain[i] for i in xIndices])
202 | runModels(' (RBM)',xform.transform(xTrain),xform.transform(xTest))
203 | return scoredRowsI
204 | print 'initial \t',[name,rname,nrow,len(features),nlevels],os.getpid(),time.strftime("%c")
205 | sys.stdout.flush()
206 | scoredRows = []
207 | nTested = 0
208 | if (nrow>10) and (nlevels>1):
209 | if nrow>testStrategyCut:
210 | # single hold-out testing
211 | nTries = 10
212 | while (nTested<=0) and (nTries>0):
213 | testIndices = sorted(prng.choice(range(nrow),size=testTarget,replace=False))
214 | nTries -= 1
215 | sri = scoreRows(testIndices)
216 | if len(sri)>0:
217 | scoredRows.extend(scoreRows(testIndices))
218 | nTested = testTarget
219 | probDescr = [name,rname,nrow,'test-set cross-validation',nrow-testTarget,nTested,len(features),nlevels]
220 | else:
221 | # hold out one cross validation
222 | candidates = [ i for i in range(nrow) ]
223 | prng.shuffle(candidates)
224 | for ci in candidates:
225 | testIndices = [ ci ]
226 | sri = scoreRows(testIndices)
227 | if len(sri)>0:
228 | scoredRows.extend(sri)
229 | nTested += 1
230 | if nTested>=testTarget:
231 | break
232 | probDescr = [name,rname,nrow,'leave-one-out cross-validation',nrow-1,nTested,len(features),nlevels]
233 | print '\tdone\t',probDescr,os.getpid(),time.strftime("%c")
234 | sys.stdout.flush()
235 | if nTested>0:
236 | return { 'pDesc': probDescr, 'scoredRows': scoredRows }
237 | else:
238 | print '\tskip (could not split)',name,os.getpid(),time.strftime("%c")
239 | sys.stdout.flush()
240 | return None
241 |
242 |
def processArffByName(targetName):
    """Score the single archive member whose name equals targetName.

    Opens the gzipped tar archive named by the module-level tarFileName,
    walks its members in order, and for the first member whose name matches
    targetName parses it with arff.load and passes the result to processArff.

    Returns whatever processArff returns for that member, or None when no
    member of the archive carries that name.
    """
    with contextlib.closing(tarfile.open(tarFileName,'r:gz')) as archive:
        for entry in archive.getmembers():
            if entry.name != targetName:
                continue
            # first exact match wins; the archive is closed on the way out
            return processArff(entry.name,arff.load(archive.extractfile(entry)))
    return None
250 |
251 | # get targets
252 | names = []
253 | with contextlib.closing(tarfile.open(tarFileName,'r:gz')) as t_in:
254 | for member in t_in.getmembers():
255 | name = member.name
256 | if not name.endswith('.arff'):
257 | continue
258 | sys.stdout.flush()
259 | if name.endswith('_test.arff'):
260 | print '\tskip (test)',name
261 | sys.stdout.flush()
262 | continue
263 | names.append(name)
264 |
265 | print names,os.getpid(),time.strftime("%c")
266 | pool = multiprocessing.Pool()
267 | results = pool.map(processArffByName,names)
268 | print 'done processing pool',os.getpid(),time.strftime("%c")
269 |
270 | # write out results
271 | with contextlib.closing(gzip.open(doutfile,'wb')) as f_out:
272 | with open(poutfile,'wb') as p_out:
273 | pwriter = csv.writer(p_out,delimiter=',')
274 | pwriter.writerow(['Problem File Name','Relation Name',"nRows","TestMethod","nTrain","nTest","nVars","nTargets"])
275 | dwriter = csv.writer(f_out,delimiter=',')
276 | dwriter.writerow(['Problem File Name','Model Name','Row Number','Correct Answer','PredictionIndex','ProbIndex','ArgMaxIndicator','SmoothedProbIndex'])
277 | for res in results:
278 | if res is not None:
279 | pwriter.writerow(res['pDesc'])
280 | for r in res['scoredRows']:
281 | dwriter.writerow(r)
282 | f_out.flush()
283 | p_out.flush()
284 |
285 | print 'done',os.getpid(),time.strftime("%c")
286 | sys.stdout.flush()
287 |
--------------------------------------------------------------------------------
/ScoreModels/runOnEC2.bash:
--------------------------------------------------------------------------------
#!/bin/bash

# Run this script on a 64-bit Amazon Linux EC2 instance.
# Recommend a 64-bit instance with at least 8GB of RAM.
# Installs dev tools and Python sklearn on a fresh machine,
# downloads the data, verifies it, and runs the scoring job.
# As of 12-17-2014 Amazon Linux defaults to Python 2.6, so written for that.
# Did not work on a micro instance, did work on a c3.2xlarge instance.

# SHA-1 the downloaded archive is expected to have.
expected_sha="252603f2a5bf8d7975a392cdf5f84fb1c5d9b5c2"


# Start getting data (in background) and remember the job so its
# exit status can be checked once the software install is finished.
echo "starting background data download"
wget http://persoal.citius.usc.es/manuel.fernandez.delgado/papers/jmlr/data.tar.gz 1>/dev/null 2>&1 &
download_pid=$!


echo "installing software"
# Configure machine
sudo yum -y groupinstall "Development Tools"
# from http://dacamo76.com/blog/2012/12/07/installing-scikit-learn-on-amazon-ec2/
sudo yum -y install gcc-c++ python-devel atlas-sse3-devel lapack-devel
sudo easy_install pip
sudo pip install -U liac-arff
sudo pip install -U pytz numpy pandas scipy scikit-learn


# Wait for data load to finish; stop if the download itself failed.
echo "waiting for data to finish downloading"
if ! wait "$download_pid"; then
  echo "data download failed" >&2
  exit 1
fi

# Verify the archive instead of only printing the digest for a human to compare.
echo "expect: ${expected_sha}  data.tar.gz"
actual_line=$(shasum data.tar.gz)
echo "$actual_line"
actual_sha=${actual_line%% *}
if [ "$actual_sha" != "$expected_sha" ]; then
  echo "checksum mismatch for data.tar.gz" >&2
  exit 1
fi


# run the job (foreground; stdout and stderr both go to the log)
echo "running the job"
python processArffs.py data.tar.gz > processArffs_log.txt 2>&1

echo "job done"
39 |
40 |
41 |
42 |
--------------------------------------------------------------------------------
/ScoreModelsR/.gitignore:
--------------------------------------------------------------------------------
1 | data.tar.gz
2 |
--------------------------------------------------------------------------------
/data/modelres.csv.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/ExploreModels/49e68c6101739d06eb3a3ab5b6a6fad337ca9bf0/data/modelres.csv.gz
--------------------------------------------------------------------------------
/data/problemDescr.csv:
--------------------------------------------------------------------------------
1 | Problem File Name,Relation Name,nRows,TestMethod,nTrain,nTest,nVars,nTargets
2 | abalone/abalone.arff,abalone,4177,test-set cross-validation,4077,100,8,3
3 | acute-inflammation/acute-inflammation.arff,acute-inflammation,120,leave-one-out cross-validation,119,100,6,2
4 | acute-nephritis/acute-nephritis.arff,acute-nephritis,120,leave-one-out cross-validation,119,100,6,2
5 | adult/adult_train.arff,adult,32561,test-set cross-validation,32461,100,14,2
6 | annealing/annealing_train.arff,annealing,798,test-set cross-validation,698,100,31,5
7 | arrhythmia/arrhythmia.arff,arrhythmia,452,leave-one-out cross-validation,451,100,262,13
8 | audiology-std/con_patrons_repetidos/audiology-std_train.arff,audiology-std,194,leave-one-out cross-validation,193,100,59,18
9 | audiology-std/audiology-std_train.arff,audiology-std,171,leave-one-out cross-validation,170,100,59,18
10 | balance-scale/balance-scale.arff,balance-scale,625,test-set cross-validation,525,100,4,3
11 | balloons/balloons.arff,balloons,16,leave-one-out cross-validation,15,16,4,2
12 | bank/bank.arff,bank,4521,test-set cross-validation,4421,100,16,2
13 | blood/blood.arff,blood,748,test-set cross-validation,648,100,4,2
14 | breast-cancer/breast-cancer.arff,breast-cancer,286,leave-one-out cross-validation,285,100,9,2
15 | breast-cancer-wisc/breast-cancer-wisc.arff,breast-cancer-wisc,699,test-set cross-validation,599,100,9,2
16 | breast-cancer-wisc-diag/breast-cancer-wisc-diag.arff,breast-cancer-wisc-diag,569,test-set cross-validation,469,100,30,2
17 | breast-cancer-wisc-prog/breast-cancer-wisc-prog.arff,breast-cancer-wisc-prog,198,leave-one-out cross-validation,197,100,33,2
18 | breast-tissue/breast-tissue.arff,breast-tissue,106,leave-one-out cross-validation,105,100,9,6
19 | car/car.arff,car,1728,test-set cross-validation,1628,100,6,4
20 | cardiotocography-3clases/cardiotocography-3clases.arff,cardiotocography-3clases,2126,test-set cross-validation,2026,100,21,3
21 | cardiotocography-10clases/cardiotocography-10clases.arff,cardiotocography-10clases,2126,test-set cross-validation,2026,100,21,10
22 | chess-krvk/chess-krvk.arff,chess-krvk,28056,test-set cross-validation,27956,100,6,18
23 | chess-krvkp/chess-krvkp.arff,chess-krvkp,3196,test-set cross-validation,3096,100,36,2
24 | congressional-voting/congressional-voting.arff,congressional-voting,435,leave-one-out cross-validation,434,100,16,2
25 | conn-bench-sonar-mines-rocks/conn-bench-sonar-mines-rocks.arff,conn-bench-sonar-mines-rocks,208,leave-one-out cross-validation,207,100,60,2
26 | conn-bench-vowel-deterding/conn-bench-vowel-deterding_train.arff,conn-bench-vowel-deterding,528,test-set cross-validation,428,100,11,11
27 | connect-4/connect-4.arff,connect-4,67557,test-set cross-validation,67457,100,42,2
28 | contrac/contrac.arff,contrac,1473,test-set cross-validation,1373,100,9,3
29 | credit-approval/credit-approval.arff,credit-approval,690,test-set cross-validation,590,100,15,2
30 | cylinder-bands/cylinder-bands.arff,cylinder-bands,512,test-set cross-validation,412,100,35,2
31 | dermatology/dermatology.arff,dermatology,366,leave-one-out cross-validation,365,100,34,6
32 | echocardiogram/echocardiogram.arff,echocardiogram,131,leave-one-out cross-validation,130,100,10,2
33 | ecoli/ecoli.arff,ecoli,336,leave-one-out cross-validation,335,100,7,8
34 | energy-y1/energy-y1.arff,energy-y1,768,test-set cross-validation,668,100,8,3
35 | energy-y2/energy-y2.arff,energy-y2,768,test-set cross-validation,668,100,8,3
36 | fertility/fertility.arff,fertility,100,leave-one-out cross-validation,99,100,9,2
37 | flags/flags.arff,flags,194,leave-one-out cross-validation,193,100,28,8
38 | glass/glass.arff,glass,214,leave-one-out cross-validation,213,100,9,6
39 | haberman-survival/haberman-survival.arff,haberman-survival,306,leave-one-out cross-validation,305,100,3,2
40 | hayes-roth/hayes-roth_train.arff,hayes-roth,132,leave-one-out cross-validation,131,100,3,3
41 | heart-cleveland/heart-cleveland.arff,heart-cleveland,303,leave-one-out cross-validation,302,100,13,5
42 | heart-hungarian/heart-hungarian.arff,heart-hungarian,294,leave-one-out cross-validation,293,100,12,2
43 | heart-switzerland/heart-switzerland.arff,heart-switzerland,123,leave-one-out cross-validation,122,100,12,5
44 | heart-va/heart-va.arff,heart-va,200,leave-one-out cross-validation,199,100,12,5
45 | hepatitis/hepatitis.arff,hepatitis,155,leave-one-out cross-validation,154,100,19,2
46 | hill-valley/hill-valley_train.arff,hill-valley,606,test-set cross-validation,506,100,100,2
47 | horse-colic/horse-colic_train.arff,horse-colic,300,leave-one-out cross-validation,299,100,25,2
48 | ilpd-indian-liver/ilpd-indian-liver.arff,ilpd-indian-liver,583,test-set cross-validation,483,100,9,2
49 | image-segmentation/image-segmentation_train.arff,image-segmentation,210,leave-one-out cross-validation,209,100,18,7
50 | ionosphere/ionosphere.arff,ionosphere,351,leave-one-out cross-validation,350,100,33,2
51 | iris/iris.arff,iris,150,leave-one-out cross-validation,149,100,4,3
52 | led-display/led-display.arff,led-display,1000,test-set cross-validation,900,100,7,10
53 | lenses/lenses.arff,lenses,24,leave-one-out cross-validation,23,24,4,3
54 | letter/letter.arff,letter,20000,test-set cross-validation,19900,100,16,26
55 | libras/libras.arff,libras,360,leave-one-out cross-validation,359,100,90,15
56 | low-res-spect/low-res-spect.arff,low-res-spect,531,test-set cross-validation,431,100,100,9
57 | lung-cancer/lung-cancer.arff,lung-cancer,32,leave-one-out cross-validation,31,32,56,3
58 | lymphography/lymphography.arff,lymphography,148,leave-one-out cross-validation,147,100,18,4
59 | magic/magic.arff,magic,19020,test-set cross-validation,18920,100,10,2
60 | mammographic/mammographic.arff,mammographic,961,test-set cross-validation,861,100,5,2
61 | miniboone/miniboone.arff,miniboone,130064,test-set cross-validation,129964,100,50,2
62 | molec-biol-promoter/molec-biol-promoter.arff,molec-biol-promoter,106,leave-one-out cross-validation,105,100,57,2
63 | molec-biol-splice/molec-biol-splice.arff,molec-biol-splice,3190,test-set cross-validation,3090,100,60,3
64 | monks-1/monks-1_train.arff,monks-1,124,leave-one-out cross-validation,123,100,6,2
65 | monks-2/monks-2_train.arff,monks-2,169,leave-one-out cross-validation,168,100,6,2
66 | monks-3/monks-3_train.arff,monks-3,122,leave-one-out cross-validation,121,100,6,2
67 | mushroom/mushroom.arff,mushroom,8124,test-set cross-validation,8024,100,21,2
68 | musk-1/musk-1.arff,musk-1,476,leave-one-out cross-validation,475,100,166,2
69 | musk-2/musk-2.arff,musk-2,6598,test-set cross-validation,6498,100,166,2
70 | nursery/nursery.arff,nursery,12960,test-set cross-validation,12860,100,8,5
71 | oocytes_merluccius_nucleus_4d/oocytes_merluccius_nucleus_4d.arff,oocytes_merluccius_nucleus_4d,1022,test-set cross-validation,922,100,41,2
72 | oocytes_merluccius_states_2f/oocytes_merluccius_states_2f.arff,oocytes_merluccius_states_2f,1022,test-set cross-validation,922,100,25,3
73 | oocytes_trisopterus_nucleus_2f/oocytes_trisopterus_nucleus_2f.arff,oocytes_trisopterus_nucleus_2f,912,test-set cross-validation,812,100,25,2
74 | oocytes_trisopterus_states_5b/oocytes_trisopterus_states_5b.arff,oocytes_trisopterus_states_5b,912,test-set cross-validation,812,100,32,3
75 | optical/optical_train.arff,optical,3823,test-set cross-validation,3723,100,62,10
76 | ozone/ozone.arff,ozone,2536,test-set cross-validation,2436,100,72,2
77 | page-blocks/page-blocks.arff,page-blocks,5473,test-set cross-validation,5373,100,10,5
78 | parkinsons/parkinsons.arff,parkinsons,195,leave-one-out cross-validation,194,100,22,2
79 | pendigits/pendigits_train.arff,pendigits,7494,test-set cross-validation,7394,100,16,10
80 | pima/pima.arff,pima,768,test-set cross-validation,668,100,8,2
81 | pittsburg-bridges-MATERIAL/pittsburg-bridges-MATERIAL.arff,pittsburg-bridges-MATERIAL,106,leave-one-out cross-validation,105,100,7,3
82 | pittsburg-bridges-REL-L/pittsburg-bridges-REL-L.arff,pittsburg-bridges-REL-L,103,leave-one-out cross-validation,102,100,7,3
83 | pittsburg-bridges-SPAN/pittsburg-bridges-SPAN.arff,pittsburg-bridges-SPAN,92,leave-one-out cross-validation,91,92,7,3
84 | pittsburg-bridges-T-OR-D/pittsburg-bridges-T-OR-D.arff,pittsburg-bridges-T-OR-D,102,leave-one-out cross-validation,101,100,7,2
85 | pittsburg-bridges-TYPE/pittsburg-bridges-TYPE.arff,pittsburg-bridges-TYPE,105,leave-one-out cross-validation,104,100,7,6
86 | planning/planning.arff,planning,182,leave-one-out cross-validation,181,100,12,2
87 | plant-margin/plant-margin.arff,plant-margin,1600,test-set cross-validation,1500,100,64,100
88 | plant-shape/plant-shape.arff,plant-shape,1600,test-set cross-validation,1500,100,64,100
89 | plant-texture/plant-texture.arff,plant-texture,1599,test-set cross-validation,1499,100,64,100
90 | post-operative/post-operative.arff,post-operative,90,leave-one-out cross-validation,89,90,8,3
91 | primary-tumor/primary-tumor.arff,primary-tumor,330,leave-one-out cross-validation,329,100,17,15
92 | ringnorm/ringnorm.arff,ringnorm,7400,test-set cross-validation,7300,100,20,2
93 | seeds/seeds.arff,seeds,210,leave-one-out cross-validation,209,100,7,3
94 | semeion/semeion.arff,semeion,1593,test-set cross-validation,1493,100,256,10
95 | soybean/soybean_train.arff,soybean,307,leave-one-out cross-validation,306,100,35,18
96 | spambase/spambase.arff,spambase,4601,test-set cross-validation,4501,100,57,2
97 | spect/spect_train.arff,spect,79,leave-one-out cross-validation,78,79,22,2
98 | spectf/spectf_train.arff,spectf,80,leave-one-out cross-validation,79,80,44,2
99 | statlog-australian-credit/statlog-australian-credit.arff,statlog-australian-credit,690,test-set cross-validation,590,100,14,2
100 | statlog-german-credit/statlog-german-credit.arff,statlog-german-credit,1000,test-set cross-validation,900,100,24,2
101 | statlog-heart/statlog-heart.arff,statlog-heart,270,leave-one-out cross-validation,269,100,13,2
102 | statlog-image/statlog-image.arff,statlog-image,2310,test-set cross-validation,2210,100,18,7
103 | statlog-landsat/statlog-landsat_train.arff,statlog-landsat,4435,test-set cross-validation,4335,100,36,6
104 | statlog-shuttle/statlog-shuttle_train.arff,statlog-shuttle,43500,test-set cross-validation,43400,100,9,7
105 | statlog-vehicle/statlog-vehicle.arff,statlog-vehicle,846,test-set cross-validation,746,100,18,4
106 | steel-plates/steel-plates.arff,steel-plates,1941,test-set cross-validation,1841,100,27,7
107 | synthetic-control/synthetic-control.arff,synthetic-control,600,test-set cross-validation,500,100,60,6
108 | teaching/teaching.arff,teaching,151,leave-one-out cross-validation,150,100,5,3
109 | thyroid/thyroid_train.arff,thyroid,3772,test-set cross-validation,3672,100,21,3
110 | tic-tac-toe/tic-tac-toe.arff,tic-tac-toe,958,test-set cross-validation,858,100,9,2
111 | titanic/titanic.arff,titanic,2201,test-set cross-validation,2101,100,3,2
112 | twonorm/twonorm.arff,twonorm,7400,test-set cross-validation,7300,100,20,2
113 | vertebral-column-2clases/datos_orixinais/column_3C_weka.arff,column_3C_weka,310,leave-one-out cross-validation,309,100,6,3
114 | vertebral-column-2clases/datos_orixinais/column_2C_weka.arff,column_2C_weka,310,leave-one-out cross-validation,309,100,6,2
115 | vertebral-column-2clases/vertebral-column-2clases.arff,vertebral-column-2clases,310,leave-one-out cross-validation,309,100,6,2
116 | vertebral-column-3clases/vertebral-column-3clases.arff,vertebral-column-3clases,310,leave-one-out cross-validation,309,100,6,3
117 | wall-following/wall-following.arff,wall-following,5456,test-set cross-validation,5356,100,24,4
118 | waveform/waveform.arff,waveform,5000,test-set cross-validation,4900,100,21,3
119 | waveform-noise/waveform-noise.arff,waveform-noise,5000,test-set cross-validation,4900,100,40,3
120 | wine/wine.arff,wine,178,leave-one-out cross-validation,177,100,13,3
121 | wine-quality-red/wine-quality-red.arff,wine-quality-red,1599,test-set cross-validation,1499,100,11,6
122 | wine-quality-white/wine-quality-white.arff,wine-quality-white,4898,test-set cross-validation,4798,100,11,7
123 | yeast/yeast.arff,yeast,1484,test-set cross-validation,1384,100,8,10
124 | zoo/zoo.arff,zoo,101,leave-one-out cross-validation,100,100,16,7
125 |
--------------------------------------------------------------------------------
/data/processArffs_log.txt:
--------------------------------------------------------------------------------
1 | nohup: ignoring input
2 | processArffs.py:28: DeprecationWarning: the sets module is deprecated
3 | import sets
4 | start 23595 Wed Dec 17 18:22:00 2014
5 | reading: data.tar.gz
6 | writing data to: modelres.csv.gz
7 | writing problem summaries to: problemDescr.csv
8 | skip (test) adult/adult_test.arff
9 | skip (test) annealing/annealing_test.arff
10 | skip (test) audiology-std/con_patrons_repetidos/audiology-std_test.arff
11 | skip (test) audiology-std/audiology-std_test.arff
12 | skip (test) conn-bench-vowel-deterding/conn-bench-vowel-deterding_test.arff
13 | skip (test) hayes-roth/hayes-roth_test.arff
14 | skip (test) hill-valley/hill-valley_test.arff
15 | skip (test) horse-colic/horse-colic_test.arff
16 | skip (test) image-segmentation/image-segmentation_test.arff
17 | skip (test) monks-1/monks-1_test.arff
18 | skip (test) monks-2/monks-2_test.arff
19 | skip (test) monks-3/monks-3_test.arff
20 | skip (test) optical/optical_test.arff
21 | skip (test) pendigits/pendigits_test.arff
22 | skip (test) soybean/soybean_test.arff
23 | skip (test) spect/spect_test.arff
24 | skip (test) spectf/spectf_test.arff
25 | skip (test) statlog-landsat/statlog-landsat_test.arff
26 | skip (test) statlog-shuttle/statlog-shuttle_test.arff
27 | skip (test) thyroid/thyroid_test.arff
28 | ['abalone/abalone.arff', 'acute-inflammation/acute-inflammation.arff', 'acute-nephritis/acute-nephritis.arff', 'adult/adult_train.arff', 'annealing/annealing_train.arff', 'arrhythmia/arrhythmia.arff', 'audiology-std/con_patrons_repetidos/audiology-std_train.arff', 'audiology-std/audiology-std_train.arff', 'balance-scale/balance-scale.arff', 'balloons/balloons.arff', 'bank/bank.arff', 'blood/blood.arff', 'breast-cancer/breast-cancer.arff', 'breast-cancer-wisc/breast-cancer-wisc.arff', 'breast-cancer-wisc-diag/breast-cancer-wisc-diag.arff', 'breast-cancer-wisc-prog/breast-cancer-wisc-prog.arff', 'breast-tissue/breast-tissue.arff', 'car/car.arff', 'cardiotocography-3clases/cardiotocography-3clases.arff', 'cardiotocography-10clases/cardiotocography-10clases.arff', 'chess-krvk/chess-krvk.arff', 'chess-krvkp/chess-krvkp.arff', 'congressional-voting/congressional-voting.arff', 'conn-bench-sonar-mines-rocks/conn-bench-sonar-mines-rocks.arff', 'conn-bench-vowel-deterding/conn-bench-vowel-deterding_train.arff', 'connect-4/connect-4.arff', 'contrac/contrac.arff', 'credit-approval/credit-approval.arff', 'cylinder-bands/cylinder-bands.arff', 'dermatology/dermatology.arff', 'echocardiogram/echocardiogram.arff', 'ecoli/ecoli.arff', 'energy-y1/energy-y1.arff', 'energy-y2/energy-y2.arff', 'fertility/fertility.arff', 'flags/flags.arff', 'glass/glass.arff', 'haberman-survival/haberman-survival.arff', 'hayes-roth/hayes-roth_train.arff', 'heart-cleveland/heart-cleveland.arff', 'heart-hungarian/heart-hungarian.arff', 'heart-switzerland/heart-switzerland.arff', 'heart-va/heart-va.arff', 'hepatitis/hepatitis.arff', 'hill-valley/hill-valley_train.arff', 'horse-colic/horse-colic_train.arff', 'ilpd-indian-liver/ilpd-indian-liver.arff', 'image-segmentation/image-segmentation_train.arff', 'ionosphere/ionosphere.arff', 'iris/iris.arff', 'led-display/led-display.arff', 'lenses/lenses.arff', 'letter/letter.arff', 'libras/libras.arff', 'low-res-spect/low-res-spect.arff', 
'lung-cancer/lung-cancer.arff', 'lymphography/lymphography.arff', 'magic/magic.arff', 'mammographic/mammographic.arff', 'miniboone/miniboone.arff', 'molec-biol-promoter/molec-biol-promoter.arff', 'molec-biol-splice/molec-biol-splice.arff', 'monks-1/monks-1_train.arff', 'monks-2/monks-2_train.arff', 'monks-3/monks-3_train.arff', 'mushroom/mushroom.arff', 'musk-1/musk-1.arff', 'musk-2/musk-2.arff', 'nursery/nursery.arff', 'oocytes_merluccius_nucleus_4d/oocytes_merluccius_nucleus_4d.arff', 'oocytes_merluccius_states_2f/oocytes_merluccius_states_2f.arff', 'oocytes_trisopterus_nucleus_2f/oocytes_trisopterus_nucleus_2f.arff', 'oocytes_trisopterus_states_5b/oocytes_trisopterus_states_5b.arff', 'optical/optical_train.arff', 'ozone/ozone.arff', 'page-blocks/page-blocks.arff', 'parkinsons/parkinsons.arff', 'pendigits/pendigits_train.arff', 'pima/pima.arff', 'pittsburg-bridges-MATERIAL/pittsburg-bridges-MATERIAL.arff', 'pittsburg-bridges-REL-L/pittsburg-bridges-REL-L.arff', 'pittsburg-bridges-SPAN/pittsburg-bridges-SPAN.arff', 'pittsburg-bridges-T-OR-D/pittsburg-bridges-T-OR-D.arff', 'pittsburg-bridges-TYPE/pittsburg-bridges-TYPE.arff', 'planning/planning.arff', 'plant-margin/plant-margin.arff', 'plant-shape/plant-shape.arff', 'plant-texture/plant-texture.arff', 'post-operative/post-operative.arff', 'primary-tumor/primary-tumor.arff', 'ringnorm/ringnorm.arff', 'seeds/seeds.arff', 'semeion/semeion.arff', 'soybean/soybean_train.arff', 'spambase/spambase.arff', 'spect/spect_train.arff', 'spectf/spectf_train.arff', 'statlog-australian-credit/statlog-australian-credit.arff', 'statlog-german-credit/statlog-german-credit.arff', 'statlog-heart/statlog-heart.arff', 'statlog-image/statlog-image.arff', 'statlog-landsat/statlog-landsat_train.arff', 'statlog-shuttle/statlog-shuttle_train.arff', 'statlog-vehicle/statlog-vehicle.arff', 'steel-plates/steel-plates.arff', 'synthetic-control/synthetic-control.arff', 'teaching/teaching.arff', 'thyroid/thyroid_train.arff', 
'tic-tac-toe/tic-tac-toe.arff', 'titanic/titanic.arff', 'trains/trains.arff', 'twonorm/twonorm.arff', 'vertebral-column-2clases/datos_orixinais/column_3C_weka.arff', 'vertebral-column-2clases/datos_orixinais/column_2C_weka.arff', 'vertebral-column-2clases/vertebral-column-2clases.arff', 'vertebral-column-3clases/vertebral-column-3clases.arff', 'wall-following/wall-following.arff', 'waveform/waveform.arff', 'waveform-noise/waveform-noise.arff', 'wine/wine.arff', 'wine-quality-red/wine-quality-red.arff', 'wine-quality-white/wine-quality-white.arff', 'yeast/yeast.arff', 'zoo/zoo.arff'] 23595 Wed Dec 17 18:22:05 2014
29 | start ecoli 23632 Wed Dec 17 18:22:13 2014
30 | initial ['ecoli/ecoli.arff', u'ecoli', 336, 7, 8] 23632 Wed Dec 17 18:22:13 2014
31 | start echocardiogram 23631 Wed Dec 17 18:22:13 2014
32 | initial ['echocardiogram/echocardiogram.arff', u'echocardiogram', 131, 10, 2] 23631 Wed Dec 17 18:22:13 2014
33 | start dermatology 23630 Wed Dec 17 18:22:13 2014
34 | initial ['dermatology/dermatology.arff', u'dermatology', 366, 34, 6] 23630 Wed Dec 17 18:22:13 2014
35 | start conn-bench-vowel-deterding 23625 Wed Dec 17 18:22:13 2014
36 | initial ['conn-bench-vowel-deterding/conn-bench-vowel-deterding_train.arff', u'conn-bench-vowel-deterding', 528, 11, 11] 23625 Wed Dec 17 18:22:13 2014
37 | start conn-bench-sonar-mines-rocks 23624 Wed Dec 17 18:22:13 2014
38 | initial ['conn-bench-sonar-mines-rocks/conn-bench-sonar-mines-rocks.arff', u'conn-bench-sonar-mines-rocks', 208, 60, 2] 23624 Wed Dec 17 18:22:13 2014
39 | start cylinder-bands 23629 Wed Dec 17 18:22:13 2014
40 | initial ['cylinder-bands/cylinder-bands.arff', u'cylinder-bands', 512, 35, 2] 23629 Wed Dec 17 18:22:13 2014
41 | start congressional-voting 23623 Wed Dec 17 18:22:13 2014
42 | initial ['congressional-voting/congressional-voting.arff', u'congressional-voting', 435, 16, 2] 23623 Wed Dec 17 18:22:13 2014
43 | start contrac 23627 Wed Dec 17 18:22:14 2014
44 | start credit-approval 23628 Wed Dec 17 18:22:14 2014
45 | initial ['credit-approval/credit-approval.arff', u'credit-approval', 690, 15, 2] 23628 Wed Dec 17 18:22:14 2014
46 | initial ['contrac/contrac.arff', u'contrac', 1473, 9, 3] 23627 Wed Dec 17 18:22:14 2014
47 | start car 23618 Wed Dec 17 18:22:14 2014
48 | initial ['car/car.arff', u'car', 1728, 6, 4] 23618 Wed Dec 17 18:22:14 2014
49 | start breast-tissue 23617 Wed Dec 17 18:22:14 2014
50 | initial ['breast-tissue/breast-tissue.arff', u'breast-tissue', 106, 9, 6] 23617 Wed Dec 17 18:22:14 2014
51 | start cardiotocography-10clases 23620 Wed Dec 17 18:22:14 2014
52 | initial ['cardiotocography-10clases/cardiotocography-10clases.arff', u'cardiotocography-10clases', 2126, 21, 10] 23620 Wed Dec 17 18:22:14 2014
53 | start breast-cancer-wisc 23614 Wed Dec 17 18:22:14 2014
54 | initial ['breast-cancer-wisc/breast-cancer-wisc.arff', u'breast-cancer-wisc', 699, 9, 2] 23614 Wed Dec 17 18:22:14 2014
55 | start chess-krvkp 23622 Wed Dec 17 18:22:14 2014
56 | start breast-cancer-wisc-prog 23616 Wed Dec 17 18:22:14 2014
57 | initial ['breast-cancer-wisc-prog/breast-cancer-wisc-prog.arff', u'breast-cancer-wisc-prog', 198, 33, 2] 23616 Wed Dec 17 18:22:14 2014
58 | initial ['chess-krvkp/chess-krvkp.arff', u'chess-krvkp', 3196, 36, 2] 23622 Wed Dec 17 18:22:14 2014
59 | start cardiotocography-3clases 23619 Wed Dec 17 18:22:14 2014
60 | start breast-cancer 23613 Wed Dec 17 18:22:14 2014
61 | initial ['cardiotocography-3clases/cardiotocography-3clases.arff', u'cardiotocography-3clases', 2126, 21, 3] 23619 Wed Dec 17 18:22:14 2014
62 | initial ['breast-cancer/breast-cancer.arff', u'breast-cancer', 286, 9, 2] 23613 Wed Dec 17 18:22:14 2014
63 | start breast-cancer-wisc-diag 23615 Wed Dec 17 18:22:14 2014
64 | initial ['breast-cancer-wisc-diag/breast-cancer-wisc-diag.arff', u'breast-cancer-wisc-diag', 569, 30, 2] 23615 Wed Dec 17 18:22:14 2014
65 | start balloons 23610 Wed Dec 17 18:22:14 2014
66 | initial ['balloons/balloons.arff', u'balloons', 16, 4, 2] 23610 Wed Dec 17 18:22:14 2014
67 | start blood 23612 Wed Dec 17 18:22:14 2014
68 | initial ['blood/blood.arff', u'blood', 748, 4, 2] 23612 Wed Dec 17 18:22:14 2014
69 | start audiology-std 23608 Wed Dec 17 18:22:14 2014
70 | initial ['audiology-std/audiology-std_train.arff', u'audiology-std', 171, 59, 18] 23608 Wed Dec 17 18:22:14 2014
71 | start acute-inflammation 23602 Wed Dec 17 18:22:14 2014
72 | initial ['acute-inflammation/acute-inflammation.arff', u'acute-inflammation', 120, 6, 2] 23602 Wed Dec 17 18:22:14 2014
73 | start annealing 23605 Wed Dec 17 18:22:14 2014
74 | initial ['annealing/annealing_train.arff', u'annealing', 798, 31, 5] 23605 Wed Dec 17 18:22:14 2014
75 | start bank 23611 Wed Dec 17 18:22:14 2014
76 | initial ['bank/bank.arff', u'bank', 4521, 16, 2] 23611 Wed Dec 17 18:22:14 2014
77 | start acute-nephritis 23603 Wed Dec 17 18:22:14 2014
78 | initial ['acute-nephritis/acute-nephritis.arff', u'acute-nephritis', 120, 6, 2] 23603 Wed Dec 17 18:22:14 2014
79 | start balance-scale 23609 Wed Dec 17 18:22:14 2014
80 | initial ['balance-scale/balance-scale.arff', u'balance-scale', 625, 4, 3] 23609 Wed Dec 17 18:22:14 2014
81 | done ['cylinder-bands/cylinder-bands.arff', u'cylinder-bands', 512, 'test-set cross-validation', 412, 100, 35, 2] 23629 Wed Dec 17 18:22:14 2014
82 | start abalone 23601 Wed Dec 17 18:22:15 2014
83 | initial ['abalone/abalone.arff', u'abalone', 4177, 8, 3] 23601 Wed Dec 17 18:22:15 2014
84 | start audiology-std 23607 Wed Dec 17 18:22:15 2014
85 | start chess-krvk 23621 Wed Dec 17 18:22:15 2014
86 | initial ['audiology-std/con_patrons_repetidos/audiology-std_train.arff', u'audiology-std', 194, 59, 18] 23607 Wed Dec 17 18:22:15 2014
87 | start arrhythmia 23606 Wed Dec 17 18:22:15 2014
88 | done ['credit-approval/credit-approval.arff', u'credit-approval', 690, 'test-set cross-validation', 590, 100, 15, 2] 23628 Wed Dec 17 18:22:15 2014
89 | initial ['chess-krvk/chess-krvk.arff', u'chess-krvk', 28056, 6, 18] 23621 Wed Dec 17 18:22:15 2014
90 | initial ['arrhythmia/arrhythmia.arff', u'arrhythmia', 452, 262, 13] 23606 Wed Dec 17 18:22:15 2014
91 | done ['breast-cancer-wisc/breast-cancer-wisc.arff', u'breast-cancer-wisc', 699, 'test-set cross-validation', 599, 100, 9, 2] 23614 Wed Dec 17 18:22:15 2014
92 | done ['blood/blood.arff', u'blood', 748, 'test-set cross-validation', 648, 100, 4, 2] 23612 Wed Dec 17 18:22:15 2014
93 | done ['breast-cancer-wisc-diag/breast-cancer-wisc-diag.arff', u'breast-cancer-wisc-diag', 569, 'test-set cross-validation', 469, 100, 30, 2] 23615 Wed Dec 17 18:22:16 2014
94 | done ['balance-scale/balance-scale.arff', u'balance-scale', 625, 'test-set cross-validation', 525, 100, 4, 3] 23609 Wed Dec 17 18:22:16 2014
95 | start adult 23604 Wed Dec 17 18:22:17 2014
96 | initial ['adult/adult_train.arff', u'adult', 32561, 14, 2] 23604 Wed Dec 17 18:22:17 2014
97 | done ['contrac/contrac.arff', u'contrac', 1473, 'test-set cross-validation', 1373, 100, 9, 3] 23627 Wed Dec 17 18:22:17 2014
98 | done ['car/car.arff', u'car', 1728, 'test-set cross-validation', 1628, 100, 6, 4] 23618 Wed Dec 17 18:22:18 2014
99 | done ['annealing/annealing_train.arff', u'annealing', 798, 'test-set cross-validation', 698, 100, 31, 5] 23605 Wed Dec 17 18:22:18 2014
100 | done ['balloons/balloons.arff', u'balloons', 16, 'leave-one-out cross-validation', 15, 16, 4, 2] 23610 Wed Dec 17 18:22:19 2014
101 | done ['conn-bench-vowel-deterding/conn-bench-vowel-deterding_train.arff', u'conn-bench-vowel-deterding', 528, 'test-set cross-validation', 428, 100, 11, 11] 23625 Wed Dec 17 18:22:19 2014
102 | done ['cardiotocography-3clases/cardiotocography-3clases.arff', u'cardiotocography-3clases', 2126, 'test-set cross-validation', 2026, 100, 21, 3] 23619 Wed Dec 17 18:22:20 2014
103 | done ['chess-krvkp/chess-krvkp.arff', u'chess-krvkp', 3196, 'test-set cross-validation', 3096, 100, 36, 2] 23622 Wed Dec 17 18:22:21 2014
104 | start energy-y1 23629 Wed Dec 17 18:22:23 2014
105 | initial ['energy-y1/energy-y1.arff', u'energy-y1', 768, 8, 3] 23629 Wed Dec 17 18:22:23 2014
106 | done ['bank/bank.arff', u'bank', 4521, 'test-set cross-validation', 4421, 100, 16, 2] 23611 Wed Dec 17 18:22:23 2014
107 | start energy-y2 23628 Wed Dec 17 18:22:23 2014
108 | initial ['energy-y2/energy-y2.arff', u'energy-y2', 768, 8, 3] 23628 Wed Dec 17 18:22:23 2014
109 | start fertility 23614 Wed Dec 17 18:22:24 2014
110 | initial ['fertility/fertility.arff', u'fertility', 100, 9, 2] 23614 Wed Dec 17 18:22:24 2014
111 | start flags 23612 Wed Dec 17 18:22:24 2014
112 | initial ['flags/flags.arff', u'flags', 194, 28, 8] 23612 Wed Dec 17 18:22:24 2014
113 | done ['energy-y1/energy-y1.arff', u'energy-y1', 768, 'test-set cross-validation', 668, 100, 8, 3] 23629 Wed Dec 17 18:22:25 2014
114 | start glass 23615 Wed Dec 17 18:22:25 2014
115 | initial ['glass/glass.arff', u'glass', 214, 9, 6] 23615 Wed Dec 17 18:22:25 2014
116 | done ['energy-y2/energy-y2.arff', u'energy-y2', 768, 'test-set cross-validation', 668, 100, 8, 3] 23628 Wed Dec 17 18:22:25 2014
117 | start haberman-survival 23609 Wed Dec 17 18:22:25 2014
118 | initial ['haberman-survival/haberman-survival.arff', u'haberman-survival', 306, 3, 2] 23609 Wed Dec 17 18:22:25 2014
119 | start hayes-roth 23627 Wed Dec 17 18:22:26 2014
120 | initial ['hayes-roth/hayes-roth_train.arff', u'hayes-roth', 132, 3, 3] 23627 Wed Dec 17 18:22:26 2014
121 | start heart-cleveland 23618 Wed Dec 17 18:22:27 2014
122 | initial ['heart-cleveland/heart-cleveland.arff', u'heart-cleveland', 303, 13, 5] 23618 Wed Dec 17 18:22:27 2014
123 | start connect-4 23626 Wed Dec 17 18:22:27 2014
124 | start heart-hungarian 23605 Wed Dec 17 18:22:27 2014
125 | initial ['heart-hungarian/heart-hungarian.arff', u'heart-hungarian', 294, 12, 2] 23605 Wed Dec 17 18:22:27 2014
126 | initial ['connect-4/connect-4.arff', u'connect-4', 67557, 42, 2] 23626 Wed Dec 17 18:22:27 2014
127 | start heart-switzerland 23610 Wed Dec 17 18:22:28 2014
128 | initial ['heart-switzerland/heart-switzerland.arff', u'heart-switzerland', 123, 12, 5] 23610 Wed Dec 17 18:22:28 2014
129 | start heart-va 23625 Wed Dec 17 18:22:28 2014
130 | initial ['heart-va/heart-va.arff', u'heart-va', 200, 12, 5] 23625 Wed Dec 17 18:22:28 2014
131 | start hepatitis 23619 Wed Dec 17 18:22:28 2014
132 | initial ['hepatitis/hepatitis.arff', u'hepatitis', 155, 19, 2] 23619 Wed Dec 17 18:22:28 2014
133 | start hill-valley 23622 Wed Dec 17 18:22:30 2014
134 | initial ['hill-valley/hill-valley_train.arff', u'hill-valley', 606, 100, 2] 23622 Wed Dec 17 18:22:30 2014
135 | done ['cardiotocography-10clases/cardiotocography-10clases.arff', u'cardiotocography-10clases', 2126, 'test-set cross-validation', 2026, 100, 21, 10] 23620 Wed Dec 17 18:22:30 2014
136 | done ['abalone/abalone.arff', u'abalone', 4177, 'test-set cross-validation', 4077, 100, 8, 3] 23601 Wed Dec 17 18:22:31 2014
137 | start horse-colic 23611 Wed Dec 17 18:22:32 2014
138 | initial ['horse-colic/horse-colic_train.arff', u'horse-colic', 300, 25, 2] 23611 Wed Dec 17 18:22:32 2014
139 | start ilpd-indian-liver 23629 Wed Dec 17 18:22:33 2014
140 | initial ['ilpd-indian-liver/ilpd-indian-liver.arff', u'ilpd-indian-liver', 583, 9, 2] 23629 Wed Dec 17 18:22:33 2014
141 | start image-segmentation 23628 Wed Dec 17 18:22:34 2014
142 | initial ['image-segmentation/image-segmentation_train.arff', u'image-segmentation', 210, 18, 7] 23628 Wed Dec 17 18:22:34 2014
143 | done ['ilpd-indian-liver/ilpd-indian-liver.arff', u'ilpd-indian-liver', 583, 'test-set cross-validation', 483, 100, 9, 2] 23629 Wed Dec 17 18:22:34 2014
144 | done ['hill-valley/hill-valley_train.arff', u'hill-valley', 606, 'test-set cross-validation', 506, 100, 100, 2] 23622 Wed Dec 17 18:22:35 2014
145 | start iris 23601 Wed Dec 17 18:22:39 2014
146 | initial ['iris/iris.arff', u'iris', 150, 4, 3] 23601 Wed Dec 17 18:22:39 2014
147 | start ionosphere 23620 Wed Dec 17 18:22:39 2014
148 | initial ['ionosphere/ionosphere.arff', u'ionosphere', 351, 33, 2] 23620 Wed Dec 17 18:22:39 2014
149 | start led-display 23629 Wed Dec 17 18:22:43 2014
150 | initial ['led-display/led-display.arff', u'led-display', 1000, 7, 10] 23629 Wed Dec 17 18:22:43 2014
151 | start lenses 23622 Wed Dec 17 18:22:43 2014
152 | initial ['lenses/lenses.arff', u'lenses', 24, 4, 3] 23622 Wed Dec 17 18:22:43 2014
153 | done ['acute-nephritis/acute-nephritis.arff', u'acute-nephritis', 120, 'leave-one-out cross-validation', 119, 100, 6, 2] 23603 Wed Dec 17 18:22:44 2014
154 | done ['acute-inflammation/acute-inflammation.arff', u'acute-inflammation', 120, 'leave-one-out cross-validation', 119, 100, 6, 2] 23602 Wed Dec 17 18:22:45 2014
155 | done ['echocardiogram/echocardiogram.arff', u'echocardiogram', 131, 'leave-one-out cross-validation', 130, 100, 10, 2] 23631 Wed Dec 17 18:22:46 2014
156 | done ['led-display/led-display.arff', u'led-display', 1000, 'test-set cross-validation', 900, 100, 7, 10] 23629 Wed Dec 17 18:22:48 2014
157 | done ['breast-cancer/breast-cancer.arff', u'breast-cancer', 286, 'leave-one-out cross-validation', 285, 100, 9, 2] 23613 Wed Dec 17 18:22:51 2014
158 | start libras 23602 Wed Dec 17 18:22:54 2014
159 | initial ['libras/libras.arff', u'libras', 360, 90, 15] 23602 Wed Dec 17 18:22:54 2014
160 | done ['lenses/lenses.arff', u'lenses', 24, 'leave-one-out cross-validation', 23, 24, 4, 3] 23622 Wed Dec 17 18:22:55 2014
161 | start letter 23603 Wed Dec 17 18:22:55 2014
162 | start low-res-spect 23631 Wed Dec 17 18:22:55 2014
163 | initial ['letter/letter.arff', u'letter', 20000, 16, 26] 23603 Wed Dec 17 18:22:55 2014
164 | initial ['low-res-spect/low-res-spect.arff', u'low-res-spect', 531, 100, 9] 23631 Wed Dec 17 18:22:55 2014
165 | done ['fertility/fertility.arff', u'fertility', 100, 'leave-one-out cross-validation', 99, 100, 9, 2] 23614 Wed Dec 17 18:22:57 2014
166 | start lung-cancer 23629 Wed Dec 17 18:22:57 2014
167 | initial ['lung-cancer/lung-cancer.arff', u'lung-cancer', 32, 56, 3] 23629 Wed Dec 17 18:22:57 2014
168 | done ['congressional-voting/congressional-voting.arff', u'congressional-voting', 435, 'leave-one-out cross-validation', 434, 100, 16, 2] 23623 Wed Dec 17 18:23:00 2014
169 | start lymphography 23613 Wed Dec 17 18:23:00 2014
170 | initial ['lymphography/lymphography.arff', u'lymphography', 148, 18, 4] 23613 Wed Dec 17 18:23:00 2014
171 | done ['haberman-survival/haberman-survival.arff', u'haberman-survival', 306, 'leave-one-out cross-validation', 305, 100, 3, 2] 23609 Wed Dec 17 18:23:03 2014
172 | start magic 23622 Wed Dec 17 18:23:05 2014
173 | initial ['magic/magic.arff', u'magic', 19020, 10, 2] 23622 Wed Dec 17 18:23:05 2014
174 | done ['hepatitis/hepatitis.arff', u'hepatitis', 155, 'leave-one-out cross-validation', 154, 100, 19, 2] 23619 Wed Dec 17 18:23:05 2014
175 | start mammographic 23614 Wed Dec 17 18:23:06 2014
176 | initial ['mammographic/mammographic.arff', u'mammographic', 961, 5, 2] 23614 Wed Dec 17 18:23:06 2014
177 | done ['breast-cancer-wisc-prog/breast-cancer-wisc-prog.arff', u'breast-cancer-wisc-prog', 198, 'leave-one-out cross-validation', 197, 100, 33, 2] 23616 Wed Dec 17 18:23:07 2014
178 | done ['mammographic/mammographic.arff', u'mammographic', 961, 'test-set cross-validation', 861, 100, 5, 2] 23614 Wed Dec 17 18:23:07 2014
179 | done ['heart-hungarian/heart-hungarian.arff', u'heart-hungarian', 294, 'leave-one-out cross-validation', 293, 100, 12, 2] 23605 Wed Dec 17 18:23:08 2014
180 | done ['lung-cancer/lung-cancer.arff', u'lung-cancer', 32, 'leave-one-out cross-validation', 31, 32, 56, 3] 23629 Wed Dec 17 18:23:13 2014
181 | done ['low-res-spect/low-res-spect.arff', u'low-res-spect', 531, 'test-set cross-validation', 431, 100, 100, 9] 23631 Wed Dec 17 18:23:14 2014
182 | start molec-biol-promoter 23609 Wed Dec 17 18:23:15 2014
183 | initial ['molec-biol-promoter/molec-biol-promoter.arff', u'molec-biol-promoter', 106, 57, 2] 23609 Wed Dec 17 18:23:15 2014
184 | done ['horse-colic/horse-colic_train.arff', u'horse-colic', 300, 'leave-one-out cross-validation', 299, 100, 25, 2] 23611 Wed Dec 17 18:23:16 2014
185 | start molec-biol-splice 23619 Wed Dec 17 18:23:18 2014
186 | initial ['molec-biol-splice/molec-biol-splice.arff', u'molec-biol-splice', 3190, 60, 3] 23619 Wed Dec 17 18:23:18 2014
187 | start monks-1 23616 Wed Dec 17 18:23:19 2014
188 | initial ['monks-1/monks-1_train.arff', u'monks-1', 124, 6, 2] 23616 Wed Dec 17 18:23:19 2014
189 | start monks-2 23614 Wed Dec 17 18:23:19 2014
190 | initial ['monks-2/monks-2_train.arff', u'monks-2', 169, 6, 2] 23614 Wed Dec 17 18:23:19 2014
191 | done ['hayes-roth/hayes-roth_train.arff', u'hayes-roth', 132, 'leave-one-out cross-validation', 131, 100, 3, 3] 23627 Wed Dec 17 18:23:19 2014
192 | start monks-3 23605 Wed Dec 17 18:23:20 2014
193 | initial ['monks-3/monks-3_train.arff', u'monks-3', 122, 6, 2] 23605 Wed Dec 17 18:23:20 2014
194 | done ['conn-bench-sonar-mines-rocks/conn-bench-sonar-mines-rocks.arff', u'conn-bench-sonar-mines-rocks', 208, 'leave-one-out cross-validation', 207, 100, 60, 2] 23624 Wed Dec 17 18:23:24 2014
195 | start musk-1 23631 Wed Dec 17 18:23:25 2014
196 | initial ['musk-1/musk-1.arff', u'musk-1', 476, 166, 2] 23631 Wed Dec 17 18:23:25 2014
197 | start mushroom 23629 Wed Dec 17 18:23:26 2014
198 | initial ['mushroom/mushroom.arff', u'mushroom', 8124, 21, 2] 23629 Wed Dec 17 18:23:26 2014
199 | done ['iris/iris.arff', u'iris', 150, 'leave-one-out cross-validation', 149, 100, 4, 3] 23601 Wed Dec 17 18:23:29 2014
200 | start nursery 23627 Wed Dec 17 18:23:33 2014
201 | initial ['nursery/nursery.arff', u'nursery', 12960, 8, 5] 23627 Wed Dec 17 18:23:33 2014
202 | start musk-2 23611 Wed Dec 17 18:23:34 2014
203 | initial ['musk-2/musk-2.arff', u'musk-2', 6598, 166, 2] 23611 Wed Dec 17 18:23:34 2014
204 | start oocytes_merluccius_nucleus_4d 23624 Wed Dec 17 18:23:37 2014
205 | initial ['oocytes_merluccius_nucleus_4d/oocytes_merluccius_nucleus_4d.arff', u'oocytes_merluccius_nucleus_4d', 1022, 41, 2] 23624 Wed Dec 17 18:23:37 2014
206 | done ['breast-tissue/breast-tissue.arff', u'breast-tissue', 106, 'leave-one-out cross-validation', 105, 100, 9, 6] 23617 Wed Dec 17 18:23:37 2014
207 | done ['molec-biol-splice/molec-biol-splice.arff', u'molec-biol-splice', 3190, 'test-set cross-validation', 3090, 100, 60, 3] 23619 Wed Dec 17 18:23:40 2014
208 | done ['oocytes_merluccius_nucleus_4d/oocytes_merluccius_nucleus_4d.arff', u'oocytes_merluccius_nucleus_4d', 1022, 'test-set cross-validation', 922, 100, 41, 2] 23624 Wed Dec 17 18:23:41 2014
209 | done ['mushroom/mushroom.arff', u'mushroom', 8124, 'test-set cross-validation', 8024, 100, 21, 2] 23629 Wed Dec 17 18:23:42 2014
210 | start miniboone 23623 Wed Dec 17 18:23:42 2014
211 | start oocytes_merluccius_states_2f 23601 Wed Dec 17 18:23:42 2014
212 | initial ['oocytes_merluccius_states_2f/oocytes_merluccius_states_2f.arff', u'oocytes_merluccius_states_2f', 1022, 25, 3] 23601 Wed Dec 17 18:23:42 2014
213 | initial ['miniboone/miniboone.arff', u'miniboone', 130064, 50, 2] 23623 Wed Dec 17 18:23:44 2014
214 | done ['ionosphere/ionosphere.arff', u'ionosphere', 351, 'leave-one-out cross-validation', 350, 100, 33, 2] 23620 Wed Dec 17 18:23:45 2014
215 | done ['heart-switzerland/heart-switzerland.arff', u'heart-switzerland', 123, 'leave-one-out cross-validation', 122, 100, 12, 5] 23610 Wed Dec 17 18:23:45 2014
216 | done ['oocytes_merluccius_states_2f/oocytes_merluccius_states_2f.arff', u'oocytes_merluccius_states_2f', 1022, 'test-set cross-validation', 922, 100, 25, 3] 23601 Wed Dec 17 18:23:48 2014
217 | start oocytes_trisopterus_nucleus_2f 23617 Wed Dec 17 18:23:50 2014
218 | initial ['oocytes_trisopterus_nucleus_2f/oocytes_trisopterus_nucleus_2f.arff', u'oocytes_trisopterus_nucleus_2f', 912, 25, 2] 23617 Wed Dec 17 18:23:50 2014
219 | done ['monks-1/monks-1_train.arff', u'monks-1', 124, 'leave-one-out cross-validation', 123, 100, 6, 2] 23616 Wed Dec 17 18:23:52 2014
220 | start oocytes_trisopterus_states_5b 23619 Wed Dec 17 18:23:53 2014
221 | initial ['oocytes_trisopterus_states_5b/oocytes_trisopterus_states_5b.arff', u'oocytes_trisopterus_states_5b', 912, 32, 3] 23619 Wed Dec 17 18:23:53 2014
222 | done ['heart-va/heart-va.arff', u'heart-va', 200, 'leave-one-out cross-validation', 199, 100, 12, 5] 23625 Wed Dec 17 18:23:53 2014
223 | done ['oocytes_trisopterus_nucleus_2f/oocytes_trisopterus_nucleus_2f.arff', u'oocytes_trisopterus_nucleus_2f', 912, 'test-set cross-validation', 812, 100, 25, 2] 23617 Wed Dec 17 18:23:53 2014
224 | done ['monks-3/monks-3_train.arff', u'monks-3', 122, 'leave-one-out cross-validation', 121, 100, 6, 2] 23605 Wed Dec 17 18:23:54 2014
225 | done ['molec-biol-promoter/molec-biol-promoter.arff', u'molec-biol-promoter', 106, 'leave-one-out cross-validation', 105, 100, 57, 2] 23609 Wed Dec 17 18:23:54 2014
226 | start optical 23624 Wed Dec 17 18:23:55 2014
227 | initial ['optical/optical_train.arff', u'optical', 3823, 62, 10] 23624 Wed Dec 17 18:23:55 2014
228 | done ['monks-2/monks-2_train.arff', u'monks-2', 169, 'leave-one-out cross-validation', 168, 100, 6, 2] 23614 Wed Dec 17 18:23:55 2014
229 | start ozone 23629 Wed Dec 17 18:23:55 2014
230 | initial ['ozone/ozone.arff', u'ozone', 2536, 72, 2] 23629 Wed Dec 17 18:23:55 2014
231 | start page-blocks 23620 Wed Dec 17 18:23:58 2014
232 | initial ['page-blocks/page-blocks.arff', u'page-blocks', 5473, 10, 5] 23620 Wed Dec 17 18:23:58 2014
233 | start parkinsons 23610 Wed Dec 17 18:23:58 2014
234 | initial ['parkinsons/parkinsons.arff', u'parkinsons', 195, 22, 2] 23610 Wed Dec 17 18:23:58 2014
235 | done ['oocytes_trisopterus_states_5b/oocytes_trisopterus_states_5b.arff', u'oocytes_trisopterus_states_5b', 912, 'test-set cross-validation', 812, 100, 32, 3] 23619 Wed Dec 17 18:23:59 2014
236 | start pendigits 23601 Wed Dec 17 18:24:02 2014
237 | initial ['pendigits/pendigits_train.arff', u'pendigits', 7494, 16, 10] 23601 Wed Dec 17 18:24:02 2014
238 | done ['ozone/ozone.arff', u'ozone', 2536, 'test-set cross-validation', 2436, 100, 72, 2] 23629 Wed Dec 17 18:24:03 2014
239 | start pima 23616 Wed Dec 17 18:24:05 2014
240 | initial ['pima/pima.arff', u'pima', 768, 8, 2] 23616 Wed Dec 17 18:24:05 2014
241 | start pittsburg-bridges-REL-L 23617 Wed Dec 17 18:24:06 2014
242 | initial ['pittsburg-bridges-REL-L/pittsburg-bridges-REL-L.arff', u'pittsburg-bridges-REL-L', 103, 7, 3] 23617 Wed Dec 17 18:24:06 2014
243 | done ['pima/pima.arff', u'pima', 768, 'test-set cross-validation', 668, 100, 8, 2] 23616 Wed Dec 17 18:24:06 2014
244 | start pittsburg-bridges-MATERIAL 23625 Wed Dec 17 18:24:06 2014
245 | initial ['pittsburg-bridges-MATERIAL/pittsburg-bridges-MATERIAL.arff', u'pittsburg-bridges-MATERIAL', 106, 7, 3] 23625 Wed Dec 17 18:24:06 2014
246 | done ['heart-cleveland/heart-cleveland.arff', u'heart-cleveland', 303, 'leave-one-out cross-validation', 302, 100, 13, 5] 23618 Wed Dec 17 18:24:06 2014
247 | done ['lymphography/lymphography.arff', u'lymphography', 148, 'leave-one-out cross-validation', 147, 100, 18, 4] 23613 Wed Dec 17 18:24:07 2014
248 | start pittsburg-bridges-SPAN 23605 Wed Dec 17 18:24:07 2014
249 | initial ['pittsburg-bridges-SPAN/pittsburg-bridges-SPAN.arff', u'pittsburg-bridges-SPAN', 92, 7, 3] 23605 Wed Dec 17 18:24:07 2014
250 | done ['glass/glass.arff', u'glass', 214, 'leave-one-out cross-validation', 213, 100, 9, 6] 23615 Wed Dec 17 18:24:07 2014
251 | start pittsburg-bridges-T-OR-D 23609 Wed Dec 17 18:24:08 2014
252 | initial ['pittsburg-bridges-T-OR-D/pittsburg-bridges-T-OR-D.arff', u'pittsburg-bridges-T-OR-D', 102, 7, 2] 23609 Wed Dec 17 18:24:08 2014
253 | start pittsburg-bridges-TYPE 23614 Wed Dec 17 18:24:08 2014
254 | initial ['pittsburg-bridges-TYPE/pittsburg-bridges-TYPE.arff', u'pittsburg-bridges-TYPE', 105, 7, 6] 23614 Wed Dec 17 18:24:08 2014
255 | start planning 23619 Wed Dec 17 18:24:12 2014
256 | initial ['planning/planning.arff', u'planning', 182, 12, 2] 23619 Wed Dec 17 18:24:12 2014
257 | done ['page-blocks/page-blocks.arff', u'page-blocks', 5473, 'test-set cross-validation', 5373, 100, 10, 5] 23620 Wed Dec 17 18:24:15 2014
258 | start plant-margin 23629 Wed Dec 17 18:24:18 2014
259 | initial ['plant-margin/plant-margin.arff', u'plant-margin', 1600, 64, 100] 23629 Wed Dec 17 18:24:18 2014
260 | done ['ecoli/ecoli.arff', u'ecoli', 336, 'leave-one-out cross-validation', 335, 100, 7, 8] 23632 Wed Dec 17 18:24:20 2014
261 | start plant-shape 23616 Wed Dec 17 18:24:21 2014
262 | initial ['plant-shape/plant-shape.arff', u'plant-shape', 1600, 64, 100] 23616 Wed Dec 17 18:24:21 2014
263 | start post-operative 23613 Wed Dec 17 18:24:21 2014
264 | initial ['post-operative/post-operative.arff', u'post-operative', 90, 8, 3] 23613 Wed Dec 17 18:24:21 2014
265 | start primary-tumor 23615 Wed Dec 17 18:24:21 2014
266 | initial ['primary-tumor/primary-tumor.arff', u'primary-tumor', 330, 17, 15] 23615 Wed Dec 17 18:24:21 2014
267 | start plant-texture 23618 Wed Dec 17 18:24:22 2014
268 | initial ['plant-texture/plant-texture.arff', u'plant-texture', 1599, 64, 100] 23618 Wed Dec 17 18:24:22 2014
269 | done ['nursery/nursery.arff', u'nursery', 12960, 'test-set cross-validation', 12860, 100, 8, 5] 23627 Wed Dec 17 18:24:23 2014
270 | start ringnorm 23620 Wed Dec 17 18:24:30 2014
271 | initial ['ringnorm/ringnorm.arff', u'ringnorm', 7400, 20, 2] 23620 Wed Dec 17 18:24:30 2014
272 | done ['dermatology/dermatology.arff', u'dermatology', 366, 'leave-one-out cross-validation', 365, 100, 34, 6] 23630 Wed Dec 17 18:24:31 2014
273 | start seeds 23632 Wed Dec 17 18:24:34 2014
274 | initial ['seeds/seeds.arff', u'seeds', 210, 7, 3] 23632 Wed Dec 17 18:24:34 2014
275 | done ['musk-2/musk-2.arff', u'musk-2', 6598, 'test-set cross-validation', 6498, 100, 166, 2] 23611 Wed Dec 17 18:24:39 2014
276 | start semeion 23627 Wed Dec 17 18:24:39 2014
277 | initial ['semeion/semeion.arff', u'semeion', 1593, 256, 10] 23627 Wed Dec 17 18:24:39 2014
278 | done ['image-segmentation/image-segmentation_train.arff', u'image-segmentation', 210, 'leave-one-out cross-validation', 209, 100, 18, 7] 23628 Wed Dec 17 18:24:40 2014
279 | done ['pittsburg-bridges-T-OR-D/pittsburg-bridges-T-OR-D.arff', u'pittsburg-bridges-T-OR-D', 102, 'leave-one-out cross-validation', 101, 100, 7, 2] 23609 Wed Dec 17 18:24:42 2014
280 | done ['parkinsons/parkinsons.arff', u'parkinsons', 195, 'leave-one-out cross-validation', 194, 100, 22, 2] 23610 Wed Dec 17 18:24:44 2014
281 | start soybean 23630 Wed Dec 17 18:24:45 2014
282 | initial ['soybean/soybean_train.arff', u'soybean', 307, 35, 18] 23630 Wed Dec 17 18:24:45 2014
283 | done ['flags/flags.arff', u'flags', 194, 'leave-one-out cross-validation', 193, 100, 28, 8] 23612 Wed Dec 17 18:24:53 2014
284 | done ['pendigits/pendigits_train.arff', u'pendigits', 7494, 'test-set cross-validation', 7394, 100, 16, 10] 23601 Wed Dec 17 18:24:53 2014
285 | done ['optical/optical_train.arff', u'optical', 3823, 'test-set cross-validation', 3723, 100, 62, 10] 23624 Wed Dec 17 18:24:54 2014
286 | done ['planning/planning.arff', u'planning', 182, 'leave-one-out cross-validation', 181, 100, 12, 2] 23619 Wed Dec 17 18:24:55 2014
287 | start spect 23628 Wed Dec 17 18:24:55 2014
288 | initial ['spect/spect_train.arff', u'spect', 79, 22, 2] 23628 Wed Dec 17 18:24:55 2014
289 | done ['pittsburg-bridges-SPAN/pittsburg-bridges-SPAN.arff', u'pittsburg-bridges-SPAN', 92, 'leave-one-out cross-validation', 91, 92, 7, 3] 23605 Wed Dec 17 18:24:55 2014
290 | start spambase 23611 Wed Dec 17 18:24:56 2014
291 | initial ['spambase/spambase.arff', u'spambase', 4601, 57, 2] 23611 Wed Dec 17 18:24:56 2014
292 | start spectf 23609 Wed Dec 17 18:24:56 2014
293 | initial ['spectf/spectf_train.arff', u'spectf', 80, 44, 2] 23609 Wed Dec 17 18:24:56 2014
294 | done ['pittsburg-bridges-MATERIAL/pittsburg-bridges-MATERIAL.arff', u'pittsburg-bridges-MATERIAL', 106, 'leave-one-out cross-validation', 105, 100, 7, 3] 23625 Wed Dec 17 18:24:57 2014
295 | done ['pittsburg-bridges-REL-L/pittsburg-bridges-REL-L.arff', u'pittsburg-bridges-REL-L', 103, 'leave-one-out cross-validation', 102, 100, 7, 3] 23617 Wed Dec 17 18:24:58 2014
296 | start statlog-australian-credit 23610 Wed Dec 17 18:24:58 2014
297 | initial ['statlog-australian-credit/statlog-australian-credit.arff', u'statlog-australian-credit', 690, 14, 2] 23610 Wed Dec 17 18:24:58 2014
298 | done ['ringnorm/ringnorm.arff', u'ringnorm', 7400, 'test-set cross-validation', 7300, 100, 20, 2] 23620 Wed Dec 17 18:24:59 2014
299 | done ['statlog-australian-credit/statlog-australian-credit.arff', u'statlog-australian-credit', 690, 'test-set cross-validation', 590, 100, 14, 2] 23610 Wed Dec 17 18:25:00 2014
300 | start statlog-german-credit 23612 Wed Dec 17 18:25:08 2014
301 | initial ['statlog-german-credit/statlog-german-credit.arff', u'statlog-german-credit', 1000, 24, 2] 23612 Wed Dec 17 18:25:08 2014
302 | start statlog-heart 23601 Wed Dec 17 18:25:08 2014
303 | initial ['statlog-heart/statlog-heart.arff', u'statlog-heart', 270, 13, 2] 23601 Wed Dec 17 18:25:08 2014
304 | done ['post-operative/post-operative.arff', u'post-operative', 90, 'leave-one-out cross-validation', 89, 90, 8, 3] 23613 Wed Dec 17 18:25:08 2014
305 | start statlog-image 23624 Wed Dec 17 18:25:09 2014
306 | initial ['statlog-image/statlog-image.arff', u'statlog-image', 2310, 18, 7] 23624 Wed Dec 17 18:25:09 2014
307 | done ['statlog-german-credit/statlog-german-credit.arff', u'statlog-german-credit', 1000, 'test-set cross-validation', 900, 100, 24, 2] 23612 Wed Dec 17 18:25:10 2014
308 | start statlog-landsat 23619 Wed Dec 17 18:25:10 2014
309 | initial ['statlog-landsat/statlog-landsat_train.arff', u'statlog-landsat', 4435, 36, 6] 23619 Wed Dec 17 18:25:10 2014
310 | start statlog-shuttle 23605 Wed Dec 17 18:25:12 2014
311 | start statlog-vehicle 23625 Wed Dec 17 18:25:12 2014
312 | initial ['statlog-vehicle/statlog-vehicle.arff', u'statlog-vehicle', 846, 18, 4] 23625 Wed Dec 17 18:25:12 2014
313 | initial ['statlog-shuttle/statlog-shuttle_train.arff', u'statlog-shuttle', 43500, 9, 7] 23605 Wed Dec 17 18:25:12 2014
314 | start steel-plates 23617 Wed Dec 17 18:25:13 2014
315 | initial ['steel-plates/steel-plates.arff', u'steel-plates', 1941, 27, 7] 23617 Wed Dec 17 18:25:13 2014
316 | done ['spambase/spambase.arff', u'spambase', 4601, 'test-set cross-validation', 4501, 100, 57, 2] 23611 Wed Dec 17 18:25:14 2014
317 | start synthetic-control 23620 Wed Dec 17 18:25:14 2014
318 | initial ['synthetic-control/synthetic-control.arff', u'synthetic-control', 600, 60, 6] 23620 Wed Dec 17 18:25:14 2014
319 | start teaching 23610 Wed Dec 17 18:25:14 2014
320 | initial ['teaching/teaching.arff', u'teaching', 151, 5, 3] 23610 Wed Dec 17 18:25:14 2014
321 | done ['statlog-vehicle/statlog-vehicle.arff', u'statlog-vehicle', 846, 'test-set cross-validation', 746, 100, 18, 4] 23625 Wed Dec 17 18:25:16 2014
322 | done ['spect/spect_train.arff', u'spect', 79, 'leave-one-out cross-validation', 78, 79, 22, 2] 23628 Wed Dec 17 18:25:19 2014
323 | done ['synthetic-control/synthetic-control.arff', u'synthetic-control', 600, 'test-set cross-validation', 500, 100, 60, 6] 23620 Wed Dec 17 18:25:23 2014
324 | done ['statlog-image/statlog-image.arff', u'statlog-image', 2310, 'test-set cross-validation', 2210, 100, 18, 7] 23624 Wed Dec 17 18:25:23 2014
325 | start thyroid 23613 Wed Dec 17 18:25:23 2014
326 | initial ['thyroid/thyroid_train.arff', u'thyroid', 3772, 21, 3] 23613 Wed Dec 17 18:25:23 2014
327 | start tic-tac-toe 23612 Wed Dec 17 18:25:25 2014
328 | initial ['tic-tac-toe/tic-tac-toe.arff', u'tic-tac-toe', 958, 9, 2] 23612 Wed Dec 17 18:25:25 2014
329 | done ['spectf/spectf_train.arff', u'spectf', 80, 'leave-one-out cross-validation', 79, 80, 44, 2] 23609 Wed Dec 17 18:25:26 2014
330 | done ['tic-tac-toe/tic-tac-toe.arff', u'tic-tac-toe', 958, 'test-set cross-validation', 858, 100, 9, 2] 23612 Wed Dec 17 18:25:26 2014
331 | done ['pittsburg-bridges-TYPE/pittsburg-bridges-TYPE.arff', u'pittsburg-bridges-TYPE', 105, 'leave-one-out cross-validation', 104, 100, 7, 6] 23614 Wed Dec 17 18:25:26 2014
332 | start titanic 23611 Wed Dec 17 18:25:29 2014
333 | initial ['titanic/titanic.arff', u'titanic', 2201, 3, 2] 23611 Wed Dec 17 18:25:29 2014
334 | start trains 23625 Wed Dec 17 18:25:30 2014
335 | initial ['trains/trains.arff', u'trains', 10, 29, 2] 23625 Wed Dec 17 18:25:30 2014
336 | skip (could not split) trains/trains.arff 23625 Wed Dec 17 18:25:30 2014
337 | done ['thyroid/thyroid_train.arff', u'thyroid', 3772, 'test-set cross-validation', 3672, 100, 21, 3] 23613 Wed Dec 17 18:25:32 2014
338 | done ['steel-plates/steel-plates.arff', u'steel-plates', 1941, 'test-set cross-validation', 1841, 100, 27, 7] 23617 Wed Dec 17 18:25:32 2014
339 | done ['titanic/titanic.arff', u'titanic', 2201, 'test-set cross-validation', 2101, 100, 3, 2] 23611 Wed Dec 17 18:25:32 2014
340 | start twonorm 23628 Wed Dec 17 18:25:35 2014
341 | initial ['twonorm/twonorm.arff', u'twonorm', 7400, 20, 2] 23628 Wed Dec 17 18:25:35 2014
342 | done ['seeds/seeds.arff', u'seeds', 210, 'leave-one-out cross-validation', 209, 100, 7, 3] 23632 Wed Dec 17 18:25:36 2014
343 | start column_3C_weka 23620 Wed Dec 17 18:25:37 2014
344 | initial ['vertebral-column-2clases/datos_orixinais/column_3C_weka.arff', u'column_3C_weka', 310, 6, 3] 23620 Wed Dec 17 18:25:37 2014
345 | start column_2C_weka 23624 Wed Dec 17 18:25:38 2014
346 | initial ['vertebral-column-2clases/datos_orixinais/column_2C_weka.arff', u'column_2C_weka', 310, 6, 2] 23624 Wed Dec 17 18:25:38 2014
347 | done ['statlog-landsat/statlog-landsat_train.arff', u'statlog-landsat', 4435, 'test-set cross-validation', 4335, 100, 36, 6] 23619 Wed Dec 17 18:25:41 2014
348 | start vertebral-column-2clases 23609 Wed Dec 17 18:25:41 2014
349 | initial ['vertebral-column-2clases/vertebral-column-2clases.arff', u'vertebral-column-2clases', 310, 6, 2] 23609 Wed Dec 17 18:25:41 2014
350 | start vertebral-column-3clases 23612 Wed Dec 17 18:25:41 2014
351 | initial ['vertebral-column-3clases/vertebral-column-3clases.arff', u'vertebral-column-3clases', 310, 6, 3] 23612 Wed Dec 17 18:25:41 2014
352 | start wall-following 23614 Wed Dec 17 18:25:42 2014
353 | initial ['wall-following/wall-following.arff', u'wall-following', 5456, 24, 4] 23614 Wed Dec 17 18:25:42 2014
354 | done ['statlog-heart/statlog-heart.arff', u'statlog-heart', 270, 'leave-one-out cross-validation', 269, 100, 13, 2] 23601 Wed Dec 17 18:25:46 2014
355 | start waveform 23625 Wed Dec 17 18:25:46 2014
356 | initial ['waveform/waveform.arff', u'waveform', 5000, 21, 3] 23625 Wed Dec 17 18:25:46 2014
357 | start waveform-noise 23613 Wed Dec 17 18:25:47 2014
358 | initial ['waveform-noise/waveform-noise.arff', u'waveform-noise', 5000, 40, 3] 23613 Wed Dec 17 18:25:47 2014
359 | start wine 23617 Wed Dec 17 18:25:48 2014
360 | initial ['wine/wine.arff', u'wine', 178, 13, 3] 23617 Wed Dec 17 18:25:48 2014
361 | start wine-quality-red 23611 Wed Dec 17 18:25:48 2014
362 | initial ['wine-quality-red/wine-quality-red.arff', u'wine-quality-red', 1599, 11, 6] 23611 Wed Dec 17 18:25:48 2014
363 | start wine-quality-white 23632 Wed Dec 17 18:25:51 2014
364 | initial ['wine-quality-white/wine-quality-white.arff', u'wine-quality-white', 4898, 11, 7] 23632 Wed Dec 17 18:25:51 2014
365 | done ['twonorm/twonorm.arff', u'twonorm', 7400, 'test-set cross-validation', 7300, 100, 20, 2] 23628 Wed Dec 17 18:25:52 2014
366 | done ['wine-quality-red/wine-quality-red.arff', u'wine-quality-red', 1599, 'test-set cross-validation', 1499, 100, 11, 6] 23611 Wed Dec 17 18:25:56 2014
367 | start yeast 23619 Wed Dec 17 18:25:57 2014
368 | initial ['yeast/yeast.arff', u'yeast', 1484, 8, 10] 23619 Wed Dec 17 18:25:57 2014
369 | done ['semeion/semeion.arff', u'semeion', 1593, 'test-set cross-validation', 1493, 100, 256, 10] 23627 Wed Dec 17 18:26:00 2014
370 | start zoo 23601 Wed Dec 17 18:26:01 2014
371 | initial ['zoo/zoo.arff', u'zoo', 101, 16, 7] 23601 Wed Dec 17 18:26:01 2014
372 | done ['yeast/yeast.arff', u'yeast', 1484, 'test-set cross-validation', 1384, 100, 8, 10] 23619 Wed Dec 17 18:26:05 2014
373 | done ['teaching/teaching.arff', u'teaching', 151, 'leave-one-out cross-validation', 150, 100, 5, 3] 23610 Wed Dec 17 18:26:06 2014
374 | done ['waveform/waveform.arff', u'waveform', 5000, 'test-set cross-validation', 4900, 100, 21, 3] 23625 Wed Dec 17 18:26:06 2014
375 | done ['vertebral-column-2clases/datos_orixinais/column_2C_weka.arff', u'column_2C_weka', 310, 'leave-one-out cross-validation', 309, 100, 6, 2] 23624 Wed Dec 17 18:26:14 2014
376 | done ['waveform-noise/waveform-noise.arff', u'waveform-noise', 5000, 'test-set cross-validation', 4900, 100, 40, 3] 23613 Wed Dec 17 18:26:17 2014
377 | done ['vertebral-column-2clases/vertebral-column-2clases.arff', u'vertebral-column-2clases', 310, 'leave-one-out cross-validation', 309, 100, 6, 2] 23609 Wed Dec 17 18:26:18 2014
378 | done ['magic/magic.arff', u'magic', 19020, 'test-set cross-validation', 18920, 100, 10, 2] 23622 Wed Dec 17 18:26:20 2014
379 | done ['wall-following/wall-following.arff', u'wall-following', 5456, 'test-set cross-validation', 5356, 100, 24, 4] 23614 Wed Dec 17 18:26:21 2014
380 | done ['wine-quality-white/wine-quality-white.arff', u'wine-quality-white', 4898, 'test-set cross-validation', 4798, 100, 11, 7] 23632 Wed Dec 17 18:26:25 2014
381 | done ['wine/wine.arff', u'wine', 178, 'leave-one-out cross-validation', 177, 100, 13, 3] 23617 Wed Dec 17 18:26:34 2014
382 | done ['musk-1/musk-1.arff', u'musk-1', 476, 'leave-one-out cross-validation', 475, 100, 166, 2] 23631 Wed Dec 17 18:26:38 2014
383 | done ['audiology-std/audiology-std_train.arff', u'audiology-std', 171, 'leave-one-out cross-validation', 170, 100, 59, 18] 23608 Wed Dec 17 18:26:39 2014
384 | done ['vertebral-column-2clases/datos_orixinais/column_3C_weka.arff', u'column_3C_weka', 310, 'leave-one-out cross-validation', 309, 100, 6, 3] 23620 Wed Dec 17 18:26:40 2014
385 | done ['vertebral-column-3clases/vertebral-column-3clases.arff', u'vertebral-column-3clases', 310, 'leave-one-out cross-validation', 309, 100, 6, 3] 23612 Wed Dec 17 18:26:42 2014
386 | done ['audiology-std/con_patrons_repetidos/audiology-std_train.arff', u'audiology-std', 194, 'leave-one-out cross-validation', 193, 100, 59, 18] 23607 Wed Dec 17 18:26:50 2014
387 | done ['zoo/zoo.arff', u'zoo', 101, 'leave-one-out cross-validation', 100, 100, 16, 7] 23601 Wed Dec 17 18:26:54 2014
388 | done ['plant-texture/plant-texture.arff', u'plant-texture', 1599, 'test-set cross-validation', 1499, 100, 64, 100] 23618 Wed Dec 17 18:27:17 2014
389 | done ['plant-margin/plant-margin.arff', u'plant-margin', 1600, 'test-set cross-validation', 1500, 100, 64, 100] 23629 Wed Dec 17 18:27:28 2014
390 | done ['primary-tumor/primary-tumor.arff', u'primary-tumor', 330, 'leave-one-out cross-validation', 329, 100, 17, 15] 23615 Wed Dec 17 18:27:40 2014
391 | done ['soybean/soybean_train.arff', u'soybean', 307, 'leave-one-out cross-validation', 306, 100, 35, 18] 23630 Wed Dec 17 18:28:22 2014
392 | done ['statlog-shuttle/statlog-shuttle_train.arff', u'statlog-shuttle', 43500, 'test-set cross-validation', 43400, 100, 9, 7] 23605 Wed Dec 17 18:29:00 2014
393 | done ['adult/adult_train.arff', u'adult', 32561, 'test-set cross-validation', 32461, 100, 14, 2] 23604 Wed Dec 17 18:29:25 2014
394 | done ['plant-shape/plant-shape.arff', u'plant-shape', 1600, 'test-set cross-validation', 1500, 100, 64, 100] 23616 Wed Dec 17 18:29:30 2014
395 | done ['letter/letter.arff', u'letter', 20000, 'test-set cross-validation', 19900, 100, 16, 26] 23603 Wed Dec 17 18:29:36 2014
396 | done ['chess-krvk/chess-krvk.arff', u'chess-krvk', 28056, 'test-set cross-validation', 27956, 100, 6, 18] 23621 Wed Dec 17 18:33:45 2014
397 | done ['libras/libras.arff', u'libras', 360, 'leave-one-out cross-validation', 359, 100, 90, 15] 23602 Wed Dec 17 18:34:41 2014
398 | done ['arrhythmia/arrhythmia.arff', u'arrhythmia', 452, 'leave-one-out cross-validation', 451, 100, 262, 13] 23606 Wed Dec 17 18:39:41 2014
399 | done ['connect-4/connect-4.arff', u'connect-4', 67557, 'test-set cross-validation', 67457, 100, 42, 2] 23626 Wed Dec 17 19:28:23 2014
400 | done ['miniboone/miniboone.arff', u'miniboone', 130064, 'test-set cross-validation', 129964, 100, 50, 2] 23623 Wed Dec 17 21:56:09 2014
401 | done processing pool 23595 Wed Dec 17 21:56:09 2014
402 | done 23595 Wed Dec 17 21:56:28 2014
403 |
--------------------------------------------------------------------------------