├── .gitignore
├── ExploreModels
    ├── modeltables.RData
    ├── server.R
    ├── shinyFunctions.R
    └── ui.R
├── LICENSE
├── PrepareSummaries
    └── prepForShiny.R
├── README.md
├── ScoreModels
    ├── processArffs.py
    └── runOnEC2.bash
├── ScoreModelsR
    └── .gitignore
└── data
    ├── modelres.csv.gz
    ├── problemDescr.csv
    └── processArffs_log.txt


/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | *~
3 | *.pem
4 | *.pem.txt
5 | *.dcf
6 | 
7 | 


--------------------------------------------------------------------------------
/ExploreModels/modeltables.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/ExploreModels/49e68c6101739d06eb3a3ab5b6a6fad337ca9bf0/ExploreModels/modeltables.RData


--------------------------------------------------------------------------------
/ExploreModels/server.R:
--------------------------------------------------------------------------------
1 | #AccuracyCounts
2 | 
3 | #
4 | # from directory above this one:
5 | # library(shiny)
6 | # runApp("ExploreModels")
7 | #
8 | options(rgl.useNULL=TRUE)
9 | library(shiny)
10 | library(shinyRGL) # provides renderWebGL(), used for the 3d tab
11 | source("shinyFunctions.R")
12 | 
13 | 
14 | # Server side of the app: each tab gets a caption and a plot, all computed from
15 | # the data sets that pass the sidebar filters.
16 | shinyServer(function(input, output) {
17 | 
18 |   # list(sets=..., nsets=...) describing the data sets selected by the sidebar
19 |   dataSets = reactive(
20 |     getDataSets(input$minrows, input$maxrows, input$nclasses, input$ratio)
21 |   )
22 | 
23 |   # summaries of the selected sets, one per tab
24 |   accTable = reactive(getAccuracyCounts(dataSets()$sets))
25 |   avgAccTable = reactive(calculateAccuracy(dataSets()$sets))
26 |   dmatrix = reactive(getDistanceMatrix(dataSets()$sets))
27 | 
28 |   # caption shown on both model-distance tabs (dendrogram and 3d)
29 |   distanceLabel = reactive(
30 |     paste("Distances between models, over data sets of size",
31 |           input$minrows, "to", input$maxrows, "rows")
32 |   )
33 | 
34 |   # Average Accuracy tab
35 |   output$plotlabelAA = renderText(
36 |     paste("Average model accuracy over sets with",
37 |           input$minrows, "to", input$maxrows, "rows")
38 |   )
39 | 
40 |   output$accPlot = renderPlot({
41 |     title = paste(dataSets()$nsets, "sets in total")
42 |     plotAccuracy(avgAccTable(), title)
43 |   })
44 | 
45 |   # Accuracy counts tab
46 |   output$plotlabelAC = renderText(
47 |     paste("How many times is each model maximally accurate for data sets of size",
48 |           input$minrows, "to", input$maxrows, "rows?")
49 |   )
50 | 
51 |   output$countPlot = renderPlot({
52 |     title = paste(dataSets()$nsets, "sets in total")
53 |     plotAccCounts(accTable(), title)
54 |   })
55 | 
56 |   # Dendrogram tab
57 |   output$plotlabelDend = renderText(distanceLabel())
58 | 
59 |   output$plotDend = renderPlot({
60 |     title = paste("Model groupings:", dataSets()$nsets, "sets in total")
61 |     plotModelDends(dmatrix(), title)
62 |   })
63 | 
64 |   # 3d plots tab
65 |   # ui.R requests this caption under its own id, "plotlabelDent" (an output id
66 |   # may be used only once per page), so it has to be rendered separately.
67 |   output$plotlabelDent = renderText(distanceLabel())
68 | 
69 |   output$plotDists = renderWebGL({
70 |     plotModelDists(dmatrix())
71 |   })
72 | 
73 | })
74 | 


--------------------------------------------------------------------------------
/ExploreModels/shinyFunctions.R:
--------------------------------------------------------------------------------
1 | #AccuracyCounts
2 | 
3 | options(gsubfn.engine="R")
4 | library(sqldf)
5 | library(ggplot2)
6 | library(rgl)
7 | 
8 | load("modeltables.RData")
9 | 
10 | # More globals
11 | MAXROWS = max(PROBLEM_DESC$nRows)
12 | MAXN = max(PROBLEM_DESC$nTargets)
13 | DS_RATIO = with(PROBLEM_DESC, nVars/nRows)
14 | 
15 | # Mean of isRight per model, restricted to the given problem files and models.
16 | calculateAccuracy = function(problemfilelist, modellist = ALL_BUT_GROUND, accmat=ACCURACY_MATRIX) {
17 |   amat = subset(accmat, Problem_File_Name %in% problemfilelist & Model_Name %in% modellist)
18 |   sqldf("select Model_Name, avg(isRight) accuracy
19 |          from amat
20 |          group by Model_Name
21 |          order by Model_Name")
22 | }
23 | 
24 | # Horizontal bar chart of accuracy per model; the most accurate model(s) are drawn in dark blue.
25 | plotAccuracy = function(acc_table, title = "Model accuracies" ) {
26 |   best = acc_table$accuracy >= max(acc_table$accuracy)
27 |   acc_table$isbest='no'
28 |   acc_table$isbest[best]='yes'
29 |   acc_table$accstring = format(acc_table$accuracy, digits=3)
30 | 
31 |   ggplot(acc_table, aes(x=Model_Name, y=accuracy, fill=isbest)) +
32 |     geom_bar(stat="identity") +
33 | 
scale_fill_manual(values=c("no"="darkgray", "yes"="darkblue")) +
34 |     geom_text(aes(y=-0.05, label=accstring)) +
35 |     coord_flip() + guides(fill="none") + ggtitle(title)
36 | }
37 | 
38 | # For each model, count the problem files on which it reaches that file's best accuracy.
39 | # test range - 10 000 - 100 000
40 | getAccuracyCounts = function(problemfilelist, modellist=ALL_BUT_GROUND, accmat=ACCURACY_MATRIX) {
41 |   amat = subset(accmat, Problem_File_Name %in% problemfilelist & Model_Name %in% modellist)
42 |   # accuracy of every model on every problem file
43 |   by_problem = sqldf("select Problem_File_Name,
44 |                              Model_Name,
45 |                              avg(isRight) accuracy
46 |                       from amat
47 |                       group by Problem_File_Name, Model_Name")
48 |   arg_max = sqldf("select Problem_File_Name,
49 |                           max(accuracy) maxacc
50 |                    from by_problem
51 |                    group by Problem_File_Name") # best accuracy reached on each file
52 |   # a model "wins" a file when its accuracy equals that file's best; tied models all win
53 |   most_acc = sqldf("select bp.Model_Name Model_Name,
54 |                            count(bp.Problem_File_Name) nwins
55 |                     from by_problem bp,
56 |                          arg_max am
57 |                     where bp.Problem_File_Name = am.Problem_File_Name
58 |                       and bp.accuracy = am.maxacc
59 |                     group by bp.Model_Name
60 |                     order by bp.Model_Name")
61 |   # models without a single win are missing from the query result; add them with nwins = 0
62 |   no_wins = setdiff(modellist, most_acc$Model_Name)
63 |   if(length(no_wins) > 0) {
64 |     zero_rows = data.frame(Model_Name=no_wins, nwins=0, stringsAsFactors=FALSE)
65 |     most_acc = rbind(most_acc, zero_rows)
66 |   }
67 | 
68 |   most_acc
69 | }
70 | # Horizontal bar chart of win counts per model; the model(s) with the most wins are drawn in dark blue.
71 | plotAccCounts = function(most_acc_table, title='Accuracy Counts') {
72 |   best = most_acc_table$nwins >= max(most_acc_table$nwins)
73 |   most_acc_table$isbest='no'
74 |   most_acc_table$isbest[best]='yes'
75 | 
76 |   ggplot(most_acc_table, aes(x=Model_Name, y=nwins, fill=isbest)) +
77 |     geom_bar(stat="identity") +
78 |     scale_fill_manual(values=c("no"="darkgray", "yes"="darkblue")) +
79 |     geom_text(aes(y=-1, label=nwins)) +
80 |     coord_flip() + guides(fill="none") + ggtitle(title) # "none" hides the legend; FALSE is the deprecated spelling
81 | }
82 | 
83 | # Model-by-model distance matrix: square root of sqDist summed over the given problem files.
84 | getDistanceMatrix = function(problemfilelist, modellist=ALLMODELS, distmat=DISTANCE_TABLE) {
85 |   dmat = subset(distmat, Problem_File_Name %in% problemfilelist &
86 |                   modelName1 %in% modellist &
87 |                   modelName2 %in% modellist)
88 |   distSqs = 
sqldf("select modelName1,
89 |                          modelName2,
90 |                          sum(sqDist) sqDist
91 |                   from dmat
92 |                   group by modelName1, modelName2")
93 |   as.matrix(xtabs(sqrt(sqDist) ~ modelName1+modelName2, data=distSqs))
94 | }
95 | # Summed squared distance between 'ground truth' and each model, one row per model.
96 | distFromTruth = function(problemfilelist, modellist=ALL_BUT_GROUND, distmat=DISTANCE_TABLE) {
97 |   dmat = subset(distmat, Problem_File_Name %in% problemfilelist &
98 |                   modelName1 == 'ground truth' &
99 |                   modelName2 %in% modellist)
100 |   distSqs = sqldf("select modelName1 ground,
101 |                           modelName2 modelName,
102 |                           sum(sqDist) sqDist
103 |                    from dmat
104 |                    group by modelName1, modelName2
105 |                    order by modelName2")
106 |   distSqs[, c("modelName", "sqDist")] # don't need to return the column that just says ground truth
107 | }
108 | 
109 | # Horizontal bar chart of sqrt(sqDist) per model; the closest model is drawn in dark blue.
110 | plotDistFromTruth = function(dist_table, title = "Model distance from ground truth" ) {
111 |   best = which.min(dist_table$sqDist)
112 |   dist_table$isbest='no'
113 |   dist_table$isbest[best]='yes'
114 |   dist_table$dist = sqrt(dist_table$sqDist)
115 |   dist_table$diststr = format(dist_table$dist, digits=3)
116 | 
117 |   ggplot(dist_table, aes(x=modelName, y=dist, fill=isbest)) +
118 |     geom_bar(stat="identity") +
119 |     scale_fill_manual(values=c("no"="darkgray", "yes"="darkblue")) +
120 |     geom_text(aes(y=-1.5, label=diststr)) +
121 |     coord_flip() + guides(fill="none") + ggtitle(title) # "none" hides the legend; FALSE is the deprecated spelling
122 | }
123 | # 3d scatter of the models (cmdscale of the distance matrix, k=3), with a segment from ground truth to each model.
124 | plotModelDists = function(dmatrix) {
125 |   plt3 = cmdscale(dmatrix, k=3)
126 | 
127 |   # set ground truth to the origin
128 |   origin = plt3['ground truth', ] # a row
129 |   plt3 = t(apply(plt3, 1, FUN=function(row) {row-origin}))
130 |   if(sum(colSums(plt3)) < 0) plt3=-plt3 # reflect it so that origin is bottom left
131 | 
132 |   # locate the origin row before relabelling, in case LABELMAP renames 'ground truth'
133 |   originrow = which(rownames(plt3)=='ground truth')
134 |   rownames(plt3) = LABELMAP[rownames(plt3)]
135 | 
136 |   plot3d(plt3, size=5, col="blue", xlab="X", ylab="Y", zlab="Z")
137 |   # one segment from the origin to every other model
138 |   others = setdiff(seq_len(nrow(plt3)), originrow)
139 |   for(irow in others) {
140 |     plot3d(plt3[c(irow, originrow),], add=TRUE, type="l", 
col="gray")
141 |   }
142 |   text3d(plt3, texts=rownames(plt3), adj=c(0,0))
143 | 
144 | }
145 | 
146 | # Hierarchical clustering (hclust) of the model distance matrix, drawn as a dendrogram.
147 | plotModelDends = function(dmatrix, title="Model Groupings") {
148 |   tree = hclust(as.dist(dmatrix))
149 |   plot(tree, main=title, axes=FALSE, sub='', xlab='', ylab='')
150 | }
151 | 
152 | # Select the data sets that satisfy all three sidebar filters.
153 | #
154 | # minrows, maxrows = inclusive bounds on nRows; NA or a missing value (e.g. an
155 | #                    emptied numeric input) means "no bound on that side"
156 | # nclasslist = subset of c("2", "3:5", "6:10", "11:MAXN")
157 | # ratiolist = subset of c("c(0, 0.01)", "c(0.01, 0.03)", "c(0.03, 0.1)", "c(0.1, Inf)")
158 | #             each string names an interval (lo, hi] of nVars/nRows
159 | #
160 | # Returns list(sets = the matching Problem_File_Name values, nsets = how many).
161 | #
162 | # nclasslist and ratiolist are sent by the browser, so they are looked up in
163 | # fixed tables rather than run through eval(parse()); strings that are not in
164 | # the tables are dropped with a warning.
165 | getDataSets = function(minrows, maxrows, nclasslist, ratiolist) {
166 |   classBuckets = list("2" = 2,
167 |                       "3:5" = 3:5,
168 |                       "6:10" = 6:10,
169 |                       # 11:MAXN would count downwards if MAXN were below 11
170 |                       "11:MAXN" = if(MAXN >= 11) 11:MAXN else integer(0))
171 |   ratioBuckets = list("c(0, 0.01)" = c(0, 0.01),
172 |                       "c(0.01, 0.03)" = c(0.01, 0.03),
173 |                       "c(0.03, 0.1)" = c(0.03, 0.1),
174 |                       "c(0.1, Inf)" = c(0.1, Inf))
175 | 
176 |   # entries of 'table' named by 'keys'; unknown keys are reported and skipped
177 |   pick = function(keys, table, what) {
178 |     keys = as.character(keys)
179 |     unknown = setdiff(keys, names(table))
180 |     if(length(unknown) > 0) {
181 |       warning("getDataSets: ignoring unrecognised ", what, " selection: ",
182 |               paste(unknown, collapse=", "), call.=FALSE)
183 |     }
184 |     table[intersect(keys, names(table))]
185 |   }
186 | 
187 |   if(length(minrows) != 1 || is.na(minrows)) minrows = -Inf
188 |   if(length(maxrows) != 1 || is.na(maxrows)) maxrows = Inf
189 |   filterRows = minrows <= PROBLEM_DESC$nRows & PROBLEM_DESC$nRows <= maxrows
190 | 
191 |   classnumlist = unlist(pick(nclasslist, classBuckets, "class-count"), use.names=FALSE)
192 |   filterN = PROBLEM_DESC$nTargets %in% classnumlist
193 | 
194 |   # a data set passes when its ratio falls in any of the selected intervals
195 |   filterR = logical(nrow(PROBLEM_DESC))
196 |   for(interval in pick(ratiolist, ratioBuckets, "shape")) {
197 |     filterR = filterR | (interval[1] < DS_RATIO & DS_RATIO <= interval[2])
198 |   }
199 | 
200 |   filter = filterRows & filterN & filterR
201 | 
202 |   list(sets=PROBLEM_DESC$Problem_File_Name[filter], nsets=sum(filter))
203 | }
204 | 
205 | 
206 | #-------------------------
207 | # print("global results")
208 | #
209 | #
210 | # print("model accuracy")
211 | # print(calculateAccuracy(ALLFILES))
212 | #
213 | #
214 | # print("model distances from ground truth")
215 | # print(distFromTruth(ALLFILES))
216 | #
217 | #
218 | # print("how many times did each model achieve best score?")
219 | # print(getAccuracyCounts(ALLFILES))
220 | #
221 | # print("model distances")
222 | # plotModelDists(getDistanceMatrix(ALLFILES))
223 | # plotModelDends(getDistanceMatrix(ALLFILES), "Model grouping, all sets")
224 | #
225 | 


--------------------------------------------------------------------------------
/ExploreModels/ui.R:
--------------------------------------------------------------------------------
1 | # AccuracyCounts
2 | 
options(rgl.useNULL=TRUE)
3 | library(shiny)
4 | library(shinyRGL)
5 | source("shinyFunctions.R")
6 | 
7 | urlNote = 'See The Win-Vector blog for details.'
8 | 
9 | 
10 | # UI: a sidebar with the data set filters and a main panel with one tab per view
11 | shinyUI(fluidPage(
12 | 
13 |   # Application title
14 |   titlePanel("Explore Model Behavior"),
15 | 
16 |   # Sidebar: filters that decide which data sets are summarized
17 |   # (row-count range, number of target classes, variables-per-row ratio)
18 |   sidebarLayout(
19 |     sidebarPanel(
20 |       numericInput("minrows",
21 |                    "Minimum data set size:",
22 |                    0, # initial value
23 |                    min = 0,
24 |                    max = MAXROWS,
25 |                    step = 100),
26 | 
27 |       numericInput("maxrows",
28 |                    "Maximum data set size:",
29 |                    MAXROWS, # initial value
30 |                    min = 0,
31 |                    max = MAXROWS,
32 |                    step = 100),
33 | 
34 |       hr(),
35 | 
36 |       # the values on the right are the strings getDataSets() expects in nclasslist
37 |       checkboxGroupInput("nclasses", "Number of target classes (check all that apply)",
38 |                          c("Two-Class" = "2",
39 |                            "3-5 classes" = "3:5",
40 |                            "6-10 classes" = "6:10",
41 |                            "more than 10 classes" = "11:MAXN"),
42 |                          selected = c("2", "3:5", "6:10", "11:MAXN")), # all of them
43 | 
44 |       hr(),
45 | 
46 |       # the values on the right are the strings getDataSets() expects in ratiolist
47 |       checkboxGroupInput("ratio", "Data set shape (variables vs. rows)", # intervals ( ..]
48 |                          c("Narrow (Less than 1 variable per 100 rows)" = "c(0, 0.01)",
49 |                            "Moderate (1 to 3 variables per 100 rows)" = "c(0.01, 0.03)",
50 |                            "Wide (3 to 10 variables per 100 rows)" = "c(0.03, 0.1)",
51 |                            "Very wide (More than 10 variables per 100 rows)" = "c(0.1, Inf)"),
52 |                          selected = c("c(0, 0.01)", "c(0.01, 0.03)", "c(0.03, 0.1)", "c(0.1, Inf)")) # all of them
53 |     ),
54 | 
55 |     # One tab per view; each shows a caption, the blog note and a plot.
56 |     # The 3d tab's caption has its own id ("plotlabelDent") because an output id
57 |     # may appear only once per page; server.R has to render that id for it to show.
58 |     mainPanel (
59 |       tabsetPanel (
60 |         tabPanel("Average Accuracy", list(textOutput("plotlabelAA"),HTML(urlNote)), plotOutput("accPlot")),
61 |         tabPanel("Accuracy Counts", list(textOutput("plotlabelAC"),HTML(urlNote)), plotOutput("countPlot")),
62 |         tabPanel("Model Dendrogram", list(textOutput("plotlabelDend"),HTML(urlNote)), plotOutput("plotDend")),
63 |         tabPanel("Model Distances (3d)", list(textOutput("plotlabelDent"),HTML(urlNote)), webGLOutput("plotDists"))
64 |       )
65 |     )
66 | 
67 |   ) # end sidebarLayout
68 | )
69 | )
70 | 
71 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 | 
4 | Copyright (C) 2007 Free Software Foundation, Inc. 
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 | 
8 | Preamble
9 | 
10 | The GNU General Public License is a free, copyleft license for
11 | software and other kinds of works.
12 | 
13 | The licenses for most software and other practical works are designed
14 | to take away your freedom to share and change the works. By contrast,
15 | the GNU General Public License is intended to guarantee your freedom to
16 | share and change all versions of a program--to make sure it remains free
17 | software for all its users. 
We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 
49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. 
The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 
122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 
155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 
186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. 
This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 
256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 
287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 
317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. 
If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 
386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 
421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. 
If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 
486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 
512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. 
If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 
578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 
613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | {one line to give the program's name and a brief idea of what it does.} 635 | Copyright (C) {year} {name of author} 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see <http://www.gnu.org/licenses/>. 649 | 650 | Also add information on how to contact you by electronic and paper mail.
651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | {project} Copyright (C) {year} {fullname} 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | <http://www.gnu.org/licenses/>. 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | <http://www.gnu.org/philosophy/why-not-lgpl.html>.
675 | 676 | -------------------------------------------------------------------------------- /PrepareSummaries/prepForShiny.R: -------------------------------------------------------------------------------- 1 | # setwd("~/Documents/Projects/scratch.git/shapeOfML") 2 | 3 | options(gsubfn.engine="R") 4 | library(sqldf) 5 | 6 | 7 | # 8 | # Globals 9 | # 10 | PROBLEM_DESC = read.table('data/problemDescr.csv', header=TRUE, sep=',', stringsAsFactors=TRUE) 11 | colnames(PROBLEM_DESC) = gsub('\\.', '_', colnames(PROBLEM_DESC) ) 12 | 13 | MODEL_RESULTS = read.table('data/modelres.csv.gz', header=TRUE, sep=',', stringsAsFactors=TRUE) 14 | colnames(MODEL_RESULTS) = gsub('\\.', '_', colnames(MODEL_RESULTS) ) 15 | # remove the RBM 16 | # isRBM = grepl("(RBM)", MODEL_RESULTS$Model_Name) 17 | # MODEL_RESULTS = subset(MODEL_RESULTS, !isRBM) 18 | # MODEL_RESULTS$Model_Name = factor(MODEL_RESULTS$Model_Name) 19 | 20 | 21 | ALLMODELS = sort(unique(MODEL_RESULTS$Model_Name)) 22 | 23 | ALL_BUT_GROUND = setdiff(ALLMODELS, c('ground truth')) 24 | 25 | LABELMAP = c("Decision Tree"="DT", "Gradient Boosting"="GB", "ground truth"="ground truth", "KNN5"="KNN5", "Logistic Regression"="LR", "Naive Bayes"="NB", "Random Forest"="RF", "SVM"="SVM") # keyed by model name: the sort order of ALLMODELS depends on locale / R version, so positional labels could land on the wrong model 26 | stopifnot(all(as.character(ALLMODELS) %in% names(LABELMAP))); LABELMAP = LABELMAP[as.character(ALLMODELS)] # fail loudly on an unlabeled model, then keep ALLMODELS order 27 | 28 | ALLFILES = MODEL_RESULTS$Problem_File_Name 29 | 30 | 31 | # 32 | # Functions 33 | # 34 | 35 | # 36 | # Just calculate the accuracy matrix once; we can use subsets of it later 37 | # [Problem_File_Name, Model_Name, Row_Number, PredictionIndex, Correct_Answer, isRight] 38 | # 39 | calculateAccuracyMatrix = function(modelres=MODEL_RESULTS) { 40 | # first, get the maximum probabilities --- change this to use ArgMaxIndicator column 41 | 42 | maxProbs = sqldf("select * from modelres 43 | where ArgMaxIndicator=1 44 | group by Problem_File_Name, Model_Name, Row_Number 45 | order by Problem_File_Name, Model_Name, Row_Number") 46 | maxProbs$isRight = as.numeric(with(maxProbs,PredictionIndex==Correct_Answer)) 47 | maxProbs 48 | } 49 | 50 | ACCURACY_MATRIX = calculateAccuracyMatrix() 51 | 52 | # 53 | # calculate the matrix
[Problem_File_Name, modelName1, modelName2, sqDist] 54 | # 55 | # NOTE: if I do a similar calculation with deviance, use SmoothedProbIndex column 56 | # 57 | calculateDistanceTable = function(modelres=MODEL_RESULTS) { 58 | distSqs = sqldf("select 59 | m1.Problem_File_Name Problem_File_Name, 60 | m1.Model_Name modelName1, 61 | m2.Model_Name modelName2, 62 | sum(square(m1.ProbIndex-m2.ProbIndex)) sqDist 63 | from 64 | modelres m1, 65 | modelres m2 66 | where 67 | m1.Problem_File_Name = m2.Problem_File_Name 68 | and m1.Row_Number = m2.Row_Number 69 | and m1.PredictionIndex = m2.PredictionIndex 70 | group by 71 | m1.Problem_File_Name, 72 | m1.Model_Name, 73 | m2.Model_Name 74 | order by m1.Problem_File_Name, m1.Model_Name, m2.Model_Name") 75 | distSqs 76 | } 77 | 78 | DISTANCE_TABLE = calculateDistanceTable() 79 | 80 | save(PROBLEM_DESC, MODEL_RESULTS, ALLMODELS, ALL_BUT_GROUND, LABELMAP, 81 | ALLFILES,ACCURACY_MATRIX, DISTANCE_TABLE, 82 | file="ExploreModels/modeltables.RData") # the Shiny app in ExploreModels/ loads modeltables.RData from its own directory 83 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | [code and data for "The Geometry of Classifiers"](http://www.win-vector.com/blog/2014/12/the-geometry-of-classifiers/) 3 | -------------------------------------------------------------------------------- /ScoreModels/processArffs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # script from Win-Vector LLC http://www.win-vector.com/ GPL3 license 4 | # run the data from the paper cited below through scikit-learn 5 | # http://jmlr.csail.mit.edu/papers/v15/delgado14a.html 6 | # "Do we Need Hundreds of Classifiers to Solve Real World Classification Problems?"
Manuel Fernandez-Delgado, Eva Cernadas, Senen Barro, Dinani Amorim; 15(Oct):3133-3181, 2014 7 | # see http://www.win-vector.com/blog/2014/12/a-comment-on-preparing-data-for-classifiers/ (and other blog articles) for some details 8 | # reads argv[1] which should be a gzipped tar file containing arff files ending in .arff 9 | # expects all input variables to be numeric and result variable to be categorical 10 | # produces model modelres.csv.gz and problemDescr.csv 11 | # R code to look at accuracy: 12 | # m <- read.table('modelres.csv.gz',header=TRUE,sep=',') 13 | # aggregate(ArgMaxIndicator~Problem.File.Name+Model.Name,data=subset(m, PredictionIndex== Correct.Answer),FUN=mean) 14 | 15 | 16 | import re 17 | import math 18 | import tarfile 19 | import random 20 | import numpy.random 21 | import csv 22 | import sys 23 | import time 24 | import os 25 | # liac-arff https://pypi.python.org/pypi/liac-arff 'pip install liac-arff' 26 | import arff 27 | import gzip 28 | import pandas 29 | import numpy 30 | import sklearn.svm 31 | import sklearn.ensemble 32 | import sklearn.tree 33 | import sklearn.naive_bayes 34 | import sklearn.linear_model 35 | import sklearn.neighbors 36 | import scipy.optimize 37 | import sklearn.neural_network 38 | import multiprocessing 39 | import contextlib 40 | 41 | print 'start',os.getpid(),time.strftime("%c") 42 | sys.stdout.flush() 43 | 44 | 45 | testTarget = 100 46 | testStrategyCut = 500 47 | doXForm = False 48 | maxXFormTrainSize = 5000 49 | maxCTrainSize = 200000 50 | 51 | 52 | randSeed = 2382085237 53 | random.seed(randSeed) 54 | numpy.random.seed(randSeed) 55 | 56 | 57 | 58 | # based on http://scikit-learn.org/stable/auto_examples/plot_rbm_logistic_classification.html#example-plot-rbm-logistic-classification-py 59 | class RBMform: 60 | """Restricted Boltzmann / transform""" 61 | def __init__(self, random_state): 62 | self.xform_ = sklearn.neural_network.BernoulliRBM(random_state=random_state) 63 | def scale_(self,x): 64 | nRow = len(x) 65 |
def f(i,j): 66 | return min(1.0,max(0.0,(x[i][j]-self.colrange_[j][0])/(1.0*(self.colrange_[j][1]-self.colrange_[j][0])))) 67 | return [ [ f(i,j) for j in self.varIndices_ ] for i in range(nRow) ] 68 | def fit(self, xTrain): 69 | # rescale to range [0,1] 70 | xTrain = numpy.array(xTrain) 71 | nCol = len(xTrain[0]) 72 | self.colrange_ = [ [min(xTrain[:,j]),max(xTrain[:,j])] for j in range(nCol)] 73 | self.varIndices_ = [ j for j in range(nCol) if self.colrange_[j][1]>(self.colrange_[j][0]+1.0e-6) ] 74 | xScale = self.scale_(xTrain) 75 | self.xform_.fit(xScale) 76 | def transform(self,xTest): 77 | nRow = len(xTest) 78 | sc = self.scale_(xTest) 79 | xf = self.xform_.transform(self.scale_(xTest)) 80 | # return transformed plus original scaled columns 81 | return [ numpy.array(sc[i]).tolist() + numpy.array(xf[i]).tolist() for i in range(nRow) ] 82 | 83 | 84 | # map of modelfactories (each takes a random state on instantiation) 85 | model_dict = {"SVM" : (lambda rs: sklearn.svm.SVC(kernel='rbf',gamma=0.001,C=10.0,probability=True,random_state=rs)),\ 86 | "Random Forest" : (lambda rs: sklearn.ensemble.RandomForestClassifier(n_estimators=100, max_depth=10, n_jobs=-1,random_state=rs)),\ 87 | "Gradient Boosting" : (lambda rs: sklearn.ensemble.GradientBoostingClassifier(random_state=rs)), \ 88 | "Decision Tree" : (lambda rs: sklearn.tree.DecisionTreeClassifier(random_state=rs)), \ 89 | "Naive Bayes" : (lambda rs: sklearn.naive_bayes.GaussianNB()), \ 90 | "Logistic Regression" : (lambda rs: sklearn.linear_model.LogisticRegression(random_state=rs)), \ 91 | "KNN5" : (lambda rs: sklearn.neighbors.KNeighborsClassifier(n_neighbors=5))} 92 | 93 | 94 | 95 | tarFileName = sys.argv[1] # data.tar.gz 96 | doutfile = 'modelres.csv.gz' 97 | poutfile = 'problemDescr.csv' 98 | print 'reading:',tarFileName 99 | print 'writing data to:',doutfile 100 | print 'writing problem summaries to:',poutfile 101 | sys.stdout.flush() 102 | realTypes = set(['REAL', 'NUMERIC']) 103 | 104 | # mark first 
maximal element with 1, else with zero 105 | # v numeric array 106 | def argMaxV(v): 107 | n = len(v) 108 | maxV = max(v) 109 | mv = numpy.zeros(n) 110 | for j in range(n): 111 | if v[j]>=maxV: 112 | mv[j] = 1.0 113 | break 114 | return mv 115 | 116 | # raise all entries up to a common floor while preserving total 117 | # this is a smoothing term (replacing things near zero with a floor, and scaling other entries to get mass) 118 | # and the idea is a classifier already staying away from zero is unaffected 119 | # v numeric array of non-negative entries summing to more than epsilon*len(v) 120 | # example smoothElevate([0.5,0.39,0.11,0.0],0.1) -> [0.44943820224719105, 0.35056179775280905, 0.1, 0.1] 121 | def smoothElevate(vIn,epsilon): 122 | v = numpy.array(vIn) 123 | n = len(v) 124 | while True: 125 | peggedIndices = set([i for i in range(n) if v[i]<=epsilon]) 126 | neededMass = sum([epsilon - v[i] for i in peggedIndices]) 127 | if neededMass<=0.0: 128 | return v 129 | else: 130 | scale = (1.0 - len(peggedIndices)*epsilon)/sum([vIn[i] for i in range(n) if i not in peggedIndices]) 131 | v = [ epsilon if i in peggedIndices else scale*vIn[i] for i in range(n) ] 132 | 133 | def processArff(name,reln): 134 | rname = reln['relation'] 135 | attr = reln['attributes'] 136 | print 'start\t',rname,os.getpid(),time.strftime("%c") 137 | sys.stdout.flush() 138 | data = reln['data'] 139 | ncol = len(attr) 140 | # assuming unique non-numeric feature is category to be predicted 141 | features = [ i for i in range(ncol) if isinstance(attr[i][1],basestring) and attr[i][1] in realTypes ] 142 | outcomes = [ i for i in range(ncol) if not i in set(features) ] 143 | if (not len(outcomes)==1) or (len(features)<=0): 144 | print '\tskip (not in std form)',name,attr,os.getpid(),time.strftime("%c") 145 | sys.stdout.flush() 146 | return None 147 | outcome = outcomes[0] 148 | xvars = [ [ row[i] for i in features ] for row in data ] 149 | yvar = [ row[outcome] for row in data ] 150 | nrow = 
len(yvar) 151 | levels = sorted([ l for l in set(yvar) ]) 152 | nlevels = len(levels) 153 | levelMap = dict([ (levels[j],j) for j in range(nlevels) ]) 154 | prng = numpy.random.RandomState(randSeed) 155 | def scoreRows(testIndices): 156 | scoredRowsI = [] 157 | trainIndices = [ i for i in range(nrow) if i not in set(testIndices) ] 158 | if len(trainIndices)>maxCTrainSize: 159 | trainIndices = sorted(prng.choice(trainIndices,size=maxCTrainSize,replace=False)) 160 | xTrain = [ xvars[i] for i in trainIndices ] 161 | yTrain = [ yvar[i] for i in trainIndices ] 162 | nTrain = len(yTrain) 163 | xTest = [ xvars[i] for i in testIndices ] 164 | yTest = [ yvar[i] for i in testIndices ] 165 | nTest = len(yTest) 166 | trainLevels = sorted([ l for l in set(yTrain) ]) 167 | if (nTrain>0) and (nTest>0) and (len(trainLevels)==nlevels): 168 | # naive non-parametric empirical idea: 169 | # no evidence any event can be rarer than 1/(nTrain+1) 170 | # the elevate/smoothing code needs the total weight 171 | # to sum to 1 and to have nlevels*epsilon < 1 so there is 172 | # weight to move around (so we add the 1/(1+nlevels) condition) 173 | epsilon = min(1.0/(1.0+nlevels),1.0/(1.0+nTrain)) 174 | for i in range(nTest): 175 | ti = numpy.zeros(nlevels) 176 | ti[levelMap[yTest[i]]] = 1 177 | adjTruth = smoothElevate(ti,epsilon) 178 | for j in range(nlevels): 179 | scoredRowsI.append([name,'ground truth',testIndices[i],levelMap[yTest[i]],j,ti[j],ti[j],adjTruth[j]]) 180 | def runModels(mSuffix,xTrainD,xTestD): 181 | for modelName, modelFactory in model_dict.iteritems(): 182 | model = modelFactory(numpy.random.RandomState(randSeed)) 183 | model.fit(xTrainD, yTrain) 184 | assert len(model.classes_)==nlevels 185 | for j in range(nlevels): 186 | assert levels[j]==model.classes_[j] 187 | pred = model.predict_proba(xTestD) 188 | for i in range(nTest): 189 | sawMax = False 190 | maxIndicator = argMaxV(pred[i]) 191 | adjPred = smoothElevate(pred[i],epsilon) 192 | for j in range(nlevels): 193 | 
scoredRowsI.append([name,modelName+mSuffix,testIndices[i],levelMap[yTest[i]],j,pred[i][j],maxIndicator[j],adjPred[j]]) 194 | runModels('',xTrain,xTest) 195 | if doXForm: 196 | xform = RBMform(numpy.random.RandomState(randSeed)) 197 | if nTrain<=maxXFormTrainSize: 198 | xform.fit(xTrain) 199 | else: 200 | xIndices = sorted(prng.choice(range(nTrain),size=maxXFormTrainSize,replace=False)) 201 | xform.fit([xTrain[i] for i in xIndices]) 202 | runModels(' (RBM)',xform.transform(xTrain),xform.transform(xTest)) 203 | return scoredRowsI 204 | print 'initial \t',[name,rname,nrow,len(features),nlevels],os.getpid(),time.strftime("%c") 205 | sys.stdout.flush() 206 | scoredRows = [] 207 | nTested = 0 208 | if (nrow>10) and (nlevels>1): 209 | if nrow>testStrategyCut: 210 | # single hold-out testing 211 | nTries = 10 212 | while (nTested<=0) and (nTries>0): 213 | testIndices = sorted(prng.choice(range(nrow),size=testTarget,replace=False)) 214 | nTries -= 1 215 | sri = scoreRows(testIndices) 216 | if len(sri)>0: 217 | scoredRows.extend(scoreRows(testIndices)) 218 | nTested = testTarget 219 | probDescr = [name,rname,nrow,'test-set cross-validation',nrow-testTarget,nTested,len(features),nlevels] 220 | else: 221 | # hold out one cross validation 222 | candidates = [ i for i in range(nrow) ] 223 | prng.shuffle(candidates) 224 | for ci in candidates: 225 | testIndices = [ ci ] 226 | sri = scoreRows(testIndices) 227 | if len(sri)>0: 228 | scoredRows.extend(sri) 229 | nTested += 1 230 | if nTested>=testTarget: 231 | break 232 | probDescr = [name,rname,nrow,'leave-one-out cross-validation',nrow-1,nTested,len(features),nlevels] 233 | print '\tdone\t',probDescr,os.getpid(),time.strftime("%c") 234 | sys.stdout.flush() 235 | if nTested>0: 236 | return { 'pDesc': probDescr, 'scoredRows': scoredRows } 237 | else: 238 | print '\tskip (could not split)',name,os.getpid(),time.strftime("%c") 239 | sys.stdout.flush() 240 | return None 241 | 242 | 243 | def processArffByName(targetName): 244 | with 
contextlib.closing(tarfile.open(tarFileName,'r:gz')) as t_in: 245 | for member in t_in.getmembers(): 246 | name = member.name 247 | if targetName==name: 248 | return processArff(name,arff.load(t_in.extractfile(member))) 249 | return None 250 | 251 | # get targets 252 | names = [] 253 | with contextlib.closing(tarfile.open(tarFileName,'r:gz')) as t_in: 254 | for member in t_in.getmembers(): 255 | name = member.name 256 | if not name.endswith('.arff'): 257 | continue 258 | sys.stdout.flush() 259 | if name.endswith('_test.arff'): 260 | print '\tskip (test)',name 261 | sys.stdout.flush() 262 | continue 263 | names.append(name) 264 | 265 | print names,os.getpid(),time.strftime("%c") 266 | pool = multiprocessing.Pool() 267 | results = pool.map(processArffByName,names) 268 | print 'done processing pool',os.getpid(),time.strftime("%c") 269 | 270 | # write out results 271 | with contextlib.closing(gzip.open(doutfile,'wb')) as f_out: 272 | with open(poutfile,'wb') as p_out: 273 | pwriter = csv.writer(p_out,delimiter=',') 274 | pwriter.writerow(['Problem File Name','Relation Name',"nRows","TestMethod","nTrain","nTest","nVars","nTargets"]) 275 | dwriter = csv.writer(f_out,delimiter=',') 276 | dwriter.writerow(['Problem File Name','Model Name','Row Number','Correct Answer','PredictionIndex','ProbIndex','ArgMaxIndicator','SmoothedProbIndex']) 277 | for res in results: 278 | if res is not None: 279 | pwriter.writerow(res['pDesc']) 280 | for r in res['scoredRows']: 281 | dwriter.writerow(r) 282 | f_out.flush() 283 | p_out.flush() 284 | 285 | print 'done',os.getpid(),time.strftime("%c") 286 | sys.stdout.flush() 287 | -------------------------------------------------------------------------------- /ScoreModels/runOnEC2.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # run this script on an Amazon 64 bit Amazon Linux 4 | # recommend a 64 bit instance with at least 8GB of ram 5 | # installs dev tools and Python sklearn on 
a fresh machine 6 | # also downloads the data 7 | # as of 12-17-2014 this is defaulting to Python 2.6, so written for that 8 | # did not work on micro instance, did work on c3.2xlarge instance 9 | 10 | 11 | # Start getting data (in background): 12 | echo "starting background data download" 13 | wget http://persoal.citius.usc.es/manuel.fernandez.delgado/papers/jmlr/data.tar.gz 1>/dev/null 2>&1 & 14 | 15 | 16 | echo "installing software" 17 | # Configure machine 18 | sudo yum -y groupinstall "Development Tools" 19 | # from http://dacamo76.com/blog/2012/12/07/installing-scikit-learn-on-amazon-ec2/ 20 | sudo yum -y install gcc-c++ python-devel atlas-sse3-devel lapack-devel 21 | sudo easy_install pip 22 | sudo pip install -U liac-arff 23 | sudo pip install -U pytz numpy pandas scipy scikit-learn 24 | 25 | 26 | # Wait for data load to finish 27 | echo "waiting for data to finish downloading" 28 | wait 29 | echo "expect: 252603f2a5bf8d7975a392cdf5f84fb1c5d9b5c2 data.tar.gz" 30 | shasum data.tar.gz 31 | 32 | 33 | # run the job 34 | echo "running the job" 35 | python processArffs.py data.tar.gz > processArffs_log.txt 2>&1 36 | 37 | wait 38 | echo "job done" 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /ScoreModelsR/.gitignore: -------------------------------------------------------------------------------- 1 | data.tar.gz 2 | -------------------------------------------------------------------------------- /data/modelres.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WinVector/ExploreModels/49e68c6101739d06eb3a3ab5b6a6fad337ca9bf0/data/modelres.csv.gz -------------------------------------------------------------------------------- /data/problemDescr.csv: -------------------------------------------------------------------------------- 1 | Problem File Name,Relation Name,nRows,TestMethod,nTrain,nTest,nVars,nTargets 2 | 
abalone/abalone.arff,abalone,4177,test-set cross-validation,4077,100,8,3 3 | acute-inflammation/acute-inflammation.arff,acute-inflammation,120,leave-one-out cross-validation,119,100,6,2 4 | acute-nephritis/acute-nephritis.arff,acute-nephritis,120,leave-one-out cross-validation,119,100,6,2 5 | adult/adult_train.arff,adult,32561,test-set cross-validation,32461,100,14,2 6 | annealing/annealing_train.arff,annealing,798,test-set cross-validation,698,100,31,5 7 | arrhythmia/arrhythmia.arff,arrhythmia,452,leave-one-out cross-validation,451,100,262,13 8 | audiology-std/con_patrons_repetidos/audiology-std_train.arff,audiology-std,194,leave-one-out cross-validation,193,100,59,18 9 | audiology-std/audiology-std_train.arff,audiology-std,171,leave-one-out cross-validation,170,100,59,18 10 | balance-scale/balance-scale.arff,balance-scale,625,test-set cross-validation,525,100,4,3 11 | balloons/balloons.arff,balloons,16,leave-one-out cross-validation,15,16,4,2 12 | bank/bank.arff,bank,4521,test-set cross-validation,4421,100,16,2 13 | blood/blood.arff,blood,748,test-set cross-validation,648,100,4,2 14 | breast-cancer/breast-cancer.arff,breast-cancer,286,leave-one-out cross-validation,285,100,9,2 15 | breast-cancer-wisc/breast-cancer-wisc.arff,breast-cancer-wisc,699,test-set cross-validation,599,100,9,2 16 | breast-cancer-wisc-diag/breast-cancer-wisc-diag.arff,breast-cancer-wisc-diag,569,test-set cross-validation,469,100,30,2 17 | breast-cancer-wisc-prog/breast-cancer-wisc-prog.arff,breast-cancer-wisc-prog,198,leave-one-out cross-validation,197,100,33,2 18 | breast-tissue/breast-tissue.arff,breast-tissue,106,leave-one-out cross-validation,105,100,9,6 19 | car/car.arff,car,1728,test-set cross-validation,1628,100,6,4 20 | cardiotocography-3clases/cardiotocography-3clases.arff,cardiotocography-3clases,2126,test-set cross-validation,2026,100,21,3 21 | cardiotocography-10clases/cardiotocography-10clases.arff,cardiotocography-10clases,2126,test-set cross-validation,2026,100,21,10 22 | 
chess-krvk/chess-krvk.arff,chess-krvk,28056,test-set cross-validation,27956,100,6,18 23 | chess-krvkp/chess-krvkp.arff,chess-krvkp,3196,test-set cross-validation,3096,100,36,2 24 | congressional-voting/congressional-voting.arff,congressional-voting,435,leave-one-out cross-validation,434,100,16,2 25 | conn-bench-sonar-mines-rocks/conn-bench-sonar-mines-rocks.arff,conn-bench-sonar-mines-rocks,208,leave-one-out cross-validation,207,100,60,2 26 | conn-bench-vowel-deterding/conn-bench-vowel-deterding_train.arff,conn-bench-vowel-deterding,528,test-set cross-validation,428,100,11,11 27 | connect-4/connect-4.arff,connect-4,67557,test-set cross-validation,67457,100,42,2 28 | contrac/contrac.arff,contrac,1473,test-set cross-validation,1373,100,9,3 29 | credit-approval/credit-approval.arff,credit-approval,690,test-set cross-validation,590,100,15,2 30 | cylinder-bands/cylinder-bands.arff,cylinder-bands,512,test-set cross-validation,412,100,35,2 31 | dermatology/dermatology.arff,dermatology,366,leave-one-out cross-validation,365,100,34,6 32 | echocardiogram/echocardiogram.arff,echocardiogram,131,leave-one-out cross-validation,130,100,10,2 33 | ecoli/ecoli.arff,ecoli,336,leave-one-out cross-validation,335,100,7,8 34 | energy-y1/energy-y1.arff,energy-y1,768,test-set cross-validation,668,100,8,3 35 | energy-y2/energy-y2.arff,energy-y2,768,test-set cross-validation,668,100,8,3 36 | fertility/fertility.arff,fertility,100,leave-one-out cross-validation,99,100,9,2 37 | flags/flags.arff,flags,194,leave-one-out cross-validation,193,100,28,8 38 | glass/glass.arff,glass,214,leave-one-out cross-validation,213,100,9,6 39 | haberman-survival/haberman-survival.arff,haberman-survival,306,leave-one-out cross-validation,305,100,3,2 40 | hayes-roth/hayes-roth_train.arff,hayes-roth,132,leave-one-out cross-validation,131,100,3,3 41 | heart-cleveland/heart-cleveland.arff,heart-cleveland,303,leave-one-out cross-validation,302,100,13,5 42 | 
heart-hungarian/heart-hungarian.arff,heart-hungarian,294,leave-one-out cross-validation,293,100,12,2 43 | heart-switzerland/heart-switzerland.arff,heart-switzerland,123,leave-one-out cross-validation,122,100,12,5 44 | heart-va/heart-va.arff,heart-va,200,leave-one-out cross-validation,199,100,12,5 45 | hepatitis/hepatitis.arff,hepatitis,155,leave-one-out cross-validation,154,100,19,2 46 | hill-valley/hill-valley_train.arff,hill-valley,606,test-set cross-validation,506,100,100,2 47 | horse-colic/horse-colic_train.arff,horse-colic,300,leave-one-out cross-validation,299,100,25,2 48 | ilpd-indian-liver/ilpd-indian-liver.arff,ilpd-indian-liver,583,test-set cross-validation,483,100,9,2 49 | image-segmentation/image-segmentation_train.arff,image-segmentation,210,leave-one-out cross-validation,209,100,18,7 50 | ionosphere/ionosphere.arff,ionosphere,351,leave-one-out cross-validation,350,100,33,2 51 | iris/iris.arff,iris,150,leave-one-out cross-validation,149,100,4,3 52 | led-display/led-display.arff,led-display,1000,test-set cross-validation,900,100,7,10 53 | lenses/lenses.arff,lenses,24,leave-one-out cross-validation,23,24,4,3 54 | letter/letter.arff,letter,20000,test-set cross-validation,19900,100,16,26 55 | libras/libras.arff,libras,360,leave-one-out cross-validation,359,100,90,15 56 | low-res-spect/low-res-spect.arff,low-res-spect,531,test-set cross-validation,431,100,100,9 57 | lung-cancer/lung-cancer.arff,lung-cancer,32,leave-one-out cross-validation,31,32,56,3 58 | lymphography/lymphography.arff,lymphography,148,leave-one-out cross-validation,147,100,18,4 59 | magic/magic.arff,magic,19020,test-set cross-validation,18920,100,10,2 60 | mammographic/mammographic.arff,mammographic,961,test-set cross-validation,861,100,5,2 61 | miniboone/miniboone.arff,miniboone,130064,test-set cross-validation,129964,100,50,2 62 | molec-biol-promoter/molec-biol-promoter.arff,molec-biol-promoter,106,leave-one-out cross-validation,105,100,57,2 63 | 
molec-biol-splice/molec-biol-splice.arff,molec-biol-splice,3190,test-set cross-validation,3090,100,60,3 64 | monks-1/monks-1_train.arff,monks-1,124,leave-one-out cross-validation,123,100,6,2 65 | monks-2/monks-2_train.arff,monks-2,169,leave-one-out cross-validation,168,100,6,2 66 | monks-3/monks-3_train.arff,monks-3,122,leave-one-out cross-validation,121,100,6,2 67 | mushroom/mushroom.arff,mushroom,8124,test-set cross-validation,8024,100,21,2 68 | musk-1/musk-1.arff,musk-1,476,leave-one-out cross-validation,475,100,166,2 69 | musk-2/musk-2.arff,musk-2,6598,test-set cross-validation,6498,100,166,2 70 | nursery/nursery.arff,nursery,12960,test-set cross-validation,12860,100,8,5 71 | oocytes_merluccius_nucleus_4d/oocytes_merluccius_nucleus_4d.arff,oocytes_merluccius_nucleus_4d,1022,test-set cross-validation,922,100,41,2 72 | oocytes_merluccius_states_2f/oocytes_merluccius_states_2f.arff,oocytes_merluccius_states_2f,1022,test-set cross-validation,922,100,25,3 73 | oocytes_trisopterus_nucleus_2f/oocytes_trisopterus_nucleus_2f.arff,oocytes_trisopterus_nucleus_2f,912,test-set cross-validation,812,100,25,2 74 | oocytes_trisopterus_states_5b/oocytes_trisopterus_states_5b.arff,oocytes_trisopterus_states_5b,912,test-set cross-validation,812,100,32,3 75 | optical/optical_train.arff,optical,3823,test-set cross-validation,3723,100,62,10 76 | ozone/ozone.arff,ozone,2536,test-set cross-validation,2436,100,72,2 77 | page-blocks/page-blocks.arff,page-blocks,5473,test-set cross-validation,5373,100,10,5 78 | parkinsons/parkinsons.arff,parkinsons,195,leave-one-out cross-validation,194,100,22,2 79 | pendigits/pendigits_train.arff,pendigits,7494,test-set cross-validation,7394,100,16,10 80 | pima/pima.arff,pima,768,test-set cross-validation,668,100,8,2 81 | pittsburg-bridges-MATERIAL/pittsburg-bridges-MATERIAL.arff,pittsburg-bridges-MATERIAL,106,leave-one-out cross-validation,105,100,7,3 82 | pittsburg-bridges-REL-L/pittsburg-bridges-REL-L.arff,pittsburg-bridges-REL-L,103,leave-one-out 
cross-validation,102,100,7,3 83 | pittsburg-bridges-SPAN/pittsburg-bridges-SPAN.arff,pittsburg-bridges-SPAN,92,leave-one-out cross-validation,91,92,7,3 84 | pittsburg-bridges-T-OR-D/pittsburg-bridges-T-OR-D.arff,pittsburg-bridges-T-OR-D,102,leave-one-out cross-validation,101,100,7,2 85 | pittsburg-bridges-TYPE/pittsburg-bridges-TYPE.arff,pittsburg-bridges-TYPE,105,leave-one-out cross-validation,104,100,7,6 86 | planning/planning.arff,planning,182,leave-one-out cross-validation,181,100,12,2 87 | plant-margin/plant-margin.arff,plant-margin,1600,test-set cross-validation,1500,100,64,100 88 | plant-shape/plant-shape.arff,plant-shape,1600,test-set cross-validation,1500,100,64,100 89 | plant-texture/plant-texture.arff,plant-texture,1599,test-set cross-validation,1499,100,64,100 90 | post-operative/post-operative.arff,post-operative,90,leave-one-out cross-validation,89,90,8,3 91 | primary-tumor/primary-tumor.arff,primary-tumor,330,leave-one-out cross-validation,329,100,17,15 92 | ringnorm/ringnorm.arff,ringnorm,7400,test-set cross-validation,7300,100,20,2 93 | seeds/seeds.arff,seeds,210,leave-one-out cross-validation,209,100,7,3 94 | semeion/semeion.arff,semeion,1593,test-set cross-validation,1493,100,256,10 95 | soybean/soybean_train.arff,soybean,307,leave-one-out cross-validation,306,100,35,18 96 | spambase/spambase.arff,spambase,4601,test-set cross-validation,4501,100,57,2 97 | spect/spect_train.arff,spect,79,leave-one-out cross-validation,78,79,22,2 98 | spectf/spectf_train.arff,spectf,80,leave-one-out cross-validation,79,80,44,2 99 | statlog-australian-credit/statlog-australian-credit.arff,statlog-australian-credit,690,test-set cross-validation,590,100,14,2 100 | statlog-german-credit/statlog-german-credit.arff,statlog-german-credit,1000,test-set cross-validation,900,100,24,2 101 | statlog-heart/statlog-heart.arff,statlog-heart,270,leave-one-out cross-validation,269,100,13,2 102 | statlog-image/statlog-image.arff,statlog-image,2310,test-set 
cross-validation,2210,100,18,7 103 | statlog-landsat/statlog-landsat_train.arff,statlog-landsat,4435,test-set cross-validation,4335,100,36,6 104 | statlog-shuttle/statlog-shuttle_train.arff,statlog-shuttle,43500,test-set cross-validation,43400,100,9,7 105 | statlog-vehicle/statlog-vehicle.arff,statlog-vehicle,846,test-set cross-validation,746,100,18,4 106 | steel-plates/steel-plates.arff,steel-plates,1941,test-set cross-validation,1841,100,27,7 107 | synthetic-control/synthetic-control.arff,synthetic-control,600,test-set cross-validation,500,100,60,6 108 | teaching/teaching.arff,teaching,151,leave-one-out cross-validation,150,100,5,3 109 | thyroid/thyroid_train.arff,thyroid,3772,test-set cross-validation,3672,100,21,3 110 | tic-tac-toe/tic-tac-toe.arff,tic-tac-toe,958,test-set cross-validation,858,100,9,2 111 | titanic/titanic.arff,titanic,2201,test-set cross-validation,2101,100,3,2 112 | twonorm/twonorm.arff,twonorm,7400,test-set cross-validation,7300,100,20,2 113 | vertebral-column-2clases/datos_orixinais/column_3C_weka.arff,column_3C_weka,310,leave-one-out cross-validation,309,100,6,3 114 | vertebral-column-2clases/datos_orixinais/column_2C_weka.arff,column_2C_weka,310,leave-one-out cross-validation,309,100,6,2 115 | vertebral-column-2clases/vertebral-column-2clases.arff,vertebral-column-2clases,310,leave-one-out cross-validation,309,100,6,2 116 | vertebral-column-3clases/vertebral-column-3clases.arff,vertebral-column-3clases,310,leave-one-out cross-validation,309,100,6,3 117 | wall-following/wall-following.arff,wall-following,5456,test-set cross-validation,5356,100,24,4 118 | waveform/waveform.arff,waveform,5000,test-set cross-validation,4900,100,21,3 119 | waveform-noise/waveform-noise.arff,waveform-noise,5000,test-set cross-validation,4900,100,40,3 120 | wine/wine.arff,wine,178,leave-one-out cross-validation,177,100,13,3 121 | wine-quality-red/wine-quality-red.arff,wine-quality-red,1599,test-set cross-validation,1499,100,11,6 122 | 
wine-quality-white/wine-quality-white.arff,wine-quality-white,4898,test-set cross-validation,4798,100,11,7 123 | yeast/yeast.arff,yeast,1484,test-set cross-validation,1384,100,8,10 124 | zoo/zoo.arff,zoo,101,leave-one-out cross-validation,100,100,16,7 125 | -------------------------------------------------------------------------------- /data/processArffs_log.txt: -------------------------------------------------------------------------------- 1 | nohup: ignoring input 2 | processArffs.py:28: DeprecationWarning: the sets module is deprecated 3 | import sets 4 | start 23595 Wed Dec 17 18:22:00 2014 5 | reading: data.tar.gz 6 | writing data to: modelres.csv.gz 7 | writing problem summaries to: problemDescr.csv 8 | skip (test) adult/adult_test.arff 9 | skip (test) annealing/annealing_test.arff 10 | skip (test) audiology-std/con_patrons_repetidos/audiology-std_test.arff 11 | skip (test) audiology-std/audiology-std_test.arff 12 | skip (test) conn-bench-vowel-deterding/conn-bench-vowel-deterding_test.arff 13 | skip (test) hayes-roth/hayes-roth_test.arff 14 | skip (test) hill-valley/hill-valley_test.arff 15 | skip (test) horse-colic/horse-colic_test.arff 16 | skip (test) image-segmentation/image-segmentation_test.arff 17 | skip (test) monks-1/monks-1_test.arff 18 | skip (test) monks-2/monks-2_test.arff 19 | skip (test) monks-3/monks-3_test.arff 20 | skip (test) optical/optical_test.arff 21 | skip (test) pendigits/pendigits_test.arff 22 | skip (test) soybean/soybean_test.arff 23 | skip (test) spect/spect_test.arff 24 | skip (test) spectf/spectf_test.arff 25 | skip (test) statlog-landsat/statlog-landsat_test.arff 26 | skip (test) statlog-shuttle/statlog-shuttle_test.arff 27 | skip (test) thyroid/thyroid_test.arff 28 | ['abalone/abalone.arff', 'acute-inflammation/acute-inflammation.arff', 'acute-nephritis/acute-nephritis.arff', 'adult/adult_train.arff', 'annealing/annealing_train.arff', 'arrhythmia/arrhythmia.arff', 
'audiology-std/con_patrons_repetidos/audiology-std_train.arff', 'audiology-std/audiology-std_train.arff', 'balance-scale/balance-scale.arff', 'balloons/balloons.arff', 'bank/bank.arff', 'blood/blood.arff', 'breast-cancer/breast-cancer.arff', 'breast-cancer-wisc/breast-cancer-wisc.arff', 'breast-cancer-wisc-diag/breast-cancer-wisc-diag.arff', 'breast-cancer-wisc-prog/breast-cancer-wisc-prog.arff', 'breast-tissue/breast-tissue.arff', 'car/car.arff', 'cardiotocography-3clases/cardiotocography-3clases.arff', 'cardiotocography-10clases/cardiotocography-10clases.arff', 'chess-krvk/chess-krvk.arff', 'chess-krvkp/chess-krvkp.arff', 'congressional-voting/congressional-voting.arff', 'conn-bench-sonar-mines-rocks/conn-bench-sonar-mines-rocks.arff', 'conn-bench-vowel-deterding/conn-bench-vowel-deterding_train.arff', 'connect-4/connect-4.arff', 'contrac/contrac.arff', 'credit-approval/credit-approval.arff', 'cylinder-bands/cylinder-bands.arff', 'dermatology/dermatology.arff', 'echocardiogram/echocardiogram.arff', 'ecoli/ecoli.arff', 'energy-y1/energy-y1.arff', 'energy-y2/energy-y2.arff', 'fertility/fertility.arff', 'flags/flags.arff', 'glass/glass.arff', 'haberman-survival/haberman-survival.arff', 'hayes-roth/hayes-roth_train.arff', 'heart-cleveland/heart-cleveland.arff', 'heart-hungarian/heart-hungarian.arff', 'heart-switzerland/heart-switzerland.arff', 'heart-va/heart-va.arff', 'hepatitis/hepatitis.arff', 'hill-valley/hill-valley_train.arff', 'horse-colic/horse-colic_train.arff', 'ilpd-indian-liver/ilpd-indian-liver.arff', 'image-segmentation/image-segmentation_train.arff', 'ionosphere/ionosphere.arff', 'iris/iris.arff', 'led-display/led-display.arff', 'lenses/lenses.arff', 'letter/letter.arff', 'libras/libras.arff', 'low-res-spect/low-res-spect.arff', 'lung-cancer/lung-cancer.arff', 'lymphography/lymphography.arff', 'magic/magic.arff', 'mammographic/mammographic.arff', 'miniboone/miniboone.arff', 'molec-biol-promoter/molec-biol-promoter.arff', 
'molec-biol-splice/molec-biol-splice.arff', 'monks-1/monks-1_train.arff', 'monks-2/monks-2_train.arff', 'monks-3/monks-3_train.arff', 'mushroom/mushroom.arff', 'musk-1/musk-1.arff', 'musk-2/musk-2.arff', 'nursery/nursery.arff', 'oocytes_merluccius_nucleus_4d/oocytes_merluccius_nucleus_4d.arff', 'oocytes_merluccius_states_2f/oocytes_merluccius_states_2f.arff', 'oocytes_trisopterus_nucleus_2f/oocytes_trisopterus_nucleus_2f.arff', 'oocytes_trisopterus_states_5b/oocytes_trisopterus_states_5b.arff', 'optical/optical_train.arff', 'ozone/ozone.arff', 'page-blocks/page-blocks.arff', 'parkinsons/parkinsons.arff', 'pendigits/pendigits_train.arff', 'pima/pima.arff', 'pittsburg-bridges-MATERIAL/pittsburg-bridges-MATERIAL.arff', 'pittsburg-bridges-REL-L/pittsburg-bridges-REL-L.arff', 'pittsburg-bridges-SPAN/pittsburg-bridges-SPAN.arff', 'pittsburg-bridges-T-OR-D/pittsburg-bridges-T-OR-D.arff', 'pittsburg-bridges-TYPE/pittsburg-bridges-TYPE.arff', 'planning/planning.arff', 'plant-margin/plant-margin.arff', 'plant-shape/plant-shape.arff', 'plant-texture/plant-texture.arff', 'post-operative/post-operative.arff', 'primary-tumor/primary-tumor.arff', 'ringnorm/ringnorm.arff', 'seeds/seeds.arff', 'semeion/semeion.arff', 'soybean/soybean_train.arff', 'spambase/spambase.arff', 'spect/spect_train.arff', 'spectf/spectf_train.arff', 'statlog-australian-credit/statlog-australian-credit.arff', 'statlog-german-credit/statlog-german-credit.arff', 'statlog-heart/statlog-heart.arff', 'statlog-image/statlog-image.arff', 'statlog-landsat/statlog-landsat_train.arff', 'statlog-shuttle/statlog-shuttle_train.arff', 'statlog-vehicle/statlog-vehicle.arff', 'steel-plates/steel-plates.arff', 'synthetic-control/synthetic-control.arff', 'teaching/teaching.arff', 'thyroid/thyroid_train.arff', 'tic-tac-toe/tic-tac-toe.arff', 'titanic/titanic.arff', 'trains/trains.arff', 'twonorm/twonorm.arff', 'vertebral-column-2clases/datos_orixinais/column_3C_weka.arff', 
'vertebral-column-2clases/datos_orixinais/column_2C_weka.arff', 'vertebral-column-2clases/vertebral-column-2clases.arff', 'vertebral-column-3clases/vertebral-column-3clases.arff', 'wall-following/wall-following.arff', 'waveform/waveform.arff', 'waveform-noise/waveform-noise.arff', 'wine/wine.arff', 'wine-quality-red/wine-quality-red.arff', 'wine-quality-white/wine-quality-white.arff', 'yeast/yeast.arff', 'zoo/zoo.arff'] 23595 Wed Dec 17 18:22:05 2014 29 | start ecoli 23632 Wed Dec 17 18:22:13 2014 30 | initial ['ecoli/ecoli.arff', u'ecoli', 336, 7, 8] 23632 Wed Dec 17 18:22:13 2014 31 | start echocardiogram 23631 Wed Dec 17 18:22:13 2014 32 | initial ['echocardiogram/echocardiogram.arff', u'echocardiogram', 131, 10, 2] 23631 Wed Dec 17 18:22:13 2014 33 | start dermatology 23630 Wed Dec 17 18:22:13 2014 34 | initial ['dermatology/dermatology.arff', u'dermatology', 366, 34, 6] 23630 Wed Dec 17 18:22:13 2014 35 | start conn-bench-vowel-deterding 23625 Wed Dec 17 18:22:13 2014 36 | initial ['conn-bench-vowel-deterding/conn-bench-vowel-deterding_train.arff', u'conn-bench-vowel-deterding', 528, 11, 11] 23625 Wed Dec 17 18:22:13 2014 37 | start conn-bench-sonar-mines-rocks 23624 Wed Dec 17 18:22:13 2014 38 | initial ['conn-bench-sonar-mines-rocks/conn-bench-sonar-mines-rocks.arff', u'conn-bench-sonar-mines-rocks', 208, 60, 2] 23624 Wed Dec 17 18:22:13 2014 39 | start cylinder-bands 23629 Wed Dec 17 18:22:13 2014 40 | initial ['cylinder-bands/cylinder-bands.arff', u'cylinder-bands', 512, 35, 2] 23629 Wed Dec 17 18:22:13 2014 41 | start congressional-voting 23623 Wed Dec 17 18:22:13 2014 42 | initial ['congressional-voting/congressional-voting.arff', u'congressional-voting', 435, 16, 2] 23623 Wed Dec 17 18:22:13 2014 43 | start contrac 23627 Wed Dec 17 18:22:14 2014 44 | start credit-approval 23628 Wed Dec 17 18:22:14 2014 45 | initial ['credit-approval/credit-approval.arff', u'credit-approval', 690, 15, 2] 23628 Wed Dec 17 18:22:14 2014 46 | initial 
['contrac/contrac.arff', u'contrac', 1473, 9, 3] 23627 Wed Dec 17 18:22:14 2014 47 | start car 23618 Wed Dec 17 18:22:14 2014 48 | initial ['car/car.arff', u'car', 1728, 6, 4] 23618 Wed Dec 17 18:22:14 2014 49 | start breast-tissue 23617 Wed Dec 17 18:22:14 2014 50 | initial ['breast-tissue/breast-tissue.arff', u'breast-tissue', 106, 9, 6] 23617 Wed Dec 17 18:22:14 2014 51 | start cardiotocography-10clases 23620 Wed Dec 17 18:22:14 2014 52 | initial ['cardiotocography-10clases/cardiotocography-10clases.arff', u'cardiotocography-10clases', 2126, 21, 10] 23620 Wed Dec 17 18:22:14 2014 53 | start breast-cancer-wisc 23614 Wed Dec 17 18:22:14 2014 54 | initial ['breast-cancer-wisc/breast-cancer-wisc.arff', u'breast-cancer-wisc', 699, 9, 2] 23614 Wed Dec 17 18:22:14 2014 55 | start chess-krvkp 23622 Wed Dec 17 18:22:14 2014 56 | start breast-cancer-wisc-prog 23616 Wed Dec 17 18:22:14 2014 57 | initial ['breast-cancer-wisc-prog/breast-cancer-wisc-prog.arff', u'breast-cancer-wisc-prog', 198, 33, 2] 23616 Wed Dec 17 18:22:14 2014 58 | initial ['chess-krvkp/chess-krvkp.arff', u'chess-krvkp', 3196, 36, 2] 23622 Wed Dec 17 18:22:14 2014 59 | start cardiotocography-3clases 23619 Wed Dec 17 18:22:14 2014 60 | start breast-cancer 23613 Wed Dec 17 18:22:14 2014 61 | initial ['cardiotocography-3clases/cardiotocography-3clases.arff', u'cardiotocography-3clases', 2126, 21, 3] 23619 Wed Dec 17 18:22:14 2014 62 | initial ['breast-cancer/breast-cancer.arff', u'breast-cancer', 286, 9, 2] 23613 Wed Dec 17 18:22:14 2014 63 | start breast-cancer-wisc-diag 23615 Wed Dec 17 18:22:14 2014 64 | initial ['breast-cancer-wisc-diag/breast-cancer-wisc-diag.arff', u'breast-cancer-wisc-diag', 569, 30, 2] 23615 Wed Dec 17 18:22:14 2014 65 | start balloons 23610 Wed Dec 17 18:22:14 2014 66 | initial ['balloons/balloons.arff', u'balloons', 16, 4, 2] 23610 Wed Dec 17 18:22:14 2014 67 | start blood 23612 Wed Dec 17 18:22:14 2014 68 | initial ['blood/blood.arff', u'blood', 748, 4, 2] 23612 Wed Dec 17 
18:22:14 2014 69 | start audiology-std 23608 Wed Dec 17 18:22:14 2014 70 | initial ['audiology-std/audiology-std_train.arff', u'audiology-std', 171, 59, 18] 23608 Wed Dec 17 18:22:14 2014 71 | start acute-inflammation 23602 Wed Dec 17 18:22:14 2014 72 | initial ['acute-inflammation/acute-inflammation.arff', u'acute-inflammation', 120, 6, 2] 23602 Wed Dec 17 18:22:14 2014 73 | start annealing 23605 Wed Dec 17 18:22:14 2014 74 | initial ['annealing/annealing_train.arff', u'annealing', 798, 31, 5] 23605 Wed Dec 17 18:22:14 2014 75 | start bank 23611 Wed Dec 17 18:22:14 2014 76 | initial ['bank/bank.arff', u'bank', 4521, 16, 2] 23611 Wed Dec 17 18:22:14 2014 77 | start acute-nephritis 23603 Wed Dec 17 18:22:14 2014 78 | initial ['acute-nephritis/acute-nephritis.arff', u'acute-nephritis', 120, 6, 2] 23603 Wed Dec 17 18:22:14 2014 79 | start balance-scale 23609 Wed Dec 17 18:22:14 2014 80 | initial ['balance-scale/balance-scale.arff', u'balance-scale', 625, 4, 3] 23609 Wed Dec 17 18:22:14 2014 81 | done ['cylinder-bands/cylinder-bands.arff', u'cylinder-bands', 512, 'test-set cross-validation', 412, 100, 35, 2] 23629 Wed Dec 17 18:22:14 2014 82 | start abalone 23601 Wed Dec 17 18:22:15 2014 83 | initial ['abalone/abalone.arff', u'abalone', 4177, 8, 3] 23601 Wed Dec 17 18:22:15 2014 84 | start audiology-std 23607 Wed Dec 17 18:22:15 2014 85 | start chess-krvk 23621 Wed Dec 17 18:22:15 2014 86 | initial ['audiology-std/con_patrons_repetidos/audiology-std_train.arff', u'audiology-std', 194, 59, 18] 23607 Wed Dec 17 18:22:15 2014 87 | start arrhythmia 23606 Wed Dec 17 18:22:15 2014 88 | done ['credit-approval/credit-approval.arff', u'credit-approval', 690, 'test-set cross-validation', 590, 100, 15, 2] 23628 Wed Dec 17 18:22:15 2014 89 | initial ['chess-krvk/chess-krvk.arff', u'chess-krvk', 28056, 6, 18] 23621 Wed Dec 17 18:22:15 2014 90 | initial ['arrhythmia/arrhythmia.arff', u'arrhythmia', 452, 262, 13] 23606 Wed Dec 17 18:22:15 2014 91 | done 
['breast-cancer-wisc/breast-cancer-wisc.arff', u'breast-cancer-wisc', 699, 'test-set cross-validation', 599, 100, 9, 2] 23614 Wed Dec 17 18:22:15 2014 92 | done ['blood/blood.arff', u'blood', 748, 'test-set cross-validation', 648, 100, 4, 2] 23612 Wed Dec 17 18:22:15 2014 93 | done ['breast-cancer-wisc-diag/breast-cancer-wisc-diag.arff', u'breast-cancer-wisc-diag', 569, 'test-set cross-validation', 469, 100, 30, 2] 23615 Wed Dec 17 18:22:16 2014 94 | done ['balance-scale/balance-scale.arff', u'balance-scale', 625, 'test-set cross-validation', 525, 100, 4, 3] 23609 Wed Dec 17 18:22:16 2014 95 | start adult 23604 Wed Dec 17 18:22:17 2014 96 | initial ['adult/adult_train.arff', u'adult', 32561, 14, 2] 23604 Wed Dec 17 18:22:17 2014 97 | done ['contrac/contrac.arff', u'contrac', 1473, 'test-set cross-validation', 1373, 100, 9, 3] 23627 Wed Dec 17 18:22:17 2014 98 | done ['car/car.arff', u'car', 1728, 'test-set cross-validation', 1628, 100, 6, 4] 23618 Wed Dec 17 18:22:18 2014 99 | done ['annealing/annealing_train.arff', u'annealing', 798, 'test-set cross-validation', 698, 100, 31, 5] 23605 Wed Dec 17 18:22:18 2014 100 | done ['balloons/balloons.arff', u'balloons', 16, 'leave-one-out cross-validation', 15, 16, 4, 2] 23610 Wed Dec 17 18:22:19 2014 101 | done ['conn-bench-vowel-deterding/conn-bench-vowel-deterding_train.arff', u'conn-bench-vowel-deterding', 528, 'test-set cross-validation', 428, 100, 11, 11] 23625 Wed Dec 17 18:22:19 2014 102 | done ['cardiotocography-3clases/cardiotocography-3clases.arff', u'cardiotocography-3clases', 2126, 'test-set cross-validation', 2026, 100, 21, 3] 23619 Wed Dec 17 18:22:20 2014 103 | done ['chess-krvkp/chess-krvkp.arff', u'chess-krvkp', 3196, 'test-set cross-validation', 3096, 100, 36, 2] 23622 Wed Dec 17 18:22:21 2014 104 | start energy-y1 23629 Wed Dec 17 18:22:23 2014 105 | initial ['energy-y1/energy-y1.arff', u'energy-y1', 768, 8, 3] 23629 Wed Dec 17 18:22:23 2014 106 | done ['bank/bank.arff', u'bank', 4521, 'test-set 
cross-validation', 4421, 100, 16, 2] 23611 Wed Dec 17 18:22:23 2014 107 | start energy-y2 23628 Wed Dec 17 18:22:23 2014 108 | initial ['energy-y2/energy-y2.arff', u'energy-y2', 768, 8, 3] 23628 Wed Dec 17 18:22:23 2014 109 | start fertility 23614 Wed Dec 17 18:22:24 2014 110 | initial ['fertility/fertility.arff', u'fertility', 100, 9, 2] 23614 Wed Dec 17 18:22:24 2014 111 | start flags 23612 Wed Dec 17 18:22:24 2014 112 | initial ['flags/flags.arff', u'flags', 194, 28, 8] 23612 Wed Dec 17 18:22:24 2014 113 | done ['energy-y1/energy-y1.arff', u'energy-y1', 768, 'test-set cross-validation', 668, 100, 8, 3] 23629 Wed Dec 17 18:22:25 2014 114 | start glass 23615 Wed Dec 17 18:22:25 2014 115 | initial ['glass/glass.arff', u'glass', 214, 9, 6] 23615 Wed Dec 17 18:22:25 2014 116 | done ['energy-y2/energy-y2.arff', u'energy-y2', 768, 'test-set cross-validation', 668, 100, 8, 3] 23628 Wed Dec 17 18:22:25 2014 117 | start haberman-survival 23609 Wed Dec 17 18:22:25 2014 118 | initial ['haberman-survival/haberman-survival.arff', u'haberman-survival', 306, 3, 2] 23609 Wed Dec 17 18:22:25 2014 119 | start hayes-roth 23627 Wed Dec 17 18:22:26 2014 120 | initial ['hayes-roth/hayes-roth_train.arff', u'hayes-roth', 132, 3, 3] 23627 Wed Dec 17 18:22:26 2014 121 | start heart-cleveland 23618 Wed Dec 17 18:22:27 2014 122 | initial ['heart-cleveland/heart-cleveland.arff', u'heart-cleveland', 303, 13, 5] 23618 Wed Dec 17 18:22:27 2014 123 | start connect-4 23626 Wed Dec 17 18:22:27 2014 124 | start heart-hungarian 23605 Wed Dec 17 18:22:27 2014 125 | initial ['heart-hungarian/heart-hungarian.arff', u'heart-hungarian', 294, 12, 2] 23605 Wed Dec 17 18:22:27 2014 126 | initial ['connect-4/connect-4.arff', u'connect-4', 67557, 42, 2] 23626 Wed Dec 17 18:22:27 2014 127 | start heart-switzerland 23610 Wed Dec 17 18:22:28 2014 128 | initial ['heart-switzerland/heart-switzerland.arff', u'heart-switzerland', 123, 12, 5] 23610 Wed Dec 17 18:22:28 2014 129 | start heart-va 23625 Wed Dec 17 
18:22:28 2014 130 | initial ['heart-va/heart-va.arff', u'heart-va', 200, 12, 5] 23625 Wed Dec 17 18:22:28 2014 131 | start hepatitis 23619 Wed Dec 17 18:22:28 2014 132 | initial ['hepatitis/hepatitis.arff', u'hepatitis', 155, 19, 2] 23619 Wed Dec 17 18:22:28 2014 133 | start hill-valley 23622 Wed Dec 17 18:22:30 2014 134 | initial ['hill-valley/hill-valley_train.arff', u'hill-valley', 606, 100, 2] 23622 Wed Dec 17 18:22:30 2014 135 | done ['cardiotocography-10clases/cardiotocography-10clases.arff', u'cardiotocography-10clases', 2126, 'test-set cross-validation', 2026, 100, 21, 10] 23620 Wed Dec 17 18:22:30 2014 136 | done ['abalone/abalone.arff', u'abalone', 4177, 'test-set cross-validation', 4077, 100, 8, 3] 23601 Wed Dec 17 18:22:31 2014 137 | start horse-colic 23611 Wed Dec 17 18:22:32 2014 138 | initial ['horse-colic/horse-colic_train.arff', u'horse-colic', 300, 25, 2] 23611 Wed Dec 17 18:22:32 2014 139 | start ilpd-indian-liver 23629 Wed Dec 17 18:22:33 2014 140 | initial ['ilpd-indian-liver/ilpd-indian-liver.arff', u'ilpd-indian-liver', 583, 9, 2] 23629 Wed Dec 17 18:22:33 2014 141 | start image-segmentation 23628 Wed Dec 17 18:22:34 2014 142 | initial ['image-segmentation/image-segmentation_train.arff', u'image-segmentation', 210, 18, 7] 23628 Wed Dec 17 18:22:34 2014 143 | done ['ilpd-indian-liver/ilpd-indian-liver.arff', u'ilpd-indian-liver', 583, 'test-set cross-validation', 483, 100, 9, 2] 23629 Wed Dec 17 18:22:34 2014 144 | done ['hill-valley/hill-valley_train.arff', u'hill-valley', 606, 'test-set cross-validation', 506, 100, 100, 2] 23622 Wed Dec 17 18:22:35 2014 145 | start iris 23601 Wed Dec 17 18:22:39 2014 146 | initial ['iris/iris.arff', u'iris', 150, 4, 3] 23601 Wed Dec 17 18:22:39 2014 147 | start ionosphere 23620 Wed Dec 17 18:22:39 2014 148 | initial ['ionosphere/ionosphere.arff', u'ionosphere', 351, 33, 2] 23620 Wed Dec 17 18:22:39 2014 149 | start led-display 23629 Wed Dec 17 18:22:43 2014 150 | initial ['led-display/led-display.arff', 
u'led-display', 1000, 7, 10] 23629 Wed Dec 17 18:22:43 2014 151 | start lenses 23622 Wed Dec 17 18:22:43 2014 152 | initial ['lenses/lenses.arff', u'lenses', 24, 4, 3] 23622 Wed Dec 17 18:22:43 2014 153 | done ['acute-nephritis/acute-nephritis.arff', u'acute-nephritis', 120, 'leave-one-out cross-validation', 119, 100, 6, 2] 23603 Wed Dec 17 18:22:44 2014 154 | done ['acute-inflammation/acute-inflammation.arff', u'acute-inflammation', 120, 'leave-one-out cross-validation', 119, 100, 6, 2] 23602 Wed Dec 17 18:22:45 2014 155 | done ['echocardiogram/echocardiogram.arff', u'echocardiogram', 131, 'leave-one-out cross-validation', 130, 100, 10, 2] 23631 Wed Dec 17 18:22:46 2014 156 | done ['led-display/led-display.arff', u'led-display', 1000, 'test-set cross-validation', 900, 100, 7, 10] 23629 Wed Dec 17 18:22:48 2014 157 | done ['breast-cancer/breast-cancer.arff', u'breast-cancer', 286, 'leave-one-out cross-validation', 285, 100, 9, 2] 23613 Wed Dec 17 18:22:51 2014 158 | start libras 23602 Wed Dec 17 18:22:54 2014 159 | initial ['libras/libras.arff', u'libras', 360, 90, 15] 23602 Wed Dec 17 18:22:54 2014 160 | done ['lenses/lenses.arff', u'lenses', 24, 'leave-one-out cross-validation', 23, 24, 4, 3] 23622 Wed Dec 17 18:22:55 2014 161 | start letter 23603 Wed Dec 17 18:22:55 2014 162 | start low-res-spect 23631 Wed Dec 17 18:22:55 2014 163 | initial ['letter/letter.arff', u'letter', 20000, 16, 26] 23603 Wed Dec 17 18:22:55 2014 164 | initial ['low-res-spect/low-res-spect.arff', u'low-res-spect', 531, 100, 9] 23631 Wed Dec 17 18:22:55 2014 165 | done ['fertility/fertility.arff', u'fertility', 100, 'leave-one-out cross-validation', 99, 100, 9, 2] 23614 Wed Dec 17 18:22:57 2014 166 | start lung-cancer 23629 Wed Dec 17 18:22:57 2014 167 | initial ['lung-cancer/lung-cancer.arff', u'lung-cancer', 32, 56, 3] 23629 Wed Dec 17 18:22:57 2014 168 | done ['congressional-voting/congressional-voting.arff', u'congressional-voting', 435, 'leave-one-out cross-validation', 434, 100, 16, 
2] 23623 Wed Dec 17 18:23:00 2014 169 | start lymphography 23613 Wed Dec 17 18:23:00 2014 170 | initial ['lymphography/lymphography.arff', u'lymphography', 148, 18, 4] 23613 Wed Dec 17 18:23:00 2014 171 | done ['haberman-survival/haberman-survival.arff', u'haberman-survival', 306, 'leave-one-out cross-validation', 305, 100, 3, 2] 23609 Wed Dec 17 18:23:03 2014 172 | start magic 23622 Wed Dec 17 18:23:05 2014 173 | initial ['magic/magic.arff', u'magic', 19020, 10, 2] 23622 Wed Dec 17 18:23:05 2014 174 | done ['hepatitis/hepatitis.arff', u'hepatitis', 155, 'leave-one-out cross-validation', 154, 100, 19, 2] 23619 Wed Dec 17 18:23:05 2014 175 | start mammographic 23614 Wed Dec 17 18:23:06 2014 176 | initial ['mammographic/mammographic.arff', u'mammographic', 961, 5, 2] 23614 Wed Dec 17 18:23:06 2014 177 | done ['breast-cancer-wisc-prog/breast-cancer-wisc-prog.arff', u'breast-cancer-wisc-prog', 198, 'leave-one-out cross-validation', 197, 100, 33, 2] 23616 Wed Dec 17 18:23:07 2014 178 | done ['mammographic/mammographic.arff', u'mammographic', 961, 'test-set cross-validation', 861, 100, 5, 2] 23614 Wed Dec 17 18:23:07 2014 179 | done ['heart-hungarian/heart-hungarian.arff', u'heart-hungarian', 294, 'leave-one-out cross-validation', 293, 100, 12, 2] 23605 Wed Dec 17 18:23:08 2014 180 | done ['lung-cancer/lung-cancer.arff', u'lung-cancer', 32, 'leave-one-out cross-validation', 31, 32, 56, 3] 23629 Wed Dec 17 18:23:13 2014 181 | done ['low-res-spect/low-res-spect.arff', u'low-res-spect', 531, 'test-set cross-validation', 431, 100, 100, 9] 23631 Wed Dec 17 18:23:14 2014 182 | start molec-biol-promoter 23609 Wed Dec 17 18:23:15 2014 183 | initial ['molec-biol-promoter/molec-biol-promoter.arff', u'molec-biol-promoter', 106, 57, 2] 23609 Wed Dec 17 18:23:15 2014 184 | done ['horse-colic/horse-colic_train.arff', u'horse-colic', 300, 'leave-one-out cross-validation', 299, 100, 25, 2] 23611 Wed Dec 17 18:23:16 2014 185 | start molec-biol-splice 23619 Wed Dec 17 18:23:18 2014 186 | 
initial ['molec-biol-splice/molec-biol-splice.arff', u'molec-biol-splice', 3190, 60, 3] 23619 Wed Dec 17 18:23:18 2014 187 | start monks-1 23616 Wed Dec 17 18:23:19 2014 188 | initial ['monks-1/monks-1_train.arff', u'monks-1', 124, 6, 2] 23616 Wed Dec 17 18:23:19 2014 189 | start monks-2 23614 Wed Dec 17 18:23:19 2014 190 | initial ['monks-2/monks-2_train.arff', u'monks-2', 169, 6, 2] 23614 Wed Dec 17 18:23:19 2014 191 | done ['hayes-roth/hayes-roth_train.arff', u'hayes-roth', 132, 'leave-one-out cross-validation', 131, 100, 3, 3] 23627 Wed Dec 17 18:23:19 2014 192 | start monks-3 23605 Wed Dec 17 18:23:20 2014 193 | initial ['monks-3/monks-3_train.arff', u'monks-3', 122, 6, 2] 23605 Wed Dec 17 18:23:20 2014 194 | done ['conn-bench-sonar-mines-rocks/conn-bench-sonar-mines-rocks.arff', u'conn-bench-sonar-mines-rocks', 208, 'leave-one-out cross-validation', 207, 100, 60, 2] 23624 Wed Dec 17 18:23:24 2014 195 | start musk-1 23631 Wed Dec 17 18:23:25 2014 196 | initial ['musk-1/musk-1.arff', u'musk-1', 476, 166, 2] 23631 Wed Dec 17 18:23:25 2014 197 | start mushroom 23629 Wed Dec 17 18:23:26 2014 198 | initial ['mushroom/mushroom.arff', u'mushroom', 8124, 21, 2] 23629 Wed Dec 17 18:23:26 2014 199 | done ['iris/iris.arff', u'iris', 150, 'leave-one-out cross-validation', 149, 100, 4, 3] 23601 Wed Dec 17 18:23:29 2014 200 | start nursery 23627 Wed Dec 17 18:23:33 2014 201 | initial ['nursery/nursery.arff', u'nursery', 12960, 8, 5] 23627 Wed Dec 17 18:23:33 2014 202 | start musk-2 23611 Wed Dec 17 18:23:34 2014 203 | initial ['musk-2/musk-2.arff', u'musk-2', 6598, 166, 2] 23611 Wed Dec 17 18:23:34 2014 204 | start oocytes_merluccius_nucleus_4d 23624 Wed Dec 17 18:23:37 2014 205 | initial ['oocytes_merluccius_nucleus_4d/oocytes_merluccius_nucleus_4d.arff', u'oocytes_merluccius_nucleus_4d', 1022, 41, 2] 23624 Wed Dec 17 18:23:37 2014 206 | done ['breast-tissue/breast-tissue.arff', u'breast-tissue', 106, 'leave-one-out cross-validation', 105, 100, 9, 6] 23617 Wed Dec 17 
18:23:37 2014 207 | done ['molec-biol-splice/molec-biol-splice.arff', u'molec-biol-splice', 3190, 'test-set cross-validation', 3090, 100, 60, 3] 23619 Wed Dec 17 18:23:40 2014 208 | done ['oocytes_merluccius_nucleus_4d/oocytes_merluccius_nucleus_4d.arff', u'oocytes_merluccius_nucleus_4d', 1022, 'test-set cross-validation', 922, 100, 41, 2] 23624 Wed Dec 17 18:23:41 2014 209 | done ['mushroom/mushroom.arff', u'mushroom', 8124, 'test-set cross-validation', 8024, 100, 21, 2] 23629 Wed Dec 17 18:23:42 2014 210 | start miniboone 23623 Wed Dec 17 18:23:42 2014 211 | start oocytes_merluccius_states_2f 23601 Wed Dec 17 18:23:42 2014 212 | initial ['oocytes_merluccius_states_2f/oocytes_merluccius_states_2f.arff', u'oocytes_merluccius_states_2f', 1022, 25, 3] 23601 Wed Dec 17 18:23:42 2014 213 | initial ['miniboone/miniboone.arff', u'miniboone', 130064, 50, 2] 23623 Wed Dec 17 18:23:44 2014 214 | done ['ionosphere/ionosphere.arff', u'ionosphere', 351, 'leave-one-out cross-validation', 350, 100, 33, 2] 23620 Wed Dec 17 18:23:45 2014 215 | done ['heart-switzerland/heart-switzerland.arff', u'heart-switzerland', 123, 'leave-one-out cross-validation', 122, 100, 12, 5] 23610 Wed Dec 17 18:23:45 2014 216 | done ['oocytes_merluccius_states_2f/oocytes_merluccius_states_2f.arff', u'oocytes_merluccius_states_2f', 1022, 'test-set cross-validation', 922, 100, 25, 3] 23601 Wed Dec 17 18:23:48 2014 217 | start oocytes_trisopterus_nucleus_2f 23617 Wed Dec 17 18:23:50 2014 218 | initial ['oocytes_trisopterus_nucleus_2f/oocytes_trisopterus_nucleus_2f.arff', u'oocytes_trisopterus_nucleus_2f', 912, 25, 2] 23617 Wed Dec 17 18:23:50 2014 219 | done ['monks-1/monks-1_train.arff', u'monks-1', 124, 'leave-one-out cross-validation', 123, 100, 6, 2] 23616 Wed Dec 17 18:23:52 2014 220 | start oocytes_trisopterus_states_5b 23619 Wed Dec 17 18:23:53 2014 221 | initial ['oocytes_trisopterus_states_5b/oocytes_trisopterus_states_5b.arff', u'oocytes_trisopterus_states_5b', 912, 32, 3] 23619 Wed Dec 17 
18:23:53 2014 222 | done ['heart-va/heart-va.arff', u'heart-va', 200, 'leave-one-out cross-validation', 199, 100, 12, 5] 23625 Wed Dec 17 18:23:53 2014 223 | done ['oocytes_trisopterus_nucleus_2f/oocytes_trisopterus_nucleus_2f.arff', u'oocytes_trisopterus_nucleus_2f', 912, 'test-set cross-validation', 812, 100, 25, 2] 23617 Wed Dec 17 18:23:53 2014 224 | done ['monks-3/monks-3_train.arff', u'monks-3', 122, 'leave-one-out cross-validation', 121, 100, 6, 2] 23605 Wed Dec 17 18:23:54 2014 225 | done ['molec-biol-promoter/molec-biol-promoter.arff', u'molec-biol-promoter', 106, 'leave-one-out cross-validation', 105, 100, 57, 2] 23609 Wed Dec 17 18:23:54 2014 226 | start optical 23624 Wed Dec 17 18:23:55 2014 227 | initial ['optical/optical_train.arff', u'optical', 3823, 62, 10] 23624 Wed Dec 17 18:23:55 2014 228 | done ['monks-2/monks-2_train.arff', u'monks-2', 169, 'leave-one-out cross-validation', 168, 100, 6, 2] 23614 Wed Dec 17 18:23:55 2014 229 | start ozone 23629 Wed Dec 17 18:23:55 2014 230 | initial ['ozone/ozone.arff', u'ozone', 2536, 72, 2] 23629 Wed Dec 17 18:23:55 2014 231 | start page-blocks 23620 Wed Dec 17 18:23:58 2014 232 | initial ['page-blocks/page-blocks.arff', u'page-blocks', 5473, 10, 5] 23620 Wed Dec 17 18:23:58 2014 233 | start parkinsons 23610 Wed Dec 17 18:23:58 2014 234 | initial ['parkinsons/parkinsons.arff', u'parkinsons', 195, 22, 2] 23610 Wed Dec 17 18:23:58 2014 235 | done ['oocytes_trisopterus_states_5b/oocytes_trisopterus_states_5b.arff', u'oocytes_trisopterus_states_5b', 912, 'test-set cross-validation', 812, 100, 32, 3] 23619 Wed Dec 17 18:23:59 2014 236 | start pendigits 23601 Wed Dec 17 18:24:02 2014 237 | initial ['pendigits/pendigits_train.arff', u'pendigits', 7494, 16, 10] 23601 Wed Dec 17 18:24:02 2014 238 | done ['ozone/ozone.arff', u'ozone', 2536, 'test-set cross-validation', 2436, 100, 72, 2] 23629 Wed Dec 17 18:24:03 2014 239 | start pima 23616 Wed Dec 17 18:24:05 2014 240 | initial ['pima/pima.arff', u'pima', 768, 8, 2] 
23616 Wed Dec 17 18:24:05 2014 241 | start pittsburg-bridges-REL-L 23617 Wed Dec 17 18:24:06 2014 242 | initial ['pittsburg-bridges-REL-L/pittsburg-bridges-REL-L.arff', u'pittsburg-bridges-REL-L', 103, 7, 3] 23617 Wed Dec 17 18:24:06 2014 243 | done ['pima/pima.arff', u'pima', 768, 'test-set cross-validation', 668, 100, 8, 2] 23616 Wed Dec 17 18:24:06 2014 244 | start pittsburg-bridges-MATERIAL 23625 Wed Dec 17 18:24:06 2014 245 | initial ['pittsburg-bridges-MATERIAL/pittsburg-bridges-MATERIAL.arff', u'pittsburg-bridges-MATERIAL', 106, 7, 3] 23625 Wed Dec 17 18:24:06 2014 246 | done ['heart-cleveland/heart-cleveland.arff', u'heart-cleveland', 303, 'leave-one-out cross-validation', 302, 100, 13, 5] 23618 Wed Dec 17 18:24:06 2014 247 | done ['lymphography/lymphography.arff', u'lymphography', 148, 'leave-one-out cross-validation', 147, 100, 18, 4] 23613 Wed Dec 17 18:24:07 2014 248 | start pittsburg-bridges-SPAN 23605 Wed Dec 17 18:24:07 2014 249 | initial ['pittsburg-bridges-SPAN/pittsburg-bridges-SPAN.arff', u'pittsburg-bridges-SPAN', 92, 7, 3] 23605 Wed Dec 17 18:24:07 2014 250 | done ['glass/glass.arff', u'glass', 214, 'leave-one-out cross-validation', 213, 100, 9, 6] 23615 Wed Dec 17 18:24:07 2014 251 | start pittsburg-bridges-T-OR-D 23609 Wed Dec 17 18:24:08 2014 252 | initial ['pittsburg-bridges-T-OR-D/pittsburg-bridges-T-OR-D.arff', u'pittsburg-bridges-T-OR-D', 102, 7, 2] 23609 Wed Dec 17 18:24:08 2014 253 | start pittsburg-bridges-TYPE 23614 Wed Dec 17 18:24:08 2014 254 | initial ['pittsburg-bridges-TYPE/pittsburg-bridges-TYPE.arff', u'pittsburg-bridges-TYPE', 105, 7, 6] 23614 Wed Dec 17 18:24:08 2014 255 | start planning 23619 Wed Dec 17 18:24:12 2014 256 | initial ['planning/planning.arff', u'planning', 182, 12, 2] 23619 Wed Dec 17 18:24:12 2014 257 | done ['page-blocks/page-blocks.arff', u'page-blocks', 5473, 'test-set cross-validation', 5373, 100, 10, 5] 23620 Wed Dec 17 18:24:15 2014 258 | start plant-margin 23629 Wed Dec 17 18:24:18 2014 259 | initial 
['plant-margin/plant-margin.arff', u'plant-margin', 1600, 64, 100] 23629 Wed Dec 17 18:24:18 2014 260 | done ['ecoli/ecoli.arff', u'ecoli', 336, 'leave-one-out cross-validation', 335, 100, 7, 8] 23632 Wed Dec 17 18:24:20 2014 261 | start plant-shape 23616 Wed Dec 17 18:24:21 2014 262 | initial ['plant-shape/plant-shape.arff', u'plant-shape', 1600, 64, 100] 23616 Wed Dec 17 18:24:21 2014 263 | start post-operative 23613 Wed Dec 17 18:24:21 2014 264 | initial ['post-operative/post-operative.arff', u'post-operative', 90, 8, 3] 23613 Wed Dec 17 18:24:21 2014 265 | start primary-tumor 23615 Wed Dec 17 18:24:21 2014 266 | initial ['primary-tumor/primary-tumor.arff', u'primary-tumor', 330, 17, 15] 23615 Wed Dec 17 18:24:21 2014 267 | start plant-texture 23618 Wed Dec 17 18:24:22 2014 268 | initial ['plant-texture/plant-texture.arff', u'plant-texture', 1599, 64, 100] 23618 Wed Dec 17 18:24:22 2014 269 | done ['nursery/nursery.arff', u'nursery', 12960, 'test-set cross-validation', 12860, 100, 8, 5] 23627 Wed Dec 17 18:24:23 2014 270 | start ringnorm 23620 Wed Dec 17 18:24:30 2014 271 | initial ['ringnorm/ringnorm.arff', u'ringnorm', 7400, 20, 2] 23620 Wed Dec 17 18:24:30 2014 272 | done ['dermatology/dermatology.arff', u'dermatology', 366, 'leave-one-out cross-validation', 365, 100, 34, 6] 23630 Wed Dec 17 18:24:31 2014 273 | start seeds 23632 Wed Dec 17 18:24:34 2014 274 | initial ['seeds/seeds.arff', u'seeds', 210, 7, 3] 23632 Wed Dec 17 18:24:34 2014 275 | done ['musk-2/musk-2.arff', u'musk-2', 6598, 'test-set cross-validation', 6498, 100, 166, 2] 23611 Wed Dec 17 18:24:39 2014 276 | start semeion 23627 Wed Dec 17 18:24:39 2014 277 | initial ['semeion/semeion.arff', u'semeion', 1593, 256, 10] 23627 Wed Dec 17 18:24:39 2014 278 | done ['image-segmentation/image-segmentation_train.arff', u'image-segmentation', 210, 'leave-one-out cross-validation', 209, 100, 18, 7] 23628 Wed Dec 17 18:24:40 2014 279 | done ['pittsburg-bridges-T-OR-D/pittsburg-bridges-T-OR-D.arff', 
u'pittsburg-bridges-T-OR-D', 102, 'leave-one-out cross-validation', 101, 100, 7, 2] 23609 Wed Dec 17 18:24:42 2014 280 | done ['parkinsons/parkinsons.arff', u'parkinsons', 195, 'leave-one-out cross-validation', 194, 100, 22, 2] 23610 Wed Dec 17 18:24:44 2014 281 | start soybean 23630 Wed Dec 17 18:24:45 2014 282 | initial ['soybean/soybean_train.arff', u'soybean', 307, 35, 18] 23630 Wed Dec 17 18:24:45 2014 283 | done ['flags/flags.arff', u'flags', 194, 'leave-one-out cross-validation', 193, 100, 28, 8] 23612 Wed Dec 17 18:24:53 2014 284 | done ['pendigits/pendigits_train.arff', u'pendigits', 7494, 'test-set cross-validation', 7394, 100, 16, 10] 23601 Wed Dec 17 18:24:53 2014 285 | done ['optical/optical_train.arff', u'optical', 3823, 'test-set cross-validation', 3723, 100, 62, 10] 23624 Wed Dec 17 18:24:54 2014 286 | done ['planning/planning.arff', u'planning', 182, 'leave-one-out cross-validation', 181, 100, 12, 2] 23619 Wed Dec 17 18:24:55 2014 287 | start spect 23628 Wed Dec 17 18:24:55 2014 288 | initial ['spect/spect_train.arff', u'spect', 79, 22, 2] 23628 Wed Dec 17 18:24:55 2014 289 | done ['pittsburg-bridges-SPAN/pittsburg-bridges-SPAN.arff', u'pittsburg-bridges-SPAN', 92, 'leave-one-out cross-validation', 91, 92, 7, 3] 23605 Wed Dec 17 18:24:55 2014 290 | start spambase 23611 Wed Dec 17 18:24:56 2014 291 | initial ['spambase/spambase.arff', u'spambase', 4601, 57, 2] 23611 Wed Dec 17 18:24:56 2014 292 | start spectf 23609 Wed Dec 17 18:24:56 2014 293 | initial ['spectf/spectf_train.arff', u'spectf', 80, 44, 2] 23609 Wed Dec 17 18:24:56 2014 294 | done ['pittsburg-bridges-MATERIAL/pittsburg-bridges-MATERIAL.arff', u'pittsburg-bridges-MATERIAL', 106, 'leave-one-out cross-validation', 105, 100, 7, 3] 23625 Wed Dec 17 18:24:57 2014 295 | done ['pittsburg-bridges-REL-L/pittsburg-bridges-REL-L.arff', u'pittsburg-bridges-REL-L', 103, 'leave-one-out cross-validation', 102, 100, 7, 3] 23617 Wed Dec 17 18:24:58 2014 296 | start statlog-australian-credit 23610 Wed 
Dec 17 18:24:58 2014 297 | initial ['statlog-australian-credit/statlog-australian-credit.arff', u'statlog-australian-credit', 690, 14, 2] 23610 Wed Dec 17 18:24:58 2014 298 | done ['ringnorm/ringnorm.arff', u'ringnorm', 7400, 'test-set cross-validation', 7300, 100, 20, 2] 23620 Wed Dec 17 18:24:59 2014 299 | done ['statlog-australian-credit/statlog-australian-credit.arff', u'statlog-australian-credit', 690, 'test-set cross-validation', 590, 100, 14, 2] 23610 Wed Dec 17 18:25:00 2014 300 | start statlog-german-credit 23612 Wed Dec 17 18:25:08 2014 301 | initial ['statlog-german-credit/statlog-german-credit.arff', u'statlog-german-credit', 1000, 24, 2] 23612 Wed Dec 17 18:25:08 2014 302 | start statlog-heart 23601 Wed Dec 17 18:25:08 2014 303 | initial ['statlog-heart/statlog-heart.arff', u'statlog-heart', 270, 13, 2] 23601 Wed Dec 17 18:25:08 2014 304 | done ['post-operative/post-operative.arff', u'post-operative', 90, 'leave-one-out cross-validation', 89, 90, 8, 3] 23613 Wed Dec 17 18:25:08 2014 305 | start statlog-image 23624 Wed Dec 17 18:25:09 2014 306 | initial ['statlog-image/statlog-image.arff', u'statlog-image', 2310, 18, 7] 23624 Wed Dec 17 18:25:09 2014 307 | done ['statlog-german-credit/statlog-german-credit.arff', u'statlog-german-credit', 1000, 'test-set cross-validation', 900, 100, 24, 2] 23612 Wed Dec 17 18:25:10 2014 308 | start statlog-landsat 23619 Wed Dec 17 18:25:10 2014 309 | initial ['statlog-landsat/statlog-landsat_train.arff', u'statlog-landsat', 4435, 36, 6] 23619 Wed Dec 17 18:25:10 2014 310 | start statlog-shuttle 23605 Wed Dec 17 18:25:12 2014 311 | start statlog-vehicle 23625 Wed Dec 17 18:25:12 2014 312 | initial ['statlog-vehicle/statlog-vehicle.arff', u'statlog-vehicle', 846, 18, 4] 23625 Wed Dec 17 18:25:12 2014 313 | initial ['statlog-shuttle/statlog-shuttle_train.arff', u'statlog-shuttle', 43500, 9, 7] 23605 Wed Dec 17 18:25:12 2014 314 | start steel-plates 23617 Wed Dec 17 18:25:13 2014 315 | initial 
['steel-plates/steel-plates.arff', u'steel-plates', 1941, 27, 7] 23617 Wed Dec 17 18:25:13 2014 316 | done ['spambase/spambase.arff', u'spambase', 4601, 'test-set cross-validation', 4501, 100, 57, 2] 23611 Wed Dec 17 18:25:14 2014 317 | start synthetic-control 23620 Wed Dec 17 18:25:14 2014 318 | initial ['synthetic-control/synthetic-control.arff', u'synthetic-control', 600, 60, 6] 23620 Wed Dec 17 18:25:14 2014 319 | start teaching 23610 Wed Dec 17 18:25:14 2014 320 | initial ['teaching/teaching.arff', u'teaching', 151, 5, 3] 23610 Wed Dec 17 18:25:14 2014 321 | done ['statlog-vehicle/statlog-vehicle.arff', u'statlog-vehicle', 846, 'test-set cross-validation', 746, 100, 18, 4] 23625 Wed Dec 17 18:25:16 2014 322 | done ['spect/spect_train.arff', u'spect', 79, 'leave-one-out cross-validation', 78, 79, 22, 2] 23628 Wed Dec 17 18:25:19 2014 323 | done ['synthetic-control/synthetic-control.arff', u'synthetic-control', 600, 'test-set cross-validation', 500, 100, 60, 6] 23620 Wed Dec 17 18:25:23 2014 324 | done ['statlog-image/statlog-image.arff', u'statlog-image', 2310, 'test-set cross-validation', 2210, 100, 18, 7] 23624 Wed Dec 17 18:25:23 2014 325 | start thyroid 23613 Wed Dec 17 18:25:23 2014 326 | initial ['thyroid/thyroid_train.arff', u'thyroid', 3772, 21, 3] 23613 Wed Dec 17 18:25:23 2014 327 | start tic-tac-toe 23612 Wed Dec 17 18:25:25 2014 328 | initial ['tic-tac-toe/tic-tac-toe.arff', u'tic-tac-toe', 958, 9, 2] 23612 Wed Dec 17 18:25:25 2014 329 | done ['spectf/spectf_train.arff', u'spectf', 80, 'leave-one-out cross-validation', 79, 80, 44, 2] 23609 Wed Dec 17 18:25:26 2014 330 | done ['tic-tac-toe/tic-tac-toe.arff', u'tic-tac-toe', 958, 'test-set cross-validation', 858, 100, 9, 2] 23612 Wed Dec 17 18:25:26 2014 331 | done ['pittsburg-bridges-TYPE/pittsburg-bridges-TYPE.arff', u'pittsburg-bridges-TYPE', 105, 'leave-one-out cross-validation', 104, 100, 7, 6] 23614 Wed Dec 17 18:25:26 2014 332 | start titanic 23611 Wed Dec 17 18:25:29 2014 333 | initial 
['titanic/titanic.arff', u'titanic', 2201, 3, 2] 23611 Wed Dec 17 18:25:29 2014 334 | start trains 23625 Wed Dec 17 18:25:30 2014 335 | initial ['trains/trains.arff', u'trains', 10, 29, 2] 23625 Wed Dec 17 18:25:30 2014 336 | skip (could not split) trains/trains.arff 23625 Wed Dec 17 18:25:30 2014 337 | done ['thyroid/thyroid_train.arff', u'thyroid', 3772, 'test-set cross-validation', 3672, 100, 21, 3] 23613 Wed Dec 17 18:25:32 2014 338 | done ['steel-plates/steel-plates.arff', u'steel-plates', 1941, 'test-set cross-validation', 1841, 100, 27, 7] 23617 Wed Dec 17 18:25:32 2014 339 | done ['titanic/titanic.arff', u'titanic', 2201, 'test-set cross-validation', 2101, 100, 3, 2] 23611 Wed Dec 17 18:25:32 2014 340 | start twonorm 23628 Wed Dec 17 18:25:35 2014 341 | initial ['twonorm/twonorm.arff', u'twonorm', 7400, 20, 2] 23628 Wed Dec 17 18:25:35 2014 342 | done ['seeds/seeds.arff', u'seeds', 210, 'leave-one-out cross-validation', 209, 100, 7, 3] 23632 Wed Dec 17 18:25:36 2014 343 | start column_3C_weka 23620 Wed Dec 17 18:25:37 2014 344 | initial ['vertebral-column-2clases/datos_orixinais/column_3C_weka.arff', u'column_3C_weka', 310, 6, 3] 23620 Wed Dec 17 18:25:37 2014 345 | start column_2C_weka 23624 Wed Dec 17 18:25:38 2014 346 | initial ['vertebral-column-2clases/datos_orixinais/column_2C_weka.arff', u'column_2C_weka', 310, 6, 2] 23624 Wed Dec 17 18:25:38 2014 347 | done ['statlog-landsat/statlog-landsat_train.arff', u'statlog-landsat', 4435, 'test-set cross-validation', 4335, 100, 36, 6] 23619 Wed Dec 17 18:25:41 2014 348 | start vertebral-column-2clases 23609 Wed Dec 17 18:25:41 2014 349 | initial ['vertebral-column-2clases/vertebral-column-2clases.arff', u'vertebral-column-2clases', 310, 6, 2] 23609 Wed Dec 17 18:25:41 2014 350 | start vertebral-column-3clases 23612 Wed Dec 17 18:25:41 2014 351 | initial ['vertebral-column-3clases/vertebral-column-3clases.arff', u'vertebral-column-3clases', 310, 6, 3] 23612 Wed Dec 17 18:25:41 2014 352 | start wall-following 
23614 Wed Dec 17 18:25:42 2014 353 | initial ['wall-following/wall-following.arff', u'wall-following', 5456, 24, 4] 23614 Wed Dec 17 18:25:42 2014 354 | done ['statlog-heart/statlog-heart.arff', u'statlog-heart', 270, 'leave-one-out cross-validation', 269, 100, 13, 2] 23601 Wed Dec 17 18:25:46 2014 355 | start waveform 23625 Wed Dec 17 18:25:46 2014 356 | initial ['waveform/waveform.arff', u'waveform', 5000, 21, 3] 23625 Wed Dec 17 18:25:46 2014 357 | start waveform-noise 23613 Wed Dec 17 18:25:47 2014 358 | initial ['waveform-noise/waveform-noise.arff', u'waveform-noise', 5000, 40, 3] 23613 Wed Dec 17 18:25:47 2014 359 | start wine 23617 Wed Dec 17 18:25:48 2014 360 | initial ['wine/wine.arff', u'wine', 178, 13, 3] 23617 Wed Dec 17 18:25:48 2014 361 | start wine-quality-red 23611 Wed Dec 17 18:25:48 2014 362 | initial ['wine-quality-red/wine-quality-red.arff', u'wine-quality-red', 1599, 11, 6] 23611 Wed Dec 17 18:25:48 2014 363 | start wine-quality-white 23632 Wed Dec 17 18:25:51 2014 364 | initial ['wine-quality-white/wine-quality-white.arff', u'wine-quality-white', 4898, 11, 7] 23632 Wed Dec 17 18:25:51 2014 365 | done ['twonorm/twonorm.arff', u'twonorm', 7400, 'test-set cross-validation', 7300, 100, 20, 2] 23628 Wed Dec 17 18:25:52 2014 366 | done ['wine-quality-red/wine-quality-red.arff', u'wine-quality-red', 1599, 'test-set cross-validation', 1499, 100, 11, 6] 23611 Wed Dec 17 18:25:56 2014 367 | start yeast 23619 Wed Dec 17 18:25:57 2014 368 | initial ['yeast/yeast.arff', u'yeast', 1484, 8, 10] 23619 Wed Dec 17 18:25:57 2014 369 | done ['semeion/semeion.arff', u'semeion', 1593, 'test-set cross-validation', 1493, 100, 256, 10] 23627 Wed Dec 17 18:26:00 2014 370 | start zoo 23601 Wed Dec 17 18:26:01 2014 371 | initial ['zoo/zoo.arff', u'zoo', 101, 16, 7] 23601 Wed Dec 17 18:26:01 2014 372 | done ['yeast/yeast.arff', u'yeast', 1484, 'test-set cross-validation', 1384, 100, 8, 10] 23619 Wed Dec 17 18:26:05 2014 373 | done ['teaching/teaching.arff', u'teaching', 
151, 'leave-one-out cross-validation', 150, 100, 5, 3] 23610 Wed Dec 17 18:26:06 2014 374 | done ['waveform/waveform.arff', u'waveform', 5000, 'test-set cross-validation', 4900, 100, 21, 3] 23625 Wed Dec 17 18:26:06 2014 375 | done ['vertebral-column-2clases/datos_orixinais/column_2C_weka.arff', u'column_2C_weka', 310, 'leave-one-out cross-validation', 309, 100, 6, 2] 23624 Wed Dec 17 18:26:14 2014 376 | done ['waveform-noise/waveform-noise.arff', u'waveform-noise', 5000, 'test-set cross-validation', 4900, 100, 40, 3] 23613 Wed Dec 17 18:26:17 2014 377 | done ['vertebral-column-2clases/vertebral-column-2clases.arff', u'vertebral-column-2clases', 310, 'leave-one-out cross-validation', 309, 100, 6, 2] 23609 Wed Dec 17 18:26:18 2014 378 | done ['magic/magic.arff', u'magic', 19020, 'test-set cross-validation', 18920, 100, 10, 2] 23622 Wed Dec 17 18:26:20 2014 379 | done ['wall-following/wall-following.arff', u'wall-following', 5456, 'test-set cross-validation', 5356, 100, 24, 4] 23614 Wed Dec 17 18:26:21 2014 380 | done ['wine-quality-white/wine-quality-white.arff', u'wine-quality-white', 4898, 'test-set cross-validation', 4798, 100, 11, 7] 23632 Wed Dec 17 18:26:25 2014 381 | done ['wine/wine.arff', u'wine', 178, 'leave-one-out cross-validation', 177, 100, 13, 3] 23617 Wed Dec 17 18:26:34 2014 382 | done ['musk-1/musk-1.arff', u'musk-1', 476, 'leave-one-out cross-validation', 475, 100, 166, 2] 23631 Wed Dec 17 18:26:38 2014 383 | done ['audiology-std/audiology-std_train.arff', u'audiology-std', 171, 'leave-one-out cross-validation', 170, 100, 59, 18] 23608 Wed Dec 17 18:26:39 2014 384 | done ['vertebral-column-2clases/datos_orixinais/column_3C_weka.arff', u'column_3C_weka', 310, 'leave-one-out cross-validation', 309, 100, 6, 3] 23620 Wed Dec 17 18:26:40 2014 385 | done ['vertebral-column-3clases/vertebral-column-3clases.arff', u'vertebral-column-3clases', 310, 'leave-one-out cross-validation', 309, 100, 6, 3] 23612 Wed Dec 17 18:26:42 2014 386 | done 
['audiology-std/con_patrons_repetidos/audiology-std_train.arff', u'audiology-std', 194, 'leave-one-out cross-validation', 193, 100, 59, 18] 23607 Wed Dec 17 18:26:50 2014 387 | done ['zoo/zoo.arff', u'zoo', 101, 'leave-one-out cross-validation', 100, 100, 16, 7] 23601 Wed Dec 17 18:26:54 2014 388 | done ['plant-texture/plant-texture.arff', u'plant-texture', 1599, 'test-set cross-validation', 1499, 100, 64, 100] 23618 Wed Dec 17 18:27:17 2014 389 | done ['plant-margin/plant-margin.arff', u'plant-margin', 1600, 'test-set cross-validation', 1500, 100, 64, 100] 23629 Wed Dec 17 18:27:28 2014 390 | done ['primary-tumor/primary-tumor.arff', u'primary-tumor', 330, 'leave-one-out cross-validation', 329, 100, 17, 15] 23615 Wed Dec 17 18:27:40 2014 391 | done ['soybean/soybean_train.arff', u'soybean', 307, 'leave-one-out cross-validation', 306, 100, 35, 18] 23630 Wed Dec 17 18:28:22 2014 392 | done ['statlog-shuttle/statlog-shuttle_train.arff', u'statlog-shuttle', 43500, 'test-set cross-validation', 43400, 100, 9, 7] 23605 Wed Dec 17 18:29:00 2014 393 | done ['adult/adult_train.arff', u'adult', 32561, 'test-set cross-validation', 32461, 100, 14, 2] 23604 Wed Dec 17 18:29:25 2014 394 | done ['plant-shape/plant-shape.arff', u'plant-shape', 1600, 'test-set cross-validation', 1500, 100, 64, 100] 23616 Wed Dec 17 18:29:30 2014 395 | done ['letter/letter.arff', u'letter', 20000, 'test-set cross-validation', 19900, 100, 16, 26] 23603 Wed Dec 17 18:29:36 2014 396 | done ['chess-krvk/chess-krvk.arff', u'chess-krvk', 28056, 'test-set cross-validation', 27956, 100, 6, 18] 23621 Wed Dec 17 18:33:45 2014 397 | done ['libras/libras.arff', u'libras', 360, 'leave-one-out cross-validation', 359, 100, 90, 15] 23602 Wed Dec 17 18:34:41 2014 398 | done ['arrhythmia/arrhythmia.arff', u'arrhythmia', 452, 'leave-one-out cross-validation', 451, 100, 262, 13] 23606 Wed Dec 17 18:39:41 2014 399 | done ['connect-4/connect-4.arff', u'connect-4', 67557, 'test-set cross-validation', 67457, 100, 42, 2] 
23626 Wed Dec 17 19:28:23 2014 400 | done ['miniboone/miniboone.arff', u'miniboone', 130064, 'test-set cross-validation', 129964, 100, 50, 2] 23623 Wed Dec 17 21:56:09 2014 401 | done processing pool 23595 Wed Dec 17 21:56:09 2014 402 | done 23595 Wed Dec 17 21:56:28 2014 403 | --------------------------------------------------------------------------------