├── README.md ├── .gitignore ├── initState.Rdata ├── machLearn.Rproj ├── www ├── busy.js └── style.css ├── init.R ├── LICENSE └── app.R /README.md: -------------------------------------------------------------------------------- 1 | # machLearn Shiny app 2 | 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | -------------------------------------------------------------------------------- /initState.Rdata: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davesteps/machLearn/HEAD/initState.Rdata -------------------------------------------------------------------------------- /machLearn.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | -------------------------------------------------------------------------------- /www/busy.js: -------------------------------------------------------------------------------- 1 | setInterval(function(){ 2 | if ($('html').attr('class')=='shiny-busy') { 3 | setTimeout(function() { 4 | if ($('html').attr('class')=='shiny-busy') { 5 | $('div.busy').show() 6 | } 7 | }, 1500) 8 | } else { 9 | $('div.busy').hide() 10 | } 11 | }, 100) -------------------------------------------------------------------------------- /www/style.css: -------------------------------------------------------------------------------- 1 | div.busy { 2 | position:absolute; 3 | top: 40%; 4 | left: 50%; 5 | margin-top: -100px; 6 | margin-left: -50px; 7 | display:none; 8 | background: rgba(230, 230, 230, .8); 9 | text-align: center; 10 | padding-top: 20px; 11 | padding-left: 30px; 12 | padding-bottom: 40px; 13 | padding-right: 30px; 14 | border-radius: 5px; 15 | } 16 | 17 | 18 | div.foot { 19 | position:absolute; 20 | bottom: 0%; 21 | left: 1%; 22 | text-align: center; 23 | padding-top: 0px; 24 | padding-left: 30px; 25 | padding-bottom: 0px; 26 | padding-right: 30px; 27 | border-radius: 5px; 28 | 29 | } 30 | 31 | div.container 32 | { 33 | width:840px; 34 | height:317px; 35 | overflow:scroll; /* if you don't want a scrollbar, set to hidden */ 36 | overflow-x:hidden; /* hides horizontal scrollbar on newer browsers */ 37 | 38 | /* resize and min-height are optional, allows user to resize viewable area */ 39 | -webkit-resize:vertical; 40 | -moz-resize:vertical; 41 | resize:vertical; 42 | min-height:317px; 43 | } 44 | 45 | iframe.embed 46 | { 47 | width:1000px; /* set this to approximate width of entire page you're embedding */ 48 | height:2000px; /* determines where the bottom of the page cuts off */ 49 | margin-left:-183px; /* clipping left side of page */ 50 | margin-top:-244px; /* clipping top of page */ 51 | overflow:hidden; 52 | 53 | /* resize seems to inherit in at least Firefox */ 54 | -webkit-resize:none; 55 | -moz-resize:none; 56 | resize:none; 57 | } 58 | -------------------------------------------------------------------------------- /init.R: -------------------------------------------------------------------------------- 1 | library(shiny) 2 | require(shinyBS) 3 | require(shinydashboard) 4 | require(shinyjs) 5 | require(caret) 6 | require(plyr) 7 | 
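# Note: plyr is attached before dplyr so that dplyr's verbs (mutate, summarise, ...) are not masked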
require(dplyr) 8 | require(tidyr) 9 | require(Cairo) 10 | require(raster) 11 | require(gstat) 12 | require(wesanderson) 13 | require(nnet) 14 | require(randomForest) 15 | 16 | # car, foreach, methods, plyr, nlme, reshape2, stats, stats4, utils, grDevices 17 | 18 | 19 | # Not all of these are required but shinyapps.io was crashing and 20 | # importing one of these solved the issue 21 | require(kernlab) 22 | require(klaR) 23 | require(vcd) 24 | require(e1071) 25 | require(gam) 26 | require(ipred) 27 | require(MASS) 28 | require(ellipse) 29 | require(mda) 30 | require(mgcv) 31 | require(mlbench) 32 | require(party) 33 | require(MLmetrics) 34 | require(Cubist) 35 | require(testthat) 36 | 37 | 38 | data(meuse) 39 | 40 | dmnds <- diamonds#[sample(1:nrow(diamonds),1e3),] 41 | 42 | # leaf <- read.csv('/Users/davesteps/Desktop/kaggle_data/leaf/train.csv') 43 | 44 | datasets <- list( 45 | 'iris'=iris, 46 | 'cars'=mtcars, 47 | 'meuse'=meuse, 48 | 'diamonds'=data.frame(dmnds), 49 | 'Boston'=Boston 50 | # 'leaf'=leaf 51 | # 'midwest'=data.frame(midwest), 52 | # 'mpg'=data.frame(mpg), 53 | # 'msleep'=data.frame(msleep), 54 | # 'txhousing'=data.frame(txhousing) 55 | ) 56 | 57 | tuneParams <- list( 58 | 'svmLinear'=data.frame(C=c(0.01,0.1,1)), 59 | 'svmPoly'= expand.grid(degree=1:3,scale=c(0.01,0.1),C=c(0.25,0.5,1)), 60 | 'nnet'=expand.grid(size=c(1,3,5),decay=c(0.01,0.1,1)), 61 | 'rf'=data.frame(mtry=c(2,3,4)), 62 | 'knn'=data.frame(k=c(1,3,5,7,9)), 63 | 'nb'=expand.grid(usekernel=c(T,F),adjust=c(0.01,0.1,1),fL=c(0.01,0.1,1)), 64 | 'glm'=NULL#data.frame() 65 | ) 66 | 67 | 68 | mdls <- list('svmLinear'='svmLinear', 69 | 'svmPoly'='svmPoly', 70 | 'Neural Network'='nnet', 71 | 'randomForest'='rf', 72 | 'k-NN'='knn', 73 | 'Naive Bayes'='nb', 74 | 'GLM'='glm', 75 | 'GAM'='gam') 76 | #multinom 77 | 78 | mdli <- list( 79 | 'Regression'=c(T,T,T,T,T,F,T,F), 80 | 'Classification'=c(T,T,T,T,T,T,F,F) 81 | ) 82 | 83 | reg.mdls <- mdls[mdli[['Regression']]] 84 | cls.mdls <- mdls[mdli[['Classification']]] 85 | 86 | 87 | # 88 | pal <- c('#b2df8a','#33a02c','#ff7f00','#cab2d6','#b15928', 89 | '#fdbf6f','#a6cee3','#fb9a99','#1f78b4','#e31a1c') 90 | set.seed(3) 91 | pal <- sample(pal,length(mdls),F) 92 | names(pal) <- mdls 93 | 94 | modelCSS <- function(item,col){ 95 | tags$style(HTML(paste0(".selectize-input [data-value=\"",item,"\"] {background: ",col," !important}"))) 96 | } 97 | 98 | 99 | tableCSS <- function(model,col){ 100 | paste0('if (data[6] == "',model,'") 101 | $("td", row).css("background", "',col,'");') 102 | } 103 | 104 | label.help <- function(label,id){ 105 | HTML(paste0(label,actionLink(id,label=NULL,icon=icon('question-circle')))) 106 | } 107 | 108 | 109 | 110 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /app.R: -------------------------------------------------------------------------------- 1 | source('init.R',local = T) 2 | 3 | 4 | # Server ------------------------------------------------------------------ 5 | 6 | server <- function(input, output,session) { 7 | 8 | 9 | CVtune <- readRDS('initState.Rdata') 10 | makeReactiveBinding('CVtune') 11 | 12 | rawdata <- reactive({ 13 | datasets[[input$dataset]] 14 | }) 15 | 16 | observe({ 17 | updateSelectizeInput(session,'yvar',choices=names(rawdata()),selected = names(rawdata())[1]) 18 | }) 19 | 20 | 21 | observe({ 22 | nms <- names(rawdata())[names(rawdata())!=input$yvar] 23 | updateSelectizeInput(session,'xvar',choices=nms,selected = nms) 24 | }) 25 | 26 | dataTrain <- NULL 27 | dataTest <- NULL 28 | 29 | makeReactiveBinding('dataTrain') 30 | # makeReactiveBinding('dataTest') 31 | modelType <- 'Regression' 32 | 33 | makeReactiveBinding('modelType') 34 | 35 | observeEvent(modelType,{ 36 | 37 | if(modelType=='Regression'){ 38 | updateSelectizeInput(session,'slt_algo',choices = reg.mdls,selected = reg.mdls) 39 | } else { 40 | updateSelectizeInput(session,'slt_algo',choices = cls.mdls,selected = cls.mdls) 41 | 42 | } 43 | }) 44 | 45 | observe({ 46 | 47 | yvar <- input$yvar 48 | xvars <- input$xvar 49 | testsize <- input$sld_testsplit 50 | 51 | if(is.null(yvar)||yvar=='') 52 | return(NULL) 53 | 54 | # extract y and X from raw data 55 | y <- isolate(rawdata()[,yvar]) 56 | X <- isolate(rawdata()[,xvars]) 57 | 58 | # deal with NA values 59 | yi <- !is.na(y) 60 | Xi <- complete.cases(X) 61 | 62 | df2 <- cbind(y,X)[yi&Xi,] 63 | 64 | 65 | c <- class(df2$y) 66 | lvls <- length(unique(df2$y)) 67 | if(lvls<10|(c!='numeric'&c!='integer')){ 68 | modelType <<-'Classification' 69 | df2$y <- factor(df2$y) 70 | } else { 71 | modelType <<-'Regression' 72 | if(input$chk_logY){df2$y <- log(df2$y+0.1)} 73 | } 74 | 75 | trainIndex <- createDataPartition(df2$y, 76 | p = 1-(testsize/100), 77 | list = FALSE, 78 | times = 1) 79 | isolate({ 80 | dataTrain <<- df2[ trainIndex,] 81 | dataTest <<- df2[-trainIndex,] 82 | }) 83 | }) 84 | 85 | 86 | 87 | 88 | observeEvent(input$btn_train,{ 89 | 90 | disable('btn_train') 91 | on.exit(enable('btn_train')) 92 | 93 | mdls <- isolate(input$slt_algo) 94 | 95 | fitControl <- trainControl(method = "cv",savePredictions = T, 96 | number = as.integer(input$rdo_CVtype)) 97 | 98 | trainArgs <- list( 99 | 'svmLinear'=list(form=y ~ ., 100 | data = dataTrain, 101 | preProcess = c('scale','center'), 102 | method = 'svmLinear', 103 | trControl = fitControl, 104 | tuneGrid=tuneParams[['svmLinear']]), 105 | 'svmPoly'= list(form=y ~ ., 106 | data = dataTrain, 107 | preProcess = c('scale','center'), 108 | method = 'svmPoly', 109 | trControl = fitControl, 110 | tuneGrid=tuneParams[['svmPoly']]), 111 | 'nnet'=list(form=y ~ ., 112 | data = dataTrain, 113 | preProcess = c('scale','center'), 114 | method = 'nnet', 115 | trControl = fitControl, 116 | tuneGrid=tuneParams[['nnet']], 117 | linout=T), 118 | 'rf'=list(form=y ~ ., 119 | data = 
dataTrain, 120 | preProcess = c('scale','center'), 121 | method = 'rf', 122 | trControl = fitControl, 123 | tuneGrid=tuneParams[['rf']], 124 | ntree=1e3), 125 | 'knn'=list(form=y ~ ., 126 | data = dataTrain, 127 | preProcess = c('scale','center'), 128 | method = 'knn', 129 | trControl = fitControl, 130 | tuneGrid=tuneParams[['knn']]), 131 | 'nb'=list(form=y ~ ., 132 | data = dataTrain, 133 | preProcess = c('scale','center'), 134 | method = 'nb', 135 | trControl = fitControl, 136 | tuneGrid=tuneParams[['nb']]), 137 | 'glm'=list(form=y ~ ., 138 | data = dataTrain, 139 | preProcess = c('scale','center'), 140 | method = 'glm', 141 | trControl = fitControl, 142 | tuneGrid=NULL), 143 | 'gam'=list(form=y ~ ., 144 | data = dataTrain, 145 | preProcess = c('scale','center'), 146 | method = 'gam', 147 | trControl = fitControl) 148 | ) 149 | 150 | tune <- lapply(mdls,function(m){ 151 | do.call('train',trainArgs[[m]]) 152 | }) 153 | 154 | names(tune) <- mdls 155 | CVtune <<- tune 156 | # saveRDS(CVtune,'initState.Rdata') 157 | 158 | }) 159 | 160 | 161 | CVres <- reactive({ 162 | 163 | if(is.null(CVtune)) return(NULL) 164 | 165 | fits <- CVtune 166 | getRes <- function(i){ 167 | name <- names(fits)[i] 168 | res <- fits[[i]]$results 169 | df <- res[(ncol(res)-3):ncol(res)] 170 | apply(res,1,function(r) paste(r[1:(ncol(res)-4)],collapse = '-')) %>% 171 | paste(name,.,sep='-') -> model 172 | cbind.data.frame(model,df,name=name[[1]],stringsAsFactors =F) 173 | } 174 | 175 | df <- plyr::ldply(1:length(fits),getRes) 176 | 177 | if(isolate(modelType)=='Regression'){ 178 | df$rank <- rank(rank(df$RMSE)+rank(1-df$Rsquared),ties.method = 'first') 179 | } else { 180 | df$rank <- rank(rank(1-df$Accuracy)+rank(1-df$Kappa),ties.method = 'first') 181 | } 182 | df[2:5] <- round(df[2:5],3) 183 | df[order(df$rank),] 184 | }) 185 | 186 | CVpredObs <- reactive({ 187 | 188 | fits <- CVtune 189 | 190 | getObsPred <- function(i){ 191 | # i <- 2 192 | bst <- fits[[i]]$bestTune 193 | preds <- fits[[i]]$pred 194 | preds$name <- names(fits)[i] 195 | preds$model <- paste(bst,collapse = '-') %>% paste(names(fits)[i],.,sep='-') 196 | 197 | ii <- lapply(1:length(bst),function(p){ 198 | preds[names(bst)[p]]==as.character(bst[p][[1]]) 199 | }) 200 | if(length(bst)>1) data.frame(ii) %>% apply(.,1,all) -> ii else unlist(ii) ->ii 201 | preds[ii,-which(names(preds)%in%names(bst))] 202 | } 203 | 204 | df <- plyr::ldply(1:length(fits),getObsPred) 205 | str(df) 206 | # saveRDS(df,'CVpredObs.Rdata') 207 | df 208 | 209 | }) 210 | 211 | topModels <- reactive({ 212 | if(is.null(CVres())) 213 | return() 214 | CVres() %>% group_by(name) %>% filter(rank==min(rank)) -> df 215 | # 216 | lst <- df$name[order(df$rank)] 217 | names(lst) <- df$model[order(df$rank)] 218 | lst 219 | }) 220 | 221 | observe({ 222 | lst <- topModels() 223 | updateSelectizeInput(session,'slt_Finalalgo',choices = lst,selected = lst[1]) 224 | 225 | }) 226 | 227 | testPreds <- reactive({ 228 | 229 | tune <- isolate(CVtune) 230 | if(is.null(tune)) return(NULL) 231 | 232 | lapply(CVtune[input$slt_Finalalgo], 233 | predict.train,isolate(dataTest)) %>% 234 | data.frame() -> df 235 | 236 | if(isolate(modelType)=='Regression'){ 237 | c <- apply(df[input$slt_Finalalgo],1,mean) 238 | 239 | s1 <- 1 - mean((dataTest$y-c)^2)/mean((dataTest$y-mean(dataTest$y))^2) 240 | s2 <- sqrt(mean((dataTest$y-c)^2)) 241 | 242 | } else { 243 | c <- apply(df[input$slt_Finalalgo],1,modal) 244 | s1 <- sum(c==dataTest$y)/nrow(dataTest) 245 | s2 <- vcd::Kappa(table(c, dataTest$y))$Unweighted[1] 246 | } 247 | 
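# Returned values: c = (ensemble) test-set predictions; s1/s2 = variance explained and RMSE for regression, or accuracy and Cohen's kappa for classification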
list(c=c,s1=s1,s2=s2) 248 | 249 | }) 250 | 251 | 252 | makeReactiveBinding('sigTable') 253 | observeEvent(input$btn_sigTest,{ 254 | 255 | 256 | permute <- function(v1,v2,nreps=1e4){ 257 | rmsq <- function(v){sqrt(mean(v^2))} 258 | obs.diff = (rmsq(v1) - rmsq(v2)) 259 | v12 = c(v1, v2) 260 | l12 = length(v12) 261 | l1 = length(v1) 262 | 263 | sim.diff = rep(0, nreps) 264 | for (j in 1:nreps) { 265 | perm = sample(v12) 266 | sim.diff[j] = rmsq(perm[1:l1]) - rmsq(perm[(l1 + 1):l12]) 267 | } 268 | bigger = sim.diff[(sim.diff) >= obs.diff] 269 | pvalue = length(bigger)/(nreps) 270 | pvalue 271 | } 272 | 273 | # df <- readRDS('CVpredObs.Rdata') 274 | df <- CVpredObs() 275 | bst <- names(topModels())[[1]] 276 | print(bst) 277 | best <- df$pred[df$model==bst]-df$obs[df$model==bst] 278 | 279 | sigTable <<- df %>% #filter(model!=bst) %>% 280 | group_by(model) %>% 281 | summarise(`p-value`=permute(pred-obs,best)) %>% 282 | .[rev(order(.$'p-value')),] 283 | 284 | 285 | }) 286 | 287 | 288 | 289 | # Outputs --------------------------------------------------------------------- 290 | 291 | output$sgTable <- renderTable({ 292 | sigTable 293 | }) 294 | 295 | 296 | output$testsetPlot <- renderPlot({ 297 | 298 | 299 | df <- data.frame(obs=dataTest$y,pred=testPreds()$c) 300 | 301 | col <- pal[topModels()[[1]]] 302 | 303 | if(isolate(modelType)=='Regression'){ 304 | lims <- c(min(df$obs),max(df$obs)) 305 | ggplot(df)+ 306 | geom_abline(alpha=0.5)+ 307 | geom_point(aes(x=obs,y=pred),color=col,size=2)+ 308 | scale_x_continuous(limits = lims)+ 309 | scale_y_continuous(limits = lims)+ 310 | # scale_color_manual(values=pal)+ 311 | coord_equal()+ 312 | # facet_wrap(~name)+ 313 | theme_bw()+ 314 | xlab('Observed')+ 315 | ylab('Predicted')+ 316 | theme(legend.position='none') 317 | } else { 318 | df$pred <- factor(df$pred,levels=levels(df$obs)) 319 | df %>% group_by(pred,obs) %>% 320 | summarise(n=n()) %>% 321 | ggplot(.)+ 322 | geom_raster(aes(x=obs,y=pred,alpha=n),fill=col)+ 323 | geom_text(aes(x=obs,y=pred,label=n))+ 324 | # scale_fill_manual(values=pal)+ 325 | coord_equal()+ 326 | # facet_wrap(~name)+ 327 | theme_bw()+ 328 | xlab('Observed')+ 329 | ylab('Predicted')+ 330 | theme(legend.position='none') 331 | 332 | } 333 | 334 | }) 335 | 336 | output$testsetS1 <- renderValueBox({ 337 | 338 | lab <- ifelse(isolate(modelType)=='Regression','Variance explained','Accuracy') 339 | 340 | valueBox(paste(round(testPreds()$s1*100,1),'%'),lab,icon = icon('cube')) 341 | 342 | }) 343 | 344 | output$testsetS2<- renderValueBox({ 345 | lab <- ifelse(isolate(modelType)=='Regression','RMSE','Kappa') 346 | valueBox(round(testPreds()$s2,3),subtitle = lab,icon = icon('cube')) 347 | }) 348 | 349 | 350 | 351 | 352 | output$rawdata <- renderDataTable({rawdata()}, 353 | options = list(pageLength = 10,searching = FALSE)) 354 | 355 | output$model_info <- renderDataTable({ 356 | CVres()[c(7,1:6)] 357 | 358 | }, options = list(rowCallback = I( 359 | lapply(1:length(mdls),function(i) tableCSS(mdls[i],pal[i])) %>% 360 | unlist %>% 361 | paste(.,collapse = '') %>% 362 | paste('function(row, data) {',.,'}') 363 | ), 364 | pageLength = 10,searching = FALSE 365 | ) 366 | ) 367 | 368 | 369 | output$CVplot2 <- renderPlot({ 370 | 371 | type <- isolate(modelType) 372 | df <-CVpredObs() 373 | 374 | if(type=='Regression'){ 375 | lims <- c(min(df$obs),max(df$obs)) 376 | ggplot(df)+ 377 | geom_abline(alpha=0.5)+ 378 | geom_point(aes(x=obs,y=pred,col=name))+ 379 | scale_x_continuous(limits = lims)+ 380 | scale_y_continuous(limits = lims)+ 381 | 
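# consistent per-algorithm colours (palette defined in init.R); facet_wrap below gives one panel per algorithm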
scale_color_manual(values=pal)+ 382 | coord_equal()+ 383 | facet_wrap(~name)+ 384 | theme_bw()+ 385 | xlab('Observed')+ 386 | ylab('Predicted')+ 387 | theme(legend.position='none') 388 | } else { 389 | df %>% group_by(pred,obs,name) %>% 390 | summarise(n=n()) %>% 391 | ggplot(.)+ 392 | geom_raster(aes(x=obs,y=pred,fill=name,alpha=n))+ 393 | geom_text(aes(x=obs,y=pred,label=n))+ 394 | scale_fill_manual(values=pal)+ 395 | coord_equal()+ 396 | facet_wrap(~name)+ 397 | theme_bw()+ 398 | xlab('Observed')+ 399 | ylab('Predicted')+ 400 | theme(legend.position='none') 401 | 402 | } 403 | }) 404 | 405 | output$CVplot1 <- renderPlot({ 406 | resdf <- CVres() 407 | type <- isolate(modelType) 408 | 409 | resdf$model <- factor(resdf$model,levels = rev(resdf$model[resdf$rank])) 410 | if(type=='Regression'){ 411 | ggplot(resdf,aes(x=model,color=name))+ 412 | geom_errorbar(aes(ymin=RMSE-RMSESD,ymax=RMSE+RMSESD),size=1)+ 413 | geom_point(aes(y=RMSE),size=3)+ 414 | scale_color_manual(values=pal)+ 415 | coord_flip()+ 416 | theme_bw()+ 417 | xlab('')+ 418 | theme(legend.position='none') -> p1 419 | ggplot(resdf,aes(x=model,color=name))+ 420 | geom_errorbar(aes(ymin=Rsquared-RsquaredSD,ymax=Rsquared+RsquaredSD),size=1)+ 421 | geom_point(aes(y=Rsquared),size=3)+ 422 | scale_color_manual(values=pal)+ 423 | coord_flip()+ 424 | theme_bw()+ 425 | xlab('')+ 426 | theme(legend.position='none') -> p2 427 | } else { 428 | ggplot(resdf,aes(x=model,color=name))+ 429 | geom_errorbar(aes(ymin=Kappa-KappaSD,ymax=Kappa+KappaSD),size=1)+ 430 | geom_point(aes(y=Kappa),size=3)+ 431 | scale_color_manual(values=pal)+ 432 | coord_flip()+ 433 | theme_bw()+ 434 | xlab('')+ 435 | theme(legend.position='none') -> p1 436 | ggplot(resdf,aes(x=model,color=name))+ 437 | geom_errorbar(aes(ymin=Accuracy-AccuracySD,ymax=Accuracy+AccuracySD),size=1)+ 438 | geom_point(aes(y=Accuracy),size=3)+ 439 | scale_color_manual(values=pal)+ 440 | coord_flip()+ 441 | theme_bw()+ 442 | xlab('')+ 443 | theme(legend.position='none') -> p2 444 | } 445 | 446 | gridExtra::grid.arrange(p2,p1,ncol=2) 447 | 448 | }) 449 | 450 | output$Ytype <- renderText(class(dataTrain$y)) 451 | output$txt_dataset <- renderPrint(cat('Dataset:',input$dataset)) 452 | output$txt_n <- renderPrint(cat('n obs:',nrow(rawdata()))) 453 | output$txt_Yvar <- renderPrint(cat('Y var:',input$yvar)) 454 | output$txt_testSet <- renderPrint(cat('Test set:',input$sld_testsplit,'%')) 455 | output$txt_Type <- renderPrint(cat('Model Type:',modelType)) 456 | output$txt_CV <- renderPrint(cat('CV folds:',input$rdo_CVtype)) 457 | output$txt_nModels <- renderPrint(cat('Models trained:',nrow(CVres()))) 458 | output$txt_bestModel <- renderPrint(cat('Best Model:',(CVres()$model[1]))) 459 | output$txt_bestModelStat1 <- renderPrint({ 460 | if(modelType=='Regression'){ 461 | cat('Variance Explained:',(CVres()$Rsquared[1]*100),'%') 462 | } else { 463 | cat('Accuracy:',(CVres()$Accuracy[1])) 464 | } 465 | }) 466 | output$txt_bestModelStat2 <- renderPrint({ 467 | if(modelType=='Regression'){ 468 | cat('RMSE:',(CVres()$RMSE[1])) 469 | } else { 470 | cat('Kappa:',(CVres()$Kappa[1])) 471 | } 472 | }) 473 | 474 | 475 | 476 | 477 | output$Ystats <- renderPrint({ 478 | 479 | summary(dataTrain$y) 480 | 481 | }) 482 | 483 | output$Yplot <- renderPlot({ 484 | 485 | if(modelType=='Regression'){ 486 | 487 | 488 | ggplot(dataTrain,aes(x=y))+ 489 | geom_density(alpha=0.7,adjust=0.5,fill="#5BBCD6")+ 490 | theme_bw()+ 491 | ggtitle('Y Distribution')+ 492 | xlab('') 493 | 494 | 495 | # wes_palettes$Darjeeling 496 | 497 | } else { 
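# categorical response: bar chart of class frequencies, one colour per class (Wes Anderson palette)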
498 | pal <- wes_palette('Darjeeling',n = length(unique(dataTrain$y)),type = 'c') 499 | ggplot(dataTrain,aes(x=y,fill=y))+ 500 | geom_bar(stat='count')+ 501 | scale_fill_manual(values=pal)+ 502 | xlab('')+ 503 | ggtitle('Y Class Frequency')+ 504 | coord_flip()+ 505 | theme(legend.position='none') 506 | } 507 | 508 | }) 509 | 510 | output$featImp <- renderPlot({ 511 | 512 | rf <- randomForest(y~.,dataTrain) 513 | vi <- as.data.frame(varImpPlot(rf)) 514 | vi$Feature <- row.names(vi) 515 | names(vi)[1] <- 'Score' 516 | vi$Feature <- factor(vi$Feature,levels=vi$Feature[order(vi$Score)]) 517 | str(vi) 518 | ggplot(vi,aes(x=Feature,y=Score))+ 519 | geom_bar(stat='identity',fill="#5BBCD6")+ 520 | coord_flip()+ 521 | xlab('')+ 522 | ylab('Relative Importance Score') 523 | 524 | }) 525 | 526 | 527 | } 528 | 529 | 530 | 531 | 532 | 533 | # UI ---------------------------------------------------------------------- 534 | 535 | ui <- bootstrapPage(useShinyjs(), 536 | # Add custom CSS & Javascript; 537 | tagList(tags$head( 538 | tags$link(rel="stylesheet", type="text/css",href="style.css"), 539 | tags$script(type="text/javascript", src = "busy.js"), 540 | lapply(1:length(mdls),function(i) modelCSS(mdls[i],pal[i])) 541 | 542 | )), 543 | 544 | dashboardPage(#skin = 'red', 545 | dashboardHeader(title = HTML(paste(icon('cubes'),'machLearn')) 546 | ), 547 | dashboardSidebar( 548 | sidebarMenu( 549 | # Setting id makes input$tabs give the tabName of currently-selected tab 550 | id = "tabs", 551 | menuItem("Step 1: Input Data", tabName = "setup", icon = icon("cog")), 552 | menuItem("Step 2: Training & CV",tabName = "model", icon = icon("sitemap"),selected = T), 553 | menuItem("Step 3: Model Performance",tabName = "test", icon = icon("bar-chart")), 554 | menuItem("Exploration", icon = icon(">>"), 555 | menuSubItem("Feature Importance",tabName = "imp")) 556 | # menuSubItem("Feature Selection", tabName = "boruta")) 557 | # 558 | # menuItem("Feature Importance",tabName = "featSel", icon = icon("sitemap")) 559 | # menuItem("Info",tabName = "Info", icon = icon("info")) 560 | ), 561 | hr(), 562 | fluidRow( 563 | column(width=1), 564 | column(width=10, 565 | h5(textOutput('txt_dataset')), 566 | h5(textOutput('txt_n')), 567 | h5(textOutput('txt_Yvar')), 568 | h5(textOutput('txt_testSet')) 569 | 570 | 571 | ), 572 | column(width=1) 573 | ), 574 | absolutePanel( 575 | bottom = 10, 576 | left = 10, 577 | draggable = F, 578 | width='100%', 579 | height='auto', 580 | a(icon('github fa-2x'),href='https://github.com/davesteps/machLearn',target='_blank') 581 | ) 582 | ), 583 | dashboardBody( 584 | tabItems( 585 | tabItem("setup", 586 | box(width = 4,title = 'Input Dataset',solidHeader = T,status = 'primary', 587 | selectInput('dataset',label = 'Choose Dataset', 588 | choices = names(datasets),selected='iris'), 589 | fileInput('fileIn',label = 'Upload data') %>% disabled(), 590 | actionButton('btn_viewData',label = 'View Data',icon=icon('table')), 591 | hr(), 592 | 593 | 594 | sliderInput('sld_testsplit',label = label.help('Test set %','lbl_testsplit'),min = 33,max = 90,step = 1,value = 33), 595 | bsTooltip(id = "lbl_testsplit", title = "% of data to set aside for test data", 596 | placement = "right", trigger = "hover") 597 | 598 | 599 | ), 600 | box(width=4,title = 'y variable',solidHeader = T,status = 'primary', 601 | helpText('Select the variable we would like to predict'), 602 | selectizeInput('yvar',label=label.help('y var','lbl_yvar'),choices = character(0)), 603 | helpText(HTML(paste('data type:', textOutput('Ytype')))), 
604 | bsTooltip(id = "lbl_yvar", title = "Variable to predict", 605 | placement = "right", trigger = "hover"), 606 | hr(), 607 | plotOutput('Yplot',height=260), 608 | conditionalPanel("output.Ytype == 'numeric'|output.Ytype == 'integer'", 609 | checkboxInput('chk_logY',label = 'log transform') 610 | ), 611 | verbatimTextOutput('Ystats') 612 | 613 | ), 614 | box(width=4,title = 'X vars',solidHeader = T,status = 'primary', 615 | selectizeInput('xvar',label=label.help('X (Predict Y as a function of):','lbl_xvar'),choices = character(0),multiple = T), 616 | bsTooltip(id = "lbl_xvar", title = "Try to predict Y as a function of these variables", 617 | placement = "right", trigger = "hover") 618 | ), 619 | bsModal('data',title = 'Dataset',trigger = 'btn_viewData',size = 'large', 620 | dataTableOutput('rawdata') 621 | ) 622 | ), 623 | tabItem("model", 624 | # bsModal('mdl_tune','Tuning Options',trigger = 'btn_tune', 625 | 626 | # ), 627 | column(width=3, 628 | box(width = 12,title = 'Model Options',solidHeader = T,status = 'primary', 629 | selectInput('slt_algo',label = 'Algorithm:'%>%label.help('lbl_algo'), 630 | choices = reg.mdls,selected = reg.mdls,multiple=T), 631 | selectizeInput('slt_Tune','Parameter Tuning'%>%label.help('lbl_Tune'), 632 | choices = c('Coarse auto-tune (fast)','Fine auto-tune (slow)','manual')), 633 | # actionButton('btn_tune',label = 'Tuning Options',icon = icon('sliders') 634 | # ), 635 | # p(), 636 | 637 | radioButtons('rdo_CVtype',label = 'Cross-validation folds'%>%label.help('lbl_CV'), 638 | choices = c('3-fold'=3,'5-fold'=5,'10-fold'=10),inline = T), 639 | 640 | actionButton('btn_train',label = 'Train Models', 641 | icon = icon('cogs'),#'bullseye','rocket' 642 | class='btn-danger fa-lg', 643 | width='100%'), 644 | bsTooltip(id = "lbl_algo", title = "Which algorithms to test", 645 | placement = "right", trigger = "hover"), 646 | bsTooltip(id = "lbl_Tune", title = "Type of tuning performed to optimize model parameters", 647 | placement = "right", trigger = "hover"), 648 | bsTooltip(id = "lbl_CV", title = "Number of splits of training data used to tune parameters", 649 | placement = "right", trigger = "hover") 650 | 651 | ), 652 | box(width = 12,title = 'Summary',solidHeader = F, 653 | status = 'primary', 654 | helpText(textOutput('txt_bestModel')), 655 | helpText(textOutput('txt_bestModelStat1')), 656 | helpText(textOutput('txt_bestModelStat2')), 657 | hr(), 658 | helpText(textOutput('txt_Type')), 659 | helpText(textOutput('txt_CV')), 660 | helpText(textOutput('txt_nModels')) 661 | 662 | ) 663 | ) 664 | , 665 | tabBox(width = 9, 666 | tabPanel(title = 'CV Model Rank',#icon = icon('sort-amount-asc'), 667 | h4('Cross-validation results'), 668 | plotOutput('CVplot1',height=600) 669 | ), 670 | tabPanel(title = 'CV Pred vs Obs', 671 | h4('Observed vs Predicted (best candidate for each algorithm)'), 672 | plotOutput('CVplot2',height=600) 673 | ), 674 | tabPanel(title = 'CV Stats', 675 | h4('Performance statistics from cross-validation'), 676 | 677 | dataTableOutput('model_info') 678 | ) 679 | # tabPanel(title = 'CV Sig testing', 680 | # h4('Statistical significance of cross-validation results'), 681 | # helpText('Perform permutation test of error statistic to determine 682 | # whether the score of the best model was significantly higher 683 | # than those of the other candidates.'), 684 | # actionButton('btn_sigTest',label = 'Perform permutation test'), 685 | # tableOutput('sgTable') 686 | # 687 | # 688 | # ) 689 | ) 690 | ), 691 | tabItem("test", 692 | column(width=3, 693 | box(width = 
12,title = 'Test Set Predictions',solidHeader = F,status = 'primary', 694 | # radioButtons('rdo_finalModel','Final model', 695 | # c('Best Model','Ensemble of top models')), 696 | selectInput('slt_Finalalgo',label = 'Final Model:'%>%label.help('lbl_Finalalgo'), 697 | choices=mdls,multiple=T), 698 | helpText('The best cross-validated model is selected by default. 699 | Multiple models can be selected to make ensemble predictions.'), 700 | bsTooltip(id = "lbl_Finalalgo", title = "Which algorithms to use to predict the test set", 701 | placement = "right", trigger = "hover") 702 | 703 | ), 704 | valueBoxOutput('testsetS1',width=12), 705 | valueBoxOutput('testsetS2',width=12) 706 | ), 707 | box(width = 6,title = 'Test Set Observed vs Predicted', 708 | solidHeader = T,status = 'primary', 709 | plotOutput('testsetPlot') 710 | ) 711 | ), 712 | tabItem("imp", 713 | box(width = 6,title = 'Feature importance', 714 | helpText('Relative feature importance as indicated by randomForest'), 715 | 716 | plotOutput('featImp') 717 | ) 718 | ) 719 | ) 720 | ) 721 | 722 | 723 | ), 724 | div(class = "busy", 725 | h4("working..."), 726 | h2(HTML('')) 727 | ) 728 | ) 729 | 730 | shinyApp(ui, server) --------------------------------------------------------------------------------
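Running the app locally — a minimal sketch, assuming the packages attached in init.R are installed (runApp/runGitHub are standard shiny helpers; the repository coordinates come from the GitHub link used in the sidebar):

# from a checkout of the repository root
shiny::runApp()

# or straight from GitHub
shiny::runGitHub('machLearn', 'davesteps')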