├── README.md ├── .gitignore ├── initState.Rdata ├── machLearn.Rproj ├── www ├── busy.js └── style.css ├── init.R ├── LICENSE └── app.R /README.md: -------------------------------------------------------------------------------- 1 | # machLearn Shiny app 2 | 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | -------------------------------------------------------------------------------- /initState.Rdata: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davesteps/machLearn/HEAD/initState.Rdata -------------------------------------------------------------------------------- /machLearn.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | -------------------------------------------------------------------------------- /www/busy.js: -------------------------------------------------------------------------------- 1 | setInterval(function(){ 2 | if ($('html').attr('class')=='shiny-busy') { 3 | setTimeout(function() { 4 | if ($('html').attr('class')=='shiny-busy') { 5 | $('div.busy').show() 6 | } 7 | }, 1500) 8 | } else { 9 | $('div.busy').hide() 10 | } 11 | }, 100) -------------------------------------------------------------------------------- /www/style.css: -------------------------------------------------------------------------------- 1 | div.busy { 2 | position:absolute; 3 | top: 40%; 4 | left: 50%; 5 | margin-top: -100px; 6 | margin-left: -50px; 7 | display:none; 8 | background: rgba(230, 230, 230, .8); 9 | text-align: center; 10 | padding-top: 20px; 11 | padding-left: 30px; 12 | padding-bottom: 40px; 13 | padding-right: 30px; 14 | border-radius: 5px; 15 | } 16 | 17 | 18 | div.foot { 19 | position:absolute; 20 | bottom: 0%; 21 | left: 1%; 22 | text-align: center; 23 | padding-top: 0px; 24 | padding-left: 30px; 25 | padding-bottom: 0px; 26 | padding-right: 30px; 27 | border-radius: 5px; 28 | 29 | } 30 | 31 | div.container 32 | { 33 | width:840px; 34 | height:317px; 35 | overflow:scroll; /* if you don't want a scrollbar, set to hidden */ 36 | overflow-x:hidden; /* hides horizontal scrollbar on newer browsers */ 37 | 38 | /* resize and min-height are optional, allows user to resize viewable area */ 39 | -webkit-resize:vertical; 40 | -moz-resize:vertical; 41 | resize:vertical; 42 | min-height:317px; 43 | } 44 | 45 | iframe.embed 46 | { 47 | width:1000px; /* set this to approximate width of entire page you're embedding */ 48 | height:2000px; /* determines where the bottom of the page cuts off */ 49 | margin-left:-183px; /* clipping left side of page */ 50 | margin-top:-244px; /* clipping top of page */ 51 | overflow:hidden; 52 | 53 | /* resize seems to inherit in at least Firefox */ 54 | -webkit-resize:none; 55 | -moz-resize:none; 56 | resize:none; 57 | } 58 | -------------------------------------------------------------------------------- /init.R: -------------------------------------------------------------------------------- 1 | library(shiny) 2 | require(shinyBS) 3 | require(shinydashboard) 4 | require(shinyjs) 5 | require(caret) 6 | require(plyr) 7 | 
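# Note: plyr is attached before dplyr so that dplyr's verbs (mutate, summarise, ...) are not masked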
require(dplyr) 8 | require(tidyr) 9 | require(Cairo) 10 | require(raster) 11 | require(gstat) 12 | require(wesanderson) 13 | require(nnet) 14 | require(randomForest) 15 | 16 | # car, foreach, methods, plyr, nlme, reshape2, stats, stats4, utils, grDevices 17 | 18 | 19 | # Not all of these are required but shinyapps.io was crashing and 20 | # importing one of these solved the issue 21 | require(kernlab) 22 | require(klaR) 23 | require(vcd) 24 | require(e1071) 25 | require(gam) 26 | require(ipred) 27 | require(MASS) 28 | require(ellipse) 29 | require(mda) 30 | require(mgcv) 31 | require(mlbench) 32 | require(party) 33 | require(MLmetrics) 34 | require(Cubist) 35 | require(testthat) 36 | 37 | 38 | data(meuse) 39 | 40 | dmnds <- diamonds#[sample(1:nrow(diamonds),1e3),] 41 | 42 | # leaf <- read.csv('/Users/davesteps/Desktop/kaggle_data/leaf/train.csv') 43 | 44 | datasets <- list( 45 | 'iris'=iris, 46 | 'cars'=mtcars, 47 | 'meuse'=meuse, 48 | 'diamonds'=data.frame(dmnds), 49 | 'Boston'=Boston 50 | # 'leaf'=leaf 51 | # 'midwest'=data.frame(midwest), 52 | # 'mpg'=data.frame(mpg), 53 | # 'msleep'=data.frame(msleep), 54 | # 'txhousing'=data.frame(txhousing) 55 | ) 56 | 57 | tuneParams <- list( 58 | 'svmLinear'=data.frame(C=c(0.01,0.1,1)), 59 | 'svmPoly'= expand.grid(degree=1:3,scale=c(0.01,0.1),C=c(0.25,0.5,1)), 60 | 'nnet'=expand.grid(size=c(1,3,5),decay=c(0.01,0.1,1)), 61 | 'rf'=data.frame(mtry=c(2,3,4)), 62 | 'knn'=data.frame(k=c(1,3,5,7,9)), 63 | 'nb'=expand.grid(usekernel=c(T,F),adjust=c(0.01,0.1,1),fL=c(0.01,0.1,1)), 64 | 'glm'=NULL#data.frame() 65 | ) 66 | 67 | 68 | mdls <- list('svmLinear'='svmLinear', 69 | 'svmPoly'='svmPoly', 70 | 'Neural Network'='nnet', 71 | 'randomForest'='rf', 72 | 'k-NN'='knn', 73 | 'Naive Bayes'='nb', 74 | 'GLM'='glm', 75 | 'GAM'='gam') 76 | #multinom 77 | 78 | mdli <- list( 79 | 'Regression'=c(T,T,T,T,T,F,T,F), 80 | 'Classification'=c(T,T,T,T,T,T,F,F) 81 | ) 82 | 83 | reg.mdls <- mdls[mdli[['Regression']]] 84 | cls.mdls <- mdls[mdli[['Classification']]] 85 | 86 | 87 | # 88 | pal <- c('#b2df8a','#33a02c','#ff7f00','#cab2d6','#b15928', 89 | '#fdbf6f','#a6cee3','#fb9a99','#1f78b4','#e31a1c') 90 | set.seed(3) 91 | pal <- sample(pal,length(mdls),F) 92 | names(pal) <- mdls 93 | 94 | modelCSS <- function(item,col){ 95 | tags$style(HTML(paste0(".selectize-input [data-value=\"",item,"\"] {background: ",col," !important}"))) 96 | } 97 | 98 | 99 | tableCSS <- function(model,col){ 100 | paste0('if (data[6] == "',model,'") 101 | $("td", row).css("background", "',col,'");') 102 | } 103 | 104 | label.help <- function(label,id){ 105 | HTML(paste0(label,actionLink(id,label=NULL,icon=icon('question-circle')))) 106 | } 107 | 108 | 109 | 110 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /app.R: -------------------------------------------------------------------------------- 1 | source('init.R',local = T) 2 | 3 | 4 | # Server ------------------------------------------------------------------ 5 | 6 | server <- function(input, output,session) { 7 | 8 | 9 | CVtune <- readRDS('initState.Rdata') 10 | makeReactiveBinding('CVtune') 11 | 12 | rawdata <- reactive({ 13 | datasets[[input$dataset]] 14 | }) 15 | 16 | observe({ 17 | updateSelectizeInput(session,'yvar',choices=names(rawdata()),selected = names(rawdata())[1]) 18 | }) 19 | 20 | 21 | observe({ 22 | nms <- names(rawdata())[names(rawdata())!=input$yvar] 23 | updateSelectizeInput(session,'xvar',choices=nms,selected = nms) 24 | }) 25 | 26 | dataTrain <- NULL 27 | dataTest <- NULL 28 | 29 | makeReactiveBinding('dataTrain') 30 | # makeReactiveBinding('dataTest') 31 | modelType <- 'Regression' 32 | 33 | makeReactiveBinding('modelType') 34 | 35 | observeEvent(modelType,{ 36 | 37 | if(modelType=='Regression'){ 38 | updateSelectizeInput(session,'slt_algo',choices = reg.mdls,selected = reg.mdls) 39 | } else { 40 | updateSelectizeInput(session,'slt_algo',choices = cls.mdls,selected = cls.mdls) 41 | 42 | } 43 | }) 44 | 45 | observe({ 46 | 47 | yvar <- input$yvar 48 | xvars <- input$xvar 49 | testsize <- input$sld_testsplit 50 | 51 | if(is.null(yvar)||yvar=='') 52 | return(NULL) 53 | 54 | # extract y and X from raw data 55 | y <- isolate(rawdata()[,yvar]) 56 | X <- isolate(rawdata()[,xvars]) 57 | 58 | # deal with NA values 59 | yi <- !is.na(y) 60 | Xi <- complete.cases(X) 61 | 62 | df2 <- cbind(y,X)[yi&Xi,] 63 | 64 | 65 | c <- class(df2$y) 66 | lvls <- length(unique(df2$y)) 67 | if(lvls<10|(c!='numeric'&c!='integer')){ 68 | modelType <<-'Classification' 69 | df2$y <- factor(df2$y) 70 | } else { 71 | modelType <<-'Regression' 72 | if(input$chk_logY){df2$y <- log(df2$y+0.1)} 73 | } 74 | 75 | trainIndex <- createDataPartition(df2$y, 76 | p = 1-(testsize/100), 77 | list = FALSE, 78 | times = 1) 79 | isolate({ 80 | dataTrain <<- df2[ trainIndex,] 81 | dataTest <<- df2[-trainIndex,] 82 | }) 83 | }) 84 | 85 | 86 | 87 | 88 | observeEvent(input$btn_train,{ 89 | 90 | disable('btn_train') 91 | on.exit(enable('btn_train')) 92 | 93 | mdls <- isolate(input$slt_algo) 94 | 95 | fitControl <- trainControl(method = "cv",savePredictions = T, 96 | number = as.integer(input$rdo_CVtype)) 97 | 98 | trainArgs <- list( 99 | 'svmLinear'=list(form=y ~ ., 100 | data = dataTrain, 101 | preProcess = c('scale','center'), 102 | method = 'svmLinear', 103 | trControl = fitControl, 104 | tuneGrid=tuneParams[['svmLinear']]), 105 | 'svmPoly'= list(form=y ~ ., 106 | data = dataTrain, 107 | preProcess = c('scale','center'), 108 | method = 'svmPoly', 109 | trControl = fitControl, 110 | tuneGrid=tuneParams[['svmPoly']]), 111 | 'nnet'=list(form=y ~ ., 112 | data = dataTrain, 113 | preProcess = c('scale','center'), 114 | method = 'nnet', 115 | trControl = fitControl, 116 | tuneGrid=tuneParams[['nnet']], 117 | linout=T), 118 | 'rf'=list(form=y ~ ., 119 | data = 
dataTrain, 120 | preProcess = c('scale','center'), 121 | method = 'rf', 122 | trControl = fitControl, 123 | tuneGrid=tuneParams[['rf']], 124 | ntree=1e3), 125 | 'knn'=list(form=y ~ ., 126 | data = dataTrain, 127 | preProcess = c('scale','center'), 128 | method = 'knn', 129 | trControl = fitControl, 130 | tuneGrid=tuneParams[['knn']]), 131 | 'nb'=list(form=y ~ ., 132 | data = dataTrain, 133 | preProcess = c('scale','center'), 134 | method = 'nb', 135 | trControl = fitControl, 136 | tuneGrid=tuneParams[['nb']]), 137 | 'glm'=list(form=y ~ ., 138 | data = dataTrain, 139 | preProcess = c('scale','center'), 140 | method = 'glm', 141 | trControl = fitControl, 142 | tuneGrid=NULL), 143 | 'gam'=list(form=y ~ ., 144 | data = dataTrain, 145 | preProcess = c('scale','center'), 146 | method = 'gam', 147 | trControl = fitControl) 148 | ) 149 | 150 | tune <- lapply(mdls,function(m){ 151 | do.call('train',trainArgs[[m]]) 152 | }) 153 | 154 | names(tune) <- mdls 155 | CVtune <<- tune 156 | # saveRDS(CVtune,'initState.Rdata') 157 | 158 | }) 159 | 160 | 161 | CVres <- reactive({ 162 | 163 | if(is.null(CVtune)) return(NULL) 164 | 165 | fits <- CVtune 166 | getRes <- function(i){ 167 | name <- names(fits)[i] 168 | res <- fits[[i]]$results 169 | df <- res[(ncol(res)-3):ncol(res)] 170 | apply(res,1,function(r) paste(r[1:(ncol(res)-4)],collapse = '-')) %>% 171 | paste(name,.,sep='-') -> model 172 | cbind.data.frame(model,df,name=name[[1]],stringsAsFactors =F) 173 | } 174 | 175 | df <- plyr::ldply(1:length(fits),getRes) 176 | 177 | if(isolate(modelType)=='Regression'){ 178 | df$rank <- rank(rank(df$RMSE)+rank(1-df$Rsquared),ties.method = 'first') 179 | } else { 180 | df$rank <- rank(rank(1-df$Accuracy)+rank(1-df$Kappa),ties.method = 'first') 181 | } 182 | df[2:5] <- round(df[2:5],3) 183 | df[order(df$rank),] 184 | }) 185 | 186 | CVpredObs <- reactive({ 187 | 188 | fits <- CVtune 189 | 190 | getObsPred <- function(i){ 191 | # i <- 2 192 | bst <- fits[[i]]$bestTune 193 | preds <- fits[[i]]$pred 194 | preds$name <- names(fits)[i] 195 | preds$model <- paste(bst,collapse = '-') %>% paste(names(fits)[i],.,sep='-') 196 | 197 | ii <- lapply(1:length(bst),function(p){ 198 | preds[names(bst)[p]]==as.character(bst[p][[1]]) 199 | }) 200 | if(length(bst)>1) data.frame(ii) %>% apply(.,1,all) -> ii else unlist(ii) ->ii 201 | preds[ii,-which(names(preds)%in%names(bst))] 202 | } 203 | 204 | df <- plyr::ldply(1:length(fits),getObsPred) 205 | str(df) 206 | # saveRDS(df,'CVpredObs.Rdata') 207 | df 208 | 209 | }) 210 | 211 | topModels <- reactive({ 212 | if(is.null(CVres())) 213 | return() 214 | CVres() %>% group_by(name) %>% filter(rank==min(rank)) -> df 215 | # 216 | lst <- df$name[order(df$rank)] 217 | names(lst) <- df$model[order(df$rank)] 218 | lst 219 | }) 220 | 221 | observe({ 222 | lst <- topModels() 223 | updateSelectizeInput(session,'slt_Finalalgo',choices = lst,selected = lst[1]) 224 | 225 | }) 226 | 227 | testPreds <- reactive({ 228 | 229 | tune <- isolate(CVtune) 230 | if(is.null(tune)) return(NULL) 231 | 232 | lapply(CVtune[input$slt_Finalalgo], 233 | predict.train,isolate(dataTest)) %>% 234 | data.frame() -> df 235 | 236 | if(isolate(modelType)=='Regression'){ 237 | c <- apply(df[input$slt_Finalalgo],1,mean) 238 | 239 | s1 <- 1 - mean((dataTest$y-c)^2)/mean((dataTest$y-mean(dataTest$y))^2) 240 | s2 <- sqrt(mean((dataTest$y-c)^2)) 241 | 242 | } else { 243 | c <- apply(df[input$slt_Finalalgo],1,modal) 244 | s1 <- sum(c==dataTest$y)/nrow(dataTest) 245 | s2 <- vcd::Kappa(table(c, dataTest$y))$Unweighted[1] 246 | } 247 | 
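# Returned values: c = (ensemble) test-set predictions; s1/s2 = variance explained and RMSE for regression, or accuracy and Cohen's kappa for classification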
list(c=c,s1=s1,s2=s2) 248 | 249 | }) 250 | 251 | 252 | makeReactiveBinding('sigTable') 253 | observeEvent(input$btn_sigTest,{ 254 | 255 | 256 | permute <- function(v1,v2,nreps=1e4){ 257 | rmsq <- function(v){sqrt(mean(v^2))} 258 | obs.diff = (rmsq(v1) - rmsq(v2)) 259 | v12 = c(v1, v2) 260 | l12 = length(v12) 261 | l1 = length(v1) 262 | 263 | sim.diff = rep(0, nreps) 264 | for (j in 1:nreps) { 265 | perm = sample(v12) 266 | sim.diff[j] = rmsq(perm[1:l1]) - rmsq(perm[(l1 + 1):l12]) 267 | } 268 | bigger = sim.diff[(sim.diff) >= obs.diff] 269 | pvalue = length(bigger)/(nreps) 270 | pvalue 271 | } 272 | 273 | # df <- readRDS('CVpredObs.Rdata') 274 | df <- CVpredObs() 275 | bst <- names(topModels())[[1]] 276 | print(bst) 277 | best <- df$pred[df$model==bst]-df$obs[df$model==bst] 278 | 279 | sigTable <<- df %>% #filter(model!=bst) %>% 280 | group_by(model) %>% 281 | summarise(`p-value`=permute(pred-obs,best)) %>% 282 | .[rev(order(.$'p-value')),] 283 | 284 | 285 | }) 286 | 287 | 288 | 289 | # Outputs --------------------------------------------------------------------- 290 | 291 | output$sgTable <- renderTable({ 292 | sigTable 293 | }) 294 | 295 | 296 | output$testsetPlot <- renderPlot({ 297 | 298 | 299 | df <- data.frame(obs=dataTest$y,pred=testPreds()$c) 300 | 301 | col <- pal[topModels()[[1]]] 302 | 303 | if(isolate(modelType)=='Regression'){ 304 | lims <- c(min(df$obs),max(df$obs)) 305 | ggplot(df)+ 306 | geom_abline(alpha=0.5)+ 307 | geom_point(aes(x=obs,y=pred),color=col,size=2)+ 308 | scale_x_continuous(limits = lims)+ 309 | scale_y_continuous(limits = lims)+ 310 | # scale_color_manual(values=pal)+ 311 | coord_equal()+ 312 | # facet_wrap(~name)+ 313 | theme_bw()+ 314 | xlab('Observed')+ 315 | ylab('Predicted')+ 316 | theme(legend.position='none') 317 | } else { 318 | df$pred <- factor(df$pred,levels=levels(df$obs)) 319 | df %>% group_by(pred,obs) %>% 320 | summarise(n=n()) %>% 321 | ggplot(.)+ 322 | geom_raster(aes(x=obs,y=pred,alpha=n),fill=col)+ 323 | geom_text(aes(x=obs,y=pred,label=n))+ 324 | # scale_fill_manual(values=pal)+ 325 | coord_equal()+ 326 | # facet_wrap(~name)+ 327 | theme_bw()+ 328 | xlab('Observed')+ 329 | ylab('Predicted')+ 330 | theme(legend.position='none') 331 | 332 | } 333 | 334 | }) 335 | 336 | output$testsetS1 <- renderValueBox({ 337 | 338 | lab <- ifelse(isolate(modelType)=='Regression','Variance explained','Accuracy') 339 | 340 | valueBox(paste(round(testPreds()$s1*100,1),'%'),lab,icon = icon('cube')) 341 | 342 | }) 343 | 344 | output$testsetS2<- renderValueBox({ 345 | lab <- ifelse(isolate(modelType)=='Regression','RMSE','Kappa') 346 | valueBox(round(testPreds()$s2,3),subtitle = lab,icon = icon('cube')) 347 | }) 348 | 349 | 350 | 351 | 352 | output$rawdata <- renderDataTable({rawdata()}, 353 | options = list(pageLength = 10,searching = FALSE)) 354 | 355 | output$model_info <- renderDataTable({ 356 | CVres()[c(7,1:6)] 357 | 358 | }, options = list(rowCallback = I( 359 | lapply(1:length(mdls),function(i) tableCSS(mdls[i],pal[i])) %>% 360 | unlist %>% 361 | paste(.,collapse = '') %>% 362 | paste('function(row, data) {',.,'}') 363 | ), 364 | pageLength = 10,searching = FALSE 365 | ) 366 | ) 367 | 368 | 369 | output$CVplot2 <- renderPlot({ 370 | 371 | type <- isolate(modelType) 372 | df <-CVpredObs() 373 | 374 | if(type=='Regression'){ 375 | lims <- c(min(df$obs),max(df$obs)) 376 | ggplot(df)+ 377 | geom_abline(alpha=0.5)+ 378 | geom_point(aes(x=obs,y=pred,col=name))+ 379 | scale_x_continuous(limits = lims)+ 380 | scale_y_continuous(limits = lims)+ 381 | 
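# consistent per-algorithm colours (palette defined in init.R); facet_wrap below gives one panel per algorithm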
scale_color_manual(values=pal)+ 382 | coord_equal()+ 383 | facet_wrap(~name)+ 384 | theme_bw()+ 385 | xlab('Observed')+ 386 | ylab('Predicted')+ 387 | theme(legend.position='none') 388 | } else { 389 | df %>% group_by(pred,obs,name) %>% 390 | summarise(n=n()) %>% 391 | ggplot(.)+ 392 | geom_raster(aes(x=obs,y=pred,fill=name,alpha=n))+ 393 | geom_text(aes(x=obs,y=pred,label=n))+ 394 | scale_fill_manual(values=pal)+ 395 | coord_equal()+ 396 | facet_wrap(~name)+ 397 | theme_bw()+ 398 | xlab('Observed')+ 399 | ylab('Predicted')+ 400 | theme(legend.position='none') 401 | 402 | } 403 | }) 404 | 405 | output$CVplot1 <- renderPlot({ 406 | resdf <- CVres() 407 | type <- isolate(modelType) 408 | 409 | resdf$model <- factor(resdf$model,levels = rev(resdf$model[resdf$rank])) 410 | if(type=='Regression'){ 411 | ggplot(resdf,aes(x=model,color=name))+ 412 | geom_errorbar(aes(ymin=RMSE-RMSESD,ymax=RMSE+RMSESD),size=1)+ 413 | geom_point(aes(y=RMSE),size=3)+ 414 | scale_color_manual(values=pal)+ 415 | coord_flip()+ 416 | theme_bw()+ 417 | xlab('')+ 418 | theme(legend.position='none') -> p1 419 | ggplot(resdf,aes(x=model,color=name))+ 420 | geom_errorbar(aes(ymin=Rsquared-RsquaredSD,ymax=Rsquared+RsquaredSD),size=1)+ 421 | geom_point(aes(y=Rsquared),size=3)+ 422 | scale_color_manual(values=pal)+ 423 | coord_flip()+ 424 | theme_bw()+ 425 | xlab('')+ 426 | theme(legend.position='none') -> p2 427 | } else { 428 | ggplot(resdf,aes(x=model,color=name))+ 429 | geom_errorbar(aes(ymin=Kappa-KappaSD,ymax=Kappa+KappaSD),size=1)+ 430 | geom_point(aes(y=Kappa),size=3)+ 431 | scale_color_manual(values=pal)+ 432 | coord_flip()+ 433 | theme_bw()+ 434 | xlab('')+ 435 | theme(legend.position='none') -> p1 436 | ggplot(resdf,aes(x=model,color=name))+ 437 | geom_errorbar(aes(ymin=Accuracy-AccuracySD,ymax=Accuracy+AccuracySD),size=1)+ 438 | geom_point(aes(y=Accuracy),size=3)+ 439 | scale_color_manual(values=pal)+ 440 | coord_flip()+ 441 | theme_bw()+ 442 | xlab('')+ 443 | theme(legend.position='none') -> p2 444 | } 445 | 446 | gridExtra::grid.arrange(p2,p1,ncol=2) 447 | 448 | }) 449 | 450 | output$Ytype <- renderText(class(dataTrain$y)) 451 | output$txt_dataset <- renderPrint(cat('Dataset:',input$dataset)) 452 | output$txt_n <- renderPrint(cat('n obs:',nrow(rawdata()))) 453 | output$txt_Yvar <- renderPrint(cat('Y var:',input$yvar)) 454 | output$txt_testSet <- renderPrint(cat('Test set:',input$sld_testsplit,'%')) 455 | output$txt_Type <- renderPrint(cat('Model Type:',modelType)) 456 | output$txt_CV <- renderPrint(cat('CV folds:',input$rdo_CVtype)) 457 | output$txt_nModels <- renderPrint(cat('Models trained:',nrow(CVres()))) 458 | output$txt_bestModel <- renderPrint(cat('Best Model:',(CVres()$model[1]))) 459 | output$txt_bestModelStat1 <- renderPrint({ 460 | if(modelType=='Regression'){ 461 | cat('Variance Explained:',(CVres()$Rsquared[1]*100),'%') 462 | } else { 463 | cat('Accuracy:',(CVres()$Accuracy[1])) 464 | } 465 | }) 466 | output$txt_bestModelStat2 <- renderPrint({ 467 | if(modelType=='Regression'){ 468 | cat('RMSE:',(CVres()$RMSE[1])) 469 | } else { 470 | cat('Kappa:',(CVres()$Kappa[1])) 471 | } 472 | }) 473 | 474 | 475 | 476 | 477 | output$Ystats <- renderPrint({ 478 | 479 | summary(dataTrain$y) 480 | 481 | }) 482 | 483 | output$Yplot <- renderPlot({ 484 | 485 | if(modelType=='Regression'){ 486 | 487 | 488 | ggplot(dataTrain,aes(x=y))+ 489 | geom_density(alpha=0.7,adjust=0.5,fill="#5BBCD6")+ 490 | theme_bw()+ 491 | ggtitle('Y Distribution')+ 492 | xlab('') 493 | 494 | 495 | # wes_palettes$Darjeeling 496 | 497 | } else { 
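# categorical response: bar chart of class frequencies, one colour per class (Wes Anderson palette)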
498 | pal <- wes_palette('Darjeeling',n = length(unique(dataTrain$y)),type = 'c') 499 | ggplot(dataTrain,aes(x=y,fill=y))+ 500 | geom_bar(stat='count')+ 501 | scale_fill_manual(values=pal)+ 502 | xlab('')+ 503 | ggtitle('Y Class Frequency')+ 504 | coord_flip()+ 505 | theme(legend.position='none') 506 | } 507 | 508 | }) 509 | 510 | output$featImp <- renderPlot({ 511 | 512 | rf <- randomForest(y~.,dataTrain) 513 | vi <- as.data.frame(varImpPlot(rf)) 514 | vi$Feature <- row.names(vi) 515 | names(vi)[1] <- 'Score' 516 | vi$Feature <- factor(vi$Feature,levels=vi$Feature[order(vi$Score)]) 517 | str(vi) 518 | ggplot(vi,aes(x=Feature,y=Score))+ 519 | geom_bar(stat='identity',fill="#5BBCD6")+ 520 | coord_flip()+ 521 | xlab('')+ 522 | ylab('Relative Importance Score') 523 | 524 | }) 525 | 526 | 527 | } 528 | 529 | 530 | 531 | 532 | 533 | # UI ---------------------------------------------------------------------- 534 | 535 | ui <- bootstrapPage(useShinyjs(), 536 | # Add custom CSS & Javascript; 537 | tagList(tags$head( 538 | tags$link(rel="stylesheet", type="text/css",href="style.css"), 539 | tags$script(type="text/javascript", src = "busy.js"), 540 | lapply(1:length(mdls),function(i) modelCSS(mdls[i],pal[i])) 541 | 542 | )), 543 | 544 | dashboardPage(#skin = 'red', 545 | dashboardHeader(title = HTML(paste(icon('cubes'),'machLearn')) 546 | ), 547 | dashboardSidebar( 548 | sidebarMenu( 549 | # Setting id makes input$tabs give the tabName of currently-selected tab 550 | id = "tabs", 551 | menuItem("Step 1: Input Data", tabName = "setup", icon = icon("cog")), 552 | menuItem("Step 2: Training & CV",tabName = "model", icon = icon("sitemap"),selected = T), 553 | menuItem("Step 3: Model Performance",tabName = "test", icon = icon("bar-chart")), 554 | menuItem("Exploration", icon = icon(">>"), 555 | menuSubItem("Feature Importance",tabName = "imp")) 556 | # menuSubItem("Feature Selection", tabName = "boruta")) 557 | # 558 | # menuItem("Feature Importance",tabName = "featSel", icon = icon("sitemap")) 559 | # menuItem("Info",tabName = "Info", icon = icon("info")) 560 | ), 561 | hr(), 562 | fluidRow( 563 | column(width=1), 564 | column(width=10, 565 | h5(textOutput('txt_dataset')), 566 | h5(textOutput('txt_n')), 567 | h5(textOutput('txt_Yvar')), 568 | h5(textOutput('txt_testSet')) 569 | 570 | 571 | ), 572 | column(width=1) 573 | ), 574 | absolutePanel( 575 | bottom = 10, 576 | left = 10, 577 | draggable = F, 578 | width='100%', 579 | height='auto', 580 | a(icon('github fa-2x'),href='https://github.com/davesteps/machLearn',target='_blank') 581 | ) 582 | ), 583 | dashboardBody( 584 | tabItems( 585 | tabItem("setup", 586 | box(width = 4,title = 'Input Dataset',solidHeader = T,status = 'primary', 587 | selectInput('dataset',label = 'Choose Dataset', 588 | choices = names(datasets),selected='iris'), 589 | fileInput('fileIn',label = 'Upload data') %>% disabled(), 590 | actionButton('btn_viewData',label = 'View Data',icon=icon('table')), 591 | hr(), 592 | 593 | 594 | sliderInput('sld_testsplit',label = label.help('Test set %','lbl_testsplit'),min = 33,max = 90,step = 1,value = 33), 595 | bsTooltip(id = "lbl_testsplit", title = "% of data to set aside for test data", 596 | placement = "right", trigger = "hover") 597 | 598 | 599 | ), 600 | box(width=4,title = 'y variable',solidHeader = T,status = 'primary', 601 | helpText('Select the variable we would like to predict'), 602 | selectizeInput('yvar',label=label.help('y var','lbl_yvar'),choices = character(0)), 603 | helpText(HTML(paste('data type:', textOutput('Ytype')))), 
604 | bsTooltip(id = "lbl_yvar", title = "Variable to predict", 605 | placement = "right", trigger = "hover"), 606 | hr(), 607 | plotOutput('Yplot',height=260), 608 | conditionalPanel("output.Ytype == 'numeric'|output.Ytype == 'integer'", 609 | checkboxInput('chk_logY',label = 'log transform') 610 | ), 611 | verbatimTextOutput('Ystats') 612 | 613 | ), 614 | box(width=4,title = 'X vars',solidHeader = T,status = 'primary', 615 | selectizeInput('xvar',label=label.help('X (Predict Y as a function of):','lbl_xvar'),choices = character(0),multiple = T), 616 | bsTooltip(id = "lbl_xvar", title = "Try to predict Y as a function of these variables", 617 | placement = "right", trigger = "hover") 618 | ), 619 | bsModal('data',title = 'Dataset',trigger = 'btn_viewData',size = 'large', 620 | dataTableOutput('rawdata') 621 | ) 622 | ), 623 | tabItem("model", 624 | # bsModal('mdl_tune','Tuning Options',trigger = 'btn_tune', 625 | 626 | # ), 627 | column(width=3, 628 | box(width = 12,title = 'Model Options',solidHeader = T,status = 'primary', 629 | selectInput('slt_algo',label = 'Algorithm:'%>%label.help('lbl_algo'), 630 | choices = reg.mdls,selected = reg.mdls,multiple=T), 631 | selectizeInput('slt_Tune','Parameter Tuning'%>%label.help('lbl_Tune'), 632 | choices = c('Coarse auto-tune (fast)','Fine auto-tune (slow)','manual')), 633 | # actionButton('btn_tune',label = 'Tuning Options',icon = icon('sliders') 634 | # ), 635 | # p(), 636 | 637 | radioButtons('rdo_CVtype',label = 'Cross-validation folds'%>%label.help('lbl_CV'), 638 | choices = c('3-fold'=3,'5-fold'=5,'10-fold'=10),inline = T), 639 | 640 | actionButton('btn_train',label = 'Train Models', 641 | icon = icon('cogs'),#'bullseye','rocket' 642 | class='btn-danger fa-lg', 643 | width='100%'), 644 | bsTooltip(id = "lbl_algo", title = "Which algorithms to test", 645 | placement = "right", trigger = "hover"), 646 | bsTooltip(id = "lbl_Tune", title = "Type of tuning performed to optimize model parameters", 647 | placement = "right", trigger = "hover"), 648 | bsTooltip(id = "lbl_CV", title = "Number of splits of training data used to tune parameters", 649 | placement = "right", trigger = "hover") 650 | 651 | ), 652 | box(width = 12,title = 'Summary',solidHeader = F, 653 | status = 'primary', 654 | helpText(textOutput('txt_bestModel')), 655 | helpText(textOutput('txt_bestModelStat1')), 656 | helpText(textOutput('txt_bestModelStat2')), 657 | hr(), 658 | helpText(textOutput('txt_Type')), 659 | helpText(textOutput('txt_CV')), 660 | helpText(textOutput('txt_nModels')) 661 | 662 | ) 663 | ) 664 | , 665 | tabBox(width = 9, 666 | tabPanel(title = 'CV Model Rank',#icon = icon('sort-amount-asc'), 667 | h4('Cross-validation results'), 668 | plotOutput('CVplot1',height=600) 669 | ), 670 | tabPanel(title = 'CV Pred vs Obs', 671 | h4('Observed vs Predicted (best candidate for each algorithm)'), 672 | plotOutput('CVplot2',height=600) 673 | ), 674 | tabPanel(title = 'CV Stats', 675 | h4('Performance statistics from cross-validation'), 676 | 677 | dataTableOutput('model_info') 678 | ) 679 | # tabPanel(title = 'CV Sig testing', 680 | # h4('Statistical significance of cross-validation results'), 681 | # helpText('Perform permutation test of error statistic to determine 682 | # whether the score of the best model was significantly higher 683 | # than those of the other candidates.'), 684 | # actionButton('btn_sigTest',label = 'Perform permutation test'), 685 | # tableOutput('sgTable') 686 | # 687 | # 688 | # ) 689 | ) 690 | ), 691 | tabItem("test", 692 | column(width=3, 693 | box(width = 
12,title = 'Test Set Predictions',solidHeader = F,status = 'primary', 694 | # radioButtons('rdo_finalModel','Final model', 695 | # c('Best Model','Ensemble of top models')), 696 | selectInput('slt_Finalalgo',label = 'Final Model:'%>%label.help('lbl_Finalalgo'), 697 | choices=mdls,multiple=T), 698 | helpText('The best cross-validated model is selected by default. 699 | Multiple models can be selected to make ensemble predictions.'), 700 | bsTooltip(id = "lbl_Finalalgo", title = "Which algorithms to use to predict the test set", 701 | placement = "right", trigger = "hover") 702 | 703 | ), 704 | valueBoxOutput('testsetS1',width=12), 705 | valueBoxOutput('testsetS2',width=12) 706 | ), 707 | box(width = 6,title = 'Test Set Observed vs Predicted', 708 | solidHeader = T,status = 'primary', 709 | plotOutput('testsetPlot') 710 | ) 711 | ), 712 | tabItem("imp", 713 | box(width = 6,title = 'Feature importance', 714 | helpText('Relative feature importance as indicated by randomForest'), 715 | 716 | plotOutput('featImp') 717 | ) 718 | ) 719 | ) 720 | ) 721 | 722 | 723 | ), 724 | div(class = "busy", 725 | h4("working..."), 726 | h2(HTML('')) 727 | ) 728 | ) 729 | 730 | shinyApp(ui, server) --------------------------------------------------------------------------------
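Running the app locally — a minimal sketch, assuming the packages attached in init.R are installed (runApp/runGitHub are standard shiny helpers; the repository coordinates come from the GitHub link used in the sidebar):

# from a checkout of the repository root
shiny::runApp()

# or straight from GitHub
shiny::runGitHub('machLearn', 'davesteps')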