├── .gitignore
├── DESCRIPTION
├── LICENSE.md
├── NAMESPACE
├── R
    └── run_app.R
├── README.md
├── data
    └── TMM_Example_v1.0.csv
├── img
    ├── screen-shot-1.png
    ├── screen-shot-2.png
    └── screen-shot-3.png
├── inst
    └── app
    │   ├── app.R
    │   ├── directoryInput.R
    │   ├── functions.R
    │   └── www
    │       └── js
    │           └── directory_input_binding.js
└── topicApp.Rproj


/.gitignore:
--------------------------------------------------------------------------------
 1 | # History files
 2 | .Rhistory
 3 | .Rapp.history
 4 | 
 5 | # Example code in package build process
 6 | *-Ex.R
 7 | 
 8 | # RStudio files
 9 | .Rproj.user/
10 | 
11 | # produced vignettes
12 | vignettes/*.html
13 | vignettes/*.pdf
14 | 
15 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
16 | .httr-oauth
17 | .Rproj.user
18 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: topicApp 
 2 | Title: A Shiny App to run Topic Models
 3 | Version: 0.1.0
 4 | Authors@R: person("Ryan", "Wesslen", email =  "rwesslen@gmail.com", role = c("aut", "cre"))
 5 | Description: This package is a Shiny App to analyze text data (from flat file) using topic models (LDA).
 6 | Depends: R (>= 3.2.0)
 7 | License: GPL-2 
 8 | LazyData: true
 9 | Imports: 
10 |     shiny (>= 1.0.0),
11 |     shinydashboard (>= 0.5.3),
12 |     shinyBS (>= 0.61),
13 |     shinyjs (>= 0.9),
14 |     quanteda (>= 0.9.9-24),
15 |     RColorBrewer (>= 1.1-2),
16 |     visNetwork (>= 1.0.3),
17 |     igraph (>= 1.0.1),
18 |     reshape (>= 0.8.6),
19 |     grid (>= 3.3.2),
20 |     tidyverse (>= 1.0.0),
21 |     DT (>= 0.2),
22 |     tm (>= 0.7-1),
23 |     ggwordcloud (>= 0.3.0)
24 | RoxygenNote: 6.0.1


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | 
 2 | The MIT License (MIT)
 3 | 
 4 | Copyright (c) 2017 wesslen
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | of this software and associated documentation files (the "Software"), to deal
 8 | in the Software without restriction, including without limitation the rights
 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 | 
3 | export(runApp)
4 | 


--------------------------------------------------------------------------------
/R/run_app.R:
--------------------------------------------------------------------------------
 1 | #' @export
 2 | runApp <- function() {
 3 |   appDir <- system.file("app", package = "topicApp")
 4 |   if (appDir == "") {
 5 |     stop("Could not find example directory. Try re-installing `topicApp`.", call. = FALSE)
 6 |   }
 7 | 
 8 |   shiny::runApp(appDir, display.mode = "normal")
 9 | }
10 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # topicApp: A Simple Shiny App for Topic Modeling
 2 | 
 3 | This GitHub repo provides an interactive app for running small (< 5 MB) text datasets. For larger datasets, we recommend our [GitHub repo](https://github.com/wesslen/text-analysis-org-science) that provides code examples of how to run manually topic modeling.
 4 | 
 5 | All materials are also archived on this [Dataverse]() page.
 6 | 
 7 | ## Prerequisites for Windows users
 8 | 
 9 | Typically, R packages will automatically install dependent CRAN packages via the `install_github` function from `devtools`. However, for Windows (R >= 3.3.0), this function does not work.
10 | 
11 | Therefore, if you're running Windows, please run the following code to ensure you have the dependent packages before running the next step.
12 | 
13 | This may take a few minutes but you only need to run once.
14 | 
15 | ```{r}
16 | packages <- c("shiny","quanteda","shinydashboard","RColorBrewer","DT","visNetwork","ggwordcloud",
17 |               "igraph","tm","reshape","grid","tidyverse","shinyjs","shinyBS","stm")
18 | 
19 | install.packages(packages)
20 | ```
21 | 
22 | ## Download and Running
23 | 
24 | To install and the run the app, open R/R Studio and run the following code:
25 | 
26 | ```{r}
27 | install.packages("devtools")
28 | devtools::install_github("wesslen/topicApp")
29 | topicApp::runApp()
30 | ```
31 | 
32 | ![Screen Shot 1](img/screen-shot-1.png)
33 | 
34 | ![Screen Shot 2](img/screen-shot-2.png)
35 | 
36 | ![Screen Shot 3](img/screen-shot-3.png)
37 | 
38 | ## FAQ/Help
39 | 
40 | 1.  I'm getting an error message when trying to install the `slam` package (a dependency for several other packages).
41 | 
42 | Try to run this command in your R (or R Studio) Console:
43 | 
44 | ```{r}
45 | slam_url <- "https://cran.r-project.org/src/contrib/Archive/slam/slam_0.1-37.tar.gz" 
46 | devtools::install_url(slam_url)
47 | ```
48 | 
49 | This should manually install the `slam` package.
50 | 
51 | 2.  I want to use more features of the `stm` packages (e.g., include covariates).
52 | 
53 | This app has been created for only simple analyses (e.g., small datasets, no covariates, limited functionality). If you're interested in more functionality of the `stm` package, you should considering either running the code individually or use Dan Zangri's `stmgui` package (see [stmGUI GitHub](https://github.com/dzangri/stmGUI))
54 | 
55 | 3.  I received an error that the file uploaded exceeded the maximum size (5MB+).
56 | 
57 | Yes. You will either (1) need to reduce the size of your dataset (e.g., remove unnecessary columns, sample records) or (2) consider running code manually. topicApp is only intended for small datasets and a starter solution for researchers new to R and topic modeling. For more advanced researchers, we recommend the code we've provided in our code repo [https://github.com/wesslen/text-analysis-org-science](https://github.com/wesslen/text-analysis-org-science).
58 | 


--------------------------------------------------------------------------------
/data/TMM_Example_v1.0.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wesslen/topicApp/69b46971db327f02f6d157c39e1ece96535e7003/data/TMM_Example_v1.0.csv


--------------------------------------------------------------------------------
/img/screen-shot-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wesslen/topicApp/69b46971db327f02f6d157c39e1ece96535e7003/img/screen-shot-1.png


--------------------------------------------------------------------------------
/img/screen-shot-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wesslen/topicApp/69b46971db327f02f6d157c39e1ece96535e7003/img/screen-shot-2.png


--------------------------------------------------------------------------------
/img/screen-shot-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wesslen/topicApp/69b46971db327f02f6d157c39e1ece96535e7003/img/screen-shot-3.png


--------------------------------------------------------------------------------
/inst/app/app.R:
--------------------------------------------------------------------------------
  1 | 
  2 | #packages
  3 | packages <- c("shiny","quanteda","shinydashboard","RColorBrewer","DT","visNetwork","ggwordcloud",
  4 |               "igraph","reshape","grid","tidyverse","shinyjs","shinyBS","stm")
  5 | 
  6 | lapply(packages,library,character.only = TRUE)
  7 | source('directoryInput.R')
  8 | source('functions.R')
  9 | 
 10 | #source("./inst/app/functions.R")
 11 | 
 12 | # put stop words to start with here
 13 | exp.stop <- c()
 14 | 
 15 | ###################################################
 16 | ##############       UI       #####################
 17 | ###################################################
 18 | 
 19 | ############### Header content ####################
 20 | 
 21 | header <- dashboardHeader(title = "topicApp")
 22 | 
 23 | ############### Sidebar content ###################
 24 | 
 25 | sidebar <- dashboardSidebar(
 26 |   sidebarMenu(
 27 |     menuItem("Model Parameters", tabName = "model", icon = icon("tasks")),
 28 |     menuItem("Topics", tabName = "topics", icon = icon("th")),
 29 |     #menuItem("Document-Level", tabName = "companies", icon = icon("users")),
 30 |     menuItem("Validation", tabName = "validation", icon = icon("check"))
 31 |   )
 32 | )
 33 | 
 34 | ############### Body content ######################
 35 | 
 36 | body <-   dashboardBody(
 37 |   tabItems(
 38 |     # Topic Modeling Tab
 39 |     
 40 |     tabItem(tabName = "model",
 41 |             fluidRow(
 42 |               box(title = "Step 1: Load Dataset",
 43 |                   column(9,
 44 |                          fileInput("dataFileToUpload", "Choose Data File To Upload")
 45 |                   ),
 46 |                   hr(),
 47 |                   # Code below was from stmGUI: https://github.com/dzangri/stmGUI
 48 |                   actionButton("submitDataForUpload", "Submit"),
 49 |                   hr(),
 50 |                   #a(id = "toggleAdvDataUpload", "Show/hide advanced options"),
 51 |                     div(id = "advUploadOptions",
 52 |                         checkboxInput("headerPresent", "Header Row Present", TRUE),
 53 |                         radioButtons("columnSeparator",
 54 |                                      "Separator",
 55 |                                      c(Comma = ",",
 56 |                                        Semicolon = ";",
 57 |                                        Tab = "\t"),
 58 |                                      inline = TRUE,
 59 |                                      ","),
 60 |                         radioButtons("quoteAroundData", "Quotes Around Text",
 61 |                                      c(None = "",
 62 |                                        "Double Quote" = "\"",
 63 |                                        "Single Quote" = "'"),
 64 |                                      inline = TRUE,
 65 |                                      "\"")
 66 |                     ),
 67 |                   hr(),
 68 |                       directoryInput('load.directory', label = 'Or load a previous model (then move to Step 4)', value = '~'),
 69 |                       bsTooltip("load.directory", "Select the directory to load a model.",
 70 |                                 "left", options = list(container = "body")),
 71 |                       actionButton("load.model","Load Model")
 72 |                   
 73 |                   ),
 74 |               box(title = "Step 2: Pre-processing",
 75 |                   selectInput("tpDocs",
 76 |                               "Select Text Column",
 77 |                               c()),
 78 |                   bsTooltip("tpDocs", "Select which column contains the column of text.",
 79 |                             "left", options = list(container = "body")),
 80 |                   textInput("stopwords", label = "Stop Words", 
 81 |                             value = paste(exp.stop, collapse = ", "),
 82 |                             placeholder = "also, such, really..."),
 83 |                   bsTooltip("stopwords", "Include additional stop words to remove:",
 84 |                             "left", options = list(container = "body")),
 85 |                   sliderInput("minDoc",
 86 |                               "Minimum # of Documents (for Terms):",
 87 |                               min = 0,  max = 100,  value = 10, step = 1),
 88 |                   bsTooltip("minDoc", "Remove sparse terms:",
 89 |                             "left", options = list(container = "body")),
 90 |                   box(checkboxInput("stemming", label = "Stemming", value = FALSE),
 91 |                   radioButtons("ngrams", label = "n-grams",
 92 |                                choices = list("Unigrams" = 1, "Bigrams" = 2), selected = 1)),
 93 |                   box(actionButton("dfm.update", "Create DFM"))
 94 |                   )
 95 |               ),
 96 |               fluidRow(
 97 |               box(title = "Step 3: Topic Model",
 98 |                   column(1.5,
 99 |                     sliderInput("num.topics",
100 |                                 "Number of Topics:",
101 |                                 min = 0,  max = 100,  value = 10, step = 1),
102 |                     bsTooltip("num.topics", "Set to zero to auto-detect topics.",
103 |                               "left", options = list(container = "body"))),
104 |                   column(1.5,
105 |                     sliderInput("iter",
106 |                                 "Maximum Number of Iterations:",
107 |                                 min = 20,  max = 200,  value = 100, step = 20),
108 |                     bsTooltip("iter", "Adjust higher if the algorithm is not converging.",
109 |                               "left", options = list(container = "body"))),
110 |                     hr(),
111 |                     actionButton("topic.update", "Run Model")
112 |               ),
113 |               box(title = "Step 4: Topic Network Settings",
114 |                   sliderInput("parm",
115 |                               "Minimum Correlation",
116 |                               min = 0,  max = 0.2,  value = 0.1, step = 0.01),
117 |                   bsTooltip("parm", "Higher threshold means less edges, Lower means more edges.",
118 |                             "left", options = list(container = "body")),
119 |                   hr(),
120 |                   actionButton("network.update", "Create Network")
121 |                 ),
122 |               box(title = "Save results",
123 |                   
124 |                   directoryInput('directory', label = 'Selected Directory', value = '~'),
125 |                   bsTooltip("directory", "Select the directory  to save the results.",
126 |                             "left", options = list(container = "body")),
127 |                   
128 |                   actionButton("save.results","Save Model")
129 |                   )
130 |               )
131 |             )
132 |     ,
133 |     # Topics Tab
134 |     tabItem(tabName = "topics",
135 |             fluidRow(
136 |               #actionButton("tabBut", "View Topic"),
137 |               box(title = "Topic Network", 
138 |                   visNetworkOutput("topic.network", height = "400px"), width = 12, collapsible = F)
139 |               ),
140 |              fluidRow(
141 |                box(title = "Topic Word Cloud: Size Proportional to Word Probability", 
142 |                    plotOutput("topic.wordcloud"),
143 |                    width = 12
144 |                    )
145 |                ),
146 |             fluidRow(
147 |               box(title = "Representative Documents",
148 |                   dataTableOutput("doc.table"), width = 12
149 |                   )
150 |                 )
151 |             )
152 |     ,
153 |     # # Document Tab 
154 |     # tabItem(tabName = "document",
155 |     #         fluidRow(
156 |     #           box(
157 |     #             title = "Document Attributes",
158 |     #             #selectInput("document", "Choose a document:", choices = cmpyData$Company),
159 |     #             dataTableOutput("doc.attribute"),
160 |     #             height = 400),
161 |     #           box(
162 |     #             title = "Document Topics", 
163 |     #             plotOutput("doc.treemap")),
164 |     #           height = 400,
165 |     #           collapsible = T
166 |     #         )
167 |     #         ,
168 |     #         fluidRow(    
169 |     #           box(title = "Document's Webpages", dataTableOutput("company.webpage"), width = 12, collapsible = T)
170 |     #         )
171 |     # ),
172 |     # Validation tab
173 |     tabItem(tabName = "validation",
174 |             fluidRow(
175 |               box(
176 |                 title = "Topic Validation",
177 |                 checkboxGroupInput("k.validation", "K Topics to Run:",
178 |                                    c("5" = 1,
179 |                                      "10" = 2,
180 |                                      "20" = 3,
181 |                                      "30" = 4,
182 |                                      "50" = 5,
183 |                                      "75" = 6,
184 |                                      "100" = 7), selected = c(1,2,3,4,5,6,7), inline = TRUE),
185 |                 numericInput("search.seed", label = "Seed", value = 1),
186 |                 bsTooltip("search.seed", "Random seed used to partition dataset for Cross-Validation",
187 |                           "left", options = list(container = "body")),
188 |                 hr(),
189 |                 actionButton("run.validation", "Run Validation")
190 |               )
191 |             ),
192 |             fluidRow(
193 |               box(
194 |                 plotOutput('valid.plot'), width = 12
195 |                 )
196 |               )
197 |             )
198 |     )
199 | )
200 | 
201 | ############### Dashboard page ####################
202 | 
203 | ui <- dashboardPage(header, sidebar, body)
204 | 
205 | ##############    SERVER       #####################
206 | 
207 | server <- function(input, output, session) {
208 | 
209 |   # reactive object that stores intermediate results
210 |   storedData <- reactiveValues()
211 |   
212 |   storedData$data <- NULL
213 | 
214 |   # load previous model
215 |   
216 |   observeEvent(
217 |     ignoreNULL = TRUE,
218 |     eventExpr = {
219 |       input$load.directory
220 |     },
221 |     handlerExpr = {
222 |       if (input$load.directory > 0) {
223 |         # condition prevents handler execution on initial app launch
224 |         
225 |         path = choose.dir(default = readDirectoryInput(session, 'load.directory'))
226 |         updateDirectoryInput(session, 'load.directory', value = path)
227 |       }
228 |     }
229 |   )
230 |   
231 |   output$directory = renderText({
232 |     readDirectoryInput(session, 'load.directory')
233 |   })
234 |   
235 |   # save model
236 |   
237 |   observeEvent(
238 |     ignoreNULL = TRUE,
239 |     eventExpr = {
240 |       input$directory
241 |     },
242 |     handlerExpr = {
243 |       if (input$directory > 0) {
244 |         # condition prevents handler execution on initial app launch
245 |         
246 |         path = choose.dir(default = readDirectoryInput(session, 'directory'))
247 |         updateDirectoryInput(session, 'directory', value = path)
248 |       }
249 |     }
250 |   )
251 |   
252 |   output$directory = renderText({
253 |     readDirectoryInput(session, 'directory')
254 |   })
255 |   
256 |   # shinyjs below was from stmGUI: https://github.com/dzangri/stmGUI
257 |   
258 |   shinyjs::onclick("toggleAdvDataUpload",
259 |                    shinyjs::toggle(id = "advUploadOptions",
260 |                                    anim = TRUE))
261 |   observe({
262 |     shinyjs::toggleState("submitDataForUpload",
263 |                          !is.null(input$dataFileToUpload))
264 |   })
265 |   observe({
266 |     shinyjs::toggleState("dataInputTitle-nextStep",
267 |                          !is.null(storedData$data))
268 |   })
269 |   
270 |   observeEvent(input$submitDataForUpload, ({
271 |     shinyjs::html("dataInputTextResult", "")
272 |     
273 |     userData <- input$dataFileToUpload
274 |     
275 |     withProgress(message = "Loading data, please wait...", {
276 |       setProgress(0.5)
277 |       
278 |       readDataArgs <- list(userData$datapath, header = input$headerPresent, sep = input$columnSeparator,
279 |                            quote = input$quoteAroundData)
280 |       
281 |       shinyjs::toggleState("moveFromStep1To2")
282 |       
283 |       tryCatch({
284 |         storedData$data <- do.call(read.csv, readDataArgs)
285 |         storedData$data$rowNum <- 1:nrow(storedData$data)
286 |       }, error = function(e) {
287 |         funName <- deparse(substitute(read.csv))
288 |         shinyjs::html("dataInputTextResult",
289 |                       paste("ERROR: Error while running '",
290 |                             funName, "':\n",
291 |                             e,
292 |                             sep = ""))
293 |         storedData$data <- NULL
294 |         return(NULL)
295 |       }, warning = function(w) {
296 |         shinyjs::html("dataInputTextResult",
297 |                       paste("WARNING: Warning while reading data:\n",
298 |                             w,
299 |                             sep = "\n"))
300 |         storedData$data <- NULL
301 |         return(NULL)
302 |       }, finally = {
303 |       })
304 |       
305 |       setProgress(1)
306 |     })
307 | 
308 |   }))
309 |   
310 |   observe({
311 |     userData <- storedData$data
312 |     if (!is.null(userData)) {
313 |       shinyjs::enable("tpDocs")
314 |       dataColumnNames <- colnames(userData)
315 |       updateSelectInput(session, "tpDocs", choices = dataColumnNames)
316 |     } else {
317 |       shinyjs::disable("tpDocs")
318 |     }
319 |   })
320 |   
321 |     # Topic
322 |   z <- reactiveValues(Corpus = NULL, dtm = NULL, dfm = NULL)
323 | 
324 |   observeEvent(input$dfm.update, {
325 |     MyCorpus <- corpus(as.character(storedData$data[,input$tpDocs]))
326 |     
327 |     # sets input data row number as primary key -- ensures matchback for datasets without a primary key
328 |     docvars(MyCorpus, "rowNum") <- storedData$data$rowNum
329 |     stp <- unlist(strsplit(input$stopwords,","))
330 |     stp <- trimws(stp)
331 |     ngram <- ifelse(input$ngrams==1,1L, 1L:2L)
332 |     
333 |     Dfm <- dfm(MyCorpus, remove = c(stopwords("english"), stp), remove_numbers = TRUE, remove_punct = TRUE,
334 |                   stem = input$stemming, ngrams = ngram
335 |                )
336 |   
337 |     tdfm <- dfm_trim(Dfm, min_docfreq = input$minDoc)
338 | 
339 |     # we now export to a format that we can run the topic model with
340 |     z$Corpus <- MyCorpus
341 |     z$dtm <- convert(tdfm, to= "topicmodels")
342 |     z$dfm <- convert(tdfm, to = "stm", docvars = docvars(MyCorpus))
343 |     
344 |     print("DFM created")
345 |   })
346 |   
347 |   v <- reactiveValues(probtopics = NULL, probterms = NULL, topicnames = NULL, stmFit = NULL, out = NULL)
348 |   
349 |   # topic models
350 |   
351 |   observeEvent(input$topic.update, {
352 |     k <- input$num.topics
353 |     dfm <- z$dfm
354 |     
355 |     # use quanteda converter to convert our Dfm
356 |     out <- prepDocuments(dfm$documents, dfm$vocab, dfm$meta, lower.thresh = 1, subsample = NULL)
357 |     
358 |     stmFit <- stm(out$documents, out$vocab, K = k, #prevalence =~ Party + s(Time),
359 |                   max.em.its = input$iter, data = out$meta, init.type = "Spectral", seed = 300)
360 |     
361 |     probterms <- data.frame(t(data.frame(probs = stmFit$beta[[1]])))  # words (rows) x topics (columns)
362 |     row.names(probterms) <- stmFit$vocab
363 |     probdocs <- data.frame(stmFit$theta)
364 |     
365 |     topic.names <- character(length = ncol(stmFit$theta))
366 |     
367 |     for (i in 1:ncol(stmFit$theta)){
368 |       temp <- order(-probterms[,i])
369 |       temp2 <- rownames(probterms[temp,])
370 |       topic.names[i] <- paste(temp2[1:5], collapse = " \n ")
371 |     }
372 |     v$out <- out
373 |     v$stmFit <- stmFit
374 |     v$probdocs <- probdocs
375 |     v$probterms <- probterms
376 |     v$topicnames <- topic.names
377 |   })
378 | 
379 |   # Network
380 |   x <- reactiveValues(nodes = NULL, edges = NULL)
381 | 
382 |   observeEvent(input$network.update, {
383 |    results <- new.topic.network(v$stmFit, input$parm, v$topicnames)
384 |    x$nodes <- results[[1]]
385 |    x$edges <- results[[2]]
386 |    print("Network created")
387 |   })
388 |   
389 |   # load and save
390 |   
391 |   observeEvent(input$load.model, {
392 |     dir <- readDirectoryInput(session, 'load.directory')
393 |     v$probterms <- read.csv(file = paste0(dir,"/prob-terms.csv"), stringsAsFactors = F, row.names = 1)
394 |     v$probdocs <- read.csv(file = paste0(dir,"/prob-docs.csv"), stringsAsFactors = F, row.names = 1)
395 |     load(paste0(dir,"/stmFit.RData"))
396 |     v$stmFit <- stmFit
397 |     load(paste0(dir,"/out.RData"))
398 |     v$out <- out
399 | 
400 |     topic.names <- character(length = ncol(v$probterms))
401 |     for (i in 1:ncol(v$probterms)){
402 |       temp <- order(-v$probterms[,i])
403 |       temp2 <- rownames(v$probterms[temp,])
404 |       topic.names[i] <- paste(temp2[1:5], collapse = " \n ")
405 |     }
406 |     v$topicnames <- topic.names
407 |     
408 |     print("Model Uploaded!")
409 |   })
410 | 
411 |   observeEvent(input$save.results, {
412 | 
413 |     dir <- readDirectoryInput(session, 'directory')
414 |     
415 |     dir.terms <- paste0(dir,"/prob-terms.csv")
416 |     dir.docs <- paste0(dir,"/prob-docs.csv")
417 |     dir.topics <- paste0(dir,"/topic-names.csv")
418 |     dir.parms <- paste0(dir,"/sparameters.csv")
419 |     
420 |     write.csv(v$probterms, dir.terms, row.names = T)
421 |     write.csv(v$probdocs, dir.docs, row.names = T)
422 |     write.csv(v$topicnames, dir.topics, row.names = F)
423 |     parameters <- data.frame(Stopwords = input$stopwords,
424 |                              minDoc = input$minDoc,
425 |                              stem = input$stemming,
426 |                              unigrams = input$ngrams,
427 |                              NumTopics = input$num.topics,
428 |                              Iterations = input$iter)
429 |     
430 |     write.csv(parameters, dir.parms, row.names = F)
431 |      
432 |     stmFit <- v$stmFit
433 |     out <- v$out
434 |     save(stmFit, file = paste0(dir,"/stmFit.RData"))
435 |     save(out, file = paste0(dir,"/out.RData"))
436 |     
437 |     print("Topic model saved")
438 |   })
439 |   
440 |   ### Network
441 |   
442 |   output$topic.network <- renderVisNetwork({
443 | 
444 |     visNetwork(x$nodes, x$edges, submain = "A topic is a word list of word co-occurrence clusters. Each node is a topic and each edge represents shared words between clusters.", height = "600px") %>%
445 |       #visExport() %>%
446 |       visNodes(labelHighlightBold = T) %>%
447 |       visOptions(highlightNearest = T, selectedBy = "community", nodesIdSelection = T) %>%
448 |       visInteraction(navigationButtons = T)
449 |   })
450 |   
451 |   # terms <- reactive({
452 |   #   freq <- data.frame(v$probterms)
453 |   #   temp <- as.integer(input$topic.network_selected)
454 |   #   data.frame(word = rownames(v$probterms), freq = freq[,temp])
455 |   # })
456 |   
457 |   terms <- reactive({
458 |     validate(
459 |       need(input$topic.network_selected != "", "Please select a topic")
460 |     )
461 |     freq <- data.frame(v$probterms)
462 |     temp <- as.integer(input$topic.network_selected)
463 |     data.frame(word = rownames(v$probterms), freq = freq[,temp])
464 |   })
465 |   
466 |   docs <- reactive({
467 |     validate(
468 |       need(input$topic.network_selected != "", "Please select a topic")
469 |     )
470 |     freq <- data.frame(v$probdocs)
471 |     temp <- as.integer(input$topic.network_selected)
472 |     data.frame(docname = rownames(v$probdocs), freq = freq[,temp], rowNum = v$out$meta$rowNum)
473 |   })
474 | 
475 |   output$topic.wordcloud <- renderPlot({
476 |     w <- terms()
477 |     
478 |     w %>%
479 |       mutate(word = as.character(word)) %>%
480 |       mutate(freq = round(exp(freq)*100)) %>%
481 |       ggplot(aes(label = word, size = freq)) +
482 |       geom_text_wordcloud() +
483 |       scale_size_area(max_size = 24) +
484 |       theme_minimal()
485 |     })
486 |   
487 |   
488 |   # expert table
489 |   Docs <- reactive({
490 |     d <- docs()
491 |     ldaProbs <- data.frame(rowNum = d$rowNum, Prob = exp(d$freq), stringsAsFactors = F)
492 |     ldaProbs <- merge(ldaProbs, storedData$data, by = "rowNum")
493 |     ldaProbs[order(ldaProbs$Prob, decreasing = T), c("rowNum","Prob",input$tpDocs)]
494 |   })
495 |   
496 |   #Representative Document
497 |   
498 |   output$doc.table <- renderDataTable({
499 |     temp <- Docs()
500 |     colnames(temp) <- c("Row Num","Topic Prob","Text")
501 |     temp[,2] <- round(log(temp[,2]),3)
502 |     temp$Text <- as.character(temp$Text)
503 |     temp
504 |   }, options = list(pageLength = 5, dom = 'tip') , rownames= FALSE)
505 |   
506 |   valid <- reactiveValues(results = NULL, K = NULL)
507 |   
508 |   # Validation
509 |   
510 |   observeEvent(input$run.validation, {
511 |   
512 |     K <- c(5,10,20,30,50,75,100)
513 |     K <- K[as.numeric(input$k.validation)]
514 |     
515 |     dfm <- z$dfm
516 |     
517 |     # use quanteda converter to convert our Dfm
518 |     out <- prepDocuments(dfm$documents, dfm$vocab, dfm$meta, lower.thresh = 1, subsample = NULL)
519 |     
520 |     valid$results <- searchK(out$documents, 
521 |                              out$vocab, 
522 |                              K, 
523 |                              init.type = "Spectral", 
524 |                              proportion = 0.5, 
525 |                              heldout.seed = input$search.seed, 
526 |                              max.em.its = 200)
527 |     valid$K <- K
528 |     })
529 |   
530 |   output$valid.plot <- renderPlot({
531 |     try <- try(plot(valid$results))
532 |     if("try-error" %in% class(try)){print("Select the number of topics to test and run topic validation.")
533 |     }else{plot(valid$results)}
534 |   })
535 | }
536 | 
537 | # Run the application 
538 | shinyApp(ui = ui, server = server)
539 | 
540 | 
541 | 


--------------------------------------------------------------------------------
/inst/app/directoryInput.R:
--------------------------------------------------------------------------------
  1 | #' Choose a Folder Interactively (Mac OS X)
  2 | #'
  3 | #' Display a folder selection dialog under Mac OS X
  4 | #'
  5 | #' @param default which folder to show initially
  6 | #' @param caption the caption on the selection dialog
  7 | #'
  8 | #' @details
  9 | #' Uses an Apple Script to display a folder selection dialog.  With \code{default = NA},
 10 | #' the initial folder selection is determined by default behavior of the
 11 | #' "choose folder" Apple Script command.  Otherwise, paths are expanded with
 12 | #' \link{path.expand}.
 13 | #'
 14 | #' @return
 15 | #' A length one character vector, character NA if 'Cancel' was selected.
 16 | #'
 17 | if (Sys.info()['sysname'] == 'Darwin') {
 18 |   choose.dir = function(default = NA, caption = NA) {
 19 |     command = 'osascript'
 20 |     args = '-e "POSIX path of (choose folder{{prompt}}{{default}})"'
 21 |     
 22 |     if (!is.null(caption) && !is.na(caption) && nzchar(caption)) {
 23 |       prompt = sprintf(' with prompt \\"%s\\"', caption)
 24 |     } else {
 25 |       prompt = ''
 26 |     }
 27 |     args = sub('{{prompt}}', prompt, args, fixed = T)
 28 |     
 29 |     if (!is.null(default) && !is.na(default) && nzchar(default)) {
 30 |       default = sprintf(' default location \\"%s\\"', path.expand(default))
 31 |     } else {
 32 |       default = ''
 33 |     }
 34 |     args = sub('{{default}}', default, args, fixed = T)
 35 |     
 36 |     suppressWarnings({
 37 |       path = system2(command, args = args, stderr = TRUE)
 38 |     })
 39 |     if (!is.null(attr(path, 'status')) && attr(path, 'status')) {
 40 |       # user canceled
 41 |       path = NA
 42 |     }
 43 |     
 44 |     return(path)
 45 |   }
 46 | } else if (Sys.info()['sysname'] == 'Linux') {
 47 |   choose.dir = function(default = NA, caption = NA) {
 48 |     command = 'zenity'
 49 |     args = '--file-selection --directory --title="Choose a folder"'
 50 |     
 51 |     suppressWarnings({
 52 |       path = system2(command, args = args, stderr = TRUE)
 53 |     })
 54 |     
 55 |     #Return NA if user hits cancel
 56 |     if (!is.null(attr(path, 'status')) && attr(path, 'status')) {
 57 |       # user canceled
 58 |       return(default)
 59 |     }
 60 |     
 61 |     #Error: Gtk-Message: GtkDialog mapped without a transient parent
 62 |     if(length(path) == 2){
 63 |       path = path[2]
 64 |     }
 65 |     
 66 |     return(path)
 67 |   }
 68 | }
 69 | 
 70 | #' Directory Selection Control
 71 | #'
 72 | #' Create a directory selection control to select a directory on the server
 73 | #'
 74 | #' @param inputId The \code{input} slot that will be used to access the value
 75 | #' @param label Display label for the control, or NULL for no label
 76 | #' @param value Initial value.  Paths are exapnded via \code{\link{path.expand}}.
 77 | #'
 78 | #' @details
 79 | #' This widget relies on \link{\code{choose.dir}} to present an interactive
 80 | #' dialog to users for selecting a directory on the local filesystem.  Therefore,
 81 | #' this widget is intended for shiny apps that are run locally - i.e. on the
 82 | #' same system that files/directories are to be accessed - and not from hosted
 83 | #' applications (e.g. from shinyapps.io).
 84 | #'
 85 | #' @return
 86 | #' A directory input control that can be added to a UI definition.
 87 | #'
 88 | #' @seealso
 89 | #' \link{updateDirectoryInput}, \link{readDirectoryInput}, \link[utils]{choose.dir}
 90 | directoryInput = function(inputId, label, value = NULL) {
 91 |   if (!is.null(value) && !is.na(value)) {
 92 |     value = path.expand(value)
 93 |   }
 94 |   
 95 |   tagList(
 96 |     singleton(
 97 |       tags$head(
 98 |         tags$script(src = 'js/directory_input_binding.js')
 99 |       )
100 |     ),
101 |     
102 |     div(
103 |       class = 'form-group directory-input-container',
104 |       shiny:::`%AND%`(label, tags$label(label)),
105 |       div(
106 |         span(
107 |           class = 'col-xs-9 col-md-11',
108 |           style = 'padding-left: 0; padding-right: 5px;',
109 |           div(
110 |             class = 'input-group shiny-input-container',
111 |             style = 'width:100%;',
112 |             div(class = 'input-group-addon', icon('folder-o')),
113 |             tags$input(
114 |               id = sprintf('%s__chosen_dir', inputId),
115 |               value = value,
116 |               type = 'text',
117 |               class = 'form-control directory-input-chosen-dir',
118 |               readonly = 'readonly'
119 |             )
120 |           )
121 |         ),
122 |         span(
123 |           class = 'shiny-input-container',
124 |           tags$button(
125 |             id = inputId,
126 |             class = 'btn btn-default directory-input',
127 |             '...'
128 |           )
129 |         )
130 |       )
131 |     )
132 |     
133 |   )
134 |   
135 | }
136 | 
137 | #' Change the value of a directoryInput on the client
138 | #'
139 | #' @param session The \code{session} object passed to function given to \code{shinyServer}.
140 | #' @param inputId The id of the input object.
141 | #' @param value A directory path to set
142 | #' @param ... Additional arguments passed to \link{\code{choose.dir}}.  Only used
143 | #'    if \code{value} is \code{NULL}.
144 | #'
145 | #' @details
146 | #' Sends a message to the client, telling it to change the value of the input
147 | #' object.  For \code{directoryInput} objects, this changes the value displayed
148 | #' in the text-field and triggers a client-side change event.  A directory
149 | #' selection dialog is not displayed.
150 | #'
151 | updateDirectoryInput = function(session, inputId, value = NULL, ...) {
152 |   if (is.null(value)) {
153 |     value = choose.dir(...)
154 |   }
155 |   session$sendInputMessage(inputId, list(chosen_dir = value))
156 | }
157 | 
158 | #' Read the value of a directoryInput
159 | #'
160 | #' @param session The \code{session} object passed to function given to \code{shinyServer}.
161 | #' @param inputId The id of the input object
162 | #'
163 | #' @details
164 | #' Reads the value of the text field associated with a \code{directoryInput}
165 | #' object that stores the user selected directory path.
166 | #'
167 | readDirectoryInput = function(session, inputId) {
168 |   session$input[[sprintf('%s__chosen_dir', inputId)]]
169 | }
170 | 
171 | 


--------------------------------------------------------------------------------
/inst/app/functions.R:
--------------------------------------------------------------------------------
 1 | new.topic.network <- function(stmFit, threshold, topic.names){
 2 |   #mod.out.corr <- topicCorr(stmFit, method = "simple", cutoff = threshold)
 3 |   cormat <- cor(stmFit$theta)
 4 |   adjmat <- ifelse(abs(cormat) > threshold,1,0)
 5 |   
 6 |   links2 <- as.matrix(adjmat)
 7 |   net2 <- graph_from_adjacency_matrix(links2, mode = "undirected")
 8 |   net2 <- igraph::simplify(net2,  remove.multiple = FALSE, remove.loops = TRUE) 
 9 | 
10 |   data <- toVisNetworkData(net2)
11 |   
12 |   nodes <- data[[1]]
13 |   edges <- data[[2]]
14 |   
15 |   # Community Detection
16 |   clp <- cluster_label_prop(net2)
17 |   nodes$community <- clp$membership
18 |   qual_col_pals = RColorBrewer::brewer.pal.info[brewer.pal.info$category == 'qual',]
19 |   col_vector = unlist(mapply(RColorBrewer::brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
20 |   col_vector <- c(col_vector,col_vector)
21 |   
22 |   col <- col_vector[nodes$community+1]
23 |   
24 |   links <- igraph::as_data_frame(net2, what="edges")
25 |   nodes <- igraph::as_data_frame(net2, what="vertices")
26 | 
27 |   TopicProportions = colMeans(stmFit$theta)
28 |   
29 |   #visNetwork edits
30 |   nodes$shape <- "dot"  
31 |   nodes$shadow <- TRUE # Nodes will drop shadow
32 |   nodes$title <- topic.names # Text on click
33 |   nodes$label <- topic.names # Node label
34 |   nodes$size <- (TopicProportions / max(TopicProportions)) * 40 # Node size
35 |   nodes$borderWidth <- 2 # Node border width
36 |   
37 |   nodes$color.background <- col
38 |   nodes$color.border <- "black"
39 |   nodes$color.highlight.background <- "orange"
40 |   nodes$color.highlight.border <- "darkred"
41 |   nodes$id <- 1:nrow(nodes)
42 |   
43 |   v <- list(nodes, edges)
44 |   
45 |   return(v)
46 | }
47 | 


--------------------------------------------------------------------------------
/inst/app/www/js/directory_input_binding.js:
--------------------------------------------------------------------------------
 1 | (function() {
 2 | /**
 3 |  * Shiny Registration
 4 |  */
 5 | 
 6 | var directoryInputBinding = new Shiny.InputBinding();
 7 | $.extend(directoryInputBinding, {
 8 |   find: function(scope) {
 9 |     return( $(scope).find(".directory-input") );
10 |   },
11 |   initialize: function(el) {
12 |     // called when document is ready using initial values defined in ui.R
13 |     // documented in input_binding.js but not in docs (articles)
14 |   },
15 |   getId: function(el) {
16 |     return($(el).attr('id'));
17 |   },
18 |   getValue: function(el) {
19 |     return($(el).data('val') || 0);
20 |   },
21 |   setValue: function(el, value) {
22 |     $(el).data('val', value);
23 |   },
24 |   receiveMessage: function(el, data) {
25 |     // This is used for receiving messages that tell the input object to do
26 |     // things, such as setting values (including min, max, and others).
27 |     // documented in input_binding.js but not in docs (articles)
28 |     var $widget = $(el).parentsUntil('.directory-input-container').parent();
29 |     var $path = $widget.find('input.directory-input-chosen-dir');
30 | 
31 |     console.log('message received: ' + data.chosen_dir);
32 | 
33 |     if (data.chosen_dir) {
34 |       $path.val(data.chosen_dir);
35 |       $path.trigger('change');
36 |     }
37 |   },
38 |   subscribe: function(el, callback) {
39 |     $(el).on("click.directoryInputBinding", function(e) {
40 |       var $el = $(this);
41 |       var val = $el.data('val') || 0;
42 |       $el.data('val', val + 1);
43 | 
44 |       console.log('in subscribe: click');
45 |       callback();
46 |     });
47 |   },
48 |   unsubscribe: function(el) {
49 |     $(el).off(".directoryInputBinding");
50 |   }
51 | });
52 | 
53 | Shiny.inputBindings
54 |   .register(directoryInputBinding, "oddhypothesis.directoryInputBinding");
55 | 
56 | 
57 | })();
58 | 
59 | 


--------------------------------------------------------------------------------
/topicApp.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | BuildType: Package
16 | PackageUseDevtools: Yes
17 | PackageInstallArgs: --no-multiarch --with-keep.source
18 | 


--------------------------------------------------------------------------------