├── README.md ├── NMF.r ├── LICENSE ├── kwictopics.R ├── NMF.rmd ├── LDA.Rmd └── optimal_K.r /README.md: -------------------------------------------------------------------------------- 1 | # Github repo to accompany the useR2018 presentation entitled "Topic Modeling with LDA and NMF from a Qualitative Content Analysis Perspective" 2 | 3 | Slides: https://goo.gl/XoZusf 4 | 5 | LDA.Rmd contains code to perform Latent Dirichlet Allocation using the Topicmodels package. 6 | 7 | NMF.Rmd contains code to use Non Negative Matrix Factorization using the NMF package. 8 | 9 | KwicRTopics.r contains code to load a dev version of the experimental topic model browser that incorporates keywords in context functionality as an R HTMLWidget. The KwicRTopics github repo: https://github.com/aneesha/kwicRtopics 10 | 11 | Also check out the textmineR package as it incorporates some recent research on topic coherence, model selection and labeling: https://cran.r-project.org/web/packages/textmineR/vignettes/c_topic_modeling.html 12 | 13 | Additional Resources: 14 | https://github.com/trinker/topicmodels_learning 15 | -------------------------------------------------------------------------------- /NMF.r: -------------------------------------------------------------------------------- 1 | --- 2 | title: "R Notebook" 3 | output: html_notebook 4 | --- 5 | 6 | This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook. When you execute code within the notebook, the results appear beneath the code. 7 | 8 | Try executing this chunk by clicking the *Run* button within the chunk or by placing your cursor inside it and pressing *Cmd+Shift+Enter*. 9 | 10 | ```{r} 11 | plot(cars) 12 | ``` 13 | 14 | Add a new chunk by clicking the *Insert Chunk* button on the toolbar or by pressing *Cmd+Option+I*. 15 | 16 | When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the *Preview* button or press *Cmd+Shift+K* to preview the HTML file). 
17 | 18 | The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike *Knit*, *Preview* does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed. 19 | 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 aneesha 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
--------------------------------------------------------------------------------
/kwictopics.R:
--------------------------------------------------------------------------------
# Code to test the KwicRTopics widget: fits an LDA model on survey responses
# and loads the result into the kwictopics HTMLWidget topic browser.

library("htmlwidgets")
library("tm")           # Corpus / tm_map / DocumentTermMatrix (used below)
library("topicmodels")  # LDA (used below)

# navigate to package dir
setwd("kwictopics")

survey_responses_raw <- read.csv("/Users/aneesha/user2018/surveyresponses.csv", stringsAsFactors = FALSE)
# Perform character encoding.
# Fix: write the cleaned text back into the raw data frame --
# `survey_responses` did not exist yet at this point in the original script.
survey_responses_raw$documents <- iconv(survey_responses_raw$documents, "latin1", "ASCII", sub = "")
# Vectorize the documents
survey_responses <- Corpus(VectorSource(as.vector(survey_responses_raw$documents)))
# Text preprocessing
# Convert to lowercase
survey_responses <- tm_map(survey_responses, tolower)
# remove punctuation
survey_responses <- tm_map(survey_responses, removePunctuation)
# remove numbers
survey_responses <- tm_map(survey_responses, removeNumbers)
# remove generic and custom stopwords, then stem
stopword <- c(stopwords("english"), "best")
survey_responses <- tm_map(survey_responses, removeWords, stopword)
survey_responses <- tm_map(survey_responses, stemDocument)

survey_Dtm <- DocumentTermMatrix(survey_responses, control = list(minWordLength = 2))
survey_Dtm2 <- removeSparseTerms(survey_Dtm, sparse = 0.98)

k <- 12      # number of topics
SEED <- 1234

no_top_words <- 6
no_top_documents <- 6

survey.lda <- LDA(survey_Dtm2, k, method = "Gibbs", control = list(seed = SEED))

devtools::install()
library(kwictopics)

kwictopicsWidget(survey.lda, k, survey_responses_raw$documents)

--------------------------------------------------------------------------------
/NMF.rmd:
--------------------------------------------------------------------------------
---
title: "NMF Topic Modeling in R"
output: html_notebook
---

Uses TM for text data pre-processing and topicmodels package.


```{r}
library(NMF)
library(ggplot2)
library(tm)
library(SnowballC)
```

```{r}
survey_responses <- read.csv("surveyresponses.csv", stringsAsFactors = FALSE)
# Perform character encoding
survey_responses$documents <- iconv(survey_responses$documents, "latin1", "ASCII", sub = "")
# Vectorize the documents
survey_responses <- Corpus(VectorSource(as.vector(survey_responses$documents)))
```

```{r}
# Text preprocessing
# convert to lowercase
survey_responses <- tm_map(survey_responses, tolower)
# remove punctuation
survey_responses <- tm_map(survey_responses, removePunctuation)
# remove numbers
survey_responses <- tm_map(survey_responses, removeNumbers)
# remove generic and custom stopwords
stopword <- c(stopwords("english"), "best")
survey_responses <- tm_map(survey_responses, removeWords, stopword)
survey_responses <- tm_map(survey_responses, stemDocument)
```

```{r}
survey_Dtm <- DocumentTermMatrix(survey_responses, control = list(minWordLength = 2))
survey_Dtm2 <- removeSparseTerms(survey_Dtm, sparse = 0.98)

```

```{r}
new_matrix <- as.matrix(survey_Dtm2)
# NOTE(fix): the original assigned `rownames(new_matrix) <- new_matrix[,1]`,
# which overwrote the document ids with the counts of the first term (and can
# produce duplicate rownames). The DTM matrix already carries the document
# names, so no reassignment is needed.

k = 12 # number of topics
# Run NMF
nmf_tm <- nmf(new_matrix, k, "lee", seed = "nndsvd") # nndsvd greatly helps with convergence

w <- basis(nmf_tm) # W matrix (document x topic)
dim(w) # show dimensions
h <- coef(nmf_tm) # H matrix (topic x term)
dim(h) # show dimensions

# print both top words and top documents in a topic
no_top_words = 6
no_top_documents = 6

for (topic_no in 1:k) {
  print(paste("Topic", topic_no))
  words_in_topic <- h[topic_no, ]
  ordered_words <- order(words_in_topic, decreasing = TRUE)
  top_ordered_words <- ordered_words[1:no_top_words]
  print(top_ordered_words)
  for (word in top_ordered_words) {
    print(names(words_in_topic)[word])
  }
}
```
```{r}
# Display graphs to help estimate number of topics
estim.r <- nmf(new_matrix, 2:18, nrun = 4, seed = "nndsvd")
plot(estim.r)
```

--------------------------------------------------------------------------------
/LDA.Rmd:
--------------------------------------------------------------------------------
---
title: "LDA Topic Modeling in R"
output: html_notebook
---

Uses TM for text data pre-processing and topicmodels package.


```{r}
setwd("/Users/aneesha/user2018/")
```

```{r}
library(tm)
library(SnowballC)
library(topicmodels)
```

```{r}
survey_responses_raw <- read.csv("surveyresponses.csv", stringsAsFactors = FALSE)
```

```{r}
# show column names
colnames(survey_responses_raw)
```
```{r}
# display a document
survey_responses_raw$documents[3]
```

```{r}
# Perform character encoding
survey_responses_raw$documents <- iconv(survey_responses_raw$documents, "latin1", "ASCII", sub = "")
# Vectorize the documents
survey_responses <- Corpus(VectorSource(as.vector(survey_responses_raw$documents)))

```

```{r}
# Text pre-processing
# convert to lowercase
survey_responses <- tm_map(survey_responses, tolower)
# remove punctuation
survey_responses <- tm_map(survey_responses, removePunctuation)
# remove numbers
survey_responses <- tm_map(survey_responses, removeNumbers)
# remove stopwords
stopword <- c(stopwords("english"), "best")
survey_responses <- tm_map(survey_responses, removeWords, stopword)
survey_responses <- tm_map(survey_responses, stemDocument)
```

```{r}
# Create document term matrix
survey_Dtm <- DocumentTermMatrix(survey_responses, control = list(minWordLength = 2))
survey_Dtm2 <-
removeSparseTerms(survey_Dtm, sparse = 0.98)

# Find best starting value for the number of topics (k)

source("optimal_K.r")
control <- list(burnin = 500, iter = 1000, keep = 100)

## Determine Optimal Number of Topics
(k <- optimal_k(survey_Dtm2, 20, control = control))
```

```{r}
k = 12 # number of topics
SEED = 1234

no_top_words = 6 # no of top words to show
no_top_documents = 6 # no of top documents to show

# Run LDA
survey.lda <- LDA(survey_Dtm2, k, method = "Gibbs", control = list(seed = SEED))

lda.topics <- as.matrix(topics(survey.lda, 6))
lda.terms <- terms(survey.lda, no_top_words)
lda.terms

# print both top words and top documents in a topic
for (topic_no in 1:k) {
  print(paste("Topic", topic_no))
  print(lda.terms[, topic_no])
  docs_in_topic <- survey.lda@gamma[, topic_no]
  ordered_docs <- order(docs_in_topic, decreasing = TRUE)
  top_ordered_docs <- ordered_docs[1:no_top_documents]
  print(top_ordered_docs)
  for (doc in top_ordered_docs) {
    print(survey_responses_raw$documents[doc])
  }
}

```

--------------------------------------------------------------------------------
/optimal_K.r:
--------------------------------------------------------------------------------
#' Find Optimal Number of Topics
#' Original code from: https://github.com/trinker/topicmodels_learning
#' Iteratively produces models and then compares the harmonic mean of the log
#' likelihoods in a graphical output.
#'
#' @param x A \code{\link[tm]{DocumentTermMatrix}}.
#' @param max.k Maximum number of topics to fit (start small [i.e., default of
#' 30] and add as necessary).
#' @param harmonic.mean Logical. If \code{TRUE} the harmonic means of the
#' log likelihoods are used to determine k (see
#' \url{http://stackoverflow.com/a/21394092/1000343}). Otherwise just the log
#' likelihoods are graphed against k (see
#' \url{http://stats.stackexchange.com/a/25128/7482}).
#' @param control A named list of control parameters passed on to
#' \code{topicmodels::LDA}; when \code{harmonic.mean = TRUE} it must contain
#' \code{burnin} and \code{keep}.
#' @param burnin Object of class \code{"integer"}; number of omitted Gibbs
#' iterations at beginning, by default equals 0.
#' @param iter Object of class \code{"integer"}; number of Gibbs iterations, by
#' default equals 2000.
#' @param keep Object of class \code{"integer"}; if a positive integer, the
#' log likelihood is saved every keep iterations.
#' @param method The method to be used for fitting; currently
#' \code{method = "VEM"} or \code{method= "Gibbs"} are supported.
#' @param verbose Logical. If \code{TRUE} progress messages and the chosen
#' number of topics are printed to the console.
#' @param drop.seed Logical. If \code{TRUE} \code{seed} argument is dropped from
#' \code{control}.
#' @param \ldots Other arguments passed to \code{??LDAcontrol}.
#' @return Returns the \code{\link[base]{data.frame}} of k (number of topics) and
#' the associated log likelihood.
#' @references \url{http://stackoverflow.com/a/21394092/1000343} \cr
#' \url{http://stats.stackexchange.com/a/25128/7482} \cr
#' Ponweiser, M. (2012). Latent Dirichlet Allocation in R (Diploma Thesis).
#' Vienna University of Economics and Business, Vienna.
#' http://epub.wu.ac.at/3558/1/main.pdf \cr\cr
#' Griffiths, T.L., and Steyvers, M. (2004). Finding scientific topics.
#' Proceedings of the National Academy of Sciences of the United States of America
#' 101(Suppl 1), 5228 - 5235. \url{http://www.pnas.org/content/101/suppl_1/5228.full.pdf}
#' @keywords k topicmodel
#' @export
#' @author Ben Marwick and Tyler Rinker.
#' @examples
#' ## Install/Load Tools & Data
#' if (!require("pacman")) install.packages("pacman")
#' pacman::p_load_gh("trinker/gofastr")
#' pacman::p_load(tm, topicmodels, dplyr, tidyr, devtools, LDAvis, ggplot2)
#'
#'
#' ## Source topicmodels2LDAvis function
#' devtools::source_url("https://gist.githubusercontent.com/trinker/477d7ae65ff6ca73cace/raw/79dbc9d64b17c3c8befde2436fdeb8ec2124b07b/topicmodels2LDAvis")
#'
#' data(presidential_debates_2012)
#'
#'
#' ## Generate Stopwords
#' stops <- c(
#'     tm::stopwords("english"),
#'     "governor", "president", "mister", "obama","romney"
#' ) %>%
#'     gofastr::prep_stopwords()
#'
#'
#' ## Create the DocumentTermMatrix
#' doc_term_mat <- presidential_debates_2012 %>%
#'     with(gofastr::q_dtm_stem(dialogue, paste(person, time, sep = "_"))) %>%
#'     gofastr::remove_stopwords(stops) %>%
#'     gofastr::filter_tf_idf() %>%
#'     gofastr::filter_documents()
#'
#'
#' opti_k1 <- optimal_k(doc_term_mat)
#' opti_k1
#'
#' opti_k2 <- optimal_k(doc_term_mat, harmonic.mean = FALSE)
#' opti_k2
optimal_k <- function(x, max.k = 30, harmonic.mean = TRUE,
    control = if (harmonic.mean) list(burnin = 500, iter = 1000, keep = 100) else NULL,
    method = if (harmonic.mean) "Gibbs" else "VEM", verbose = TRUE, drop.seed = TRUE, ...){

    # Dropping the seed lets each LDA fit choose its own; otherwise every k
    # would be fit with the identical seed from `control`.
    if (isTRUE(drop.seed)){
        control[["seed"]] <- NULL
    }

    # Dispatch: harmonic-mean-of-log-likelihood search vs. plain log-likelihood curve.
    if (isTRUE(harmonic.mean)) {
        optimal_k1(x, max.k = max.k, control = control, method = method, verbose = verbose, ...)
    } else {
        optimal_k2(x, max.k = max.k, control = control, method = method, ...)
    }
}

#' Plots an optimal_k1 Object
#'
#' Plots an optimal_k1 object (harmonic mean of the log likelihood against k,
#' with the optimal k highlighted in red).
#'
#' @param x A \code{optimal_k1} object.
#' @param \ldots Ignored.
#' @method plot optimal_k1
#' @export
plot.optimal_k1 <- function(x, ...){

    # Row of the k data.frame corresponding to the chosen (optimal) k.
    y <- attributes(x)[["k_dataframe"]]
    y <- y[y[["k"]] == as.numeric(x), ]

    ggplot2::ggplot(attributes(x)[["k_dataframe"]], ggplot2::aes_string(x="k", y="harmonic_mean")) +
        ggplot2::xlab(sprintf("Number of Topics (Optimal Number: %s)", as.numeric(x))) +
        ggplot2::ylab("Harmonic Mean of Log Likelihood") +
        ggplot2::geom_smooth(method = "loess", fill=NA) +
        ggplot2::geom_point(data=y, color="red", fill=NA, size = 6, shape = 21) +
        ggplot2::geom_line(size=1) +
        ggplot2::theme_bw() +
        ggplot2::theme(
            axis.title.x = ggplot2::element_text(vjust = -0.25, size = 14),
            axis.title.y = ggplot2::element_text(size = 14, angle=90)
        )
}

#' Prints a optimal_k Object
#'
#' Prints a optimal_k object by plotting it (dispatches to the matching
#' \code{plot} method).
#'
#' @param x A \code{optimal_k} object.
#' @param \ldots Ignored.
#' @method print optimal_k
#' @export
print.optimal_k <- function(x, ...){

    print(graphics::plot(x))

}




# Workhorse for harmonic.mean = TRUE: fits an LDA for each k in 2:max.k and
# returns the k with the largest harmonic mean of the post-burn-in log
# likelihoods (classed "optimal_k"/"optimal_k1", with the k data.frame attached).
optimal_k1 <- function(x, max.k = 30,
    control = list(burnin = 500, iter = 1000, keep = 100), method = "Gibbs",
    verbose = TRUE, ...){


    if (max.k > 20) {
        message("\nGrab a cup of coffee this could take a while...\n")
        flush.console()
    }

    tic <- Sys.time()
    # Progress bookkeeping: one row per 10 fits; time is modeled as ~ k^2 so the
    # remaining-time estimate is a linear fit of elapsed minutes against k^2.
    v <- rep(NA, floor(max.k/10))
    dat <- data.frame(k = v, time = v)
    end <- data.frame(k = max.k^2)

    hm_many <- sapply(2:max.k, function(k){
        if (k %% 10 == 0){
            time <- as.numeric(difftime(Sys.time(), tic, units = "mins"))
            dat[k/10, 1:2] <<- c(k^2, time)
            if (k/10 > 1) {
                fit <- with(dat, lm(time~k))
                pred <- predict(fit, end) - time
                if (pred < 0) pred <- 0  # never report negative remaining time
                est <- paste0("; Remaining: ~", time2char(pred), " mins")
            } else {
                est <- ""
            }
            cur <- format(Sys.time(), format="%I:%M:%S")
            elapsed <- time2char(time)
            cat(sprintf("%s of %s iterations (Current: %s; Elapsed: %s mins%s)\n", k, max.k, cur, elapsed, est)); flush.console()
        }
        burnin <- control[["burnin"]]
        keep <- control[["keep"]]
        # fix: scalar condition -- use short-circuit || rather than elementwise |
        if (is.null(burnin) || is.null(keep)) stop("Supply burnin & keep to control")
        fitted <- topicmodels::LDA(x, k = k, method = method, control = control)
        # Drop the burn-in portion of the saved log likelihoods before averaging.
        logLiks <- fitted@logLiks[-c(1:(burnin/keep))]
        harmonicMean(logLiks)
    })

    out <- c(2:max.k)[which.max(hm_many)]
    # fix: hm_many indexes k = 2:max.k, so which.max() can never equal max.k;
    # compare the selected k itself against the upper bound instead.
    if (out == max.k) warning("Optimal K is last value; suggest increasing `max.k`")
    class(out) <- c("optimal_k", "optimal_k1", class(out))
    attributes(out)[["k_dataframe"]] <- data.frame(
        k = 2:max.k,
        harmonic_mean = hm_many
    )
    if (isTRUE(verbose)) cat(sprintf("Optimal number of topics = %s\n",as.numeric(out)))
    out
}

# Numerically stable harmonic mean of log likelihoods, computed in extended
# precision via Rmpfr (see http://stackoverflow.com/a/21394092/1000343).
harmonicMean <- function(logLikelihoods, precision=2000L) {
    llMed <- Rmpfr::median(logLikelihoods)
    as.double(llMed - log(Rmpfr::mean(exp(-Rmpfr::mpfr(logLikelihoods, prec = precision) + llMed))))
}

# Workhorse for harmonic.mean = FALSE: fits an LDA for each k in 2:max.k and
# returns a data.frame of k vs. log likelihood (classed "optimal_k"/"optimal_k2").
optimal_k2 <- function(x, max.k = 30, control = NULL, method = "VEM", ...){

    if (max.k > 20) {
        message("\nGrab a cup of coffee this could take a while...\n")
        flush.console()
    }

    tic <- Sys.time()
    # Same progress/remaining-time bookkeeping as optimal_k1.
    v <- rep(NA, floor(max.k/10))
    dat <- data.frame(k = v, time = v)
    end <- data.frame(k = max.k^2)

    best_model <- lapply(seq(2, max.k, by=1), function(k){
        if (k %% 10 == 0){
            time <- as.numeric(difftime(Sys.time(), tic, units = "mins"))
            dat[k/10, 1:2] <<- c(k^2, time)
            if (k/10 > 1) {
                fit <- with(dat, lm(time~k))
                pred <- predict(fit, end) - time
                if (pred < 0) pred <- 0  # consistency with optimal_k1: clamp at zero
                est <- paste0("; Remaining: ~", time2char(pred), " mins")
            } else {
                est <- ""
            }
            cur <- format(Sys.time(), format="%I:%M:%S")
            elapsed <- time2char(time)
            cat(sprintf("%s of %s iterations (Current: %s; Elapsed: %s mins%s)\n", k, max.k, cur, elapsed, est)); flush.console()
        }
        topicmodels::LDA(x, k = k, method = method, control = control, ...)
    })

    out <- data.frame(
        k = c(2:max.k),
        logLik = sapply(best_model, logLik)
    )

    class(out) <- c("optimal_k", "optimal_k2", "data.frame")
    out
}

# Format minutes for display: round to 1 decimal and strip a leading zero
# ("0.6" -> ".6"), but keep a bare "0" intact.
time2char <- function(x){
    x <- as.character(round(x, 1))
    if (identical("0", x)) return(x)
    gsub("^0+", "", x)
}

#' Plots an optimal_k2 Object
#'
#' Plots an optimal_k2 object (log likelihood against number of topics).
#'
#' @param x A \code{optimal_k2} object.
#' @param \ldots Ignored.
#' @method plot optimal_k2
#' @export
plot.optimal_k2 <- function(x, ...){

    ggplot2::ggplot(x, ggplot2::aes_string(x="k", y="logLik")) +
        ggplot2::xlab("Number of Topics") +
        ggplot2::ylab("Log Likelihood") +
        ggplot2::geom_smooth(size=.8, se=FALSE, method="loess") +
        ggplot2::geom_line(size=1) +
        ggplot2::theme_bw() +
        ggplot2::theme(
            axis.title.x = ggplot2::element_text(vjust = -0.25, size = 14),
            axis.title.y = ggplot2::element_text(size = 14, angle=90)
        )

}


#pacman::p_load(ggplot2, topicmodels, Rmpfr)
--------------------------------------------------------------------------------