├── .DS_Store ├── DESCRIPTION ├── NAMESPACE ├── R ├── .DS_Store ├── checkAgree.R ├── combMass.R ├── evalResults.R ├── getResults.R ├── mixGold.R ├── pickLabel.R ├── plotResults.R ├── recordTasks.R ├── sendTasks.R ├── validateLabel.R └── validateTopic.R ├── README.md ├── data ├── R4WSItasktest.rda ├── allR4WSItasktest.rda ├── goldR4WSItest.rda ├── heldouttest.rda ├── keypostedtest.rda ├── masstest.rda ├── modtest.rda ├── recordtest.rda ├── resultstest.rda └── stmPreptest.rda ├── man ├── checkAgree.Rd ├── combMass.Rd ├── evalResults.Rd ├── getResults.Rd ├── mixGold.Rd ├── pickLabel.Rd ├── plotResults.Rd ├── recordTasks.Rd ├── sendTasks.Rd ├── validateLabel.Rd └── validateTopic.Rd └── tests ├── testthat.R └── testthat ├── testcombMass.R ├── testevalResults.R ├── testmixGold.R ├── testrecordTasks.R └── testvalidateTopic.R /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Luwei-Ying/validateIt/8f2da445e0c81c8418444e2d0fc9df34fb2a3077/.DS_Store -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: validateIt 2 | Title: Validating Topic Coherence and Topic Labels 3 | Version: 0.2.01 4 | Authors@R: c( 5 | person(given = "Luwei", 6 | family = "Ying", 7 | role = c("aut", "cre"), 8 | email = "luwei.ying@wustl.edu", 9 | comment = c(ORCID = "0000-0001-7307-4834")), 10 | person(given = "Jacob", 11 | family = "Montgomery", 12 | role = c("aut")), 13 | person(given = "Brandon", 14 | family = "Stewart", 15 | role = c("aut"))) 16 | Description: validatIt creates crowd-sourcing tasks to validate topic coherence from unsupervised or semisupervised learning, as well as the topic labels specified by researchers. It provides easy access to the Amazon's Mechanical Turk (MTurk) API to post tasks and retrieve results. 17 | License: What license it uses 18 | Encoding: UTF-8 19 | LazyData: true 20 | RoxygenNote: 7.1.1 21 | Imports: 22 | pyMTurkR, 23 | stm, 24 | tm 25 | Suggests: 26 | testthat 27 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(checkAgree) 4 | export(combMass) 5 | export(evalResults) 6 | export(getResults) 7 | export(mixGold) 8 | export(pickLabel) 9 | export(plotResults) 10 | export(recordTasks) 11 | export(sendTasks) 12 | export(validateLabel) 13 | export(validateTopic) 14 | import(pyMTurkR) 15 | import(tm) 16 | -------------------------------------------------------------------------------- /R/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Luwei-Ying/validateIt/8f2da445e0c81c8418444e2d0fc9df34fb2a3077/R/.DS_Store -------------------------------------------------------------------------------- /R/checkAgree.R: -------------------------------------------------------------------------------- 1 | #' Check Agreement Rate between Identical Trails 2 | #' 3 | #' @details 4 | #' Evaluate workers' performance by agreement rate between identical trails 5 | #' (Notice that this means the two input, results1 and results2, must be identical.); 6 | #' Return 1) the exact agreement rate when both workers agree on the exact same choice, and 7 | #' 2) the binary agreement rate when both workers get the task either right or wrong simultaneously 8 | #' 9 | #' @param results1 first batch of results; outputs from getResults() 10 | #' @param results2 first batch of results; outputs from getResults() 11 | #' @param key the local task record; outputs from recordTasks() 12 | #' @param type Task structures to be specified. Must be one of "WI" (word intrusion), 13 | #' "T8WSI" (top 8 word set intrusion), "R4WSI" (random 4 word set intrusion), 14 | #' "LI" (Label Intrusion), and "OL" (Optimal Label) 15 | #' 16 | #' @export 17 | 18 | checkAgree <- function(results1, results2, key, type = NULL){ 19 | if(sum(!(key[[1]]$id %in% results1$local_task_id)) != 0){ 20 | key[[2]] <- key[[2]][key[[1]]$id %in% results1$local_task_id,] 21 | key[[1]] <- key[[1]][key[[1]]$id %in% results1$local_task_id,] 22 | } 23 | 24 | # message(paste0(sum(results1[,5] != 0), ' / ', nrow(key[[2]]), ' results will be evaluated')) 25 | 26 | # Remove the gold-standard HITs 27 | results1 <- results1[key[[1]][,1] != "gold",] 28 | results2 <- results2[key[[1]][,1] != "gold",] 29 | key[[2]] <- key[[2]][key[[1]][,1] != "gold",] 30 | key[[1]] <- key[[1]][key[[1]][,1] != "gold",] 31 | 32 | # Agreement rate on picking up the EXACT same choice 33 | AgreeExact <- sum(results1$result[results1$result != 0] == results2$result[results2$result != 0])/length(results1$result[results1$result != 0]) 34 | 35 | results1 <- as.matrix(results1) 36 | results2 <- as.matrix(results2) 37 | 38 | # Calculate the "correct" vector 39 | indicator1 <- NULL 40 | indicator2 <- NULL 41 | if(type == "R4WSI0" | type == "T8WSI" | type == "LI" | type == "OL"){ 42 | for(i in 1:nrow(results1)){ 43 | if (results1[i,6] != 0){ 44 | correct <- as.vector(key[[2]][i,-1])[as.numeric(results1[i,6])] == as.character(key[[1]][i, (ncol(key[[1]])-1)]) 45 | } else { 46 | correct <- NA 47 | } 48 | indicator1 <- c(indicator1, correct) 49 | } 50 | for(i in 1:nrow(results2)){ 51 | if (results2[i,6] != 0){ 52 | correct <- as.vector(key[[2]][i,-1])[as.numeric(results2[i,6])] == as.character(key[[1]][i, (ncol(key[[1]])-1)]) 53 | } else { 54 | correct <- NA 55 | } 56 | indicator2 <- c(indicator2, correct) 57 | } 58 | } else if (type == "R4WSI"){ 59 | for(i in 1:nrow(results1)){ 60 | if (results1[i,6] != 0){ 61 | correct <- as.vector(key[[2]][i,])[as.numeric(results1[i,6])] == as.character(key[[1]][i, (ncol(key[[1]])-1)]) 62 | } else { 63 | correct <- NA 64 | } 65 | indicator1 <- c(indicator1, correct) 66 | } 67 | for(i in 1:nrow(results2)){ 68 | if (results2[i,6] != 0){ 69 | correct <- as.vector(key[[2]][i,])[as.numeric(results2[i,6])] == as.character(key[[1]][i, (ncol(key[[1]])-1)]) 70 | } else { 71 | correct <- NA 72 | } 73 | indicator2 <- c(indicator2, correct) 74 | } 75 | } else if (type == "WI"){ 76 | for(i in 1:nrow(results1)){ 77 | if (results1[i,6] != 0){ 78 | correct <- as.vector(key[[2]][i,])[as.numeric(results1[i,6])] == as.character(key[[1]][i, (ncol(key[[1]])-1)]) 79 | } else { 80 | correct <- NA 81 | } 82 | indicator1 <- c(indicator1, correct) 83 | } 84 | for(i in 1:nrow(results2)){ 85 | if (results2[i,6] != 0){ 86 | correct <- as.vector(key[[2]][i,])[as.numeric(results2[i,6])] == as.character(key[[1]][i, (ncol(key[[1]])-1)]) 87 | } else { 88 | correct <- NA 89 | } 90 | indicator2 <- c(indicator2, correct) 91 | } 92 | } else { 93 | stop("Please specify a valid task type.") 94 | } 95 | 96 | # Agreement rate either both correcly or both wrongly 97 | AgreeBinary <- sum(indicator1[!is.na(indicator1)] == indicator2[!is.na(indicator1)])/length(indicator1[!is.na(indicator1)]) 98 | 99 | output <- list(AgreeExact, AgreeBinary) 100 | names(output) <- c("Both workers agree on the exact same choice", 101 | "Both workers answer correctly or wrongly") 102 | return(output) 103 | } 104 | 105 | 106 | 107 | -------------------------------------------------------------------------------- /R/combMass.R: -------------------------------------------------------------------------------- 1 | #' Combine the mass of words with the same root 2 | #' 3 | #' @details 4 | #' Use as a preparing step for validating unstemmed topic models. 5 | #' 6 | #' @param mod Fitted structural topic models. 7 | #' @param vocab A character vector specifying the words in the corpus. Usually, it 8 | #' can be found in topic model output. 9 | #' @param beta A matrix of word probabilities for each topic. Each row represents a 10 | #' topic and each column represents a word. Note this should not be in the logged form. 11 | #' 12 | #' @import tm 13 | #' 14 | #' @export 15 | 16 | combMass <- function(mod = NULL, vocab = NULL, beta = NULL){ 17 | if(class(mod) == "STM"){ 18 | vocab <- mod$vocab 19 | rawbeta <- exp(mod$beta$logbeta[[1]]) 20 | } else { 21 | if(is.null(vocab) | is.null(beta)){ 22 | stop("\"vocab\" and \"beta\" must be specified for topic models that are not STM.") 23 | } 24 | rawbeta <- beta 25 | } 26 | # test: rowSums(rawbeta) == rep(1, nrow(beta)) 27 | stemmed_vocab <- stemDocument(vocab) 28 | # colnames(rawbeta) <- stemmed_vocab 29 | # newbeta <- t(rowsum(t(rawbeta), colnames(rawbeta))) 30 | newvocab <- matrix(NA, nrow = nrow(rawbeta), ncol = length(unique(stemmed_vocab))) 31 | newbeta <- matrix(NA, nrow = nrow(rawbeta), ncol = length(unique(stemmed_vocab))) 32 | for (i in 1:nrow(rawbeta)){ 33 | mapping <- cbind.data.frame(vocab, stemmed_vocab, rawbeta[i,], stringsAsFactors = F) 34 | colnames(mapping)[3] <- "prob" 35 | maxtable <- aggregate(mapping$prob, by = list(mapping$stemmed_vocab), max) 36 | sumtable <- aggregate(mapping$prob, by = list(mapping$stemmed_vocab), sum) 37 | grouptable <- cbind.data.frame(maxtable, sumtable[,2]) 38 | colnames(grouptable) <- c("stemmed_vocab", "prob", "sum.prob") 39 | mapping <- merge(mapping, grouptable, 40 | by = c("stemmed_vocab", "prob"), 41 | all.x = T, all.y = F) 42 | mapping <- mapping[!is.na(mapping$sum.prob) & !duplicated(mapping[,c("stemmed_vocab", "prob")]),] 43 | newvocab[i,] <- mapping$vocab 44 | newbeta[i,] <- mapping$sum.prob 45 | # # super slow 46 | # for(j in 1:length(unique(stemmed_vocab))){ 47 | # group <- mapping[mapping$stemmed_vocab == unique(stemmed_vocab)[j],] 48 | # newvocab[i, j] <- as.character(group$vocab[which.max(group[,3])]) 49 | # newbeta[i, j] <- sum(group[,3]) 50 | # } 51 | } 52 | return(list(newvocab, newbeta)) 53 | } 54 | -------------------------------------------------------------------------------- /R/evalResults.R: -------------------------------------------------------------------------------- 1 | #' Evaluate results 2 | #' 3 | #' @details 4 | #' Evaluate worker performance by gold-standard HITs; 5 | #' Return the accuracy rate (proportion correct) for a specified batch 6 | #' 7 | #' @param results results of human choice; outputs from getResults() 8 | #' @param key the local task record; outputs form recordTasks() 9 | #' @param type Task structures to be specified. Must be one of "WI" (word intrusion), 10 | #' "T8WSI" (top 8 word set intrusion), "R4WSI" (random 4 word set intrusion), 11 | #' "LI" (Label Intrusion), and "OL" (Optimal Label) 12 | #' 13 | #' @export 14 | 15 | evalResults <- function(results, key, type = NULL){ 16 | if(sum(!(key[[1]]$id %in% results$local_task_id)) != 0){ 17 | key[[2]] <- key[[2]][key[[1]]$id %in% results$local_task_id,] 18 | key[[1]] <- key[[1]][key[[1]]$id %in% results$local_task_id,] 19 | } 20 | 21 | message(paste0(sum(results[,5] != 0), ' / ', nrow(key[[2]]), ' results will be evaluated')) 22 | results <- as.matrix(results) 23 | indicator <- NULL 24 | if(type == "R4WSI0" | type == "T8WSI" | type == "LI" | type == "OL"){ 25 | for(i in 1:nrow(results)){ 26 | if (results[i,6] != 0){ 27 | correct <- as.vector(key[[2]][i,-1])[as.numeric(results[i,6])] == as.character(key[[1]][i, (ncol(key[[1]])-1)]) 28 | } else { 29 | correct <- NA 30 | } 31 | indicator <- c(indicator, correct) 32 | } 33 | } else if (type == "R4WSI"){ 34 | for(i in 1:nrow(results)){ 35 | if (results[i,6] != 0){ 36 | correct <- as.vector(key[[2]][i,])[as.numeric(results[i,6])] == as.character(key[[1]][i, (ncol(key[[1]])-1)]) 37 | } else { 38 | correct <- NA 39 | } 40 | indicator <- c(indicator, correct) 41 | } 42 | } else if (type == "WI"){ 43 | for(i in 1:nrow(results)){ 44 | if (results[i,6] != 0){ 45 | correct <- as.vector(key[[2]][i,])[as.numeric(results[i,6])] == as.character(key[[1]][i, (ncol(key[[1]])-1)]) 46 | } else { 47 | correct <- NA 48 | } 49 | indicator <- c(indicator, correct) 50 | } 51 | } else { 52 | stop("Please specify a valid task type.") 53 | } 54 | # overall gold-standard hit correct rate 55 | num <- sum(indicator[key[[1]][,1] == "gold"], na.rm = T) 56 | denum <- sum(key[[1]][,1] == "gold" & results[,6] != 0) 57 | goldcorrect <- c(num/denum, paste0(num, ' / ', denum)) 58 | message(paste0(num, ' / ', denum, ' gold-standard HITs are answered correct')) 59 | 60 | # gold-standard hit correct rate by workers 61 | goldcorrectbyworker <- table(results[key[[1]][,1] == "gold",5], indicator[key[[1]][,1] == "gold"]) 62 | 63 | # non-gold-standard HITs correct rate 64 | num <- sum(indicator[key[[1]][,1] != "gold"], na.rm = T) 65 | denum <- sum(key[[1]][,1] != "gold" & results[,6] != 0) 66 | nongoldcorrect <- c(num/denum, paste0(num, ' / ', denum)) 67 | message(paste0(num, ' / ', denum, ' non-gold-standard HITs are answered correct')) 68 | 69 | output <- list(goldcorrect, goldcorrectbyworker, nongoldcorrect) 70 | names(output) <- c("Gold-standard HIT Correct Rate", 71 | "Gold-standard HIT Correct Rate by Workers", 72 | "Non-gold-standard HIT Correct Rate") 73 | return(output) 74 | } 75 | 76 | 77 | 78 | -------------------------------------------------------------------------------- /R/getResults.R: -------------------------------------------------------------------------------- 1 | #' Get results from Mturk 2 | #' 3 | #' @details 4 | #' this function works for complete or incomplete batches 5 | #' 6 | #' @param batch_id any number or string to annotate the batch 7 | #' @param hit_ids hit ids returned from the MTurk API, i.e., output of sendTasks() 8 | #' @param retry if TRUE, retry retriving results from Mturk API five times; default to TRUE 9 | #' @param retry_in_seconds default to 60 seconds 10 | #' @param AWS_id AWS_ACCESS_KEY_ID 11 | #' @param AWS_secret AWS_SECRET_ACCESS_KEY 12 | #' @param sandbox sanbox setting 13 | #' 14 | #' @import pyMTurkR 15 | #' 16 | #' @export 17 | 18 | getResults <- function(batch_id = "unspecified", 19 | hit_ids, 20 | retry = TRUE, 21 | retry_in_seconds = 60, 22 | AWS_id = Sys.getenv("AWS_ACCESS_KEY_ID"), 23 | AWS_secret = Sys.getenv("AWS_SECRET_ACCESS_KEY"), 24 | sandbox = getOption("pyMTurkR.sandbox", TRUE)){ 25 | # check that amazon credentials & sandbox settings apply 26 | if(nchar(Sys.getenv("AWS_ACCESS_KEY_ID")) == 0){ 27 | Sys.setenv(AWS_ACCESS_KEY_ID = AWS_id) 28 | Sys.setenv(AWS_SECRET_ACCESS_KEY = AWS_secret) 29 | } 30 | options(pyMTurkR.sandbox = sandbox) 31 | 32 | # convert all hit ids to character 33 | task_ids <- as.character(hit_ids[[2]][,1]) 34 | mturk_ids <- as.character(hit_ids[[2]][,2]) 35 | 36 | # retrieve results from mturk 37 | raw_results <- data.frame(stringsAsFactors = FALSE) 38 | message('Start getting HITs...') 39 | for(i in 1:length(mturk_ids)){ 40 | turk_data <- suppressMessages(GetAssignment(hit = mturk_ids[i], 41 | get.answers = T)) 42 | if(nrow(turk_data$Answers) == 0){ 43 | this_hit_result <- as.data.frame(cbind(task_ids[i], mturk_ids[i], 0, 0, 0, 0), 44 | stringsAsFactors = FALSE) 45 | } else { 46 | this_hit_result <- as.data.frame(cbind(task_ids[i], mturk_ids[i], 47 | turk_data$Answers$AssignmentId, 48 | turk_data$Answers$WorkerId, 49 | as.numeric(turk_data$Answers$FreeText), 50 | as.character(turk_data$Assignments$SubmitTime)), 51 | stringsAsFactors = FALSE) 52 | } 53 | raw_results <- rbind(raw_results, this_hit_result) 54 | } 55 | 56 | results <- cbind(batch_id, raw_results, stringsAsFactors = FALSE) 57 | colnames(results) <- c("batch_id", "local_task_id", "mturk_hit_id", "assignment_id", "worker_id", "result", "completed_at") 58 | 59 | n_results <- sum(results$result != 0) 60 | if(n_results == length(mturk_ids)){ 61 | message(paste0('All ', n_results, ' HITs retrieved')) 62 | } else { 63 | message(paste0(n_results, ' / ', length(mturk_ids), ' results retrieved')) 64 | if(retry == T){ 65 | Sys.sleep(retry_in_seconds) 66 | return(getResults(batch_id, 67 | hit_ids, 68 | retry, 69 | retry_in_seconds, 70 | AWS_id, 71 | AWS_secret, 72 | sandbox)) 73 | } 74 | } 75 | return(results) 76 | } 77 | -------------------------------------------------------------------------------- /R/mixGold.R: -------------------------------------------------------------------------------- 1 | #' Mix the gold-standard tasks with the tasks need to be validated 2 | #' 3 | #' @param tasks All tasks need to be validated 4 | #' @param golds Gold standard tasks with the same structure 5 | #' 6 | #' @export 7 | 8 | mixGold <- function(tasks, golds){ 9 | if (nrow(tasks)/nrow(golds) != round(nrow(tasks)/nrow(golds))){ 10 | stop("The number of tasks needs to be a multiple of the number of golds.") 11 | } 12 | tasks <- as.data.frame(tasks, stringsAsFactors = F) 13 | placeholder <- seq(0, (nrow(tasks)+nrow(golds) - 1), by = (nrow(tasks)/nrow(golds)+1)) 14 | indices <- sample(1:(nrow(tasks)/nrow(golds)+1), nrow(golds), replace = T) 15 | if(indices[1] == 1){ 16 | indices[1] <- 2 17 | } 18 | for(i in 1:nrow(golds)){ 19 | tasks <- rbind(tasks[0:(placeholder[i]+indices[i]-1),], 20 | golds[i,], 21 | tasks[-(0:(placeholder[i]+indices[i]-1)),]) 22 | } 23 | tasks <- cbind(tasks, 1:nrow(tasks)) 24 | row.names(tasks) <- 1:nrow(tasks) 25 | colnames(tasks)[ncol(tasks)] <- "id" 26 | return(tasks) 27 | } 28 | -------------------------------------------------------------------------------- /R/pickLabel.R: -------------------------------------------------------------------------------- 1 | #' Pick the optimal label from candidate labels 2 | #' 3 | #' @details 4 | #' Users need to specify four plausible labels for each topic 5 | #' 6 | #' @param n The number of desired tasks 7 | #' @param text.predict A data frame or matrix containing both the text and the indicator(s) 8 | #' of the model predicted topic(s). 9 | #' @param text.name variable name in `text.predict` that indicates the text 10 | #' @param top1.name variable name in `text.predict` that indicates the top1 model predicted topic 11 | #' @param labels.index The topic index in correspondence with the labels, e.g., c(10, 12, 15). 12 | #' @param candidate.labels A list of vectors containing the user-defined labels assigned to the topics, 13 | #' Must be in the same length and order with `labels.index`. 14 | #' 15 | #' @export 16 | 17 | pickLabel <- function(n, text.predict = NULL, text.name = "text", 18 | top1.name = "top1", 19 | labels.index = NULL, candidate.labels = NULL){ 20 | if(!(is.data.frame(text.predict) | is.matrix(text.predict))){ 21 | stop("\"text.predict\" needs to be a data.frame or matrix") 22 | } 23 | if(!(text.name %in% colnames(text.predict))){ 24 | stop("Please specify the correct variable name for text.") 25 | } 26 | if(!(top1.name %in% colnames(text.predict))){ 27 | stop("Please specify the correct variable name for the model predicted most likely topic.") 28 | } 29 | text <- text.predict[,text.name] 30 | top1 <- text.predict[,top1.name] 31 | if(!all(labels.index %in% unique(top1))){ 32 | stop("Some topic (labels.index) does not have any corresponding representative text. 33 | Consider removing that topic.") 34 | } 35 | if(length(candidate.labels) != length(labels.index)){ 36 | stop("\"candidate.labels\" and \"labels.index\" have to be of the same length and in the exact same order.") 37 | } 38 | topic <- rep(labels.index, length.out = n) 39 | # define the output 40 | out <- matrix(NA, ncol = 6, nrow = n) 41 | colnames(out) <- c('topic', 'doc', 'opt1', 'opt2', 'opt3', 'optcrt') 42 | for(i in 1:n){ 43 | # sample document by topic 44 | k <- topic[i] 45 | doc <- gsub('\n', '
', sample(text[top1 == k], 1)) 46 | # put together a task 47 | out[i,] <- c(k, doc, unlist(candidate.labels[labels.index == k])) 48 | } 49 | return(out) 50 | } -------------------------------------------------------------------------------- /R/plotResults.R: -------------------------------------------------------------------------------- 1 | #' Plot results 2 | #' 3 | #' @details 4 | #' Visualize the accuracy rate (proportion correct) for a specified batch 5 | #' 6 | #' @param path path to store the plot 7 | #' @param x a vector of counts of successes; could be obtained from getResults() 8 | #' @param n a vector of counts of trials 9 | #' @param taskname the name of the task for labeling, e.g., Word Intrusion, Optimal Label. 10 | #' 11 | #' @export 12 | 13 | plotResults <- function(path, x, n, taskname, ...){ 14 | pdf(path, width = 3, height = 7) 15 | par(mgp = c(1.5, 0, 0), mar = c(2, 3, .7, .7)) 16 | plot(NULL, 17 | main = NA, 18 | ylim=c(0, 1.02), 19 | xlim = c(0.8, 1.2), 20 | ylab = "Proportion Correct", 21 | xlab = NA, 22 | cex.lab = 1.2, 23 | axes = F, ...) 24 | axis(side = 2, at = seq(0, 1, by = 0.2), col.ticks = NA, cex.axis = 1.2) 25 | axis(side = 1, at = 1, 26 | labels = taskname, 27 | las = 1, 28 | col = NA, 29 | col.ticks = NA, 30 | cex.axis = 1.2) 31 | # legend(0.55, 1.03, 32 | # c("Model"), 33 | # col = c(blue), 34 | # lty = 1, 35 | # lwd = 3, 36 | # cex = 1.2, 37 | # bty = 'n') 38 | 39 | # ---------------------------------------------------------------- 40 | # abline(h = 0.25, col = "gray", lty = 1, lwd = 2) 41 | abline(h = 0.5, col = "gray", lty = 1, lwd = 2) 42 | 43 | # -------------------------------- bars --------------------------------- 44 | first <- x[1]/n[1] 45 | second <- x[2]/n[2] 46 | pool <- (first + second)/2 47 | 48 | points(x = 1, y = first, pch = 20, col = "blue", cex = 1.5) 49 | segments(x0 = 1, y0 = first-1.96*sqrt(first*(1-first)/n[1]), x1 = 1, y1 = first+1.96*sqrt(first*(1-first)/n[1]), col = "blue", lty = 2, lwd = 2) 50 | points(x = 1.02, y = second, pch = 20, col = "blue", lty = 2, cex = 1.5) 51 | segments(x0 = 1.02, y0 = second-1.96*sqrt(second*(1-second)/n[2]), x1 = 1.02, y1 = second+1.96*sqrt(second*(1-second)/n[2]), col = "blue", lty = 2, lwd = 2) 52 | points(x = 1.05, y = pool, pch = 20, col = "blue", cex = 2) 53 | segments(x0 = 1.05, y0 = pool-1.96*sqrt(pool*(1-pool)/(n[1]+n[2])), x1 = 1.05, y1 = pool+1.96*sqrt(pool*(1-pool)/(n[1]+n[2])), col = "blue", lwd = 3) 54 | 55 | dev.off() 56 | } -------------------------------------------------------------------------------- /R/recordTasks.R: -------------------------------------------------------------------------------- 1 | #' Reform tasks to facilitate sending to Mturk 2 | #' 3 | #' @details 4 | #' Randomize the order of options and record the tasks in a specified local directory 5 | #' 6 | #' @param type one of WI, T8WSI, R4WSI 7 | #' @param tasks outputs from validateTopic(), validateLabel(), or mixGold() if users mix in gold-standard HITs 8 | #' @param path path to record the tasks (with meta-information) 9 | #' 10 | #' @export 11 | 12 | recordTasks <- function(type, tasks, path){ 13 | if(type == "WI"){ 14 | optionidx <- 2:6 15 | optRandom <- tasks[,optionidx] 16 | optRandom <- as.data.frame(t(apply(optRandom, 1, function(x) x[sample(length(x))])), 17 | stringsAsFactors = F) 18 | colnames(optRandom) <- paste0("word", 1:length(optionidx)) 19 | } else if (type == "R4WSI0"|type == "T8WSI"|type == "LI"|type == "OL"){ 20 | docindix <- 2 21 | optionidx <- 3:6 22 | optRandom <- tasks[,optionidx] 23 | optRandom <- as.data.frame(t(apply(optRandom, 1, function(x) x[sample(length(x))])), 24 | stringsAsFactors = F) 25 | optRandom <- cbind.data.frame(tasks[,docindix], optRandom, 26 | stringsAsFactors = F) 27 | colnames(optRandom) <- c("passage", paste0("word", 1:length(optionidx))) 28 | } else if (type == "R4WSI"){ 29 | optionidx <- 2:5 30 | optRandom <- tasks[,optionidx] 31 | optRandom <- as.data.frame(t(apply(optRandom, 1, function(x) x[sample(length(x))])), 32 | stringsAsFactors = F) 33 | colnames(optRandom) <- paste0("word", 1:length(optionidx)) 34 | } else { 35 | stop("Please specify a validate task type.") 36 | } 37 | record <- list(tasks, optRandom) 38 | save(record, file = path) 39 | message(paste("Record saved to", path)) 40 | return(record) 41 | } 42 | -------------------------------------------------------------------------------- /R/sendTasks.R: -------------------------------------------------------------------------------- 1 | #' Send prepared task to Mturk and record the API-returned HIT ids. 2 | #' 3 | #' @details 4 | #' Pairs the local ids with Mturk ids and save them to specified paths 5 | #' 6 | #' @param hit_type find from the Mturk requester's dashboard 7 | #' @param hit_layout find from the Mturk requester's dashboard 8 | #' @param type one of WI, T8WSI, R4WSI 9 | #' @param tasksrecord output of recordTasks() 10 | #' @param tasksids ids of tasks to send in numeric form. If left unspecified, the whole batch will be posted 11 | #' @param HITidspath path to record the returned HITids 12 | #' @param n_assignments number of of assignments per task. For the validation tasks, people almost always want 1 13 | #' @param expire_in_seconds default 8 hours 14 | #' @param batch_annotation add if needed 15 | #' 16 | #' @import pyMTurkR 17 | #' 18 | #' @export 19 | 20 | sendTasks <- function(hit_type = NULL, 21 | hit_layout = NULL, 22 | type = NULL, 23 | tasksrecord = NULL, 24 | tasksids = NULL, 25 | HITidspath = NULL, 26 | n_assignments = '1', 27 | expire_in_seconds = as.character(60 * 60 * 8), 28 | batch_annotation = NULL){ 29 | 30 | if(is.null(tasksids)){ 31 | tasksids <- tasksrecord[[1]][,"id"] 32 | } 33 | tasksids <- sort(tasksids) 34 | tosend <- tasksrecord[[2]][tasksrecord[[1]][,"id"] %in% tasksids,] 35 | 36 | if(type == "R4WSI0" | type == "T8WSI" | type == "LI" | type == "OL"){ 37 | hit_param_names <- c('passage', 'word1', 'word2', 'word3', 'word4') 38 | } else if (type == "WI"){ 39 | hit_param_names <- c('word1', 'word2', 'word3', 'word4', 'word5') 40 | } else if (type == "R4WSI"){ 41 | hit_param_names <- c('word1', 'word2', 'word3', 'word4') 42 | } else { 43 | stop("Invalid task types") 44 | } 45 | 46 | current_HIT_ids <- rep(NA, nrow(tosend)) 47 | map_ids <- as.data.frame(matrix(NA, nrow = nrow(tosend), ncol = 2)) 48 | colnames(map_ids) <- c("tasksids", "Mturkids") 49 | message('Sending task to MTurk') 50 | for(i in 1:nrow(tosend)){ 51 | hit_params <- list() 52 | for(j in 1:length(hit_param_names)){ 53 | hit_params[[j]] <- list(Name = hit_param_names[j], 54 | Value = tosend[i, j]) 55 | } 56 | current_HIT_ids[i] <- suppressMessages(CreateHIT(hit.type = hit_type, 57 | hitlayoutid = hit_layout, 58 | hitlayoutparameters = hit_params, 59 | assignments = n_assignments, 60 | expiration = expire_in_seconds, 61 | annotation = batch_annotation, 62 | verbose = FALSE))$HITId 63 | 64 | map_ids[i,] <- cbind(tasksids[i], current_HIT_ids[i]) 65 | } 66 | HITids <- list(current_HIT_ids, map_ids) 67 | save(HITids, file = HITidspath) 68 | message(paste("HITids saved to", HITidspath)) 69 | return(HITids) 70 | } 71 | -------------------------------------------------------------------------------- /R/validateLabel.R: -------------------------------------------------------------------------------- 1 | #' Create validation tasks for labels assigned to the topics in the topic model of choice. 2 | #' 3 | #' @details 4 | #' Users need to pick a topic model that they deem to be good and label the topics 5 | #' they later would like to use as measures. 6 | #' 7 | #' @param type Task structures to be specified. Must be one of "LI" (Label Intrusion) 8 | #' and "OL" (Optimal Label). 9 | #' @param n The number of desired tasks 10 | #' @param text.predict A data frame or matrix containing both the text and the indicator(s) 11 | #' of the model predicted topic(s). 12 | #' @param text.name variable name in `text.predict` that indicates the text 13 | #' @param top1.name variable name in `text.predict` that indicates the top1 model predicted topic 14 | #' @param top2.name variable name in `text.predict` that indicates the top2 model predicted topic 15 | #' @param top3.name variable name in `text.predict` that indicates the top3 model predicted topic 16 | #' @param labels The user-defined labels assigned to the topics 17 | #' @param labels.index The topic index in correspondence with the labels, e.g., c(10, 12, 15). 18 | #' Must be in the same length and order with `label`. 19 | #' @param labels.add Labels from other broad catagories. Default to NULL. Users could 20 | #' specify them to evaluate how well different broad categories are distinguished from 21 | #' one another. 22 | #' 23 | #' @export 24 | 25 | validateLabel <- function(type, n, text.predict = NULL, text.name = "text", 26 | top1.name = "top1", top2.name = "top2", top3.name = "top3", 27 | labels = NULL, labels.index = NULL, labels.add = NULL){ 28 | if(!(is.data.frame(text.predict) | is.matrix(text.predict))){ 29 | stop("\"text.predict\" needs to be a data.frame or matrix") 30 | } 31 | if(type == "OL"){ 32 | if(!(text.name %in% colnames(text.predict))){ 33 | stop("Please specify the correct variable name for text.") 34 | } 35 | if(!(top1.name %in% colnames(text.predict))){ 36 | stop("Please specify the correct variable name for the model predicted most likely topic.") 37 | } 38 | text <- text.predict[,text.name] 39 | top1 <- text.predict[,top1.name] 40 | if(!all(labels.index %in% unique(top1))){ 41 | stop("Some topic (labels.index) does not have any corresponding representative text. 42 | Consider removing that topic.") 43 | } 44 | if(length(labels) != length(labels.index)){ 45 | stop("\"labels\" and \"labels.index\" have to be of the same length and in the exact same order.") 46 | } 47 | topic <- rep(labels.index, length.out = n) 48 | # define the output 49 | out <- matrix(NA, ncol = 6, nrow = n) 50 | colnames(out) <- c('topic', 'doc', 'opt1', 'opt2', 'opt3', 'optcrt') 51 | for(i in 1:n){ 52 | # sample document by topic 53 | k <- topic[i] 54 | doc <- gsub('\n', '
', sample(text[top1 == k], 1)) 55 | # prepare labels 56 | best.label <- labels[labels.index == k] 57 | if (is.null(labels.add)){ 58 | intr.labels <- sample(labels[labels.index != k], 3) 59 | } else { 60 | intr.labels <- sample(c(labels[labels.index != k], labels.add), 3) 61 | } 62 | # put together a task 63 | out[i,] <- c(k, doc, intr.labels[1], intr.labels[2], intr.labels[3], best.label) 64 | } 65 | } else if (type == "LI"){ 66 | if(!(text.name %in% colnames(text.predict))){ 67 | stop("Please specify the correct variable name for text.") 68 | } 69 | if(!(top1.name %in% colnames(text.predict) & 70 | top2.name %in% colnames(text.predict) & 71 | top3.name %in% colnames(text.predict))){ 72 | stop("Please specify the correct variable names for the model predicted top3 topics.") 73 | } 74 | if(!all(labels.index %in% unique(c(text.predict[,c(top1.name)], 75 | text.predict[,c(top2.name)], 76 | text.predict[,c(top3.name)])))){ 77 | warning("Some topic (labels.index) does not have any corresponding representative text. 78 | Consider removing that topic.") 79 | } 80 | if(!all(unique(c(text.predict[,c(top1.name)], 81 | text.predict[,c(top2.name)], 82 | text.predict[,c(top3.name)])) %in% labels.index)){ 83 | stop("The top3 topics associated with some text are not all relevant. 84 | Consider refining the text pool.") 85 | } 86 | if(length(labels) != length(labels.index)){ 87 | stop("\"labels\" and \"labels.index\" have to be of the same length and in the exact same order.") 88 | } 89 | # define the output 90 | out <- matrix(NA, ncol = 6, nrow = n) 91 | colnames(out) <- c('topic', 'doc', 'opt1', 'opt2', 'opt3', 'optcrt') 92 | for(i in 1:n){ 93 | # randomly sample a row from the pool 94 | doc.idx <- sample(1:nrow(text.predict), 1) 95 | # prepare doc and labels 96 | doc <- gsub('\n', '
', text.predict[doc.idx, text.name]) 97 | label1 <- labels[labels.index == text.predict[doc.idx, top1.name]] 98 | label2 <- labels[labels.index == text.predict[doc.idx, top2.name]] 99 | label3 <- labels[labels.index == text.predict[doc.idx, top3.name]] 100 | pred3 <- unlist(text.predict[doc.idx, c(top1.name, top2.name, top3.name)]) 101 | if (is.null(labels.add)){ 102 | intr.label <- sample(labels[!(labels.index %in% pred3)], 1) 103 | } else { 104 | intr.label <- sample(c(labels[!(labels.index %in% pred3)], labels.add), 1) 105 | } 106 | # put together the question 107 | out[i,] <- c(toString(pred3), doc, label1, label2, label3, intr.label) 108 | } 109 | } else { 110 | stop("Please specify a valid task structure.") 111 | } 112 | return(out) 113 | } -------------------------------------------------------------------------------- /R/validateTopic.R: -------------------------------------------------------------------------------- 1 | #' Create validation tasks for topic model selection 2 | #' 3 | #' @details 4 | #' Users need to fit their own topic models. 5 | #' 6 | #' @param type Task structures to be specified. Must be one of "WI" (word intrusion), 7 | #' "T8WSI" (top 8 word set intrusion), and "R4WSI" (random 4 word set intrusion). 8 | #' @param n The number of desired tasks 9 | #' @param text The pool of documents to be shown to the Mturk workers 10 | #' @param vocab A character vector specifying the words in the corpus. Usually, it 11 | #' can be found in topic model output. 12 | #' @param beta A matrix of word probabilities for each topic. Each row represents a 13 | #' topic and each column represents a word. Note this should not be in the logged form. 14 | #' @param theta A matrix of topic proportions. Each row represents a document and each 15 | #' clums represents a topic. Must be specified if task = "T8WSI" or "R4WSI". 16 | #' @param thres the threshold to draw words from, default to top 50 words. 17 | #' 18 | #' @export 19 | 20 | validateTopic <- function(type, n, text = NULL, vocab, beta, theta = NULL, thres = 20){ 21 | if (type == "WI"){ 22 | if (is.vector(vocab)){ 23 | vocab <- matrix(vocab, nrow = nrow(beta), ncol = length(vocab), byrow = T) 24 | } 25 | if (ncol(vocab) != ncol(beta)){ 26 | stop("beta matrix does not correspond with the vocabulary.") 27 | } 28 | orderbeta <- t(apply(beta, 1, order, decreasing = TRUE)) 29 | topic <- rep(1:nrow(beta), length.out = n) 30 | out <- matrix(NA, ncol = 6, nrow = n) 31 | colnames(out) <- c('topic', 'opt1', 'opt2', 'opt3', 'opt4', 'optcrt') 32 | for(i in 1:n){ 33 | k <- topic[i] 34 | non.intr <- as.character(sample(vocab[k, orderbeta[k, 1:thres]], 4, 35 | prob = beta[k, orderbeta[k, 1:thres]])) 36 | intr.k <- sample((1:nrow(beta))[-k], 1) 37 | intr <- as.character(sample(vocab[intr.k, orderbeta[intr.k, 1:thres]], 1, 38 | prob = beta[intr.k, orderbeta[intr.k, 1:thres]])) 39 | out[i,] <- c(k, non.intr, intr) 40 | } 41 | } else if (type == "T8WSI"){ 42 | if (length(text) != nrow(theta)){ 43 | stop("theta matrix does not correspond with the documents.") 44 | } 45 | out <- matrix(NA, ncol = 6, nrow = n) 46 | colnames(out) <- c('topic', 'doc', 'opt1', 'opt2', 'opt3', 'optcrt') 47 | topwords <- lapply(1:nrow(beta), 48 | function(x) toString(vocab[x, order(beta[x,], decreasing = T)][1:8])) 49 | for(i in 1:n){ 50 | doc.idx <- sample(1:length(text), 1) 51 | # doc <- paste('

', gsub('\n', '
', text[doc.idx]), '

') 52 | doc <- gsub('\n', '
', text[doc.idx]) 53 | pred3 <- order(theta[doc.idx,], decreasing = T)[1:3] 54 | intr <- sample(order(theta[doc.idx,], decreasing = T)[-(1:3)], 1) 55 | out[i,] <- c(toString(pred3), doc, 56 | topwords[[pred3[1]]], topwords[[pred3[2]]], 57 | topwords[[pred3[3]]], topwords[[intr]]) 58 | } 59 | } else if (type == "R4WSI0"){ 60 | if (ncol(vocab) != ncol(beta)){ 61 | stop("beta matrix does not correspond with the vocabulary.") 62 | } 63 | if (length(text) != nrow(theta)){ 64 | stop("theta matrix does not correspond with the documents.") 65 | } 66 | pred1 <- t(apply(theta, 1, order, decreasing = T))[,1] # predict top 1 topic for each doc 67 | topic <- rep(sort(unique(pred1)), length.out = n) 68 | orderbeta <- t(apply(beta, 1, order, decreasing = TRUE)) 69 | out <- matrix(NA, ncol = 6, nrow = n) 70 | colnames(out) <- c('topic', 'doc', 'opt1', 'opt2', 'opt3', 'optcrt') 71 | for(i in 1:n){ 72 | k <- topic[i] 73 | # doc <- paste('

', gsub('\n', '
', sample(text[pred1 == k], 1)), '

') 74 | doc <- gsub('\n', '
', sample(text[pred1 == k], 1)) 75 | non.intr <- as.character(sample(vocab[k, orderbeta[k, 1:thres]], 12, 76 | prob = beta[k, orderbeta[k, 1:thres]])) 77 | intr.k <- sample((1:nrow(beta))[-k], 1) 78 | intr <- as.character(sample(vocab[intr.k, orderbeta[intr.k, 1:thres]], 4, 79 | prob = beta[intr.k, orderbeta[intr.k, 1:thres]])) 80 | asgn.n.intr <- sample(c(rep(1:3, 4))) 81 | out[i,] <- c(k, doc, 82 | toString(non.intr[asgn.n.intr==1]), 83 | toString(non.intr[asgn.n.intr==2]), 84 | toString(non.intr[asgn.n.intr==3]), 85 | toString(intr)) 86 | } 87 | } else if (type == "R4WSI"){ 88 | if (is.vector(vocab)){ 89 | vocab <- matrix(vocab, nrow = nrow(beta), ncol = length(vocab), byrow = T) 90 | } 91 | if (ncol(vocab) != ncol(beta)){ 92 | stop("beta matrix does not correspond with the vocabulary.") 93 | } 94 | orderbeta <- t(apply(beta, 1, order, decreasing = TRUE)) 95 | topic <- rep(1:nrow(beta), length.out = n) 96 | out <- matrix(NA, ncol = 5, nrow = n) 97 | colnames(out) <- c('topic', 'opt1', 'opt2', 'opt3', 'optcrt') 98 | for(i in 1:n){ 99 | k <- topic[i] 100 | non.intr <- as.character(sample(vocab[k, orderbeta[k, 1:thres]], 12, 101 | prob = beta[k, orderbeta[k, 1:thres]])) 102 | intr.k <- sample((1:nrow(beta))[-k], 1) 103 | intr <- as.character(sample(vocab[intr.k, orderbeta[intr.k, 1:thres]], 4, 104 | prob = beta[intr.k, orderbeta[intr.k, 1:thres]])) 105 | asgn.n.intr <- sample(c(rep(1:3, 4))) 106 | out[i,] <- c(k, 107 | toString(non.intr[asgn.n.intr==1]), 108 | toString(non.intr[asgn.n.intr==2]), 109 | toString(non.intr[asgn.n.intr==3]), 110 | toString(intr)) 111 | } 112 | } else { 113 | stop("Please specify a valid task structure.") 114 | } 115 | return(out) 116 | } 117 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## validateIt: An R Package for Topic and Label Validation 2 | 3 | Authors: [Luwei Ying](http://luweiying.org), [Jacob Montgomery](https://pages.wustl.edu/montgomery) and [Brandon Stewart](http://brandonstewart.org) 4 | 5 | Please email all comments/questions to luwei.ying [AT] wustl.edu 6 | 7 | ### Installation Instructions 8 | The package is currently not available on CRAN. You can install the most recent development version using the devtools package. First you have to install devtools using the following code. Note that you only have to do this once 9 | 10 | ``` 11 | if(!require(devtools)) install.packages("devtools") 12 | ``` 13 | 14 | Then you'll need to install the dependency, "pyMTurkR", using the function `install_github`. If you need to interact with Mturk, be sure to follow the detailed instructions here: github.com/cloudyr/pyMTurkR 15 | ``` 16 | devtools::install_github("cloudyr/pyMTurkR") 17 | ``` 18 | 19 | Now you can install our package 20 | 21 | ``` 22 | devtools::install_github("Luwei-Ying/validateIt", dependencies=TRUE) 23 | ``` 24 | 25 | Note that this will install all the packages suggested and required to run our package. It may take a few minutes the first time, but this only needs to be done on the first use. In the future you can update to the most recent development version using the same code. 26 | -------------------------------------------------------------------------------- /data/R4WSItasktest.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Luwei-Ying/validateIt/8f2da445e0c81c8418444e2d0fc9df34fb2a3077/data/R4WSItasktest.rda -------------------------------------------------------------------------------- /data/allR4WSItasktest.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Luwei-Ying/validateIt/8f2da445e0c81c8418444e2d0fc9df34fb2a3077/data/allR4WSItasktest.rda -------------------------------------------------------------------------------- /data/goldR4WSItest.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Luwei-Ying/validateIt/8f2da445e0c81c8418444e2d0fc9df34fb2a3077/data/goldR4WSItest.rda -------------------------------------------------------------------------------- /data/heldouttest.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Luwei-Ying/validateIt/8f2da445e0c81c8418444e2d0fc9df34fb2a3077/data/heldouttest.rda -------------------------------------------------------------------------------- /data/keypostedtest.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Luwei-Ying/validateIt/8f2da445e0c81c8418444e2d0fc9df34fb2a3077/data/keypostedtest.rda -------------------------------------------------------------------------------- /data/masstest.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Luwei-Ying/validateIt/8f2da445e0c81c8418444e2d0fc9df34fb2a3077/data/masstest.rda -------------------------------------------------------------------------------- /data/modtest.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Luwei-Ying/validateIt/8f2da445e0c81c8418444e2d0fc9df34fb2a3077/data/modtest.rda -------------------------------------------------------------------------------- /data/recordtest.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Luwei-Ying/validateIt/8f2da445e0c81c8418444e2d0fc9df34fb2a3077/data/recordtest.rda -------------------------------------------------------------------------------- /data/resultstest.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Luwei-Ying/validateIt/8f2da445e0c81c8418444e2d0fc9df34fb2a3077/data/resultstest.rda -------------------------------------------------------------------------------- /data/stmPreptest.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Luwei-Ying/validateIt/8f2da445e0c81c8418444e2d0fc9df34fb2a3077/data/stmPreptest.rda -------------------------------------------------------------------------------- /man/checkAgree.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/checkAgree.R 3 | \name{checkAgree} 4 | \alias{checkAgree} 5 | \title{Check Agreement Rate between Identical Trails} 6 | \usage{ 7 | checkAgree(results1, results2, key, type = NULL) 8 | } 9 | \arguments{ 10 | \item{results1}{first batch of results; outputs from getResults()} 11 | 12 | \item{results2}{first batch of results; outputs from getResults()} 13 | 14 | \item{key}{the local task record; outputs from recordTasks()} 15 | 16 | \item{type}{Task structures to be specified. Must be one of "WI" (word intrusion), 17 | "T8WSI" (top 8 word set intrusion), "R4WSI" (random 4 word set intrusion), 18 | "LI" (Label Intrusion), and "OL" (Optimal Label)} 19 | } 20 | \description{ 21 | Check Agreement Rate between Identical Trails 22 | } 23 | \details{ 24 | Evaluate workers' performance by agreement rate between identical trails 25 | (Notice that this means the two input, results1 and results2, must be identical.); 26 | Return 1) the exact agreement rate when both workers agree on the exact same choice, and 27 | 2) the binary agreement rate when both workers get the task either right or wrong simultaneously 28 | } 29 | -------------------------------------------------------------------------------- /man/combMass.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/combMass.R 3 | \name{combMass} 4 | \alias{combMass} 5 | \title{Combine the mass of words with the same root} 6 | \usage{ 7 | combMass(mod = NULL, vocab = NULL, beta = NULL) 8 | } 9 | \arguments{ 10 | \item{mod}{Fitted structural topic models.} 11 | 12 | \item{vocab}{A character vector specifying the words in the corpus. Usually, it 13 | can be found in topic model output.} 14 | 15 | \item{beta}{A matrix of word probabilities for each topic. Each row represents a 16 | topic and each column represents a word. Note this should not be in the logged form.} 17 | } 18 | \description{ 19 | Combine the mass of words with the same root 20 | } 21 | \details{ 22 | Use as a preparing step for validating unstemmed topic models. 23 | } 24 | -------------------------------------------------------------------------------- /man/evalResults.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/evalResults.R 3 | \name{evalResults} 4 | \alias{evalResults} 5 | \title{Evaluate results} 6 | \usage{ 7 | evalResults(results, key, type = NULL) 8 | } 9 | \arguments{ 10 | \item{results}{results of human choice; outputs from getResults()} 11 | 12 | \item{key}{the local task record; outputs form recordTasks()} 13 | 14 | \item{type}{Task structures to be specified. Must be one of "WI" (word intrusion), 15 | "T8WSI" (top 8 word set intrusion), "R4WSI" (random 4 word set intrusion), 16 | "LI" (Label Intrusion), and "OL" (Optimal Label)} 17 | } 18 | \description{ 19 | Evaluate results 20 | } 21 | \details{ 22 | Evaluate worker performance by gold-standard HITs; 23 | Return the accuracy rate (proportion correct) for a specified batch 24 | } 25 | -------------------------------------------------------------------------------- /man/getResults.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/getResults.R 3 | \name{getResults} 4 | \alias{getResults} 5 | \title{Get results from Mturk} 6 | \usage{ 7 | getResults( 8 | batch_id = "unspecified", 9 | hit_ids, 10 | retry = TRUE, 11 | retry_in_seconds = 60, 12 | AWS_id = Sys.getenv("AWS_ACCESS_KEY_ID"), 13 | AWS_secret = Sys.getenv("AWS_SECRET_ACCESS_KEY"), 14 | sandbox = getOption("pyMTurkR.sandbox", TRUE) 15 | ) 16 | } 17 | \arguments{ 18 | \item{batch_id}{any number or string to annotate the batch} 19 | 20 | \item{hit_ids}{hit ids returned from the MTurk API, i.e., output of sendTasks()} 21 | 22 | \item{retry}{if TRUE, retry retriving results from Mturk API five times; default to TRUE} 23 | 24 | \item{retry_in_seconds}{default to 60 seconds} 25 | 26 | \item{AWS_id}{AWS_ACCESS_KEY_ID} 27 | 28 | \item{AWS_secret}{AWS_SECRET_ACCESS_KEY} 29 | 30 | \item{sandbox}{sanbox setting} 31 | } 32 | \description{ 33 | Get results from Mturk 34 | } 35 | \details{ 36 | this function works for complete or incomplete batches 37 | } 38 | -------------------------------------------------------------------------------- /man/mixGold.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mixGold.R 3 | \name{mixGold} 4 | \alias{mixGold} 5 | \title{Mix the gold-standard tasks with the tasks need to be validated} 6 | \usage{ 7 | mixGold(tasks, golds) 8 | } 9 | \arguments{ 10 | \item{tasks}{All tasks need to be validated} 11 | 12 | \item{golds}{Gold standard tasks with the same structure} 13 | } 14 | \description{ 15 | Mix the gold-standard tasks with the tasks need to be validated 16 | } 17 | -------------------------------------------------------------------------------- /man/pickLabel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pickLabel.R 3 | \name{pickLabel} 4 | \alias{pickLabel} 5 | \title{Pick the optimal label from candidate labels} 6 | \usage{ 7 | pickLabel( 8 | n, 9 | text.predict = NULL, 10 | text.name = "text", 11 | top1.name = "top1", 12 | labels.index = NULL, 13 | candidate.labels = NULL 14 | ) 15 | } 16 | \arguments{ 17 | \item{n}{The number of desired tasks} 18 | 19 | \item{text.predict}{A data frame or matrix containing both the text and the indicator(s) 20 | of the model predicted topic(s).} 21 | 22 | \item{text.name}{variable name in `text.predict` that indicates the text} 23 | 24 | \item{top1.name}{variable name in `text.predict` that indicates the top1 model predicted topic} 25 | 26 | \item{labels.index}{The topic index in correspondence with the labels, e.g., c(10, 12, 15).} 27 | 28 | \item{candidate.labels}{A list of vectors containing the user-defined labels assigned to the topics, 29 | Must be in the same length and order with `labels.index`.} 30 | } 31 | \description{ 32 | Pick the optimal label from candidate labels 33 | } 34 | \details{ 35 | Users need to specify four plausible labels for each topic 36 | } 37 | -------------------------------------------------------------------------------- /man/plotResults.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotResults.R 3 | \name{plotResults} 4 | \alias{plotResults} 5 | \title{Plot results} 6 | \usage{ 7 | plotResults(path, x, n, taskname, ...) 8 | } 9 | \arguments{ 10 | \item{path}{path to store the plot} 11 | 12 | \item{x}{a vector of counts of successes; could be obtained from getResults()} 13 | 14 | \item{n}{a vector of counts of trials} 15 | 16 | \item{taskname}{the name of the task for labeling, e.g., Word Intrusion, Optimal Label.} 17 | } 18 | \description{ 19 | Plot results 20 | } 21 | \details{ 22 | Visualize the accuracy rate (proportion correct) for a specified batch 23 | } 24 | -------------------------------------------------------------------------------- /man/recordTasks.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/recordTasks.R 3 | \name{recordTasks} 4 | \alias{recordTasks} 5 | \title{Reform tasks to facilitate sending to Mturk} 6 | \usage{ 7 | recordTasks(type, tasks, path) 8 | } 9 | \arguments{ 10 | \item{type}{one of WI, T8WSI, R4WSI} 11 | 12 | \item{tasks}{outputs from validateTopic(), validateLabel(), or mixGold() if users mix in gold-standard HITs} 13 | 14 | \item{path}{path to record the tasks (with meta-information)} 15 | } 16 | \description{ 17 | Reform tasks to facilitate sending to Mturk 18 | } 19 | \details{ 20 | Randomize the order of options and record the tasks in a specified local directory 21 | } 22 | -------------------------------------------------------------------------------- /man/sendTasks.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sendTasks.R 3 | \name{sendTasks} 4 | \alias{sendTasks} 5 | \title{Send prepared task to Mturk and record the API-returned HIT ids.} 6 | \usage{ 7 | sendTasks( 8 | hit_type = NULL, 9 | hit_layout = NULL, 10 | type = NULL, 11 | tasksrecord = NULL, 12 | tasksids = NULL, 13 | HITidspath = NULL, 14 | n_assignments = "1", 15 | expire_in_seconds = as.character(60 * 60 * 8), 16 | batch_annotation = NULL 17 | ) 18 | } 19 | \arguments{ 20 | \item{hit_type}{find from the Mturk requester's dashboard} 21 | 22 | \item{hit_layout}{find from the Mturk requester's dashboard} 23 | 24 | \item{type}{one of WI, T8WSI, R4WSI} 25 | 26 | \item{tasksrecord}{output of recordTasks()} 27 | 28 | \item{tasksids}{ids of tasks to send in numeric form. If left unspecified, the whole batch will be posted} 29 | 30 | \item{HITidspath}{path to record the returned HITids} 31 | 32 | \item{n_assignments}{number of of assignments per task. For the validation tasks, people almost always want 1} 33 | 34 | \item{expire_in_seconds}{default 8 hours} 35 | 36 | \item{batch_annotation}{add if needed} 37 | } 38 | \description{ 39 | Send prepared task to Mturk and record the API-returned HIT ids. 40 | } 41 | \details{ 42 | Pairs the local ids with Mturk ids and save them to specified paths 43 | } 44 | -------------------------------------------------------------------------------- /man/validateLabel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/validateLabel.R 3 | \name{validateLabel} 4 | \alias{validateLabel} 5 | \title{Create validation tasks for labels assigned to the topics in the topic model of choice.} 6 | \usage{ 7 | validateLabel( 8 | type, 9 | n, 10 | text.predict = NULL, 11 | text.name = "text", 12 | top1.name = "top1", 13 | top2.name = "top2", 14 | top3.name = "top3", 15 | labels = NULL, 16 | labels.index = NULL, 17 | labels.add = NULL 18 | ) 19 | } 20 | \arguments{ 21 | \item{type}{Task structures to be specified. Must be one of "LI" (Label Intrusion) 22 | and "OL" (Optimal Label).} 23 | 24 | \item{n}{The number of desired tasks} 25 | 26 | \item{text.predict}{A data frame or matrix containing both the text and the indicator(s) 27 | of the model predicted topic(s).} 28 | 29 | \item{text.name}{variable name in `text.predict` that indicates the text} 30 | 31 | \item{top1.name}{variable name in `text.predict` that indicates the top1 model predicted topic} 32 | 33 | \item{top2.name}{variable name in `text.predict` that indicates the top2 model predicted topic} 34 | 35 | \item{top3.name}{variable name in `text.predict` that indicates the top3 model predicted topic} 36 | 37 | \item{labels}{The user-defined labels assigned to the topics} 38 | 39 | \item{labels.index}{The topic index in correspondence with the labels, e.g., c(10, 12, 15). 40 | Must be in the same length and order with `label`.} 41 | 42 | \item{labels.add}{Labels from other broad catagories. Default to NULL. Users could 43 | specify them to evaluate how well different broad categories are distinguished from 44 | one another.} 45 | } 46 | \description{ 47 | Create validation tasks for labels assigned to the topics in the topic model of choice. 48 | } 49 | \details{ 50 | Users need to pick a topic model that they deem to be good and label the topics 51 | they later would like to use as measures. 52 | } 53 | -------------------------------------------------------------------------------- /man/validateTopic.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/validateTopic.R 3 | \name{validateTopic} 4 | \alias{validateTopic} 5 | \title{Create validation tasks for topic model selection} 6 | \usage{ 7 | validateTopic(type, n, text = NULL, vocab, beta, theta = NULL, thres = 20) 8 | } 9 | \arguments{ 10 | \item{type}{Task structures to be specified. Must be one of "WI" (word intrusion), 11 | "T8WSI" (top 8 word set intrusion), and "R4WSI" (random 4 word set intrusion).} 12 | 13 | \item{n}{The number of desired tasks} 14 | 15 | \item{text}{The pool of documents to be shown to the Mturk workers} 16 | 17 | \item{vocab}{A character vector specifying the words in the corpus. Usually, it 18 | can be found in topic model output.} 19 | 20 | \item{beta}{A matrix of word probabilities for each topic. Each row represents a 21 | topic and each column represents a word. Note this should not be in the logged form.} 22 | 23 | \item{theta}{A matrix of topic proportions. Each row represents a document and each 24 | clums represents a topic. Must be specified if task = "T8WSI" or "R4WSI".} 25 | 26 | \item{thres}{the threshold to draw words from, default to top 50 words.} 27 | } 28 | \description{ 29 | Create validation tasks for topic model selection 30 | } 31 | \details{ 32 | Users need to fit their own topic models. 33 | } 34 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | Sys.setenv("R_TESTS" = "") 2 | library(testthat) 3 | library(validateIt) 4 | 5 | test_check("validateIt") 6 | -------------------------------------------------------------------------------- /tests/testthat/testcombMass.R: -------------------------------------------------------------------------------- 1 | load("../../data/modtest.rda") 2 | 3 | combMass_slow <- function(mod = NULL){ 4 | vocab <- mod$vocab 5 | rawbeta <- exp(mod$beta$logbeta[[1]]) 6 | stemmed_vocab <- stemDocument(vocab) 7 | newvocab <- matrix(NA, nrow = nrow(rawbeta), ncol = length(unique(stemmed_vocab))) 8 | newbeta <- matrix(NA, nrow = nrow(rawbeta), ncol = length(unique(stemmed_vocab))) 9 | for (i in 1:nrow(rawbeta)){ 10 | mapping <- cbind.data.frame(vocab, stemmed_vocab, rawbeta[i,], stringsAsFactors = F) 11 | for(j in 1:length(unique(stemmed_vocab))){ 12 | group <- mapping[mapping$stemmed_vocab == unique(stemmed_vocab)[j],] 13 | newvocab[i, j] <- as.character(group$vocab[which.max(group[,3])]) 14 | newbeta[i, j] <- sum(group[,3]) 15 | } 16 | } 17 | return(list(newvocab, newbeta)) 18 | } 19 | 20 | test_that("combMass() works as expected",{ 21 | out1 <- combMass(modtest) 22 | out2 <- combMass_slow(modtest) 23 | 24 | expect_equal(nrow(out1[[1]]), nrow(out1[[2]])) 25 | expect_equal(ncol(out1[[1]]), ncol(out1[[2]])) 26 | expect_identical(out1[[1]][1,], out1[[1]][1,]) 27 | expect_identical(out1[[1]][2,], out1[[1]][2,]) 28 | expect_identical(out1[[1]][3,], out1[[1]][3,]) 29 | expect_identical(out1[[2]][1,], out1[[2]][1,]) 30 | expect_identical(out1[[2]][2,], out1[[2]][2,]) 31 | expect_identical(out1[[2]][3,], out1[[2]][3,]) 32 | }) 33 | -------------------------------------------------------------------------------- /tests/testthat/testevalResults.R: -------------------------------------------------------------------------------- 1 | load("../../data/resultstest.rda") 2 | load("../../data/keypostedtest.rda") 3 | 4 | evaluations <- evalResults(results = resultstest, 5 | key = keypostedtest, 6 | type = "R4WSI") 7 | 8 | test_that("Evaluations in the correct format",{ 9 | expect_length(evaluations, 3) 10 | expect_is(evaluations$`Gold-standard HIT Correct Rate by Workers`, 11 | "table") 12 | }) 13 | -------------------------------------------------------------------------------- /tests/testthat/testmixGold.R: -------------------------------------------------------------------------------- 1 | load("../../data/R4WSItasktest.rda") 2 | load("../../data/goldR4WSItest.rda") 3 | 4 | test_that("Test mixGold",{ 5 | allR4WSItasktest <- mixGold(tasks = R4WSItasktest, golds = goldR4WSItest) 6 | 7 | expect_is(allR4WSItasktest, "data.frame") 8 | expect_is(allR4WSItasktest[,2], "character") 9 | expect_is(allR4WSItasktest[,3], "character") 10 | expect_is(allR4WSItasktest[,4], "character") 11 | expect_is(allR4WSItasktest[,5], "character") 12 | expect_is(allR4WSItasktest[,6], "character") 13 | expect_is(allR4WSItasktest[,7], "integer") 14 | expect_equal(length(allR4WSItasktest$id), length(unique(allR4WSItasktest$id))) 15 | expect_true(nchar(allR4WSItasktest[nrow(allR4WSItasktest),2]) > 1) 16 | expect_equal(sum(is.na(allR4WSItasktest)), 0) 17 | expect_equal(colnames(allR4WSItasktest)[1], "topic") 18 | expect_equal(colnames(allR4WSItasktest)[2], "doc") 19 | expect_equal(colnames(allR4WSItasktest)[3], "opt1") 20 | expect_equal(colnames(allR4WSItasktest)[4], "opt2") 21 | expect_equal(colnames(allR4WSItasktest)[5], "opt3") 22 | expect_equal(colnames(allR4WSItasktest)[6], "optcrt") 23 | expect_equal(colnames(allR4WSItasktest)[7], "id") 24 | expect_equal(length(unique(allR4WSItasktest[,1])), nrow(masstest[[1]])+1) 25 | expect_true(("gold" %in% unique(allR4WSItasktest[,1]))) 26 | }) -------------------------------------------------------------------------------- /tests/testthat/testrecordTasks.R: -------------------------------------------------------------------------------- 1 | load("../../data/allR4WSItasktest.rda") 2 | 3 | record <- recordTasks(type = "R4WSI0", 4 | tasks = allR4WSItasktest, 5 | path = "../../data/recordtest.rda") 6 | 7 | test_that("Record exists in path",{ 8 | expect_true(file.exists("../../data/recordtest.rda")) 9 | # file created within 1 min 10 | expect_true(Sys.time() - file.info("../../data/recordtest.rda")$ctime < 5) 11 | }) 12 | 13 | test_that("Record is in the correct format",{ 14 | expect_length(record, 2) 15 | expect_is(record[[1]], "data.frame") 16 | expect_is(record[[1]][,6], "character") 17 | expect_is(record[[1]][,7], "integer") 18 | expect_equal(ncol(record[[1]]), 7) 19 | expect_is(record[[2]], "data.frame") 20 | expect_equal(nrow(record[[1]]), nrow(record[[2]])) 21 | expect_equal(ncol(record[[2]]), 5) 22 | expect_is(record[[2]][,1], "character") 23 | expect_is(record[[2]][,2], "character") 24 | expect_is(record[[2]][,3], "character") 25 | expect_is(record[[2]][,4], "character") 26 | expect_is(record[[2]][,5], "character") 27 | # check documents are in order 28 | expect_identical(record[[1]][1, 2], record[[2]][1, 1]) 29 | expect_identical(record[[1]][2, 2], record[[2]][2, 1]) 30 | expect_identical(record[[1]][nrow(record[[1]]), 2], record[[2]][nrow(record[[2]]), 1]) 31 | # word sets are retained 32 | expect_equal(sum(unique(record[[2]][1, 2:5]) %in% unique(record[[1]][1, 3:6])), 33 | 4) 34 | expect_equal(sum(unique(record[[2]][nrow(record[[2]]), 2:5]) %in% unique(record[[1]][nrow(record[[1]]), 3:6])), 35 | 4) 36 | # order of word sets changed 37 | expect_false(identical(record[[1]][,2:6], record[[2]])) 38 | }) 39 | -------------------------------------------------------------------------------- /tests/testthat/testvalidateTopic.R: -------------------------------------------------------------------------------- 1 | load("../../data/masstest.rda") 2 | load("../../data/heldouttest.rda") 3 | load("../../data/stmPreptest.rda") 4 | 5 | test_that("Test class",{ 6 | WItask <- validateTopic(type = "WI", n = 5, vocab = masstest[[1]], beta = masstest[[2]]) 7 | 8 | expect_is(WItask, "matrix") 9 | }) 10 | 11 | test_that("Test topic index",{ 12 | R4WSI0task <- validateTopic(type = "R4WSI0", n = 15, 13 | vocab = masstest[[1]], 14 | beta = masstest[[2]], 15 | text = stmPreptest$meta$post_text[-heldouttest$missing$index], 16 | theta = modtest$theta[-heldouttest$missing$index,]) 17 | 18 | expect_is(R4WSI0task, "matrix") 19 | expect_equal(colnames(R4WSI0task)[1], "topic") 20 | expect_equal(colnames(R4WSI0task)[2], "doc") 21 | expect_equal(colnames(R4WSI0task)[3], "opt1") 22 | expect_equal(colnames(R4WSI0task)[4], "opt2") 23 | expect_equal(colnames(R4WSI0task)[5], "opt3") 24 | expect_equal(colnames(R4WSI0task)[6], "optcrt") 25 | expect_equal(length(unique(R4WSI0task[,1])), nrow(masstest[[1]])) 26 | }) 27 | 28 | --------------------------------------------------------------------------------