├── .gitignore ├── CITATION ├── DESCRIPTION ├── NAMESPACE ├── NEWS ├── R ├── config.R ├── createVirtualTable.R ├── explainTable.R ├── explainVariable.R ├── findVariable.R ├── generateSQL.R ├── getFunctionArguments.R ├── listVariables.R ├── myPersonality.R ├── myPersonalitySQL.R ├── myPersonality_pkg.R └── utilities.R ├── README.md ├── inst └── tableDefinitions.R ├── man ├── explainTable.Rd ├── explainVariable.Rd ├── findVariable.Rd ├── myPersonality.Rd ├── myPersonalityPackage.Rd ├── myPersonalitySQL.Rd ├── print.table.help.Rd └── print.variable.help.Rd └── tests └── virtualTable_tests.R /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | # Example code in package build process 4 | *-Ex.R 5 | .Rproj.user 6 | # RStudio project files 7 | .Rproj 8 | .Rbuildignore 9 | myPersonality.Rproj 10 | sandbox/ 11 | README.html 12 | -------------------------------------------------------------------------------- /CITATION: -------------------------------------------------------------------------------- 1 | bibentry("Manual", 2 | title = "myPersonality Research Wiki", 3 | author = c( 4 | person(c("David", "J."), "Stillwell", role = "aut"), 5 | person("Michal", "Kosinsky", role = "aut"), 6 | person(c("Ilmo", "K."), "van der Lowe", role = "aut"), 7 | person(c("Alex", "B."), "Kogan", role = "aut") 8 | ), 9 | organization = "Cambridge Psychometrics Centre", 10 | address = "Cambridge, United Kingdom", 11 | year = substring(Sys.time(),1,4), 12 | url = "http://mypersonality.org/", 13 | 14 | mheader = "To cite myPersonality in publications use:" 15 | ) 16 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: myPersonality 2 | Title: Tools for accessing myPersonality dataset provided by Cambridge 3 | Psychometrics Centre 4 | Description: myPersonality is a large dataset of millions of Facebook users and 5 | their various test results. Because the dataset is too large to distribute 6 | as files, this package provides access to the dataset on a database 7 | administered by Cambridge Psychometrics Centre. 8 | URL: http://mypersonality.org 9 | Version: 1.0 10 | Maintainer: Ilmo van der Lowe 11 | Author: Ilmo van der Lowe 12 | Depends: 13 | R (>= 2.14), 14 | data.table 15 | Suggests: 16 | testthat (>= 0.2), 17 | RMySQL, 18 | RODBC 19 | License: MIT 20 | Collate: 21 | 'config.R' 22 | 'createVirtualTable.R' 23 | 'generateSQL.R' 24 | 'getFunctionArguments.R' 25 | 'myPersonality.R' 26 | 'myPersonalitySQL.R' 27 | 'utilities.R' 28 | 'explainTable.R' 29 | 'explainVariable.R' 30 | 'findVariable.R' 31 | 'listVariables.R' 32 | 'myPersonality_pkg.R' 33 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | S3method(print,table.help) 2 | S3method(print,variable.help) 3 | export(explainTable) 4 | export(explainVariable) 5 | export(findVariable) 6 | export(myPersonality) 7 | export(myPersonalitySQL) 8 | import(data.table) 9 | -------------------------------------------------------------------------------- /NEWS: -------------------------------------------------------------------------------- 1 | myPersonality 0.1 2 | =========== 3 | 4 | * Convenience functions to access data in a subset of tables. 5 | * Helpful feedback when using convenience functions without arguments. 6 | * Convenience function arguments understood as both column names and filter statements. 7 | * `myPersonalitySQL` function that communicates with the database 8 | administered by Cambridge Psychometrics Centre. -------------------------------------------------------------------------------- /R/config.R: -------------------------------------------------------------------------------- 1 | config <- function(user = NULL, password = NULL) { 2 | 3 | # Set default server address and database name 4 | Sys.setenv("myPersonality_host" = "alex.e-psychometrics.com") 5 | Sys.setenv("myPersonality_database" = "cpw_myPersonality") 6 | 7 | if (interactive()) { 8 | # R is running in interactive mode. 9 | # If user name is provided as an argument, use it. 10 | if (!is.null(user)) { 11 | Sys.setenv("myPersonality_user" = user) 12 | } else { 13 | # User name has not been supplied as an argument. 14 | if (Sys.getenv("myPersonality_user") == "") { 15 | # If user name has not been set in env vars, prompt for it. 16 | uid <- readline("Please enter your user name: ") 17 | Sys.setenv("myPersonality_user" = uid) 18 | } else { 19 | # Environment variable is already set, nothing to do 20 | } 21 | } 22 | 23 | # If password is provided as an argument, use it. 24 | if (!is.null(password)) { 25 | Sys.setenv("myPersonality_password" = password) 26 | } else { 27 | # Password has not been supplied as an argument. 28 | if (Sys.getenv("myPersonality_password") == "") { 29 | # If user name has not been set in env vars, prompt for it. 30 | pwd <- readline("Please enter your password: ") 31 | Sys.setenv("myPersonality_password" = pwd) 32 | } else { 33 | # Password is already set, nothing to do 34 | } 35 | } 36 | 37 | } else { 38 | # Set dummy user for non-interactive use. 39 | # Note: Dummy user has no privileges beyond reading meta tables 40 | Sys.setenv("myPersonality_user" = "connection_test") 41 | Sys.setenv("myPersonality_password" = "foobar") 42 | } 43 | 44 | if (interactive()) { 45 | # Set up data access functions 46 | source(system.file("tableDefinitions.R", package = "myPersonality", mustWork = T)) 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /R/createVirtualTable.R: -------------------------------------------------------------------------------- 1 | createVirtualTable <- function(table.name, ...) { 2 | # Create a placeholder list for object data 3 | o <- list() 4 | o$table.name <- table.name 5 | o$display.name <- getDisplayName(table.name) 6 | 7 | # Get table definition from the database 8 | o$table.definition <- myPersonalitySQL(sprintf("SHOW COLUMNS FROM %s", table.name)) 9 | valid.columns <- o$table.definition$Field 10 | 11 | # Identify primary key 12 | o$key <- as.character(subset(o$table.definition, Key == "PRI")$Field) 13 | 14 | # Identify columns and WHERE statements from arguments supplied by user 15 | args <- processFunctionArguments(...) 16 | 17 | # If no arguments were given, display brief help instead. 18 | if (is.null(args)) { 19 | print(explainTable(o$display.name)) 20 | return(invisible(NULL)) 21 | } 22 | 23 | o$columns <- c(o$key, args$columns) # Always include primary key for merges later on. 24 | # Note: Cases in which there is no primary key will be handled downstream by getData() 25 | 26 | if (!all(o$columns %in% valid.columns)) { 27 | wrong.columns <- paste(setdiff(o$columns, valid.columns), collapse = ", ") 28 | error.msg <- sprintf("\nThe following are not valid variable names:\n%s", wrong.columns) 29 | stop(error.msg) 30 | } 31 | 32 | o$where <- args$where 33 | 34 | class(o) <- "virtual.table" 35 | results <- getData(o) 36 | return(results) 37 | } 38 | 39 | print.virtual.table <- function(x) { 40 | cat(generateSQL(x),"\n") 41 | } 42 | 43 | getData <- function(x) { 44 | sql.data <- data.table(myPersonalitySQL(generateSQL(x))) 45 | if (!identical(x$key, character(0))) { 46 | setkeyv(sql.data, x$key) 47 | } else { 48 | # There is no primary key for the table 49 | } 50 | 51 | return(sql.data) 52 | } 53 | -------------------------------------------------------------------------------- /R/explainTable.R: -------------------------------------------------------------------------------- 1 | #' Information about myPersonality database tables. 2 | #' 3 | #' This function retrieves metadata about tables from the Cambridge Psychometrics Centre's myPersonality database. 4 | #' However, it is easier to run the data access function without an argument to get the same results (e.g., \code{participants()}). 5 | #' 6 | #' @param table.name A character string that specifies the table that you are interested in. 7 | #' @keywords attribute 8 | #' @export 9 | #' @seealso \link{myPersonality}, \link{explainVariable} 10 | #' @examples 11 | #' \dontrun{ 12 | #' explainTable("address") 13 | #' } 14 | 15 | explainTable <- function(table.name) { 16 | if (missing(table.name)) { 17 | stop("You must provide a table name.") 18 | } 19 | o <- list() 20 | 21 | # Fetch basic information 22 | sql <- sprintf('SELECT * FROM `_meta_tables` WHERE display_name = "%s"', table.name) 23 | 24 | o$table.info <- try(myPersonalitySQL(sql), silent = T) 25 | if (class(o) == "try-error" || nrow(o$table.info) == 0) { 26 | stop("Please check your table name.") 27 | } 28 | 29 | o$row_count <- myPersonalitySQL(sprintf("SELECT COUNT(*) FROM %s", o$table.info$db_name))[, 1] 30 | 31 | # Check if there are related tables 32 | sql <- sprintf('SELECT child_table FROM `_meta_related_tables` WHERE parent_table = "%s"', o$table.info$db_name) 33 | o$related <- myPersonalitySQL(sql) 34 | if (nrow(o$related) == 0) { 35 | o$related <- NA 36 | } else { 37 | o$related <- getDisplayName(o$related[, 1]) 38 | } 39 | 40 | class(o) <- "table.help" 41 | return(o) 42 | } 43 | 44 | #' Prints myPersonality table information. 45 | #' 46 | #' This function extends generic print function. 47 | #' 48 | #' @param x An object returned by explainTable 49 | #' @keywords attribute 50 | #' @method print table.help 51 | #' @S3method print table.help 52 | #' @examples 53 | #' \dontrun{explainTable("address")} 54 | 55 | print.table.help <- function(x) { 56 | info <- x$table.info 57 | showInfo(info$details, "") 58 | showInfo(info$note, "\nNOTE: ", "\n\n") 59 | showInfo(info$citation, "For more information about these data, please see: ") 60 | showInfo(info$url,"", "\n\n") 61 | 62 | cat("The database table", as.character(info$db_name), "contains the following variables:\n") 63 | print(listVariables(info$display_name)) 64 | 65 | showInfo(x$related, "\nPlease use ", "() to see related data.") 66 | cat("\nThe table has", x$row_count, "rows.") 67 | } 68 | -------------------------------------------------------------------------------- /R/explainVariable.R: -------------------------------------------------------------------------------- 1 | #' Provide information about specific myPersonality variable. 2 | #' 3 | #' This function describes the speficied variable by fetching additional metainformation about it from the myPersonality database. 4 | #' If a variable exists in multiple tables, the function asks the user to specify which table/variable pair to show. 5 | #' 6 | #' @param variable.name A character string that specifies the variable of interest. 7 | #' @keywords attribute 8 | #' @seealso \link{findVariable} 9 | #' @export 10 | #' @examples 11 | #' \dontrun{ 12 | #' explainVariable("gender") 13 | #' } 14 | #' # Multiple tables contain variable 'userid'. 15 | #' # Read instructions in the function output to select the right table. 16 | #' \dontrun{ 17 | #' explainVariable("userid") 18 | #' } 19 | 20 | explainVariable <- function(variable.name) { 21 | sql <- sprintf('SELECT * FROM _meta_variables WHERE name = "%s"', variable.name) 22 | results <- myPersonalitySQL(sql) 23 | class(results) <- "variable.help" 24 | return(results) 25 | } 26 | 27 | #' Prints myPersonality variable information. 28 | #' 29 | #' This function extends generic print function. 30 | #' 31 | #' @param x An object returned by either findVariable or explainVariable functions 32 | #' @keywords attribute 33 | #' @method print variable.help 34 | #' @S3method print variable.help 35 | #' @examples 36 | #' # explainVariable("gender") 37 | #' # explainVariable("userid") # Multiple tables contain variable 'userid' 38 | 39 | print.variable.help <- function(x) { 40 | class(x) <- "data.frame" 41 | if (nrow(x) > 1) { 42 | cat("The query returns multiple results.\n") 43 | for (i in 1:nrow(x)) { 44 | cat(i) 45 | cat(":", getDisplayName(x$parent_table[i]), "-", as.character(x$name[i]), "\n") 46 | } 47 | n <- readline("Please select one from the list by entering its number: ") 48 | n <- as.numeric(n) 49 | if (class(n) != "numeric") {stop("You must enter an integer.")} 50 | x <- x[n,] 51 | } 52 | 53 | showInfo(x$name, "Variable: ") 54 | showInfo(getDisplayName(x$parent_table), "Access function to retrieve data: ", sprintf("('%s')\n", x$name)) 55 | showInfo(x$description, "Description: ","\n") 56 | showInfo(x$note, "\nDetails:\n") 57 | } 58 | -------------------------------------------------------------------------------- /R/findVariable.R: -------------------------------------------------------------------------------- 1 | #' Locate variables of interest by a text search. 2 | #' 3 | #' This function attempts to locate variables whose name, description, or notes contain a specific sequence of characters. 4 | #' Please note that the search does not pay attention to word boundaries, so search query 'age' would also return 'marriage' as a result. 5 | #' 6 | #' @param query A character string containing the text to be searched. It is advisable to use queries that are longer than 3 characters. 7 | #' @keywords attribute 8 | #' @export 9 | #' @seealso \link{explainVariable} 10 | #' @examples 11 | #' \dontrun{ 12 | #' findVariable("sex") 13 | #' } 14 | 15 | findVariable <- function(query) { 16 | sql <- sprintf('SELECT * FROM _meta_variables WHERE name LIKE "%%%s" OR description LIKE "%%%s%%" OR note LIKE "%%%s%%"', query, query, query) 17 | results <- myPersonalitySQL(sql) 18 | if (nrow(results) == 0) { 19 | message("No results.") 20 | return(invisible(NULL)) 21 | } 22 | class(results) <- "variable.help" 23 | return(results) 24 | } 25 | -------------------------------------------------------------------------------- /R/generateSQL.R: -------------------------------------------------------------------------------- 1 | generateSQL <- function(x) { 2 | if (!inherits(x, "virtual.table")) { 3 | stop("The function requires a 'virtual.table' object.") 4 | } 5 | 6 | if (is.null(x$where)) { 7 | where <- "" 8 | } else { 9 | where <- generateWHERE(x$where, x$table.name) 10 | } 11 | sql.cmd <- sprintf("SELECT %s FROM %s %s", paste(x$table.name, unique(x$columns), sep = ".", collapse = ","), x$table.name, where) 12 | 13 | return(sql.cmd) 14 | } 15 | 16 | generateWHERE <- function(x, table.name) { 17 | 18 | if (length(x) == 0) {return("")} 19 | 20 | where <- "WHERE " 21 | for (i in 1:length(x)) { 22 | value <- x[i] 23 | thisWhere <- paste(table.name, value, sep =".") 24 | if (i > 1) { 25 | where <- paste(where, thisWhere, sep = " AND ") 26 | } else { 27 | where <- paste(where, thisWhere, sep = "") 28 | } 29 | 30 | } 31 | 32 | return(where) 33 | } 34 | -------------------------------------------------------------------------------- /R/getFunctionArguments.R: -------------------------------------------------------------------------------- 1 | .forbidden.symbols <- c(" ", "!", "<", ">", "(", ")", "?") 2 | 3 | isWHERE <- function(x) { 4 | x.chars <- strsplit(x, "")[[1]] 5 | if (any(.forbidden.symbols %in% x.chars)) { 6 | return(TRUE) 7 | } else { 8 | return(FALSE) 9 | } 10 | } 11 | 12 | extractColumn <- function(x) { 13 | if (length(x) > 1) { 14 | stop("This function is not vectorized. You must supply only one string at a time.") 15 | } 16 | for (thisSymbol in .forbidden.symbols) { 17 | s <- strsplit(x, thisSymbol, fixed = TRUE)[[1]] 18 | if (length(s) > 1) {return(s[1])} 19 | } 20 | names(x) <- NULL 21 | return(x) 22 | } 23 | 24 | getFunctionArguments <- function(...) { 25 | args <- list(...) 26 | results <- unlist(args) 27 | names(results) <- NULL 28 | return(results) 29 | } 30 | 31 | processFunctionArguments <- function(...) { 32 | args <- getFunctionArguments(...) 33 | if (is.null(args)) {return(NULL)} 34 | 35 | results <- list() 36 | results$columns <- unlist(lapply(args, extractColumn)) 37 | 38 | wheres <- args[sapply(args, isWHERE)] 39 | if (!identical(wheres, character(0))) { 40 | results$where <- wheres 41 | } else { 42 | results$where <- NULL 43 | } 44 | # Check if no WHERE argements were provided 45 | 46 | return(results) 47 | } 48 | -------------------------------------------------------------------------------- /R/listVariables.R: -------------------------------------------------------------------------------- 1 | listVariables <- function(table.name) { 2 | db_name <- getDbName(table.name) 3 | 4 | # Get table definition from the database 5 | fields <- myPersonalitySQL(sprintf("SHOW COLUMNS FROM %s", db_name))$Field 6 | variables <- data.frame(variable = fields, stringsAsFactors = F) 7 | 8 | # Get basic variable data from _meta_variables 9 | sql <- sprintf('SELECT name, description, note FROM _meta_variables WHERE parent_table = "%s"', db_name) 10 | var.info <- defactor(myPersonalitySQL(sql)) 11 | 12 | if (!nrow(var.info) == 0) { 13 | variables <- merge(variables, var.info, by.x = "variable", by.y = "name", all.x = T) 14 | } 15 | 16 | class(variables) <- "variable.list.help" 17 | return(variables) 18 | } 19 | 20 | print.variable.list.help <- function(x) { 21 | class(x) <- "data.frame" 22 | notes.flag <- FALSE 23 | 24 | if ("note" %in% names(x)) { 25 | for (i in 1:nrow(x)) { 26 | if (x[i, "note"] %in% c(NA,""," ")) { 27 | x[i, "note"] <- "" 28 | } else { 29 | x[i, "note"] <- "" 30 | x[i, "description"] <- paste(gsub("^ ", "", x[i, "description"]), "*", sep = "") 31 | notes.flag <- TRUE 32 | } 33 | } 34 | x$note <- NULL 35 | } 36 | 37 | write.table(format(x, justify="left"), row.names=F, col.names=F, quote=F, sep = " ") 38 | if (notes.flag) { 39 | cat("\n* Use command 'explainVariable(\"variable_name_here\")' to see additional variable notes.") 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /R/myPersonality.R: -------------------------------------------------------------------------------- 1 | #' Set up a connection to myPersonality database server. 2 | #' 3 | #' This function starts your session and sets up your connection to the Cambridge Psychometrics Centre's myPersonality database. 4 | #' Running this function and providing valid user name and password when prompted sets up other data access functions, such as \code{participants()}. 5 | #' The exact names and number of the data access functions depends on your access privileges. 6 | #' 7 | #' Once the other data access functions have been set up, you can use them to retrieve and filter data (see examples below). 8 | #' For more instructions, please visit \url{https://github.com/vanderlowe/myPersonality/blob/master/README.md} and see \link{myPersonalityPackage}. 9 | #' 10 | #' @keywords manip 11 | #' @export 12 | #' @param user Your user name (as provided by the Cambridge Psychometrics Centre). 13 | #' @param password Your password (as provided by the Cambridge Psychometrics Centre) 14 | #' @return A printout of data access functions. 15 | #' @note You must contact the Cambridge Psychometrics Centre to obtain access privileges. \bold{You cannot use this package without a valid user name and password.} If you do not have one, please visit \url{http://mypersonality.org/wiki/doku.php?id=database_use_guidelines} to register as a collaborator. 16 | #' @seealso \link{myPersonalityPackage}, \link{findVariable} 17 | #' @examples 18 | #' # Establish database connection and generate data access functions. 19 | #' \dontrun{ 20 | #' myPersonality() 21 | #' } 22 | #' # Show information about participants, including available variables. 23 | #' \dontrun{ 24 | #' participants() 25 | #' } 26 | #' # Retrieve age and gender data for all participants. 27 | #' \dontrun{ 28 | #' participants("age", "gender") 29 | #' } 30 | #' # Retrieve age and gender data for participants older than 90 years. 31 | #' \dontrun{ 32 | #' participants("age > 90", "gender") 33 | #' } 34 | 35 | myPersonality <- function(user = NULL, password = NULL) { 36 | config(user = user, password = password) 37 | defined.funcs <- myPersonalitySQL("SHOW TABLES", user = user, password = password)[,1] # MySQL will only show tables to which the user has access 38 | cat("Currently, the following data access functions are available to you:\n") 39 | 40 | for (f in defined.funcs) { 41 | if (substring(f, 1, 1) == "_") {next} 42 | f.name <- getDisplayName(f) 43 | if (!identical(f.name, character(0))) { 44 | cat(as.character(f.name)) # If _meta_tables 45 | cat("()\n") 46 | } 47 | 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /R/myPersonalitySQL.R: -------------------------------------------------------------------------------- 1 | #' Execute SQL query on myPersonality database server. 2 | #' 3 | #' This function executes SQL queries on the Cambridge Psychometrics Centre database. It is meant for advanced users only. 4 | #' By default, users only have privileges for queries using SELECT statements. 5 | #' 6 | #' @param query SQL query string to be executed. Defaults to "SHOW TABLES;" 7 | #' @keywords manip 8 | #' @import data.table 9 | #' @export 10 | #' @return A \code{data.table} object. 11 | #' @note The query must be enclosed in \bold{single} quotation marks. 12 | #' @examples 13 | #' \dontrun{ 14 | #' myPersonalitySQL("SELECT * FROM demog") 15 | #' } 16 | 17 | myPersonalitySQL <- function(query = 'SHOW TABLES;', user = NULL, password = NULL) { 18 | 19 | if (!interactive()) {return(NULL)} 20 | 21 | # Check whether necessary environment variables exist. If not, run configuration. 22 | if ( 23 | Sys.getenv("myPersonality_user") %in% c("", "connection_test") & 24 | Sys.getenv("myPersonality_password") %in% c("", "foobar") & 25 | interactive() 26 | ) { 27 | config(user, password) 28 | } 29 | 30 | # Get environment variables needed for db access 31 | myPersonality_host <- Sys.getenv("myPersonality_host") 32 | myPersonality_user <- Sys.getenv("myPersonality_user") 33 | myPersonality_password <- Sys.getenv("myPersonality_password") 34 | myPersonality_database <- Sys.getenv("myPersonality_database") 35 | 36 | # Check whether the user is on a Windows or Mac system. 37 | # This is needed, because Mac users will connect via RMySQL 38 | # and Windows users via RODBC. 39 | 40 | if (Sys.info()[1] == "Darwin") { 41 | # Access for Mac users 42 | if (!require(RMySQL)) { 43 | # Install RMySQL if not available 44 | install.packages("RMySQL") 45 | require(RMySQL) 46 | } 47 | 48 | # Establish MySQL connection 49 | # client.flag=32 enables compression 50 | con <- dbConnect("MySQL", host = myPersonality_host, user = myPersonality_user, password = myPersonality_password, dbname = myPersonality_database, client.flag=32) 51 | 52 | # Log query before execution (helps to identify user queries that fail to execute) 53 | dbGetQuery(con, sprintf("INSERT INTO `_usage_log` (query, user) VALUES ('%s','%s')", query, myPersonality_user)) 54 | 55 | # Run query with timer 56 | timer <- system.time(results <- dbGetQuery(con, query)) 57 | 58 | # Log query execution time after execution (helps to identify queries could be optimized) 59 | dbGetQuery(con, sprintf("UPDATE `_usage_log` SET `execution_time` = %f WHERE `id` = (SELECT MAX(`id`) AS `id` FROM (SELECT `id` FROM `_usage_log` WHERE `user` = '%s') AS x)", timer[3], myPersonality_user)) 60 | 61 | dbDisconnect(con) 62 | return(results) 63 | 64 | } else { 65 | # PC code in here 66 | if (!require(RODBC)) { 67 | # Install RODBC if not available 68 | install.packages("RODBC") 69 | require(RODBC) 70 | } 71 | 72 | channel <- odbcConnect("myPersonality", uid = myPersonality_user, pwd = myPersonality_password) 73 | sqlQuery(channel, sprintf("USE %s;", myPersonality_database)) # Use the right database 74 | 75 | # Log query before execution (helps to identify user queries that fail to execute) 76 | sqlQuery(channel, sprintf("INSERT INTO `_usage_log` (query, user) VALUES ('%s','%s')", query, myPersonality_user)) 77 | 78 | # Run query with timer 79 | timer <- system.time(results <- sqlQuery(channel, query)) 80 | 81 | # Log query execution time after execution (helps to identify queries could be optimized) 82 | sqlQuery(channel, sprintf("UPDATE `_usage_log` SET `execution_time` = %f WHERE `id` = (SELECT MAX(`id`) AS `id` FROM (SELECT `id` FROM `_usage_log` WHERE `user` = '%s') AS x)", timer[3], myPersonality_user)) 83 | 84 | odbcClose(channel) 85 | return(results) 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /R/myPersonality_pkg.R: -------------------------------------------------------------------------------- 1 | #' Easy access to myPersonality dataset. 2 | #' 3 | #' \code{myPersonality} package provides tools that make it easy to access data in the large dataset collected and maintained by the Cambridge Psychometrics Centre. 4 | #' 5 | #' @section Register as a collaborator: 6 | #' To gain access to the data, please request access privileges by registering as a collaborator. 7 | #' For information how to register, please visit \url{http://mypersonality.org/wiki/doku.php?id=database_use_guidelines}. 8 | #' 9 | #' @section Getting started: 10 | #' Once you have your user name and password, start by running \code{myPersonality()} function. 11 | #' You will be prompted for your user name and password. Once successfully connected, you will see a list of functions for accessing the data (e.g., \code{participants()}. 12 | #' These functions are dynamically created depending on your access rights. 13 | #' The data access functions are self-documenting: by running one without arguments (e.g., \code{participants()}), the function will print additional information about itself. 14 | #' 15 | #' @section Accessing data: 16 | #' Each data access function allows you to request data from the myPersonality database by listing variable names available through the function. 17 | #' For example, to retrieve age and gender data of all participants in the database, you can type \code{participants("age", "gender")}. 18 | #' To find out which variables are availabe through each function, you can type the function without any arguments. 19 | #' 20 | #' For more information, please visit \url{https://github.com/vanderlowe/myPersonality/blob/master/README.md}. 21 | #' 22 | #' @references Kosinski, M., Stillwell D. J., & Graepel, T. (2013). Private traits and attributes are predictable from digital records of human behavior. Proceedings of the National Academy of Sciences, 110(15), 5802-5805. 23 | #' @aliases myPersonalityPackage 24 | #' @seealso \link{myPersonality}, \link{findVariable}, \link{explainVariable} 25 | #' @import data.table 26 | #' @rdname myPersonalityPackage 27 | #' @name myPersonalityPackage 28 | NULL -------------------------------------------------------------------------------- /R/utilities.R: -------------------------------------------------------------------------------- 1 | getDisplayName <- function(db_name) { 2 | # Get usr-friendly table name based on the actual table name 3 | if (exists(".meta.tables")) { 4 | # Local cache 5 | return(as.character(.meta.tables[as.character(.meta.tables$db_name) == as.character(db_name),]$display_name)) 6 | } else { 7 | # Fetch from db 8 | sql <- sprintf('SELECT display_name FROM _meta_tables WHERE db_name = "%s"', db_name) 9 | return(as.character(myPersonalitySQL(sql)$display_name)) 10 | } 11 | } 12 | 13 | getDbName <- function(display_name) { 14 | # Get usr-friendly table name based on the actual table name 15 | if (exists(".meta.tables")) { 16 | # Local cache 17 | return(as.character(.meta.tables[as.character(.meta.tables$display_name) == as.character(display_name),]$db_name)) 18 | } else { 19 | sql <- sprintf('SELECT db_name FROM _meta_tables WHERE display_name = "%s"', display_name) 20 | return(myPersonalitySQL(sql)$db_name) 21 | } 22 | } 23 | 24 | showInfo <- function(x, prefix = "", postfix = "\n") { 25 | if (is.null(x)) {return(invisible(x))} 26 | if (!x %in% c(NA, "", " ")) { 27 | if (!prefix == "") {cat(prefix)} 28 | cat(as.character(x), postfix, sep = "") 29 | } 30 | } 31 | 32 | defactor <- function(df) { 33 | for (i in 1:ncol(df)) { 34 | if (class(df[, i]) == "factor") { 35 | df[, i] <- as.character(df[, i]) 36 | } 37 | } 38 | return(df) 39 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | myPersonality 2 | ============= 3 | 4 | The 'myPersonality' R package provides easy access to a rich dataset created by [Cambridge Psychometrics Centre](http://www.psychometrics.cam.ac.uk). To learn more about the dataset itself, please visit [myPersonality research wiki](http://mypersonality.org). Because the dataset contains millions of respondents, the easiest way to access the data is through a database connection. The _myPersonality_ package provides a set of utility functions to request specific variables of interest from the data. 5 | 6 | # Data access 7 | The dataset is only available by special permission to our academic collaborators. If you are interested in using the dataset in your research, please [contact us](http://mypersonality.org/wiki/doku.php?id=database_use_guidelines) to request access privileges. **Please make sure you have received your user name and password from Cambridge Psychometrics Centre before proceeding with installation.** 8 | 9 | # Installation 10 | Please follow these instructions carefully and to the letter. You need to do the setup only once. 11 | 12 | ## Install database drivers (Windows users only) 13 | On computers running Windows operating system, _myPersonality_ depends on _RODBC_ package to establish database connections. This requires the installation of a _MySQL ODBC driver_ and _Data Source Name_ (DSN) on your computer. 14 | 15 | ### MySQL ODBC driver 16 | Please download and install the _MySQL ODBC driver_ from [MySQL developer website](http://dev.mysql.com/downloads/connector/odbc/5.2.html#downloads). On the download page, find _Windows (x86, 64-bit), MSI Installer_, if you are running R in a 64-bit environment. For a 32-bit compatible MySQL driver, please choose _Windows (x86, 32-bit), MSI Installer 17 | Connector-ODBC_. 18 | * If you are not sure whether your PC running the 32-bit or 64-bit version of Windows, Microsoft has provided [information on how to find it out](http://windows.microsoft.com/en-GB/windows7/find-out-32-or-64-bit). 19 | 20 | ### Data Source Name 21 | 1. Once you have installed the _MySQL ODBC driver_, click the Windows Start menu and type `ODBC` into the search box. (For 32-bit Windows machines, please use `odbcad32` instead.) 22 | 2. Click on "Data Source (ODBC)" in the search results. Wait for the program to open. 23 | 3. Select the "System DSN" tab. 24 | 4. Click the "Add..." button. 25 | 5. Choose "MySQL ODBC 5.2w Driver". 26 | 6. In the "Data Source Name:" field, type `myPersonality`. 27 | 7. In the "TCP/IP Server field:", type `alex.e-psychometrics.com`. **Note to Michal and David: The production server details go here when the server is up.** 28 | 8. Leave "Description", "User", "Password", and "Database" fields blank. The "Port:" field should read 3306 by default. 29 | 9. Optionally, click "Details >>" and check the box "Use compression". This will improve data download speeds. 30 | 10. Click "OK" to save the DSN. 31 | 32 | ## Install the `myPersonality` package itself 33 | The _myPersonality_ package is installed using the _[devtools](https://github.com/hadley/devtools/)_ package: 34 | ``` 35 | install.packages("devtools") 36 | library(devtools) 37 | install_github('myPersonality', username = 'vanderlowe') 38 | ``` 39 | 40 | # Example usage: Novice users 41 | At the start of each session, you must load the _myPersonality_ package to make the functions available in R. You can do this by typing: 42 | ``` 43 | library(myPersonality) 44 | ``` 45 | 46 | ## Testing the connection 47 | To test that your connection works, type the following: 48 | ``` 49 | myPersonality() 50 | ``` 51 | 52 | First, you should be prompted for your user name and password. Once you enter these, you should see the following message (or similar, as the available functions depend on the extent of your access privileges): 53 | ``` 54 | Currently, the following data access functions are available to you: 55 | adress() 56 | participants() 57 | satisfaction_with_life() 58 | ``` 59 | Each of these functions gives you access to data in our database. 60 | 61 | ## Exploring the dataset 62 | Let's start with basic information about participants in myPersonality database. Go ahead and type: 63 | ``` 64 | participants() 65 | ``` 66 | You should see the following message: 67 | ``` 68 | This table contains basic demographic information about the myPersonality participants. 69 | For more information about these data, please see: Kosinski, M., Stillwell D. J., & Graepel, T. (2013). Private traits and attributes are predictable from digital records of human behavior. Proceedings of the National Academy of Sciences, 110(15), 5802-5805. 70 | http://mypersonality.org 71 | 72 | The database table demog contains the following variables: 73 | age 74 | birthday 75 | gender Gender of the user* 76 | interested_in Interested In* 77 | locale language version of Facebook interface* 78 | mf_dating Meeting other for dating 79 | mf_friendship Meeting other for friendship 80 | mf_networking Meeting other for networking 81 | mf_random Meeting other for random play 82 | mf_relationship Meeting other for relationship 83 | mf_whatever Meeting other for whatever I can get 84 | network_size Number of friends* 85 | relationship_status Relationship status* 86 | timezone User's Timezone 87 | userid Unique user identifier 88 | 89 | * Use command 'explainVariable("variable_name_here")' to see additional variable notes. 90 | The table has 4282858 rows. 91 | ``` 92 | Since we did not specify which variable (i.e., a field in the database) we wanted, the function provided us with information about the participant data, including a list of available variables. 93 | 94 | You may have noticed that some of the variable descriptions are marked with an asterisk. This means that there is additional information available about this variable. The 'explainVariable' function allows you to find out this extra information. Try typing: 95 | ``` 96 | explainVariable("gender") 97 | ``` 98 | The results should look something like this: 99 | ``` 100 | Variable: gender 101 | Access function to retrieve data: participants('gender') 102 | Description: Gender of the user 103 | 104 | Details: 105 | 1=female, 0=male 106 | ``` 107 | With this additional information, we can tell that participants whose gender is coded as `1` are female. For your convenience, the `Access function` line provides you the code to retrieve the data. 108 | 109 | To find variables that might be interesting to you, you can use the `findVariables` function to search the dataset. Let's say that you are interested in the zip code of the participants. To find out if the dataset contains zip codes, you can type: 110 | ``` 111 | findVariable("zip") 112 | ``` 113 | This function will search the variable name, description, and additional notes for instances of the word 'zip' and return the results for your inspection. Please note that if there are multiple results, the function will stop to ask you to indicate which result you would like to see in detail: 114 | ``` 115 | The query returns multiple results. 116 | 1: address - current_location_zip 117 | 2: address - hometown_location_zip 118 | Please select one from the list by entering its number: 119 | ``` 120 | If you are interested in the hometown zip (instead of the current location), you would type `2` on your keyboard, producing information about variable `hometown_location_zip`. 121 | ``` 122 | Variable: hometown_location_zip 123 | Access function: address('hometown_location_zip') 124 | ``` 125 | 126 | ## Loading data 127 | Let's say we want to get the age, gender, and relationship status of all users and assign it to variable `people`. For this, you would type: 128 | ``` 129 | people <- participants("age", "gender", "relationship_status") 130 | ``` 131 | You can provide as many or as few variable names as you wish. However, keep in mind that more variables mean more data to transfer and requesting many variables at a time may be very slow. 132 | 133 | ### Filtering data 134 | You can also easily filter the results by specifying a criterion after the variable name. Let's get the same data as above for participants over the age of 90 and assign the results to variable `elderly`. 135 | ``` 136 | elderly <- participants("age > 90", "gender", "relationship_status") 137 | ``` 138 | 139 | ## Merging data 140 | The results from different tables can be combined. Let's get data for all myPersonality participants over the age of 90 who live in smallish communities with a population greater than 1000, but less than 10,000. Since we already have the variable `elderly` from the example above, we only need to request the necessary location data. 141 | ``` 142 | location <- address("current_location_city", "population > 1000", "population < 10,000") 143 | elderly.in.small.towns <- merge(elderly, location) 144 | ``` 145 | 146 | # Example usage: Advanced users 147 | Behind the scenes, all data access is done via the `myPersonalitySQL` function. It allows you to execute SQL queries on the database (only read-only queries are allowed). 148 | ``` 149 | elderly.in.Miami <- myPersonalitySQL(' 150 | SELECT demog.age, demog.gender, demog.relationship_status, address.current_location_city 151 | FROM demog 152 | LEFT JOIN address 153 | ON demog.userid = address.userid 154 | WHERE demog.age > 90 AND address.current_location_city = "Miami" 155 | ') 156 | ``` 157 | Please note that the query must be enclosed in *single* quotation marks. Also, the actual database table names may differ from the data access function names. To find out the database table names, run the corresponding data access function. The database table name is shown in the output, right before the list of variables. 158 | 159 | # Known ~~bugs~~ features 160 | * You may encounter "Error in fetch(key) : internal error -3 in R_decompress1" if trying to access documentation immediately after installing `myPersonality` package. This is a [known issue with R](http://stackoverflow.com/questions/10373098/error-in-fetchkey-internal-error-3-in-r-decompress1). Restarting R will resolve the issue. -------------------------------------------------------------------------------- /inst/tableDefinitions.R: -------------------------------------------------------------------------------- 1 | # This script creates functions to access data based on the tables in the database. 2 | # Please note that the table names must be matched to display names in _meta_tables table. 3 | 4 | all.tables <- myPersonalitySQL("SHOW TABLES")[,1] 5 | .meta.tables <- myPersonalitySQL("SELECT * FROM _meta_tables") 6 | 7 | for (this.table in all.tables) { 8 | if (substring(this.table, 1, 1) == "_") {next} # Skip meta tables 9 | 10 | # Create functions from database tables 11 | function.template <- "%s <- function(...) { mySQL.table.name <- '%s' 12 | return(myPersonality:::createVirtualTable(mySQL.table.name, ...)) 13 | }" 14 | 15 | sql <- sprintf('SELECT display_name FROM `_meta_tables` WHERE db_name = "%s"', this.table) 16 | 17 | display.name <- myPersonalitySQL(sql) 18 | if (nrow(display.name) == 0) { 19 | # There was no suitable display name in _meta_tables. 20 | next 21 | } else { 22 | display.name <- display.name[, 1] 23 | } 24 | 25 | eval(parse(text = sprintf(function.template, display.name, this.table))) 26 | } 27 | rm(all.tables, this.table, display.name, function.template, sql) -------------------------------------------------------------------------------- /man/explainTable.Rd: -------------------------------------------------------------------------------- 1 | \name{explainTable} 2 | \alias{explainTable} 3 | \title{Information about myPersonality database tables.} 4 | \usage{ 5 | explainTable(table.name) 6 | } 7 | \arguments{ 8 | \item{table.name}{A character string that specifies the 9 | table that you are interested in.} 10 | } 11 | \description{ 12 | This function retrieves metadata about tables from the 13 | Cambridge Psychometrics Centre's myPersonality database. 14 | However, it is easier to run the data access function 15 | without an argument to get the same results (e.g., 16 | \code{participants()}). 17 | } 18 | \examples{ 19 | \dontrun{ 20 | explainTable("address") 21 | } 22 | } 23 | \seealso{ 24 | \link{myPersonality}, \link{explainVariable} 25 | } 26 | \keyword{attribute} 27 | 28 | -------------------------------------------------------------------------------- /man/explainVariable.Rd: -------------------------------------------------------------------------------- 1 | \name{explainVariable} 2 | \alias{explainVariable} 3 | \title{Provide information about specific myPersonality variable.} 4 | \usage{ 5 | explainVariable(variable.name) 6 | } 7 | \arguments{ 8 | \item{variable.name}{A character string that specifies 9 | the variable of interest.} 10 | } 11 | \description{ 12 | This function describes the speficied variable by 13 | fetching additional metainformation about it from the 14 | myPersonality database. If a variable exists in multiple 15 | tables, the function asks the user to specify which 16 | table/variable pair to show. 17 | } 18 | \examples{ 19 | \dontrun{ 20 | explainVariable("gender") 21 | } 22 | # Multiple tables contain variable 'userid'. 23 | # Read instructions in the function output to select the right table. 24 | \dontrun{ 25 | explainVariable("userid") 26 | } 27 | } 28 | \seealso{ 29 | \link{findVariable} 30 | } 31 | \keyword{attribute} 32 | 33 | -------------------------------------------------------------------------------- /man/findVariable.Rd: -------------------------------------------------------------------------------- 1 | \name{findVariable} 2 | \alias{findVariable} 3 | \title{Locate variables of interest by a text search.} 4 | \usage{ 5 | findVariable(query) 6 | } 7 | \arguments{ 8 | \item{query}{A character string containing the text to be 9 | searched. It is advisable to use queries that are longer 10 | than 3 characters.} 11 | } 12 | \description{ 13 | This function attempts to locate variables whose name, 14 | description, or notes contain a specific sequence of 15 | characters. Please note that the search does not pay 16 | attention to word boundaries, so search query 'age' would 17 | also return 'marriage' as a result. 18 | } 19 | \examples{ 20 | \dontrun{ 21 | findVariable("sex") 22 | } 23 | } 24 | \seealso{ 25 | \link{explainVariable} 26 | } 27 | \keyword{attribute} 28 | 29 | -------------------------------------------------------------------------------- /man/myPersonality.Rd: -------------------------------------------------------------------------------- 1 | \name{myPersonality} 2 | \alias{myPersonality} 3 | \title{Set up a connection to myPersonality database server.} 4 | \usage{ 5 | myPersonality(user = NULL, password = NULL) 6 | } 7 | \arguments{ 8 | \item{user}{Your user name (as provided by the Cambridge 9 | Psychometrics Centre).} 10 | 11 | \item{password}{Your password (as provided by the 12 | Cambridge Psychometrics Centre)} 13 | } 14 | \value{ 15 | A printout of data access functions. 16 | } 17 | \description{ 18 | This function starts your session and sets up your 19 | connection to the Cambridge Psychometrics Centre's 20 | myPersonality database. Running this function and 21 | providing valid user name and password when prompted sets 22 | up other data access functions, such as 23 | \code{participants()}. The exact names and number of the 24 | data access functions depends on your access privileges. 25 | } 26 | \details{ 27 | Once the other data access functions have been set up, 28 | you can use them to retrieve and filter data (see 29 | examples below). For more instructions, please visit 30 | \url{https://github.com/vanderlowe/myPersonality/blob/master/README.md} 31 | and see \link{myPersonalityPackage}. 32 | } 33 | \note{ 34 | You must contact the Cambridge Psychometrics Centre to 35 | obtain access privileges. \bold{You cannot use this 36 | package without a valid user name and password.} If you 37 | do not have one, please visit 38 | \url{http://mypersonality.org/wiki/doku.php?id=database_use_guidelines} 39 | to register as a collaborator. 40 | } 41 | \examples{ 42 | # Establish database connection and generate data access functions. 43 | \dontrun{ 44 | myPersonality() 45 | } 46 | # Show information about participants, including available variables. 47 | \dontrun{ 48 | participants() 49 | } 50 | # Retrieve age and gender data for all participants. 51 | \dontrun{ 52 | participants("age", "gender") 53 | } 54 | # Retrieve age and gender data for participants older than 90 years. 55 | \dontrun{ 56 | participants("age > 90", "gender") 57 | } 58 | } 59 | \seealso{ 60 | \link{myPersonalityPackage}, \link{findVariable} 61 | } 62 | \keyword{manip} 63 | 64 | -------------------------------------------------------------------------------- /man/myPersonalityPackage.Rd: -------------------------------------------------------------------------------- 1 | \name{myPersonalityPackage} 2 | \alias{myPersonalityPackage} 3 | \title{Easy access to myPersonality dataset.} 4 | \description{ 5 | \code{myPersonality} package provides tools that make it 6 | easy to access data in the large dataset collected and 7 | maintained by the Cambridge Psychometrics Centre. 8 | } 9 | \section{Register as a collaborator}{ 10 | To gain access to the data, please request access 11 | privileges by registering as a collaborator. For 12 | information how to register, please visit 13 | \url{http://mypersonality.org/wiki/doku.php?id=database_use_guidelines}. 14 | } 15 | 16 | \section{Getting started}{ 17 | Once you have your user name and password, start by 18 | running \code{myPersonality()} function. You will be 19 | prompted for your user name and password. Once 20 | successfully connected, you will see a list of functions 21 | for accessing the data (e.g., \code{participants()}. 22 | These functions are dynamically created depending on your 23 | access rights. The data access functions are 24 | self-documenting: by running one without arguments (e.g., 25 | \code{participants()}), the function will print 26 | additional information about itself. 27 | } 28 | 29 | \section{Accessing data}{ 30 | Each data access function allows you to request data from 31 | the myPersonality database by listing variable names 32 | available through the function. For example, to retrieve 33 | age and gender data of all participants in the database, 34 | you can type \code{participants("age", "gender")}. To 35 | find out which variables are availabe through each 36 | function, you can type the function without any 37 | arguments. 38 | 39 | For more information, please visit 40 | \url{https://github.com/vanderlowe/myPersonality/blob/master/README.md}. 41 | } 42 | \references{ 43 | Kosinski, M., Stillwell D. J., & Graepel, T. (2013). 44 | Private traits and attributes are predictable from 45 | digital records of human behavior. Proceedings of the 46 | National Academy of Sciences, 110(15), 5802-5805. 47 | } 48 | \seealso{ 49 | \link{myPersonality}, \link{findVariable}, 50 | \link{explainVariable} 51 | } 52 | 53 | -------------------------------------------------------------------------------- /man/myPersonalitySQL.Rd: -------------------------------------------------------------------------------- 1 | \name{myPersonalitySQL} 2 | \alias{myPersonalitySQL} 3 | \title{Execute SQL query on myPersonality database server.} 4 | \usage{ 5 | myPersonalitySQL(query = "SHOW TABLES;", user = NULL, 6 | password = NULL) 7 | } 8 | \arguments{ 9 | \item{query}{SQL query string to be executed. Defaults to 10 | "SHOW TABLES;"} 11 | } 12 | \value{ 13 | A \code{data.table} object. 14 | } 15 | \description{ 16 | This function executes SQL queries on the Cambridge 17 | Psychometrics Centre database. It is meant for advanced 18 | users only. By default, users only have privileges for 19 | queries using SELECT statements. 20 | } 21 | \note{ 22 | The query must be enclosed in \bold{single} quotation 23 | marks. 24 | } 25 | \examples{ 26 | \dontrun{ 27 | myPersonalitySQL("SELECT * FROM demog") 28 | } 29 | } 30 | \keyword{manip} 31 | 32 | -------------------------------------------------------------------------------- /man/print.table.help.Rd: -------------------------------------------------------------------------------- 1 | \name{print.table.help} 2 | \alias{print.table.help} 3 | \title{Prints myPersonality table information.} 4 | \usage{ 5 | \method{print}{table.help} (x) 6 | } 7 | \arguments{ 8 | \item{x}{An object returned by explainTable} 9 | } 10 | \description{ 11 | This function extends generic print function. 12 | } 13 | \examples{ 14 | \dontrun{explainTable("address")} 15 | } 16 | \keyword{attribute} 17 | 18 | -------------------------------------------------------------------------------- /man/print.variable.help.Rd: -------------------------------------------------------------------------------- 1 | \name{print.variable.help} 2 | \alias{print.variable.help} 3 | \title{Prints myPersonality variable information.} 4 | \usage{ 5 | \method{print}{variable.help} (x) 6 | } 7 | \arguments{ 8 | \item{x}{An object returned by either findVariable or 9 | explainVariable functions} 10 | } 11 | \description{ 12 | This function extends generic print function. 13 | } 14 | \examples{ 15 | # explainVariable("gender") 16 | # explainVariable("userid") # Multiple tables contain variable 'userid' 17 | } 18 | \keyword{attribute} 19 | 20 | -------------------------------------------------------------------------------- /tests/virtualTable_tests.R: -------------------------------------------------------------------------------- 1 | require(testthat) 2 | 3 | test_that("extractColumn extracts the right information", { 4 | # Normal cases 5 | expect_that(extractColumn("foo"), equals("foo")) 6 | expect_that(extractColumn("foo > 1"), equals("foo")) 7 | expect_that(extractColumn("foo<1"), equals("foo")) 8 | expect_that(extractColumn("foo = 1"), equals("foo")) 9 | expect_that(extractColumn("foo = 'bar'"), equals("foo")) 10 | expect_that(extractColumn("foo IS NOT 'bar'"), equals("foo")) 11 | 12 | # Abnormal cases 13 | expect_error(extractColumn(c("foo","bar"))) # Because the function is not vectorized (yet). 14 | expect_error(extractColumn(1)) # Can only process strings 15 | expect_error(extractColumn()) # Needs input 16 | }) 17 | 18 | test_that("isWHERE", { 19 | # Normal cases 20 | expect_false(isWHERE("bar")) 21 | expect_true(isWHERE("bar > 1")) 22 | 23 | # Abnormal cases 24 | expect_error(isWHERE()) 25 | expect_error(isWHERE(1)) 26 | }) 27 | 28 | test_that("function arguments get parsed", { 29 | expect_equal(getFunctionArguments(), NULL) 30 | expect_equal(getFunctionArguments("foo"), "foo") 31 | expect_equal(getFunctionArguments(arg1 = "foo"), "foo") 32 | expect_equal(getFunctionArguments("foo", "bar > 1", foobar = T), c("foo", "bar > 1", "TRUE")) 33 | }) 34 | 35 | test_that("column names and WHERE statements get parsed from arguments", { 36 | expect_equal(processFunctionArguments(), NULL) 37 | expect_equal(processFunctionArguments("foo"), list(columns = "foo")) 38 | expect_equal(processFunctionArguments("foo > 1"), list(columns = "foo", where = "foo > 1")) 39 | expect_equal(processFunctionArguments("foo > 1", "bar"), list(columns = c("foo", "bar"), where = "foo > 1")) 40 | }) 41 | 42 | test_that("participants virtual table can be created", { 43 | # To test this, you must have a local MySQL version of myPersonality_dev database 44 | localConfig() 45 | 46 | # Normal cases 47 | expect_output(participants("age"), "SELECT demog.userid,demog.age FROM demog") 48 | expect_output(participants("age > 18"), "SELECT demog.userid,demog.age FROM demog WHERE demog.age > 18") 49 | 50 | # Abnormal cases 51 | expect_that(participants(), throws_error()) 52 | expect_that(participants("foo"), throws_error()) # Cannot request non-existent variables 53 | 54 | }) 55 | 56 | --------------------------------------------------------------------------------