├── .Rbuildignore ├── .gitignore ├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── R ├── api.R ├── stack_answers.R ├── stack_badges.R ├── stack_comments.R ├── stack_info.R ├── stack_posts.R ├── stack_privileges.R ├── stack_questions.R ├── stack_revisions.R ├── stack_search.R ├── stack_sites.R ├── stack_suggested_edits.R ├── stack_tags.R └── stack_users.R ├── README.md ├── man-roxygen ├── api_options.R ├── type_answer.R └── type_question.R ├── man ├── combine_url.Rd ├── stack_GET.Rd ├── stack_answers.Rd ├── stack_badges.Rd ├── stack_comments.Rd ├── stack_info.Rd ├── stack_parse.Rd ├── stack_posts.Rd ├── stack_privileges.Rd ├── stack_questions.Rd ├── stack_revisions.Rd ├── stack_search.Rd ├── stack_sites.Rd ├── stack_suggested_edits.Rd ├── stack_tags.Rd └── stack_users.Rd ├── stackr.Rproj └── vignettes ├── example.Rmd └── introduction.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | man-roxygen 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | inst/doc 5 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: stackr 2 | Title: Client for the Stack Exchange API 3 | Version: 0.0.0.9000 4 | Authors@R: person("David", "Robinson", email = "admiral.david@gmail.com", role = c("aut", "cre")) 5 | Description: This is a simple client for the read-only features of the Stack Exchange API. 
# API utilities

#' Parse the results of a Stack Exchange API query into a data.frame.
#'
#' The additional metadata, such as "has_more", "quota_max", and
#' "quota_remaining" is included in a list as the attribute "metadata".
#'
#' @param req a request from httr::GET
#'
#' @return A data.frame with one row per returned item, carrying the
#' response wrapper (everything except "items") as the attribute
#' "metadata". When the response holds no items, a data.frame with zero
#' rows and zero columns is returned, still carrying "metadata", so that
#' callers can read "has_more" and "backoff" without special-casing.
#'
stack_parse <- function(req) {
  text <- httr::content(req, as = "text", encoding = "UTF-8")

  if (identical(text, "")) stop("No output to parse", call. = FALSE)

  j <- jsonlite::fromJSON(text)
  if (!is.null(j[["error_id"]])) {
    stop("Error ", j[["error_id"]], ": ", j[["error_message"]],
         call. = FALSE)
  }

  # everything except the items themselves is wrapper metadata; select by
  # name rather than by position so field order does not matter
  metadata <- j[setdiff(names(j), "items")]
  if (!is.null(metadata[["backoff"]])) {
    message("Response has backoff parameter: must wait ",
            metadata[["backoff"]],
            " seconds before performing same method")
  }

  items <- j[["items"]]

  if (length(items) == 0 || NROW(items) == 0) {
    # keep the metadata even when there is nothing to return
    items <- data.frame()
  } else {
    # fix tags to be comma-separated
    if (!is.null(items[["tags"]])) {
      items[["tags"]] <- vapply(items[["tags"]], paste, character(1),
                                collapse = ",")
    }
    # "shallow user" ends up being a data.frame. Turn it into separate
    # columns
    if (any(vapply(items, is.data.frame, logical(1)))) {
      items <- jsonlite::flatten(items)
    }
    # replace dots, as in owner.user_id, with _
    colnames(items) <- gsub(".", "_", colnames(items), fixed = TRUE)
    # convert all dates, which fortunately always end in _date
    for (col in grep("_date$", colnames(items), value = TRUE)) {
      items[[col]] <- as.POSIXct(items[[col]], origin = "1970-01-01")
    }
  }

  # add metadata as an attribute
  attr(items, "metadata") <- metadata

  items
}


#' Make a GET request to the Stack Exchange API
#'
#' Requests up to \code{num_pages} consecutive pages, honouring any
#' "backoff" the API asks for, and stops early once the API reports that
#' there are no more results.
#'
#' @param path the query path, such as "answers/" or "users/{id}"
#' @param site site to query; by default Stack Overflow
#' @param page which page to start from
#' @param num_pages number of consecutive pages to query; by default 1
#' @param ... additional parameters to the method
#'
#' @return A data.frame combining the items of all retrieved pages, with
#' the metadata of the last response as the attribute "metadata".
stack_GET <- function(path, site = "stackoverflow", page = 1, num_pages = 1,
                      ...) {
  base_path <- "https://api.stackexchange.com/2.2/"
  query <- list(site = site, page = page, ...)

  stack_key <- Sys.getenv("STACK_EXCHANGE_KEY")
  if (stack_key != "") {
    query$key <- stack_key
  }

  tbls <- list()
  metadata <- NULL
  while (num_pages > 0) {
    # NOTE(review): httr documents `path` as replacing the path component
    # of the URL, so the "2.2/" prefix of base_path is presumably dropped
    # here -- confirm, and consider httr::GET(paste0(base_path, path), ...)
    req <- httr::GET(base_path, path = path, query = query)

    tbl <- stack_parse(req)
    tbls[[length(tbls) + 1L]] <- tbl

    metadata <- attr(tbl, "metadata")

    if (!is.null(metadata[["backoff"]])) {
      Sys.sleep(metadata[["backoff"]])
    }

    # isTRUE() also covers a response without a has_more field, which
    # would otherwise make the condition zero-length and raise an error
    if (!isTRUE(metadata[["has_more"]])) {
      # finished pagination, can quit
      break
    }

    # set up for next iteration
    query$page <- query$page + 1
    num_pages <- num_pages - 1
  }

  # combine them all
  ret <- as.data.frame(dplyr::bind_rows(tbls))
  attr(ret, "metadata") <- metadata
  ret
}


#' construct a query URL for a request, including checking special
#' operations
#'
#' @param base base of query, such as "answers" or "questions"
#' @param id vector of IDs to search
#' @param special special parameter, which specifies the action (such as
#' retrieving an associated object with an ID)
#' @param special_ids vector of possible special parameters that require IDs
#' @param special_no_ids vector of possible special parameters that don't
#' require IDs
#'
#' @return A single string of the form "base/", "base/id1;id2/" or either
#' of those followed by the matched \code{special}.
combine_url <- function(base, id, special = NULL, special_ids = c(),
                        special_no_ids = c()) {
  url <- paste0(base, "/")

  if (!is.null(id)) {
    if (is.numeric(id)) {
      # paste() would render round IDs such as 100000 as "1e+05"
      id <- format(id, scientific = FALSE, trim = TRUE)
    }
    url <- paste0(url, paste(id, collapse = ";"), "/")
  }

  if (!is.null(special)) {
    # a value may legitimately appear in both sets (e.g. "recipients"),
    # so de-duplicate before matching
    special <- match.arg(special, unique(c(special_ids, special_no_ids)))

    if (is.null(id)) {
      if (!(special %in% special_no_ids)) {
        stop(paste(special, "requires one or more IDs"), call. = FALSE)
      }
    } else {
      if (!(special %in% special_ids)) {
        stop(paste(special, "does not accept IDs"), call. = FALSE)
      }
    }
    url <- paste0(url, special)
  }

  url
}
#' Query answers from the Stack Exchange API
#'
#' Retrieve a list of answers, or objects related to one or more specific
#' answers.
#'
#' @param id A vector containing one or more answer IDs, or \code{NULL}
#' (the default) to query all answers
#' @param special Optional action on the given answers. The accepted values
#' are \code{"comments"} (return the comments on the answers rather than
#' the answers themselves) and \code{"questions"}; both require \code{id}
#' @template api_options
#'
#' @return A \code{data.frame} of answers.
#'
#' @template type_answer
#'
#' @export
stack_answers <- function(id = NULL, special = NULL, ...) {
  # every action offered here operates on specific answers
  needs_id <- c("comments", "questions")
  endpoint <- combine_url("answers", id, special, needs_id)
  stack_GET(endpoint, ...)
}


#' Query badges from the Stack Exchange API
#'
#' Retrieve badges from a particular site.
#'
#' @param id A vector containing one or more IDs of badges, or \code{NULL}
#' (the default) to retrieve all badges in alphabetical order
#' @param special One of \code{c("name", "tags", "recipients")}, to
#' retrieve only non-tag badges, only tag badges, or to retrieve recipients
#' of the given badges
#' @template api_options
#'
#' @return A \code{data.frame} of badges (TODO)
#'
#' @export
stack_badges <- function(id = NULL, special = NULL, ...) {
  # "recipients" is listed in both sets on purpose: it is valid with or
  # without badge IDs
  with_ids <- "recipients"
  without_ids <- c("name", "tags", "recipients")

  endpoint <- combine_url("badges", id, special, with_ids, without_ids)
  stack_GET(endpoint, ...)
}


#' Query comments from the Stack Exchange API
#'
#' Retrieve comments, either by ID or using other filters.
#'
#' @param id A vector containing one or more comment IDs, or \code{NULL}
#' (the default) to query all comments
#' @template api_options
#'
#' @return A \code{data.frame} of comments (TODO)
#'
#' @export
stack_comments <- function(id = NULL, ...) {
  endpoint <- combine_url("comments", id)
  stack_GET(endpoint, ...)
}


#' Query general info for a Stack Exchange site
#'
#' Retrieve general information about a site, such as the number of users,
#' answers, and questions, and statistics for activity per minute.
#'
#' @param site Stack Exchange site to query (default Stack Overflow)
#' @param ... Additional API arguments (not used)
#'
#' @return A one-row \code{data.frame} containing statistics
#' about the site.
#'
#' @export
stack_info <- function(site = "stackoverflow", ...) {
  endpoint <- "info"
  stack_GET(endpoint, site = site, ...)
}
#' Query posts from the Stack Exchange API
#'
#' Retrieve posts (questions or answers), based on IDs.
#'
#' @param id A vector containing one or more post IDs, or \code{NULL}
#' (the default) to query all questions and answers
#' @param special One of \code{c("comments", "revisions", "suggested-edits")},
#' to return that information associated with the specified posts rather
#' than the posts themselves
#' @template api_options
#'
#' @return A \code{data.frame} of questions or answers (TODO)
#'
#' @export
stack_posts <- function(id = NULL, special = NULL, ...) {
  # all post actions refer to specific posts, hence require IDs
  needs_id <- c("comments", "revisions", "suggested-edits")
  endpoint <- combine_url("posts", id, special, needs_id)
  stack_GET(endpoint, ...)
}


#' Query privileges for a Stack Exchange site
#'
#' Retrieve the reputation-based privileges of a Stack Exchange site.
#'
#' @param site Stack Exchange site to query (default Stack Overflow)
#' @param ... Additional API arguments (not used)
#'
#' @return A \code{data.frame} of the privileges available on the site.
#'
#' @export
stack_privileges <- function(site = "stackoverflow", ...) {
  endpoint <- "privileges"
  stack_GET(endpoint, site = site, ...)
}
#' Query questions from the Stack Exchange API
#'
#' Retrieve a list of questions, or information related to one or more
#' specific questions.
#'
#' @param id A vector containing one or more question IDs
#' @param special One of \code{c("answers", "comments", "linked", "related",
#' "timeline")}, describing what information to retrieve about specific
#' questions, or one of \code{c("featured", "no-answers", "unanswered")},
#' describing a filter to place on returned questions.
#' @template api_options
#'
#' @return A \code{data.frame} of questions.
#'
#' @template type_question
#'
#' @export
stack_questions <- function(id = NULL, special = NULL, ...) {
  # actions on specific questions versus filters over all questions
  per_question <- c("answers", "comments", "linked", "related", "timeline")
  list_filters <- c("featured", "no-answers", "unanswered")

  endpoint <- combine_url("questions", id, special, per_question,
                          list_filters)
  stack_GET(endpoint, ...)
}


#' Query revisions from the Stack Exchange API
#'
#' Retrieve revisions (edits), based on particular IDs.
#'
#' @param id A vector containing one or more IDs of revisions
#' @template api_options
#'
#' @return A \code{data.frame} of revisions (TODO)
#'
#' @details Note that unlike IDs of other types in the API, revision IDs are
#' strings.
#'
#' @export
stack_revisions <- function(id, ...) {
  # revisions cannot be listed without IDs, so reject a missing or NULL id
  has_id <- !missing(id) && !is.null(id)
  if (!has_id) {
    stop("stack_revisions requires one or more IDs")
  }
  endpoint <- combine_url("revisions", id)
  stack_GET(endpoint, ...)
}
#' Search a Stack Exchange site via the API
#'
#' Access the Stack Exchange search functionality.
#'
#' @param intitle string to use for searching title
#' @param tagged vector of tags that must be included in questions; a
#' vector of several tags is joined with ";" before being sent
#' @param nottagged vector of tags to be excluded from questions; joined
#' with ";" in the same way as \code{tagged}
#' @param q free form text parameter that matches questions based on Stack
#' Exchange's (undocumented) algorithm
#' @param accepted boolean to filter for accepted or unaccepted questions
#' @param answers minimum number of answers
#' @param body text that must appear in the body of questions
#' @param closed boolean to filter for open or closed questions
#' @param migrated boolean to filter for questions that were, or were not,
#' migrated to another site
#' @param notice boolean to filter for questions with a post notice
#' @param title text which must appear in a title; redundant with intitle above
#' @param user the id of the user to filter for
#' @param url url that must be included in the question
#' @param views the minimum number of views a question can have
#' @param wiki boolean to filter for questions that are (or are not) community
#' wiki
#' @template api_options
#'
#' @return A \code{data.frame} of questions.
#'
#' @template type_question
#'
#' @details The values that can be used for \code{sort} are:
#' \describe{
#'  \item{activity}{Last activity date}
#'  \item{creation}{Creation date}
#'  \item{votes}{Score}
#'  \item{relevance}{Relevance tab on site (does not allow min or max)}
#' }
#'
#' If any of the arguments from \code{q} onwards is supplied, the
#' "search/advanced/" method is queried and \code{intitle} is sent as
#' \code{title}; otherwise "search/" is queried.
#'
#' @export
stack_search <- function(intitle, tagged, nottagged, q, accepted,
                         answers, body, closed, migrated,
                         notice, title, user, url, views, wiki, ...) {
  # Collect the *values* of the arguments the caller actually supplied.
  # Working from the evaluated values (rather than the unevaluated
  # expressions in match.call()) lets callers pass local variables.
  supplied <- names(match.call(expand.dots = FALSE))[-1]
  supplied <- as.character(setdiff(supplied, "..."))
  args <- c(mget(supplied, envir = environment()), list(...))

  if (is.null(args[["tagged"]]) && is.null(args[["intitle"]])) {
    stop("Either tagged or intitle must be set in searches")
  }

  # the API takes tag lists as a single semicolon-delimited string
  for (tag_arg in c("tagged", "nottagged")) {
    if (!is.null(args[[tag_arg]])) {
      args[[tag_arg]] <- paste(args[[tag_arg]], collapse = ";")
    }
  }

  # named `endpoint` so that it does not shadow the `url` search argument
  endpoint <- "search/"
  extra_args <- c("q", "accepted", "answers", "body", "closed", "migrated",
                  "notice", "title", "user", "url", "views", "wiki")
  if (any(names(args) %in% extra_args)) {
    endpoint <- paste0(endpoint, "advanced/")

    # replace intitle with title
    if (!is.null(args[["intitle"]])) {
      if (!is.null(args[["title"]])) {
        stop("Cannot provide both title and intitle")
      }
      args[["title"]] <- args[["intitle"]]
      args[["intitle"]] <- NULL
    }
  }

  # pass arguments on to stack_GET
  do.call(stack_GET, c(list(endpoint), args))
}


#' Query sites from the Stack Exchange API
#'
#' Query for a list of sites in the network
#'
#' @param ... Extra options are \code{page} and \code{pagesize}
#'
#' @return A \code{data.frame} of sites
#'
#' @export
stack_sites <- function(...) {
  # this method is network-wide, so no site parameter is sent
  stack_GET("sites/", site = NULL, ...)
}
#' Query suggested edits from the Stack Exchange API
#'
#' Query for suggested edits, either based on IDs or on other filters.
#'
#' @param id A vector containing one or more IDs, or none to query
#' all suggested edits
#' @template api_options
#'
#' @return A \code{data.frame} of suggested edits (TODO)
#'
#' @details The options for the "sort" field are \code{c("creation",
#' "approval", "rejection")}, with \code{"creation"} the default.
#'
#' @export
stack_suggested_edits <- function(id = NULL, ...) {
  url <- combine_url("suggested-edits", id)
  stack_GET(url, ...)
}


#' Query tags from the Stack Exchange API
#'
#' Query for tags, either based on names or on other filters.
#'
#' @param name A vector containing one or more names of tags, or none to query
#' from all tags
#' @param special One of \code{c("faq", "related", "synonyms", "wikis")}, to
#' extract information about a particular tag, or one of
#' \code{c("moderator-only", "required", "synonyms")}, to request a list of a
#' particular kind of tags, or all synonyms on the site. When \code{name} is
#' given without \code{special}, \code{"info"} is used.
#' @template api_options
#'
#' @return A \code{data.frame} of tags (TODO)
#'
#' @details See the Stack Exchange API documentation of the tag methods for
#' the values accepted by the "sort" field.
#'
#' @export
stack_tags <- function(name = NULL, special = NULL, ...) {
  if (!is.null(name) && is.null(special)) {
    # tags has a different naming convention, where "info" extracts from tags
    special <- "info"
  }
  special_ids <- c("faq", "related", "synonyms", "wikis", "info")
  special_no_ids <- c("moderator-only", "required", "synonyms")

  # TODO: top answerers/askers in a tag

  url <- combine_url("tags", name, special, special_ids, special_no_ids)
  stack_GET(url, ...)
}


#' Query information about users from the Stack Exchange API
#'
#' Query for a list of users, or information related to one or more
#' specific users.
#'
#' @param id A vector containing one or more user IDs
#' @param special One of many options to describe what information to
#' extract from each user. The options \code{"top-answer-tags"},
#' \code{"top-question-tags"}, \code{"top-tags"}, \code{"privileges"} and
#' \code{"notifications"} can be used only with a single ID;
#' \code{"moderators"} is used without IDs.
#' @template api_options
#'
#' @return A \code{data.frame} of users
#'
#' @export
stack_users <- function(id = NULL, special = NULL, ...) {
  # actions that accept exactly one user ID
  special_id <- c("top-answer-tags", "top-question-tags", "top-tags",
                  "privileges", "notifications")
  special_ids <- c("answers", "badges", "comments", "favorites", "mentioned",
                   "network-activity", "posts", "questions", "reputation",
                   "reputation-history", "suggested-edits", "tags",
                   special_id)

  special_no_ids <- c("moderators")

  if (!is.null(special) && length(id) > 1) {
    # resolve abbreviations first (combine_url also accepts them), so that
    # e.g. "priv" cannot slip past the single-ID restriction
    resolved <- match.arg(special, unique(c(special_ids, special_no_ids)))
    if (resolved %in% special_id) {
      stop(paste(resolved, "can be used only with a single ID"))
    }
  }

  url <- combine_url("users", id, special, special_ids, special_no_ids)

  stack_GET(url, ...)
}
It is *not* affiliated with Stack Exchange. 5 | 6 | The [documentation](https://api.stackexchange.com/docs/) of the Stack Exchange API is worth reviewing, as the package is built to resemble that interface while remaining true to R's style and syntax. 7 | 8 | ## Installation 9 | 10 | You can install the package with [devtools](https://github.com/hadley/devtools) as such: 11 | 12 | ```{r} 13 | # install.packages("devtools") 14 | devtools::install_github("dgrtwo/stackr") 15 | 16 | # if you want to access the vignettes from within the package: 17 | devtools::install_github("dgrtwo/stackr", build_vignettes = TRUE) 18 | browseVignettes("stackr") 19 | ``` 20 | 21 | ## Basics 22 | 23 | Methods for querying objects from the API are implemented in functions of the form `stack_[object]`. Each of these functions returns a data frame, with one row per object. 24 | 25 | For example, one could query recent questions with: 26 | 27 | 28 | ```r 29 | q <- stack_questions() 30 | ``` 31 | 32 | And recent answers with: 33 | 34 | ```r 35 | a <- stack_answers() 36 | ``` 37 | 38 | Almost all of these functions can take as their first argument one or more IDs. For example, one could query a specific question: 39 | 40 | ```r 41 | stack_questions(11227809) 42 | ``` 43 | 44 | Or one could query multiple answers using a vector: 45 | 46 | ```r 47 | stack_answers(c(179147, 2219560, 180085)) 48 | ``` 49 | 50 | Other results you can query include users: 51 | 52 | ```r 53 | stack_users(712603) 54 | ``` 55 | 56 | Or tags, which are queried by name instead of id: 57 | 58 | 59 | ```r 60 | stack_tags(c("r", "ggplot2", "dplyr")) 61 | ``` 62 | 63 | ## Returned values 64 | 65 | Each of these functions returns a `data.frame`. 
The columns that are included depend on the object being returned, with documentation available on the Stack Exchange API site: 66 | 67 | * [answer](https://api.stackexchange.com/docs/types/answer) 68 | * [badge](https://api.stackexchange.com/docs/types/badge) 69 | * [comment](https://api.stackexchange.com/docs/types/comment) 70 | * [info](https://api.stackexchange.com/docs/types/info) 71 | * [post](https://api.stackexchange.com/docs/types/post) 72 | * [privilege](https://api.stackexchange.com/docs/types/privilege) 73 | * [question](https://api.stackexchange.com/docs/types/question) 74 | * [revision](https://api.stackexchange.com/docs/types/revision) 75 | * [suggested-edit](https://api.stackexchange.com/docs/types/suggested-edit) 76 | * [tags](https://api.stackexchange.com/docs/types/tags) 77 | * [user](https://api.stackexchange.com/docs/types/user) 78 | 79 | ## Special queries 80 | 81 | A function like `stack_questions` does not *necessarily* return questions. By providing a second argument to the query, one can extract objects that are related to that object. For example, one could extract all the answers to a particular question with: 82 | 83 | ```r 84 | answers <- stack_questions(11227809, "answers") 85 | ``` 86 | 87 | Similarly, one could extract the comments, linked questions, or related questions with: 88 | 89 | 90 | ```r 91 | comments <- stack_questions(11227809, "comments") 92 | linked <- stack_questions(11227809, "linked") 93 | related <- stack_questions(11227809, "related") 94 | ``` 95 | 96 | There are many other combinations: one could extract a user's comments: 97 | 98 | 99 | ```r 100 | my_comments <- stack_users(712603, "comments") 101 | ``` 102 | 103 | The combinations of methods and actions is best explained in the [documentation](https://api.stackexchange.com/docs/). 104 | 105 | ## Pagination 106 | 107 | You can set the `pagesize` argument to any method to determine the number of objects to be returned. 
However, the maximum value of this is 100, which means multiple requests must be made to download a list larger than 100. 108 | 109 | `stackr` handles this pagination with the `num_pages` argument, which all methods accept. This gives a maximum number of pages (and therefore requests) that will be iterated through, combining them together at the end. 110 | 111 | ## API Key 112 | 113 | It's a good idea to set up a registered API key with Stack Exchange, since it increases your daily quota of queries from 300 to 10,000. You can [register an app here](http://stackapps.com/apps/oauth/register). Once you have your Stack Exchange application key, set up an environment variable, by adding the following line to your `.Rprofile`: 114 | 115 | 116 | ```r 117 | Sys.setenv(STACK_EXCHANGE_KEY = "YOUR_KEY_HERE") 118 | ``` 119 | 120 | After that, queries made from your system will use your key. 121 | 122 | Future plans 123 | ------------------- 124 | 125 | Currently, no methods requiring authentication are implemented. OAuth 2.0 could be implemented through the same httr framework ([see here](http://cran.r-project.org/web/packages/httr/vignettes/api-packages.html)), but my current judgment is that R is likely to be used for data analysis operations rather than actual front-ends for Stack Exchange, which negates the need for most authentication-based operations. 126 | 127 | So far, the only network method implemented is "/sites" (via `stack_sites`); other network methods (such as "/apps") have not yet been implemented. 128 | 129 | Bug reports are very welcome [here](http://github.com/dgrtwo/stackr/issues). 130 | -------------------------------------------------------------------------------- /man-roxygen/api_options.R: -------------------------------------------------------------------------------- 1 | #' @param ... 
Additional arguments to API (see below) 2 | #' 3 | #' @details API querying methods allow the following additional options: 4 | #' \describe{ 5 | #' \item{site}{ID of Stack Exchange site (by default, Stack Overflow)} 6 | #' \item{sort}{field to sort by} 7 | #' \item{order}{whether the \code{sort} field should be arranged in descending 8 | #' ("desc") or ascending ("asc") order} 9 | #' \item{min}{Minimum value of the \code{sort} field} 10 | #' \item{max}{Maximum value of the \code{sort} field} 11 | #' \item{fromdate}{Starting date} 12 | #' \item{todate}{Ending date} 13 | #' \item{page}{Which page to start from} 14 | #' \item{pagesize}{Size of each page to extract (max 100)} 15 | #' \item{num_pages}{Number of pages to extract} 16 | #' } 17 | -------------------------------------------------------------------------------- /man-roxygen/type_answer.R: -------------------------------------------------------------------------------- 1 | #' @return An answer object is documented in full here: 2 | #' 3 | #' https://api.stackexchange.com/docs/types/answer 4 | #' 5 | #' It comes with the following columns: 6 | #' 7 | #' \itemize{ 8 | #' \item{answer_id} 9 | #' \item{community_owned_date} 10 | #' \item{creation_date} 11 | #' \item{is_accepted} 12 | #' \item{last_activity_date} 13 | #' \item{last_edit_date} 14 | #' \item{locked_date} 15 | #' \item{owner} 16 | #' \item{protected_date} 17 | #' \item{question_id} 18 | #' \item{score} 19 | #' } 20 | -------------------------------------------------------------------------------- /man-roxygen/type_question.R: -------------------------------------------------------------------------------- 1 | #' @return A question object is documented in full here: 2 | #' 3 | #' https://api.stackexchange.com/docs/types/question 4 | #' 5 | #' It comes with the following columns: 6 | #' 7 | #' \itemize{ 8 | #' \item{accepted_answer_id} 9 | #' \item{answer_count} 10 | #' \item{bounty_amount} 11 | #' \item{bounty_closes_date} 12 | #' \item{closed_date} 
13 | #' \item{closed_reason} 14 | #' \item{community_owned_date} 15 | #' \item{creation_date} 16 | #' \item{is_answered} 17 | #' \item{last_activity_date} 18 | #' \item{last_edit_date} 19 | #' \item{link} 20 | #' \item{locked_date} 21 | #' \item{migrated_from} 22 | #' \item{migrated_to} 23 | #' \item{owner} 24 | #' \item{protected_date} 25 | #' \item{question_id} 26 | #' \item{score} 27 | #' \item{tags} 28 | #' \item{title} 29 | #' \item{view_count} 30 | #' } 31 | -------------------------------------------------------------------------------- /man/combine_url.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/api.R 3 | \name{combine_url} 4 | \alias{combine_url} 5 | \title{construct a query URL for a request, including checking special 6 | operations} 7 | \usage{ 8 | combine_url(base, id, special = NULL, special_ids = c(), 9 | special_no_ids = c()) 10 | } 11 | \arguments{ 12 | \item{base}{base of query, such as "answers" or "questions"} 13 | 14 | \item{id}{vector of IDs to search} 15 | 16 | \item{special}{special parameter, which specifies the action (such as 17 | retrieving an associated object with an ID)} 18 | 19 | \item{special_ids}{vector of possible special parameters that require IDs} 20 | 21 | \item{special_no_ids}{vector of possible special parameters that don't 22 | require IDs} 23 | } 24 | \description{ 25 | construct a query URL for a request, including checking special 26 | operations 27 | } 28 | 29 | -------------------------------------------------------------------------------- /man/stack_GET.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/api.R 3 | \name{stack_GET} 4 | \alias{stack_GET} 5 | \title{Make a GET request to the Stack Exchange API} 6 | \usage{ 7 | stack_GET(path, site = 
"stackoverflow", page = 1, num_pages = 1, ...) 8 | } 9 | \arguments{ 10 | \item{path}{the query path, such as "answers/" or "users/{id}"} 11 | 12 | \item{site}{site to query; by default Stack Overflow} 13 | 14 | \item{page}{which page to start from} 15 | 16 | \item{num_pages}{number of consecutive pages to query; by default 1} 17 | 18 | \item{...}{additional parameters to the method} 19 | } 20 | \description{ 21 | Make a GET request to the Stack Exchange API 22 | } 23 | 24 | -------------------------------------------------------------------------------- /man/stack_answers.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/stack_answers.R 3 | \name{stack_answers} 4 | \alias{stack_answers} 5 | \title{Query answers from the Stack Exchange API} 6 | \usage{ 7 | stack_answers(id = NULL, special = NULL, ...) 8 | } 9 | \arguments{ 10 | \item{id}{A vector containing one or more answer IDs, or none to query 11 | all answers} 12 | 13 | \item{special}{If \code{"comments"}, return the comments on the answers 14 | rather than the answers themselves} 15 | 16 | \item{...}{Additional arguments to API (see below)} 17 | } 18 | \value{ 19 | A \code{data.frame} of answers. 20 | 21 | An answer object is documented in full here: 22 | 23 | https://api.stackexchange.com/docs/types/answer 24 | 25 | It comes with the following columns: 26 | 27 | \itemize{ 28 | \item{answer_id} 29 | \item{community_owned_date} 30 | \item{creation_date} 31 | \item{is_accepted} 32 | \item{last_activity_date} 33 | \item{last_edit_date} 34 | \item{locked_date} 35 | \item{owner} 36 | \item{protected_date} 37 | \item{question_id} 38 | \item{score} 39 | } 40 | } 41 | \description{ 42 | Query for a list of answers, or information related to a one or more 43 | specific answers. 
44 | } 45 | \details{ 46 | API querying methods allow the following additional options: 47 | \describe{ 48 | \item{site}{ID of Stack Exchange site (by default, Stack Overflow)} 49 | \item{sort}{field to sort by} 50 | \item{order}{whether the \code{sort} field should be arranged in descending 51 | ("desc") or ascending ("asc") order} 52 | \item{min}{Minimum value of the \code{sort} field} 53 | \item{max}{Maximum value of the \code{sort} field} 54 | \item{fromdate}{Starting date} 55 | \item{todate}{Ending date} 56 | \item{page}{Which page to start from} 57 | \item{pagesize}{Size of each page to extract (max 100)} 58 | \item{num_pages}{Number of pages to extract} 59 | } 60 | } 61 | 62 | -------------------------------------------------------------------------------- /man/stack_badges.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/stack_badges.R 3 | \name{stack_badges} 4 | \alias{stack_badges} 5 | \title{Query badges from the Stack Exchange API} 6 | \usage{ 7 | stack_badges(id = NULL, special = NULL, ...) 
8 | } 9 | \arguments{ 10 | \item{id}{A vector containing one or more IDs of badges, or none to 11 | retrieve all badges in alphabetical order} 12 | 13 | \item{special}{One of \code{c("name", "tags", "recipients")}, to 14 | retrieve only non-tag badges, only tag badges, or to retrieve recipients 15 | of the given badges} 16 | 17 | \item{...}{Additional arguments to API (see below)} 18 | } 19 | \value{ 20 | A \code{data.frame} of badges (TODO) 21 | } 22 | \description{ 23 | Query for badges from a particular site 24 | } 25 | \details{ 26 | API querying methods allow the following additional options: 27 | \describe{ 28 | \item{site}{ID of Stack Exchange site (by default, Stack Overflow)} 29 | \item{sort}{field to sort by} 30 | \item{order}{whether the \code{sort} field should be arranged in descending 31 | ("desc") or ascending ("asc") order} 32 | \item{min}{Minimum value of the \code{sort} field} 33 | \item{max}{Maximum value of the \code{sort} field} 34 | \item{fromdate}{Starting date} 35 | \item{todate}{Ending date} 36 | \item{page}{Which page to start from} 37 | \item{pagesize}{Size of each page to extract (max 100)} 38 | \item{num_pages}{Number of pages to extract} 39 | } 40 | } 41 | 42 | -------------------------------------------------------------------------------- /man/stack_comments.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/stack_comments.R 3 | \name{stack_comments} 4 | \alias{stack_comments} 5 | \title{Query comments from the Stack Exchange API} 6 | \usage{ 7 | stack_comments(id = NULL, ...) 
8 | } 9 | \arguments{ 10 | \item{id}{A vector containing one or more IDs, or none to query 11 | all comments} 12 | 13 | \item{...}{Additional arguments to API (see below)} 14 | } 15 | \value{ 16 | A \code{data.frame} of comments (TODO) 17 | } 18 | \description{ 19 | Query for comments, either based on IDs or on other filters. 20 | } 21 | \details{ 22 | API querying methods allow the following additional options: 23 | \describe{ 24 | \item{site}{ID of Stack Exchange site (by default, Stack Overflow)} 25 | \item{sort}{field to sort by} 26 | \item{order}{whether the \code{sort} field should be arranged in descending 27 | ("desc") or ascending ("asc") order} 28 | \item{min}{Minimum value of the \code{sort} field} 29 | \item{max}{Maximum value of the \code{sort} field} 30 | \item{fromdate}{Starting date} 31 | \item{todate}{Ending date} 32 | \item{page}{Which page to start from} 33 | \item{pagesize}{Size of each page to extract (max 100)} 34 | \item{num_pages}{Number of pages to extract} 35 | } 36 | } 37 | 38 | -------------------------------------------------------------------------------- /man/stack_info.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/stack_info.R 3 | \name{stack_info} 4 | \alias{stack_info} 5 | \title{Query general info for a Stack Exchange site} 6 | \usage{ 7 | stack_info(site = "stackoverflow", ...) 8 | } 9 | \arguments{ 10 | \item{site}{Stack Exchange site to query (default Stack Overflow)} 11 | 12 | \item{...}{Additional API arguments (not used)} 13 | } 14 | \value{ 15 | A one-row \code{data.frame} containing statistics 16 | about the site. 
17 | } 18 | \description{ 19 | Query for general information from a site, such as the number of users, 20 | answers, and questions, and statistics for activity per minute 21 | } 22 | 23 | -------------------------------------------------------------------------------- /man/stack_parse.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/api.R 3 | \name{stack_parse} 4 | \alias{stack_parse} 5 | \title{Parse the results of a Stack Exchange API query into a data.frame.} 6 | \usage{ 7 | stack_parse(req) 8 | } 9 | \arguments{ 10 | \item{req}{a request from httr::GET} 11 | } 12 | \description{ 13 | The additional metadata, such as "has_more", "quota_max", and 14 | "quota_remaining" is included in a list as the attribute "metadata". 15 | } 16 | 17 | -------------------------------------------------------------------------------- /man/stack_posts.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/stack_posts.R 3 | \name{stack_posts} 4 | \alias{stack_posts} 5 | \title{Query posts from the Stack Exchange API} 6 | \usage{ 7 | stack_posts(id = NULL, special = NULL, ...) 8 | } 9 | \arguments{ 10 | \item{id}{A vector containing one or more IDs, or none to query 11 | all questions and answers} 12 | 13 | \item{special}{One of \code{c("comments", "revisions", "suggested-edits")}, to 14 | return that information associated with the specified posts rather than the 15 | posts themselves} 16 | 17 | \item{...}{Additional arguments to API (see below)} 18 | } 19 | \value{ 20 | A \code{data.frame} of questions or answers (TODO) 21 | } 22 | \description{ 23 | Query for posts, either questions or answers, based on IDs. 
24 | } 25 | \details{ 26 | API querying methods allow the following additional options: 27 | \describe{ 28 | \item{site}{ID of Stack Exchange site (by default, Stack Overflow)} 29 | \item{sort}{field to sort by} 30 | \item{order}{whether the \code{sort} field should be arranged in descending 31 | ("desc") or ascending ("asc") order} 32 | \item{min}{Minimum value of the \code{sort} field} 33 | \item{max}{Maximum value of the \code{sort} field} 34 | \item{fromdate}{Starting date} 35 | \item{todate}{Ending date} 36 | \item{page}{Which page to start from} 37 | \item{pagesize}{Size of each page to extract (max 100)} 38 | \item{num_pages}{Number of pages to extract} 39 | } 40 | } 41 | 42 | -------------------------------------------------------------------------------- /man/stack_privileges.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/stack_privileges.R 3 | \name{stack_privileges} 4 | \alias{stack_privileges} 5 | \title{Query privileges for a Stack Exchange site} 6 | \usage{ 7 | stack_privileges(site = "stackoverflow", ...) 8 | } 9 | \arguments{ 10 | \item{site}{Stack Exchange site to query (default Stack Overflow)} 11 | 12 | \item{...}{Additional API arguments (not used)} 13 | } 14 | \value{ 15 | A one-row \code{data.frame} containing statistics 16 | about the site. 17 | } 18 | \description{ 19 | Query for reputation-based privileges from a Stack Exchange site. 
20 | } 21 | 22 | -------------------------------------------------------------------------------- /man/stack_questions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/stack_questions.R 3 | \name{stack_questions} 4 | \alias{stack_questions} 5 | \title{Query questions from the Stack Exchange API} 6 | \usage{ 7 | stack_questions(id = NULL, special = NULL, ...) 8 | } 9 | \arguments{ 10 | \item{id}{A vector containing one or more question IDs} 11 | 12 | \item{special}{One of \code{c("answers", "comments", "linked", "related", 13 | "timeline")}, describing what information to retrieve about specific questions, or 14 | one of \code{c("featured", "no-answers", "unanswered")}, describing a filter 15 | to place on returned questions.} 16 | 17 | \item{...}{Additional arguments to API (see below)} 18 | } 19 | \value{ 20 | A \code{data.frame} of questions. 21 | 22 | A question object is documented in full here: 23 | 24 | https://api.stackexchange.com/docs/types/question 25 | 26 | It comes with the following columns: 27 | 28 | \itemize{ 29 | \item{accepted_answer_id} 30 | \item{answer_count} 31 | \item{bounty_amount} 32 | \item{bounty_closes_date} 33 | \item{closed_date} 34 | \item{closed_reason} 35 | \item{community_owned_date} 36 | \item{creation_date} 37 | \item{is_answered} 38 | \item{last_activity_date} 39 | \item{last_edit_date} 40 | \item{link} 41 | \item{locked_date} 42 | \item{migrated_from} 43 | \item{migrated_to} 44 | \item{owner} 45 | \item{protected_date} 46 | \item{question_id} 47 | \item{score} 48 | \item{tags} 49 | \item{title} 50 | \item{view_count} 51 | } 52 | } 53 | \description{ 54 | Query for a list of questions, or information related to one or more 55 | specific questions. 
56 | } 57 | \details{ 58 | API querying methods allow the following additional options: 59 | \describe{ 60 | \item{site}{ID of Stack Exchange site (by default, Stack Overflow)} 61 | \item{sort}{field to sort by} 62 | \item{order}{whether the \code{sort} field should be arranged in descending 63 | ("desc") or ascending ("asc") order} 64 | \item{min}{Minimum value of the \code{sort} field} 65 | \item{max}{Maximum value of the \code{sort} field} 66 | \item{fromdate}{Starting date} 67 | \item{todate}{Ending date} 68 | \item{page}{Which page to start from} 69 | \item{pagesize}{Size of each page to extract (max 100)} 70 | \item{num_pages}{Number of pages to extract} 71 | } 72 | } 73 | 74 | -------------------------------------------------------------------------------- /man/stack_revisions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/stack_revisions.R 3 | \name{stack_revisions} 4 | \alias{stack_revisions} 5 | \title{Query revisions from the Stack Exchange API} 6 | \usage{ 7 | stack_revisions(id, ...) 
8 | } 9 | \arguments{ 10 | \item{id}{A vector containing one or more IDs of revisions} 11 | 12 | \item{...}{Additional arguments to API (see below)} 13 | } 14 | \value{ 15 | A \code{data.frame} of revisions (TODO) 16 | } 17 | \description{ 18 | Query for revisions (edits), based on particular IDs 19 | } 20 | \details{ 21 | API querying methods allow the following additional options: 22 | \describe{ 23 | \item{site}{ID of Stack Exchange site (by default, Stack Overflow)} 24 | \item{sort}{field to sort by} 25 | \item{order}{whether the \code{sort} field should be arranged in descending 26 | ("desc") or ascending ("asc") order} 27 | \item{min}{Minimum value of the \code{sort} field} 28 | \item{max}{Maximum value of the \code{sort} field} 29 | \item{fromdate}{Starting date} 30 | \item{todate}{Ending date} 31 | \item{page}{Which page to start from} 32 | \item{pagesize}{Size of each page to extract (max 100)} 33 | \item{num_pages}{Number of pages to extract} 34 | } 35 | } 36 | 37 | -------------------------------------------------------------------------------- /man/stack_search.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/stack_search.R 3 | \name{stack_search} 4 | \alias{stack_search} 5 | \title{Search a Stack Exchange site via the API} 6 | \usage{ 7 | stack_search(intitle, tagged, nottagged, q, accepted, answers, body, closed, 8 | migrated, notice, title, user, url, views, wiki, ...) 
9 | } 10 | \arguments{ 11 | \item{intitle}{string to use for searching title} 12 | 13 | \item{tagged}{vector of tags that must be included in questions} 14 | 15 | \item{nottagged}{vector of tags to be excluded from questions} 16 | 17 | \item{q}{free form text parameter that matches questions based on Stack 18 | Exchange's (undocumented) algorithm} 19 | 20 | \item{accepted}{boolean to filter for accepted or unaccepted questions} 21 | 22 | \item{answers}{minimum number of answers} 23 | 24 | \item{body}{text that must appear in the body of questions} 25 | 26 | \item{closed}{boolean to filter for open or closed questions} 27 | 28 | \item{migrated}{boolean to filter for questions that were, or were not, 29 | migrated to another site} 30 | 31 | \item{notice}{boolean to filter for questions with a post notice} 32 | 33 | \item{title}{text which must appear in a title; redundant with intitle above} 34 | 35 | \item{user}{the id of the user to filter for} 36 | 37 | \item{url}{url that must be included in the question} 38 | 39 | \item{views}{the minimum number of views a question can have} 40 | 41 | \item{wiki}{boolean to filter for questions that are (or are not) community 42 | wiki} 43 | 44 | \item{...}{Additional arguments to API (see below)} 45 | } 46 | \value{ 47 | A \code{data.frame} of questions. 
48 | 49 | A question object is documented in full here: 50 | 51 | https://api.stackexchange.com/docs/types/question 52 | 53 | It comes with the following columns: 54 | 55 | \itemize{ 56 | \item{accepted_answer_id} 57 | \item{answer_count} 58 | \item{bounty_amount} 59 | \item{bounty_closes_date} 60 | \item{closed_date} 61 | \item{closed_reason} 62 | \item{community_owned_date} 63 | \item{creation_date} 64 | \item{is_answered} 65 | \item{last_activity_date} 66 | \item{last_edit_date} 67 | \item{link} 68 | \item{locked_date} 69 | \item{migrated_from} 70 | \item{migrated_to} 71 | \item{owner} 72 | \item{protected_date} 73 | \item{question_id} 74 | \item{score} 75 | \item{tags} 76 | \item{title} 77 | \item{view_count} 78 | } 79 | } 80 | \description{ 81 | Access the Stack Exchange search functionality. 82 | } 83 | \details{ 84 | API querying methods allow the following additional options: 85 | \describe{ 86 | \item{site}{ID of Stack Exchange site (by default, Stack Overflow)} 87 | \item{sort}{field to sort by} 88 | \item{order}{whether the \code{sort} field should be arranged in descending 89 | ("desc") or ascending ("asc") order} 90 | \item{min}{Minimum value of the \code{sort} field} 91 | \item{max}{Maximum value of the \code{sort} field} 92 | \item{fromdate}{Starting date} 93 | \item{todate}{Ending date} 94 | \item{page}{Which page to start from} 95 | \item{pagesize}{Size of each page to extract (max 100)} 96 | \item{num_pages}{Number of pages to extract} 97 | } 98 | } 99 | 100 | -------------------------------------------------------------------------------- /man/stack_sites.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/stack_sites.R 3 | \name{stack_sites} 4 | \alias{stack_sites} 5 | \title{Query sites from the Stack Exchange API} 6 | \usage{ 7 | stack_sites(...) 
8 | } 9 | \arguments{ 10 | \item{...}{Extra options are \code{page} and \code{pagesize}} 11 | } 12 | \value{ 13 | A \code{data.frame} of sites 14 | } 15 | \description{ 16 | Query for a list of sites in the network 17 | } 18 | 19 | -------------------------------------------------------------------------------- /man/stack_suggested_edits.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/stack_suggested_edits.R 3 | \name{stack_suggested_edits} 4 | \alias{stack_suggested_edits} 5 | \title{Query suggested edits from the Stack Exchange API} 6 | \usage{ 7 | stack_suggested_edits(id = NULL, ...) 8 | } 9 | \arguments{ 10 | \item{id}{A vector containing one or more IDs, or none to query 11 | all suggested edits} 12 | 13 | \item{...}{Additional arguments to API (see below)} 14 | } 15 | \value{ 16 | A \code{data.frame} of suggested edits (TODO) 17 | } 18 | \description{ 19 | Query for suggested edits, either based on IDs or on other filters. 
20 | } 21 | \details{ 22 | API querying methods allow the following additional options: 23 | \describe{ 24 | \item{site}{ID of Stack Exchange site (by default, Stack Overflow)} 25 | \item{sort}{field to sort by} 26 | \item{order}{whether the \code{sort} field should be arranged in descending 27 | ("desc") or ascending ("asc") order} 28 | \item{min}{Minimum value of the \code{sort} field} 29 | \item{max}{Maximum value of the \code{sort} field} 30 | \item{fromdate}{Starting date} 31 | \item{todate}{Ending date} 32 | \item{page}{Which page to start from} 33 | \item{pagesize}{Size of each page to extract (max 100)} 34 | \item{num_pages}{Number of pages to extract} 35 | } 36 | } 37 | 38 | -------------------------------------------------------------------------------- /man/stack_tags.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/stack_tags.R 3 | \name{stack_tags} 4 | \alias{stack_tags} 5 | \title{Query tags from the Stack Exchange API} 6 | \usage{ 7 | stack_tags(name = NULL, special = NULL, ...) 8 | } 9 | \arguments{ 10 | \item{name}{A vector containing one or more names of tags, or none to query 11 | from all tags} 12 | 13 | \item{special}{One of \code{c("faq", "related", "synonyms", "wikis")}, to 14 | extract information about a particular tag, or one of 15 | \code{c("moderator-only", "required", "synonyms")}, to request a list of a 16 | particular kind of tags, or all synonyms on the site.} 17 | 18 | \item{...}{Additional arguments to API (see below)} 19 | } 20 | \value{ 21 | A \code{data.frame} of tags (TODO) 22 | } 23 | \description{ 24 | Query for tags, either based on names or on other filters. 
25 | } 26 | \details{ 27 | API querying methods allow the following additional options: 28 | \describe{ 29 | \item{site}{ID of Stack Exchange site (by default, Stack Overflow)} 30 | \item{sort}{field to sort by} 31 | \item{order}{whether the \code{sort} field should be arranged in descending 32 | ("desc") or ascending ("asc") order} 33 | \item{min}{Minimum value of the \code{sort} field} 34 | \item{max}{Maximum value of the \code{sort} field} 35 | \item{fromdate}{Starting date} 36 | \item{todate}{Ending date} 37 | \item{page}{Which page to start from} 38 | \item{pagesize}{Size of each page to extract (max 100)} 39 | \item{num_pages}{Number of pages to extract} 40 | } 41 | } 42 | 43 | -------------------------------------------------------------------------------- /man/stack_users.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/stack_users.R 3 | \name{stack_users} 4 | \alias{stack_users} 5 | \title{Query information about users from the Stack Exchange API} 6 | \usage{ 7 | stack_users(id = NULL, special = NULL, ...) 8 | } 9 | \arguments{ 10 | \item{id}{A vector containing one or more user IDs} 11 | 12 | \item{special}{One of many options to describe what information to 13 | extract from each user} 14 | 15 | \item{...}{Additional arguments to API (see below)} 16 | } 17 | \value{ 18 | A \code{data.frame} containing each returned user 19 | 20 | A \code{data.frame} of users 21 | } 22 | \description{ 23 | Query for a list of users, or information related to one or more 24 | specific users. 
25 | } 26 | \details{ 27 | API querying methods allow the following additional options: 28 | \describe{ 29 | \item{site}{ID of Stack Exchange site (by default, Stack Overflow)} 30 | \item{sort}{field to sort by} 31 | \item{order}{whether the \code{sort} field should be arranged in descending 32 | ("desc") or ascending ("asc") order} 33 | \item{min}{Minimum value of the \code{sort} field} 34 | \item{max}{Maximum value of the \code{sort} field} 35 | \item{fromdate}{Starting date} 36 | \item{todate}{Ending date} 37 | \item{page}{Which page to start from} 38 | \item{pagesize}{Size of each page to extract (max 100)} 39 | \item{num_pages}{Number of pages to extract} 40 | } 41 | } 42 | 43 | -------------------------------------------------------------------------------- /stackr.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 4 10 | Encoding: UTF-8 11 | 12 | RnwWeave: knitr 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /vignettes/example.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Example analysis of a Stack Overflow user" 3 | author: "David Robinson" 4 | date: "`r Sys.Date()`" 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteIndexEntry{Example analysis of a Stack Overflow user} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | \usepackage[utf8]{inputenc} 10 | --- 11 | 12 | ```{r, echo = FALSE} 13 | library(knitr) 14 | opts_chunk$set(message = FALSE, warning = FALSE) 15 | ``` 16 | 17 | Here I'll show an 
example of using the `stackr` package to analyze an individual user. While Stack Overflow provides many summaries and analyses of each user already, the `stackr` package lets us bring the data seamlessly into R. The package provides the tools to perform similar analyses of a given tag, of recently asked questions, or to answer other similar questions. 18 | 19 | Let's start by picking a Stack Overflow user at random. Eeny, meeny, miny... [me](http://stackoverflow.com/users/712603/david-robinson). (OK, that might not have been random). We can start by getting the information on the profile page like this (712603 is my ID, which can be seen in the URL of the aforementioned link): 20 | 21 | ```{r} 22 | library(stackr) 23 | u <- stack_users(712603) 24 | u 25 | ``` 26 | 27 | But that's not too exciting, since it just shows the profile information. Instead, let's extract all of my answers. (Note that this requires making use of pagination since there are more than 100 answers). We'll also turn the result into a `tbl_df` so that it prints more reasonably: 28 | 29 | ```{r} 30 | library(dplyr) 31 | answers <- stack_users(712603, "answers", num_pages = 10, pagesize = 100) 32 | answers <- tbl_df(answers) 33 | answers 34 | ``` 35 | 36 | This lets me find out a lot about myself: for starters, that I've answered `r nrow(answers)` questions. What percentage of my answers were accepted by the asker? 37 | 38 | ```{r} 39 | mean(answers$is_accepted) 40 | ``` 41 | 42 | And what is the distribution of scores my answers have received? 43 | 44 | ```{r} 45 | library(ggplot2) 46 | ggplot(answers, aes(score)) + geom_histogram(binwidth = 1) 47 | ``` 48 | 49 | How has my answering activity changed over time? 
To find this out, I can count the number of answers per month and graph it: 50 | 51 | ```{r} 52 | library(lubridate) 53 | 54 | answers %>% mutate(month = round_date(creation_date, "month")) %>% 55 | count(month) %>% 56 | ggplot(aes(month, n)) + geom_line() 57 | ``` 58 | 59 | Well, it looks like it's been decreasing. How about how my answering activity changes over the course of a day? 60 | 61 | ```{r} 62 | answers %>% mutate(hour = hour(creation_date)) %>% 63 | count(hour) %>% 64 | ggplot(aes(hour, n)) + geom_line() 65 | ``` 66 | 67 | (Note that the times are in my own time zone, EST). Unsurprisingly, I answer more during the day than at night, but I've still done some answering even around 4-6 AM. You can also spot two conspicuous dips: one at 12 when I eat lunch, and one at 6 when I take the train home from work. 68 | 69 | (If that's not enough invasion of my privacy, you could look at my commenting activity with `stack_users(712603, "comments", ...)`, but it generally shows the same trends). 70 | 71 | ### Top tags 72 | 73 | The API also makes it easy to extract the tags I've most answered, which is another handy way to extract and visualize information about my answering activity: 74 | 75 | ```{r} 76 | top_tags <- stack_users(712603, "top-answer-tags", pagesize = 100) 77 | head(top_tags) 78 | 79 | top_tags %>% mutate(tag_name = reorder(tag_name, -answer_score)) %>% 80 | head(20) %>% 81 | ggplot(aes(tag_name, answer_score)) + geom_bar(stat = "identity") + 82 | theme(axis.text.x = element_text(angle = 90, hjust = 1)) 83 | ``` 84 | 85 | We could also view it using the [wordcloud](http://cran.r-project.org/web/packages/wordcloud/wordcloud.pdf) package: 86 | 87 | ```{r} 88 | library(wordcloud) 89 | wordcloud(top_tags$tag_name, top_tags$answer_count) 90 | ``` 91 | 92 | This is just scratching the surface of the information that the API can download, analyze, and visualize. 
93 | -------------------------------------------------------------------------------- /vignettes/introduction.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Introduction to stackr" 3 | author: "David Robinson" 4 | date: "`r Sys.Date()`" 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteIndexEntry{Introduction to stackr} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | \usepackage[utf8]{inputenc} 10 | --- 11 | 12 | ```{r, echo = FALSE} 13 | library(stackr) 14 | ``` 15 | 16 | This R package serves as an unofficial client for the read-only features of the [Stack Exchange API](https://api.stackexchange.com/). It is *not* affiliated with Stack Exchange. 17 | 18 | The [documentation](https://api.stackexchange.com/docs/) of the Stack Exchange API is worth reviewing, as the package is built to resemble that interface while remaining true to R's style and syntax. 19 | 20 | ## Basics 21 | 22 | Methods for querying objects are implemented in functions of the form `stack_[object]`. Each of these functions returns a data frame, with one row per object. 23 | 24 | For example, one could query recent questions with: 25 | 26 | ```{r} 27 | q <- stack_questions() 28 | head(q, 3) 29 | ``` 30 | 31 | And recent answers with: 32 | 33 | ```{r} 34 | a <- stack_answers() 35 | head(a, 3) 36 | ``` 37 | 38 | Almost all of these functions can take as their first argument one or more IDs. 
For example, one could query a specific question: 39 | 40 | ```{r} 41 | stack_questions(11227809) 42 | ``` 43 | 44 | Or one could query multiple answers using a vector: 45 | 46 | ```{r} 47 | stack_answers(c(179147, 2219560, 180085)) 48 | ``` 49 | 50 | Other results you can query include users: 51 | 52 | ```{r} 53 | stack_users(712603) 54 | ``` 55 | 56 | Or tags, which are queried by name instead of id: 57 | 58 | ```{r} 59 | stack_tags(c("r", "ggplot2", "dplyr")) 60 | ``` 61 | 62 | ## Returned values 63 | 64 | Each of these functions returns a `data.frame`. The columns that are included depend on the object being returned, with documentation available on the Stack Exchange API site: 65 | 66 | * [answer](https://api.stackexchange.com/docs/types/answer) 67 | * [badge](https://api.stackexchange.com/docs/types/badge) 68 | * [comment](https://api.stackexchange.com/docs/types/comment) 69 | * [info](https://api.stackexchange.com/docs/types/info) 70 | * [post](https://api.stackexchange.com/docs/types/post) 71 | * [privilege](https://api.stackexchange.com/docs/types/privilege) 72 | * [question](https://api.stackexchange.com/docs/types/question) 73 | * [revision](https://api.stackexchange.com/docs/types/revision) 74 | * [suggested-edit](https://api.stackexchange.com/docs/types/suggested-edit) 75 | * [tag](https://api.stackexchange.com/docs/types/tag) 76 | * [user](https://api.stackexchange.com/docs/types/user) 77 | 78 | ## Special queries 79 | 80 | A function like `stack_questions` does not *necessarily* return questions. By providing a second argument (`special`) to the query, one can extract objects that are related to the query. 
For example, one could extract all the answers to a particular question with: 81 | 82 | ```{r} 83 | answers <- stack_questions(11227809, "answers") 84 | ``` 85 | 86 | Similarly, one could extract the comments, linked questions, or related questions with: 87 | 88 | ```{r} 89 | comments <- stack_questions(11227809, "comments") 90 | linked <- stack_questions(11227809, "linked") 91 | related <- stack_questions(11227809, "related") 92 | ``` 93 | 94 | There are many other combinations: one could extract a user's comments: 95 | 96 | ```{r} 97 | my_comments <- stack_users(712603, "comments") 98 | ``` 99 | 100 | ## Pagination 101 | 102 | You can set the `pagesize` argument to any method to determine the number of objects to be returned. However, the maximum value of this is 100, which means multiple requests must be made to download a list larger than 100. 103 | 104 | `stackr` handles this pagination with the `num_pages` argument, which all methods accept. This gives a maximum number of pages (and therefore requests) that will be iterated through, combining them together at the end. 105 | 106 | ## API Key 107 | 108 | It's a good idea to set up a registered API key with Stack Exchange, since it increases your daily quota of queries from 300 to 10,000. You can [register an app here](http://stackapps.com/apps/oauth/register). Once you have your Stack Exchange application key, set up an environment variable, by adding the following line to your `.Rprofile`: 109 | 110 | ```{r eval = FALSE} 111 | Sys.setenv(STACK_EXCHANGE_KEY = "YOUR_KEY_HERE") 112 | ``` 113 | 114 | After that, queries made from your system will use your key. 115 | 116 | Future plans 117 | ------------------- 118 | 119 | Currently, no methods requiring authentication are implemented. 
OAuth 2.0 could be implemented through the same httr framework ([see here](http://cran.r-project.org/web/packages/httr/vignettes/api-packages.html)), but my current judgment is that R is likely to be used for data analysis operations rather than actual front-ends for Stack Exchange, which negates the need for most auth-based operations. 120 | 121 | So far, no network methods (such as "/sites", or "/apps") have yet been implemented; only per-site methods. 122 | 123 | Bug reports are very welcome [here](http://github.com/dgrtwo/stackr/issues). 124 | --------------------------------------------------------------------------------