├── LICENSE ├── .gitignore ├── man ├── figures │ └── logo.png ├── P.Rd ├── PP.Rd ├── rtweet.download-package.Rd ├── set_token.Rd ├── search_tweets_download.Rd ├── lookup_users_download.Rd ├── get_followers_download.Rd └── get_friends_download.Rd ├── tests ├── testthat.R └── testthat │ └── test-friends.R ├── .Rbuildignore ├── R ├── envir.R ├── utils-P.R ├── count.R ├── sleep.R ├── rtweet.recipes-package.R ├── lookup_tweets.R ├── assert.R ├── rate_limit.R ├── set-token.R ├── search_tweets.R ├── wrappers.R ├── token.R ├── utils.R ├── lookup_users.R ├── lookup_users_og.R ├── followers.R └── friends.R ├── rtweet.download.Rproj ├── NAMESPACE ├── DESCRIPTION ├── LICENSE.md ├── README.md └── README.Rmd /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2019 2 | COPYRIGHT HOLDER: Michael W. Kearney 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | list-members.R 5 | TODO.R 6 | -------------------------------------------------------------------------------- /man/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mkearney/rtweet.download/HEAD/man/figures/logo.png -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(rtweet.recipes) 3 | 4 | test_check("rtweet.recipes") 5 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^rtweet\.recipes\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^LICENSE\.md$ 4 | ^README\.Rmd$ 5 | ^rtweet\.download\.Rproj$ 6 | 
-------------------------------------------------------------------------------- /tests/testthat/test-friends.R: -------------------------------------------------------------------------------- 1 | test_that("get_friends_download works", { 2 | expect_error(get_friends_download()) 3 | expect_error(get_friends_download(data.frame(x = rnorm(5), y = rnorm(5), z = rnorm(5)))) 4 | }) 5 | -------------------------------------------------------------------------------- /R/envir.R: -------------------------------------------------------------------------------- 1 | exists.rr <- function(x) exists(x, envir = .rr, inherits = FALSE) 2 | 3 | get.rr <- function(x) get(x, envir = .rr, inherits = FALSE) 4 | 5 | assign.rr <- function(...) { 6 | mmap(assign, names(pretty_dots(...)), list(...), MoreArgs = list(envir = .rr, inherits = FALSE)) 7 | invisible() 8 | } 9 | -------------------------------------------------------------------------------- /man/P.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-P.R 3 | \name{\%P\%} 4 | \alias{\%P\%} 5 | \title{Paste operator} 6 | \usage{ 7 | lhs \%P\% rhs 8 | } 9 | \description{ 10 | See \code{tfse::\link[tfse]{\%P\%}} for details. 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/PP.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-P.R 3 | \name{\%PP\%} 4 | \alias{\%PP\%} 5 | \title{Paste with Parameters operator} 6 | \usage{ 7 | lhs \%PP\% rhs 8 | } 9 | \description{ 10 | See \code{tfse::\link[tfse]{\%PP\%}} for details. 
11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /rtweet.download.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: knitr 13 | LaTeX: XeLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /R/utils-P.R: -------------------------------------------------------------------------------- 1 | #' Paste operator 2 | #' 3 | #' See \code{tfse::\link[tfse]{\%P\%}} for details. 4 | #' 5 | #' @name %P% 6 | #' @rdname P 7 | #' @keywords internal 8 | #' @export 9 | #' @importFrom tfse %P% 10 | #' @usage lhs \%P\% rhs 11 | NULL 12 | 13 | #' Paste with Parameters operator 14 | #' 15 | #' See \code{tfse::\link[tfse]{\%PP\%}} for details. 
#'
#' @name %PP%
#' @rdname PP
#' @keywords internal
#' @export
#' @importFrom tfse %PP%
#' @usage lhs \%PP\% rhs
NULL
--------------------------------------------------------------------------------
/R/count.R:
--------------------------------------------------------------------------------
#' How many results can the current token request in one rate-limit window?
#'
#' Returns the per-15-minute capacity for `query` given the active token type
#' (bearer tokens get a larger search allowance; user tokens a larger
#' lookup_users allowance; get_friends is 15 calls either way). On the first
#' call (`first = TRUE`) the capacity is pro-rated by the remaining fraction
#' of the current rate-limit window, so a partially spent window is respected.
#'
#' @param token List with a `token` element (a user or bearer token).
#' @param query One of "search_tweets", "lookup_users", "get_friends".
#' @param first Whether this is the first request of the session.
#' @return A (possibly fractional) count of allowable results.
#' @noRd
determine_count <- function(token, query, first = FALSE) {
  if (is_bearer(token[["token"]])) {
    n <- switch(query,
      "search_tweets" = 45000,
      "lookup_users" = 30000,
      "get_friends" = 15
    )
  } else {
    n <- switch(query,
      "search_tweets" = 18000,
      "lookup_users" = 90000,
      "get_friends" = 15
    )
  }
  if (!first) {
    return(n)
  }
  rl <- rate_limit2(token = token[["token"]], query = query)
  if (rl[["remaining"]] == 0) {
    return(n)
  }
  ## scale the full-window capacity by the fraction of calls still available
  rl[["remaining"]] * (n / rl[["limit"]])
}
--------------------------------------------------------------------------------
/R/sleep.R:
--------------------------------------------------------------------------------
#' Sleep for `s` seconds while rendering a progress bar labelled `msg`.
#'
#' Shared implementation for nap_wait()/nap_retry(), which previously
#' duplicated this code verbatim and differed only in the bar label.
#'
#' @param msg Label shown before the progress bar.
#' @param s Total seconds to sleep (split across `ticks` increments).
#' @param ticks Number of progress-bar increments (default 500, as before).
#' @return Invisibly, TRUE.
#' @noRd
nap_bar <- function(msg, s, ticks = 500L) {
  pb <- progress::progress_bar$new(
    format = crayon::blue(paste0(msg, " [:bar] :eta")),
    total = ticks, clear = TRUE, width = 60)
  pb$tick(0)
  for (i in seq_len(ticks)) {
    Sys.sleep(s / ticks)
    pb$tick()
  }
  invisible(TRUE)
}

#' Sleep `s` seconds waiting for the rate limit to reset.
#' @noRd
nap_wait <- function(s) nap_bar("Waiting on rate limit", s)

#' Sleep `s` seconds before retrying a failed call.
#' @noRd
nap_retry <- function(s) nap_bar("Waiting to retry", s)
--------------------------------------------------------------------------------
/R/rtweet.recipes-package.R:
--------------------------------------------------------------------------------
#' @keywords internal
#' @import rtweet
"_PACKAGE"

# The following block is used by usethis to automatically manage
# roxygen namespace tags. Modify with care!
## usethis namespace: start
## usethis namespace: end
NULL

.rr <- new.env(parent = emptyenv())

.onLoad <- function(libname, pkgname) {
  # op <- options()
  # op.rtweet.recipes <- list(
  #   rtweet.recipes.token_path = "~/R-dev"
  # )
  # toset <- !(names(op.rtweet.recipes) %in% names(op))
  # if (any(toset)) {
  #   options(op.rtweet.recipes[toset])
  # }
  invisible()
}
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
# Generated by roxygen2: do not edit by hand

S3method(get_followers_download,character)
S3method(get_followers_download,data.frame)
S3method(get_friends_download,character)
S3method(get_friends_download,data.frame)
S3method(lookup_users_download,character)
S3method(lookup_users_download,data.frame)
S3method(set_token,Token)
S3method(set_token,bearer)
S3method(set_token,character)
export("%P%")
export("%PP%")
export(get_followers_download)
export(get_friends_download)
export(lookup_users_download)
export(search_tweets_download)
export(set_token)
import(rtweet)
importFrom(tfse,"%P%")
importFrom(tfse,"%PP%")
--------------------------------------------------------------------------------
/R/lookup_tweets.R:
--------------------------------------------------------------------------------
#' Look up tweets in rate-limit-sized batches.
#'
#' Fixes vs. the previous version:
#'  * `prep_tokens(token)` referenced an undefined name; the parameter is
#'    `tokens`.
#'  * `x` was never shortened inside `while (length(x) > 0)`, so the loop
#'    never terminated; each pass now consumes the batch it looked up.
#'  * `count` could exceed `length(x)` on the final batch; it is now capped.
#'
#' @param x Character vector of identifiers to look up.
#' @param ... Passed through to the underlying lookup call.
#' @param tokens Optional token(s); defaults are resolved by prep_tokens().
#' @return A list of per-batch results.
#' @noRd
lookup_tweets_download <- function(x, ..., tokens = NULL) {
  token <- prep_tokens(tokens)
  outpt <- vector("list")
  first <- TRUE
  while (length(x) > 0) {
    token <- determine_token(token, "lookup_users")
    count <- min(determine_count(token, "lookup_users", first = first),
      length(x))
    outpt[[length(outpt) + 1L]] <- lookup_users_warning_nap(
      x[seq_len(count)], ..., token = token$token)
    ## drop the batch just processed so the loop makes progress
    x <- x[-seq_len(count)]
    first <- FALSE
  }
  outpt
}


#' Preallocate (or grow) the output list used by lookup_tweets_download().
#'
#' Fix vs. the previous version: the parameter is `.twt`, but the body read
#' and returned `.stw` — an undefined name copy-pasted from prep_stwout()
#' in search_tweets.R. (Also uses base `is.null()` rather than the non-base
#' `is_null()` helper; behavior is identical.)
#'
#' @param .twt Optional preexisting output list to extend.
#' @param n Number of desired results (sizes the preallocation).
#' @param token Token list; a `bearer` entry raises per-window capacity.
#' @return A list of length at least `ceiling(n / capacity) + 2`.
#' @noRd
prep_twtout <- function(.twt = NULL, n, token) {
  num <- 18000
  if ("bearer" %in% names(token)) {
    num <- c(num, 45000)
  }
  len <- ceiling(n / sum(num)) + 2
  if (!is.null(.twt)) {
    if (length(.twt) < len) {
      .twt <- c(.twt, vector("list", len - length(.twt)))
    }
    return(.twt)
  }
  vector("list", len)
}
--------------------------------------------------------------------------------
/man/rtweet.download-package.Rd:
--------------------------------------------------------------------------------
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/rtweet.recipes-package.R
\docType{package}
\name{rtweet.download-package}
\alias{rtweet.download}
\alias{rtweet.download-package}
\title{rtweet.download: Automate Large Downloads with 'rtweet'}
\description{
\if{html}{\figure{logo.png}{options: align='right' alt='logo' width='120'}}

Robust tools for automating large and/or time-consuming tasks
involving the collection of Twitter data via 'rtweet' .
}
\seealso{
Useful links:
\itemize{
  \item \url{https://github.com/mkearney/rtweet.download}
  \item Report bugs at \url{https://github.com/mkearney/rtweet.download/issues}
}

}
\author{
\strong{Maintainer}: Michael W.
Kearney \email{kearneymw@missouri.edu} (\href{https://orcid.org/0000-0002-0730-4694}{ORCID}) 24 | 25 | } 26 | \keyword{internal} 27 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: rtweet.download 2 | Title: Automate Large Downloads with 'rtweet' 3 | Version: 0.0.1 4 | Authors@R: c( 5 | person("Michael W.", "Kearney", , 6 | email = "kearneymw@missouri.edu", role = c("aut", "cre"), 7 | comment = c(ORCID = "0000-0002-0730-4694")) 8 | ## add contributor template (middle name/initial optional) 9 | #person("First Middle", "Last", , 10 | #email = "email@address.com", role = c("ctb")) 11 | ) 12 | Description: Robust tools for automating large and/or time-consuming tasks 13 | involving the collection of Twitter data via 'rtweet' . 14 | Depends: 15 | R (>= 3.1.0) 16 | License: MIT + file LICENSE 17 | Encoding: UTF-8 18 | LazyData: true 19 | Roxygen: list(markdown = TRUE) 20 | RoxygenNote: 7.0.2 21 | Imports: 22 | rtweet, 23 | dapr, 24 | crayon, 25 | progress, 26 | tidyselect, 27 | clisymbols, 28 | tfse, 29 | tibble 30 | URL: https://github.com/mkearney/rtweet.download 31 | BugReports: https://github.com/mkearney/rtweet.download/issues 32 | Suggests: 33 | testthat (>= 2.1.0) 34 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2019 Michael W. 
Kearney 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /man/set_token.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/set-token.R 3 | \name{set_token} 4 | \alias{set_token} 5 | \title{Set token} 6 | \usage{ 7 | set_token(x) 8 | } 9 | \arguments{ 10 | \item{x}{Either a token or path to a token. If path, the token is read using 11 | \code{readRDS} (this is default {rtweet} behavior). If token, it is saved 12 | in the current working directory as ".rtweet_token"} 13 | } 14 | \value{ 15 | The token is invisibly returned but more importantly the environment 16 | variable "TWITTER_PAT" is set to point toward the saved token file. This 17 | will be reset at the end of the session. 
18 | } 19 | \description{ 20 | Stores Twitter API token information for the duration of the session 21 | } 22 | \examples{ 23 | 24 | ## if your system already has an environment variable for an rtweet token, 25 | ## this will return the path 26 | (pat <- Sys.getenv("TWITTER_PAT")) 27 | 28 | ## if your system doesn't have this environment variable OR if you wish to 29 | ## override this value, then enter the desired path or token object 30 | #pat <- "/path/to/rtweet-token.rds" 31 | 32 | ## and then set the token for use for the remainder of the session 33 | #set_token(pat) 34 | 35 | } 36 | -------------------------------------------------------------------------------- /man/search_tweets_download.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/search_tweets.R 3 | \name{search_tweets_download} 4 | \alias{search_tweets_download} 5 | \title{Search tweets download} 6 | \usage{ 7 | search_tweets_download(q, n, ..., .stw = NULL) 8 | } 9 | \arguments{ 10 | \item{q}{Search query to be used to find matching tweets from the past 6-9 11 | days. See \code{\link[rtweet]{search_tweets}}) for more information on 12 | Twitter search query syntax.} 13 | 14 | \item{n}{Number of desired tweets to return. 
See details for more information 15 | about relevant rate limits.} 16 | 17 | \item{...}{Other parameters are passed to 18 | \code{\link[rtweet]{search_tweets}}).} 19 | 20 | \item{.stw}{Optionally supply a preexisting output vector (like that returned 21 | by this function)–if NULL, the default, this function will start fresh.} 22 | } 23 | \value{ 24 | Returns a list data frames of search data 25 | } 26 | \description{ 27 | Automate the data collection for large Twitter searches (via 28 | \code{\link[rtweet]{search_tweets}}) 29 | } 30 | \details{ 31 | This function attempts to search and collect data for up to 18,000 32 | (when using the default rtweet authorization token) or 63,000 (when using 33 | token generated from your own Twitter app with sufficient bearer token- 34 | level permissions) statuses every 15 minutes, sleeping between calls unti 35 | Twitter's API rate limit resets. 36 | } 37 | \concept{.stw} 38 | -------------------------------------------------------------------------------- /man/lookup_users_download.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lookup_users.R 3 | \name{lookup_users_download} 4 | \alias{lookup_users_download} 5 | \title{Lookup users download} 6 | \usage{ 7 | lookup_users_download(x, output = NULL, verbose = TRUE) 8 | } 9 | \arguments{ 10 | \item{x}{Either a data frame or character vector containing user identifiers 11 | for which users will be retrieved. See details for more information 12 | about how this works.} 13 | 14 | \item{output}{Optionally supply a preexisting output vector (like that returned 15 | by this function)–if NULL, the default, this function will start fresh.} 16 | 17 | \item{verbose}{Whether the function should print information/status updates, 18 | defaults to TRUE. 
Setting this to FALSE will silent most printing.} 19 | } 20 | \value{ 21 | Returns a list data frames with looked up user information. See 22 | \code{\link[rtweet]{lookup_users}} for more information. 23 | } 24 | \description{ 25 | Automate users data collection for a large number of users (via 26 | \code{\link[rtweet]{lookup_users}}) 27 | } 28 | \details{ 29 | This function attempts to lookup users information for as many as 30 | 90,000 users every 15 minutes, sleeping between calls until Twitter's API 31 | rate limit resets. 32 | 33 | It's worth noting that information on many users will not be returned due 34 | to changed screen names, account suspensions, deactivations, etc. 35 | } 36 | \concept{users} 37 | -------------------------------------------------------------------------------- /R/assert.R: -------------------------------------------------------------------------------- 1 | 2 | assert_that <- function(..., env = parent.frame(), msg = NULL) { 3 | res <- see_if(..., env = env, msg = msg) 4 | if (res) return(TRUE) 5 | 6 | stop(assert_error(attr(res, "msg"))) 7 | } 8 | 9 | assert_error <- function (message, call = NULL) { 10 | class <- c("assert_error", "simpleError", "error", "condition") 11 | structure(list(message = message, call = call), class = class) 12 | } 13 | 14 | see_if <- function(..., env = parent.frame(), msg = NULL) { 15 | asserts <- eval(substitute(alist(...))) 16 | 17 | for (assertion in asserts) { 18 | res <- tryCatch({ 19 | eval(assertion, env) 20 | }, assert_error = function(e) { 21 | structure(FALSE, msg = e$message) 22 | }) 23 | 24 | if (!res) { 25 | if (is.null(msg)) 26 | msg <- get_message(res, assertion, env) 27 | return(structure(FALSE, msg = msg)) 28 | } 29 | } 30 | 31 | res 32 | } 33 | 34 | 35 | has_msg <- function(x) !is.null(attr(x, "msg", exact = TRUE)) 36 | 37 | get_message <- function(res, call, env = parent.frame()) { 38 | if (has_msg(res)) { 39 | return(attr(res, "msg")) 40 | } 41 | f <- eval(call[[1]], env) 42 | if 
(!is.primitive(f)) call <- match.call(f, call) 43 | fname <- deparse(call[[1]]) 44 | fail <- on_fail(f) %||% base_fs[[fname]] %||% fail_default 45 | fail(call, env) 46 | } 47 | 48 | fail_default <- function(call, env) { 49 | call_string <- deparse(call, width.cutoff = 60L) 50 | if (length(call_string) > 1L) { 51 | call_string <- paste0(call_string[1L], "...") 52 | } 53 | paste0(call_string, " is not TRUE") 54 | } 55 | 56 | on_fail <- function(x) attr(x, "fail") 57 | 58 | base_fs <- new.env(parent = emptyenv()) 59 | -------------------------------------------------------------------------------- /R/rate_limit.R: -------------------------------------------------------------------------------- 1 | 2 | determine_token <- function(token, query) { 3 | ## if next_token already exists 4 | if ("next_token" %in% names(token)) { 5 | if (token[["next_token"]] == "user" && "bearer" %in% names(token)) { 6 | token[["next_token"]] <- "bearer" 7 | token[["token"]] <- "user" 8 | return(token) 9 | } 10 | if (token[["next_token"]] == "bearer") { 11 | token[["token"]] <- "bearer" 12 | token[["next_token"]] <- "user" 13 | return(token) 14 | } 15 | ## if only user token, then don't need to change anything 16 | return(token) 17 | } 18 | 19 | 20 | ## if not bearable, then just use the one 21 | if (!"bearer" %in% names(token)) { 22 | token[["token"]] <- token[["user"]] 23 | token[["next_token"]] <- token[["user"]] 24 | return(token) 25 | } 26 | ## determine next token 27 | rlu <- rate_limit2(token = token[["user"]], query = query) 28 | rlb <- rate_limit2(token = token[["bearer"]], query = query) 29 | if (isTRUE(rlb[["remaining"]] > rlu[["remaining"]])) { 30 | token[["token"]] <- token[["bearer"]] 31 | token[["next_token"]] <- token[["user"]] 32 | } else if (isTRUE(rlu[["remaining"]] > rlb[["remaining"]])) { 33 | token[["token"]] <- token[["user"]] 34 | token[["next_token"]] <- token[["bearer"]] 35 | } else if (isTRUE(rlu[["reset"]] <= rlb[["reset"]])) { 36 | token[["token"]] <- 
token[["user"]] 37 | token[["next_token"]] <- token[["bearer"]] 38 | } else { 39 | token[["token"]] <- token[["bearer"]] 40 | token[["next_token"]] <- token[["user"]] 41 | } 42 | token 43 | } 44 | 45 | prep_tokens <- function(token = NULL) { 46 | token <- token %||% rtweet::get_token() 47 | if (!is_bearable(token)) { 48 | return(list(user = token)) 49 | } 50 | list(user = token, bearer = rtweet::bearer_token(token)) 51 | } 52 | 53 | -------------------------------------------------------------------------------- /man/get_followers_download.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/followers.R 3 | \name{get_followers_download} 4 | \alias{get_followers_download} 5 | \title{Get followers download} 6 | \usage{ 7 | get_followers_download(x, ..., output = NULL, verbose = TRUE) 8 | } 9 | \arguments{ 10 | \item{x}{Either a data frame or character vector containing user identifiers 11 | for which follower IDs will be retrieved. See details for more information 12 | about how this works.} 13 | 14 | \item{...}{If \code{x} is a data frame this can be used to select columns 15 | containing the appropriate user identifying information (user_id and/or 16 | screen_name). This uses the tidyselect specification. If \code{x} is a 17 | character vector, then the first unnamed or non-argument named value is 18 | assumed to be labels (screen names) corresponding with \code{x}.} 19 | 20 | \item{output}{Optionally supply a preexisting output vector (like that returned 21 | by this function)–if NULL, the default, this function will start fresh.} 22 | 23 | \item{verbose}{Whether the function should print information/status updates, 24 | defaults to TRUE. Setting this to FALSE will silent most printing.} 25 | } 26 | \value{ 27 | Returns a list data frames with user and follower ID information. See 28 | \code{\link[rtweet]{get_followers}} for more information. 
29 | } 30 | \description{ 31 | Automate the collection of follower IDs for a large number of users (via 32 | \code{\link[rtweet]{get_followers}}) 33 | } 34 | \details{ 35 | This function attempts to retrieve up to 75,000 follower IDs every 36 | 15 minutes, sleeping between calls until Twitter's API rate 37 | limit resets. If your API token is linked to your own Twitter APP and has 38 | appropriate permissions to create a 'bearer token', then this function will 39 | collect closer to 150,000 followers per 15 mintues. 40 | } 41 | \concept{followers} 42 | -------------------------------------------------------------------------------- /R/set-token.R: -------------------------------------------------------------------------------- 1 | 2 | #' Set token 3 | #' 4 | #' Stores Twitter API token information for the duration of the session 5 | #' 6 | #' @param x Either a token or path to a token. If path, the token is read using 7 | #' \code{readRDS} (this is default {rtweet} behavior). If token, it is saved 8 | #' in the current working directory as ".rtweet_token" 9 | #' @return The token is invisibly returned but more importantly the environment 10 | #' variable "TWITTER_PAT" is set to point toward the saved token file. This 11 | #' will be reset at the end of the session. 
12 | #' @examples 13 | #' 14 | #' ## if your system already has an environment variable for an rtweet token, 15 | #' ## this will return the path 16 | #' (pat <- Sys.getenv("TWITTER_PAT")) 17 | #' 18 | #' ## if your system doesn't have this environment variable OR if you wish to 19 | #' ## override this value, then enter the desired path or token object 20 | #' #pat <- "/path/to/rtweet-token.rds" 21 | #' 22 | #' ## and then set the token for use for the remainder of the session 23 | #' #set_token(pat) 24 | #' 25 | #' @export 26 | set_token <- function(x) { 27 | UseMethod("set_token") 28 | } 29 | 30 | #' @export 31 | set_token.character <- function(x) { 32 | if (!file.exists(x)) { 33 | stop("Couldn't find token file", call. = FALSE) 34 | } 35 | stopifnot( 36 | is_usertoken(readRDS(x)) || is_bearer(readRDS(x)) 37 | ) 38 | Sys.setenv(TWITTER_PAT = x) 39 | complete("Environment variable set: 'TWITTER_PAT=" %P% x, "'") 40 | invisible(readRDS(x)) 41 | } 42 | 43 | #' @export 44 | set_token.Token <- function(x) { 45 | saveRDS(x, ".rtweet_token.rds") 46 | Sys.setenv(TWITTER_PAT = ".rtweet_token.rds") 47 | complete("Token saved and environment variable set: 'TWITTER_PAT=.rtweet_token.rds'") 48 | invisible(x) 49 | } 50 | 51 | #' @export 52 | set_token.bearer <- function(x) { 53 | saveRDS(x, ".rtweet_token.rds") 54 | Sys.setenv(TWITTER_PAT = ".rtweet_token.rds") 55 | complete("Token saved and environment variable set: 'TWITTER_PAT=.rtweet_token.rds'") 56 | invisible(x) 57 | } 58 | -------------------------------------------------------------------------------- /R/search_tweets.R: -------------------------------------------------------------------------------- 1 | #' Search tweets download 2 | #' 3 | #' Automate the data collection for large Twitter searches (via 4 | #' \code{\link[rtweet]{search_tweets}}) 5 | #' 6 | #' @param q Search query to be used to find matching tweets from the past 6-9 7 | #' days. 
See \code{\link[rtweet]{search_tweets}}) for more information on 8 | #' Twitter search query syntax. 9 | #' @param n Number of desired tweets to return. See details for more information 10 | #' about relevant rate limits. 11 | #' @param ... Other parameters are passed to 12 | #' \code{\link[rtweet]{search_tweets}}). 13 | #' @param .stw Optionally supply a preexisting output vector (like that returned 14 | #' by this function)–if NULL, the default, this function will start fresh. 15 | #' @return Returns a list data frames of search data 16 | #' @family .stw 17 | #' @details This function attempts to search and collect data for up to 18,000 18 | #' (when using the default rtweet authorization token) or 63,000 (when using 19 | #' token generated from your own Twitter app with sufficient bearer token- 20 | #' level permissions) statuses every 15 minutes, sleeping between calls unti 21 | #' Twitter's API rate limit resets. 22 | #' @export 23 | search_tweets_download <- function(q, n, ..., .stw = NULL) { 24 | total <- n 25 | token <- prep_tokens(rtweet::get_token()) 26 | .stw <- prep_stwout(.stw, n, token) 27 | first <- TRUE 28 | while (total > 0) { 29 | token <- determine_token(token, "search_tweets") 30 | count <- determine_count(token, "search_tweets", first = first) 31 | .stw[[length(.stw) + 1L]] <- search_tweets_warning_nap(q, 32 | n = count, ..., token = token$token) 33 | total <- total - count 34 | nrows_collected <- sum(dapr::vap_int(.stw, NROW)) 35 | spf <- repc("1", nchar(use_commas_int(n))) 36 | complete("Collected data for ", cint(nrows_collected, spf), 37 | " tweets (", cdbl((n - total) / n * 100, "11.1"), "%)") 38 | first <- FALSE 39 | } 40 | .stw 41 | } 42 | 43 | prep_stwout <- function(.stw = NULL, n, token) { 44 | num <- 18000 45 | if ("bearer" %in% names(token)) { 46 | num <- c(num, 45000) 47 | } 48 | len <- ceiling(n / sum(num)) + 2 49 | if (!is_null(.stw)) { 50 | if (length(.stw) < len) { 51 | .stw <- c(.stw, vector("list", len - length(.stw))) 52 | } 
53 | return(.stw) 54 | } 55 | vector("list", len) 56 | } 57 | -------------------------------------------------------------------------------- /man/get_friends_download.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/friends.R 3 | \name{get_friends_download} 4 | \alias{get_friends_download} 5 | \title{Get friends download} 6 | \usage{ 7 | get_friends_download(x, ..., output = NULL, verbose = TRUE) 8 | } 9 | \arguments{ 10 | \item{x}{Either a data frame or character vector containing user identifiers 11 | for which friends IDs will be retrieved. See details for more information 12 | about how this works.} 13 | 14 | \item{...}{If \code{x} is a data frame this can be used to select columns 15 | containing the appropriate user identifying information (user_id and/or 16 | screen_name). This uses the tidyselect specification. If \code{x} is a 17 | character vector, then the first unnamed or non-argument named value is 18 | assumed to be labels (screen names) corresponding with \code{x}.} 19 | 20 | \item{output}{Optionally supply a preexisting output vector (like that returned 21 | by this function)–if NULL, the default, this function will start fresh.} 22 | 23 | \item{verbose}{Whether the function should print information/status updates, 24 | defaults to TRUE. Setting this to FALSE will silent most printing.} 25 | } 26 | \value{ 27 | Returns a list data frames with user and friend ID information. See 28 | \code{\link[rtweet]{get_friends}} for more information. 29 | } 30 | \description{ 31 | Automate the collection of friend IDs for a large number of users (via 32 | \code{\link[rtweet]{get_friends}}) 33 | } 34 | \details{ 35 | This function attempts to retrieve friends IDs for as many as 15 to 36 | 30 users every 15 minutes, sleeping between calls until Twitter's API rate 37 | limit resets. 
If your API token is linked to your own Twitter APP and has 38 | appropriate permissions to create a 'bearer token', then this function will 39 | make 30 calls every 15 minutes. If the token cannot be used via bearer 40 | authorization, then 15 calls are made every 15 minutes. 41 | 42 | It's worth noting that 15 calls does not mean 15 complete friend IDs. 43 | Twitter limits the number of returned friends returned by a single call to 44 | 5,000, which is more friends than 95\% of Twitter users, but at times is 45 | frustrating because some users like Senator Amy Klobuchar follow over 46 | 170,000 accounts–that many accounts can take 45 mintues to collect! It's 47 | interesting, though, that Amy Klobuchar is also unusually prolific in both 48 | following Twitter users and writing/sponsoring legislation. 49 | } 50 | \concept{friends} 51 | -------------------------------------------------------------------------------- /R/wrappers.R: -------------------------------------------------------------------------------- 1 | run_it_back <- function(.f, .times = 3L) { 2 | eval(parse(text = paste0('function(...) 
{ 3 | .i <- 0L 4 | while ( 5 | is.null(x <- tryCatch(', .f, '(...), error = function(e) NULL)) && 6 | .i <= ', .times, ' 7 | ) { 8 | .i <- .i + 1L 9 | Sys.sleep(1.5) 10 | } 11 | x 12 | }'))) 13 | } 14 | 15 | get_timeline2 <- run_it_back("rtweet::get_timeline") 16 | 17 | rate_limit2 <- run_it_back("rtweet::rate_limit") 18 | 19 | lookup_users2 <- run_it_back("rtweet::lookup_users") 20 | 21 | search_tweets2 <- run_it_back("rtweet::search_tweets") 22 | 23 | search_users2 <- run_it_back("rtweet::search_users") 24 | 25 | get_friends2 <- run_it_back("rtweet::get_friends") 26 | 27 | get_followers2 <- run_it_back("rtweet::get_followers") 28 | 29 | get_favorites2 <- run_it_back("rtweet::get_favorites") 30 | 31 | warning_fun <- function(w, nap = 30) { 32 | if (any(grepl("Rate limit|\\b88\\b", w))) { 33 | nap_retry(nap) 34 | } 35 | } 36 | 37 | exhaust_it <- function(.fun, .x, ..., .token) { 38 | args <- list("placeholder", ..., token = .token) 39 | o <- vector("list", length(.x)) 40 | for (i in seq_along(.x)) { 41 | args[[1]] <- .x[[i]] 42 | o[[i]] <- do.call(.fun, args) 43 | } 44 | o 45 | } 46 | # o <- exhaust_it(rtweet::search_tweets, c("#rstats", "#rtweet"), n = 200, 47 | # .token = rtweet::get_token()) 48 | 49 | fun_warning_nap <- function(.fn, .f, iter = 5, nap = 30) { 50 | .f <- c(.f, paste0(.fn, "_w", seq_len(iter - 1))) 51 | body <- paste0(' ', .fn, '_w', seq_len(iter), ' <- function(...) { 52 | tryCatch( 53 | ', .f, '(...), 54 | warning = function(w) { 55 | if (any(grepl("Rate limit|\\b88\\b|too many", w, ignore.case = TRUE))) { 56 | nap_retry(', nap, ') 57 | } 58 | ', .f, '(...) 59 | }, 60 | error = function(e) { 61 | Sys.sleep(1.0) 62 | tryCatch(', .f, '(...), error = function(e) tibble::tibble()) 63 | } 64 | ) 65 | }', collapse = "\n") 66 | 67 | eval(parse(text = paste0( 68 | 'function(...) 
{\n', 69 | body, '\n', 70 | 71 | ' ', .fn, '_w', iter, '(...)\n}' 72 | ))) 73 | } 74 | 75 | get_friends_warning_nap <- fun_warning_nap("get_friends", "get_friends", 5) 76 | 77 | get_followers_warning_nap <- fun_warning_nap("get_followers", "get_followers", 5) 78 | 79 | search_tweets_warning_nap <- fun_warning_nap("search_tweets", "search_tweets", 5) 80 | 81 | get_timeline_warning_nap <- fun_warning_nap("get_timeline", "get_timeline", 5) 82 | 83 | get_favorites_warning_nap <- fun_warning_nap("get_favorites", "get_favorites", 5) 84 | 85 | lookup_users_warning_nap <- fun_warning_nap("lookup_users", "lookup_users", 5) 86 | -------------------------------------------------------------------------------- /R/token.R: -------------------------------------------------------------------------------- 1 | determine_token <- function(token, query) { 2 | ## if next_token already exists 3 | if ("next_token" %in% names(token)) { 4 | currentoken <- token$token 5 | token$token <- token$next_token 6 | token$next_token <- currentoken 7 | return(token) 8 | } 9 | ## if not bearable, then just use the one 10 | if (!"bearer" %in% names(token)) { 11 | token$token <- "user" 12 | token$next_token <- "user" 13 | return(token) 14 | } 15 | ## determine next token 16 | rlu <- rate_limit2(token = token$user, query = query) 17 | rlb <- rate_limit2(token = token$bearer, query = query) 18 | if (isTRUE(rlb$remaining > rlu$remaining)) { 19 | token$token <- "bearer" 20 | token$next_token <- "user" 21 | } else if (isTRUE(rlu$remaining > rlb$remaining)) { 22 | token$token <- "user" 23 | token$next_token <- "bearer" 24 | } else if (isTRUE(rlu$reset_at <= rlb$reset_at)) { 25 | token$token <- "user" 26 | token$next_token <- "bearer" 27 | } else { 28 | token$token <- "bearer" 29 | token$next_token <- "user" 30 | } 31 | token 32 | } 33 | this_token <- function(token) token[[token[["token"]]]] 34 | 35 | next_token <- function(token) token[[token[["next_token"]]]] 36 | 37 | determine_token_ <- function(token, 
query) { 38 | ## if next_token already exists 39 | if ("next_token" %in% names(token)) { 40 | if (token[["next_token"]] == "user" && "bearer" %in% names(token)) { 41 | token[["next_token"]] <- "bearer" 42 | token[["token"]] <- "user" 43 | return(token) 44 | } 45 | if (token[["next_token"]] == "bearer") { 46 | token[["next_token"]] <- "bearer" 47 | return(token) 48 | } 49 | return(token) 50 | } 51 | ## if not bearable, then just use the one 52 | if (!"bearer" %in% names(token)) { 53 | token[["token"]] <- token[["user"]] 54 | token[["next_token"]] <- token[["user"]] 55 | return(token) 56 | } 57 | ## determine next token 58 | rlu <- rate_limit2(token = token[["user"]], query = query) 59 | rlb <- rate_limit2(token = token[["bearer"]], query = query) 60 | if (isTRUE(rlb[["remaining"]] > rlu[["remaining"]])) { 61 | token[["token"]] <- token[["bearer"]] 62 | token[["next_token"]] <- token[["user"]] 63 | } else if (isTRUE(rlu[["remaining"]] > rlb[["remaining"]])) { 64 | token[["token"]] <- token[["user"]] 65 | token[["next_token"]] <- token[["bearer"]] 66 | } else if (isTRUE(rlu[["reset"]] <= rlb[["reset"]])) { 67 | token[["token"]] <- token[["user"]] 68 | token[["next_token"]] <- token[["bearer"]] 69 | } else { 70 | token[["token"]] <- token[["bearer"]] 71 | token[["next_token"]] <- token[["user"]] 72 | } 73 | token 74 | } 75 | 76 | has_bearer <- function(token) { 77 | isTRUE("bearer" %in% names(token)) 78 | } 79 | 80 | prep_tokens <- function(token) { 81 | token <- add_bearable_attr(token) 82 | if (!is_bearable(token)) { 83 | return(list(user = token)) 84 | } 85 | list(user = token, bearer = rtweet::bearer_token(token)) 86 | } 87 | 88 | has_bearable_attr <- function(x) isTRUE("is_bearable" %in% names(attributes(x))) 89 | 90 | add_bearable_attr <- function(token) { 91 | if (has_bearable_attr(token)) { 92 | return(token) 93 | } 94 | attr(token, "is_bearable") <- isTRUE(grepl("read-write", rtweet:::api_access_level(token))) 95 | token 96 | } 97 | 98 | 99 | is_bearable <- 
function(token = NULL) { 100 | isTRUE(attr(token, "is_bearable")) 101 | } 102 | 103 | is_bearer <- function(x) inherits(x, "bearer") 104 | -------------------------------------------------------------------------------- /R/utils.R: -------------------------------------------------------------------------------- 1 | pgray <- function(x) { 2 | crayon::make_style("#444444")(x) 3 | } 4 | pgold <- function(x) { 5 | crayon::make_style("#884400")(x) 6 | } 7 | pblue <- function(x) { 8 | crayon::make_style("#001199")(x) 9 | } 10 | complete <- function(..., fill = TRUE) { 11 | cat(paste0(crayon::green(clisymbols::symbol$tick), " ", ...), fill = fill) 12 | } 13 | 14 | info <- function(..., fill = TRUE) { 15 | cat(paste0(crayon::magenta(clisymbols::symbol$info), " ", ...), fill = fill) 16 | } 17 | 18 | dotdotdot <- function(..., fill = TRUE) { 19 | cat(paste0(crayon::magenta(clisymbols::symbol$ellipsis), " ", ...), fill = fill) 20 | } 21 | 22 | this <- function(..., fill = TRUE) { 23 | cat(paste0(crayon::blue(clisymbols::symbol$arrow_right), " ", ...), fill = fill) 24 | } 25 | 26 | 27 | cat_line <- function(...) 
{ 28 | cat(paste0(..., "\n"), sep = "") 29 | } 30 | 31 | cint2 <- function(x, sp = NULL) { 32 | if (!is.integer(x)) { 33 | x <- round(x, 0) 34 | } 35 | if (is.null(sp) || sp < max(nchar(x))) { 36 | sp <- max(nchar(x)) 37 | } 38 | x <- sub("\\.\\d+", "", sprintf(paste0("%", sp, "f"), x)) 39 | while (grepl("\\d{4}", x)) { 40 | x <- sub("(?<=\\d)((?=\\d{3}$)|(?=\\d{3},))", ",", x, perl = TRUE) 41 | } 42 | x 43 | } 44 | 45 | use_commas_dbl <- function(x) { 46 | x <- as.character(x) 47 | dec <- sub("^[^\\.]+(?=\\.)", "", x, perl = TRUE) 48 | x <- tfse::regmatches_first(x, "^[^\\.]+") 49 | while (grepl("\\d{4}", x)) { 50 | x <- sub("(?<=\\d)((?=\\d{3}$)|(?=\\d{3},))", ",", x, perl = TRUE) 51 | x <- sub("^[ ]{1}", "", x) 52 | } 53 | paste0(x, dec) 54 | } 55 | use_commas_int <- function(x) { 56 | x <- as.character(x) 57 | while (grepl("\\d{4}", x)) { 58 | x <- sub("(?<=\\d)((?=\\d{3}$)|(?=\\d{3},))", ",", x, perl = TRUE) 59 | x <- sub("^[ ]{1}", "", x) 60 | } 61 | x 62 | } 63 | 64 | format_num <- function(x, f = "1.1") { 65 | f2 <- nchar(sub("^[^\\.]{0,}\\.", "", f)) 66 | f1 <- nchar(sub("(?<=\\.).*", "", f, perl = TRUE)) + f2 67 | f <- paste0("%", f1, ".", f2, "f") 68 | sprintf(f, x) 69 | } 70 | format_int <- function(x, f = "1") { 71 | f <- paste0("%", nchar(f), ".", 0, "f") 72 | sprintf(f, x) 73 | } 74 | cdbl <- function(x, f = "1.1") { 75 | x <- format_num(x, f) 76 | use_commas_dbl(x) 77 | } 78 | cint <- function(x, f = "1") { 79 | x <- format_int(x, f) 80 | use_commas_int(x) 81 | } 82 | repc <- function(x, n, collapse = "") paste(rep(x, n), collapse = collapse) 83 | 84 | capture_dots <- function(...) { 85 | eval(substitute(alist(...)), envir = parent.frame()) 86 | } 87 | 88 | pretty_dots <- function(...) { 89 | dots <- capture_dots(...) 
90 | if (length(dots) == 0) { 91 | return(NULL) 92 | } 93 | if (is.null(names(dots))) { 94 | names(dots) <- expr_names(dots) 95 | } 96 | nms <- names(dots) 97 | if ("" %in% nms) { 98 | names(dots)[nms == ""] <- expr_names(dots[nms == ""]) 99 | } 100 | dots 101 | } 102 | 103 | expr_names <- function(args) { 104 | vapply(args, deparse, USE.NAMES = FALSE, FUN.VALUE = character(1)) 105 | } 106 | 107 | mmap <- function(f, ...) { 108 | f <- match.fun(f) 109 | mapply(FUN = f, ..., SIMPLIFY = FALSE, USE.NAMES = FALSE) 110 | } 111 | 112 | 113 | `%||%` <- function(x, y) { 114 | if (is_null(x)) 115 | y 116 | else x 117 | } 118 | 119 | is_null <- function(x) length(x) == 0L 120 | 121 | rd_timestamp <- function() format(Sys.time(), "%b %d %H:%M:%S") 122 | 123 | is_usertoken <- function(x) inherits(x, "Token") 124 | 125 | not_token <- function(x) is.list(x) && !is_bearable(x) && !is_usertoken(x) 126 | 127 | n_row <- function(...) { 128 | NROW(tryCatch(..., error = function(e) NULL)) 129 | } 130 | 131 | dots1 <- function(...) 
{ 132 | tryCatch(list(...)[[1]], error = function(e) NULL) 133 | } 134 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # rtweet.download 5 | 6 | 7 | 8 | [![CRAN 9 | status](https://www.r-pkg.org/badges/version/rtweet.download)](https://CRAN.R-project.org/package=rtweet.download) 10 | [![Lifecycle: 11 | experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://www.tidyverse.org/lifecycle/#experimental) 12 | 13 | 14 | Automating the collection of Twitter data (via 15 | [**{rtweet}**](https://rtweet.info)) 16 | 17 | ## Installation 18 | 19 | You can install the released version of **{rtweet.download}** from 20 | [CRAN](https://CRAN.R-project.org) with: 21 | 22 | ``` r 23 | install.packages("rtweet.download") 24 | ``` 25 | 26 | And the development version from [GitHub](https://github.com/) with: 27 | 28 | ``` r 29 | # install.packages("remotes") 30 | remotes::install_github("mkearney/rtweet.download") 31 | ``` 32 | 33 | ## Friends IDs 34 | 35 | Twitter’s `"friends/ids"` API endpoint is rate limited to 15 requests 36 | (or the friend IDs of 15[\*](#notes) accounts) per 15 minutes. So while 37 | a single call using `rtweet::get_friends()` can retrieve the friend IDs 38 | of up to 15 users, a single call using 39 | `rtweet.download::get_friends_download()` can retrieve the friend IDs of 40 | hundreds or even thousands of users\! 41 | 42 | | | | 43 | | --------------------------------------- | ------------------------ | 44 | | **API Feature** | **Value** | 45 | |   Endpoint | `"friends/ids"` | 46 | |   Rate limit (per 15 min.) 
| `15` | 47 | |   Friends per request | `5000`[\*](#notes) | 48 | | **R Package** | **Function** | 49 | |   {rtweet} | `get_friends()` | 50 | |   {rtweet.download} | `get_friends_download()` | 51 | 52 | The example below uses `get_friends_download()` to automate the 53 | collection of friend (accounts followed by) IDs of users on [@Teradata’s 54 | list of data science 55 | influencers](https://twitter.com/Teradata/lists/data-science-influencers/members). 56 | 57 | ``` r 58 | ## get members on data science influencers influence 59 | data_sci_influencers <- rtweet::lists_members( 60 | owner_user = "Teradata", slug = "data-science-influencers" 61 | ) 62 | 63 | ## download friend IDs for each user 64 | fds <- get_friends_download(data_sci_influencers$screen_name) 65 | 66 | ## preview data 67 | head(fds) 68 | ``` 69 | 70 | ## Users data 71 | 72 | Twitter’s `"users/lookup"` API endpoint is rate limited to 900 requests 73 | (or 90,000 users) per 15 minutes. So while a single call using 74 | `rtweet::lookup_users()` can retrieve data on up to 90,000 users, a 75 | single call using `rtweet.download::lookup_users_download()` can collect 76 | data on hundreds of thousands or even millions of users\! 77 | 78 | | | | 79 | | --------------------------------------- | ------------------------- | 80 | | **API Feature** | **Value** | 81 | |   Endpoint | `"users/lookup"` | 82 | |   Rate limit (per 15 min.) | `900` | 83 | |   Users per request | `100` | 84 | | **R Package** | **Function** | 85 | |   {rtweet} | `lookup_users()` | 86 | |   {rtweet.download} | `lookup_users_download()` | 87 | 88 | The example below uses `lookup_users_download()` to automate data 89 | collection for the previously collected accounts followed by data 90 | science influencers. 
91 | 92 | ``` r 93 | ## download users data 94 | fds_data <- lookup_users_download(fds$user_id) 95 | 96 | ## preview data 97 | head(fds) 98 | ``` 99 | 100 | ## Notes 101 | 102 | \* The `"friends/ids"` endpoint returns the **up to 5,000 friend IDs of 103 | a single user**, so 15 requests can only return all the friend IDs of 15 104 | users if all 15 of those users follow 5,000 or fewer accounts. To 105 | retrieve all the friend IDs for users following more than 5,000 106 | accounts, multiple requests (friends\_count / 5,000) are required. 107 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r, include = FALSE} 8 | knitr::opts_chunk$set( 9 | collapse = TRUE, 10 | comment = "#>", 11 | fig.path = "man/figures/README-", 12 | out.width = "100%" 13 | ) 14 | library(rtweet.download) 15 | options(width = 90) 16 | ``` 17 | 18 | # rtweet.download 19 | 20 | 21 | [![CRAN status](https://www.r-pkg.org/badges/version/rtweet.download)](https://CRAN.R-project.org/package=rtweet.download) 22 | [![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://www.tidyverse.org/lifecycle/#experimental) 23 | 24 | 25 | Automating the collection of Twitter data (via [**{rtweet}**](https://rtweet.info)) 26 | 27 | ## Installation 28 | 29 | You can install the released version of **{rtweet.download}** from [CRAN](https://CRAN.R-project.org) with: 30 | 31 | ``` r 32 | install.packages("rtweet.download") 33 | ``` 34 | 35 | And the development version from [GitHub](https://github.com/) with: 36 | 37 | ``` r 38 | # install.packages("remotes") 39 | remotes::install_github("mkearney/rtweet.download") 40 | ``` 41 | 42 | 43 | ## Friends IDs 44 | 45 | Twitter's `"friends/ids"` API endpoint is rate limited to 15 requests (or the friend IDs of 15[*](#notes) accounts) per 15 
minutes. So while a single call using `rtweet::get_friends()` can retrieve the friend IDs of up to 15 users, a single call using `rtweet.download::get_friends_download()` can retrieve the friend IDs of hundreds or even thousands of users! 46 | 47 | | | | 48 | |----------------------------------------------|--------------------------| 49 | | **API Feature** | **Value** | 50 | |   Endpoint | `"friends/ids"` | 51 | |   Rate limit (per 15 min.) | `15` | 52 | |   Friends per request | `5000`[*](#notes) | 53 | | **R Package** | **Function** | 54 | |   {rtweet} | `get_friends()` | 55 | |   {rtweet.download} | `get_friends_download()` | 56 | 57 | The example below uses `get_friends_download()` to automate the collection of friend (accounts followed by) IDs of users on [\@Teradata's list of data science influencers](https://twitter.com/Teradata/lists/data-science-influencers/members). 58 | 59 | ``` r 60 | ## get members on data science influencers influence 61 | data_sci_influencers <- rtweet::lists_members( 62 | owner_user = "Teradata", slug = "data-science-influencers" 63 | ) 64 | 65 | ## download friend IDs for each user 66 | fds <- get_friends_download(data_sci_influencers$screen_name) 67 | 68 | ## preview data 69 | head(fds) 70 | ``` 71 | 72 | ## Users data 73 | 74 | Twitter's `"users/lookup"` API endpoint is rate limited to 900 requests (or 90,000 users) per 15 minutes. So while a single call using `rtweet::lookup_users()` can retrieve data on up to 90,000 users, a single call using `rtweet.download::lookup_users_download()` can collect data on hundreds of thousands or even millions of users! 75 | 76 | | | | 77 | |----------------------------------------------|---------------------------| 78 | | **API Feature** | **Value** | 79 | |   Endpoint | `"users/lookup"` | 80 | |   Rate limit (per 15 min.) 
| `900` | 81 | |   Users per request | `100` | 82 | | **R Package** | **Function** | 83 | |   {rtweet} | `lookup_users()` | 84 | |   {rtweet.download} | `lookup_users_download()` | 85 | 86 | The example below uses `lookup_users_download()` to automate data collection for the previously collected accounts followed by data science influencers. 87 | 88 | ``` r 89 | ## download users data 90 | fds_data <- lookup_users_download(fds$user_id) 91 | 92 | ## preview data 93 | head(fds) 94 | ``` 95 | 96 | 97 | ## Notes 98 | 99 | \* The `"friends/ids"` endpoint returns the **up to 5,000 friend IDs of a single user**, so 15 requests can only return all the friend IDs of 15 users if all 15 of those users follow 5,000 or fewer accounts. To retrieve all the friend IDs for users following more than 5,000 accounts, multiple requests (friends_count / 5,000) are required. 100 | -------------------------------------------------------------------------------- /R/lookup_users.R: -------------------------------------------------------------------------------- 1 | #' Lookup users download 2 | #' 3 | #' Automate users data collection for a large number of users (via 4 | #' \code{\link[rtweet]{lookup_users}}) 5 | #' 6 | #' @param x Either a data frame or character vector containing user identifiers 7 | #' for which users will be retrieved. See details for more information 8 | #' about how this works. 9 | #' @param output Optionally supply a preexisting output vector (like that returned 10 | #' by this function)–if NULL, the default, this function will start fresh. 11 | #' @param verbose Whether the function should print information/status updates, 12 | #' defaults to TRUE. Setting this to FALSE will silent most printing. 13 | #' @return Returns a list data frames with looked up user information. See 14 | #' \code{\link[rtweet]{lookup_users}} for more information. 
15 | #' @family users 16 | #' @details This function attempts to lookup users information for as many as 17 | #' 90,000 users every 15 minutes, sleeping between calls until Twitter's API 18 | #' rate limit resets. 19 | #' 20 | #' It's worth noting that information on many users will not be returned due 21 | #' to changed screen names, account suspensions, deactivations, etc. 22 | #' @export 23 | lookup_users_download <- function(x, output = NULL, verbose = TRUE) { 24 | UseMethod("lookup_users_download") 25 | } 26 | 27 | #' @export 28 | lookup_users_download.data.frame <- function(x, output = NULL, verbose = TRUE) { 29 | if (n_row(x) == 0) { 30 | stop("No users found in this data frame") 31 | } 32 | x <- x[, dapr::vap_lgl(x, is.atomic), drop = FALSE] 33 | if (NCOL(x) == 0) { 34 | stop("No atomic columns found in this data frame") 35 | } 36 | if (ncol(x) == 1L && inherits(x[[1]], c("character", "factor"))) { 37 | x <- as.character(x[[1]]) 38 | return(lookup_users_download(x, output = output, verbose = verbose)) 39 | } 40 | if (grepl("user_id$|screen_name$", names(x)[1])) { 41 | x <- as.character(x[[1]]) 42 | return(lookup_users_download(x, output = output, verbose = verbose)) 43 | } 44 | if ("user_id" %in% names(x)) { 45 | x <- as.character(x[["user_id"]]) 46 | return(lookup_users_download(x, output = output, verbose = verbose)) 47 | } 48 | if ("screen_name" %in% names(x)) { 49 | x <- as.character(x[["screen_name"]]) 50 | return(lookup_users_download(x, output = output, verbose = verbose)) 51 | } 52 | if (any(grepl("user_id$|screen_name$", names(x)))) { 53 | x <- as.character(x[[grep("user_id$|screen_name$", names(x))[1]]]) 54 | return(lookup_users_download(x, output = output, verbose = verbose)) 55 | } 56 | stop("data frame must contain atomic 'user_id' or 'screen_name' column") 57 | } 58 | 59 | #' @export 60 | lookup_users_download.character <- function(x, output = NULL, verbose = TRUE) { 61 | x <- x[!is.na(x) & !duplicated(x)] 62 | 63 | ## prepare and create 
token(s) object 64 | token <- rtweet::get_token() 65 | 66 | ## if output vector is not supplied 67 | if (is_null(output)) { 68 | output <- vector("list", ceiling(length(x) / 90000) + 1L) 69 | } else { 70 | stopifnot( 71 | is.list(output), 72 | length(output) >= ceiling(length(x) / 90000) 73 | ) 74 | } 75 | tusrs <- length(x) 76 | if (verbose) { 77 | dotdotdot("This should take around ", cdbl(tusrs / 90000 * 15), " mins") 78 | } 79 | n <- usrs_rate_limit_sleep(token) 80 | ctr <- 0L 81 | 82 | tryCatch({ 83 | ## for loop 84 | for (i in seq_along(output)) { 85 | ## skip if data already exists 86 | if (n_row(output[[i]]) > 0) { 87 | ctr <- ctr + n_row(output[[i]]) 88 | x <- x[-seq_len(n_row(output[[i]]))] 89 | next 90 | } 91 | ## check rate limit remaining / change out token if possible 92 | while (n == 0) { 93 | n <- usrs_rate_limit_sleep(token) 94 | } 95 | 96 | if (n > length(x)) { 97 | n <- length(x) 98 | } 99 | 100 | ## lookup users data 101 | output[[i]] <- lookup_users_warning_nap(x[seq_len(n)], token = token) 102 | x <- x[-seq_len(n)] 103 | ctr <- ctr + n 104 | n <- 0L 105 | 106 | if (verbose) { 107 | complete(pgray(rd_timestamp()), 108 | "", pgold(cint(n_row(output[[i]]), "00,000")), 109 | pgray(" users looked up "), 110 | pgray(cli::symbol$ellipsis), pgray(" ("), 111 | pgray(cdbl(i / tusrs * 100, "1.1")), pgray("%)")) 112 | } 113 | if (length(x) == 0) { 114 | break 115 | } 116 | } 117 | output 118 | }, 119 | interrupt = function(i) return(output), 120 | error = function(e) return(output) 121 | ) 122 | } 123 | 124 | 125 | usrs_rate_limit_sleep <- function(token) { 126 | rl <- rate_limit2(query = "lookup_users", token = token) 127 | rlm <- rl[["remaining"]] %||% 0L 128 | if (rlm > 0) { 129 | return(rlm * 100L) 130 | } 131 | s <- as.numeric(rl[["reset"]] %||% 900, "secs") 132 | nap_wait(s + 60) 133 | 90000L 134 | } 135 | -------------------------------------------------------------------------------- /R/lookup_users_og.R: 
-------------------------------------------------------------------------------- 1 | #' #' Lookup users download 2 | #' #' 3 | #' #' Automate users data collection for a large number of users (via 4 | #' #' \code{\link[rtweet]{lookup_users}}) 5 | #' #' 6 | #' #' @param x A vector of user IDs or screen names for which data will be looked up. 7 | #' #' See details for more information about how this works. 8 | #' #' @param .usr Optionally supply a preexisting output vector (like that returned 9 | #' #' by this function)–if NULL, the default, this function will start fresh. 10 | #' #' @return Returns a list data frames of user data 11 | #' #' @family .usr 12 | #' #' @details This function attempts to lookup data for 90,000 users every 15 13 | #' #' minutes, sleeping between calls until Twitter's API rate limit resets. 14 | #' #' @export 15 | #' lookup_users_download <- function(x, output = NULL, verbose = TRUE) { 16 | #' x <- unique(x[!is.na(x)]) 17 | #' 18 | #' ## if .usr is not supplied 19 | #' if (is_null(.usr)) { 20 | #' .usr <- vector("list", ceiling(length(x) / 90000) + 1L) 21 | #' } else { 22 | #' ## if .usr is supplied, ignore any users w/ data already collected 23 | #' if (all(grepl("^\\d+$", x))) { 24 | #' dr <- x %in% unlist(lapply(.usr, "[[", "user_id")) 25 | #' } else { 26 | #' dr <- x %in% unlist(lapply(.usr, "[[", "screen_name")) 27 | #' } 28 | #' if (any(dr)) { 29 | #' x <- x[!dr] 30 | #' complete("Omit ", cint(sum(dr)), " users already collected") 31 | #' } 32 | #' } 33 | #' tusrs <- length(x) 34 | #' info("This should take around ", cdbl(tusrs / 90000 * 15), " mins") 35 | #' 36 | #' ## for loop 37 | #' for (i in seq_along(.usr)) { 38 | #' ## skip if data already exists 39 | #' if (NROW(.usr[[i]]) > 0) { 40 | #' next 41 | #' } 42 | #' ## determine number of users to lookup 43 | #' if (90000 > length(x)) { 44 | #' n <- length(x) 45 | #' } else { 46 | #' n <- 90000 47 | #' } 48 | #' rl <- rate_limit2("lookup_users") 49 | #' if (is_null(rl)) { 50 | #' s 
<- 60 * 15 51 | #' r <- 900 * 100 52 | #' } else if (rl$remaining == 0) { 53 | #' s <- as.numeric(rl$reset, "secs") 54 | #' r <- rl$limit * 100 55 | #' } else { 56 | #' s <- 0 57 | #' r <- rl$remaining * 100 58 | #' } 59 | #' if (s > 0) { 60 | #' nap_wait(s) 61 | #' } 62 | #' if (n > r) { 63 | #' n <- r 64 | #' } 65 | #' ## lookup users 66 | #' .usr[[i]] <- lookup_users2(x[seq_len(n)]) 67 | #' ## drop the ones already looked up 68 | #' x <- x[-seq_len(n)] 69 | #' 70 | #' nrows_collected <- sum(dapr::vap_int(.usr, NROW)) 71 | #' spf <- repc("1", nchar(use_commas_int(tusrs))) 72 | #' complete("Collected data for ", cint(nrows_collected, spf), 73 | #' " users (", cdbl((tusrs - length(x)) / tusrs * 100, "11.1"), "%)") 74 | #' if (length(x) == 0) { 75 | #' break 76 | #' } 77 | #' 78 | #' } 79 | #' if (length(.usr) > 1L && is_null(.usr[[length(.usr)]])) { 80 | #' .usr <- .usr[-length(.usr)] 81 | #' } 82 | #' 83 | #' .usr 84 | #' } 85 | #' 86 | #' 87 | #' 88 | #' usr_rate_limit_sleep <- function() { 89 | #' tryCatch({ 90 | #' ## .tkn = current token 91 | #' if (!exists.rr(".tkn")) { 92 | #' .tkn <- rtweet::get_token() 93 | #' assign.rr(.tkn = .tkn) 94 | #' } else { 95 | #' .tkn <- get.rr(".tkn") 96 | #' } 97 | #' rl <- rate_limit2("lookup_users", token = .tkn) 98 | #' rlm <- (rl[["remaining"]] %||% 0L) 99 | #' if (rlm > 0) { 100 | #' return(rlm) 101 | #' } 102 | #' if (rlm == 0 && !inherits(.tkn, "bearer") && is_bearable(.tkn)) { 103 | #' .btkn <- rtweet::bearer_token(.tkn) 104 | #' rlb <- rate_limit2("lookup_users", token = .btkn) 105 | #' rlbm <- (rlb[["remaining"]] %||% 0L) 106 | #' if (rlbm > 0) { 107 | #' assign.rr(.tkn = .btkn) 108 | #' return(rlbm) 109 | #' } 110 | #' rlra <- as.numeric(rl[["reset"]] %||% 900, "secs") 111 | #' rlbra <- as.numeric(rlb[["reset"]] %||% 900, "secs") 112 | #' if (rlra <= rlbra) { 113 | #' s <- rlra 114 | #' assign.rr(.tkn = .tkn) 115 | #' } else { 116 | #' s <- rlbra 117 | #' assign.rr(.tkn = .btkn) 118 | #' } 119 | #' if (s < 0) { 120 
| #' s <- 900 121 | #' } 122 | #' nap_wait(s + 1) 123 | #' return(15L) 124 | #' } 125 | #' 126 | #' if (rlm == 0 && inherits(.tkn, "bearer")) { 127 | #' .btkn <- rtweet::get_token() 128 | #' rlb <- rate_limit2("lookup_users", token = .btkn) 129 | #' rlbm <- (rlb[["remaining"]] %||% 0L) 130 | #' if (rlbm > 0) { 131 | #' assign.rr(.tkn = .btkn) 132 | #' return(rlbm) 133 | #' } 134 | #' rlra <- as.numeric(rl[["reset"]] %||% 900, "secs") 135 | #' rlbra <- as.numeric(rlb[["reset"]] %||% 900, "secs") 136 | #' if (rlra <= rlbra) { 137 | #' s <- rlra 138 | #' assign.rr(.tkn = .tkn) 139 | #' } else { 140 | #' s <- rlbra 141 | #' assign.rr(.tkn = .btkn) 142 | #' } 143 | #' if (s < 0) { 144 | #' s <- 900 145 | #' } 146 | #' nap_wait(s + 1) 147 | #' return(15L) 148 | #' } 149 | #' s <- as.numeric(rl[["reset"]] %||% 900, "secs") 150 | #' nap_wait(s + 1L) 151 | #' 15L 152 | #' }, 153 | #' interrupt = function(i) 1L, 154 | #' error = function(e) 1L) 155 | #' } 156 | -------------------------------------------------------------------------------- /R/followers.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | #' Get followers download 4 | #' 5 | #' Automate the collection of follower IDs for a large number of users (via 6 | #' \code{\link[rtweet]{get_followers}}) 7 | #' 8 | #' @param x Either a data frame or character vector containing user identifiers 9 | #' for which follower IDs will be retrieved. See details for more information 10 | #' about how this works. 11 | #' @param ... If \code{x} is a data frame this can be used to select columns 12 | #' containing the appropriate user identifying information (user_id and/or 13 | #' screen_name). This uses the tidyselect specification. If \code{x} is a 14 | #' character vector, then the first unnamed or non-argument named value is 15 | #' assumed to be labels (screen names) corresponding with \code{x}. 
16 | #' @param output Optionally supply a preexisting output vector (like that returned 17 | #' by this function)–if NULL, the default, this function will start fresh. 18 | #' @param verbose Whether the function should print information/status updates, 19 | #' defaults to TRUE. Setting this to FALSE will silence most printing. 20 | #' @return Returns a list of data frames with user and follower ID information. See 21 | #' \code{\link[rtweet]{get_followers}} for more information. 22 | #' @family followers 23 | #' @details This function attempts to retrieve up to 75,000 follower IDs every 24 | #' 15 minutes, sleeping between calls until Twitter's API rate 25 | #' limit resets. If your API token is linked to your own Twitter APP and has 26 | #' appropriate permissions to create a 'bearer token', then this function will 27 | #' collect closer to 150,000 followers per 15 minutes. 28 | #' @export 29 | get_followers_download <- function(x, ..., output = NULL, verbose = TRUE) { 30 | UseMethod("get_followers_download") 31 | } 32 | 33 | #' @export 34 | get_followers_download.data.frame <- function(x, ..., output = NULL, verbose = TRUE) { 35 | vars <- tidyselect::vars_select(names(x), ...) 
36 | if (length(vars) == 0) { 37 | vars <- names(x) 38 | } 39 | x <- x[, vars, drop = FALSE] 40 | if (ncol(x) > 3L && any(c("user_id", "screen_name") %in% names(x))) { 41 | x <- x[, names(x) %in% c("user_id", "screen_name"), drop = FALSE] 42 | } 43 | stopifnot( 44 | nrow(x) > 0L, 45 | ncol(x) < 3L 46 | ) 47 | if (ncol(x) == 1L) { 48 | sns <- x[[1]] 49 | x <- x[[1]] 50 | } else if (all(grepl("^\\d+$", x[[1]]))) { 51 | sns <- x[[2]] 52 | x <- x[[1]] 53 | } else { 54 | sns <- x[[1]] 55 | x <- x[[2]] 56 | } 57 | get_followers_download(x, sns, output = output, verbose = verbose) 58 | } 59 | 60 | #' @export 61 | get_followers_download.character <- function(x, ..., output = NULL, verbose = TRUE) { 62 | ## prepare screen names and user IDs 63 | sns <- dots1(x) %||% x 64 | sns <- sns[!is.na(x) & !duplicated(x)] 65 | x <- x[!is.na(x) & !duplicated(x)] 66 | if (all(!grepl("^\\d+$", sns))) { 67 | sns <- paste0("@", sns) 68 | } 69 | mchars <- max(nchar(sns)) 70 | sns <- paste0(dapr::vap_chr(mchars - nchar(sns), ~ 71 | paste0(rep(" ", .x), collapse = "")), sns) 72 | 73 | ## prepare and create token(s) object 74 | token <- prep_tokens(rtweet::get_token()) 75 | 76 | ## if output vector is not supplied 77 | if (is_null(output)) { 78 | output <- vector("list", length(x)) 79 | } else { 80 | stopifnot( 81 | is.list(output), 82 | length(output) == length(x) 83 | ) 84 | } 85 | tusrs <- length(x) 86 | if (has_bearer(token)) { 87 | rlc <- 30 88 | } else { 89 | rlc <- 15 90 | } 91 | if (verbose) { 92 | dotdotdot("This should take around ", cdbl(tusrs / rlc * 15), " mins") 93 | } 94 | token <- determine_token(token, "get_followers") 95 | n <- flw_rate_limit_sleep(token) 96 | 97 | tryCatch({ 98 | ## for loop 99 | for (i in seq_along(output)) { 100 | ## skip if data already exists 101 | if (n_row(output[[i]]) > 0) { 102 | next 103 | } 104 | ## check rate limit remaining / change out token if possible 105 | while (n == 0) { 106 | token <- determine_token(token, "get_followers") 107 | n <- 
flw_rate_limit_sleep(token)
      }

      ## get followers list – and extract next cursor (page) value
      output[[i]] <- get_followers_warning_nap(x[i], token = this_token(token))
      n <- n - 1L
      np <- next_cursor_download(output[[i]])

      ## if the user has more than 5,000 followers, make additional calls
      ## using the pagination cursor (np) until it is exhausted (0 / -1)
      while (length(np) > 0 && !np %in% c(0, -1)) {
        while (n == 0) {
          ## no calls left on this token: rotate tokens and/or sleep
          token <- determine_token(token, "get_followers")
          n <- flw_rate_limit_sleep(token)
        }
        flwi <- get_followers_warning_nap(x[i], page = np, token = this_token(token))
        n <- n - 1L
        np <- next_cursor_download(flwi)
        if (n_row(flwi) > 0) {
          output[[i]] <- rbind(output[[i]], flwi)
        }
      }
      if (verbose) {
        ## status line: timestamp, running ID count, padded name, % complete
        ## (bug fix: message previously said "friend IDs" — this function
        ## collects *follower* IDs)
        complete(pgray(rd_timestamp()),
          "", pgold(cint(n_row(output[[i]]), "10,000,000")),
          pgray(" follower IDs for "), pblue(sns[i]), " ",
          pgray(cli::symbol$ellipsis), pgray(" ("),
          pgray(cdbl(i / tusrs * 100, "1.1")), pgray("%)"))
      }
    }
    output
  },
  ## on interrupt or error, return whatever has been collected so far
  ## rather than losing completed downloads
  interrupt = function(i) return(output),
  error = function(e) return(output)
  )
}


## Safely extract the pagination cursor from an rtweet result.
## Returns NULL (instead of erroring) when no cursor is available.
next_cursor_download <- function(x) {
  tryCatch(
    rtweet::next_cursor(x),
    error = function(e) NULL
  )
}

## Return the number of remaining get_followers calls for `token`.
## If none remain, sleep until the rate limit resets (plus a 60-second
## buffer) and then report a fresh window of 15 calls.
flw_rate_limit_sleep <- function(token) {
  rl <- rate_limit2(query = "get_followers", token = this_token(token))
  rlm <- rl[["remaining"]] %||% 0L
  if (rlm > 0) {
    return(rlm)
  }
  ## default to a full 15-minute window (900s) if no reset info returned
  s <- as.numeric(rl[["reset"]] %||% 900, "secs")
  nap_wait(s + 60)
  15L
}
--------------------------------------------------------------------------------
/R/friends.R:
--------------------------------------------------------------------------------


#' Get friends download
#'
#' Automate the collection of friend IDs for a large number of users (via
#' \code{\link[rtweet]{get_friends}})
#'
#' @param x Either a data frame or character vector containing user identifiers
#'   for which friends IDs will be retrieved. See details for more information
#'   about how this works.
#' @param ... If \code{x} is a data frame this can be used to select columns
#'   containing the appropriate user identifying information (user_id and/or
#'   screen_name). This uses the tidyselect specification. If \code{x} is a
#'   character vector, then the first unnamed or non-argument named value is
#'   assumed to be labels (screen names) corresponding with \code{x}.
#' @param output Optionally supply a preexisting output vector (like that
#'   returned by this function)–if NULL, the default, this function will start
#'   fresh.
#' @param verbose Whether the function should print information/status updates,
#'   defaults to TRUE. Setting this to FALSE will silence most printing.
#' @return Returns a list of data frames with user and friend ID information.
#'   See \code{\link[rtweet]{get_friends}} for more information.
#' @family friends
#' @details This function attempts to retrieve friends IDs for as many as 15 to
#'   30 users every 15 minutes, sleeping between calls until Twitter's API rate
#'   limit resets. If your API token is linked to your own Twitter APP and has
#'   appropriate permissions to create a 'bearer token', then this function
#'   will make 30 calls every 15 minutes. If the token cannot be used via
#'   bearer authorization, then 15 calls are made every 15 minutes.
#'
#'   It's worth noting that 15 calls does not mean 15 complete friend IDs.
#'   Twitter limits the number of returned friends returned by a single call to
#'   5,000, which is more friends than 95% of Twitter users, but at times is
#'   frustrating because some users like Senator Amy Klobuchar follow over
#'   170,000 accounts–that many accounts can take 45 minutes to collect!
#' It's interesting, though, that Amy Klobuchar is also unusually prolific in
#' both following Twitter users and writing/sponsoring legislation.
#' @export
get_friends_download <- function(x, ..., output = NULL, verbose = TRUE) {
  UseMethod("get_friends_download")
}

#' @export
get_friends_download.data.frame <- function(x, ..., output = NULL, verbose = TRUE) {
  ## select identifier columns via tidyselect; default to all columns
  vars <- tidyselect::vars_select(names(x), ...)
  if (length(vars) == 0) {
    vars <- names(x)
  }
  x <- x[, vars, drop = FALSE]
  ## if too many columns remain, fall back to the conventional rtweet
  ## identifier columns when present
  ## (bug fix: was `ncol(x) > 3L`, which let exactly-3-column frames skip
  ## this reduction and fail the stopifnot() below even when user_id /
  ## screen_name columns were available)
  if (ncol(x) >= 3L && any(c("user_id", "screen_name") %in% names(x))) {
    x <- x[, names(x) %in% c("user_id", "screen_name"), drop = FALSE]
  }
  stopifnot(
    nrow(x) > 0L,
    ncol(x) < 3L
  )
  ## determine which column holds IDs (x) vs. display labels (sns)
  if (ncol(x) == 1L) {
    sns <- x[[1]]
    x <- x[[1]]
  } else if (all(grepl("^\\d+$", x[[1]]))) {
    ## first column is all-numeric user IDs; second column holds labels
    sns <- x[[2]]
    x <- x[[1]]
  } else {
    sns <- x[[1]]
    x <- x[[2]]
  }
  get_friends_download(x, sns, output = output, verbose = verbose)
}

#' @export
get_friends_download.character <- function(x, ..., output = NULL, verbose = TRUE) {
  ## prepare screen names and user IDs
  ## NOTE(review): `dots1(x)` looks like it may have been intended as
  ## `dots1(...)` (per the docs, labels come from `...`) — confirm against
  ## dots1()'s definition before changing; left as-is here.
  sns <- dots1(x) %||% x
  sns <- sns[!is.na(x) & !duplicated(x)]
  x <- x[!is.na(x) & !duplicated(x)]
  ## guard: zero-length input would otherwise fail obscurely below
  ## (max(nchar(character(0))) is -Inf, then rep(" ", -Inf) errors)
  stopifnot(length(x) > 0L)
  if (all(!grepl("^\\d+$", sns))) {
    sns <- paste0("@", sns)
  }
  ## left-pad labels with spaces so verbose output aligns
  mchars <- max(nchar(sns))
  sns <- paste0(dapr::vap_chr(mchars - nchar(sns), ~
    paste0(rep(" ", .x), collapse = "")), sns)

  ## prepare and create token(s) object
  token <- prep_tokens(rtweet::get_token())

  ## if output vector is not supplied, start fresh; otherwise validate it
  if (is_null(output)) {
    output <- vector("list", length(x))
  } else {
    stopifnot(
      is.list(output),
      length(output) == length(x)
    )
  }
  tusrs <- length(x)
  ## bearer tokens get 30 calls per 15-minute window; user tokens get 15
  if (has_bearer(token)) {
    rlc <- 30
  } else {
    rlc <- 15
  }
  if (verbose) {
    dotdotdot("This should take around ", cdbl(tusrs / rlc * 15), " mins")
  }
  token <- determine_token(token, "get_friends")
  n <- fds_rate_limit_sleep(token)

  tryCatch({
    for (i in seq_along(output)) {
      ## skip if data already exists (supports resuming a prior run)
      if (n_row(output[[i]]) > 0) {
        next
      }
      ## check rate limit remaining / change out token if possible
      while (n == 0) {
        token <- determine_token(token, "get_friends")
        n <- fds_rate_limit_sleep(token)
      }

      ## get friends list – and extract next cursor (page) value
      output[[i]] <- get_friends_warning_nap(x[i], token = this_token(token))
      n <- n - 1L
      np <- next_cursor_download(output[[i]])

      ## if user follows more than 5,000 accounts, make additional calls
      ## using the pagination cursor until it is exhausted (0 / -1)
      while (length(np) > 0 && !np %in% c(0, -1)) {
        while (n == 0) {
          token <- determine_token(token, "get_friends")
          n <- fds_rate_limit_sleep(token)
        }
        fdsi <- get_friends_warning_nap(x[i], page = np, token = this_token(token))
        n <- n - 1L
        np <- next_cursor_download(fdsi)
        if (n_row(fdsi) > 0) {
          output[[i]] <- rbind(output[[i]], fdsi)
        }
      }
      if (verbose) {
        ## status line: timestamp, running ID count, padded name, % complete
        complete(pgray(rd_timestamp()),
          "", pgold(cint(n_row(output[[i]]), "100,000")),
          pgray(" friend IDs for "), pblue(sns[i]), " ",
          pgray(cli::symbol$ellipsis), pgray(" ("),
          pgray(cdbl(i / tusrs * 100, "1.1")), pgray("%)"))
      }
    }
    output
  },
  ## on interrupt or error, return whatever has been collected so far
  ## rather than losing completed downloads
  interrupt = function(i) return(output),
  error = function(e) return(output)
  )
}


## Safely extract the pagination cursor from an rtweet result.
## Returns NULL (instead of erroring) when no cursor is available.
## NOTE(review): this duplicates the identical helper in R/followers.R;
## consider consolidating into a single shared definition.
next_cursor_download <- function(x) {
  tryCatch(
    rtweet::next_cursor(x),
    error = function(e) NULL
  )
}

## Return the number of remaining get_friends calls for `token`.
## If none remain, sleep until the rate limit resets (plus a 60-second
## buffer) and then report a fresh window of 15 calls.
fds_rate_limit_sleep <- function(token) {
  rl <- rate_limit2(query = "get_friends", token = this_token(token))
  rlm <- rl[["remaining"]] %||% 0L
  if (rlm > 0) {
    return(rlm)
  }
  ## default to a full 15-minute window (900s) if no reset info returned
  s <- as.numeric(rl[["reset"]] %||% 900, "secs")
  nap_wait(s + 60)
  15L
}