├── LICENSE ├── .gitignore ├── man ├── figures │ └── logo.png ├── P.Rd ├── PP.Rd ├── rtweet.download-package.Rd ├── set_token.Rd ├── search_tweets_download.Rd ├── lookup_users_download.Rd ├── get_followers_download.Rd └── get_friends_download.Rd ├── tests ├── testthat.R └── testthat │ └── test-friends.R ├── .Rbuildignore ├── R ├── envir.R ├── utils-P.R ├── count.R ├── sleep.R ├── rtweet.recipes-package.R ├── lookup_tweets.R ├── assert.R ├── rate_limit.R ├── set-token.R ├── search_tweets.R ├── wrappers.R ├── token.R ├── utils.R ├── lookup_users.R ├── lookup_users_og.R ├── followers.R └── friends.R ├── rtweet.download.Rproj ├── NAMESPACE ├── DESCRIPTION ├── LICENSE.md ├── README.md └── README.Rmd /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2019 2 | COPYRIGHT HOLDER: Michael W. Kearney 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | list-members.R 5 | TODO.R 6 | -------------------------------------------------------------------------------- /man/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mkearney/rtweet.download/HEAD/man/figures/logo.png -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(rtweet.recipes) 3 | 4 | test_check("rtweet.recipes") 5 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^rtweet\.recipes\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^LICENSE\.md$ 4 | ^README\.Rmd$ 5 | ^rtweet\.download\.Rproj$ 6 | 
-------------------------------------------------------------------------------- /tests/testthat/test-friends.R: -------------------------------------------------------------------------------- 1 | test_that("get_friends_download works", { 2 | expect_error(get_friends_download()) 3 | expect_error(get_friends_download(data.frame(x = rnorm(5), y = rnorm(5), z = rnorm(5)))) 4 | }) 5 | -------------------------------------------------------------------------------- /R/envir.R: -------------------------------------------------------------------------------- 1 | exists.rr <- function(x) exists(x, envir = .rr, inherits = FALSE) 2 | 3 | get.rr <- function(x) get(x, envir = .rr, inherits = FALSE) 4 | 5 | assign.rr <- function(...) { 6 | mmap(assign, names(pretty_dots(...)), list(...), MoreArgs = list(envir = .rr, inherits = FALSE)) 7 | invisible() 8 | } 9 | -------------------------------------------------------------------------------- /man/P.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-P.R 3 | \name{\%P\%} 4 | \alias{\%P\%} 5 | \title{Paste operator} 6 | \usage{ 7 | lhs \%P\% rhs 8 | } 9 | \description{ 10 | See \code{tfse::\link[tfse]{\%P\%}} for details. 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/PP.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-P.R 3 | \name{\%PP\%} 4 | \alias{\%PP\%} 5 | \title{Paste with Parameters operator} 6 | \usage{ 7 | lhs \%PP\% rhs 8 | } 9 | \description{ 10 | See \code{tfse::\link[tfse]{\%PP\%}} for details. 
11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /rtweet.download.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: knitr 13 | LaTeX: XeLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /R/utils-P.R: -------------------------------------------------------------------------------- 1 | #' Paste operator 2 | #' 3 | #' See \code{tfse::\link[tfse]{\%P\%}} for details. 4 | #' 5 | #' @name %P% 6 | #' @rdname P 7 | #' @keywords internal 8 | #' @export 9 | #' @importFrom tfse %P% 10 | #' @usage lhs \%P\% rhs 11 | NULL 12 | 13 | #' Paste with Parameters operator 14 | #' 15 | #' See \code{tfse::\link[tfse]{\%PP\%}} for details. 
#'
#' @name %PP%
#' @rdname PP
#' @keywords internal
#' @export
#' @importFrom tfse %PP%
#' @usage lhs \%PP\% rhs
NULL
--------------------------------------------------------------------------------
/R/count.R:
--------------------------------------------------------------------------------
#' How many results can the current token request in one rate-limit window?
#'
#' Returns the per-15-minute capacity for `query` given the active token type
#' (bearer tokens get a larger search allowance; user tokens a larger
#' lookup_users allowance; get_friends is 15 calls either way). On the first
#' call (`first = TRUE`) the capacity is pro-rated by the remaining fraction
#' of the current rate-limit window, so a partially spent window is respected.
#'
#' @param token List with a `token` element (a user or bearer token).
#' @param query One of "search_tweets", "lookup_users", "get_friends".
#' @param first Whether this is the first request of the session.
#' @return A (possibly fractional) count of allowable results.
#' @noRd
determine_count <- function(token, query, first = FALSE) {
  if (is_bearer(token[["token"]])) {
    n <- switch(query,
      "search_tweets" = 45000,
      "lookup_users" = 30000,
      "get_friends" = 15
    )
  } else {
    n <- switch(query,
      "search_tweets" = 18000,
      "lookup_users" = 90000,
      "get_friends" = 15
    )
  }
  if (!first) {
    return(n)
  }
  rl <- rate_limit2(token = token[["token"]], query = query)
  if (rl[["remaining"]] == 0) {
    return(n)
  }
  ## scale the full-window capacity by the fraction of calls still available
  rl[["remaining"]] * (n / rl[["limit"]])
}
--------------------------------------------------------------------------------
/R/sleep.R:
--------------------------------------------------------------------------------
#' Sleep for `s` seconds while rendering a progress bar labelled `msg`.
#'
#' Shared implementation for nap_wait()/nap_retry(), which previously
#' duplicated this code verbatim and differed only in the bar label.
#'
#' @param msg Label shown before the progress bar.
#' @param s Total seconds to sleep (split across `ticks` increments).
#' @param ticks Number of progress-bar increments (default 500, as before).
#' @return Invisibly, TRUE.
#' @noRd
nap_bar <- function(msg, s, ticks = 500L) {
  pb <- progress::progress_bar$new(
    format = crayon::blue(paste0(msg, " [:bar] :eta")),
    total = ticks, clear = TRUE, width = 60)
  pb$tick(0)
  for (i in seq_len(ticks)) {
    Sys.sleep(s / ticks)
    pb$tick()
  }
  invisible(TRUE)
}

#' Sleep `s` seconds waiting for the rate limit to reset.
#' @noRd
nap_wait <- function(s) nap_bar("Waiting on rate limit", s)

#' Sleep `s` seconds before retrying a failed call.
#' @noRd
nap_retry <- function(s) nap_bar("Waiting to retry", s)
--------------------------------------------------------------------------------
/R/rtweet.recipes-package.R:
--------------------------------------------------------------------------------
#' @keywords internal
#' @import rtweet
"_PACKAGE"

# The following block is used by usethis to automatically manage
# roxygen namespace tags. Modify with care!
## usethis namespace: start
## usethis namespace: end
NULL

.rr <- new.env(parent = emptyenv())

.onLoad <- function(libname, pkgname) {
  # op <- options()
  # op.rtweet.recipes <- list(
  #   rtweet.recipes.token_path = "~/R-dev"
  # )
  # toset <- !(names(op.rtweet.recipes) %in% names(op))
  # if (any(toset)) {
  #   options(op.rtweet.recipes[toset])
  # }
  invisible()
}
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
# Generated by roxygen2: do not edit by hand

S3method(get_followers_download,character)
S3method(get_followers_download,data.frame)
S3method(get_friends_download,character)
S3method(get_friends_download,data.frame)
S3method(lookup_users_download,character)
S3method(lookup_users_download,data.frame)
S3method(set_token,Token)
S3method(set_token,bearer)
S3method(set_token,character)
export("%P%")
export("%PP%")
export(get_followers_download)
export(get_friends_download)
export(lookup_users_download)
export(search_tweets_download)
export(set_token)
import(rtweet)
importFrom(tfse,"%P%")
importFrom(tfse,"%PP%")
--------------------------------------------------------------------------------
/R/lookup_tweets.R:
--------------------------------------------------------------------------------
#' Look up tweets in rate-limit-sized batches.
#'
#' Fixes vs. the previous version:
#'  * `prep_tokens(token)` referenced an undefined name; the parameter is
#'    `tokens`.
#'  * `x` was never shortened inside `while (length(x) > 0)`, so the loop
#'    never terminated; each pass now consumes the batch it looked up.
#'  * `count` could exceed `length(x)` on the final batch; it is now capped.
#'
#' @param x Character vector of identifiers to look up.
#' @param ... Passed through to the underlying lookup call.
#' @param tokens Optional token(s); defaults are resolved by prep_tokens().
#' @return A list of per-batch results.
#' @noRd
lookup_tweets_download <- function(x, ..., tokens = NULL) {
  token <- prep_tokens(tokens)
  outpt <- vector("list")
  first <- TRUE
  while (length(x) > 0) {
    token <- determine_token(token, "lookup_users")
    count <- min(determine_count(token, "lookup_users", first = first),
      length(x))
    outpt[[length(outpt) + 1L]] <- lookup_users_warning_nap(
      x[seq_len(count)], ..., token = token$token)
    ## drop the batch just processed so the loop makes progress
    x <- x[-seq_len(count)]
    first <- FALSE
  }
  outpt
}


#' Preallocate (or grow) the output list used by lookup_tweets_download().
#'
#' Fix vs. the previous version: the parameter is `.twt`, but the body read
#' and returned `.stw` — an undefined name copy-pasted from prep_stwout()
#' in search_tweets.R. (Also uses base `is.null()` rather than the non-base
#' `is_null()` helper; behavior is identical.)
#'
#' @param .twt Optional preexisting output list to extend.
#' @param n Number of desired results (sizes the preallocation).
#' @param token Token list; a `bearer` entry raises per-window capacity.
#' @return A list of length at least `ceiling(n / capacity) + 2`.
#' @noRd
prep_twtout <- function(.twt = NULL, n, token) {
  num <- 18000
  if ("bearer" %in% names(token)) {
    num <- c(num, 45000)
  }
  len <- ceiling(n / sum(num)) + 2
  if (!is.null(.twt)) {
    if (length(.twt) < len) {
      .twt <- c(.twt, vector("list", len - length(.twt)))
    }
    return(.twt)
  }
  vector("list", len)
}
--------------------------------------------------------------------------------
/man/rtweet.download-package.Rd:
--------------------------------------------------------------------------------
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/rtweet.recipes-package.R
\docType{package}
\name{rtweet.download-package}
\alias{rtweet.download}
\alias{rtweet.download-package}
\title{rtweet.download: Automate Large Downloads with 'rtweet'}
\description{
\if{html}{\figure{logo.png}{options: align='right' alt='logo' width='120'}}

Robust tools for automating large and/or time-consuming tasks
involving the collection of Twitter data via 'rtweet' .
}
\seealso{
Useful links:
\itemize{
  \item \url{https://github.com/mkearney/rtweet.download}
  \item Report bugs at \url{https://github.com/mkearney/rtweet.download/issues}
}

}
\author{
\strong{Maintainer}: Michael W.
Kearney \email{kearneymw@missouri.edu} (\href{https://orcid.org/0000-0002-0730-4694}{ORCID}) 24 | 25 | } 26 | \keyword{internal} 27 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: rtweet.download 2 | Title: Automate Large Downloads with 'rtweet' 3 | Version: 0.0.1 4 | Authors@R: c( 5 | person("Michael W.", "Kearney", , 6 | email = "kearneymw@missouri.edu", role = c("aut", "cre"), 7 | comment = c(ORCID = "0000-0002-0730-4694")) 8 | ## add contributor template (middle name/initial optional) 9 | #person("First Middle", "Last", , 10 | #email = "email@address.com", role = c("ctb")) 11 | ) 12 | Description: Robust tools for automating large and/or time-consuming tasks 13 | involving the collection of Twitter data via 'rtweet' . 14 | Depends: 15 | R (>= 3.1.0) 16 | License: MIT + file LICENSE 17 | Encoding: UTF-8 18 | LazyData: true 19 | Roxygen: list(markdown = TRUE) 20 | RoxygenNote: 7.0.2 21 | Imports: 22 | rtweet, 23 | dapr, 24 | crayon, 25 | progress, 26 | tidyselect, 27 | clisymbols, 28 | tfse, 29 | tibble 30 | URL: https://github.com/mkearney/rtweet.download 31 | BugReports: https://github.com/mkearney/rtweet.download/issues 32 | Suggests: 33 | testthat (>= 2.1.0) 34 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2019 Michael W. 
Kearney 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /man/set_token.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/set-token.R 3 | \name{set_token} 4 | \alias{set_token} 5 | \title{Set token} 6 | \usage{ 7 | set_token(x) 8 | } 9 | \arguments{ 10 | \item{x}{Either a token or path to a token. If path, the token is read using 11 | \code{readRDS} (this is default {rtweet} behavior). If token, it is saved 12 | in the current working directory as ".rtweet_token"} 13 | } 14 | \value{ 15 | The token is invisibly returned but more importantly the environment 16 | variable "TWITTER_PAT" is set to point toward the saved token file. This 17 | will be reset at the end of the session. 
18 | } 19 | \description{ 20 | Stores Twitter API token information for the duration of the session 21 | } 22 | \examples{ 23 | 24 | ## if your system already has an environment variable for an rtweet token, 25 | ## this will return the path 26 | (pat <- Sys.getenv("TWITTER_PAT")) 27 | 28 | ## if your system doesn't have this environment variable OR if you wish to 29 | ## override this value, then enter the desired path or token object 30 | #pat <- "/path/to/rtweet-token.rds" 31 | 32 | ## and then set the token for use for the remainder of the session 33 | #set_token(pat) 34 | 35 | } 36 | -------------------------------------------------------------------------------- /man/search_tweets_download.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/search_tweets.R 3 | \name{search_tweets_download} 4 | \alias{search_tweets_download} 5 | \title{Search tweets download} 6 | \usage{ 7 | search_tweets_download(q, n, ..., .stw = NULL) 8 | } 9 | \arguments{ 10 | \item{q}{Search query to be used to find matching tweets from the past 6-9 11 | days. See \code{\link[rtweet]{search_tweets}}) for more information on 12 | Twitter search query syntax.} 13 | 14 | \item{n}{Number of desired tweets to return. 
See details for more information 15 | about relevant rate limits.} 16 | 17 | \item{...}{Other parameters are passed to 18 | \code{\link[rtweet]{search_tweets}}).} 19 | 20 | \item{.stw}{Optionally supply a preexisting output vector (like that returned 21 | by this function)–if NULL, the default, this function will start fresh.} 22 | } 23 | \value{ 24 | Returns a list data frames of search data 25 | } 26 | \description{ 27 | Automate the data collection for large Twitter searches (via 28 | \code{\link[rtweet]{search_tweets}}) 29 | } 30 | \details{ 31 | This function attempts to search and collect data for up to 18,000 32 | (when using the default rtweet authorization token) or 63,000 (when using 33 | token generated from your own Twitter app with sufficient bearer token- 34 | level permissions) statuses every 15 minutes, sleeping between calls unti 35 | Twitter's API rate limit resets. 36 | } 37 | \concept{.stw} 38 | -------------------------------------------------------------------------------- /man/lookup_users_download.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lookup_users.R 3 | \name{lookup_users_download} 4 | \alias{lookup_users_download} 5 | \title{Lookup users download} 6 | \usage{ 7 | lookup_users_download(x, output = NULL, verbose = TRUE) 8 | } 9 | \arguments{ 10 | \item{x}{Either a data frame or character vector containing user identifiers 11 | for which users will be retrieved. See details for more information 12 | about how this works.} 13 | 14 | \item{output}{Optionally supply a preexisting output vector (like that returned 15 | by this function)–if NULL, the default, this function will start fresh.} 16 | 17 | \item{verbose}{Whether the function should print information/status updates, 18 | defaults to TRUE. 
Setting this to FALSE will silent most printing.} 19 | } 20 | \value{ 21 | Returns a list data frames with looked up user information. See 22 | \code{\link[rtweet]{lookup_users}} for more information. 23 | } 24 | \description{ 25 | Automate users data collection for a large number of users (via 26 | \code{\link[rtweet]{lookup_users}}) 27 | } 28 | \details{ 29 | This function attempts to lookup users information for as many as 30 | 90,000 users every 15 minutes, sleeping between calls until Twitter's API 31 | rate limit resets. 32 | 33 | It's worth noting that information on many users will not be returned due 34 | to changed screen names, account suspensions, deactivations, etc. 35 | } 36 | \concept{users} 37 | -------------------------------------------------------------------------------- /R/assert.R: -------------------------------------------------------------------------------- 1 | 2 | assert_that <- function(..., env = parent.frame(), msg = NULL) { 3 | res <- see_if(..., env = env, msg = msg) 4 | if (res) return(TRUE) 5 | 6 | stop(assert_error(attr(res, "msg"))) 7 | } 8 | 9 | assert_error <- function (message, call = NULL) { 10 | class <- c("assert_error", "simpleError", "error", "condition") 11 | structure(list(message = message, call = call), class = class) 12 | } 13 | 14 | see_if <- function(..., env = parent.frame(), msg = NULL) { 15 | asserts <- eval(substitute(alist(...))) 16 | 17 | for (assertion in asserts) { 18 | res <- tryCatch({ 19 | eval(assertion, env) 20 | }, assert_error = function(e) { 21 | structure(FALSE, msg = e$message) 22 | }) 23 | 24 | if (!res) { 25 | if (is.null(msg)) 26 | msg <- get_message(res, assertion, env) 27 | return(structure(FALSE, msg = msg)) 28 | } 29 | } 30 | 31 | res 32 | } 33 | 34 | 35 | has_msg <- function(x) !is.null(attr(x, "msg", exact = TRUE)) 36 | 37 | get_message <- function(res, call, env = parent.frame()) { 38 | if (has_msg(res)) { 39 | return(attr(res, "msg")) 40 | } 41 | f <- eval(call[[1]], env) 42 | if 
(!is.primitive(f)) call <- match.call(f, call) 43 | fname <- deparse(call[[1]]) 44 | fail <- on_fail(f) %||% base_fs[[fname]] %||% fail_default 45 | fail(call, env) 46 | } 47 | 48 | fail_default <- function(call, env) { 49 | call_string <- deparse(call, width.cutoff = 60L) 50 | if (length(call_string) > 1L) { 51 | call_string <- paste0(call_string[1L], "...") 52 | } 53 | paste0(call_string, " is not TRUE") 54 | } 55 | 56 | on_fail <- function(x) attr(x, "fail") 57 | 58 | base_fs <- new.env(parent = emptyenv()) 59 | -------------------------------------------------------------------------------- /R/rate_limit.R: -------------------------------------------------------------------------------- 1 | 2 | determine_token <- function(token, query) { 3 | ## if next_token already exists 4 | if ("next_token" %in% names(token)) { 5 | if (token[["next_token"]] == "user" && "bearer" %in% names(token)) { 6 | token[["next_token"]] <- "bearer" 7 | token[["token"]] <- "user" 8 | return(token) 9 | } 10 | if (token[["next_token"]] == "bearer") { 11 | token[["token"]] <- "bearer" 12 | token[["next_token"]] <- "user" 13 | return(token) 14 | } 15 | ## if only user token, then don't need to change anything 16 | return(token) 17 | } 18 | 19 | 20 | ## if not bearable, then just use the one 21 | if (!"bearer" %in% names(token)) { 22 | token[["token"]] <- token[["user"]] 23 | token[["next_token"]] <- token[["user"]] 24 | return(token) 25 | } 26 | ## determine next token 27 | rlu <- rate_limit2(token = token[["user"]], query = query) 28 | rlb <- rate_limit2(token = token[["bearer"]], query = query) 29 | if (isTRUE(rlb[["remaining"]] > rlu[["remaining"]])) { 30 | token[["token"]] <- token[["bearer"]] 31 | token[["next_token"]] <- token[["user"]] 32 | } else if (isTRUE(rlu[["remaining"]] > rlb[["remaining"]])) { 33 | token[["token"]] <- token[["user"]] 34 | token[["next_token"]] <- token[["bearer"]] 35 | } else if (isTRUE(rlu[["reset"]] <= rlb[["reset"]])) { 36 | token[["token"]] <- 
token[["user"]] 37 | token[["next_token"]] <- token[["bearer"]] 38 | } else { 39 | token[["token"]] <- token[["bearer"]] 40 | token[["next_token"]] <- token[["user"]] 41 | } 42 | token 43 | } 44 | 45 | prep_tokens <- function(token = NULL) { 46 | token <- token %||% rtweet::get_token() 47 | if (!is_bearable(token)) { 48 | return(list(user = token)) 49 | } 50 | list(user = token, bearer = rtweet::bearer_token(token)) 51 | } 52 | 53 | -------------------------------------------------------------------------------- /man/get_followers_download.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/followers.R 3 | \name{get_followers_download} 4 | \alias{get_followers_download} 5 | \title{Get followers download} 6 | \usage{ 7 | get_followers_download(x, ..., output = NULL, verbose = TRUE) 8 | } 9 | \arguments{ 10 | \item{x}{Either a data frame or character vector containing user identifiers 11 | for which follower IDs will be retrieved. See details for more information 12 | about how this works.} 13 | 14 | \item{...}{If \code{x} is a data frame this can be used to select columns 15 | containing the appropriate user identifying information (user_id and/or 16 | screen_name). This uses the tidyselect specification. If \code{x} is a 17 | character vector, then the first unnamed or non-argument named value is 18 | assumed to be labels (screen names) corresponding with \code{x}.} 19 | 20 | \item{output}{Optionally supply a preexisting output vector (like that returned 21 | by this function)–if NULL, the default, this function will start fresh.} 22 | 23 | \item{verbose}{Whether the function should print information/status updates, 24 | defaults to TRUE. Setting this to FALSE will silent most printing.} 25 | } 26 | \value{ 27 | Returns a list data frames with user and follower ID information. See 28 | \code{\link[rtweet]{get_followers}} for more information. 
29 | } 30 | \description{ 31 | Automate the collection of follower IDs for a large number of users (via 32 | \code{\link[rtweet]{get_followers}}) 33 | } 34 | \details{ 35 | This function attempts to retrieve up to 75,000 follower IDs every 36 | 15 minutes, sleeping between calls until Twitter's API rate 37 | limit resets. If your API token is linked to your own Twitter APP and has 38 | appropriate permissions to create a 'bearer token', then this function will 39 | collect closer to 150,000 followers per 15 mintues. 40 | } 41 | \concept{followers} 42 | -------------------------------------------------------------------------------- /R/set-token.R: -------------------------------------------------------------------------------- 1 | 2 | #' Set token 3 | #' 4 | #' Stores Twitter API token information for the duration of the session 5 | #' 6 | #' @param x Either a token or path to a token. If path, the token is read using 7 | #' \code{readRDS} (this is default {rtweet} behavior). If token, it is saved 8 | #' in the current working directory as ".rtweet_token" 9 | #' @return The token is invisibly returned but more importantly the environment 10 | #' variable "TWITTER_PAT" is set to point toward the saved token file. This 11 | #' will be reset at the end of the session. 
12 | #' @examples 13 | #' 14 | #' ## if your system already has an environment variable for an rtweet token, 15 | #' ## this will return the path 16 | #' (pat <- Sys.getenv("TWITTER_PAT")) 17 | #' 18 | #' ## if your system doesn't have this environment variable OR if you wish to 19 | #' ## override this value, then enter the desired path or token object 20 | #' #pat <- "/path/to/rtweet-token.rds" 21 | #' 22 | #' ## and then set the token for use for the remainder of the session 23 | #' #set_token(pat) 24 | #' 25 | #' @export 26 | set_token <- function(x) { 27 | UseMethod("set_token") 28 | } 29 | 30 | #' @export 31 | set_token.character <- function(x) { 32 | if (!file.exists(x)) { 33 | stop("Couldn't find token file", call. = FALSE) 34 | } 35 | stopifnot( 36 | is_usertoken(readRDS(x)) || is_bearer(readRDS(x)) 37 | ) 38 | Sys.setenv(TWITTER_PAT = x) 39 | complete("Environment variable set: 'TWITTER_PAT=" %P% x, "'") 40 | invisible(readRDS(x)) 41 | } 42 | 43 | #' @export 44 | set_token.Token <- function(x) { 45 | saveRDS(x, ".rtweet_token.rds") 46 | Sys.setenv(TWITTER_PAT = ".rtweet_token.rds") 47 | complete("Token saved and environment variable set: 'TWITTER_PAT=.rtweet_token.rds'") 48 | invisible(x) 49 | } 50 | 51 | #' @export 52 | set_token.bearer <- function(x) { 53 | saveRDS(x, ".rtweet_token.rds") 54 | Sys.setenv(TWITTER_PAT = ".rtweet_token.rds") 55 | complete("Token saved and environment variable set: 'TWITTER_PAT=.rtweet_token.rds'") 56 | invisible(x) 57 | } 58 | -------------------------------------------------------------------------------- /R/search_tweets.R: -------------------------------------------------------------------------------- 1 | #' Search tweets download 2 | #' 3 | #' Automate the data collection for large Twitter searches (via 4 | #' \code{\link[rtweet]{search_tweets}}) 5 | #' 6 | #' @param q Search query to be used to find matching tweets from the past 6-9 7 | #' days. 
See \code{\link[rtweet]{search_tweets}}) for more information on 8 | #' Twitter search query syntax. 9 | #' @param n Number of desired tweets to return. See details for more information 10 | #' about relevant rate limits. 11 | #' @param ... Other parameters are passed to 12 | #' \code{\link[rtweet]{search_tweets}}). 13 | #' @param .stw Optionally supply a preexisting output vector (like that returned 14 | #' by this function)–if NULL, the default, this function will start fresh. 15 | #' @return Returns a list data frames of search data 16 | #' @family .stw 17 | #' @details This function attempts to search and collect data for up to 18,000 18 | #' (when using the default rtweet authorization token) or 63,000 (when using 19 | #' token generated from your own Twitter app with sufficient bearer token- 20 | #' level permissions) statuses every 15 minutes, sleeping between calls unti 21 | #' Twitter's API rate limit resets. 22 | #' @export 23 | search_tweets_download <- function(q, n, ..., .stw = NULL) { 24 | total <- n 25 | token <- prep_tokens(rtweet::get_token()) 26 | .stw <- prep_stwout(.stw, n, token) 27 | first <- TRUE 28 | while (total > 0) { 29 | token <- determine_token(token, "search_tweets") 30 | count <- determine_count(token, "search_tweets", first = first) 31 | .stw[[length(.stw) + 1L]] <- search_tweets_warning_nap(q, 32 | n = count, ..., token = token$token) 33 | total <- total - count 34 | nrows_collected <- sum(dapr::vap_int(.stw, NROW)) 35 | spf <- repc("1", nchar(use_commas_int(n))) 36 | complete("Collected data for ", cint(nrows_collected, spf), 37 | " tweets (", cdbl((n - total) / n * 100, "11.1"), "%)") 38 | first <- FALSE 39 | } 40 | .stw 41 | } 42 | 43 | prep_stwout <- function(.stw = NULL, n, token) { 44 | num <- 18000 45 | if ("bearer" %in% names(token)) { 46 | num <- c(num, 45000) 47 | } 48 | len <- ceiling(n / sum(num)) + 2 49 | if (!is_null(.stw)) { 50 | if (length(.stw) < len) { 51 | .stw <- c(.stw, vector("list", len - length(.stw))) 52 | } 
53 | return(.stw) 54 | } 55 | vector("list", len) 56 | } 57 | -------------------------------------------------------------------------------- /man/get_friends_download.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/friends.R 3 | \name{get_friends_download} 4 | \alias{get_friends_download} 5 | \title{Get friends download} 6 | \usage{ 7 | get_friends_download(x, ..., output = NULL, verbose = TRUE) 8 | } 9 | \arguments{ 10 | \item{x}{Either a data frame or character vector containing user identifiers 11 | for which friends IDs will be retrieved. See details for more information 12 | about how this works.} 13 | 14 | \item{...}{If \code{x} is a data frame this can be used to select columns 15 | containing the appropriate user identifying information (user_id and/or 16 | screen_name). This uses the tidyselect specification. If \code{x} is a 17 | character vector, then the first unnamed or non-argument named value is 18 | assumed to be labels (screen names) corresponding with \code{x}.} 19 | 20 | \item{output}{Optionally supply a preexisting output vector (like that returned 21 | by this function)–if NULL, the default, this function will start fresh.} 22 | 23 | \item{verbose}{Whether the function should print information/status updates, 24 | defaults to TRUE. Setting this to FALSE will silent most printing.} 25 | } 26 | \value{ 27 | Returns a list data frames with user and friend ID information. See 28 | \code{\link[rtweet]{get_friends}} for more information. 29 | } 30 | \description{ 31 | Automate the collection of friend IDs for a large number of users (via 32 | \code{\link[rtweet]{get_friends}}) 33 | } 34 | \details{ 35 | This function attempts to retrieve friends IDs for as many as 15 to 36 | 30 users every 15 minutes, sleeping between calls until Twitter's API rate 37 | limit resets. 
If your API token is linked to your own Twitter APP and has 38 | appropriate permissions to create a 'bearer token', then this function will 39 | make 30 calls every 15 minutes. If the token cannot be used via bearer 40 | authorization, then 15 calls are made every 15 minutes. 41 | 42 | It's worth noting that 15 calls does not mean 15 complete friend IDs. 43 | Twitter limits the number of returned friends returned by a single call to 44 | 5,000, which is more friends than 95\% of Twitter users, but at times is 45 | frustrating because some users like Senator Amy Klobuchar follow over 46 | 170,000 accounts–that many accounts can take 45 mintues to collect! It's 47 | interesting, though, that Amy Klobuchar is also unusually prolific in both 48 | following Twitter users and writing/sponsoring legislation. 49 | } 50 | \concept{friends} 51 | -------------------------------------------------------------------------------- /R/wrappers.R: -------------------------------------------------------------------------------- 1 | run_it_back <- function(.f, .times = 3L) { 2 | eval(parse(text = paste0('function(...) 
{ 3 | .i <- 0L 4 | while ( 5 | is.null(x <- tryCatch(', .f, '(...), error = function(e) NULL)) && 6 | .i <= ', .times, ' 7 | ) { 8 | .i <- .i + 1L 9 | Sys.sleep(1.5) 10 | } 11 | x 12 | }'))) 13 | } 14 | 15 | get_timeline2 <- run_it_back("rtweet::get_timeline") 16 | 17 | rate_limit2 <- run_it_back("rtweet::rate_limit") 18 | 19 | lookup_users2 <- run_it_back("rtweet::lookup_users") 20 | 21 | search_tweets2 <- run_it_back("rtweet::search_tweets") 22 | 23 | search_users2 <- run_it_back("rtweet::search_users") 24 | 25 | get_friends2 <- run_it_back("rtweet::get_friends") 26 | 27 | get_followers2 <- run_it_back("rtweet::get_followers") 28 | 29 | get_favorites2 <- run_it_back("rtweet::get_favorites") 30 | 31 | warning_fun <- function(w, nap = 30) { 32 | if (any(grepl("Rate limit|\\b88\\b", w))) { 33 | nap_retry(nap) 34 | } 35 | } 36 | 37 | exhaust_it <- function(.fun, .x, ..., .token) { 38 | args <- list("placeholder", ..., token = .token) 39 | o <- vector("list", length(.x)) 40 | for (i in seq_along(.x)) { 41 | args[[1]] <- .x[[i]] 42 | o[[i]] <- do.call(.fun, args) 43 | } 44 | o 45 | } 46 | # o <- exhaust_it(rtweet::search_tweets, c("#rstats", "#rtweet"), n = 200, 47 | # .token = rtweet::get_token()) 48 | 49 | fun_warning_nap <- function(.fn, .f, iter = 5, nap = 30) { 50 | .f <- c(.f, paste0(.fn, "_w", seq_len(iter - 1))) 51 | body <- paste0(' ', .fn, '_w', seq_len(iter), ' <- function(...) { 52 | tryCatch( 53 | ', .f, '(...), 54 | warning = function(w) { 55 | if (any(grepl("Rate limit|\\b88\\b|too many", w, ignore.case = TRUE))) { 56 | nap_retry(', nap, ') 57 | } 58 | ', .f, '(...) 59 | }, 60 | error = function(e) { 61 | Sys.sleep(1.0) 62 | tryCatch(', .f, '(...), error = function(e) tibble::tibble()) 63 | } 64 | ) 65 | }', collapse = "\n") 66 | 67 | eval(parse(text = paste0( 68 | 'function(...) 
{\n', 69 | body, '\n', 70 | 71 | ' ', .fn, '_w', iter, '(...)\n}' 72 | ))) 73 | } 74 | 75 | get_friends_warning_nap <- fun_warning_nap("get_friends", "get_friends", 5) 76 | 77 | get_followers_warning_nap <- fun_warning_nap("get_followers", "get_followers", 5) 78 | 79 | search_tweets_warning_nap <- fun_warning_nap("search_tweets", "search_tweets", 5) 80 | 81 | get_timeline_warning_nap <- fun_warning_nap("get_timeline", "get_timeline", 5) 82 | 83 | get_favorites_warning_nap <- fun_warning_nap("get_favorites", "get_favorites", 5) 84 | 85 | lookup_users_warning_nap <- fun_warning_nap("lookup_users", "lookup_users", 5) 86 | -------------------------------------------------------------------------------- /R/token.R: -------------------------------------------------------------------------------- 1 | determine_token <- function(token, query) { 2 | ## if next_token already exists 3 | if ("next_token" %in% names(token)) { 4 | currentoken <- token$token 5 | token$token <- token$next_token 6 | token$next_token <- currentoken 7 | return(token) 8 | } 9 | ## if not bearable, then just use the one 10 | if (!"bearer" %in% names(token)) { 11 | token$token <- "user" 12 | token$next_token <- "user" 13 | return(token) 14 | } 15 | ## determine next token 16 | rlu <- rate_limit2(token = token$user, query = query) 17 | rlb <- rate_limit2(token = token$bearer, query = query) 18 | if (isTRUE(rlb$remaining > rlu$remaining)) { 19 | token$token <- "bearer" 20 | token$next_token <- "user" 21 | } else if (isTRUE(rlu$remaining > rlb$remaining)) { 22 | token$token <- "user" 23 | token$next_token <- "bearer" 24 | } else if (isTRUE(rlu$reset_at <= rlb$reset_at)) { 25 | token$token <- "user" 26 | token$next_token <- "bearer" 27 | } else { 28 | token$token <- "bearer" 29 | token$next_token <- "user" 30 | } 31 | token 32 | } 33 | this_token <- function(token) token[[token[["token"]]]] 34 | 35 | next_token <- function(token) token[[token[["next_token"]]]] 36 | 37 | determine_token_ <- function(token, 
query) { 38 | ## if next_token already exists 39 | if ("next_token" %in% names(token)) { 40 | if (token[["next_token"]] == "user" && "bearer" %in% names(token)) { 41 | token[["next_token"]] <- "bearer" 42 | token[["token"]] <- "user" 43 | return(token) 44 | } 45 | if (token[["next_token"]] == "bearer") { 46 | token[["next_token"]] <- "bearer" 47 | return(token) 48 | } 49 | return(token) 50 | } 51 | ## if not bearable, then just use the one 52 | if (!"bearer" %in% names(token)) { 53 | token[["token"]] <- token[["user"]] 54 | token[["next_token"]] <- token[["user"]] 55 | return(token) 56 | } 57 | ## determine next token 58 | rlu <- rate_limit2(token = token[["user"]], query = query) 59 | rlb <- rate_limit2(token = token[["bearer"]], query = query) 60 | if (isTRUE(rlb[["remaining"]] > rlu[["remaining"]])) { 61 | token[["token"]] <- token[["bearer"]] 62 | token[["next_token"]] <- token[["user"]] 63 | } else if (isTRUE(rlu[["remaining"]] > rlb[["remaining"]])) { 64 | token[["token"]] <- token[["user"]] 65 | token[["next_token"]] <- token[["bearer"]] 66 | } else if (isTRUE(rlu[["reset"]] <= rlb[["reset"]])) { 67 | token[["token"]] <- token[["user"]] 68 | token[["next_token"]] <- token[["bearer"]] 69 | } else { 70 | token[["token"]] <- token[["bearer"]] 71 | token[["next_token"]] <- token[["user"]] 72 | } 73 | token 74 | } 75 | 76 | has_bearer <- function(token) { 77 | isTRUE("bearer" %in% names(token)) 78 | } 79 | 80 | prep_tokens <- function(token) { 81 | token <- add_bearable_attr(token) 82 | if (!is_bearable(token)) { 83 | return(list(user = token)) 84 | } 85 | list(user = token, bearer = rtweet::bearer_token(token)) 86 | } 87 | 88 | has_bearable_attr <- function(x) isTRUE("is_bearable" %in% names(attributes(x))) 89 | 90 | add_bearable_attr <- function(token) { 91 | if (has_bearable_attr(token)) { 92 | return(token) 93 | } 94 | attr(token, "is_bearable") <- isTRUE(grepl("read-write", rtweet:::api_access_level(token))) 95 | token 96 | } 97 | 98 | 99 | is_bearable <- 
function(token = NULL) { 100 | isTRUE(attr(token, "is_bearable")) 101 | } 102 | 103 | is_bearer <- function(x) inherits(x, "bearer") 104 | -------------------------------------------------------------------------------- /R/utils.R: -------------------------------------------------------------------------------- 1 | pgray <- function(x) { 2 | crayon::make_style("#444444")(x) 3 | } 4 | pgold <- function(x) { 5 | crayon::make_style("#884400")(x) 6 | } 7 | pblue <- function(x) { 8 | crayon::make_style("#001199")(x) 9 | } 10 | complete <- function(..., fill = TRUE) { 11 | cat(paste0(crayon::green(clisymbols::symbol$tick), " ", ...), fill = fill) 12 | } 13 | 14 | info <- function(..., fill = TRUE) { 15 | cat(paste0(crayon::magenta(clisymbols::symbol$info), " ", ...), fill = fill) 16 | } 17 | 18 | dotdotdot <- function(..., fill = TRUE) { 19 | cat(paste0(crayon::magenta(clisymbols::symbol$ellipsis), " ", ...), fill = fill) 20 | } 21 | 22 | this <- function(..., fill = TRUE) { 23 | cat(paste0(crayon::blue(clisymbols::symbol$arrow_right), " ", ...), fill = fill) 24 | } 25 | 26 | 27 | cat_line <- function(...) 
{ 28 | cat(paste0(..., "\n"), sep = "") 29 | } 30 | 31 | cint2 <- function(x, sp = NULL) { 32 | if (!is.integer(x)) { 33 | x <- round(x, 0) 34 | } 35 | if (is.null(sp) || sp < max(nchar(x))) { 36 | sp <- max(nchar(x)) 37 | } 38 | x <- sub("\\.\\d+", "", sprintf(paste0("%", sp, "f"), x)) 39 | while (grepl("\\d{4}", x)) { 40 | x <- sub("(?<=\\d)((?=\\d{3}$)|(?=\\d{3},))", ",", x, perl = TRUE) 41 | } 42 | x 43 | } 44 | 45 | use_commas_dbl <- function(x) { 46 | x <- as.character(x) 47 | dec <- sub("^[^\\.]+(?=\\.)", "", x, perl = TRUE) 48 | x <- tfse::regmatches_first(x, "^[^\\.]+") 49 | while (grepl("\\d{4}", x)) { 50 | x <- sub("(?<=\\d)((?=\\d{3}$)|(?=\\d{3},))", ",", x, perl = TRUE) 51 | x <- sub("^[ ]{1}", "", x) 52 | } 53 | paste0(x, dec) 54 | } 55 | use_commas_int <- function(x) { 56 | x <- as.character(x) 57 | while (grepl("\\d{4}", x)) { 58 | x <- sub("(?<=\\d)((?=\\d{3}$)|(?=\\d{3},))", ",", x, perl = TRUE) 59 | x <- sub("^[ ]{1}", "", x) 60 | } 61 | x 62 | } 63 | 64 | format_num <- function(x, f = "1.1") { 65 | f2 <- nchar(sub("^[^\\.]{0,}\\.", "", f)) 66 | f1 <- nchar(sub("(?<=\\.).*", "", f, perl = TRUE)) + f2 67 | f <- paste0("%", f1, ".", f2, "f") 68 | sprintf(f, x) 69 | } 70 | format_int <- function(x, f = "1") { 71 | f <- paste0("%", nchar(f), ".", 0, "f") 72 | sprintf(f, x) 73 | } 74 | cdbl <- function(x, f = "1.1") { 75 | x <- format_num(x, f) 76 | use_commas_dbl(x) 77 | } 78 | cint <- function(x, f = "1") { 79 | x <- format_int(x, f) 80 | use_commas_int(x) 81 | } 82 | repc <- function(x, n, collapse = "") paste(rep(x, n), collapse = collapse) 83 | 84 | capture_dots <- function(...) { 85 | eval(substitute(alist(...)), envir = parent.frame()) 86 | } 87 | 88 | pretty_dots <- function(...) { 89 | dots <- capture_dots(...) 
90 | if (length(dots) == 0) { 91 | return(NULL) 92 | } 93 | if (is.null(names(dots))) { 94 | names(dots) <- expr_names(dots) 95 | } 96 | nms <- names(dots) 97 | if ("" %in% nms) { 98 | names(dots)[nms == ""] <- expr_names(dots[nms == ""]) 99 | } 100 | dots 101 | } 102 | 103 | expr_names <- function(args) { 104 | vapply(args, deparse, USE.NAMES = FALSE, FUN.VALUE = character(1)) 105 | } 106 | 107 | mmap <- function(f, ...) { 108 | f <- match.fun(f) 109 | mapply(FUN = f, ..., SIMPLIFY = FALSE, USE.NAMES = FALSE) 110 | } 111 | 112 | 113 | `%||%` <- function(x, y) { 114 | if (is_null(x)) 115 | y 116 | else x 117 | } 118 | 119 | is_null <- function(x) length(x) == 0L 120 | 121 | rd_timestamp <- function() format(Sys.time(), "%b %d %H:%M:%S") 122 | 123 | is_usertoken <- function(x) inherits(x, "Token") 124 | 125 | not_token <- function(x) is.list(x) && !is_bearable(x) && !is_usertoken(x) 126 | 127 | n_row <- function(...) { 128 | NROW(tryCatch(..., error = function(e) NULL)) 129 | } 130 | 131 | dots1 <- function(...) 
{ 132 | tryCatch(list(...)[[1]], error = function(e) NULL) 133 | } 134 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # rtweet.download 5 | 6 | 7 | 8 | [![CRAN 9 | status](https://www.r-pkg.org/badges/version/rtweet.download)](https://CRAN.R-project.org/package=rtweet.download) 10 | [![Lifecycle: 11 | experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://www.tidyverse.org/lifecycle/#experimental) 12 | 13 | 14 | Automating the collection of Twitter data (via 15 | [**{rtweet}**](https://rtweet.info)) 16 | 17 | ## Installation 18 | 19 | You can install the released version of **{rtweet.download}** from 20 | [CRAN](https://CRAN.R-project.org) with: 21 | 22 | ``` r 23 | install.packages("rtweet.download") 24 | ``` 25 | 26 | And the development version from [GitHub](https://github.com/) with: 27 | 28 | ``` r 29 | # install.packages("remotes") 30 | remotes::install_github("mkearney/rtweet.download") 31 | ``` 32 | 33 | ## Friends IDs 34 | 35 | Twitter’s `"friends/ids"` API endpoint is rate limited to 15 requests 36 | (or the friend IDs of 15[\*](#notes) accounts) per 15 minutes. So while 37 | a single call using `rtweet::get_friends()` can retrieve the friend IDs 38 | of up to 15 users, a single call using 39 | `rtweet.download::get_friends_download()` can retrieve the friend IDs of 40 | hundreds or even thousands of users\! 41 | 42 | | | | 43 | | --------------------------------------- | ------------------------ | 44 | | **API Feature** | **Value** | 45 | |   Endpoint | `"friends/ids"` | 46 | |   Rate limit (per 15 min.) 
| `15` | 47 | |   Friends per request | `5000`[\*](#notes) | 48 | | **R Package** | **Function** | 49 | |   {rtweet} | `get_friends()` | 50 | |   {rtweet.download} | `get_friends_download()` | 51 | 52 | The example below uses `get_friends_download()` to automate the 53 | collection of friend (accounts followed by) IDs of users on [@Teradata’s 54 | list of data science 55 | influencers](https://twitter.com/Teradata/lists/data-science-influencers/members). 56 | 57 | ``` r 58 | ## get members on data science influencers influence 59 | data_sci_influencers <- rtweet::lists_members( 60 | owner_user = "Teradata", slug = "data-science-influencers" 61 | ) 62 | 63 | ## download friend IDs for each user 64 | fds <- get_friends_download(data_sci_influencers$screen_name) 65 | 66 | ## preview data 67 | head(fds) 68 | ``` 69 | 70 | ## Users data 71 | 72 | Twitter’s `"users/lookup"` API endpoint is rate limited to 900 requests 73 | (or 90,000 users) per 15 minutes. So while a single call using 74 | `rtweet::lookup_users()` can retrieve data on up to 90,000 users, a 75 | single call using `rtweet.download::lookup_users_download()` can collect 76 | data on hundreds of thousands or even millions of users\! 77 | 78 | | | | 79 | | --------------------------------------- | ------------------------- | 80 | | **API Feature** | **Value** | 81 | |   Endpoint | `"users/lookup"` | 82 | |   Rate limit (per 15 min.) | `900` | 83 | |   Users per request | `100` | 84 | | **R Package** | **Function** | 85 | |   {rtweet} | `lookup_users()` | 86 | |   {rtweet.download} | `lookup_users_download()` | 87 | 88 | The example below uses `lookup_users_download()` to automate data 89 | collection for the previously collected accounts followed by data 90 | science influencers. 
91 | 92 | ``` r 93 | ## download users data 94 | fds_data <- lookup_users_download(fds$user_id) 95 | 96 | ## preview data 97 | head(fds) 98 | ``` 99 | 100 | ## Notes 101 | 102 | \* The `"friends/ids"` endpoint returns the **up to 5,000 friend IDs of 103 | a single user**, so 15 requests can only return all the friend IDs of 15 104 | users if all 15 of those users follow 5,000 or fewer accounts. To 105 | retrieve all the friend IDs for users following more than 5,000 106 | accounts, multiple requests (friends\_count / 5,000) are required. 107 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r, include = FALSE} 8 | knitr::opts_chunk$set( 9 | collapse = TRUE, 10 | comment = "#>", 11 | fig.path = "man/figures/README-", 12 | out.width = "100%" 13 | ) 14 | library(rtweet.download) 15 | options(width = 90) 16 | ``` 17 | 18 | # rtweet.download 19 | 20 | 21 | [![CRAN status](https://www.r-pkg.org/badges/version/rtweet.download)](https://CRAN.R-project.org/package=rtweet.download) 22 | [![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://www.tidyverse.org/lifecycle/#experimental) 23 | 24 | 25 | Automating the collection of Twitter data (via [**{rtweet}**](https://rtweet.info)) 26 | 27 | ## Installation 28 | 29 | You can install the released version of **{rtweet.download}** from [CRAN](https://CRAN.R-project.org) with: 30 | 31 | ``` r 32 | install.packages("rtweet.download") 33 | ``` 34 | 35 | And the development version from [GitHub](https://github.com/) with: 36 | 37 | ``` r 38 | # install.packages("remotes") 39 | remotes::install_github("mkearney/rtweet.download") 40 | ``` 41 | 42 | 43 | ## Friends IDs 44 | 45 | Twitter's `"friends/ids"` API endpoint is rate limited to 15 requests (or the friend IDs of 15[*](#notes) accounts) per 15 
minutes. So while a single call using `rtweet::get_friends()` can retrieve the friend IDs of up to 15 users, a single call using `rtweet.download::get_friends_download()` can retrieve the friend IDs of hundreds or even thousands of users! 46 | 47 | | | | 48 | |----------------------------------------------|--------------------------| 49 | | **API Feature** | **Value** | 50 | |   Endpoint | `"friends/ids"` | 51 | |   Rate limit (per 15 min.) | `15` | 52 | |   Friends per request | `5000`[*](#notes) | 53 | | **R Package** | **Function** | 54 | |   {rtweet} | `get_friends()` | 55 | |   {rtweet.download} | `get_friends_download()` | 56 | 57 | The example below uses `get_friends_download()` to automate the collection of friend (accounts followed by) IDs of users on [\@Teradata's list of data science influencers](https://twitter.com/Teradata/lists/data-science-influencers/members). 58 | 59 | ``` r 60 | ## get members on data science influencers influence 61 | data_sci_influencers <- rtweet::lists_members( 62 | owner_user = "Teradata", slug = "data-science-influencers" 63 | ) 64 | 65 | ## download friend IDs for each user 66 | fds <- get_friends_download(data_sci_influencers$screen_name) 67 | 68 | ## preview data 69 | head(fds) 70 | ``` 71 | 72 | ## Users data 73 | 74 | Twitter's `"users/lookup"` API endpoint is rate limited to 900 requests (or 90,000 users) per 15 minutes. So while a single call using `rtweet::lookup_users()` can retrieve data on up to 90,000 users, a single call using `rtweet.download::lookup_users_download()` can collect data on hundreds of thousands or even millions of users! 75 | 76 | | | | 77 | |----------------------------------------------|---------------------------| 78 | | **API Feature** | **Value** | 79 | |   Endpoint | `"users/lookup"` | 80 | |   Rate limit (per 15 min.) 
| `900` | 81 | |   Users per request | `100` | 82 | | **R Package** | **Function** | 83 | |   {rtweet} | `lookup_users()` | 84 | |   {rtweet.download} | `lookup_users_download()` | 85 | 86 | The example below uses `lookup_users_download()` to automate data collection for the previously collected accounts followed by data science influencers. 87 | 88 | ``` r 89 | ## download users data 90 | fds_data <- lookup_users_download(fds$user_id) 91 | 92 | ## preview data 93 | head(fds) 94 | ``` 95 | 96 | 97 | ## Notes 98 | 99 | \* The `"friends/ids"` endpoint returns the **up to 5,000 friend IDs of a single user**, so 15 requests can only return all the friend IDs of 15 users if all 15 of those users follow 5,000 or fewer accounts. To retrieve all the friend IDs for users following more than 5,000 accounts, multiple requests (friends_count / 5,000) are required. 100 | -------------------------------------------------------------------------------- /R/lookup_users.R: -------------------------------------------------------------------------------- 1 | #' Lookup users download 2 | #' 3 | #' Automate users data collection for a large number of users (via 4 | #' \code{\link[rtweet]{lookup_users}}) 5 | #' 6 | #' @param x Either a data frame or character vector containing user identifiers 7 | #' for which users will be retrieved. See details for more information 8 | #' about how this works. 9 | #' @param output Optionally supply a preexisting output vector (like that returned 10 | #' by this function)–if NULL, the default, this function will start fresh. 11 | #' @param verbose Whether the function should print information/status updates, 12 | #' defaults to TRUE. Setting this to FALSE will silent most printing. 13 | #' @return Returns a list data frames with looked up user information. See 14 | #' \code{\link[rtweet]{lookup_users}} for more information. 
15 | #' @family users 16 | #' @details This function attempts to lookup users information for as many as 17 | #' 90,000 users every 15 minutes, sleeping between calls until Twitter's API 18 | #' rate limit resets. 19 | #' 20 | #' It's worth noting that information on many users will not be returned due 21 | #' to changed screen names, account suspensions, deactivations, etc. 22 | #' @export 23 | lookup_users_download <- function(x, output = NULL, verbose = TRUE) { 24 | UseMethod("lookup_users_download") 25 | } 26 | 27 | #' @export 28 | lookup_users_download.data.frame <- function(x, output = NULL, verbose = TRUE) { 29 | if (n_row(x) == 0) { 30 | stop("No users found in this data frame") 31 | } 32 | x <- x[, dapr::vap_lgl(x, is.atomic), drop = FALSE] 33 | if (NCOL(x) == 0) { 34 | stop("No atomic columns found in this data frame") 35 | } 36 | if (ncol(x) == 1L && inherits(x[[1]], c("character", "factor"))) { 37 | x <- as.character(x[[1]]) 38 | return(lookup_users_download(x, output = output, verbose = verbose)) 39 | } 40 | if (grepl("user_id$|screen_name$", names(x)[1])) { 41 | x <- as.character(x[[1]]) 42 | return(lookup_users_download(x, output = output, verbose = verbose)) 43 | } 44 | if ("user_id" %in% names(x)) { 45 | x <- as.character(x[["user_id"]]) 46 | return(lookup_users_download(x, output = output, verbose = verbose)) 47 | } 48 | if ("screen_name" %in% names(x)) { 49 | x <- as.character(x[["screen_name"]]) 50 | return(lookup_users_download(x, output = output, verbose = verbose)) 51 | } 52 | if (any(grepl("user_id$|screen_name$", names(x)))) { 53 | x <- as.character(x[[grep("user_id$|screen_name$", names(x))[1]]]) 54 | return(lookup_users_download(x, output = output, verbose = verbose)) 55 | } 56 | stop("data frame must contain atomic 'user_id' or 'screen_name' column") 57 | } 58 | 59 | #' @export 60 | lookup_users_download.character <- function(x, output = NULL, verbose = TRUE) { 61 | x <- x[!is.na(x) & !duplicated(x)] 62 | 63 | ## prepare and create 
token(s) object 64 | token <- rtweet::get_token() 65 | 66 | ## if output vector is not supplied 67 | if (is_null(output)) { 68 | output <- vector("list", ceiling(length(x) / 90000) + 1L) 69 | } else { 70 | stopifnot( 71 | is.list(output), 72 | length(output) >= ceiling(length(x) / 90000) 73 | ) 74 | } 75 | tusrs <- length(x) 76 | if (verbose) { 77 | dotdotdot("This should take around ", cdbl(tusrs / 90000 * 15), " mins") 78 | } 79 | n <- usrs_rate_limit_sleep(token) 80 | ctr <- 0L 81 | 82 | tryCatch({ 83 | ## for loop 84 | for (i in seq_along(output)) { 85 | ## skip if data already exists 86 | if (n_row(output[[i]]) > 0) { 87 | ctr <- ctr + n_row(output[[i]]) 88 | x <- x[-seq_len(n_row(output[[i]]))] 89 | next 90 | } 91 | ## check rate limit remaining / change out token if possible 92 | while (n == 0) { 93 | n <- usrs_rate_limit_sleep(token) 94 | } 95 | 96 | if (n > length(x)) { 97 | n <- length(x) 98 | } 99 | 100 | ## lookup users data 101 | output[[i]] <- lookup_users_warning_nap(x[seq_len(n)], token = token) 102 | x <- x[-seq_len(n)] 103 | ctr <- ctr + n 104 | n <- 0L 105 | 106 | if (verbose) { 107 | complete(pgray(rd_timestamp()), 108 | "", pgold(cint(n_row(output[[i]]), "00,000")), 109 | pgray(" users looked up "), 110 | pgray(cli::symbol$ellipsis), pgray(" ("), 111 | pgray(cdbl(i / tusrs * 100, "1.1")), pgray("%)")) 112 | } 113 | if (length(x) == 0) { 114 | break 115 | } 116 | } 117 | output 118 | }, 119 | interrupt = function(i) return(output), 120 | error = function(e) return(output) 121 | ) 122 | } 123 | 124 | 125 | usrs_rate_limit_sleep <- function(token) { 126 | rl <- rate_limit2(query = "lookup_users", token = token) 127 | rlm <- rl[["remaining"]] %||% 0L 128 | if (rlm > 0) { 129 | return(rlm * 100L) 130 | } 131 | s <- as.numeric(rl[["reset"]] %||% 900, "secs") 132 | nap_wait(s + 60) 133 | 90000L 134 | } 135 | -------------------------------------------------------------------------------- /R/lookup_users_og.R: 
-------------------------------------------------------------------------------- 1 | #' #' Lookup users download 2 | #' #' 3 | #' #' Automate users data collection for a large number of users (via 4 | #' #' \code{\link[rtweet]{lookup_users}}) 5 | #' #' 6 | #' #' @param x A vector of user IDs or screen names for which data will be looked up. 7 | #' #' See details for more information about how this works. 8 | #' #' @param .usr Optionally supply a preexisting output vector (like that returned 9 | #' #' by this function)–if NULL, the default, this function will start fresh. 10 | #' #' @return Returns a list data frames of user data 11 | #' #' @family .usr 12 | #' #' @details This function attempts to lookup data for 90,000 users every 15 13 | #' #' minutes, sleeping between calls until Twitter's API rate limit resets. 14 | #' #' @export 15 | #' lookup_users_download <- function(x, output = NULL, verbose = TRUE) { 16 | #' x <- unique(x[!is.na(x)]) 17 | #' 18 | #' ## if .usr is not supplied 19 | #' if (is_null(.usr)) { 20 | #' .usr <- vector("list", ceiling(length(x) / 90000) + 1L) 21 | #' } else { 22 | #' ## if .usr is supplied, ignore any users w/ data already collected 23 | #' if (all(grepl("^\\d+$", x))) { 24 | #' dr <- x %in% unlist(lapply(.usr, "[[", "user_id")) 25 | #' } else { 26 | #' dr <- x %in% unlist(lapply(.usr, "[[", "screen_name")) 27 | #' } 28 | #' if (any(dr)) { 29 | #' x <- x[!dr] 30 | #' complete("Omit ", cint(sum(dr)), " users already collected") 31 | #' } 32 | #' } 33 | #' tusrs <- length(x) 34 | #' info("This should take around ", cdbl(tusrs / 90000 * 15), " mins") 35 | #' 36 | #' ## for loop 37 | #' for (i in seq_along(.usr)) { 38 | #' ## skip if data already exists 39 | #' if (NROW(.usr[[i]]) > 0) { 40 | #' next 41 | #' } 42 | #' ## determine number of users to lookup 43 | #' if (90000 > length(x)) { 44 | #' n <- length(x) 45 | #' } else { 46 | #' n <- 90000 47 | #' } 48 | #' rl <- rate_limit2("lookup_users") 49 | #' if (is_null(rl)) { 50 | #' s 
<- 60 * 15 51 | #' r <- 900 * 100 52 | #' } else if (rl$remaining == 0) { 53 | #' s <- as.numeric(rl$reset, "secs") 54 | #' r <- rl$limit * 100 55 | #' } else { 56 | #' s <- 0 57 | #' r <- rl$remaining * 100 58 | #' } 59 | #' if (s > 0) { 60 | #' nap_wait(s) 61 | #' } 62 | #' if (n > r) { 63 | #' n <- r 64 | #' } 65 | #' ## lookup users 66 | #' .usr[[i]] <- lookup_users2(x[seq_len(n)]) 67 | #' ## drop the ones already looked up 68 | #' x <- x[-seq_len(n)] 69 | #' 70 | #' nrows_collected <- sum(dapr::vap_int(.usr, NROW)) 71 | #' spf <- repc("1", nchar(use_commas_int(tusrs))) 72 | #' complete("Collected data for ", cint(nrows_collected, spf), 73 | #' " users (", cdbl((tusrs - length(x)) / tusrs * 100, "11.1"), "%)") 74 | #' if (length(x) == 0) { 75 | #' break 76 | #' } 77 | #' 78 | #' } 79 | #' if (length(.usr) > 1L && is_null(.usr[[length(.usr)]])) { 80 | #' .usr <- .usr[-length(.usr)] 81 | #' } 82 | #' 83 | #' .usr 84 | #' } 85 | #' 86 | #' 87 | #' 88 | #' usr_rate_limit_sleep <- function() { 89 | #' tryCatch({ 90 | #' ## .tkn = current token 91 | #' if (!exists.rr(".tkn")) { 92 | #' .tkn <- rtweet::get_token() 93 | #' assign.rr(.tkn = .tkn) 94 | #' } else { 95 | #' .tkn <- get.rr(".tkn") 96 | #' } 97 | #' rl <- rate_limit2("lookup_users", token = .tkn) 98 | #' rlm <- (rl[["remaining"]] %||% 0L) 99 | #' if (rlm > 0) { 100 | #' return(rlm) 101 | #' } 102 | #' if (rlm == 0 && !inherits(.tkn, "bearer") && is_bearable(.tkn)) { 103 | #' .btkn <- rtweet::bearer_token(.tkn) 104 | #' rlb <- rate_limit2("lookup_users", token = .btkn) 105 | #' rlbm <- (rlb[["remaining"]] %||% 0L) 106 | #' if (rlbm > 0) { 107 | #' assign.rr(.tkn = .btkn) 108 | #' return(rlbm) 109 | #' } 110 | #' rlra <- as.numeric(rl[["reset"]] %||% 900, "secs") 111 | #' rlbra <- as.numeric(rlb[["reset"]] %||% 900, "secs") 112 | #' if (rlra <= rlbra) { 113 | #' s <- rlra 114 | #' assign.rr(.tkn = .tkn) 115 | #' } else { 116 | #' s <- rlbra 117 | #' assign.rr(.tkn = .btkn) 118 | #' } 119 | #' if (s < 0) { 120 
| #' s <- 900 121 | #' } 122 | #' nap_wait(s + 1) 123 | #' return(15L) 124 | #' } 125 | #' 126 | #' if (rlm == 0 && inherits(.tkn, "bearer")) { 127 | #' .btkn <- rtweet::get_token() 128 | #' rlb <- rate_limit2("lookup_users", token = .btkn) 129 | #' rlbm <- (rlb[["remaining"]] %||% 0L) 130 | #' if (rlbm > 0) { 131 | #' assign.rr(.tkn = .btkn) 132 | #' return(rlbm) 133 | #' } 134 | #' rlra <- as.numeric(rl[["reset"]] %||% 900, "secs") 135 | #' rlbra <- as.numeric(rlb[["reset"]] %||% 900, "secs") 136 | #' if (rlra <= rlbra) { 137 | #' s <- rlra 138 | #' assign.rr(.tkn = .tkn) 139 | #' } else { 140 | #' s <- rlbra 141 | #' assign.rr(.tkn = .btkn) 142 | #' } 143 | #' if (s < 0) { 144 | #' s <- 900 145 | #' } 146 | #' nap_wait(s + 1) 147 | #' return(15L) 148 | #' } 149 | #' s <- as.numeric(rl[["reset"]] %||% 900, "secs") 150 | #' nap_wait(s + 1L) 151 | #' 15L 152 | #' }, 153 | #' interrupt = function(i) 1L, 154 | #' error = function(e) 1L) 155 | #' } 156 | -------------------------------------------------------------------------------- /R/followers.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | #' Get followers download 4 | #' 5 | #' Automate the collection of follower IDs for a large number of users (via 6 | #' \code{\link[rtweet]{get_followers}}) 7 | #' 8 | #' @param x Either a data frame or character vector containing user identifiers 9 | #' for which follower IDs will be retrieved. See details for more information 10 | #' about how this works. 11 | #' @param ... If \code{x} is a data frame this can be used to select columns 12 | #' containing the appropriate user identifying information (user_id and/or 13 | #' screen_name). This uses the tidyselect specification. If \code{x} is a 14 | #' character vector, then the first unnamed or non-argument named value is 15 | #' assumed to be labels (screen names) corresponding with \code{x}. 
16 | #' @param output Optionally supply a preexisting output vector (like that returned 17 | #' by this function)–if NULL, the default, this function will start fresh. 18 | #' @param verbose Whether the function should print information/status updates, 19 | #' defaults to TRUE. Setting this to FALSE will silence most printing. 20 | #' @return Returns a list of data frames with user and follower ID information. See 21 | #' \code{\link[rtweet]{get_followers}} for more information. 22 | #' @family followers 23 | #' @details This function attempts to retrieve up to 75,000 follower IDs every 24 | #' 15 minutes, sleeping between calls until Twitter's API rate 25 | #' limit resets. If your API token is linked to your own Twitter APP and has 26 | #' appropriate permissions to create a 'bearer token', then this function will 27 | #' collect closer to 150,000 followers per 15 minutes. 28 | #' @export 29 | get_followers_download <- function(x, ..., output = NULL, verbose = TRUE) { 30 | UseMethod("get_followers_download") 31 | } 32 | 33 | #' @export 34 | get_followers_download.data.frame <- function(x, ..., output = NULL, verbose = TRUE) { 35 | vars <- tidyselect::vars_select(names(x), ...) 
36 | if (length(vars) == 0) { 37 | vars <- names(x) 38 | } 39 | x <- x[, vars, drop = FALSE] 40 | if (ncol(x) > 3L && any(c("user_id", "screen_name") %in% names(x))) { 41 | x <- x[, names(x) %in% c("user_id", "screen_name"), drop = FALSE] 42 | } 43 | stopifnot( 44 | nrow(x) > 0L, 45 | ncol(x) < 3L 46 | ) 47 | if (ncol(x) == 1L) { 48 | sns <- x[[1]] 49 | x <- x[[1]] 50 | } else if (all(grepl("^\\d+$", x[[1]]))) { 51 | sns <- x[[2]] 52 | x <- x[[1]] 53 | } else { 54 | sns <- x[[1]] 55 | x <- x[[2]] 56 | } 57 | get_followers_download(x, sns, output = output, verbose = verbose) 58 | } 59 | 60 | #' @export 61 | get_followers_download.character <- function(x, ..., output = NULL, verbose = TRUE) { 62 | ## prepare screen names and user IDs 63 | sns <- dots1(x) %||% x 64 | sns <- sns[!is.na(x) & !duplicated(x)] 65 | x <- x[!is.na(x) & !duplicated(x)] 66 | if (all(!grepl("^\\d+$", sns))) { 67 | sns <- paste0("@", sns) 68 | } 69 | mchars <- max(nchar(sns)) 70 | sns <- paste0(dapr::vap_chr(mchars - nchar(sns), ~ 71 | paste0(rep(" ", .x), collapse = "")), sns) 72 | 73 | ## prepare and create token(s) object 74 | token <- prep_tokens(rtweet::get_token()) 75 | 76 | ## if output vector is not supplied 77 | if (is_null(output)) { 78 | output <- vector("list", length(x)) 79 | } else { 80 | stopifnot( 81 | is.list(output), 82 | length(output) == length(x) 83 | ) 84 | } 85 | tusrs <- length(x) 86 | if (has_bearer(token)) { 87 | rlc <- 30 88 | } else { 89 | rlc <- 15 90 | } 91 | if (verbose) { 92 | dotdotdot("This should take around ", cdbl(tusrs / rlc * 15), " mins") 93 | } 94 | token <- determine_token(token, "get_followers") 95 | n <- flw_rate_limit_sleep(token) 96 | 97 | tryCatch({ 98 | ## for loop 99 | for (i in seq_along(output)) { 100 | ## skip if data already exists 101 | if (n_row(output[[i]]) > 0) { 102 | next 103 | } 104 | ## check rate limit remaining / change out token if possible 105 | while (n == 0) { 106 | token <- determine_token(token, "get_followers") 107 | n <- 
flw_rate_limit_sleep(token)
      }

      ## get followers list – and extract next cursor (page) value
      output[[i]] <- get_followers_warning_nap(x[i], token = this_token(token))
      n <- n - 1L
      np <- next_cursor_download(output[[i]])

      ## if the user has more than 5,000 followers, make additional calls
      ## using the pagination cursor (np) until it is exhausted (0 / -1)
      while (length(np) > 0 && !np %in% c(0, -1)) {
        while (n == 0) {
          ## no calls left on this token: rotate tokens and/or sleep
          token <- determine_token(token, "get_followers")
          n <- flw_rate_limit_sleep(token)
        }
        flwi <- get_followers_warning_nap(x[i], page = np, token = this_token(token))
        n <- n - 1L
        np <- next_cursor_download(flwi)
        if (n_row(flwi) > 0) {
          output[[i]] <- rbind(output[[i]], flwi)
        }
      }
      if (verbose) {
        ## status line: timestamp, running ID count, padded name, % complete
        ## (bug fix: message previously said "friend IDs" — this function
        ## collects *follower* IDs)
        complete(pgray(rd_timestamp()),
          "", pgold(cint(n_row(output[[i]]), "10,000,000")),
          pgray(" follower IDs for "), pblue(sns[i]), " ",
          pgray(cli::symbol$ellipsis), pgray(" ("),
          pgray(cdbl(i / tusrs * 100, "1.1")), pgray("%)"))
      }
    }
    output
  },
  ## on interrupt or error, return whatever has been collected so far
  ## rather than losing completed downloads
  interrupt = function(i) return(output),
  error = function(e) return(output)
  )
}


## Safely extract the pagination cursor from an rtweet result.
## Returns NULL (instead of erroring) when no cursor is available.
next_cursor_download <- function(x) {
  tryCatch(
    rtweet::next_cursor(x),
    error = function(e) NULL
  )
}

## Return the number of remaining get_followers calls for `token`.
## If none remain, sleep until the rate limit resets (plus a 60-second
## buffer) and then report a fresh window of 15 calls.
flw_rate_limit_sleep <- function(token) {
  rl <- rate_limit2(query = "get_followers", token = this_token(token))
  rlm <- rl[["remaining"]] %||% 0L
  if (rlm > 0) {
    return(rlm)
  }
  ## default to a full 15-minute window (900s) if no reset info returned
  s <- as.numeric(rl[["reset"]] %||% 900, "secs")
  nap_wait(s + 60)
  15L
}
--------------------------------------------------------------------------------
/R/friends.R:
--------------------------------------------------------------------------------


#' Get friends download
#'
#' Automate the collection of friend IDs for a large number of users (via
#' \code{\link[rtweet]{get_friends}})
#'
#' @param x Either a data frame or character vector containing user identifiers
#'   for which friends IDs will be retrieved. See details for more information
#'   about how this works.
#' @param ... If \code{x} is a data frame this can be used to select columns
#'   containing the appropriate user identifying information (user_id and/or
#'   screen_name). This uses the tidyselect specification. If \code{x} is a
#'   character vector, then the first unnamed or non-argument named value is
#'   assumed to be labels (screen names) corresponding with \code{x}.
#' @param output Optionally supply a preexisting output vector (like that
#'   returned by this function)–if NULL, the default, this function will start
#'   fresh.
#' @param verbose Whether the function should print information/status updates,
#'   defaults to TRUE. Setting this to FALSE will silence most printing.
#' @return Returns a list of data frames with user and friend ID information.
#'   See \code{\link[rtweet]{get_friends}} for more information.
#' @family friends
#' @details This function attempts to retrieve friends IDs for as many as 15 to
#'   30 users every 15 minutes, sleeping between calls until Twitter's API rate
#'   limit resets. If your API token is linked to your own Twitter APP and has
#'   appropriate permissions to create a 'bearer token', then this function
#'   will make 30 calls every 15 minutes. If the token cannot be used via
#'   bearer authorization, then 15 calls are made every 15 minutes.
#'
#'   It's worth noting that 15 calls does not mean 15 complete friend IDs.
#'   Twitter limits the number of returned friends returned by a single call to
#'   5,000, which is more friends than 95% of Twitter users, but at times is
#'   frustrating because some users like Senator Amy Klobuchar follow over
#'   170,000 accounts–that many accounts can take 45 minutes to collect!
#' It's interesting, though, that Amy Klobuchar is also unusually prolific in
#' both following Twitter users and writing/sponsoring legislation.
#' @export
get_friends_download <- function(x, ..., output = NULL, verbose = TRUE) {
  UseMethod("get_friends_download")
}

#' @export
get_friends_download.data.frame <- function(x, ..., output = NULL, verbose = TRUE) {
  ## select identifier columns via tidyselect; default to all columns
  vars <- tidyselect::vars_select(names(x), ...)
  if (length(vars) == 0) {
    vars <- names(x)
  }
  x <- x[, vars, drop = FALSE]
  ## if too many columns remain, fall back to the conventional rtweet
  ## identifier columns when present
  ## (bug fix: was `ncol(x) > 3L`, which let exactly-3-column frames skip
  ## this reduction and fail the stopifnot() below even when user_id /
  ## screen_name columns were available)
  if (ncol(x) >= 3L && any(c("user_id", "screen_name") %in% names(x))) {
    x <- x[, names(x) %in% c("user_id", "screen_name"), drop = FALSE]
  }
  stopifnot(
    nrow(x) > 0L,
    ncol(x) < 3L
  )
  ## determine which column holds IDs (x) vs. display labels (sns)
  if (ncol(x) == 1L) {
    sns <- x[[1]]
    x <- x[[1]]
  } else if (all(grepl("^\\d+$", x[[1]]))) {
    ## first column is all-numeric user IDs; second column holds labels
    sns <- x[[2]]
    x <- x[[1]]
  } else {
    sns <- x[[1]]
    x <- x[[2]]
  }
  get_friends_download(x, sns, output = output, verbose = verbose)
}

#' @export
get_friends_download.character <- function(x, ..., output = NULL, verbose = TRUE) {
  ## prepare screen names and user IDs
  ## NOTE(review): `dots1(x)` looks like it may have been intended as
  ## `dots1(...)` (per the docs, labels come from `...`) — confirm against
  ## dots1()'s definition before changing; left as-is here.
  sns <- dots1(x) %||% x
  sns <- sns[!is.na(x) & !duplicated(x)]
  x <- x[!is.na(x) & !duplicated(x)]
  ## guard: zero-length input would otherwise fail obscurely below
  ## (max(nchar(character(0))) is -Inf, then rep(" ", -Inf) errors)
  stopifnot(length(x) > 0L)
  if (all(!grepl("^\\d+$", sns))) {
    sns <- paste0("@", sns)
  }
  ## left-pad labels with spaces so verbose output aligns
  mchars <- max(nchar(sns))
  sns <- paste0(dapr::vap_chr(mchars - nchar(sns), ~
    paste0(rep(" ", .x), collapse = "")), sns)

  ## prepare and create token(s) object
  token <- prep_tokens(rtweet::get_token())

  ## if output vector is not supplied, start fresh; otherwise validate it
  if (is_null(output)) {
    output <- vector("list", length(x))
  } else {
    stopifnot(
      is.list(output),
      length(output) == length(x)
    )
  }
  tusrs <- length(x)
  ## bearer tokens get 30 calls per 15-minute window; user tokens get 15
  if (has_bearer(token)) {
    rlc <- 30
  } else {
    rlc <- 15
  }
  if (verbose) {
    dotdotdot("This should take around ", cdbl(tusrs / rlc * 15), " mins")
  }
  token <- determine_token(token, "get_friends")
  n <- fds_rate_limit_sleep(token)

  tryCatch({
    for (i in seq_along(output)) {
      ## skip if data already exists (supports resuming a prior run)
      if (n_row(output[[i]]) > 0) {
        next
      }
      ## check rate limit remaining / change out token if possible
      while (n == 0) {
        token <- determine_token(token, "get_friends")
        n <- fds_rate_limit_sleep(token)
      }

      ## get friends list – and extract next cursor (page) value
      output[[i]] <- get_friends_warning_nap(x[i], token = this_token(token))
      n <- n - 1L
      np <- next_cursor_download(output[[i]])

      ## if user follows more than 5,000 accounts, make additional calls
      ## using the pagination cursor until it is exhausted (0 / -1)
      while (length(np) > 0 && !np %in% c(0, -1)) {
        while (n == 0) {
          token <- determine_token(token, "get_friends")
          n <- fds_rate_limit_sleep(token)
        }
        fdsi <- get_friends_warning_nap(x[i], page = np, token = this_token(token))
        n <- n - 1L
        np <- next_cursor_download(fdsi)
        if (n_row(fdsi) > 0) {
          output[[i]] <- rbind(output[[i]], fdsi)
        }
      }
      if (verbose) {
        ## status line: timestamp, running ID count, padded name, % complete
        complete(pgray(rd_timestamp()),
          "", pgold(cint(n_row(output[[i]]), "100,000")),
          pgray(" friend IDs for "), pblue(sns[i]), " ",
          pgray(cli::symbol$ellipsis), pgray(" ("),
          pgray(cdbl(i / tusrs * 100, "1.1")), pgray("%)"))
      }
    }
    output
  },
  ## on interrupt or error, return whatever has been collected so far
  ## rather than losing completed downloads
  interrupt = function(i) return(output),
  error = function(e) return(output)
  )
}


## Safely extract the pagination cursor from an rtweet result.
## Returns NULL (instead of erroring) when no cursor is available.
## NOTE(review): this duplicates the identical helper in R/followers.R;
## consider consolidating into a single shared definition.
next_cursor_download <- function(x) {
  tryCatch(
    rtweet::next_cursor(x),
    error = function(e) NULL
  )
}

## Return the number of remaining get_friends calls for `token`.
## If none remain, sleep until the rate limit resets (plus a 60-second
## buffer) and then report a fresh window of 15 calls.
fds_rate_limit_sleep <- function(token) {
  rl <- rate_limit2(query = "get_friends", token = this_token(token))
  rlm <- rl[["remaining"]] %||% 0L
  if (rlm > 0) {
    return(rlm)
  }
  ## default to a full 15-minute window (900s) if no reset info returned
  s <- as.numeric(rl[["reset"]] %||% 900, "secs")
  nap_wait(s + 60)
  15L
}