├── .Rbuildignore ├── .github ├── .gitignore └── workflows │ ├── R-CMD-check.yaml │ ├── pkgdown.yaml │ └── still-working.yaml ├── .gitignore ├── DESCRIPTION ├── NAMESPACE ├── NEWS.md ├── R ├── api_hidden.r ├── api_research.r ├── api_research_extended.r ├── auth_check.r ├── auth_hidden.r ├── auth_research.r ├── last_.r ├── parse_hidden.r ├── parse_research.r ├── query_research.r ├── shorthands.r └── utils.R ├── README.Rmd ├── README.md ├── _pkgdown.yml ├── codecov.yml ├── inst ├── CITATION └── WORDLIST ├── man ├── auth_check.Rd ├── auth_hidden.Rd ├── auth_research.Rd ├── figures │ ├── api-both.svg │ ├── api-research.svg │ ├── api-unofficial.svg │ └── logo.png ├── last_query.Rd ├── print.traktok_query.Rd ├── print.tt_results.Rd ├── query.Rd ├── tt_comments_api.Rd ├── tt_get_follower.Rd ├── tt_get_following_hidden.Rd ├── tt_json.Rd ├── tt_playlist_api.Rd ├── tt_request_hidden.Rd ├── tt_search.Rd ├── tt_search_api.Rd ├── tt_search_hidden.Rd ├── tt_user_follower_api.Rd ├── tt_user_info_api.Rd ├── tt_user_info_hidden.Rd ├── tt_user_liked_videos_api.Rd ├── tt_user_pinned_videos_api.Rd ├── tt_user_reposted_api.Rd ├── tt_user_videos.Rd ├── tt_user_videos_api.Rd ├── tt_user_videos_hidden.Rd └── tt_videos_hidden.Rd ├── tests ├── spelling.R ├── testthat.R └── testthat │ ├── example_query.json │ ├── example_request.json │ ├── example_resp_comments.json │ ├── example_resp_q_user.json │ ├── example_resp_q_videos.json │ ├── test-research_api.R │ ├── test-research_auth.R │ ├── test-tt_utils.R │ └── test-tt_videos.R ├── traktok.Rproj └── vignettes ├── .gitignore ├── cookies.png ├── function_overview.csv ├── make_vignettes.r ├── research-api.Rmd ├── research-api.Rmd.orig ├── secuid.png ├── source-code.png ├── unofficial-api.Rmd └── unofficial-api.Rmd.orig /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | tiktok.com_cookies.txt 4 | ^README\.Rmd$ 5 | ^codecov\.yml$ 6 | ^\.github$ 7 | ^doc$ 8 | ^Meta$ 
9 | ^_pkgdown\.yml$ 10 | ^docs$ 11 | ^pkgdown$ 12 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: R-CMD-check 10 | 11 | jobs: 12 | R-CMD-check: 13 | runs-on: ${{ matrix.config.os }} 14 | 15 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 16 | 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | config: 21 | - {os: macOS-latest, r: 'release'} 22 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 23 | - {os: ubuntu-latest, r: 'release'} 24 | 25 | env: 26 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 27 | R_KEEP_PKG_SOURCE: yes 28 | TT_COOKIES: ${{ secrets.TT_COOKIES }} 29 | 30 | steps: 31 | - uses: actions/checkout@v2 32 | 33 | - uses: r-lib/actions/setup-pandoc@v2 34 | 35 | - uses: r-lib/actions/setup-r@v2 36 | with: 37 | r-version: ${{ matrix.config.r }} 38 | http-user-agent: ${{ matrix.config.http-user-agent }} 39 | use-public-rspm: true 40 | 41 | - uses: r-lib/actions/setup-r-dependencies@v2 42 | with: 43 | extra-packages: any::rcmdcheck 44 | needs: check 45 | 46 | - uses: r-lib/actions/check-r-package@v2 47 | with: 48 | upload-snapshots: true 49 | 50 | - name: Test coverage 51 | run: covr::codecov() 52 | shell: Rscript {0} 53 | 54 | 55 | - name: Upload check results 56 | if: failure() 57 | uses: actions/upload-artifact@main 58 | with: 59 | name: ${{ runner.os }}-r${{ matrix.config.r }}-results 60 | path: check 61 | 
-------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | release: 9 | types: [published] 10 | workflow_dispatch: 11 | 12 | name: pkgdown 13 | 14 | jobs: 15 | pkgdown: 16 | runs-on: ubuntu-latest 17 | # Only restrict concurrency for non-PR jobs 18 | concurrency: 19 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} 20 | env: 21 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 22 | permissions: 23 | contents: write 24 | steps: 25 | - uses: actions/checkout@v4 26 | 27 | - uses: r-lib/actions/setup-pandoc@v2 28 | 29 | - uses: r-lib/actions/setup-r@v2 30 | with: 31 | use-public-rspm: true 32 | 33 | - uses: r-lib/actions/setup-r-dependencies@v2 34 | with: 35 | extra-packages: any::pkgdown, local::. 36 | needs: website 37 | 38 | - name: Build site 39 | run: | 40 | options(rmarkdown.html_vignette.check_title = FALSE) 41 | pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) 42 | shell: Rscript {0} 43 | 44 | - name: Deploy to GitHub pages 🚀 45 | if: github.event_name != 'pull_request' 46 | uses: JamesIves/github-pages-deploy-action@v4.5.0 47 | with: 48 | clean: false 49 | branch: gh-pages 50 | folder: docs 51 | -------------------------------------------------------------------------------- /.github/workflows/still-working.yaml: -------------------------------------------------------------------------------- 1 | # reduced workflow to test if the package still works or if TikTok has 2 | # changed anything 3 | on: 4 | schedule: 5 | - cron: '30 10 * * *' 6 | 7 | name: Still-Working? 
8 | 9 | jobs: 10 | R-CMD-check: 11 | runs-on: ${{ matrix.config.os }} 12 | 13 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 14 | 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | config: 19 | - {os: ubuntu-latest, r: 'release'} 20 | 21 | env: 22 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 23 | R_KEEP_PKG_SOURCE: yes 24 | TT_COOKIES: ${{ secrets.TT_COOKIES }} 25 | 26 | steps: 27 | - uses: actions/checkout@v2 28 | 29 | - uses: r-lib/actions/setup-r@v2 30 | with: 31 | r-version: ${{ matrix.config.r }} 32 | http-user-agent: ${{ matrix.config.http-user-agent }} 33 | use-public-rspm: true 34 | 35 | - uses: r-lib/actions/setup-r-dependencies@v2 36 | with: 37 | extra-packages: any::rcmdcheck 38 | needs: check 39 | 40 | - uses: r-lib/actions/check-r-package@v2 41 | with: 42 | upload-snapshots: true 43 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | tiktok.com_cookies.txt 6 | .Rproj.user 7 | .Renviron 8 | inst/doc 9 | /doc/ 10 | /Meta/ 11 | www.tiktok.com_cookies.txt 12 | cookies.txt 13 | test.r 14 | docs 15 | pkgdown 16 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: traktok 2 | Type: Package 3 | Title: Collecting TikTok Data 4 | Version: 0.0.7.9000 5 | Authors@R: 6 | person(given = "Johannes B.", 7 | family = "Gruber", 8 | email = "JohannesB.Gruber@gmail.com", 9 | role = c("aut", "cre"), 10 | comment = c(ORCID = "0000-0001-9177-1772")) 11 | Description: Getting TikTok data () through the official 12 | and unofficial APIs—in other words, you can track TikTok. Originally a port 13 | of Deen Freelon's Pyktok () Python module. 
14 | License: GPL-3 15 | Depends: 16 | R (>= 4.1.0) 17 | Imports: 18 | askpass, 19 | cli, 20 | cookiemonster, 21 | curl, 22 | dplyr, 23 | glue, 24 | httr2, 25 | jsonlite, 26 | lobstr, 27 | methods, 28 | openssl, 29 | purrr, 30 | rlang, 31 | rvest, 32 | stats, 33 | tibble 34 | Suggests: 35 | covr, 36 | knitr, 37 | rmarkdown, 38 | spelling, 39 | testthat (>= 3.0.0) 40 | URL: https://github.com/JBGruber/traktok, https://jbgruber.github.io/traktok/ 41 | BugReports: https://github.com/JBGruber/traktok/issues 42 | Encoding: UTF-8 43 | RoxygenNote: 7.3.2 44 | Language: en-GB 45 | Config/testthat/edition: 3 46 | Config/testthat/parallel: false 47 | VignetteBuilder: knitr 48 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(print,traktok_query) 4 | S3method(print,tt_results) 5 | export(auth_check) 6 | export(auth_hidden) 7 | export(auth_research) 8 | export(last_comments) 9 | export(last_query) 10 | export(query) 11 | export(query_and) 12 | export(query_not) 13 | export(query_or) 14 | export(tt_comments) 15 | export(tt_comments_api) 16 | export(tt_get_follower) 17 | export(tt_get_follower_hidden) 18 | export(tt_get_following) 19 | export(tt_get_following_hidden) 20 | export(tt_get_liked) 21 | export(tt_get_pinned) 22 | export(tt_get_reposted) 23 | export(tt_json) 24 | export(tt_playlist) 25 | export(tt_playlist_api) 26 | export(tt_query_videos) 27 | export(tt_request_hidden) 28 | export(tt_search) 29 | export(tt_search_api) 30 | export(tt_search_hidden) 31 | export(tt_user_follower_api) 32 | export(tt_user_following_api) 33 | export(tt_user_info) 34 | export(tt_user_info_api) 35 | export(tt_user_info_hidden) 36 | export(tt_user_liked_videos_api) 37 | export(tt_user_pinned_videos_api) 38 | export(tt_user_reposted_api) 39 | export(tt_user_videos) 40 | export(tt_user_videos_api) 41 | 
export(tt_user_videos_hidden) 42 | export(tt_videos) 43 | export(tt_videos_hidden) 44 | importFrom(rlang,.data) 45 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # traktok 0.0.7.9000 2 | 3 | * overhauls tt_user_info_hidden (some breaking changes as names in the output have changed) 4 | 5 | # traktok 0.0.6.9000 6 | 7 | * adds access to additional Research API endpoints (tt_user_liked_videos_api, tt_user_pinned_videos_api, tt_user_follower_api, tt_user_following_api, tt_user_reposted_api, and tt_playlist_info_api) 8 | * tt_videos_hidden now supports Video IDs 9 | * adds tt_user_videos_api, a wrapper around tt_search_api to query user videos 10 | 11 | # traktok 0.0.5.9000 12 | 13 | * adds experimental tt_user_videos_hidden and tt_user_info_hidden that rely on chromote 14 | -------------------------------------------------------------------------------- /R/api_hidden.r: -------------------------------------------------------------------------------- 1 | #' Get video metadata and video files from URLs 2 | #' 3 | #' @description \ifelse{html}{\figure{api-unofficial}{options: alt='[Works on: 4 | #' Unofficial API]'}}{\strong{[Works on: Unofficial API]}} 5 | #' 6 | #' @param video_urls vector of URLs or IDs to TikTok videos. 7 | #' @param save_video logical. Should the videos be downloaded. 8 | #' @param overwrite logical. If save_video=TRUE and the file already exists, 9 | #' should it be overwritten? 10 | #' @param dir directory to save videos files to. 11 | #' @param cache_dir if set to a path, one RDS file with metadata will be written 12 | #' to disk for each video. This is useful if you have many videos and want to 13 | #' pick up where you left if something goes wrong. 14 | #' @param sleep_pool a vector of numbers from which a waiting period is randomly 15 | #' drawn. 16 | #' @param max_tries how often to retry if a request fails. 
#' @param cookiefile path to your cookiefile. Usually not needed after running
#'   \link{auth_hidden} once. See \code{vignette("unofficial-api", package =
#'   "traktok")} for more information on authentication.
#' @param verbose should the function print status updates to the screen?
#' @param ... handed to \code{tt_videos_hidden} (for tt_videos) and (further) to
#'   \link{tt_request_hidden}.
#'
#' @details The function will wait between scraping two videos to make it less
#'   obvious that a scraper is accessing the site. The period is drawn randomly
#'   from the `sleep_pool` and multiplied by a random fraction.
#'
#' @details Note that the video file has to be requested in the same session as
#'   the metadata. So while the URL to the video file is included in the
#'   metadata, this link will not work in most cases.
#'
#'
#' @return a data.frame
#' @export
#'
#' @examples
#' \dontrun{
#' tt_videos("https://www.tiktok.com/@tiktok/video/7106594312292453675")
#' }
tt_videos_hidden <- function(video_urls,
                             save_video = TRUE,
                             overwrite = FALSE,
                             dir = ".",
                             cache_dir = NULL,
                             sleep_pool = 1:10,
                             max_tries = 5L,
                             cookiefile = NULL,
                             verbose = TRUE,
                             ...) {

  video_urls <- unique(video_urls)
  n_urls <- length(video_urls)
  video_urls <- id2url(video_urls)

  if (verbose) cli::cli_alert_info("Getting {n_urls} unique link{?s}")
  if (!is.null(cookiefile)) cookiemonster::add_cookies(cookiefile)
  cookies <- cookiemonster::get_cookies("^(www.)*tiktok.com", as = "string")

  check_dir(dir, "dir")
  check_dir(cache_dir, "cache_dir")

  # iterate by position so the counter `i` is always a single number, even if
  # two inputs resolve to the same URL
  dplyr::bind_rows(purrr::map(seq_along(video_urls), function(i) {
    u <- video_urls[[i]]
    video_id <- extract_regex(
      u,
      "(?<=/video/)(.+?)(?=\\?|$)|(?<=/photo/)(.+?)(?=\\?|$)|(?<=https://vm.tiktok.com/).+?(?=/|$)"
    )
    # done_msg is interpolated lazily when the progress step finishes
    done_msg <- ""
    if (verbose) cli::cli_progress_step(
      "Getting video {video_id}",
      msg_done = "Got video {video_id} ({i}/{n_urls}). {done_msg}"
    )

    # budget of re-requests get_video() may spend on unparsable json
    the$retries <- 5L
    video_dat <- get_video(url = u,
                           video_id = video_id,
                           overwrite = overwrite,
                           cache_dir = cache_dir,
                           max_tries = max_tries,
                           cookies = cookies,
                           verbose = verbose)

    if (isTRUE(video_dat$video_status_code == 0L) && save_video) {
      if (!isTRUE(video_dat$is_slides)) {
        video_fn <- file.path(dir, paste0(video_dat$author_username, "_video_",
                                          video_dat$video_id, ".mp4"))

        f_name <- save_video(video_dat = video_dat,
                             video_fn = video_fn,
                             overwrite = overwrite,
                             max_tries = max_tries,
                             cookies = cookies)

        f_size <- file.size(f_name)
        if (isTRUE(f_size > 1000)) {
          # public equivalent of the internal utils:::format.object_size()
          size_label <- format(structure(f_size, class = "object_size"),
                               units = "auto")
          done_msg <- glue::glue("File size: {size_label}.")
        } else {
          cli::cli_warn("Video {video_id} has a very small file size (less than 1kB) and is likely corrupt.")
        }
        video_dat$video_fn <- video_fn
      } else { # for slides: download_url holds several comma-separated links
        download_urls <- strsplit(video_dat$download_url, ", ", fixed = TRUE) |>
          unlist()
        slide_fns <- file.path(dir, paste0(video_dat$author_username,
                                           "_video_",
                                           video_dat$video_id,
                                           "_",
                                           seq_along(download_urls),
                                           ".jpeg"))
        purrr::walk2(download_urls, slide_fns, function(slide_url, slide_fn) {
          curl::curl_download(url = slide_url, destfile = slide_fn, quiet = TRUE)
        })
      }
    }

    # no pause after the last URL or when nothing was actually requested
    if (i != n_urls && !isTRUE(the$skipped)) {
      wait(sleep_pool, verbose)
    }
    the$skipped <- FALSE # reset skipped

    return(video_dat)
  }))

}


#' Request (or read from cache) and parse the data of one video
#'
#' Reads the json from `cache_dir` if a file for `video_id` exists and
#' `overwrite` is `FALSE` (and then sets `the$skipped`), otherwise requests it
#' with `tt_request_hidden()` and, if `cache_dir` is set, writes it there. If
#' `parse_video()` fails, the request is repeated with `overwrite = TRUE` for as
#' long as `the$retries` is above zero; after that the `try-error` object is
#' returned. `cookies` and `verbose` are only handed on to the recursive call.
#'
#' @noRd
get_video <- function(url,
                      video_id,
                      overwrite,
                      cache_dir,
                      max_tries,
                      cookies,
                      verbose) {

  json_fn <- ""
  if (!is.null(cache_dir)) json_fn <- file.path(cache_dir,
                                                paste0(video_id, ".json"))

  if (overwrite || !file.exists(json_fn)) {
    tt_json <- tt_request_hidden(url, max_tries = max_tries)
    if (!is.null(cache_dir)) writeLines(tt_json, json_fn, useBytes = TRUE)
  } else {
    tt_json <- readChar(json_fn, nchars = file.size(json_fn), useBytes = TRUE)
    # TODO: not ideal as not consistent with request
    attr(tt_json, "url_full") <- url
    attr(tt_json, "html_status") <- 200L
    the$skipped <- TRUE
  }
  # make sure json can be parsed, otherwise retry
  out <- try(parse_video(tt_json, video_id), silent = TRUE)
  if (methods::is(out, "try-error") && the$retries > 0) {
    the$retries <- the$retries - 1
    out <- get_video(url,
                     video_id,
                     overwrite = TRUE, # most common reason for failure here is a malformed cached json
                     cache_dir,
                     max_tries,
                     cookies,
                     verbose)
  }
  return(out)
}


#' Download the video file belonging to one row of parsed metadata
#'
#' Tries up to `max_tries` times to download `video_dat$download_url` to
#' `video_fn`. After a failed attempt, a fresh download link is requested via
#' `get_video()` before waiting 10 seconds. Returns the value of
#' `curl::curl_download()` on success, `video_fn` if the file already exists
#' and `overwrite` is `FALSE`, and an object of class `try-error` otherwise.
#'
#' @noRd
save_video <- function(video_dat,
                       video_fn,
                       overwrite,
                       max_tries,
                       cookies) {

  video_url <- video_dat$download_url
  f <- structure("", class = "try-error")
  if (!is.null(video_url)) {

    if (overwrite || !file.exists(video_fn)) {
      while (methods::is(f, "try-error") && max_tries > 0) {
        the$skipped <- FALSE
        h <- curl::handle_setopt(
          curl::new_handle(),
          cookie = cookies,
          referer = "https://www.tiktok.com/"
        )
        f <- try(curl::curl_download(
          video_url, video_fn, quiet = TRUE, handle = h
        ), silent = TRUE)

        if (methods::is(f, "try-error")) {
          cli::cli_alert_warning(
            "Download failed, retrying after 10 seconds. {max_tries} left."
          )
          # if this fails, the download link has likely expired, so better get a
          # new one
          refreshed <- get_video(url = video_dat$video_url,
                                 video_id = video_dat$video_id,
                                 overwrite = TRUE,
                                 cache_dir = NULL,
                                 max_tries = 1,
                                 cookies = NULL,
                                 verbose = FALSE)
          # get_video() hands back a try-error when parsing keeps failing; in
          # that case keep the old link instead of crashing on `$`
          if (!methods::is(refreshed, "try-error") &&
              !is.null(refreshed$download_url)) {
            video_url <- refreshed$download_url
          }
          Sys.sleep(10)
        }

        max_tries <- max_tries - 1
      }
    } else if (file.exists(video_fn)) {
      f <- video_fn
      the$skipped <- TRUE
    }

  } else {
    cli::cli_warn("No valid video URL found for download.")
  }
  return(f)

}


#' Get json string from a TikTok URL using the hidden API
#'
#' @description \ifelse{html}{\figure{api-unofficial.svg}{options:
#'   alt='[Works on: Unofficial API]'}}{\strong{[Works on: Unofficial API]}}
#'
#'   Use this function in case you want to check the full data for a given
#'   TikTok video or account. In tt_videos, only an opinionated selection of
#'   data is included in the final object. If you want some different
#'   information, you can use this function.
#'
#' @param url a URL to a TikTok video or account
#'
#' @inheritParams tt_videos_hidden
#'
#' @return a character string containing the json embedded in the page, with
#'   the attributes \code{url_full}, \code{html_status} and \code{set-cookies}.
#'   If the request results in an html status of 400 or above, a warning is
#'   issued and a small placeholder json containing the status is returned
#'   instead.
#' @export
tt_request_hidden <- function(url,
                              max_tries = 5L,
                              cookiefile = NULL) {

  if (!is.null(cookiefile)) cookiemonster::add_cookies(cookiefile)
  cookies <- cookiemonster::get_cookies("^(www.)*tiktok.com", as = "string")

  req <- httr2::request(url) |>
    httr2::req_headers(
      "Accept-Encoding" = "gzip, deflate, sdch",
      "Accept-Language" = "en-US,en;q=0.8",
      "Upgrade-Insecure-Requests" = "1",
      "User-Agent" = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36",
      "Accept" = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
      "Cache-Control" = "max-age=0",
      "Connection" = "keep-alive"
    ) |>
    httr2::req_options(cookie = cookies) |>
    httr2::req_retry(max_tries = max_tries) |>
    httr2::req_timeout(seconds = 60L) |>
    # never throw on http errors; the status is handled below
    httr2::req_error(is_error = function(x) FALSE)

  res <- httr2::req_perform(req)
  status <- httr2::resp_status(res)
  if (status >= 400) {
    cli::cli_warn("Retrieving {url} resulted in html status {status}, the row will contain NAs.")
    # placeholder in the same shape as a real response so parsing still works
    out <- paste0('{"__DEFAULT_SCOPE__":{"webapp.video-detail":{"statusCode":"', status, '","statusMsg":"html_error"}}}')
  } else {
    out <- res |>
      httr2::resp_body_html() |>
      # html_element() replaces the superseded html_node()
      rvest::html_element("#SIGI_STATE,#__UNIVERSAL_DATA_FOR_REHYDRATION__") |>
      rvest::html_text()
  }

  # a missing node yields NA, which nchar() counts as 2 characters
  if (isFALSE(nchar(out) > 10)) stop("no json found")

  attr(out, "url_full") <- res$url
  attr(out, "html_status") <- status
  attr(out, "set-cookies") <- httr2::resp_headers(res)[["set-cookie"]]
  return(out)
}


#' Search videos
#'
#' @description \ifelse{html}{\figure{api-unofficial.svg}{options: alt='[Works on:
#'   Unofficial
#'   API]'}}{\strong{[Works on: Unofficial API]}}
#'
#'   This is the version of \link{tt_search} that explicitly uses the unofficial
#'   API. Use \link{tt_search_api} for the Research API version.
#'
#' @param query query as one string
#' @param offset how many videos to skip. For example, if you already have the
#'   first X of a search.
#' @param max_pages how many pages to get before stopping the search.
#'
#' @inheritParams tt_videos_hidden
#'
#' @details The function will wait between scraping two videos to make it less
#'   obvious that a scraper is accessing the site. The period is drawn randomly
#'   from the `sleep_pool` and multiplied by a random fraction.
#'
#' @details If a request results in an html status of 400 or above, a warning
#'   is issued and the results collected up to that point are returned.
#'
#' @return a data.frame
#' @export
#'
#' @examples
#' \dontrun{
#' tt_search_hidden("#rstats", max_pages = 2)
#' }
tt_search_hidden <- function(query,
                             offset = 0,
                             max_pages = Inf,
                             sleep_pool = 1:10,
                             max_tries = 5L,
                             cookiefile = NULL,
                             verbose = TRUE) {

  if (!is.null(cookiefile)) cookiemonster::add_cookies(cookiefile)
  cookies <- cookiemonster::get_cookies("^(www.)*tiktok.com", as = "string")

  results <- list()
  page <- 1
  has_more <- TRUE
  done_msg <- ""
  search_id <- NULL

  while (page <= max_pages && has_more) {
    if (verbose) cli::cli_progress_step(
      "Getting page {page}",
      # for some reason already uses updated page value
      msg_done = "Got page {page - 1}. {done_msg}"
    )

    req <- httr2::request("https://www.tiktok.com/api/search/general/full/") |>
      httr2::req_url_query(
        aid = "1988",
        "cookie_enabled" = "true",
        "from_page" = "search",
        "keyword" = query,
        "offset" = offset,
        search_id = search_id
      ) |>
      httr2::req_options(cookie = cookies) |>
      httr2::req_headers(
        authority = "www.tiktok.com",
        accept = "*/*",
        `accept-language` = "en-GB,en;q=0.9,de-DE;q=0.8,de;q=0.7,en-US;q=0.6",
        # every version string must be wrapped in its own pair of quotes
        `sec-ch-ua` = "\"Chromium\";v=\"115\", \"Not/A)Brand\";v=\"99\"",
        `sec-ch-ua-mobile` = "?0",
        `sec-ch-ua-platform` = "\"Linux\"",
        `sec-fetch-dest` = "empty",
        `sec-fetch-mode` = "cors",
        `sec-fetch-site` = "same-origin",
        `user-agent` = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"
      ) |>
      httr2::req_retry(max_tries = max_tries) |>
      httr2::req_timeout(seconds = 60L) |>
      httr2::req_error(is_error = function(x) FALSE)

    resp <- httr2::req_perform(req)
    status <- httr2::resp_status(resp)
    if (status >= 400L) {
      # nothing to parse: stop here instead of indexing a page that was never
      # stored (which would fail with "subscript out of bounds")
      if (verbose) cli::cli_progress_done(result = "failed")
      cli::cli_warn("Requesting page {page} resulted in html status {status}, returning the results collected so far.")
      break
    }

    page_results <- parse_search(resp)
    results[[page]] <- page_results
    offset <- attr(page_results, "cursor")
    search_id <- attr(page_results, "search_id")
    # a missing or NA attribute counts as "no more results"
    has_more <- isTRUE(as.logical(attr(page_results, "has_more")))
    done_msg <- glue::glue("Found {nrow(page_results)} videos.")
    page <- page + 1
    if (!has_more) {
      if (verbose) {
        # the first argument of cli_progress_done() is an id, not a message
        cli::cli_progress_done()
        cli::cli_alert_info("Reached end of results")
      }
      break
    }
    if (page <= max_pages) wait(sleep_pool, verbose)
  }

  # silence the R CMD check note about the column name used in filter()
  video_id <- NULL
  dplyr::bind_rows(results) |>
    dplyr::filter(!is.na(video_id))
}


#' Get infos about a user from the hidden API
#'
#' @param username A URL to a video or username.
#' @param parse Whether to parse the data into a data.frame (set to FALSE to get
#'   the full list).
#'
#' @return A data.frame of user info (or, if \code{parse = FALSE}, the list
#'   obtained from the json embedded in the profile page).
#' @export
#'
#' @examples
#' \dontrun{
#' df <- tt_user_info_hidden("https://www.tiktok.com/@fpoe_at")
#' }
tt_user_info_hidden <- function(username,
                                parse = TRUE) {

  rlang::check_installed("rvest", reason = "to use this function", version = "1.0.4")

  # a bare username is turned into the URL of the profile
  if (!grepl("^https?://", username)) {
    username <- paste0("https://www.tiktok.com/@", username)
  }

  # "(www\\.)?" makes the prefix optional; the former "[www.]*" was a character
  # class that matched any run of the characters "w" and "."
  if (!grepl("^https?://(www\\.)?tiktok\\.com/@.+", username)) {
    cli::cli_abort("The provided username does not resolve to a TikTok account URL: {username}")
  }

  sess <- rvest::read_html_live(username)

  json <- sess |>
    rvest::html_element("#__UNIVERSAL_DATA_FOR_REHYDRATION__") |>
    rvest::html_text()

  # html_text() gives NA when the element is not on the page
  if (!is.na(json)) {
    user_data <- json |>
      jsonlite::fromJSON()
  } else {
    cli::cli_alert_warning("Could not retrieve data for user")
    user_data <- list()
  }

  if (parse) {
    return(parse_user(user_data))
  } else {
    return(user_data)
  }

}


#' @title Get followers and following of a user from the hidden API
#'
#' @description \ifelse{html}{\figure{api-unofficial.svg}{options: alt='[Works on:
#'   Unofficial API]'}}{\strong{[Works on: Unofficial API]}}
#'
#'   Get up to 5,000 accounts who follow a user or accounts a user follows.
#'
#' @param secuid The secuid of a user. You can get it with
#'   \link{tt_user_info_hidden} by querying an account (see example).
#' @inheritParams tt_search_hidden
#'
#' @details If a response cannot be read as json, the same page is requested
#'   again. After \code{max_tries} consecutive failures, a warning is issued and
#'   the accounts collected up to that point are returned.
#'
#' @return a data.frame of followers (or of accounts the user follows)
#' @export
#'
#' @examples
#' \dontrun{
#' df <- tt_user_info_hidden("https://www.tiktok.com/@fpoe_at")
#' tt_get_follower_hidden(df$secUid)
#' tt_get_following_hidden(df$secUid)
#' }
tt_get_following_hidden <- function(secuid,
                                    sleep_pool = 1:10,
                                    max_tries = 5L,
                                    cookiefile = NULL,
                                    verbose = TRUE) {

  request_user_list_hidden(secuid = secuid,
                           scene = "21",
                           label = "followed accounts",
                           sleep_pool = sleep_pool,
                           max_tries = max_tries,
                           cookiefile = cookiefile,
                           verbose = verbose)

}

#' @rdname tt_get_following_hidden
#' @export
tt_get_follower_hidden <- function(secuid,
                                   sleep_pool = 1:10,
                                   max_tries = 5L,
                                   cookiefile = NULL,
                                   verbose = TRUE) {

  request_user_list_hidden(secuid = secuid,
                           scene = "67",
                           label = "followers",
                           sleep_pool = sleep_pool,
                           max_tries = max_tries,
                           cookiefile = cookiefile,
                           verbose = verbose)

}


#' Page through the user list endpoint of the hidden API
#'
#' Shared implementation of `tt_get_following_hidden()` (`scene = "21"`) and
#' `tt_get_follower_hidden()` (`scene = "67"`). `label` is only used in the
#' status messages.
#'
#' @noRd
request_user_list_hidden <- function(secuid,
                                     scene,
                                     label,
                                     sleep_pool,
                                     max_tries,
                                     cookiefile,
                                     verbose) {

  if (!is.null(cookiefile)) cookiemonster::add_cookies(cookiefile)
  cookies <- cookiemonster::get_cookies("^(www.)*tiktok.com", as = "string")

  cursor <- 0
  has_more <- TRUE
  failures <- 0L
  user_data <- list()

  while (has_more) {
    if (verbose) cli::cli_progress_step(
      msg = if (length(user_data) == 0L) "Getting {label}..." else "Getting more {label}...",
      msg_done = "Got {length(user_data)} {label}."
    )
    resp <- httr2::request("https://www.tiktok.com/api/user/list/") |>
      httr2::req_url_query(
        count = "30",
        minCursor = cursor,
        scene = scene,
        secUid = secuid
      ) |>
      httr2::req_options(cookie = cookies) |>
      httr2::req_retry(max_tries = max_tries) |>
      httr2::req_perform()

    new_data <- try(httr2::resp_body_json(resp), silent = TRUE)
    if (methods::is(new_data, "try-error")) {
      # keep the cursor so the same page is requested again (starting over at 0
      # would duplicate accounts) and give up after max_tries failures in a row
      failures <- failures + 1L
      if (failures >= max_tries) {
        cli::cli_warn("The response could not be read {failures} times in a row, returning the {label} collected so far.")
        break
      }
    } else {
      failures <- 0L
      user_data <- c(user_data, purrr::pluck(new_data, "userList", .default = list()))
      has_more <- isTRUE(new_data$hasMore)
      if (!is.null(new_data$minCursor)) cursor <- new_data$minCursor
    }
    if (has_more) wait(sleep_pool, verbose)
  }
  if (verbose) cli::cli_progress_done()

  if (verbose) cli::cli_progress_step(
    msg = "Parsing results"
  )
  return(parse_followers(user_data))

}


#' Get videos from a TikTok user's profile
#'
#' This function uses rvest to scrape a TikTok user's profile and retrieve any hidden videos.
#' @description \ifelse{html}{\figure{api-unofficial.svg}{options: alt='[Works on:
#'   Unofficial API]'}}{\strong{[Works on: Unofficial API]}}
#'
#' Get all videos posted by a TikTok user.
#'
#' @param username The username of the TikTok user whose hidden videos you want to retrieve.
#' @param solve_captchas open browser to solve appearing captchas manually.
#' @param return_urls return video URLs instead of downloading the videos.
#' @param timeout time (in seconds) to wait between scrolling and solving captchas.
#' @param verbose should the function print status updates to the screen?
#' @param ... Additional arguments to be passed to the \code{\link{tt_videos_hidden}} function.
#'
#' @return A list of video data or URLs, depending on the value of \code{return_urls}.
#'
#' @examples
#' \dontrun{
#' # Get hidden videos from the user "fpoe_at"
#' tt_user_videos_hidden("fpoe_at")
#' }
#' @export
tt_user_videos_hidden <- function(username,
                                  solve_captchas = FALSE,
                                  return_urls = FALSE,
                                  timeout = 5L,
                                  verbose = TRUE,
                                  ...) {

  rlang::check_installed("rvest", reason = "to use this function", version = "1.0.4")

  # a bare username is turned into the URL of the profile
  if (!grepl("^https?://", username)) {
    username <- paste0("https://www.tiktok.com/@", username)
  }

  # "(www\\.)?" makes the prefix optional; the former "[www.]*" was a character
  # class that matched any run of the characters "w" and "."
  if (!grepl("^https?://(www\\.)?tiktok\\.com/@.+", username)) {
    cli::cli_abort("The provided username does not resolve to a TikTok account URL: {username}")
  }

  if (verbose) cli::cli_progress_step("Opening {username}")
  # reset captcha warning
  the$captcha <- NULL
  sess <- rvest::read_html_live(username)
  last_y <- -1
  # scroll as far as possible: stop once the position no longer increases
  if (verbose) cli::cli_progress_bar(format = "{cli::pb_spin} Scrolling down (y={last_y})")
  while (sess$get_scroll_position()$y > last_y) {
    solve_captcha(sess, solve = solve_captchas)
    last_y <- sess$get_scroll_position()$y
    sess$scroll_to(top = 10 ^ 5)
    if (verbose) cli::cli_progress_update()
    Sys.sleep(timeout * stats::runif(1, 1, 3))
  }
  if (verbose) cli::cli_progress_step("Collecting discovered URLs")
  urls <- sess |>
    rvest::html_elements("a") |>
    rvest::html_attr("href")
  # keep links that contain the profile URL; fixed = TRUE because the URL is a
  # literal string and not a regular expression ("." would match anything)
  urls <- grep(username, x = urls, value = TRUE, fixed = TRUE) |>
    unique()
  if (verbose) {
    cli::cli_progress_done()
    cli::cli_alert_success("{length(urls)} URLs discovered")
  }
  if (return_urls) return(urls)
  tt_videos_hidden(urls, ...)
}


# Check the live session for a captcha. Returns TRUE once no captcha element is
# found (and resets the$view and the$captcha). If a captcha is present, a
# message is shown once; with solve = TRUE the session is opened in a browser
# view and the check is repeated until the captcha is gone, otherwise the
# function returns NULL invisibly.
solve_captcha <- function(sess, solve) {
  repeat {
    captcha <- rvest::html_element(sess, "#captcha-verify-image,.captcha-verify-container")
    if (length(captcha) == 0L) {
      the$view <- NULL
      the$captcha <- NULL
      return(TRUE)
    }
    # display status once
    if (is.null(the$captcha)) {
      cli::cli_alert_info("Captcha discovered")
      the$captcha <- TRUE
    }
    if (!solve) return(invisible(NULL))
    if (is.null(the$view))
      the$view <- sess$view()
    # poll in a loop with a short pause; calling the function recursively
    # without waiting could exhaust the call stack while the user is solving
    Sys.sleep(1)
  }
}
--------------------------------------------------------------------------------
/R/api_research.r:
--------------------------------------------------------------------------------
1 | #' Query TikTok videos using the research API
2 | #'
3 | #' @description \ifelse{html}{\figure{api-research.svg}{options: alt='[Works on:
4 | #' Research API]'}}{\strong{[Works on: Research API]}}
5 | #'
6 | #' This is the version of \link{tt_search} that explicitly uses Research API.
7 | #' Use \link{tt_search_hidden} for the unofficial API version.
8 | #'
9 | #' @param query A query string or object (see \link{query}).
10 | #' @param start_date,end_date A start and end date to narrow the search
11 | #' (required; can be a maximum of 30 days apart).
12 | #' @param fields The fields to be returned (defaults to all)
13 | #' @param start_cursor The starting cursor, i.e., how many results to skip (for
14 | #' picking up an old search).
15 | #' @param search_id The search id (for picking up an old search).
16 | #' @param is_random Whether the query is random (defaults to FALSE).
#' @param max_pages results are returned in batches/pages with 100 videos. How
#'   many should be requested before the function stops?
#' @param parse Should the results be parsed? Otherwise, the original JSON
#'   object is returned as a nested list.
#' @param cache should progress be saved in the current session? It can then be
#'   retrieved with \code{last_query()} if an error occurs. But the function
#'   will use extra memory.
#' @param verbose should the function print status updates to the screen?
#' @param token The authentication token (usually supplied automatically after
#'   running \link{auth_research} once).
#' @return A data.frame of parsed TikTok videos (or a nested list).
#' @export
#' @examples
#' \dontrun{
#' # look for a keyword or hashtag by default
#' tt_search_api("rstats")
#'
#' # or build a more elaborate query
#' query() |>
#'   query_and(field_name = "region_code",
#'             operation = "IN",
#'             field_values = c("JP", "US")) |>
#'   query_or(field_name = "hashtag_name",
#'            operation = "EQ", # rstats is the only hashtag
#'            field_values = "rstats") |>
#'   query_or(field_name = "keyword",
#'            operation = "IN", # rstats is one of the keywords
#'            field_values = "rstats") |>
#'   query_not(operation = "EQ",
#'             field_name = "video_length",
#'             field_values = "SHORT") |>
#'   tt_search_api()
#'
#' # when a search fails after a while, get the results and pick it back up
#' # (only work with same parameters)
#' last_pull <- last_query()
#' query() |>
#'   query_and(field_name = "region_code",
#'             operation = "IN",
#'             field_values = c("JP", "US")) |>
#'   query_or(field_name = "hashtag_name",
#'            operation = "EQ", # rstats is the only hashtag
#'            field_values = "rstats") |>
#'   query_or(field_name = "keyword",
#'            operation = "IN", # rstats is one of the keywords
#'            field_values = "rstats") |>
#'   query_not(operation = "EQ",
#'             field_name = "video_length",
#'             field_values = "SHORT") |>
#'   tt_search_api(start_cursor = length(last_pull) + 1,
#'                 search_id = attr(last_pull, "search_id"))
#' }
tt_search_api <- function(query,
                          start_date = Sys.Date() - 1,
                          end_date = Sys.Date(),
                          fields = "all",
                          start_cursor = 0L,
                          search_id = NULL,
                          is_random = FALSE,
                          max_pages = 1,
                          parse = TRUE,
                          cache = TRUE,
                          verbose = TRUE,
                          token = NULL) {

  if (is.character(query)) {
    # strsplit() returns a list; flatten it so every word becomes its own
    # search term (applying sub() to the list itself would deparse it into a
    # single string such as 'c("a", "b")')
    terms <- unlist(strsplit(query, " ", fixed = TRUE), use.names = FALSE)
    # repeated spaces produce empty terms, which are dropped
    terms <- terms[nzchar(terms)]
    query <- query(or = list(
      list(
        field_name = "hashtag_name",
        operation = "IN",
        field_values = as.list(sub("#", "", terms, fixed = TRUE))
      ),
      list(
        field_name = "keyword",
        operation = "IN",
        field_values = as.list(terms)
      )
    ))
  }

  # "all" expands to the full field list; a character vector of field names is
  # collapsed into the comma-separated form the request uses
  if (identical(fields, "all")) {
    fields <- "id,video_description,create_time,region_code,share_count,view_count,like_count,comment_count,music_id,hashtag_names,username,effect_ids,playlist_id,voice_to_text"
  } else {
    fields <- paste(fields, collapse = ",")
  }

  if (is_datetime(start_date)) {
    start_date <- format(start_date, "%Y%m%d")
  } else if (!grepl("^\\d{8}$", start_date)) {
    cli::cli_abort("{.code start_date} needs to be a valid date or a string like, e.g., \"20210102\"")
  }

  if (is_datetime(end_date)) {
    end_date <- format(end_date, "%Y%m%d")
  } else if (!grepl("^\\d{8}$", end_date)) {
    cli::cli_abort("{.code end_date} needs to be a valid date or a string like, e.g., \"20210102\"")
  }

  if (verbose) {
    cli::cli_progress_step("Making initial request")
  }

  res <- tt_query_request(
    endpoint = "query/",
    query = query,
    start_date = start_date,
    end_date = end_date,
    fields = fields,
    cursor = start_cursor,
    search_id = search_id,
    is_random = is_random,
    token = token
  )
  videos <- purrr::pluck(res, "data", "videos")
  # search state is kept in `the` so later pages can continue the same search
  the$search_id <- spluck(res, "data", "search_id")
  the$cursor <- spluck(res, "data", "cursor")
  the$videos <- videos

  the$page <- 1

  if (verbose) cli::cli_progress_bar(
    format = "{cli::pb_spin} Got {page} page{?s} with {length(videos)} video{?s} {cli::col_silver('[', cli::pb_elapsed, ']')}",
    format_done = "{cli::col_green(cli::symbol$tick)} Got {page} page{?s} with {length(videos)} video{?s}",
    .envir = the
  )

  # keep requesting pages while the API reports more and the limit allows
  while (purrr::pluck(res, "data", "has_more", .default = FALSE) && the$page < max_pages) {
    the$page <- the$page + 1
    the$cursor <- spluck(res, "data", "cursor")
    if (verbose) cli::cli_progress_update(force = TRUE, .envir = the)
    res <- tt_query_request(
      endpoint = "query/",
      query = query,
      start_date = start_date,
      end_date = end_date,
      fields = fields,
      cursor = the$cursor,
      search_id = the$search_id,
      is_random = is_random,
      token = token
    )
    videos <- c(videos, purrr::pluck(res, "data", "videos"))
    if (cache) {
      the$videos <- videos
    }
    if (verbose) cli::cli_progress_done()
  }

  if (parse) {
    if (verbose) {
      cli::cli_progress_done()
      cli::cli_progress_step("Parsing data")
    }
    videos <- parse_api_search(videos)
    if (verbose) cli::cli_progress_done()
  }
  class(videos) <- c("tt_results", class(videos))
  attr(videos, "search_id") <- the$search_id
  attr(videos, "cursor") <- the$cursor
  return(videos)
}


#' @export
#' @rdname tt_search_api
tt_query_videos <- tt_search_api


# used to iterate over search requests: builds and performs one POST request
# against the research video API and returns the parsed JSON body
tt_query_request <- function(endpoint,
                             query = NULL,
                             video_id = NULL,
                             start_date = NULL,
                             end_date = NULL,
                             fields = NULL,
                             cursor = NULL,
                             search_id = NULL,
                             is_random = NULL,
                             token = NULL) {

  if (is.null(token)) token <- get_token()

  if (!is.null(query) && !is_query(query))
    cli::cli_abort("query needs to be a query object (see {.code ?query})")

  body <- list(query = unclass(query),
               video_id = video_id,
               start_date = start_date,
               end_date = end_date,
               max_count = 100L,
               cursor = cursor,
               search_id = search_id,
               is_random = is_random)

  httr2::request("https://open.tiktokapis.com/v2/research/video/") |>
    httr2::req_url_path_append(endpoint) |>
    httr2::req_method("POST") |>
    httr2::req_url_query(fields = fields) |>
    httr2::req_headers("Content-Type" = "application/json") |>
    httr2::req_auth_bearer_token(token$access_token) |>
    # unset (NULL) parameters are left out of the request body
    httr2::req_body_json(data = purrr::discard(body, is.null)) |>
    httr2::req_error(body = api_error_handler) |>
    httr2::req_retry(
      max_tries = 5L,
      # don't retry when daily quota is reached (429)
      is_transient = function(resp)
        httr2::resp_status(resp) %in% c(301:399, 401:428, 430:599),
      # increase backoff after each try
      backoff = function(t) t ^ 3
    ) |>
    httr2::req_perform() |>
    httr2::resp_body_json(bigint_as_char = TRUE)

}


#' Lookup which videos were liked by a user using the research API
#'
#' @description \ifelse{html}{\figure{api-research.svg}{options: alt='[Works on:
#'   Research API]'}}{\strong{[Works on: Research API]}}
#'
#' @param username name(s) of the user(s) to be queried
#' @param fields The fields to be returned (defaults to all)
#' @inheritParams tt_search_api
#'
#' @return A data.frame of parsed TikTok videos the user has liked
#' @export
#'
#' @examples
#' \dontrun{
#' tt_get_liked("jbgruber")
#' # OR
#' tt_user_liked_videos_api("https://www.tiktok.com/@tiktok")
#' # OR
#' tt_user_liked_videos_api("https://www.tiktok.com/@tiktok")
#'
#' # note: none of these work because I could not find any account that
#' # has likes public!
#' }
tt_user_liked_videos_api <- function(username,
                                     fields = "all",
                                     max_pages = 1,
                                     cache = TRUE,
                                     verbose = TRUE,
                                     token = NULL) {

  purrr::map(username, function(u) {
    # if username is given as URL
    if (grepl("/", u)) {
      u <- extract_regex(
        u,
        "(?<=.com/@)(.+?)(?=\\?|$|/)"
      )
    }
    if (verbose) cli::cli_progress_step(msg = "Getting user {u}",
                                        msg_done = "Got user {u}")
    the$result <- TRUE
    if (is.null(token)) token <- get_token()

    # "all" expands to the default field list; a vector of field names is
    # collapsed into one comma-separated string (no padding spaces)
    if (identical(fields, "all")) {
      fields <- c(
        "id",
        "create_time",
        "username",
        "region_code",
        "video_description",
        "music_id",
        "like_count",
        "comment_count",
        "share_count",
        "view_count",
        "hashtag_names",
        "is_stem_verified",
        # "favourites_count",
        "video_duration"
      )
    }
    fields <- paste0(fields, collapse = ",")

    # seed value so the loop below runs at least once with an empty cursor
    res <- list(data = list(has_more = TRUE, cursor = NULL))
    the$page <- 0L
    videos <- list()
    # iterate over pages
    while (purrr::pluck(res, "data", "has_more", .default = FALSE) && the$page < max_pages) {
      the$page <- the$page + 1
      the$cursor <- purrr::pluck(res, "data", "cursor")

      res <- tt_user_request(endpoint = "liked_videos/",
                             username = u,
                             fields = fields,
                             cursor = the$cursor,
                             token = token)

      videos <- c(videos, purrr::pluck(res, "data", "user_liked_videos"))
      if (cache) {
        the$videos <- videos
      }
    }

    # nothing returned for this user: close the progress step as failed and
    # contribute no rows to the combined result
    if (length(videos) == 0L) {
      if (verbose) cli::cli_progress_done(result = "failed")
      return(NULL)
    }

    videos <- videos |>
      purrr::map(as_tibble_onerow) |>
      dplyr::bind_rows() |>
      # somehow, the order changes between, calls. So I fix it here
      # (any_of() tolerates columns that were not requested via `fields`)
      dplyr::relocate(dplyr::any_of(c("id",
                                      "username",
                                      "create_time",
                                      "video_description",
                                      "region_code",
                                      "video_duration",
                                      "view_count",
                                      "like_count",
                                      "comment_count",
                                      "share_count",
                                      "music_id")))

    videos <- tibble::add_column(videos, liked_by_user = u)
    if (verbose) cli::cli_progress_done(
      result = if (nrow(videos) > 0L) "done" else "failed"
    )

    return(videos)
  }) |>
    dplyr::bind_rows()
}


#' Lookup which videos were reposted by a user using the research API
#'
#' @description \ifelse{html}{\figure{api-research.svg}{options: alt='[Works on:
#'   Research API]'}}{\strong{[Works on: Research API]}}
#'
#' @param username name(s) of the user(s) to be queried
#' @param fields The fields to be returned (defaults to all)
#' @inheritParams tt_search_api
#'
#' @return A data.frame of parsed TikTok videos the user has reposted
#' @export
#'
#' @examples
#' \dontrun{
#' tt_get_reposted("jbgruber")
#' # OR
#' tt_user_reposted_api("https://www.tiktok.com/@tiktok")
#' # OR
#' tt_user_reposted_api("https://www.tiktok.com/@tiktok")
#'
#' # note: none of these work because nobody has this enabled!
#' }
tt_user_reposted_api <- function(username,
                                 fields = "all",
                                 max_pages = 1,
                                 cache = TRUE,
                                 verbose = TRUE,
                                 token = NULL) {

  purrr::map(username, function(u) {
    # if username is given as URL
    if (grepl("/", u)) {
      u <- extract_regex(
        u,
        "(?<=.com/@)(.+?)(?=\\?|$|/)"
      )
    }
    if (verbose) cli::cli_progress_step(msg = "Getting user {u}",
                                        msg_done = "Got user {u}")
    the$result <- TRUE
    if (is.null(token)) token <- get_token()

    # "all" expands to the default field list; a vector of field names is
    # collapsed into one comma-separated string
    if (identical(fields, "all")) {
      fields <- c(
        "id",
        "create_time",
        "username",
        "region_code",
        "video_description",
        "music_id",
        "like_count",
        "comment_count",
        "share_count",
        "view_count",
        "hashtag_names",
        "is_stem_verified",
        "favourites_count",
        "video_duration"
      )
    }
    fields <- paste0(fields, collapse = ",")

    # seed value so the loop below runs at least once with an empty cursor
    res <- list(data = list(has_more = TRUE, cursor = NULL))
    the$page <- 0L
    videos <- list()
    # iterate over pages
    while (purrr::pluck(res, "data", "has_more", .default = FALSE) && the$page < max_pages) {
      the$page <- the$page + 1
      the$cursor <- purrr::pluck(res, "data", "cursor")

      res <- tt_user_request(endpoint = "reposted_videos/",
                             username = u,
                             fields = fields,
                             cursor = the$cursor,
                             token = token)

      videos <- c(videos, purrr::pluck(res, "data", "reposted_videos"))
      if (cache) {
        the$videos <- videos
      }
    }

    # nothing returned for this user: close the progress step as failed and
    # contribute no rows to the combined result
    if (length(videos) == 0L) {
      if (verbose) cli::cli_progress_done(result = "failed")
      return(NULL)
    }

    # the parsed tibble (not the raw list of videos) is what gets the extra
    # column and is returned
    videos <- videos |>
      purrr::map(as_tibble_onerow) |>
      dplyr::bind_rows() |>
      # somehow, the order changes between, calls. So I fix it here
      # (any_of() tolerates columns that were not requested via `fields`)
      dplyr::relocate(dplyr::any_of(c("id",
                                      "username",
                                      "create_time",
                                      "video_description",
                                      "region_code",
                                      "video_duration",
                                      "view_count",
                                      "like_count",
                                      "comment_count",
                                      "share_count",
                                      "music_id")))

    videos <- tibble::add_column(videos, reposted_by_user = u)
    if (verbose) cli::cli_progress_done(
      result = if (nrow(videos) > 0L) "done" else "failed"
    )

    return(videos)
  }) |>
    dplyr::bind_rows()
}


#' Lookup which videos were pinned by a user using the research API
#'
#' @description \ifelse{html}{\figure{api-research.svg}{options: alt='[Works on:
#'   Research API]'}}{\strong{[Works on: Research API]}}
#'
#' @param username vector of user names (handles) or URLs to users' pages.
#' @inheritParams tt_search_api
#'
#' @return A data.frame of parsed TikTok videos the user has pinned
#' @export
#'
#' @examples
#' \dontrun{
#' tt_get_pinned("jbgruber")
#' # OR
#' tt_user_pinned_videos_api("https://www.tiktok.com/@tiktok")
#' # OR
#' tt_user_pinned_videos_api("https://www.tiktok.com/@tiktok")
#' }
tt_user_pinned_videos_api <- function(username,
                                      fields = "all",
                                      cache = TRUE,
                                      verbose = TRUE,
                                      token = NULL) {

  purrr::map(username, function(u) {
    # if username is given as URL
    if (grepl("/", u)) {
      u <- extract_regex(
        u,
        "(?<=.com/@)(.+?)(?=\\?|$|/)"
      )
    }
    if (verbose) cli::cli_progress_step(msg = "Getting user {u}",
                                        msg_done = "Got user {u}")
    the$result <- TRUE
    if (is.null(token)) token <- get_token()

    if (identical(fields, "all")) {
      fields <- c(
        "id",
        "create_time",
        "username",
        "region_code",
        "video_description",
        "music_id",
        "like_count",
        "comment_count",
        "share_count",
        "view_count",
        "hashtag_names",
        "is_stem_verified",
        # mentioned in docs, but does not work
        # "favourites_count",
        "video_duration"
      )
    }
    fields <- paste0(fields, collapse = ",")

    res <- tt_user_request(endpoint = "pinned_videos/",
                           username = u,
                           fields = fields,
                           cursor = NULL,
                           token = token)

    videos <- purrr::pluck(res, "data", "pinned_videos_list") |>
      purrr::map(as_tibble_onerow) |>
      dplyr::bind_rows() |>
      tibble::add_column(pinned_by_user = u)

    if (cache) {
      the$videos <- videos
    }

    # `videos` is a tibble here, so length() counts columns: an empty result
    # only has the `pinned_by_user` column and is reported as failed
    if (verbose) cli::cli_progress_done(
      result = ifelse(length(videos) > 1, "done", "failed")
    )

    return(videos)
  }) |>
    dplyr::bind_rows()
}


#' @title Get followers and following of users from the research API
#'
#' @description \ifelse{html}{\figure{api-research.svg}{options: alt='[Works on:
#'   Research API]'}}{\strong{[Works on: Research API]}}
#'
#' @param username name(s) of the user(s) to be queried
#' @inheritParams tt_search_api
#'
#' @return A data.frame
#' @export
#'
#' @examples
#' \dontrun{
#' tt_user_follower_api("jbgruber")
#' # OR
#' tt_user_following_api("https://www.tiktok.com/@tiktok")
#' # OR
#' tt_get_follower("https://www.tiktok.com/@tiktok")
#' }
tt_user_follower_api <- function(username,
                                 max_pages = 1,
                                 cache = TRUE,
                                 verbose = TRUE,
                                 token = NULL) {

  tt_user_follow(endpoint = "followers/",
                 username = username,
                 max_pages = max_pages,
                 cache = cache,
                 verbose = verbose,
                 token = token)
}


#' @rdname tt_user_follower_api
#' @export
tt_user_following_api <- function(username,
                                  max_pages = 1,
                                  cache = TRUE,
                                  verbose = TRUE,
                                  token = NULL) {

  tt_user_follow(endpoint = "following/",
                 username = username,
                 max_pages = max_pages,
                 cache = cache,
                 verbose = verbose,
                 token = token)
}


# shared implementation behind tt_user_follower_api() and
# tt_user_following_api(); `endpoint` is "followers/" or "following/"
tt_user_follow <- function(endpoint,
                           username,
                           max_pages = 1,
                           cache = TRUE,
                           verbose = TRUE,
                           token = NULL) {

  # name of the response element that holds the accounts for this endpoint
  result_key <- if (endpoint == "followers/") "user_followers" else "user_following"

  purrr::map(username, function(u) {
    # if username is given as URL
    if (grepl("/", u)) {
      u <- extract_regex(
        u,
        "(?<=.com/@)(.+?)(?=\\?|$|/)"
      )
    }
    if (verbose) cli::cli_progress_step(msg = "Getting user {u}",
                                        msg_done = "Got user {u}")
    the$result <- TRUE
    if (is.null(token)) token <- get_token()

    # seed value so the loop below runs at least once with an empty cursor
    res <- list(data = list(has_more = TRUE, cursor = NULL))
    the$page <- 0L
    followers <- list()
    # iterate over pages
    while (purrr::pluck(res, "data", "has_more", .default = FALSE) && the$page < max_pages) {
      the$page <- the$page + 1
      the$cursor <- purrr::pluck(res, "data", "cursor")

      res <- tt_user_request(endpoint = endpoint,
                             username = u,
                             cursor = the$cursor,
                             token = token)

      followers <- c(followers, purrr::pluck(res, "data", result_key))
      if (cache) {
        the$videos <- followers
      }
    }

    followers <- dplyr::bind_rows(followers)
    followers <- tibble::add_column(followers, following_user = u)
    # `followers` is a tibble here, so length() counts columns: an empty
    # result only has the `following_user` column and is reported as failed
    if (verbose) cli::cli_progress_done(
      result = ifelse(length(followers) > 1, "done", "failed")
    )

    return(followers)
  }) |>
    dplyr::bind_rows()
}

# used to iterate over user requests: performs one POST request against the
# research user API and returns the parsed JSON body; `fields` is only added
# to the URL when it was supplied
tt_user_request <- function(endpoint,
                            username,
                            fields,
                            cursor,
                            token) {

  req <- httr2::request("https://open.tiktokapis.com/v2/research/user/") |>
    httr2::req_url_path_append(endpoint) |>
    httr2::req_method("POST") |>
    httr2::req_headers("Content-Type" = "application/json") |>
    httr2::req_auth_bearer_token(token$access_token) |>
    httr2::req_body_json(data = list(username = username,
                                     max_count = 100L,
                                     cursor = cursor)) |>
    httr2::req_error(is_error = api_user_error_checker,
                     body = api_error_handler) |>
    httr2::req_retry(max_tries = 5)

  if (!missing(fields)) {
    req <- req |>
      httr2::req_url_query(fields = fields)
  }

  req |>
    httr2::req_perform() |>
    httr2::resp_body_json(bigint_as_char = TRUE)

}


#' Lookup TikTok information about a user using the research API
#'
#' @description \ifelse{html}{\figure{api-research.svg}{options: alt='[Works on:
#'   Research API]'}}{\strong{[Works on: Research API]}}
#'
#' @inheritParams tt_user_liked_videos_api
#'
#' @return A data.frame with one row of profile information per user
#' @export
#'
#' @examples
#' \dontrun{
#' tt_user_info_api("jbgruber")
#' # OR
#' tt_user_info_api("https://www.tiktok.com/@tiktok")
#' # OR
#' tt_user_info("https://www.tiktok.com/@tiktok")
#' }
tt_user_info_api <- function(username,
                             fields = "all",
                             verbose = TRUE,
                             token = NULL) {

  out <- purrr::map(username, function(u) {
    # if username is given as URL
    if (grepl("/", u)) {
      u <- extract_regex(
        u,
        "(?<=.com/@)(.+?)(?=\\?|$|/)"
      )
    }
    if (verbose) cli::cli_progress_step(msg = "Getting user {u}",
                                        msg_done = "Got user {u}")
    the$result <- TRUE
    if (is.null(token)) token <- get_token()

    if (identical(fields, "all")) {
      fields <- c(
        "display_name",
        "bio_description",
        "avatar_url",
        "is_verified",
        "follower_count",
        "following_count",
        "likes_count",
        "video_count"
      )
    }
    fields <- paste0(fields, collapse = ",")

    # /tests/testthat/example_resp_q_user.json
    out <- httr2::request("https://open.tiktokapis.com/v2/research/user/info/") |>
      httr2::req_method("POST") |>
      httr2::req_url_query(fields = fields) |>
      httr2::req_headers("Content-Type" = "application/json") |>
      httr2::req_auth_bearer_token(token$access_token) |>
      httr2::req_body_json(data = list(username = u)) |>
      httr2::req_error(is_error = api_user_error_checker,
                       body = api_error_handler) |>
      httr2::req_retry(max_tries = 5,
                       backoff = function(t) t ^ 3) |>
      httr2::req_perform() |>
      httr2::resp_body_json(bigint_as_char = TRUE) |>
      purrr::pluck("data") |>
      tibble::as_tibble()
    # api_user_error_checker() sets the$result to FALSE for tolerated failures
    if (verbose && !the$result) cli::cli_progress_done(result = "failed")
    return(out)
  }) |>
    dplyr::bind_rows()
  if (verbose) cli::cli_progress_done()
  return(out)
}


#' Retrieve video comments
#'
#' @description
#' \ifelse{html}{\figure{api-research.svg}{options: alt='[Works on: Research API]'}}{\strong{[Works on: Research API]}}
#'
#' @param video_id The id or URL of a video
#' @inheritParams tt_search_api
#'
#' @return A data.frame of parsed comments
#' @export
#'
#' @examples
#' \dontrun{
#' tt_comments("https://www.tiktok.com/@tiktok/video/7106594312292453675")
#' # OR
#' tt_comments("7106594312292453675")
#' # OR
#' tt_comments_api("7106594312292453675")
#' }
tt_comments_api <- function(video_id,
                            fields = "all",
                            start_cursor = 0L,
                            max_pages = 1L,
                            cache = TRUE,
                            verbose = TRUE,
                            token = NULL) {

  # if video_id is given as URL
  if (grepl("[^0-9]", video_id)) {
    video_id <- extract_regex(
      video_id,
      "(?<=/video/)(.+?)(?=\\?|$)|(?<=https://vm.tiktok.com/).+?(?=/|$)"
    )
  }

  # "all" expands to the full field list; a vector of field names is collapsed
  # into the comma-separated form the request uses
  if (identical(fields, "all")) {
    fields <- "id,video_id,text,like_count,reply_count,parent_comment_id,create_time"
  } else {
    fields <- paste(fields, collapse = ",")
  }

  if (verbose) cli::cli_progress_step("Making initial request")

  res <- tt_query_request(
    endpoint = "comment/list/",
    video_id = video_id,
    fields = fields,
    cursor = start_cursor,
    token = token
  )
  comments <- purrr::pluck(res, "data", "comments")
  if (cache) the$comments <- comments
  the$page <- 1

  if (verbose) cli::cli_progress_bar(
    format = "{cli::pb_spin} Got {page} page{?s} with {length(the$comments)} comment{?s} {cli::col_silver('[', cli::pb_elapsed, ']')}",
    format_done = "{cli::col_green(cli::symbol$tick)} Got {page} page{?s} with {length(the$comments)} comment{?s}",
    .envir = the
  )

  # keep requesting pages while the API reports more and the limit allows
  while (purrr::pluck(res, "data", "has_more", .default = FALSE) && the$page < max_pages) {
    the$page <- the$page + 1
    if (verbose) cli::cli_progress_update(.envir = the)
    res <- tt_query_request(
      endpoint = "comment/list/",
      video_id = video_id,
      fields = fields,
      cursor = purrr::pluck(res, "data", "cursor", .default = NULL),
      token = token
    )
    comments <- c(comments, purrr::pluck(res, "data", "comments"))
    if (cache) the$comments <- comments
    if (verbose) cli::cli_progress_done()
  }

  if (verbose) {
    cli::cli_progress_done()
    cli::cli_progress_step("Parsing data")
  }
  out <- parse_api_comments(comments)

  return(out)
}


#' Lookup TikTok playlist using the research API
#'
#' @description \ifelse{html}{\figure{api-research.svg}{options: alt='[Works on:
#'   Research API]'}}{\strong{[Works on: Research API]}}
#'
#' @param playlist_id playlist ID or URL to a playlist.
#' @inheritParams tt_user_info_api
#'
#' @return A data.frame
#' @export
tt_playlist_api <- function(playlist_id,
                            verbose = TRUE,
                            token = NULL) {

  # NOTE: `verbose` is accepted but not used in this function body

  # the docs mention a cursor, but it's not implemented as far as I can tell
  cursor <- NULL

  # if playlist_id is given as URL, take the trailing numeric id
  if (grepl("/", playlist_id)) {
    playlist_id <- extract_regex(
      playlist_id,
      "(?<=-)([0-9]+?)(?=\\?|$|/)"
    )
  }

  if (is.null(token)) token <- get_token()

  out <- httr2::request("https://open.tiktokapis.com/v2/research/playlist/info/") |>
    httr2::req_method("POST") |>
    httr2::req_headers("Content-Type" = "application/json") |>
    httr2::req_auth_bearer_token(token$access_token) |>
    httr2::req_body_json(data = list(playlist_id = playlist_id,
                                     cursor = cursor)) |>
    httr2::req_error(is_error = function(resp)
      # API always seems to send 500, even when successful
      !httr2::resp_status(resp) %in% c(100:399, 500),
      body = api_error_handler) |>
    httr2::req_retry(max_tries = 5) |>
    httr2::req_perform() |>
    httr2::resp_body_json(bigint_as_char = TRUE) |>
    purrr::pluck("data") |>
    tibble::as_tibble()

  return(out)
}


# Error-body callback for httr2::req_error(). Saves videos cached in `the` to
# a temporary RDS file, then returns a character vector describing the error
# for JSON and HTML responses (NULL for any other content type).
api_error_handler <- function(resp) {

  # failsafe save already collected videos to disk
  if (purrr::pluck_exists(the, "videos")) {
    q <- the$videos
    attr(q, "search_id") <- the$search_id
    path <- tempfile(fileext = ".rds")
    saveRDS(q, path)
    # tell the user where the failsafe copy went; otherwise it cannot be found
    cli::cli_alert_info("Already collected videos were saved to {.file {path}}")
  }

  # identical() is used because the content type can be NA when the response
  # has no Content-Type header, which would make `==` fail inside if()
  type <- httr2::resp_content_type(resp)

  if (identical(type, "application/json")) {
    # parse the body once instead of once per field
    err <- httr2::resp_body_json(resp)$error
    return(
      c(
        paste("status:", err$code),
        paste("message:", err$message),
        paste("log_id:", err$log_id)
      )
    )
  }

  if (identical(type, "text/html")) {
    res <- httr2::resp_body_html(resp)
    return(
      c(
        paste("status:", rvest::html_text2(rvest::html_element(res, "title"))),
        paste("message:", rvest::html_text2(rvest::html_element(res, "body")))
      )
    )
  }

  invisible(NULL)
}


# `is_error` callback for httr2::req_error() on user endpoints. Returns TRUE
# when the response should raise an error and FALSE when it should be passed
# on. For a set of known, tolerated API messages it prints a warning, sets
# the$result to FALSE and returns FALSE so that loops over users continue.
api_user_error_checker <- function(resp) {
  status <- httr2::resp_status(resp)
  if (status < 400L) return(FALSE)
  if (status == 404L) return(TRUE)

  # parse the body once; a body that is not valid JSON counts as an error
  body <- tryCatch(httr2::resp_body_json(resp), error = function(e) NULL)
  if (is.null(body)) return(TRUE)

  # it looks like the API sometimes returns 500 falsely, but in these cases, no
  # error message is present
  if (status == 500L && !purrr::pluck_exists(body, "error", "message")) {
    return(FALSE)
  }

  msg <- purrr::pluck(body, "error", "message")
  tolerated <- c(
    # if likes can't be accessed, which is true for many users, this should
    # not throw an error
    "information.cannot.be.returned",
    # if the user can't be found, this should not throw an error, which
    # would break the loop
    "cannot.find.the.user",
    # if account is private
    "is.private",
    "API.cannot.return.this.user's.information"
  )

  is_tolerated <- is.character(msg) && length(msg) == 1L &&
    any(vapply(tolerated, grepl, logical(1), x = msg))

  if (is_tolerated) {
    # interpolate instead of passing the message as a template, so braces in
    # the API message are not evaluated by cli
    cli::cli_alert_warning("{msg}")
    the$result <- FALSE
    return(FALSE)
  }
  return(TRUE)
}

# --------------------------------------------------------------------------------
# /R/api_research_extended.r:
# --------------------------------------------------------------------------------
#' Get videos from a TikTok user's profile
#'
#' @description \ifelse{html}{\figure{api-research.svg}{options: alt='[Works on:
#'   Research API]'}}{\strong{[Works on: Research API]}}
#'
#'   Get all videos posted by a user or multiple users.
#'   This is a convenience
#'   wrapper around \code{\link{tt_search_api}} that takes care of moving time
#'   windows (search is limited to 30 days). This is the version of
#'   \link{tt_user_videos} that explicitly uses Research API. Use
#'   \link{tt_user_videos_hidden} for the unofficial API version.
#'
#' @param username The username or usernames whose videos you want to retrieve.
#' @param since,to limits from/to when to go through the account in 30 day windows.
#' @param ... Additional arguments to be passed to the
#'   \code{\link{tt_search_api}} function.
#'
#' @inheritParams tt_search_api
#'
#' @examples
#' \dontrun{
#' # Get videos from the user "fpoe_at" since October 2024
#' tt_user_videos_api("fpoe_at", since = "2024-10-01")
#'
#' # often makes sense to combine this with the account creation time from the
#' # hidden URL
#' fpoe_at_info <- tt_user_info_hidden(username = "fpoe_at")
#' tt_user_videos_api("fpoe_at", since = fpoe_at_info$create_time)
#'
#' }
#' @export
tt_user_videos_api <- function(username,
                               since = "2020-01-01",
                               to = Sys.Date(),
                               verbose = TRUE,
                               ...) {

  since <- as.Date(since)
  to <- as.Date(to)
  # seq.Date() fails with an unhelpful message when the range is reversed
  if (isTRUE(since > to)) {
    cli::cli_abort("{.code since} must not be later than {.code to}")
  }

  # windows start every 31 days and span 30 days each, so they do not overlap
  dates_from <- seq.Date(from = since,
                         to = to,
                         by = "31 day")
  dates_to <- dates_from + 30
  # we want the last window to end at `to`
  dates_to[length(dates_to)] <- to

  pb <- FALSE
  if (verbose) {
    pb <- list(
      format = "{cli::pb_spin} searching time window {cli::pb_current} of {cli::pb_total} | {cli::pb_percent} done | ETA: {cli::pb_eta}"
    )
  }

  purrr::map2(dates_from, dates_to, function(from, to) {
    out <- query() |>
      query_or(field_name = "username",
               operation = "IN",
               field_values = username) |>
      tt_search_api(start_date = from,
                    end_date = to,
                    verbose = FALSE,
                    ...)
    # windows without results return NULL and are dropped by bind_rows()
    if (nrow(out) > 0) return(out)
  }, .progress = pb) |>
    dplyr::bind_rows()

}

# --------------------------------------------------------------------------------
# /R/auth_check.r:
# --------------------------------------------------------------------------------
#' Check whether you are authenticated
#'
#' @description \ifelse{html}{\figure{api-both.svg}{options:
#'   alt='[Works on: Both]'}}{\strong{[Works on: Both]}}
#'
#'   Check if the necessary token or cookies are stored on your computer
#'   already. By default, the function checks for the authentication of the
#'   research and hidden API. To learn how you can authenticate, look at the
#'   vignette for the research (\code{vignette("research-api", package =
#'   "traktok")}) or hidden (\code{vignette("unofficial-api", package =
#'   "traktok")}) API.
#'
#' @param research,hidden turn check on/off for the research or hidden API.
#' @param silent only return if check(s) were successful, no status on the
#'   screen
#'
#' @return logical vector (invisible)
#' @export
#'
#' @examples
#' auth_check()
auth_check <- function(research = TRUE, hidden = TRUE, silent = FALSE) {
  # only successful checks are added, as named TRUE entries
  auth <- vector()
  if (research) {
    if (!isFALSE(get_token(auth = FALSE))) {
      auth <- c(research = TRUE)
      if (!silent) cli::cli_alert_success("Research API authenticated")
    }
  }
  if (hidden) {
    # silent = TRUE: a failed cookie lookup just means "not authenticated" and
    # must not print an error, in particular when `silent` was requested
    cookies <- try(cookiemonster::get_cookies("^(www.)*tiktok.com"), silent = TRUE)
    if (is.data.frame(cookies) && "tt_chain_token" %in% cookies$name) {
      auth <- c(auth, hidden = TRUE)
      if (!silent) cli::cli_alert_success("Hidden API authenticated")
    }
  }
  invisible(auth)
}

# --------------------------------------------------------------------------------
# /R/auth_hidden.r:
# --------------------------------------------------------------------------------
#' Authenticate for the hidden/unofficial API
| #' 3 | #' @description Guides you through authentication for the hidden/unofficial API 4 | #' 5 | #' @param cookiefile path to your cookiefile. Usually not needed after running 6 | #' \link{auth_hidden} once. See \code{vignette("unofficial-api", package = 7 | #' "traktok")} for more information on authentication. 8 | #' @param live opens Chromium browser to guide you through the auth process 9 | #' (experimental). 10 | #' 11 | #' @return nothing. Called to set up authentication 12 | #' @export 13 | #' 14 | #' @examples 15 | #' \dontrun{ 16 | #' # to run through the steps of authentication 17 | #' auth_hidden() 18 | #' # or point to a cookie file directly 19 | #' auth_hidden("www.tiktok.com_cookies.txt") 20 | #' } 21 | auth_hidden <- function(cookiefile, live = interactive()) { 22 | 23 | if (!missing(cookiefile)) { 24 | cookiemonster::add_cookies(cookiefile) 25 | return(invisible(TRUE)) 26 | } 27 | msg <- paste0( 28 | "Supply either a cookiefile (see {.url https://jbgruber.github.io/traktok/", 29 | "articles/unofficial-api.html#authentication})" 30 | ) 31 | if (live && isTRUE(utils::askYesNo("Do you want to try live authentication using Chrome? 
(experimental)"))) { 32 | 33 | rlang::check_installed("rvest", reason = "to use this function", version = "1.0.4") 34 | 35 | sess <- rvest::read_html_live("https://www.tiktok.com/") 36 | # TODO: find way to click cookie banner 37 | # sess$click(".tiktok-cookie-banner>button") 38 | # sess$session$send_command('const button = document.querySelector("body > tiktok-cookie-banner").shadowRoot.querySelector("div > div.button-wrapper > button:nth-child(2)");') 39 | if (check_element_exists(sess, "#header-login-button")) { 40 | sess$click("#header-login-button") 41 | sess$view() 42 | } 43 | cli::cli_progress_bar(format = "{cli::pb_spin} Waiting for login", 44 | format_done = "Got cookies!") 45 | Sys.sleep(5) # give time to load login 46 | while (check_element_exists(sess, "#loginContainer")) { 47 | Sys.sleep(1 / 30) 48 | cli::cli_progress_update() 49 | } 50 | 51 | cli::cli_progress_done() 52 | cli::cli_alert_success("Got cookies!") 53 | cookiemonster::add_cookies(session = sess) 54 | return(invisible(TRUE)) 55 | } else { 56 | msg <- paste0(msg, " or set {.code live = TRUE} to use interactive authentication") 57 | } 58 | cli::cli_abort(msg) 59 | } 60 | 61 | 62 | check_element_exists <- function(sess, css) { 63 | res <- try(rvest::html_element(sess, css), silent = TRUE) 64 | if (methods::is(res, "try-error")) return(TRUE) 65 | return(length(rvest::html_element(sess, css)) > 0L) 66 | } 67 | 68 | -------------------------------------------------------------------------------- /R/auth_research.r: -------------------------------------------------------------------------------- 1 | #' Authenticate for the official research API 2 | #' 3 | #' @description 4 | #' Guides you through authentication for the Research API 5 | #' 6 | #' 7 | #' @param client_key Client key for authentication 8 | #' @param client_secret Client secret for authentication 9 | #' 10 | #' @returns An authentication token (invisible) 11 | #' 12 | #' @details You need to apply for access to the API and get the key 
13 | #' and secret from TikTok. See 14 | #' \url{https://developers.tiktok.com/products/research-api/} for more 15 | #' information. 16 | #' 17 | #' 18 | #' @export 19 | #' 20 | #' @examples 21 | #' \dontrun{ 22 | #' auth_research(client_key, client_secret) 23 | #' } 24 | auth_research <- function(client_key, client_secret) { 25 | 26 | if (missing(client_key)) { 27 | cli::cli_alert_info(c("Head to {.url https://developers.tiktok.com/research}", 28 | " to get your credentials")) 29 | client_key <- askpass::askpass("Please enter your client key") 30 | } 31 | 32 | if (missing(client_secret)) 33 | client_secret <- askpass::askpass("Please enter your client secret") 34 | 35 | token <- req_token(client_key, client_secret) 36 | 37 | token$access_token <- httr2::obfuscated(token$access_token) 38 | token$access_token_expires <- Sys.time() + token$expires_in 39 | 40 | # attach for refresh 41 | token$client_key <- enc(client_key) 42 | token$client_secret <- enc(client_secret) 43 | 44 | f <- Sys.getenv("TIKTOK_TOKEN", unset = "token.rds") 45 | p <- tools::R_user_dir("traktok", "cache") 46 | dir.create(p, showWarnings = FALSE, recursive = TRUE) 47 | # store in cache 48 | rlang::env_poke(env = the, nm = "tiktok_token", value = token, create = TRUE) 49 | 50 | httr2::secret_write_rds(x = token, path = file.path(p, f), 51 | key = I(rlang::hash("traktok"))) 52 | 53 | cli::cli_alert_success("Succesfully authenticated!") 54 | invisible(token) 55 | } 56 | 57 | 58 | req_token <- function(client_key, client_secret) { 59 | 60 | if (methods::is(client_key, "raw")) client_key <- dec(client_key) 61 | if (methods::is(client_secret, "raw")) client_secret <- dec(client_secret) 62 | 63 | # https://developers.tiktok.com/doc/client-access-token-management 64 | resp <- httr2::request("https://open.tiktokapis.com/v2/oauth/token/") |> 65 | httr2::req_method("POST") |> 66 | httr2::req_headers( 67 | "Content-Type" = "application/x-www-form-urlencoded", 68 | "Cache-Control" = "no-cache") |> 69 | 
httr2::req_body_form( 70 | "client_key" = client_key, 71 | "client_secret" = client_secret, 72 | "grant_type" = "client_credentials" 73 | ) |> 74 | httr2::req_error(is_error = function(x) FALSE) |> 75 | httr2::req_perform() |> 76 | httr2::resp_body_json() 77 | 78 | if (!is.null(resp$error)) 79 | cli::cli_abort("Request failed with {.emph {resp$error}}: {.emph {resp$error_description}}") 80 | 81 | invisible(resp) 82 | } 83 | 84 | 85 | get_token <- function(auth = TRUE) { 86 | 87 | f <- file.path(tools::R_user_dir("traktok", "cache"), 88 | Sys.getenv("TIKTOK_TOKEN", unset = "token.rds")) 89 | 90 | if (rlang::env_has(the, nms = "tiktok_token")) { 91 | token <- rlang::env_get(the, nm = "tiktok_token", I(rlang::hash("traktok"))) 92 | } else if (file.exists(f)) { 93 | token <- httr2::secret_read_rds(f, I(rlang::hash("traktok"))) 94 | } else if (auth) { 95 | token <- auth_research() 96 | } else return(FALSE) 97 | 98 | # refresh token if expired 99 | if (token$access_token_expires <= Sys.time() + 5) { 100 | token <- auth_research(client_key = dec(token$client_key), 101 | client_secret = dec(token$client_secret)) 102 | } 103 | 104 | return(token) 105 | } 106 | 107 | 108 | #' encrypt a single element 109 | #' @noRd 110 | enc <- function(x, key = NULL) { 111 | if (is.null(key)) key <- openssl::sha256(charToRaw(Sys.getenv("COOKIE_KEY", unset = "supergeheim"))) 112 | openssl::aes_ctr_encrypt(charToRaw(x), key) 113 | } 114 | 115 | #' decrypt a single element 116 | #' @noRd 117 | dec <- function(x, key = NULL) { 118 | if (is.null(key)) key <- openssl::sha256(charToRaw(Sys.getenv("COOKIE_KEY", unset = "supergeheim"))) 119 | rawToChar(openssl::aes_ctr_decrypt(x, key)) 120 | } 121 | 122 | -------------------------------------------------------------------------------- /R/last_.r: -------------------------------------------------------------------------------- 1 | #' Retrieve most recent query 2 | #' 3 | #' If \code{tt_search_api} or \code{tt_comments_api} fail after already getting 
4 | #' several pages, you can use this function to get all videos that have been 5 | #' retrieved so far from memory. Does not work when the session has crashed. In 6 | #' that case, look in \code{tempdir()} for an RDS file as a last resort. 7 | #' 8 | #' @return the parsed videos, or the list of unparsed videos if parsing fails 9 | #' @export 10 | last_query <- function() { 11 | q <- the$videos 12 | out <- try(parse_api_search(q), silent = TRUE) 13 | if (methods::is(out, "try-error")) { 14 | attr(q, "search_id") <- the$search_id 15 | attr(q, "cursor") <- the$cursor 16 | return(q) 17 | } 18 | return(out) 19 | } 20 | 21 | 22 | #' @rdname last_query 23 | #' @export 24 | last_comments <- function() { 25 | the$comments 26 | } 27 | 28 | -------------------------------------------------------------------------------- /R/parse_hidden.r: -------------------------------------------------------------------------------- 1 | #' @noRd 2 | parse_video <- function(json_string, video_id) { 3 | 4 | tt_data <- jsonlite::fromJSON(json_string) 5 | 6 | video_url <- attr(json_string, "url_full") 7 | html_status <- attr(json_string, "html_status") 8 | 9 | video_data <- purrr::pluck(tt_data, "ItemModule") 10 | 11 | if (!is.null(video_data)) { 12 | video_timestamp <- purrr::pluck(video_data, video_id, "createTime", 13 | .default = NA_character_) |> 14 | as.integer() |> 15 | as.POSIXct(tz = "UTC", origin = "1970-01-01") 16 | 17 | return(tibble::tibble( 18 | video_id = video_id, 19 | video_url = video_url, 20 | video_timestamp = video_timestamp, 21 | video_length = spluck(video_data, video_id, "video", "duration"), 22 | video_title = spluck(video_data, video_id, "desc"), 23 | video_locationcreated = spluck(video_data, video_id, "locationCreated"), 24 | video_diggcount = spluck(video_data, video_id, "stats", "diggCount"), 25 | video_sharecount = spluck(video_data, video_id, "stats", "shareCount"), 26 | video_commentcount = spluck(video_data, video_id, "stats", "commentCount"), 27 | video_playcount = spluck(video_data, video_id, 
"stats", "playCount"), 28 | author_username = spluck(video_data, video_id, "author"), 29 | author_nickname = spluck(tt_data, "UserModule", "users", 1, "nickname"), 30 | author_bio = spluck(tt_data, "UserModule", "users", 1, "signature"), 31 | download_url = spluck(video_data, video_id, "video", "downloadAddr"), 32 | html_status = html_status, 33 | music = list(spluck(video_data, video_id, "music")), 34 | challenges = list(spluck(video_data, video_id, "challenges")), 35 | is_classified = isTRUE(spluck(video_data, video_id, "isContentClassified")), 36 | video_status = spluck(tt_data, "__DEFAULT_SCOPE__", "webapp.video-detail", "statusMsg"), 37 | video_status_code = spluck(tt_data, "__DEFAULT_SCOPE__", "webapp.video-detail", "statusCode") 38 | )) 39 | } 40 | 41 | video_data <- purrr::pluck(tt_data, "__DEFAULT_SCOPE__", "webapp.video-detail", "itemInfo", "itemStruct") 42 | 43 | if (!is.null(video_data)) { 44 | video_timestamp <- purrr::pluck(video_data, "createTime", 45 | .default = NA_character_) |> 46 | as.integer() |> 47 | as.POSIXct(tz = "UTC", origin = "1970-01-01") 48 | 49 | out <- tibble::tibble( 50 | video_id = video_id, 51 | video_url = video_url, 52 | video_timestamp = video_timestamp, 53 | video_length = spluck(video_data, "video", "duration"), 54 | video_title = spluck(video_data, "desc"), 55 | video_locationcreated = spluck(video_data, "locationCreated"), 56 | video_diggcount = spluck(video_data, "stats", "diggCount"), 57 | video_sharecount = spluck(video_data, "stats", "shareCount"), 58 | video_commentcount = spluck(video_data, "stats", "commentCount"), 59 | video_playcount = spluck(video_data, "stats", "playCount"), 60 | author_id = spluck(video_data, "author", "id"), 61 | author_secuid = spluck(video_data, "author", "secUid"), 62 | author_username = spluck(video_data, "author", "uniqueId"), 63 | author_nickname = spluck(video_data, "author", "nickname"), 64 | author_bio = spluck(video_data, "author", "signature"), 65 | download_url = spluck(video_data, 
"video", "downloadAddr"), 66 | html_status = html_status, 67 | music = list(spluck(video_data, "music")), 68 | challenges = list(spluck(video_data, "challenges")), 69 | is_secret = isTRUE(spluck(video_data, "secret")), 70 | is_for_friend = isTRUE(spluck(video_data, "forFriend")), 71 | is_slides = FALSE, 72 | video_status = spluck(tt_data, "__DEFAULT_SCOPE__", "webapp.video-detail", "statusMsg"), 73 | video_status_code = spluck(tt_data, "__DEFAULT_SCOPE__", "webapp.video-detail", "statusCode"), 74 | content_classified = purrr::pluck(video_data, "isContentClassified", .default = FALSE) 75 | ) 76 | 77 | if (identical(out$download_url, "")) { 78 | out$download_url <- spluck(video_data, "video", "playAddr") 79 | } 80 | 81 | if (identical(out$download_url, "")) { 82 | out$download_url <- purrr::pluck(video_data, "imagePost", "images", "imageURL", "urlList") |> 83 | purrr::map_chr(1L) |> 84 | toString() 85 | out$is_slides <- TRUE 86 | } 87 | 88 | } else { 89 | out <- tibble::tibble( 90 | video_id = video_id, 91 | video_url = video_url, 92 | video_timestamp = NA, 93 | video_length = NA, 94 | video_title = NA, 95 | video_locationcreated = NA, 96 | video_diggcount = NA, 97 | video_sharecount = NA, 98 | video_commentcount = NA, 99 | video_playcount = NA, 100 | author_id = NA, 101 | author_secuid = NA, 102 | author_username = NA, 103 | author_nickname = NA, 104 | author_bio = NA, 105 | download_url = NA, 106 | html_status = html_status, 107 | music = NA, 108 | challenges = NA, 109 | is_secret = NA, 110 | is_for_friend = NA, 111 | is_slides = NA, 112 | video_status = spluck(tt_data, "__DEFAULT_SCOPE__", "webapp.video-detail", "statusMsg"), 113 | video_status_code = spluck(tt_data, "__DEFAULT_SCOPE__", "webapp.video-detail", "statusCode") 114 | ) 115 | cli::cli_warn("No video data found") 116 | } 117 | return(out) 118 | } 119 | 120 | 121 | #' @noRd 122 | parse_search <- function(res) { 123 | if (length(purrr::pluck(res, "body")) == 0L) 124 | cli::cli_abort("Unfortunalty, the 
search endpoint has changed and returns empty results. See {.url https://github.com/JBGruber/traktok/issues/14}.") 125 | 126 | tt_data <- res |> 127 | httr2::resp_body_json() 128 | 129 | tt_videos <- spluck(tt_data, "data") 130 | 131 | author_name <- vpluck(tt_videos, "item", "author", "uniqueId") 132 | video_id <- vpluck(tt_videos, "item", "id") 133 | video_url <- glue::glue("https://www.tiktok.com/@{author_name}/video/{video_id}") 134 | video_timestamp <- vpluck(tt_videos, "item", "createTime", val = "integer") |> 135 | as.integer() |> 136 | as.POSIXct(tz = "UTC", origin = "1970-01-01") 137 | 138 | out <- tibble::tibble( 139 | video_id = video_id, 140 | video_timestamp = video_timestamp, 141 | video_url = video_url, 142 | video_length = vpluck(tt_videos, "item", "video", "duration", val = "integer"), 143 | video_title = vpluck(tt_videos, "item", "desc"), 144 | video_diggcount = vpluck(tt_videos, "item", "stats", "diggCount", val = "integer"), 145 | video_sharecount = vpluck(tt_videos, "item", "stats", "shareCount", val = "integer"), 146 | video_commentcount = vpluck(tt_videos, "item", "stats", "commentCount", val = "integer"), 147 | video_playcount = vpluck(tt_videos, "item", "stats", "playCount", val = "integer"), 148 | video_is_ad = vpluck(tt_videos, "item", "isAd", val = "logical"), 149 | author_name = vpluck(tt_videos, "item", "author", "uniqueId"), 150 | author_nickname = vpluck(tt_videos, "item", "author", "nickname"), 151 | author_followercount = vpluck(tt_videos, "item", "authorStats", "followerCount", val = "integer"), 152 | author_followingcount = vpluck(tt_videos, "item", "authorStats", "followingCount", val = "integer"), 153 | author_heartcount = vpluck(tt_videos, "item", "authorStats", "heartCount", val = "integer"), 154 | author_videocount = vpluck(tt_videos, "item", "authorStats", "videoCount", val = "integer"), 155 | author_diggcount = vpluck(tt_videos, "item", "authorStats", "diggCount", val = "integer"), 156 | music = vpluck(tt_videos, "item", 
"music", val = "list"), 157 | challenges = vpluck(tt_videos, "item", "challenges", val = "list"), 158 | download_url = vpluck(tt_videos, "item", "video", "downloadAddr") 159 | ) 160 | 161 | attr(out, "cursor") <- purrr::pluck(tt_data, "cursor", .default = NA) 162 | attr(out, "search_id") <- purrr::pluck(tt_data, "log_pb", "impr_id", .default = NA) 163 | attr(out, "has_more") <- as.logical(purrr::pluck(tt_data, "has_more", .default = FALSE)) 164 | 165 | return(out) 166 | } 167 | 168 | 169 | #' @noRd 170 | #' @importFrom rlang .data 171 | parse_user <- function(user_data) { 172 | 173 | user_info <- spluck(user_data, "__DEFAULT_SCOPE__", "webapp.user-detail", "userInfo") 174 | 175 | user_info |> 176 | purrr::keep_at(c("user", "stats")) |> 177 | purrr::list_flatten(name_spec = "{inner}") |> 178 | purrr::list_flatten() |> 179 | as_tibble_onerow(.name_repair = clean_names) |> 180 | # for minimal backwards compatibility 181 | dplyr::rename(user_name = .data$unique_id, 182 | secUid = .data$sec_uid) |> 183 | dplyr::mutate(create_time = as_datetime(.data$create_time), 184 | nick_name_modify_time = as_datetime(.data$nick_name_modify_time)) 185 | 186 | } 187 | 188 | #' @noRd 189 | parse_followers <- function(follower_data) { 190 | 191 | purrr::map(follower_data, function(f) { 192 | dplyr::bind_cols(f$user, f$stats) 193 | }) |> 194 | dplyr::bind_rows() 195 | 196 | } 197 | -------------------------------------------------------------------------------- /R/parse_research.r: -------------------------------------------------------------------------------- 1 | #' @noRd 2 | parse_api_search <- function(x) { 3 | 4 | out <- tibble::tibble( 5 | video_id = vpluck(x, "video_id", val = "character"), 6 | author_name = vpluck(x, "username", val = "character"), 7 | view_count = vpluck(x, "view_count", val = "integer"), 8 | comment_count = vpluck(x, "comment_count", val = "integer"), 9 | share_count = vpluck(x, "share_count", val = "integer"), 10 | like_count = vpluck(x, "like_count", val = 
"integer"), 11 | region_code = vpluck(x, "region_code", val = "character"), 12 | create_time = as.POSIXct(vpluck(x, "create_time", val = "integer"), 13 | tz = "UTC", origin = "1970-01-01"), 14 | effect_ids = vpluck(x, "effect_ids", val = "list"), 15 | music_id = purrr::map_chr(x, function(i) 16 | purrr::pluck(i, "music_id", .default = NA_character_)), 17 | video_description = vpluck(x, "video_description", val = "character"), 18 | hashtag_names = vpluck(x, "hashtag_names", val = "list"), 19 | voice_to_text = vpluck(x, "voice_to_text", val = "character"), 20 | ) 21 | 22 | out$video_id <- ifelse(is.na(out$video_id), 23 | vpluck(x, "id", val = "character"), 24 | out$video_id) 25 | if (nrow(out) == 1L && is.na(out$video_id)) { 26 | out <- out[-1, ] 27 | } 28 | return(out) 29 | } 30 | 31 | 32 | #' @noRd 33 | parse_api_comments <- function(x) { 34 | 35 | out <- x |> 36 | dplyr::bind_rows() 37 | 38 | class(out) <- c("tt_results", class(out)) 39 | 40 | return(out) 41 | } 42 | 43 | 44 | #' @title Print search result 45 | #' @description Print a traktok search results 46 | #' @param x An object of class \code{tt_results} 47 | #' @param ... not used. 48 | #' @export 49 | print.tt_results <- function(x, ...) { 50 | cli::cat_rule(paste("search id:", cli::col_red(attr(x, "search_id")))) 51 | print(tibble::as_tibble(x)) 52 | } 53 | -------------------------------------------------------------------------------- /R/query_research.r: -------------------------------------------------------------------------------- 1 | #' Create a traktok query 2 | #' 3 | #' Create a traktok query from the given parameters. 4 | #' 5 | #' @param and,or,not A list of AND/OR/NOT conditions. Must contain one 6 | #' or multiple lists with \code{field_name}, \code{operation}, and 7 | #' \code{field_values} each (see example). 8 | #' @param q A traktok query created with \code{query}. 9 | #' @param field_name The field name to query against. 
One of: 10 | #' "create_date", "username", "region_code", "video_id", 11 | #' "hashtag_name", "keyword", "music_id", "effect_id", 12 | #' "video_length". 13 | #' @param operation One of: "EQ", "IN", "GT", "GTE", "LT", "LTE". 14 | #' @param field_values A vector of values to search for. 15 | #' 16 | #' @details TikTok's query consists of rather complicated lists 17 | #' dividing query elements into AND, OR and NOT: 18 | #' 19 | #' - **and**: The and conditions specify that all the conditions in the list must be met 20 | #' - **or**: The or conditions specify that at least one of the conditions in the list must be met 21 | #' - **not**: The not conditions specify that none of the conditions in the list must be met 22 | #' 23 | #' The query can be constructed by writing the list for each entry 24 | #' yourself, like in the first example. Alternatively, traktok 25 | #' provides convenience functions to build up a query using 26 | #' \code{query_and}, \code{query_or}, and \code{query_not}, which 27 | #' make building a query a little easier. You can learn more at 28 | #' . 29 | #' 30 | #' @return A traktok query. 
31 | #' 32 | #' @export 33 | #' 34 | #' @examples 35 | #' \dontrun{ 36 | #' # using query directly and supplying the list 37 | #' query(or = list( 38 | #' list( 39 | #' field_name = "hashtag_name", 40 | #' operation = "EQ", 41 | #' field_values = "rstats" 42 | #' ), 43 | #' list( 44 | #' field_name = "keyword", 45 | #' operation = "EQ", 46 | #' field_values = list("rstats", "API") 47 | #' ) 48 | #' )) 49 | #' # starting an empty query and building it up using the query_* functions 50 | #' query() |> 51 | #' query_or(field_name = "hashtag_name", 52 | #' operation = "EQ", 53 | #' field_values = "rstats") |> 54 | #' query_or(field_name = "keyword", 55 | #' operation = "IN", 56 | #' field_values = c("rstats", "API")) 57 | #' } 58 | #' 59 | #' @md 60 | query <- function(and = NULL, or = NULL, not = NULL) { 61 | q <- list(and = and, or = or, not = not) 62 | class(q) <- "traktok_query" 63 | return(clean_query(q)) 64 | } 65 | 66 | 67 | #' @rdname query 68 | #' @export 69 | query_and <- function(q, field_name, operation, field_values) { 70 | if (!is_query(q)) 71 | cli::cli_abort("{.fn query_and} needs a query as input") 72 | 73 | # TODO: is this really the best way to append the list? 
74 | q$and[[length(q$and) + 1]] <- list(field_name = field_name, 75 | operation = operation, 76 | field_values = as.list(field_values)) 77 | 78 | return(clean_query(q)) 79 | } 80 | 81 | 82 | #' @rdname query 83 | #' @export 84 | query_or <- function(q, field_name, operation, field_values) { 85 | if (!is_query(q)) 86 | cli::cli_abort("{.fn query_or} needs a query as input") 87 | 88 | q$or[[length(q$or) + 1]] <- list(field_name = field_name, 89 | operation = operation, 90 | field_values = as.list(field_values)) 91 | 92 | return(clean_query(q)) 93 | } 94 | 95 | 96 | #' @rdname query 97 | #' @export 98 | query_not <- function(q, field_name, operation, field_values) { 99 | if (!is_query(q)) 100 | cli::cli_abort("{.fn query_not} needs a query as input") 101 | 102 | q$not[[length(q$not) + 1]] <- list(field_name = field_name, 103 | operation = operation, 104 | field_values = as.list(field_values)) 105 | 106 | return(clean_query(q)) 107 | } 108 | 109 | 110 | is_query <- function(q) { 111 | methods::is(q, "traktok_query") 112 | } 113 | 114 | 115 | # make sure query only consists of valid entries 116 | clean_query <- function(q) { 117 | 118 | for (o in names(q)) { 119 | q[[o]][purrr::map_int(q[[o]], length) != 3] <- NULL 120 | q[!purrr::map_int(q, length) > 0] <- NULL 121 | } 122 | 123 | return(q) 124 | } 125 | 126 | 127 | #' @title Print a traktok query 128 | #' @description Print a traktok query as a tree 129 | #' @param x An object of class \code{traktok_query} 130 | #' @param ... Additional arguments passed to \code{lobstr::tree} 131 | #' @export 132 | #' @examples 133 | #' query() |> 134 | #' query_and(field_name = "hashtag_name", 135 | #' operation = "EQ", 136 | #' field_values = "rstats") |> 137 | #' print() 138 | print.traktok_query <- function(x, ...) { 139 | lobstr::tree(as.list(x), ...) 
140 | } 141 | -------------------------------------------------------------------------------- /R/shorthands.r: -------------------------------------------------------------------------------- 1 | #' Search videos 2 | #' 3 | #' @description \ifelse{html}{\figure{api-both.svg}{options: 4 | #' alt='[Works on: Both]'}}{\strong{[Works on: Both]}} 5 | #' 6 | #' Searches videos using either the Research API (if an authentication token 7 | #' is present, see \link{auth_research}) or otherwise the unofficial hidden 8 | #' API. See \link{tt_search_api} or \link{tt_search_hidden} respectively for 9 | #' information about these functions. 10 | #' 11 | #' @param ... arguments passed to \link{tt_search_api} or 12 | #' \link{tt_search_hidden}. To use the research API, include \code{token} 13 | #' (e.g., \code{token = NULL}). 14 | #' 15 | #' @return a data.frame 16 | #' @export 17 | tt_search <- function(...) { 18 | 19 | params <- list(...) 20 | token <- params$token 21 | params$token <- NULL 22 | if (is.null(token)) token <- get_token(auth = FALSE) 23 | if (isFALSE(token)) { 24 | do.call("tt_search_hidden", params) 25 | } else { 26 | do.call("tt_search_api", c(params, list(token = token))) 27 | } 28 | 29 | } 30 | 31 | 32 | #' Get videos from a TikTok user's profile 33 | #' 34 | #' @description \ifelse{html}{\figure{api-both.svg}{options: 35 | #' alt='[Works on: Both]'}}{\strong{[Works on: Both]}} 36 | #' 37 | #' Get all videos posted by a user (or multiple users for the Research API). 38 | #' Searches videos using either the Research API (if an authentication token 39 | #' is present, see \link{auth_research}) or otherwise the unofficial hidden 40 | #' API. See \link{tt_user_videos_api} or \link{tt_user_videos_hidden} respectively for 41 | #' information about these functions. 42 | #' 43 | #' @param username The username or usernames whose videos you want to retrieve. 44 | #' @param ... Additional arguments to be passed to the \code{\link{tt_user_videos_hidden}} or 45 | #' \code{\link{tt_user_videos_api}} function. 
46 | #' 47 | #' @examples 48 | #' \dontrun{ 49 | #' # Get hidden videos from the user "fpoe_at" 50 | #' tt_user_videos("fpoe_at") 51 | #' } 52 | #' @export 53 | tt_user_videos <- function(username, ...) { 54 | params <- list(...) 55 | token <- params$token 56 | params$token <- NULL 57 | if (is.null(token)) token <- get_token(auth = FALSE) 58 | if (isFALSE(token)) { 59 | tt_user_videos_hidden(username, ...) 60 | } else { 61 | tt_user_videos_api(username, ...) 62 | } 63 | } 64 | 65 | 66 | #' @rdname tt_videos_hidden 67 | #' @export 68 | tt_videos <- function(...) { 69 | # mainly here in case the research API gains the ability to download videos 70 | tt_videos_hidden(...) 71 | } 72 | 73 | 74 | #' @rdname tt_user_info_api 75 | #' @export 76 | tt_user_info <- tt_user_info_api 77 | 78 | 79 | #' @rdname tt_playlist_api 80 | #' @export 81 | tt_playlist <- tt_playlist_api 82 | 83 | 84 | #' @rdname tt_user_liked_videos_api 85 | #' @export 86 | tt_get_liked <- tt_user_liked_videos_api 87 | 88 | 89 | #' @rdname tt_user_reposted_api 90 | #' @export 91 | tt_get_reposted <- tt_user_reposted_api 92 | 93 | 94 | #' @rdname tt_user_pinned_videos_api 95 | #' @export 96 | tt_get_pinned <- tt_user_pinned_videos_api 97 | 98 | 99 | #' @rdname tt_comments_api 100 | #' @export 101 | tt_comments <- tt_comments_api 102 | 103 | 104 | #' Get followers and following of users 105 | #' 106 | #' @description \ifelse{html}{\figure{api-both.svg}{options: 107 | #' alt='[Works on: Both]'}}{\strong{[Works on: Both]}} 108 | #' 109 | #' Get usernames of users who follow a user (tt_get_follower) or get who a 110 | #' user is following (tt_get_following). 111 | #' 112 | #' @param ... arguments passed to \link{tt_user_follower_api} or 113 | #' \link{tt_get_follower_hidden}. To use the research API, include \code{token} 114 | #' (e.g., \code{token = NULL}). 115 | #' 116 | #' @return a data.frame 117 | #' @export 118 | tt_get_follower <- function(...) { 119 | 120 | params <- list(...) 
121 | token <- params$token 122 | params$token <- NULL 123 | if (is.null(token)) token <- get_token(auth = FALSE) 124 | if (isFALSE(token)) { 125 | do.call("tt_get_follower_hidden", params) 126 | } else { 127 | do.call("tt_user_follower_api", c(params, list(token = token))) 128 | } 129 | 130 | } 131 | 132 | 133 | #' @rdname tt_get_follower 134 | #' @export 135 | tt_get_following <- function(...) { 136 | 137 | params <- list(...) 138 | token <- params$token 139 | params$token <- NULL 140 | if (is.null(token)) token <- get_token(auth = FALSE) 141 | if (isFALSE(token)) { 142 | do.call("tt_get_following_hidden", params) 143 | } else { 144 | do.call("tt_user_following_api", c(params, list(token = token))) 145 | } 146 | 147 | } 148 | 149 | 150 | #' Get json file from a TikTok URL 151 | #' 152 | #' This function was replaced by \code{tt_request_hidden()}. 153 | #' 154 | #' @param ... arguments passed to \code{tt_request_hidden()}. 155 | #' @export 156 | tt_json <- function(...) { 157 | 158 | cli::cli_warn("This function has been replaced by {.fn tt_request_hidden}") 159 | tt_request_hidden(...) 160 | } 161 | 162 | -------------------------------------------------------------------------------- /R/utils.R: -------------------------------------------------------------------------------- 1 | the <- new.env() 2 | 3 | # base function for extracting regex 4 | #' @noRd 5 | extract_regex <- function(str, pattern) { 6 | regmatches( 7 | str, 8 | regexpr(pattern, str, perl = TRUE) 9 | ) 10 | } 11 | 12 | 13 | # check if selected directory exists 14 | #' @noRd 15 | check_dir <- function(dir, name) { 16 | if (!is.null(dir)) { 17 | if (!dir.exists(dir)) { 18 | msg <- paste0("The selected `", name, 19 | "` directory does not exist.") 20 | if (isTRUE(utils::askYesNo(paste(msg, "Do you want to create it?")))) { 21 | dir.create(dir, showWarnings = FALSE) 22 | } else { 23 | stop(msg) 24 | } 25 | } 26 | } 27 | } 28 | 29 | 30 | #' @noRd 31 | wait <- function(sleep_pool, verbose = TRUE) { 32 | sleep <- stats::runif(1) * sample(sleep_pool, 1L) 33 | if (verbose) cli::cli_progress_message("\U23F2 waiting 
{round(sleep, 1)} seconds", current = FALSE) 34 | Sys.sleep(sleep) 35 | } 36 | 37 | 38 | # vectorised safe pluck 39 | #' @noRd 40 | vpluck <- function(x, ..., val = "character") { 41 | dots <- list(...) 42 | switch( 43 | val, 44 | "character" = { 45 | def <- NA_character_ 46 | val <- character(1) 47 | }, 48 | "integer" = { 49 | def <- NA_integer_ 50 | val <- integer(1) 51 | }, 52 | "double" = { 53 | def <- NA_integer_ 54 | val <- numeric(1) 55 | }, 56 | "logical" = { 57 | def <- NA 58 | val <- logical(1) 59 | }, 60 | "list" = { 61 | val <- list() 62 | } 63 | ) 64 | if (!is.list(val)) { 65 | vapply(x, purrr::pluck, !!!dots, .default = def, FUN.VALUE = val) 66 | } else { 67 | purrr::map(x, purrr::pluck, !!!dots) 68 | } 69 | } 70 | 71 | # safe pluck 72 | #' @noRd 73 | spluck <- function(.x, ...) { 74 | purrr::pluck(.x, ..., .default = NA) 75 | } 76 | 77 | 78 | # makes sure list can be turned into tibble 79 | as_tibble_onerow <- function(l, ...) { 80 | l <- purrr::map(l, function(c) { 81 | if (length(c) != 1) { 82 | return(list(c)) 83 | } 84 | return(c) 85 | }) 86 | tibble::as_tibble(l, ...) 
87 | } 88 | 89 | 90 | is_datetime <- function(x) { 91 | methods::is(x, "POSIXct") + 92 | methods::is(x, "POSIXlt") + 93 | methods::is(x, "Date") > 0 94 | } 95 | 96 | as_datetime <- function(x) { 97 | # TikTok returns 0 for missing 98 | if (all(x > 0)) { 99 | as.POSIXct(x, origin = "1970-01-01") 100 | } else { 101 | NA 102 | } 103 | } 104 | 105 | id2url <- function(x) { 106 | if (!is.character(x)) { 107 | cli::cli_abort("You need to supply a character vector of video URLs or IDs") 108 | } 109 | x[!grepl("\\D", x)] <- paste0("https://www.tiktok.com/@/video/", x[!grepl("\\D", x)]) 110 | return(x) 111 | } 112 | 113 | 114 | clean_names <- function(x) { 115 | gsub(pattern = "([A-Z])", replacement = "_\\L\\1", x = x, perl = TRUE) 116 | } 117 | 118 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r, include = FALSE} 8 | knitr::opts_chunk$set( 9 | collapse = TRUE, 10 | comment = "#>", 11 | fig.path = "man/figures/README-", 12 | out.width = "100%" 13 | ) 14 | ``` 15 | 16 | # traktok 17 | 18 | 19 | [![Lifecycle: stable](https://img.shields.io/badge/lifecycle-stable-green.svg)](https://lifecycle.r-lib.org/articles/stages.html#stable) 20 | [![R-CMD-check](https://github.com/JBGruber/traktok/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/JBGruber/traktok/actions/workflows/R-CMD-check.yaml) 21 | [![Codecov test coverage](https://codecov.io/gh/JBGruber/traktok/branch/main/graph/badge.svg)](https://codecov.io/gh/JBGruber/traktok?branch=main) 22 | [![say-thanks](https://img.shields.io/badge/Say%20Thanks-!-1EAEDB.svg)](https://saythanks.io/to/JBGruber) 23 | 24 | 25 | ## Feature overview 26 | 27 | | Description | Shorthand | Research API | Hidden API | 28 | |:----------------------------|:-----------------|:--------------------------|:------------------------| 29 | | search 
videos | tt_search | tt_search_api | tt_search_hidden[^1] | 30 | | get video detail (+file) | tt_videos | \- | tt_videos_hidden | 31 | | get user videos | tt_user_videos | tt_user_videos_api | tt_user_videos_hidden | 32 | | get user info | tt_user_info | tt_user_info_api | tt_user_info_hidden | 33 | | get comments under a video | tt_comments | tt_comments_api | \- | 34 | | get who follows a user | tt_get_follower | tt_user_follower_api | tt_get_follower_hidden | 35 | | get who a user is following | tt_get_following | tt_user_following_api | tt_get_following_hidden | 36 | | get videos a user liked | tt_get_liked | tt_user_liked_videos_api | \- | 37 | | get pinned videos of users | tt_get_pinned | tt_user_pinned_videos_api | \- | 38 | | get videos in a playlist | tt_playlist | tt_playlist_api | \- | 39 | | get raw post data | \- | \- | tt_request_hidden | 40 | | authenticate a session | \- | auth_research | auth_hidden | 41 | 42 | [^1]: Currently not working, see [#14](https://github.com/JBGruber/traktok/issues/14). 43 | 44 | 45 | The goal of traktok is to provide easy access to TikTok data. This package once started as an R port of Deen Freelon's [Pyktok](https://github.com/dfreelon/pyktok) Python module (though it is a complete rewrite without Python dependencies). 46 | It now covers functions from the secret hidden API that TikTok is using to show/search/play videos on their website and the official [Research API](https://developers.tiktok.com/products/research-api/). 47 | Since the Research API misses some important features (and since not everyone has access to it) it can often make sense to still use the hidden API that mocks requests from a browser. 48 | However, an important disclaimer for the hidden API applies: 49 | 50 | > This program may stop working suddenly if TikTok changes how it stores its data ([see Freelon, 2018](https://osf.io/preprints/socarxiv/56f4q/)). 51 | 52 | However, in the past, such breakages were fixed rather quickly (e.g., #12).
53 | 54 | ## Installation 55 | 56 | You can install the development version of traktok from [GitHub](https://github.com/) with: 57 | 58 | ``` r 59 | # install.packages("remotes") 60 | remotes::install_github("JBGruber/traktok") 61 | ``` 62 | 63 | ## In Research 64 | 65 | The research papers and projects below have used traktok to gather their data: 66 | 67 | 1. Hohner, J., Kakavand, A., & Rothut, S. (2024). Analyzing Radical Visuals at Scale: How Far-Right Groups Mobilize on TikTok. Journal of Digital Social Research, 6(1), 10–30. https://doi.org/10.33621/jdsr.v6i1.200 68 | 1. Bach, P., Gitomer, A., Devries, M., Walker, C., Deyoe, D., Atienza-Bathelemy, J., Foucault Welles, B., Freelon, D., & Zulli, D. (2023, October). Stitching Politics and Identity on TikTok. Panel presented at AoIR2023: The 24th Annual Conference of the Association of Internet Researchers. Philadelphia, PA, USA: AoIR. Retrieved from http://spir.aoir.org 69 | 1. Wirz, D. S., Zai, F., Vogler, D., Urman, A., & Eisenegger, M. (2023). Die Qualität von Schweizer Medien auf Instagram und TikTok. https://doi.org/10.5167/UZH-238605 70 | 1. Giglietto, F. (2024). Dashboard: TikTok Coordinated Sharing Network. https://fabiogiglietto.github.io/tiktok_csbn/tt_viz.html 71 | 1. Widholm, A., Ekman, M., & Larsson, A. O. (2024). A Right-Wing Wave on TikTok? Ideological Orientations, Platform Features, and User Engagement During the Early 2022 Election Campaign in Sweden. Social Media + Society, 10(3). 
72 | 73 | If you have used traktok in your research paper or project, please extend this list through a Pull Request or create an issue 74 | 75 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # traktok 5 | 6 | 7 | 8 | [![Lifecycle: 9 | stable](https://img.shields.io/badge/lifecycle-stable-green.svg)](https://lifecycle.r-lib.org/articles/stages.html#stable) 10 | [![R-CMD-check](https://github.com/JBGruber/traktok/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/JBGruber/traktok/actions/workflows/R-CMD-check.yaml) 11 | [![Codecov test 12 | coverage](https://codecov.io/gh/JBGruber/traktok/branch/main/graph/badge.svg)](https://codecov.io/gh/JBGruber/traktok?branch=main) 13 | [![say-thanks](https://img.shields.io/badge/Say%20Thanks-!-1EAEDB.svg)](https://saythanks.io/to/JBGruber) 14 | 15 | 16 | ## Feature overview 17 | 18 | | Description | Shorthand | Research API | Hidden API | 19 | |:---|:---|:---|:---| 20 | | search videos | tt_search | tt_search_api | tt_search_hidden[^1] | 21 | | get video detail (+file) | tt_videos | \- | tt_videos_hidden | 22 | | get user videos | tt_user_videos | tt_user_videos_api | tt_user_videos_hidden | 23 | | get user info | tt_user_info | tt_user_info_api | tt_user_info_hidden | 24 | | get comments under a video | tt_comments | tt_comments_api | \- | 25 | | get who follows a user | tt_get_follower | tt_user_follower_api | tt_get_follower_hidden | 26 | | get who a user is following | tt_get_following | tt_user_following_api | tt_get_following_hidden | 27 | | get videos a user liked | tt_get_liked | tt_user_liked_videos_api | \- | 28 | | get pinned videos of users | tt_get_pinned | tt_user_pinned_videos_api | \- | 29 | | get videos in a playlist | tt_playlist | tt_playlist_api | \- | 30 | | get raw post data | \- | \- | tt_request_hidden | 31 | | authenticate a session | \- | 
auth_research | auth_hidden | 32 | 33 | The goal of traktok is to provide easy access to TikTok data. This 34 | package once started as an R port of Deen Freelon’s 35 | [Pyktok](https://github.com/dfreelon/pyktok) Python module (though it is 36 | a complete rewrite without Python dependencies). It now covers functions 37 | from the secret hidden API that TikTok is using to show/search/play 38 | videos on their website and the official [Research 39 | API](https://developers.tiktok.com/products/research-api/). Since the 40 | Research API misses some important features (and since not everyone has 41 | access to it) it can often make sense to still use the hidden API that 42 | mocks requests from a browser. However, an important disclaimer for the 43 | hidden API applies: 44 | 45 | > This program may stop working suddenly if TikTok changes how it stores 46 | > its data ([see Freelon, 47 | > 2018](https://osf.io/preprints/socarxiv/56f4q/)). 48 | 49 | However, in the past, such breakages were fixed rather quickly (e.g., \#12). 50 | 51 | ## Installation 52 | 53 | You can install the development version of traktok from 54 | [GitHub](https://github.com/) with: 55 | 56 | ``` r 57 | # install.packages("remotes") 58 | remotes::install_github("JBGruber/traktok") 59 | ``` 60 | 61 | ## In Research 62 | 63 | The research papers and projects below have used traktok to gather their 64 | data: 65 | 66 | 1. Hohner, J., Kakavand, A., & Rothut, S. (2024). Analyzing Radical 67 | Visuals at Scale: How Far-Right Groups Mobilize on TikTok. Journal 68 | of Digital Social Research, 6(1), 10–30. <https://doi.org/10.33621/jdsr.v6i1.200> 69 | 70 | 2. Bach, P., Gitomer, A., Devries, M., Walker, C., Deyoe, D., 71 | Atienza-Bathelemy, J., Foucault Welles, B., Freelon, D., & Zulli, D. 72 | (2023, October). Stitching Politics and Identity on TikTok. Panel 73 | presented at AoIR2023: The 24th Annual Conference of the Association 74 | of Internet Researchers. Philadelphia, PA, USA: AoIR. Retrieved from <http://spir.aoir.org> 75 | 76 | 3. Wirz, D.
S., Zai, F., Vogler, D., Urman, A., & Eisenegger, M. 77 | (2023). Die Qualität von Schweizer Medien auf Instagram und TikTok. <https://doi.org/10.5167/UZH-238605> 78 | 79 | 4. Giglietto, F. (2024). Dashboard: TikTok Coordinated Sharing Network. <https://fabiogiglietto.github.io/tiktok_csbn/tt_viz.html> 80 | 81 | 5. Widholm, A., Ekman, M., & Larsson, A. O. (2024). A Right-Wing Wave 82 | on TikTok? Ideological Orientations, Platform Features, and User 83 | Engagement During the Early 2022 Election Campaign in Sweden. Social 84 | Media + Society, 10(3). 85 | 86 | If you have used traktok in your research paper or project, please 87 | extend this list through a Pull Request or create an issue. 88 | 89 | [^1]: Currently not working, see 90 | [\#14](https://github.com/JBGruber/traktok/issues/14). 91 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: https://jbgruber.github.io/traktok/ 2 | template: 3 | bootstrap: 5 4 | 5 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | target: auto 8 | threshold: 1% 9 | informational: true 10 | patch: 11 | default: 12 | target: auto 13 | threshold: 1% 14 | informational: true 15 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | citHeader("To cite traktok in publications use:") 2 | 3 | citEntry( 4 | entry = "Manual", 5 | title = "traktok. Getting TikTok data through the official and unofficial APIs", 6 | author = "Johannes B. Gruber", 7 | year = 2023, 8 | url = "https://github.com/JBGruber/traktok", 9 | note = "R package version 0.0.4.9000", 10 | textVersion = paste( 11 | "Gruber, Johannes B. (2023). traktok. An R package to scrape data from TikTok.
R package version 0.0.4.9000. https://github.com/JBGruber/traktok." 12 | ) 13 | ) 14 | -------------------------------------------------------------------------------- /inst/WORDLIST: -------------------------------------------------------------------------------- 1 | Analyzing 2 | AoIR 3 | Atienza 4 | Bathelemy 5 | CMD 6 | Codecov 7 | Deen 8 | Devries 9 | Deyoe 10 | Eisenegger 11 | Freelon 12 | Freelon's 13 | Freelon’s 14 | GTE 15 | Giglietto 16 | Gitomer 17 | Hohner 18 | Kakavand 19 | LTE 20 | Lifecycle 21 | Medien 22 | Pyktok 23 | Qualität 24 | Rothut 25 | TikTok 26 | TikTok's 27 | Urman 28 | Vogler 29 | Welles 30 | Wirz 31 | Zai 32 | Zulli 33 | api 34 | auf 35 | chromote 36 | com 37 | cookiefile 38 | etc 39 | json 40 | rstats 41 | rvest 42 | secuid 43 | th 44 | tidyverse 45 | tiktok 46 | tt 47 | ual 48 | und 49 | vidoes 50 | -------------------------------------------------------------------------------- /man/auth_check.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/auth_check.r 3 | \name{auth_check} 4 | \alias{auth_check} 5 | \title{Check whether you are authenticated} 6 | \usage{ 7 | auth_check(research = TRUE, hidden = TRUE, silent = FALSE) 8 | } 9 | \arguments{ 10 | \item{research, hidden}{turn check on/off for the research or hidden API.} 11 | 12 | \item{silent}{only return if check(s) were successful, no status on the 13 | screen} 14 | } 15 | \value{ 16 | logical vector (invisible) 17 | } 18 | \description{ 19 | \ifelse{html}{\figure{api-both.svg}{options: 20 | alt='[Works on: Both]'}}{\strong{[Works on: Both]}} 21 | 22 | Check if the necessary token or cookies are stored on your computer 23 | already. By default, the function checks for the authentication of the 24 | research and hidden API. 
To learn how you can authenticate, look at the 25 | vignette for the research (\code{vignette("research-api", package = 26 | "traktok")}) or hidden (\code{vignette("unofficial-api", package = 27 | "traktok")}) API. 28 | } 29 | \examples{ 30 | auth_check() 31 | } 32 | -------------------------------------------------------------------------------- /man/auth_hidden.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/auth_hidden.r 3 | \name{auth_hidden} 4 | \alias{auth_hidden} 5 | \title{Authenticate for the hidden/unofficial API} 6 | \usage{ 7 | auth_hidden(cookiefile, live = interactive()) 8 | } 9 | \arguments{ 10 | \item{cookiefile}{path to your cookiefile. Usually not needed after running 11 | \link{auth_hidden} once. See \code{vignette("unofficial-api", package = 12 | "traktok")} for more information on authentication.} 13 | 14 | \item{live}{opens Chromium browser to guide you through the auth process 15 | (experimental).} 16 | } 17 | \value{ 18 | nothing. 
Called to set up authentication 19 | } 20 | \description{ 21 | Guides you through authentication for the hidden/unofficial API 22 | } 23 | \examples{ 24 | \dontrun{ 25 | # to run through the steps of authentication 26 | auth_hidden() 27 | # or point to a cookie file directly 28 | auth_hidden("www.tiktok.com_cookies.txt") 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /man/auth_research.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/auth_research.r 3 | \name{auth_research} 4 | \alias{auth_research} 5 | \title{Authenticate for the official research API} 6 | \usage{ 7 | auth_research(client_key, client_secret) 8 | } 9 | \arguments{ 10 | \item{client_key}{Client key for authentication} 11 | 12 | \item{client_secret}{Client secret for authentication} 13 | } 14 | \value{ 15 | An authentication token (invisible) 16 | } 17 | \description{ 18 | Guides you through authentication for the Research API 19 | } 20 | \details{ 21 | You need to apply for access to the API and get the key 22 | and secret from TikTok. See 23 | \url{https://developers.tiktok.com/products/research-api/} for more 24 | information. 
25 | } 26 | \examples{ 27 | \dontrun{ 28 | auth_research(client_key, client_secret) 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /man/figures/api-both.svg: -------------------------------------------------------------------------------- 1 | API: bothAPIboth -------------------------------------------------------------------------------- /man/figures/api-research.svg: -------------------------------------------------------------------------------- 1 | API: researchAPIresearch -------------------------------------------------------------------------------- /man/figures/api-unofficial.svg: -------------------------------------------------------------------------------- 1 | API: unofficialAPIunofficial -------------------------------------------------------------------------------- /man/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JBGruber/traktok/d97f4ea5fb4f728aba1f64f9f71f5740bd626442/man/figures/logo.png -------------------------------------------------------------------------------- /man/last_query.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/last_.r 3 | \name{last_query} 4 | \alias{last_query} 5 | \alias{last_comments} 6 | \title{Retrieve most recent query} 7 | \usage{ 8 | last_query() 9 | 10 | last_comments() 11 | } 12 | \value{ 13 | a list of unparsed videos 14 | } 15 | \description{ 16 | If \code{tt_search_api} or \code{tt_comments_api} fail after already getting 17 | several pages, you can use this function to get all videos that have been 18 | retrieved so far from memory. Does not work when the session has crashed. In 19 | that case, look in \code{tempdir()} for an RDS file as a last resort. 
20 | } 21 | -------------------------------------------------------------------------------- /man/print.traktok_query.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/query_research.r 3 | \name{print.traktok_query} 4 | \alias{print.traktok_query} 5 | \title{Print a traktok query} 6 | \usage{ 7 | \method{print}{traktok_query}(x, ...) 8 | } 9 | \arguments{ 10 | \item{x}{An object of class \code{traktok_query}} 11 | 12 | \item{...}{Additional arguments passed to \code{lobstr::tree}} 13 | } 14 | \description{ 15 | Print a traktok query as a tree 16 | } 17 | \examples{ 18 | query() |> 19 | query_and(field_name = "hashtag_name", 20 | operation = "EQ", 21 | field_values = "rstats") |> 22 | print() 23 | } 24 | -------------------------------------------------------------------------------- /man/print.tt_results.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parse_research.r 3 | \name{print.tt_results} 4 | \alias{print.tt_results} 5 | \title{Print search result} 6 | \usage{ 7 | \method{print}{tt_results}(x, ...) 
8 | } 9 | \arguments{ 10 | \item{x}{An object of class \code{tt_results}} 11 | 12 | \item{...}{not used.} 13 | } 14 | \description{ 15 | Print traktok search results 16 | } 17 | -------------------------------------------------------------------------------- /man/query.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/query_research.r 3 | \name{query} 4 | \alias{query} 5 | \alias{query_and} 6 | \alias{query_or} 7 | \alias{query_not} 8 | \title{Create a traktok query} 9 | \usage{ 10 | query(and = NULL, or = NULL, not = NULL) 11 | 12 | query_and(q, field_name, operation, field_values) 13 | 14 | query_or(q, field_name, operation, field_values) 15 | 16 | query_not(q, field_name, operation, field_values) 17 | } 18 | \arguments{ 19 | \item{and, or, not}{A list of AND/OR/NOT conditions. Must contain one 20 | or multiple lists with \code{field_name}, \code{operation}, and 21 | \code{field_values} each (see example).} 22 | 23 | \item{q}{A traktok query created with \code{query}.} 24 | 25 | \item{field_name}{The field name to query against. One of: 26 | "create_date", "username", "region_code", "video_id", 27 | "hashtag_name", "keyword", "music_id", "effect_id", 28 | "video_length".} 29 | 30 | \item{operation}{One of: "EQ", "IN", "GT", "GTE", "LT", "LTE".} 31 | 32 | \item{field_values}{A vector of values to search for.} 33 | } 34 | \value{ 35 | A traktok query. 36 | } 37 | \description{ 38 | Create a traktok query from the given parameters.
39 | } 40 | \details{ 41 | TikTok's query consists of rather complicated lists 42 | dividing query elements into AND, OR and NOT: 43 | \itemize{ 44 | \item \strong{and}: The and conditions specify that all the conditions in the list must be met 45 | \item \strong{or}: The or conditions specify that at least one of the conditions in the list must be met 46 | \item \strong{not}: The not conditions specify that none of the conditions in the list must be met 47 | } 48 | 49 | The query can be constructed by writing the list for each entry 50 | yourself, like in the first example. Alternatively, traktok 51 | provides convenience functions to build up a query using 52 | \code{query_and}, \code{query_or}, and \code{query_not}, which 53 | make building a query a little easier. You can learn more at 54 | \url{https://developers.tiktok.com/doc/research-api-specs-query-videos#query}. 55 | } 56 | \examples{ 57 | \dontrun{ 58 | # using query directly and supplying the list 59 | query(or = list( 60 | list( 61 | field_name = "hashtag_name", 62 | operation = "EQ", 63 | field_values = "rstats" 64 | ), 65 | list( 66 | field_name = "keyword", 67 | operation = "EQ", 68 | field_values = list("rstats", "API") 69 | ) 70 | )) 71 | # starting an empty query and building it up using the query_* functions 72 | query() |> 73 | query_or(field_name = "hashtag_name", 74 | operation = "EQ", 75 | field_values = "rstats") |> 76 | query_or(field_name = "keyword", 77 | operation = "IN", 78 | field_values = c("rstats", "API")) 79 | } 80 | 81 | } 82 | -------------------------------------------------------------------------------- /man/tt_comments_api.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/api_research.r, R/shorthands.r 3 | \name{tt_comments_api} 4 | \alias{tt_comments_api} 5 | \alias{tt_comments} 6 | \title{Retrieve video comments} 7 | \usage{ 8 | tt_comments_api( 9 | 
video_id, 10 | fields = "all", 11 | start_cursor = 0L, 12 | max_pages = 1L, 13 | cache = TRUE, 14 | verbose = TRUE, 15 | token = NULL 16 | ) 17 | 18 | tt_comments( 19 | video_id, 20 | fields = "all", 21 | start_cursor = 0L, 22 | max_pages = 1L, 23 | cache = TRUE, 24 | verbose = TRUE, 25 | token = NULL 26 | ) 27 | } 28 | \arguments{ 29 | \item{video_id}{The id or URL of a video} 30 | 31 | \item{fields}{The fields to be returned (defaults to all)} 32 | 33 | \item{start_cursor}{The starting cursor, i.e., how many results to skip (for 34 | picking up an old search).} 35 | 36 | \item{max_pages}{results are returned in batches/pages with 100 videos. How 37 | many should be requested before the function stops?} 38 | 39 | \item{cache}{should progress be saved in the current session? It can then be 40 | retrieved with \code{last_query()} if an error occurs. But the function 41 | will use extra memory.} 42 | 43 | \item{verbose}{should the function print status updates to the screen?} 44 | 45 | \item{token}{The authentication token (usually supplied automatically after 46 | running \link{auth_research} once).} 47 | } 48 | \value{ 49 | A data.frame of parsed comments 50 | } 51 | \description{ 52 | \ifelse{html}{\figure{api-research.svg}{options: alt='[Works on: Research API]'}}{\strong{[Works on: Research API]}} 53 | } 54 | \examples{ 55 | \dontrun{ 56 | tt_comments("https://www.tiktok.com/@tiktok/video/7106594312292453675") 57 | # OR 58 | tt_comments("7106594312292453675") 59 | # OR 60 | tt_comments_api("7106594312292453675") 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /man/tt_get_follower.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/shorthands.r 3 | \name{tt_get_follower} 4 | \alias{tt_get_follower} 5 | \alias{tt_get_following} 6 | \title{Get followers and following of users} 7 | \usage{ 8 | 
tt_get_follower(...) 9 | 10 | tt_get_following(...) 11 | } 12 | \arguments{ 13 | \item{...}{arguments passed to \link{tt_user_follower_api} or 14 | \link{tt_get_follower_hidden}. To use the research API, include \code{token} 15 | (e.g., \code{token = NULL}).} 16 | } 17 | \value{ 18 | a data.frame 19 | } 20 | \description{ 21 | \ifelse{html}{\figure{api-both.svg}{options: 22 | alt='[Works on: Both]'}}{\strong{[Works on: Both]}} 23 | 24 | Get usernames of users who follow a user (tt_get_follower) or get who a 25 | user is following (tt_get_following). 26 | } 27 | -------------------------------------------------------------------------------- /man/tt_get_following_hidden.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/api_hidden.r 3 | \name{tt_get_following_hidden} 4 | \alias{tt_get_following_hidden} 5 | \alias{tt_get_follower_hidden} 6 | \title{Get followers and following of a user from the hidden API} 7 | \usage{ 8 | tt_get_following_hidden( 9 | secuid, 10 | sleep_pool = 1:10, 11 | max_tries = 5L, 12 | cookiefile = NULL, 13 | verbose = TRUE 14 | ) 15 | 16 | tt_get_follower_hidden( 17 | secuid, 18 | sleep_pool = 1:10, 19 | max_tries = 5L, 20 | cookiefile = NULL, 21 | verbose = TRUE 22 | ) 23 | } 24 | \arguments{ 25 | \item{secuid}{The secuid of a user. You can get it with 26 | \link{tt_user_info_hidden} by querying an account (see example).} 27 | 28 | \item{sleep_pool}{a vector of numbers from which a waiting period is randomly 29 | drawn.} 30 | 31 | \item{max_tries}{how often to retry if a request fails.} 32 | 33 | \item{cookiefile}{path to your cookiefile. Usually not needed after running 34 | \link{auth_hidden} once.
See \code{vignette("unofficial-api", package = 35 | "traktok")} for more information on authentication.} 36 | 37 | \item{verbose}{should the function print status updates to the screen?} 38 | } 39 | \value{ 40 | a data.frame of followers 41 | } 42 | \description{ 43 | \ifelse{html}{\figure{api-unofficial.svg}{options: alt='[Works on: 44 | Unofficial API]'}}{\strong{[Works on: Unofficial API]}} 45 | 46 | Get up to 5,000 accounts that follow a user or accounts a user follows. 47 | } 48 | \examples{ 49 | \dontrun{ 50 | df <- tt_user_info_hidden("https://www.tiktok.com/@fpoe_at") 51 | tt_get_follower_hidden(df$secUid) 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /man/tt_json.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/shorthands.r 3 | \name{tt_json} 4 | \alias{tt_json} 5 | \title{Get json file from a TikTok URL} 6 | \usage{ 7 | tt_json(...) 8 | } 9 | \arguments{ 10 | \item{...}{arguments passed to \code{tt_request_hidden()}.} 11 | } 12 | \description{ 13 | This function was replaced by \code{tt_request_hidden()}.
14 | } 15 | -------------------------------------------------------------------------------- /man/tt_playlist_api.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/api_research.r, R/shorthands.r 3 | \name{tt_playlist_api} 4 | \alias{tt_playlist_api} 5 | \alias{tt_playlist} 6 | \title{Lookup TikTok playlist using the research API} 7 | \usage{ 8 | tt_playlist_api(playlist_id, verbose = TRUE, token = NULL) 9 | 10 | tt_playlist(playlist_id, verbose = TRUE, token = NULL) 11 | } 12 | \arguments{ 13 | \item{playlist_id}{playlist ID or URL to a playlist.} 14 | 15 | \item{verbose}{should the function print status updates to the screen?} 16 | 17 | \item{token}{The authentication token (usually supplied automatically after 18 | running \link{auth_research} once).} 19 | } 20 | \value{ 21 | A data.frame 22 | } 23 | \description{ 24 | \ifelse{html}{\figure{api-research.svg}{options: alt='[Works on: 25 | Research API]'}}{\strong{[Works on: Research API]}} 26 | } 27 | -------------------------------------------------------------------------------- /man/tt_request_hidden.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/api_hidden.r 3 | \name{tt_request_hidden} 4 | \alias{tt_request_hidden} 5 | \title{Get json string from a TikTok URL using the hidden API} 6 | \usage{ 7 | tt_request_hidden(url, max_tries = 5L, cookiefile = NULL) 8 | } 9 | \arguments{ 10 | \item{url}{a URL to a TikTok video or account} 11 | 12 | \item{max_tries}{how often to retry if a request fails.} 13 | 14 | \item{cookiefile}{path to your cookiefile. Usually not needed after running 15 | \link{auth_hidden} once. 
See \code{vignette("unofficial-api", package = 16 | "traktok")} for more information on authentication.} 17 | } 18 | \description{ 19 | \ifelse{html}{\figure{api-unofficial.svg}{options: 20 | alt='[Works on: Unofficial API]'}}{\strong{[Works on: Unofficial API]}} 21 | 22 | Use this function in case you want to check the full data for a given 23 | TikTok video or account. In tt_videos, only an opinionated selection of 24 | data is included in the final object. If you want some different 25 | information, you can use this function. 26 | } 27 | -------------------------------------------------------------------------------- /man/tt_search.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/shorthands.r 3 | \name{tt_search} 4 | \alias{tt_search} 5 | \title{Search videos} 6 | \usage{ 7 | tt_search(...) 8 | } 9 | \arguments{ 10 | \item{...}{arguments passed to \link{tt_search_api} or 11 | \link{tt_search_hidden}. To use the research API, include \code{token} 12 | (e.g., \code{token = NULL}).} 13 | } 14 | \value{ 15 | a data.frame 16 | } 17 | \description{ 18 | \ifelse{html}{\figure{api-both.svg}{options: 19 | alt='[Works on: Both]'}}{\strong{[Works on: Both]}} 20 | 21 | Searches videos using either the Research API (if an authentication token 22 | is present, see \link{auth_research}) or otherwise the unofficial hidden 23 | API. See \link{tt_search_api} or \link{tt_search_hidden} respectively for 24 | information about these functions.
25 | } 26 | -------------------------------------------------------------------------------- /man/tt_search_api.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/api_research.r 3 | \name{tt_search_api} 4 | \alias{tt_search_api} 5 | \alias{tt_query_videos} 6 | \title{Query TikTok videos using the research API} 7 | \usage{ 8 | tt_search_api( 9 | query, 10 | start_date = Sys.Date() - 1, 11 | end_date = Sys.Date(), 12 | fields = "all", 13 | start_cursor = 0L, 14 | search_id = NULL, 15 | is_random = FALSE, 16 | max_pages = 1, 17 | parse = TRUE, 18 | cache = TRUE, 19 | verbose = TRUE, 20 | token = NULL 21 | ) 22 | 23 | tt_query_videos( 24 | query, 25 | start_date = Sys.Date() - 1, 26 | end_date = Sys.Date(), 27 | fields = "all", 28 | start_cursor = 0L, 29 | search_id = NULL, 30 | is_random = FALSE, 31 | max_pages = 1, 32 | parse = TRUE, 33 | cache = TRUE, 34 | verbose = TRUE, 35 | token = NULL 36 | ) 37 | } 38 | \arguments{ 39 | \item{query}{A query string or object (see \link{query}).} 40 | 41 | \item{start_date, end_date}{A start and end date to narrow the search 42 | (required; can be a maximum of 30 days apart).} 43 | 44 | \item{fields}{The fields to be returned (defaults to all)} 45 | 46 | \item{start_cursor}{The starting cursor, i.e., how many results to skip (for 47 | picking up an old search).} 48 | 49 | \item{search_id}{The search id (for picking up an old search).} 50 | 51 | \item{is_random}{Whether the query is random (defaults to FALSE).} 52 | 53 | \item{max_pages}{results are returned in batches/pages with 100 videos. How 54 | many should be requested before the function stops?} 55 | 56 | \item{parse}{Should the results be parsed? Otherwise, the original JSON 57 | object is returned as a nested list.} 58 | 59 | \item{cache}{should progress be saved in the current session? 
It can then be 60 | retrieved with \code{last_query()} if an error occurs. But the function 61 | will use extra memory.} 62 | 63 | \item{verbose}{should the function print status updates to the screen?} 64 | 65 | \item{token}{The authentication token (usually supplied automatically after 66 | running \link{auth_research} once).} 67 | } 68 | \value{ 69 | A data.frame of parsed TikTok videos (or a nested list). 70 | } 71 | \description{ 72 | \ifelse{html}{\figure{api-research.svg}{options: alt='[Works on: 73 | Research API]'}}{\strong{[Works on: Research API]}} 74 | 75 | This is the version of \link{tt_search} that explicitly uses Research API. 76 | Use \link{tt_search_hidden} for the unofficial API version. 77 | } 78 | \examples{ 79 | \dontrun{ 80 | # look for a keyword or hashtag by default 81 | tt_search_api("rstats") 82 | 83 | # or build a more elaborate query 84 | query() |> 85 | query_and(field_name = "region_code", 86 | operation = "IN", 87 | field_values = c("JP", "US")) |> 88 | query_or(field_name = "hashtag_name", 89 | operation = "EQ", # rstats is the only hashtag 90 | field_values = "rstats") |> 91 | query_or(field_name = "keyword", 92 | operation = "IN", # rstats is one of the keywords 93 | field_values = "rstats") |> 94 | query_not(operation = "EQ", 95 | field_name = "video_length", 96 | field_values = "SHORT") |> 97 | tt_search_api() 98 | 99 | # when a search fails after a while, get the results and pick it back up 100 | # (only work with same parameters) 101 | last_pull <- last_query() 102 | query() |> 103 | query_and(field_name = "region_code", 104 | operation = "IN", 105 | field_values = c("JP", "US")) |> 106 | query_or(field_name = "hashtag_name", 107 | operation = "EQ", # rstats is the only hashtag 108 | field_values = "rstats") |> 109 | query_or(field_name = "keyword", 110 | operation = "IN", # rstats is one of the keywords 111 | field_values = "rstats") |> 112 | query_not(operation = "EQ", 113 | field_name = "video_length", 114 | field_values = 
"SHORT") |> 115 | tt_search_api(start_cursor = length(last_pull) + 1, 116 | search_id = attr(last_pull, "search_id")) 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /man/tt_search_hidden.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/api_hidden.r 3 | \name{tt_search_hidden} 4 | \alias{tt_search_hidden} 5 | \title{Search videos} 6 | \usage{ 7 | tt_search_hidden( 8 | query, 9 | offset = 0, 10 | max_pages = Inf, 11 | sleep_pool = 1:10, 12 | max_tries = 5L, 13 | cookiefile = NULL, 14 | verbose = TRUE 15 | ) 16 | } 17 | \arguments{ 18 | \item{query}{query as one string} 19 | 20 | \item{offset}{how many videos to skip. For example, if you already have the 21 | first X of a search.} 22 | 23 | \item{max_pages}{how many pages to get before stopping the search.} 24 | 25 | \item{sleep_pool}{a vector of numbers from which a waiting period is randomly 26 | drawn.} 27 | 28 | \item{max_tries}{how often to retry if a request fails.} 29 | 30 | \item{cookiefile}{path to your cookiefile. Usually not needed after running 31 | \link{auth_hidden} once. See \code{vignette("unofficial-api", package = 32 | "traktok")} for more information on authentication.} 33 | 34 | \item{verbose}{should the function print status updates to the screen?} 35 | } 36 | \value{ 37 | a data.frame 38 | } 39 | \description{ 40 | \ifelse{html}{\figure{api-unofficial}{options: alt='[Works on: 41 | Unofficial API]'}}{\strong{[Works on: Unofficial API]}} 42 | 43 | This is the version of \link{tt_search} that explicitly uses the unofficial 44 | API. Use \link{tt_search_api} for the Research API version. 45 | } 46 | \details{ 47 | The function will wait between scraping two videos to make it less 48 | obvious that a scraper is accessing the site. The period is drawn randomly 49 | from the `sleep_pool` and multiplied by a random fraction. 
50 | } 51 | \examples{ 52 | \dontrun{ 53 | tt_search_hidden("#rstats", max_pages = 2) 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /man/tt_user_follower_api.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/api_research.r 3 | \name{tt_user_follower_api} 4 | \alias{tt_user_follower_api} 5 | \alias{tt_user_following_api} 6 | \title{Get followers and following of users from the research API} 7 | \usage{ 8 | tt_user_follower_api( 9 | username, 10 | max_pages = 1, 11 | cache = TRUE, 12 | verbose = TRUE, 13 | token = NULL 14 | ) 15 | 16 | tt_user_following_api( 17 | username, 18 | max_pages = 1, 19 | cache = TRUE, 20 | verbose = TRUE, 21 | token = NULL 22 | ) 23 | } 24 | \arguments{ 25 | \item{username}{name(s) of the user(s) to be queried} 26 | 27 | \item{max_pages}{results are returned in batches/pages with 100 videos. How 28 | many should be requested before the function stops?} 29 | 30 | \item{cache}{should progress be saved in the current session? It can then be 31 | retrieved with \code{last_query()} if an error occurs. 
But the function 32 | will use extra memory.} 33 | 34 | \item{verbose}{should the function print status updates to the screen?} 35 | 36 | \item{token}{The authentication token (usually supplied automatically after 37 | running \link{auth_research} once).} 38 | } 39 | \value{ 40 | A data.frame 41 | } 42 | \description{ 43 | \ifelse{html}{\figure{api-research.svg}{options: alt='[Works on: 44 | Research API]'}}{\strong{[Works on: Research API]}} 45 | } 46 | \examples{ 47 | \dontrun{ 48 | tt_user_follower_api("jbgruber") 49 | # OR 50 | tt_user_following_api("https://www.tiktok.com/@tiktok") 51 | # OR 52 | tt_get_follower("https://www.tiktok.com/@tiktok") 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /man/tt_user_info_api.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/api_research.r, R/shorthands.r 3 | \name{tt_user_info_api} 4 | \alias{tt_user_info_api} 5 | \alias{tt_user_info} 6 | \title{Lookup TikTok information about a user using the research API} 7 | \usage{ 8 | tt_user_info_api(username, fields = "all", verbose = TRUE, token = NULL) 9 | 10 | tt_user_info(username, fields = "all", verbose = TRUE, token = NULL) 11 | } 12 | \arguments{ 13 | \item{username}{name(s) of the user(s) to be queried} 14 | 15 | \item{fields}{The fields to be returned (defaults to all)} 16 | 17 | \item{verbose}{should the function print status updates to the screen?} 18 | 19 | \item{token}{The authentication token (usually supplied automatically after 20 | running \link{auth_research} once).} 21 | } 22 | \value{ 23 | A data.frame of parsed TikTok videos the user has posted 24 | } 25 | \description{ 26 | \ifelse{html}{\figure{api-research.svg}{options: alt='[Works on: 27 | Research API]'}}{\strong{[Works on: Research API]}} 28 | } 29 | \examples{ 30 | \dontrun{ 31 | tt_user_info_api("jbgruber") 32 | # OR 33 | 
tt_user_info_api("https://www.tiktok.com/@tiktok") 34 | # OR 35 | tt_user_info("https://www.tiktok.com/@tiktok") 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /man/tt_user_info_hidden.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/api_hidden.r 3 | \name{tt_user_info_hidden} 4 | \alias{tt_user_info_hidden} 5 | \title{Get infos about a user from the hidden API} 6 | \usage{ 7 | tt_user_info_hidden(username, parse = TRUE) 8 | } 9 | \arguments{ 10 | \item{username}{A URL to a video or username.} 11 | 12 | \item{parse}{Whether to parse the data into a data.frame (set to FALSE to get 13 | the full list).} 14 | } 15 | \value{ 16 | A data.frame of user info. 17 | } 18 | \description{ 19 | Get infos about a user from the hidden API 20 | } 21 | \examples{ 22 | \dontrun{ 23 | df <- tt_user_info_hidden("https://www.tiktok.com/@fpoe_at") 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /man/tt_user_liked_videos_api.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/api_research.r, R/shorthands.r 3 | \name{tt_user_liked_videos_api} 4 | \alias{tt_user_liked_videos_api} 5 | \alias{tt_get_liked} 6 | \title{Lookup which videos were liked by a user using the research API} 7 | \usage{ 8 | tt_user_liked_videos_api( 9 | username, 10 | fields = "all", 11 | max_pages = 1, 12 | cache = TRUE, 13 | verbose = TRUE, 14 | token = NULL 15 | ) 16 | 17 | tt_get_liked( 18 | username, 19 | fields = "all", 20 | max_pages = 1, 21 | cache = TRUE, 22 | verbose = TRUE, 23 | token = NULL 24 | ) 25 | } 26 | \arguments{ 27 | \item{username}{name(s) of the user(s) to be queried} 28 | 29 | \item{fields}{The fields to be returned (defaults to all)} 30 | 31 | 
\item{max_pages}{results are returned in batches/pages with 100 videos. How 32 | many should be requested before the function stops?} 33 | 34 | \item{cache}{should progress be saved in the current session? It can then be 35 | retrieved with \code{last_query()} if an error occurs. But the function 36 | will use extra memory.} 37 | 38 | \item{verbose}{should the function print status updates to the screen?} 39 | 40 | \item{token}{The authentication token (usually supplied automatically after 41 | running \link{auth_research} once).} 42 | } 43 | \value{ 44 | A data.frame of parsed TikTok videos the user has posted 45 | } 46 | \description{ 47 | \ifelse{html}{\figure{api-research.svg}{options: alt='[Works on: 48 | Research API]'}}{\strong{[Works on: Research API]}} 49 | } 50 | \examples{ 51 | \dontrun{ 52 | tt_get_liked("jbgruber") 53 | # OR 54 | tt_user_liked_videos_api("https://www.tiktok.com/@tiktok") 55 | # OR 56 | tt_user_liked_videos_api("https://www.tiktok.com/@tiktok") 57 | 58 | # note: none of these work because I could not find any account that 59 | # has likes public! 
60 | } 61 | } 62 | -------------------------------------------------------------------------------- /man/tt_user_pinned_videos_api.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/api_research.r, R/shorthands.r 3 | \name{tt_user_pinned_videos_api} 4 | \alias{tt_user_pinned_videos_api} 5 | \alias{tt_get_pinned} 6 | \title{Lookup which videos were pinned by a user using the research API} 7 | \usage{ 8 | tt_user_pinned_videos_api( 9 | username, 10 | fields = "all", 11 | cache = TRUE, 12 | verbose = TRUE, 13 | token = NULL 14 | ) 15 | 16 | tt_get_pinned( 17 | username, 18 | fields = "all", 19 | cache = TRUE, 20 | verbose = TRUE, 21 | token = NULL 22 | ) 23 | } 24 | \arguments{ 25 | \item{username}{vector of user names (handles) or URLs to users' pages.} 26 | 27 | \item{fields}{The fields to be returned (defaults to all)} 28 | 29 | \item{cache}{should progress be saved in the current session? It can then be 30 | retrieved with \code{last_query()} if an error occurs. 
But the function 31 | will use extra memory.} 32 | 33 | \item{verbose}{should the function print status updates to the screen?} 34 | 35 | \item{token}{The authentication token (usually supplied automatically after 36 | running \link{auth_research} once).} 37 | } 38 | \value{ 39 | A data.frame of parsed TikTok videos the user has posted 40 | } 41 | \description{ 42 | \ifelse{html}{\figure{api-research.svg}{options: alt='[Works on: 43 | Research API]'}}{\strong{[Works on: Research API]}} 44 | } 45 | \examples{ 46 | \dontrun{ 47 | tt_get_pinned("jbgruber") 48 | # OR 49 | tt_user_pinned_videos_api("https://www.tiktok.com/@tiktok") 50 | # OR 51 | tt_user_pinned_videos_api("https://www.tiktok.com/@tiktok") 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /man/tt_user_reposted_api.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/api_research.r, R/shorthands.r 3 | \name{tt_user_reposted_api} 4 | \alias{tt_user_reposted_api} 5 | \alias{tt_get_reposted} 6 | \title{Lookup which videos were reposted by a user using the research API} 7 | \usage{ 8 | tt_user_reposted_api( 9 | username, 10 | fields = "all", 11 | max_pages = 1, 12 | cache = TRUE, 13 | verbose = TRUE, 14 | token = NULL 15 | ) 16 | 17 | tt_get_reposted( 18 | username, 19 | fields = "all", 20 | max_pages = 1, 21 | cache = TRUE, 22 | verbose = TRUE, 23 | token = NULL 24 | ) 25 | } 26 | \arguments{ 27 | \item{username}{name(s) of the user(s) to be queried} 28 | 29 | \item{fields}{The fields to be returned (defaults to all)} 30 | 31 | \item{max_pages}{results are returned in batches/pages with 100 videos. How 32 | many should be requested before the function stops?} 33 | 34 | \item{cache}{should progress be saved in the current session? It can then be 35 | retrieved with \code{last_query()} if an error occurs.
But the function 36 | will use extra memory.} 37 | 38 | \item{verbose}{should the function print status updates to the screen?} 39 | 40 | \item{token}{The authentication token (usually supplied automatically after 41 | running \link{auth_research} once).} 42 | } 43 | \value{ 44 | A data.frame of parsed TikTok videos the user has reposted 45 | } 46 | \description{ 47 | \ifelse{html}{\figure{api-research.svg}{options: alt='[Works on: 48 | Research API]'}}{\strong{[Works on: Research API]}} 49 | } 50 | \examples{ 51 | \dontrun{ 52 | tt_get_reposted("jbgruber") 53 | # OR 54 | tt_user_reposted_api("https://www.tiktok.com/@tiktok") 55 | # OR 56 | tt_user_reposted_api("https://www.tiktok.com/@tiktok") 57 | 58 | # note: none of these work because nobody has this enabled! 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /man/tt_user_videos.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/shorthands.r 3 | \name{tt_user_videos} 4 | \alias{tt_user_videos} 5 | \title{Get videos from a TikTok user's profile} 6 | \usage{ 7 | tt_user_videos(username, ...) 8 | } 9 | \arguments{ 10 | \item{username}{The username or usernames whose videos you want to retrieve.} 11 | 12 | \item{...}{Additional arguments to be passed to the \code{\link{tt_user_videos_hidden}} or 13 | \code{\link{tt_user_videos_api}} function.} 14 | } 15 | \description{ 16 | \ifelse{html}{\figure{api-both.svg}{options: 17 | alt='[Works on: Both]'}}{\strong{[Works on: Both]}} 18 | 19 | Get all videos posted by a user (or multiple users for the Research API). 20 | Searches videos using either the Research API (if an authentication token 21 | is present, see \link{auth_research}) or otherwise the unofficial hidden 22 | API. See \link{tt_user_videos_api} or \link{tt_user_videos_hidden} respectively for 23 | information about these functions.
24 | } 25 | \examples{ 26 | \dontrun{ 27 | # Get hidden videos from the user "fpoe_at" 28 | tt_user_videos("fpoe_at") 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /man/tt_user_videos_api.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/api_research_extended.r 3 | \name{tt_user_videos_api} 4 | \alias{tt_user_videos_api} 5 | \title{Get videos from a TikTok user's profile} 6 | \usage{ 7 | tt_user_videos_api( 8 | username, 9 | since = "2020-01-01", 10 | to = Sys.Date(), 11 | verbose = TRUE, 12 | ... 13 | ) 14 | } 15 | \arguments{ 16 | \item{username}{The username or usernames whose videos you want to retrieve.} 17 | 18 | \item{since, to}{limits from/to when to go through the account in 30 day windows.} 19 | 20 | \item{verbose}{should the function print status updates to the screen?} 21 | 22 | \item{...}{Additional arguments to be passed to the 23 | \code{\link{tt_search_api}} function.} 24 | } 25 | \description{ 26 | \ifelse{html}{\figure{api-research.svg}{options: alt='[Works on: 27 | Research API]'}}{\strong{[Works on: Research API]}} 28 | 29 | Get all videos posted by a user or multiple users. This is a convenience 30 | wrapper around \code{\link{tt_search_api}} that takes care of moving time 31 | windows (search is limited to 30 days). This is the version of 32 | \link{tt_user_videos} that explicitly uses the Research API. Use 33 | \link{tt_user_videos_hidden} for the unofficial API version.
34 | } 35 | \examples{ 36 | \dontrun{ 37 | # Get videos from the user "fpoe_at" since October 2024 38 | tt_user_videos_api("fpoe_at", since = "2024-10-01") 39 | 40 | # often makes sense to combine this with the account creation time from the 41 | # hidden URL 42 | fpoe_at_info <- tt_user_info_hidden(username = "fpoe_at") 43 | tt_user_videos_api("fpoe_at", since = fpoe_at_info$create_time) 44 | 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /man/tt_user_videos_hidden.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/api_hidden.r 3 | \name{tt_user_videos_hidden} 4 | \alias{tt_user_videos_hidden} 5 | \title{Get videos from a TikTok user's profile} 6 | \usage{ 7 | tt_user_videos_hidden( 8 | username, 9 | solve_captchas = FALSE, 10 | return_urls = FALSE, 11 | timeout = 5L, 12 | verbose = TRUE, 13 | ... 14 | ) 15 | } 16 | \arguments{ 17 | \item{username}{The username of the TikTok user whose hidden videos you want to retrieve.} 18 | 19 | \item{solve_captchas}{open browser to solve appearing captchas manually.} 20 | 21 | \item{return_urls}{return video URLs instead of downloading the videos.} 22 | 23 | \item{timeout}{time (in seconds) to wait between scrolling and solving captchas.} 24 | 25 | \item{verbose}{should the function print status updates to the screen?} 26 | 27 | \item{...}{Additional arguments to be passed to the \code{\link{tt_videos_hidden}} function.} 28 | } 29 | \value{ 30 | A list of video data or URLs, depending on the value of \code{return_urls}. 31 | } 32 | \description{ 33 | \ifelse{html}{\figure{api-unofficial.svg}{options: alt='[Works on: 34 | Unofficial API]'}}{\strong{[Works on: Unofficial API]}} 35 | 36 | Get all videos posted by a TikTok user. 37 | } 38 | \details{ 39 | This function uses rvest to scrape a TikTok user's profile and retrieve any hidden videos.
40 | } 41 | \examples{ 42 | \dontrun{ 43 | # Get hidden videos from the user "fpoe_at" 44 | tt_user_videos_hidden("fpoe_at") 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /man/tt_videos_hidden.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/api_hidden.r, R/shorthands.r 3 | \name{tt_videos_hidden} 4 | \alias{tt_videos_hidden} 5 | \alias{tt_videos} 6 | \title{Get video metadata and video files from URLs} 7 | \usage{ 8 | tt_videos_hidden( 9 | video_urls, 10 | save_video = TRUE, 11 | overwrite = FALSE, 12 | dir = ".", 13 | cache_dir = NULL, 14 | sleep_pool = 1:10, 15 | max_tries = 5L, 16 | cookiefile = NULL, 17 | verbose = TRUE, 18 | ... 19 | ) 20 | 21 | tt_videos(...) 22 | } 23 | \arguments{ 24 | \item{video_urls}{vector of URLs or IDs to TikTok videos.} 25 | 26 | \item{save_video}{logical. Should the videos be downloaded?} 27 | 28 | \item{overwrite}{logical. If save_video=TRUE and the file already exists, 29 | should it be overwritten?} 30 | 31 | \item{dir}{directory to save video files to.} 32 | 33 | \item{cache_dir}{if set to a path, one RDS file with metadata will be written 34 | to disk for each video. This is useful if you have many videos and want to 35 | pick up where you left off if something goes wrong.} 36 | 37 | \item{sleep_pool}{a vector of numbers from which a waiting period is randomly 38 | drawn.} 39 | 40 | \item{max_tries}{how often to retry if a request fails.} 41 | 42 | \item{cookiefile}{path to your cookiefile. Usually not needed after running 43 | \link{auth_hidden} once.
See \code{vignette("unofficial-api", package = 44 | "traktok")} for more information on authentication.} 45 | 46 | \item{verbose}{should the function print status updates to the screen?} 47 | 48 | \item{...}{handed to \code{tt_videos_hidden} (for tt_videos) and (further) to 49 | \link{tt_request_hidden}.} 50 | } 51 | \value{ 52 | a data.frame 53 | } 54 | \description{ 55 | \ifelse{html}{\figure{api-unofficial.svg}{options: alt='[Works on: 56 | Unofficial API]'}}{\strong{[Works on: Unofficial API]}} 57 | } 58 | \details{ 59 | The function will wait between scraping two videos to make it less 60 | obvious that a scraper is accessing the site. The period is drawn randomly 61 | from the \code{sleep_pool} and multiplied by a random fraction. 62 | 63 | Note that the video file has to be requested in the same session as 64 | the metadata. So while the URL to the video file is included in the 65 | metadata, this link will not work in most cases. 66 | } 67 | \examples{ 68 | \dontrun{ 69 | tt_videos("https://www.tiktok.com/@tiktok/video/7106594312292453675") 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /tests/spelling.R: -------------------------------------------------------------------------------- 1 | if(requireNamespace("spelling", quietly = TRUE)) { 2 | spelling::spell_check_test( 3 | vignettes = TRUE, 4 | error = TRUE, 5 | skip_on_cran = TRUE 6 | ) 7 | } 8 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | # This file is part of the standard setup for testthat. 2 | # It is recommended that you do not modify it. 3 | # 4 | # Where should you do additional test configuration?
5 | # Learn more about the roles of various files in: 6 | # * https://r-pkgs.org/tests.html 7 | # * https://testthat.r-lib.org/reference/test_package.html#special-files 8 | 9 | library(testthat) 10 | library(traktok) 11 | 12 | test_check("traktok") 13 | -------------------------------------------------------------------------------- /tests/testthat/example_query.json: -------------------------------------------------------------------------------- 1 | { 2 | "and": [ 3 | { 4 | "field_name": "region_code", 5 | "operation": "IN", 6 | "field_values": ["JP", "US"] 7 | }, 8 | { 9 | "field_name":"hashtag_name", 10 | "operation":"EQ", 11 | "field_values":["animal"] 12 | } 13 | ], 14 | "not": [ 15 | { 16 | "field_name": "video_length", 17 | "operation": "EQ", 18 | "field_values": ["SHORT"] 19 | } 20 | ] 21 | } 22 | -------------------------------------------------------------------------------- /tests/testthat/example_request.json: -------------------------------------------------------------------------------- 1 | { 2 | "query": { 3 | "and": [ 4 | { 5 | "field_name": "region_code", 6 | "operation": "IN", 7 | "field_values": ["JP", "US"] 8 | }, 9 | { 10 | "field_name":"hashtag_name", 11 | "operation":"EQ", 12 | "field_values":["animal"] 13 | } 14 | ], 15 | "not": [ 16 | { 17 | "field_name": "video_length", 18 | "operation": "EQ", 19 | "field_values": ["SHORT"] 20 | } 21 | ] 22 | }, 23 | "max_count": 100, 24 | "cursor": 0, 25 | "start_date": "20230101", 26 | "end_date": "20230115" 27 | } 28 | -------------------------------------------------------------------------------- /tests/testthat/example_resp_comments.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": { 3 | "comments": [ 4 | { 5 | "text": "AWEEEEEE 🥰🥰🥰", 6 | "video_id": 1234563451201523412, 7 | "create_time": 1671491598, 8 | "id": 12345616934634134, 9 | "like_count": 50, 10 | "parent_comment_id": 1234561201524010, 11 | "reply_count": 10 12 | } 13 | ], 14 | 
"has_more": true, 15 | "cursor": 300 16 | }, 17 | "error": { 18 | "code": "ok", 19 | "message": "", 20 | "log_id": "202207280326050102231031430C7E754E" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /tests/testthat/example_resp_q_user.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": { 3 | "bio_description": "my_bio", 4 | "is_verified": false, 5 | "likes_count": 27155089, 6 | "video_count": 44, 7 | "avatar_url": "https://some_cdn.com/my_avatar", 8 | "follower_count": 232, 9 | "following_count": 45, 10 | "display_name": "my nick name" 11 | }, 12 | "error": { 13 | "code": "ok", 14 | "message": "", 15 | "log_id": "202207280326050102231031430C7E754E" 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /tests/testthat/example_resp_q_videos.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": { 3 | "videos": [ 4 | { 5 | "hashtag_names": [ 6 | "avengers", 7 | "pov" 8 | ], 9 | "region_code": "CA", 10 | "create_time": 1633823999, 11 | "effect_ids": [ 12 | "0" 13 | ], 14 | "video_id": 702874395068494965, 15 | "music_id": 703847506349838790, 16 | "video_description": "lol #pov #avengers", 17 | "view_count": 1050, 18 | "comment_count": 2 19 | }, 20 | { 21 | "hashtag_names": [ 22 | "avengers", 23 | "pov" 24 | ], 25 | "region_code": "CA", 26 | "create_time": 1633823999, 27 | "effect_ids": [ 28 | "0" 29 | ], 30 | "video_id": 702874395068494965, 31 | "music_id": 703847506349838790, 32 | "video_description": "lol #pov #avengers", 33 | "view_count": 1050, 34 | "comment_count": 2 35 | } 36 | ], 37 | "cursor": 100, 38 | "search_id": "7201388525814961198", 39 | "has_more": true 40 | }, 41 | "error": { 42 | "code": "ok", 43 | "message": "", 44 | "log_id": "20230113024658F0D7C5D6CA3A9B79C5B9" 45 | } 46 | } 47 | 
-------------------------------------------------------------------------------- /tests/testthat/test-research_api.R: -------------------------------------------------------------------------------- 1 | test_that("query", { 2 | expect_equal({ 3 | query() |> 4 | query_and(field_name = "region_code", 5 | operation = "IN", 6 | field_values = c("JP", "US")) |> 7 | query_and(field_name = "hashtag_name", 8 | operation = "EQ", 9 | field_values = "animal") |> 10 | query_not(operation = "EQ", 11 | field_name = "video_length", 12 | field_values = "SHORT") |> 13 | unclass() 14 | }, jsonlite::read_json("example_query.json")) 15 | }) 16 | 17 | test_that("request", { 18 | mock_success <- function(req) { 19 | req <<- req # use this to test request below 20 | httr2::response(status_code = 200, 21 | headers = "Content-Type: application/json", 22 | body = charToRaw( 23 | paste0( 24 | readLines("example_resp_q_videos.json"), collapse = "") 25 | ) 26 | ) 27 | } 28 | # mock a query to check against example 29 | q <- query() |> 30 | query_and(field_name = "region_code", 31 | operation = "IN", 32 | field_values = c("JP", "US")) |> 33 | query_and(field_name = "hashtag_name", 34 | operation = "EQ", 35 | field_values = "animal") |> 36 | query_not(field_name = "video_length", 37 | operation = "EQ", 38 | field_values = "SHORT") 39 | 40 | httr2::with_mocked_responses( 41 | mock_success, 42 | tt_search_api(q, 43 | start_date = "20230101", 44 | end_date = "20230115", 45 | is_random = NULL, 46 | token = list(access_token = "test")) 47 | ) 48 | 49 | ex <- jsonlite::read_json("example_request.json") 50 | 51 | expect_equal({ 52 | sort(names(req$body$data)) 53 | }, sort(names(ex))) 54 | 55 | expect_equal({ 56 | req$body$data$query 57 | }, ex$query) 58 | 59 | expect_equal({ 60 | req$body$data$start_date 61 | }, ex$start_date) 62 | 63 | expect_equal({ 64 | req$body$data$end_date 65 | }, ex$end_date) 66 | 67 | expect_equal({ 68 | df <- httr2::with_mocked_responses( 69 | mock_success, 70 | 
tt_search_api(q, 71 | start_date = "20230101", 72 | end_date = "20230115", 73 | is_random = NULL, 74 | max_pages = 20, 75 | verbose = FALSE, 76 | token = list(access_token = "test")) 77 | ) 78 | nrow(df) 79 | }, 40) 80 | 81 | }) 82 | 83 | 84 | test_that("parsing", { 85 | expect_equal({ 86 | out <- jsonlite::read_json("example_resp_q_videos.json", bigint_as_char = TRUE) |> 87 | purrr::pluck("data", "videos") |> 88 | parse_api_search() 89 | c(out$video_id, nrow(out), ncol(out)) 90 | }, c("702874395068494965", "702874395068494965", "2", "13")) 91 | # apparently, sometimes the video_id is just called id 92 | expect_equal({ 93 | out <- list(list(id = "1"), list(video_id = "2")) |> 94 | parse_api_search() 95 | out$video_id 96 | }, c("1", "2")) 97 | expect_equal({ 98 | out <- jsonlite::read_json("example_resp_q_user.json", bigint_as_char = TRUE) |> 99 | purrr::pluck("data") |> 100 | tibble::as_tibble() 101 | c(nrow(out), ncol(out)) 102 | }, c(1, 8)) 103 | expect_equal({ 104 | out <- jsonlite::read_json("example_resp_comments.json", bigint_as_char = TRUE) |> 105 | purrr::pluck("data", "comments") |> 106 | parse_api_comments() 107 | c(out$video_id, nrow(out), ncol(out)) 108 | }, c("1234563451201523412", "1", "7")) 109 | }) 110 | -------------------------------------------------------------------------------- /tests/testthat/test-research_auth.R: -------------------------------------------------------------------------------- 1 | test_that("authentication works", { 2 | mock_success <- function(req) { 3 | httr2::response( 4 | status_code = 200, 5 | headers = "Content-Type: application/json", 6 | body = charToRaw("{ 7 | \"access_token\": \"clt.example12345Example12345Example\", 8 | \"expires_in\": 7200, 9 | \"token_type\": \"Bearer\" 10 | }")) 11 | } 12 | 13 | Sys.setenv("TIKTOK_TOKEN" = "test.rds") 14 | 15 | expect_equal( 16 | httr2::with_mocked_responses( 17 | mock_success, 18 | req_token(client_key = "test", client_secret = "test") 19 | ), 20 | list(access_token = 
"clt.example12345Example12345Example", 21 | expires_in = 7200L, 22 | token_type = "Bearer") 23 | ) 24 | 25 | expect_equal( 26 | httr2::with_mocked_responses( 27 | mock_success, 28 | auth_research(client_key = "test", client_secret = "test")$token_type 29 | ), 30 | "Bearer" 31 | ) 32 | 33 | expect_true(file.exists(file.path(tools::R_user_dir("traktok", "cache"), "test.rds"))) 34 | 35 | expect_equal(get_token()$access_token, httr2::obfuscated("clt.example12345Example12345Example")) 36 | 37 | on.exit(file.remove(file.path(tools::R_user_dir("traktok", "cache"), "test.rds"))) 38 | }) 39 | 40 | 41 | test_that("auth error", { 42 | mock_error <- function(req) { 43 | httr2::response( 44 | status_code = 500, 45 | headers = "Content-Type: application/json", 46 | body = charToRaw("{ 47 | \"error\": \"invalid_request\", 48 | \"error_description\": \"Client secret is missed in request.\", 49 | \"log_id\": \"202206221854370101130062072500FFA2\" 50 | }")) 51 | } 52 | expect_error({ 53 | httr2::with_mocked_responses( 54 | mock_error, 55 | req_token(client_key = "test", client_secret = "test") 56 | ) 57 | }, 58 | "Request failed with" 59 | ) 60 | 61 | }) 62 | -------------------------------------------------------------------------------- /tests/testthat/test-tt_utils.R: -------------------------------------------------------------------------------- 1 | # will be rewritten soon anyway 2 | # test_that("1. cookies as string options", { 3 | # options(tt_cookiefile = "tt_csrf_token=test;") 4 | # expect_equal(auth_hidden(save = FALSE), list(tt_csrf_token = "test;")) 5 | # unlink(list.files(tools::R_user_dir("traktok", "config"), full.names = TRUE)) 6 | # }) 7 | # 8 | # test_that("2. default cookie file", { 9 | # tmp <- tempfile() 10 | # options(tt_cookiefile = tmp) 11 | # writeLines("\t\t\t\t\ttt_csrf_token\ttest;", tmp) 12 | # expect_equal(auth_hidden(save = FALSE), 13 | # list(tt_csrf_token = "test;")) 14 | # }) 15 | # 16 | # test_that("3. 
default directory", { 17 | # options(tt_cookiefile = NULL) 18 | # tmp <- file.path(tools::R_user_dir("traktok", "config"), "aaa") 19 | # writeLines("\t\t\t\t\ttt_csrf_token\ttest;", tmp) 20 | # expect_equal(auth_hidden(save = FALSE), 21 | # list(tt_csrf_token = "test;")) 22 | # unlink(list.files(tools::R_user_dir("traktok", "config"), full.names = TRUE)) 23 | # }) 24 | # 25 | # test_that("4. no/invalid cookies", { 26 | # options(tt_cookiefile = NULL) 27 | # expect_error(auth_hidden(save = FALSE), 28 | # "No cookies provided or found") 29 | # expect_error(auth_hidden(x = "test"), 30 | # "No cookies provided or found") 31 | # }) 32 | # 33 | # test_that("5. invalid cookie string/file", { 34 | # expect_error(auth_hidden(x = "test=test;"), 35 | # " does not contain valid TikTok cookies") 36 | # 37 | # expect_error(auth_hidden(x = list()), 38 | # " does not contain valid TikTok cookies") 39 | # 40 | # tmp <- tempfile() 41 | # writeLines("\t\t\t\t\ttest\ttest;", tmp) 42 | # expect_error(auth_hidden(x = tmp), 43 | # " does not contain valid TikTok cookies") 44 | # }) 45 | # 46 | # 47 | # test_that("vpluck", { 48 | # expect_equal( 49 | # vpluck(list(list(c("A", NA)), list(NULL)), 1, 1), 50 | # c("A", NA_character_) 51 | # ) 52 | # expect_equal( 53 | # vpluck(list(list(c("A", NA)), list(NULL)), 1, 2), 54 | # c(NA_character_, NA_character_) 55 | # ) 56 | # expect_equal( 57 | # vpluck(list(list(c(1L, NA)), list(NULL)), 1, 1, val = "integer"), 58 | # c(1L, NA_integer_) 59 | # ) 60 | # expect_equal( 61 | # vpluck(list(list(c(TRUE, NA)), list(NULL)), 1, 1, val = "logical"), 62 | # c(TRUE, NA) 63 | # ) 64 | # }) 65 | -------------------------------------------------------------------------------- /tests/testthat/test-tt_videos.R: -------------------------------------------------------------------------------- 1 | test_that("get meta and download", { 2 | skip("need to rewrite after refactor") 3 | options(tt_cookiefile = Sys.getenv("TT_COOKIES")) 4 | df <- tt_videos(video_urls = 
c("https://www.tiktok.com/@tiktok/video/6584647400055377158?is_copy_url=1&is_from_webapp=v1", 5 | "https://www.tiktok.com/@tiktok/video/6584647400055377158?is_copy_url=1&is_from_webapp=v1"), 6 | cache_dir = tempdir(), 7 | save_video = FALSE, 8 | dir = tempdir()) 9 | expect_equal(nrow(df), 2L) 10 | expect_equal(ncol(df), 16L) 11 | # expect_equal(file.exists(df[["video_fn"]][1]), TRUE) 12 | expect_equal(file.exists(paste0(tempdir(), "/video_meta_6584647400055377158.rds")), TRUE) 13 | expect_lte(sum(is.na(df)), 2L) 14 | expect_warning(tt_videos("https://www.tiktok.com/"), 15 | "https://www.tiktok.com/ can't be reached.") 16 | expect_warning(tt_videos("https://www.tiktok.com/@test/video/6"), 17 | "html status 404, the row will contain NAs") 18 | }) 19 | 20 | 21 | test_that("parse", { 22 | expect_warning(parse_video('{"test":1}', video_id = 1L), 23 | "No video data found") 24 | expect_equal( 25 | dim(parse_video('{"ItemModule":{"test":1}}', video_id = 1L)), 26 | c(1L, 18L) 27 | ) 28 | expect_equal( 29 | dim(parse_video('{"__DEFAULT_SCOPE__":{"webapp.video-detail":{"itemInfo":{"itemStruct":{"test":1}}}}}', video_id = 1L)), 30 | c(1L, 22L) 31 | ) 32 | }) 33 | -------------------------------------------------------------------------------- /traktok.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | 
-------------------------------------------------------------------------------- /vignettes/cookies.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JBGruber/traktok/d97f4ea5fb4f728aba1f64f9f71f5740bd626442/vignettes/cookies.png -------------------------------------------------------------------------------- /vignettes/function_overview.csv: -------------------------------------------------------------------------------- 1 | Description,Shorthand,Research API,Hidden API 2 | search videos,tt_search,tt_search_api,tt_search_hidden 3 | get video detail (+file),tt_videos,-,tt_videos_hidden 4 | get user infos,tt_user_info,tt_user_info_api,- 5 | get comments under a video,tt_comments,tt_comments_api,- 6 | get who follows a user,tt_get_follower,-,tt_get_follower_hidden 7 | get who a user is following,tt_get_following,-,tt_get_following_hidden 8 | get raw video data,-,-,tt_request_hidden 9 | authenticate a session,-,auth_research,auth_hidden 10 | -------------------------------------------------------------------------------- /vignettes/make_vignettes.r: -------------------------------------------------------------------------------- 1 | knitr::knit("vignettes/unofficial-api.Rmd.orig", output = "vignettes/unofficial-api.Rmd") 2 | knitr::knit("vignettes/research-api.Rmd.orig", output = "vignettes/research-api.Rmd") 3 | -------------------------------------------------------------------------------- /vignettes/research-api.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Research API" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{research-api} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | 11 | 12 | TikTok's [Research API](https://developers.tiktok.com/products/research-api/), which was made available to researchers in the US and Europe in 2023, offers several endpoints, which are
wrapped in the following `traktok` functions: 13 | 14 | 1. You can [search videos](https://developers.tiktok.com/doc/research-api-specs-query-videos) with `tt_search_api` or `tt_search` 15 | 2. You can [get basic user information](https://developers.tiktok.com/doc/research-api-specs-query-user-info) with `tt_user_info_api` or `tt_user_info` 16 | 3. You can [obtain all comments of a video](https://developers.tiktok.com/doc/research-api-specs-query-video-comments) with `tt_comments_api` or `tt_comments` 17 | 4. You can [get the videos a user has liked](https://developers.tiktok.com/doc/research-api-specs-query-user-liked-videos) with `tt_user_liked_videos_api` or `tt_get_liked` 18 | 5. You can [get the videos a user has reposted](https://developers.tiktok.com/doc/research-api-specs-query-user-reposted-videos) with `tt_user_reposted_api` or `tt_get_reposted` 19 | 6. You can [get the videos a user has pinned](https://developers.tiktok.com/doc/research-api-specs-query-user-pinned-videos) with `tt_user_pinned_videos_api` or `tt_get_pinned` 20 | 7. You can [get who a user is following](https://developers.tiktok.com/doc/research-api-specs-query-user-following) or [who follows a user](https://developers.tiktok.com/doc/research-api-specs-query-user-followers) with `tt_get_following`/`tt_get_follower` or `tt_user_following_api`/`tt_user_follower_api` 21 | 22 | 23 | # Authentication 24 | 25 | To get access to the Research API, you need to: 26 | 27 | 1. [be eligible](https://developers.tiktok.com/products/research-api); 28 | 2. [create a developer account](https://developers.tiktok.com/signup); 29 | 3. 
and then apply for access to the research API: 30 | 31 | Once you have been approved and have your client key and client secret, you can authenticate with: 32 | 33 | 34 | ``` r 35 | library(traktok) 36 | auth_research() 37 | ``` 38 | 39 | 40 | 41 | It is recommended that you run this function only once without arguments, so that your key and secret can be entered through the pop up mask and do not remain unencrypted in your R history or a script. 42 | The function then runs through authentication for you and saves the resulting token encrypted on your hard drive. 43 | Just run it again in case your credentials change. 44 | 45 | 46 | # Usage 47 | ## Search Videos 48 | 49 | TikTok uses a fine-grained, yet complicated [query syntax](https://developers.tiktok.com/doc/research-api-specs-query-videos#query). 50 | For convenience, a query is constructed internally when you search with a key phrase directly: 51 | 52 | 53 | ``` r 54 | tt_query_videos("#rstats", max_pages = 2L) 55 | #> 56 | ℹ Making initial request 57 | 58 | ✔ Making initial request [774ms] 59 | #> 60 | ℹ Parsing data 61 | 62 | ✔ Parsing data [177ms] 63 | #> ── search id: NA ─────────────────────────────────────── 64 | #> # A tibble: 0 × 13 65 | #> # ℹ 13 variables: video_id , author_name , 66 | #> # view_count , comment_count , 67 | #> # share_count , like_count , 68 | #> # region_code , create_time , 69 | #> # effect_ids , music_id , 70 | #> # video_description , hashtag_names , 71 | #> # voice_to_text 72 | ``` 73 | 74 | This will match your keyword or phrase against keywords and hashtags and return up to 200 results (each page has 100 results and 2 pages are requested by default) from today and yesterday. 75 | Every whitespace is treated as an AND operator. 
76 | To extend the data range, you can set a start and end (which can be a maximum of 30 days apart, but there is no limit how far you can go back): 77 | 78 | 79 | ``` r 80 | tt_query_videos("#rstats", 81 | max_pages = 2L, 82 | start_date = as.Date("2023-11-01"), 83 | end_date = as.Date("2023-11-29")) 84 | #> 85 | ℹ Making initial request 86 | 87 | ✔ Making initial request [2s] 88 | #> 89 | ℹ Parsing data 90 | 91 | ✔ Parsing data [63ms] 92 | #> ── search id: 7423432753447932974 ────────────────────── 93 | #> # A tibble: 19 × 13 94 | #> video_id author_name view_count comment_count 95 | #> 96 | #> 1 730689385329705… statistics… 909 4 97 | #> 2 730630774458222… learningca… 1104 11 98 | #> 3 730501447636800… picanumeros 4645 8 99 | #> 4 730297066790799… smooth.lea… 98717 17 100 | #> 5 730247037950160… statistics… 508 0 101 | #> 6 730097749816510… statistics… 27387 1 102 | #> 7 730093147605973… rigochando 2603 4 103 | #> 8 730092229522312… elartedeld… 765 0 104 | #> 9 729998705941704… statistics… 1110 1 105 | #> 10 729965751681473… rigochando 905 4 106 | #> 11 729934294487885… rigochando 555 0 107 | #> 12 729896668413454… rigochando 1312 1 108 | #> 13 729691148659145… biofreelan… 19758 7 109 | #> 14 729691148625178… biofreelan… 5763 1 110 | #> 15 729691147878174… biofreelan… 1019 3 111 | #> 16 729668885660947… mrpecners 657 2 112 | #> 17 729651863537426… l_a_kelly 514 5 113 | #> 18 729649864535081… mrpecners 373 0 114 | #> 19 729628884337898… casaresfel… 274 0 115 | #> # ℹ 9 more variables: share_count , 116 | #> # like_count , region_code , 117 | #> # create_time , effect_ids , 118 | #> # music_id , video_description , 119 | #> # hashtag_names , voice_to_text 120 | ``` 121 | 122 | As said, the query syntax that TikTok uses is a little complicated, as you can use AND, OR and NOT boolean operators on a number of fields (`"create_date"`, `"username"`, `"region_code"`, `"video_id"`, `"hashtag_name"`, `"keyword"`, `"music_id"`, `"effect_id"`, and `"video_length"`): 123 | 
124 | | Operator | Results are returned if... | 125 | | -------- | ------------------------------------------ | 126 | | AND | ...all specified conditions are met | 127 | | OR | ...any of the specified conditions are met | 128 | | NOT | ...the not conditions are not met | 129 | 130 | To make this easier to use, `traktok` uses a tidyverse style approach to building queries. 131 | For example, to get to the same query that matches #rstats against keywords and hashtags, you need to build the query like this: 132 | 133 | 134 | ``` r 135 | query() |> # start by using query() 136 | query_or(field_name = "hashtag_name", # add an OR condition on the hashtag field 137 | operation = "IN", # the value should be IN the list of hashtags 138 | field_values = "rstats") |> # the hashtag field does not accept the #-symbol 139 | query_or(field_name = "keyword", # add another OR condition 140 | operation = "IN", 141 | field_values = "#rstats") 142 | #> S3 143 | #> └─or: 144 | #> ├─ 145 | #> │ ├─field_name: "hashtag_name" 146 | #> │ ├─operation: "IN" 147 | #> │ └─field_values: 148 | #> │ └─"rstats" 149 | #> └─ 150 | #> ├─field_name: "keyword" 151 | #> ├─operation: "IN" 152 | #> └─field_values: 153 | #> └─"#rstats" 154 | ``` 155 | 156 | If #rstats is found in either the hashtag or keywords of a video, that video is then returned. 157 | Besides checking for `EQ`ual, you can also use one of the other operations: 158 | 159 | | Operation | Results are returned if field_values are... 
| 160 | | --------- | ----------------------------------------------- | 161 | | EQ | equal to the value in the field | 162 | | IN | equal to a value in the field | 163 | | GT | greater than the value in the field | 164 | | GTE | greater than or equal to the value in the field | 165 | | LT | lower than the value in the field | 166 | | LTE | lower than or equal to the value in the field | 167 | 168 | 169 | This makes building queries relatively complex, but allows for fine-grained searches in the TikTok data: 170 | 171 | 172 | ``` r 173 | search_df <- query() |> 174 | query_and(field_name = "region_code", 175 | operation = "IN", 176 | field_values = c("JP", "US")) |> 177 | query_or(field_name = "hashtag_name", 178 | operation = "EQ", # rstats is the only hashtag 179 | field_values = "rstats") |> 180 | query_or(field_name = "keyword", 181 | operation = "IN", # rstats is one of the keywords 182 | field_values = "rstats") |> 183 | query_not(operation = "EQ", 184 | field_name = "video_length", 185 | field_values = "SHORT") |> 186 | tt_search_api(start_date = as.Date("2023-11-01"), 187 | end_date = as.Date("2023-11-29")) 188 | #> 189 | ℹ Making initial request 190 | 191 | ✔ Making initial request [1.1s] 192 | #> 193 | ℹ Parsing data 194 | 195 | ✔ Parsing data [59ms] 196 | search_df 197 | #> ── search id: 7423432753447965742 ────────────────────── 198 | #> # A tibble: 2 × 13 199 | #> video_id author_name view_count comment_count 200 | #> 201 | #> 1 7296688856609475… mrpecners 657 2 202 | #> 2 7296498645350812… mrpecners 373 0 203 | #> # ℹ 9 more variables: share_count , 204 | #> # like_count , region_code , 205 | #> # create_time , effect_ids , 206 | #> # music_id , video_description , 207 | #> # hashtag_names , voice_to_text 208 | ``` 209 | 210 | This will return videos posted in the US or Japan, that have rstats as the only hashtag or as one of the keywords and have a length of `"MID"`, `"LONG"`, or `"EXTRA_LONG"`.^[ 211 | See for possible values of each field. 
212 | ] 213 | 214 | ## Get User Information 215 | 216 | There is not really much to getting basic user info, but this is how you can do it: 217 | 218 | 219 | ``` r 220 | tt_user_info_api(username = c("tiktok", "https://www.tiktok.com/@statisticsglobe")) 221 | #> 222 | ℹ Getting user tiktok 223 | 224 | ✔ Got user tiktok [508ms] 225 | #> 226 | ℹ Getting user statisticsglobe 227 | 228 | ✔ Got user statisticsglobe [518ms] 229 | #> # A tibble: 2 × 8 230 | #> is_verified likes_count video_count avatar_url 231 | #> 232 | #> 1 TRUE 330919903 1073 https://p16-pu-si… 233 | #> 2 FALSE 1660 92 https://p16-sign-… 234 | #> # ℹ 4 more variables: bio_description , 235 | #> # display_name , follower_count , 236 | #> # following_count 237 | ``` 238 | 239 | If you wish to return the videos of a user, you can use the search again: 240 | 241 | 242 | ``` r 243 | query() |> 244 | query_and(field_name = "username", 245 | operation = "EQ", 246 | field_values = "statisticsglobe") |> 247 | tt_search_api(start_date = as.Date("2023-11-01"), 248 | end_date = as.Date("2023-11-29")) 249 | #> 250 | ℹ Making initial request 251 | 252 | ✔ Making initial request [872ms] 253 | #> 254 | ℹ Parsing data 255 | 256 | ✔ Parsing data [65ms] 257 | #> ── search id: 7423432753448064046 ────────────────────── 258 | #> # A tibble: 5 × 13 259 | #> video_id author_name view_count comment_count 260 | #> 261 | #> 1 7306893853297052… statistics… 909 4 262 | #> 2 7302470379501604… statistics… 508 0 263 | #> 3 7300977498165103… statistics… 27387 1 264 | #> 4 7299987059417042… statistics… 1110 1 265 | #> 5 7297389484524506… statistics… 538 2 266 | #> # ℹ 9 more variables: share_count , 267 | #> # like_count , region_code , 268 | #> # create_time , effect_ids , 269 | #> # music_id , video_description , 270 | #> # hashtag_names , voice_to_text 271 | ``` 272 | 273 | You can also find the videos a user has pinned to the top of their page: 274 | 275 | 276 | ``` r 277 | tt_user_pinned_videos_api(c("tiktok", 
"https://www.tiktok.com/@smooth.learning.c")) 278 | #> 279 | ℹ Getting user tiktok 280 | 281 | ✖ Getting user tiktok [367ms] 282 | #> 283 | ℹ Getting user smooth.learning.c 284 | 285 | ✔ Got user smooth.learning.c [571ms] 286 | #> # A tibble: 1 × 14 287 | #> pinned_by_user create_time id is_stem_verified 288 | #> 289 | #> 1 smooth.learning.c 1690255097 725959… FALSE 290 | #> # ℹ 10 more variables: region_code , 291 | #> # video_duration , view_count , 292 | #> # video_description , comment_count , 293 | #> # hashtag_names , like_count , 294 | #> # music_id , share_count , username 295 | ``` 296 | 297 | 298 | To find out what a user has liked, you can use: 299 | 300 | 301 | ``` r 302 | tt_get_liked("jbgruber") 303 | #> 304 | ℹ Getting user jbgruber 305 | 306 | ✔ Got user jbgruber [1.5s] 307 | #> # A tibble: 98 × 14 308 | #> id username create_time video_description 309 | #> 310 | #> 1 7355902326877… america… 1712679503 "Stitch with @Mr… 311 | #> 2 7268078476102… carterp… 1692231398 "Are you going t… 312 | #> 3 7419692903460… okbrune… 1727531892 "Die ganze Wahrh… 313 | #> 4 7405633113835… funny_s… 1724258332 "#fyp #fypシ #fu… 314 | #> 5 7398532172048… lib0160… 1722605019 "Me and ChatGPT … 315 | #> 6 7364763547038… vquasch… 1714742648 "Einige Medien u… 316 | #> 7 7346577913858… ct_3003 1710508473 "Diese Platine f… 317 | #> 8 7379856141972… lizthed… 1718256663 "Replying to @Ar… 318 | #> 9 7415189182865… felixba… 1726483284 "Es geht wieder … 319 | #> 10 7422673042553… grueneb… 1728225752 "Was Söder uns e… 320 | #> # ℹ 88 more rows 321 | #> # ℹ 10 more variables: region_code , 322 | #> # video_duration , view_count , 323 | #> # like_count , comment_count , 324 | #> # share_count , music_id , 325 | #> # hashtag_names , is_stem_verified , 326 | #> # liked_by_user 327 | ``` 328 | 329 | Note, that making likes public is an opt-in feature of TikTok and almost nobody has this enabled, so it will give you a lot of warning... 
330 | 331 | What we can usually get is the information who a user follows: 332 | 333 | 334 | ``` r 335 | tt_user_following_api(username = "jbgruber") 336 | #> 337 | ℹ Getting user jbgruber 338 | 339 | ✔ Got user jbgruber [296ms] 340 | #> # A tibble: 19 × 3 341 | #> display_name username following_user 342 | #> 343 | #> 1 SohoBrody rudeboybrody jbgruber 344 | #> 2 Last Week Tonight lastweektonight… jbgruber 345 | #> 3 schlantologie schlantologie jbgruber 346 | #> 4 Alex Falcone alex_falcone jbgruber 347 | #> 5 dadNRG dadnrg jbgruber 348 | #> 6 Einfach Genial Tictok user22690086508… jbgruber 349 | #> 7 noir_concrete_studio noir_concrete_s… jbgruber 350 | #> 8 fatDumbledore fatdumbledore13… jbgruber 351 | #> 9 fragdenstaat.de fragdenstaat.de jbgruber 352 | #> 10 Erikadbka erikadbka jbgruber 353 | #> 11 BÜNDNIS 90/DIE GRÜNEN diegruenen jbgruber 354 | #> 12 lagedernationclips lagedernationcl… jbgruber 355 | #> 13 Alexandra Ils kitty.fantastico jbgruber 356 | #> 14 future infinitive ☸️ lizthedeveloper jbgruber 357 | #> 15 Tim Achtermeyer achtermeyer jbgruber 358 | #> 16 Jay Foreman jayforeman jbgruber 359 | #> 17 Cosmo whereiswanda jbgruber 360 | #> 18 Tim Walz timwalz jbgruber 361 | #> 19 Shahak Shapira shahakshapira jbgruber 362 | ``` 363 | 364 | And who they are followed by: 365 | 366 | 367 | ``` r 368 | tt_user_follower_api("https://www.tiktok.com/@tiktok") 369 | #> 370 | ℹ Getting user tiktok 371 | 372 | ✔ Got user tiktok [442ms] 373 | #> # A tibble: 90 × 3 374 | #> username display_name following_user 375 | #> 376 | #> 1 galbruwt reeyyp tiktok 377 | #> 2 user5235623178011 👑কিং রানা 🥀 tiktok 378 | #> 3 rokyevay07 👑Rokye Vay👑 tiktok 379 | #> 4 babyylious08 babyylious08 tiktok 380 | #> 5 user8283823357 hd❤️‍🩹jaan❤️‍🩹hi❤️‍🩹❤️ tiktok 381 | #> 6 user45628309141722 سامي tiktok 382 | #> 7 nu.th085 Nâu Thị tiktok 383 | #> 8 halimeysll halimeysll tiktok 384 | #> 9 taru.tristiyanto Taru Tristiyanto tiktok 385 | #> 10 vng.lan.hng09 Vương Lan Hường tiktok 386 | #> # ℹ 80 more 
rows 387 | ``` 388 | 389 | 390 | ## Obtain all Comments of a Video 391 | 392 | There is, again, not much to talk about when it comes to the comments API. 393 | You need to supply a video ID, which you either have already: 394 | 395 | 396 | ``` r 397 | tt_comments_api(video_id = "7302470379501604128") 398 | #> 399 | ℹ Making initial request 400 | 401 | ✔ Making initial request [4.9s] 402 | #> 403 | ℹ Parsing data 404 | 405 | ✔ Parsing data [68ms] 406 | #> ── search id: ───────────────────────────────────────── 407 | #> # A tibble: 1 × 7 408 | #> create_time id like_count parent_comment_id 409 | #> 410 | #> 1 1700243424 730248974199… 0 7302470379501604… 411 | #> # ℹ 3 more variables: reply_count , text , 412 | #> # video_id 413 | ``` 414 | 415 | Or you got it from a search: 416 | 417 | 418 | ``` r 419 | tt_comments_api(video_id = search_df$video_id[1]) 420 | #> 421 | ℹ Making initial request 422 | 423 | ✔ Making initial request [4.8s] 424 | #> 425 | ℹ Parsing data 426 | 427 | ✔ Parsing data [61ms] 428 | #> ── search id: ───────────────────────────────────────── 429 | #> # A tibble: 2 × 7 430 | #> create_time id like_count parent_comment_id 431 | #> 432 | #> 1 1698893206 729669068138… 1 7296688856609475… 433 | #> 2 1698893251 729669083429… 0 7296690681388204… 434 | #> # ℹ 3 more variables: reply_count , text , 435 | #> # video_id 436 | ``` 437 | 438 | Or you let the function extract it from a URL to a video: 439 | 440 | 441 | ``` r 442 | tt_comments_api(video_id = "https://www.tiktok.com/@nicksinghtech/video/7195762648716152107?q=%23rstats") 443 | #> 444 | ℹ Making initial request 445 | 446 | ✔ Making initial request [5.9s] 447 | #> 448 | ℹ Parsing data 449 | 450 | ✔ Parsing data [58ms] 451 | #> ── search id: ───────────────────────────────────────── 452 | #> # A tibble: 96 × 7 453 | #> text video_id create_time id like_count 454 | #> 455 | #> 1 You gotta know… 7195762… 1675394834 7195… 314 456 | #> 2 R is the goat … 7195762… 1675457114 7196… 232 457 | #> 3 Ppl who 
like E… 7195762… 1675458796 7196… 177 458 | #> 4 Fair but doesn… 7195762… 1675395061 7195… 166 459 | #> 5 babe RStudio i… 7195762… 1675624739 7196… 71 460 | #> 6 Excel is the b… 7195762… 1675465779 7196… 71 461 | #> 7 NOT THE SAS SL… 7195762… 1675494738 7196… 27 462 | #> 8 I won't take t… 7195762… 1675691471 7197… 17 463 | #> 9 No love for ST… 7195762… 1675656122 7196… 16 464 | #> 10 I use SAS 🫡 7195762… 1675440749 7195… 16 465 | #> # ℹ 86 more rows 466 | #> # ℹ 2 more variables: parent_comment_id , 467 | #> # reply_count 468 | ``` 469 | 470 | And that is essentially it. 471 | Note that if you find the functionality of the Research API lacking, there is nothing that keeps you from using the unofficial API functions. 472 | 473 | 474 | # Dealing with rate limits and continuing old searches 475 | 476 | At the time of writing this vignette, TikTok rate-limits the Research API as follows: 477 | 478 | > Currently, the daily limit is set at 1000 requests per day, allowing you to obtain up to 100,000 records per day across our APIs. (Video and Comments API can return 100 records per request). The daily quota gets reset at 12 AM UTC. [[Source](https://developers.tiktok.com/doc/research-api-faq?enter_method=left_navigation)] 479 | 480 | Depending on what you would like to do, this might not be enough for you. 481 | In this case, you can actually save a search and pick it back up after the reset. 
482 | To facilitate this, search result objects contain two extra pieces of information in the attributes: 483 | 484 | 485 | ``` r 486 | search_df <- query() |> 487 | query_and(field_name = "region_code", 488 | operation = "IN", 489 | field_values = c("JP", "US")) |> 490 | tt_search_api(start_date = as.Date("2023-11-01"), 491 | end_date = as.Date("2023-11-29"), 492 | max_pages = 1) 493 | #> 494 | ℹ Making initial request 495 | 496 | ✔ Making initial request [2.4s] 497 | #> 498 | ℹ Parsing data 499 | 500 | ✔ Parsing data [71ms] 501 | 502 | attr(search_df, "search_id") 503 | #> [1] "7423432753448096814" 504 | attr(search_df, "cursor") 505 | #> [1] 100 506 | ``` 507 | 508 | When you want to continue this search, whether because of rate limit or because you decided you want more results, you can do so by providing `search_id` and `cursor` to `tt_search_api`. 509 | If your search was cut short by the rate limit or another issue, you can retrieve the results already received with `search_df <- last_query()`. 510 | `search_df` will in both cases contain the relevant `search_id` and `cursor` in the attributes: 511 | 512 | 513 | ``` r 514 | search_df2 <- query() |> 515 | query_and(field_name = "region_code", 516 | operation = "IN", 517 | field_values = c("JP", "US")) |> 518 | tt_search_api(start_date = as.Date("2023-11-01"), 519 | end_date = as.Date("2023-11-29"), 520 | 521 | # this part is new 522 | start_cursor = attr(search_df, "cursor"), 523 | search_id = attr(search_df, "search_id"), 524 | #### 525 | max_pages = 1) 526 | #> 527 | ℹ Making initial request 528 | 529 | ✔ Making initial request [5.1s] 530 | #> 531 | ℹ Parsing data 532 | 533 | ✔ Parsing data [21ms] 534 | attr(search_df2, "search_id") 535 | #> [1] "7336340473470063662" 536 | attr(search_df2, "cursor") 537 | #> [1] 200 538 | ``` 539 | 540 | Note that the cursor is not equal to how many videos you got before, as the API also counts videos that are "deleted/marked as private by users etc." 
[See `max_count` in [Query Videos](https://developers.tiktok.com/doc/research-api-specs-query-videos)]. 541 | 542 | -------------------------------------------------------------------------------- /vignettes/research-api.Rmd.orig: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Research API" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{research-api} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | ```{r, include = FALSE} 11 | knitr::opts_chunk$set( 12 | collapse = TRUE, 13 | comment = "#>", 14 | eval = TRUE 15 | ) 16 | ``` 17 | 18 | TikTok's [Research API](https://developers.tiktok.com/products/research-api/), which was made available to researchers in the US and Europe in 2023, offers several endpoints, which are wrapped in the following `traktok` functions: 19 | 20 | 1. You can [search videos](https://developers.tiktok.com/doc/research-api-specs-query-videos) with `tt_search_api` or `tt_search` 21 | 2. You can [get basic user information](https://developers.tiktok.com/doc/research-api-specs-query-user-info) with `tt_user_info_api` or `tt_user_info` 22 | 3. You can [obtain all comments of a video](https://developers.tiktok.com/doc/research-api-specs-query-video-comments) with `tt_comments_api` or `tt_comments` 23 | 4. You can [get the videos a user has liked](https://developers.tiktok.com/doc/research-api-specs-query-user-liked-videos) with `tt_user_liked_videos_api` or `tt_get_liked` 24 | 5. You can [get the videos a user has reposted](https://developers.tiktok.com/doc/research-api-specs-query-user-reposted-videos) with `tt_user_reposted_api` or `tt_get_reposted` 25 | 6. You can [get the videos a user has pinned](https://developers.tiktok.com/doc/research-api-specs-query-user-pinned-videos) with `tt_user_pinned_videos_api` or `tt_get_pinned` 26 | 7. 
You can [get who a user is following](https://developers.tiktok.com/doc/research-api-specs-query-user-following) or [who follows a user](https://developers.tiktok.com/doc/research-api-specs-query-user-followers) with `tt_get_following`/`tt_get_follower` or `tt_user_following_api`/`tt_user_follower_api` 27 | 28 | 29 | # Authentication 30 | 31 | To get access to the Research API, you need to: 32 | 33 | 1. [be eligible](https://developers.tiktok.com/products/research-api); 34 | 2. [create a developer account](https://developers.tiktok.com/signup); 35 | 3. and then apply for access to the research API: 36 | 37 | Once you have been approved and have your client key and client secret, you can authenticate with: 38 | 39 | ```{r eval=FALSE} 40 | library(traktok) 41 | auth_research() 42 | ``` 43 | 44 | ```{r echo=FALSE} 45 | library(traktok) 46 | ``` 47 | 48 | It is recommended that you run this function only once without arguments, so that your key and secret can be entered through the pop-up mask and do not remain unencrypted in your R history or a script. 49 | The function then runs through authentication for you and saves the resulting token encrypted on your hard drive. 50 | Just run it again in case your credentials change. 51 | 52 | 53 | # Usage 54 | ## Search Videos 55 | 56 | TikTok uses a fine-grained, yet complicated [query syntax](https://developers.tiktok.com/doc/research-api-specs-query-videos#query). 57 | For convenience, a query is constructed internally when you search with a key phrase directly: 58 | 59 | ```{r} 60 | tt_query_videos("#rstats", max_pages = 2L) 61 | ``` 62 | 63 | This will match your keyword or phrase against keywords and hashtags and return up to 200 results (each page has 100 results and 2 pages are requested by default) from today and yesterday. 64 | Every whitespace is treated as an AND operator. 
65 | To extend the data range, you can set a start and end (which can be a maximum of 30 days apart, but there is no limit how far you can go back): 66 | 67 | ```{r} 68 | tt_query_videos("#rstats", 69 | max_pages = 2L, 70 | start_date = as.Date("2023-11-01"), 71 | end_date = as.Date("2023-11-29")) 72 | ``` 73 | 74 | As said, the query syntax that TikTok uses is a little complicated, as you can use AND, OR and NOT boolean operators on a number of fields (`"create_date"`, `"username"`, `"region_code"`, `"video_id"`, `"hashtag_name"`, `"keyword"`, `"music_id"`, `"effect_id"`, and `"video_length"`): 75 | 76 | | Operator | Results are returned if... | 77 | | -------- | ------------------------------------------ | 78 | | AND | ...all specified conditions are met | 79 | | OR | ...any of the specified conditions are met | 80 | | NOT | ...the not conditions are not met | 81 | 82 | To make this easier to use, `traktok` uses a tidyverse style approach to building queries. 83 | For example, to get to the same query that matches #rstats against keywords and hashtags, you need to build the query like this: 84 | 85 | ```{r} 86 | query() |> # start by using query() 87 | query_or(field_name = "hashtag_name", # add an OR condition on the hashtag field 88 | operation = "IN", # the value should be IN the list of hashtags 89 | field_values = "rstats") |> # the hashtag field does not accept the #-symbol 90 | query_or(field_name = "keyword", # add another OR condition 91 | operation = "IN", 92 | field_values = "#rstats") 93 | ``` 94 | 95 | If #rstats is found in either the hashtag or keywords of a video, that video is then returned. 96 | Besides checking for `EQ`ual, you can also use one of the other operations: 97 | 98 | | Operation | Results are returned if field_values are... 
| 99 | | --------- | ----------------------------------------------- | 100 | | EQ | equal to the value in the field | 101 | | IN | equal to a value in the field | 102 | | GT | greater than the value in the field | 103 | | GTE | greater than or equal to the value in the field | 104 | | LT | lower than the value in the field | 105 | | LTE | lower than or equal to the value in the field | 106 | 107 | 108 | This makes building queries relatively complex, but allows for fine-grained searches in the TikTok data: 109 | 110 | ```{r} 111 | search_df <- query() |> 112 | query_and(field_name = "region_code", 113 | operation = "IN", 114 | field_values = c("JP", "US")) |> 115 | query_or(field_name = "hashtag_name", 116 | operation = "EQ", # rstats is the only hashtag 117 | field_values = "rstats") |> 118 | query_or(field_name = "keyword", 119 | operation = "IN", # rstats is one of the keywords 120 | field_values = "rstats") |> 121 | query_not(operation = "EQ", 122 | field_name = "video_length", 123 | field_values = "SHORT") |> 124 | tt_search_api(start_date = as.Date("2023-11-01"), 125 | end_date = as.Date("2023-11-29")) 126 | search_df 127 | ``` 128 | 129 | This will return videos posted in the US or Japan, that have rstats as the only hashtag or as one of the keywords and have a length of `"MID"`, `"LONG"`, or `"EXTRA_LONG"`.^[ 130 | See for possible values of each field. 
131 | ] 132 | 133 | ## Get User Information 134 | 135 | There is not really much to getting basic user info, but this is how you can do it: 136 | 137 | ```{r} 138 | tt_user_info_api(username = c("tiktok", "https://www.tiktok.com/@statisticsglobe")) 139 | ``` 140 | 141 | If you wish to return the videos of a user, you can use the search again: 142 | 143 | ```{r} 144 | query() |> 145 | query_and(field_name = "username", 146 | operation = "EQ", 147 | field_values = "statisticsglobe") |> 148 | tt_search_api(start_date = as.Date("2023-11-01"), 149 | end_date = as.Date("2023-11-29")) 150 | ``` 151 | 152 | You can also find the videos a user has pinned to the top of their page: 153 | 154 | ```{r} 155 | tt_user_pinned_videos_api(c("tiktok", "https://www.tiktok.com/@smooth.learning.c")) 156 | ``` 157 | 158 | 159 | To find out what a user has liked, you can use: 160 | 161 | ```{r} 162 | tt_get_liked("jbgruber") 163 | ``` 164 | 165 | Note that making likes public is an opt-in feature of TikTok and almost nobody has this enabled, so this will give you a lot of warnings. 166 | 167 | What we can usually get is the information about who a user follows: 168 | 169 | ```{r} 170 | tt_user_following_api(username = "jbgruber") 171 | ``` 172 | 173 | And who they are followed by: 174 | 175 | ```{r} 176 | tt_user_follower_api("https://www.tiktok.com/@tiktok") 177 | ``` 178 | 179 | 180 | ## Obtain all Comments of a Video 181 | 182 | There is, again, not much to talk about when it comes to the comments API. 
183 | You need to supply a video ID, which you either have already: 184 | 185 | ```{r} 186 | tt_comments_api(video_id = "7302470379501604128") 187 | ``` 188 | 189 | Or you got it from a search: 190 | 191 | ```{r} 192 | tt_comments_api(video_id = search_df$video_id[1]) 193 | ``` 194 | 195 | Or you let the function extract it from a URL to a video: 196 | 197 | ```{r} 198 | tt_comments_api(video_id = "https://www.tiktok.com/@nicksinghtech/video/7195762648716152107?q=%23rstats") 199 | ``` 200 | 201 | And that is essentially it. 202 | Note that if you find the functionality of the Research API lacking, there is nothing that keeps you from using the unofficial API functions. 203 | 204 | 205 | # Dealing with rate limits and continuing old searches 206 | 207 | At the moment of writing this vignette, TikTok rate limits the Research API as follows: 208 | 209 | > Currently, the daily limit is set at 1000 requests per day, allowing you to obtain up to 100,000 records per day across our APIs. (Video and Comments API can return 100 records per request). The daily quota gets reset at 12 AM UTC. [[Source](https://developers.tiktok.com/doc/research-api-faq?enter_method=left_navigation)] 210 | 211 | Depending on what you would like to do, this might not be enough for you. 212 | In this case, you can actually save a search and pick it back up after the reset.
213 | To facilitate this, search result objects contain two extra pieces of information in the attributes: 214 | 215 | ```{r} 216 | search_df <- query() |> 217 | query_and(field_name = "region_code", 218 | operation = "IN", 219 | field_values = c("JP", "US")) |> 220 | tt_search_api(start_date = as.Date("2023-11-01"), 221 | end_date = as.Date("2023-11-29"), 222 | max_pages = 1) 223 | 224 | attr(search_df, "search_id") 225 | attr(search_df, "cursor") 226 | ``` 227 | 228 | When you want to continue this search, whether because of rate limit or because you decided you want more results, you can do so by providing `search_id` and `cursor` to `tt_search_api`. 229 | If your search was cut short by the rate limit or another issue, you can retrieve the results already received with `search_df <- last_query()`. 230 | `search_df` will in both cases contain the relevant `search_id` and `cursor` in the attributes: 231 | 232 | ```{r} 233 | search_df2 <- query() |> 234 | query_and(field_name = "region_code", 235 | operation = "IN", 236 | field_values = c("JP", "US")) |> 237 | tt_search_api(start_date = as.Date("2023-11-01"), 238 | end_date = as.Date("2023-11-29"), 239 | 240 | # this part is new 241 | start_cursor = attr(search_df, "cursor"), 242 | search_id = attr(search_df, "search_id"), 243 | #### 244 | max_pages = 1) 245 | attr(search_df2, "search_id") 246 | attr(search_df2, "cursor") 247 | ``` 248 | 249 | Note that the cursor is not equal to how many videos you got before, as the API also counts videos that are "deleted/marked as private by users etc." [See `max_count` in [Query Videos](https://developers.tiktok.com/doc/research-api-specs-query-videos)]. 
250 | 251 | -------------------------------------------------------------------------------- /vignettes/secuid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JBGruber/traktok/d97f4ea5fb4f728aba1f64f9f71f5740bd626442/vignettes/secuid.png -------------------------------------------------------------------------------- /vignettes/source-code.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JBGruber/traktok/d97f4ea5fb4f728aba1f64f9f71f5740bd626442/vignettes/source-code.png -------------------------------------------------------------------------------- /vignettes/unofficial-api.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Unofficial API" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{unofficial-api} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | 11 | 12 | The unofficial or hidden API is essentially what the TikTok website uses to display content to you. 13 | Partly based on [Deen Freelon's `Pyktok` Python module](https://github.com/dfreelon/pyktok), `traktok` contains functions to simulate a browser accessing some of these API endpoints. 14 | How these endpoints work was discovered through reverse engineering and TikTok might change how these endpoints operate at any moment. 15 | As of writing this (2023-11-28), there are functions that can: 16 | 17 | - search videos using a search term 18 | - get video details and the video files from a given video URL 19 | - get who follows a user 20 | - get who a user is following 21 | 22 | To use these functions, you have to log into TikTok at <https://www.tiktok.com/> first and then give `R` the cookies the browser uses to identify itself.
23 | 24 | 25 | # Authentication 26 | 27 | The easiest way to get the cookies needed for authentication is to export the necessary cookies from your browser using a browser extension (after logging in at TikTok.com at least once). 28 | I can recommend ["Get cookies.txt"](https://chrome.google.com/webstore/detail/get-cookiestxt-locally/cclelndahbckbenkjhflpdbgdldlbecc) for Chromium based browsers or ["cookies.txt"](https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/) for Firefox (note that almost all browsers used today are based on one of these). 29 | 30 | 31 | 32 | Save the cookies.txt file, which will look something like this: 33 | 34 | ``` 35 | # Netscape HTTP Cookie File 36 | # https://curl.haxx.se/rfc/cookie_spec.html 37 | # This is a generated file! Do not edit. 38 | 39 | .tiktok.com TRUE / TRUE 1728810805 cookie-consent {%22ga%22:true%2C%22af%... 40 | .tiktok.com TRUE / TRUE 1700471788 passport_csrf_token e07d3487c11ce5258a3... 41 | .tiktok.com TRUE / FALSE 1700471788 passport_csrf_token_default e07d3487c11... 42 | #HttpOnly_.tiktok.com TRUE / TRUE 1700493610 multi_sids 71573310862246389... 43 | #HttpOnly_.tiktok.com TRUE / TRUE 1700493610 cmpl_token AgQQAPORF-RO0rNtH... 44 | ... 45 | ``` 46 | 47 | **It does not matter if you download all cookies or just the ones specific to TikTok**, as we use the `cookiemonster` package to deal with that. 48 | To read the cookies into a specific encrypted file, simply use: 49 | 50 | 51 | ```r 52 | cookiemonster::add_cookies("tiktok.com_cookies.txt") 53 | ``` 54 | 55 | And that's it! 56 | `traktok` will access these cookies whenever necessary. 57 | 58 | 59 | # Usage 60 | ## Search videos 61 | 62 | To search for videos, you can use either `tt_search` or `tt_search_hidden`, which do the same, as long as you do not have a token for the Research API. 
63 | To get the first two pages of search results (one page has 12 videos), you can use this command: 64 | 65 | 66 | ```r 67 | rstats_df <- tt_search_hidden("#rstats", max_pages = 2) 68 | #> 69 | ℹ Getting page 1 70 | ⏲ waiting 0.5 seconds 71 | ℹ Getting page 1 72 | ✔ Got page 1. Found 12 videos. [1.9s] 73 | #> 74 | ℹ Getting page 2 75 | ✔ Got page 2. Found 12 videos. [690ms] 76 | rstats_df 77 | #> # A tibble: 24 × 20 78 | #> video_id video_timestamp video_url video_length video_title 79 | #> 80 | #> 1 71151144… 2022-06-30 19:17:53 https://… 135 "R for Beg… 81 | #> 2 72522261… 2023-07-05 07:01:45 https://… 36 "Wow!!! TH… 82 | #> 3 72420686… 2023-06-07 22:05:16 https://… 34 "R GRAPHIC… 83 | #> 4 72134135… 2023-03-22 16:49:12 https://… 6 "R and me … 84 | #> 5 72576898… 2023-07-20 00:23:40 https://… 56 "Pie chart… 85 | #> 6 72999870… 2023-11-10 23:58:21 https://… 51 "Quick R Q… 86 | #> 7 72783048… 2023-09-13 13:40:21 https://… 36 "Quick R Q… 87 | #> 8 73029706… 2023-11-19 00:56:09 https://… 163 "What is c… 88 | #> 9 71670108… 2022-11-17 15:42:56 https://… 58 "Here’s an… 89 | #> 10 72933174… 2023-10-24 00:36:48 https://… 9 "#CapCut #… 90 | #> # ℹ 14 more rows 91 | #> # ℹ 15 more variables: video_diggcount , 92 | #> # video_sharecount , video_commentcount , 93 | #> # video_playcount , video_is_ad , author_name , 94 | #> # author_nickname , author_followercount , 95 | #> # author_followingcount , author_heartcount , 96 | #> # author_videocount , author_diggcount , … 97 | ``` 98 | 99 | This already gives you pretty much all information you could want about the videos that were found. 
100 | 101 | ## Get metadata and download videos 102 | 103 | However, you can obtain some more information, and importantly the video file, using `tt_videos`: 104 | 105 | 106 | ```r 107 | rstats_df2 <- tt_videos(rstats_df$video_url[1:2], save_video = TRUE) 108 | #> 109 | ℹ Getting video 7115114419314560298 110 | ⏲ waiting 0.2 seconds 111 | ℹ Getting video 7115114419314560298 112 | ✔ Got video 7115114419314560298 (1/2). File size: 2.5 Mb. [2.5s] 113 | #> 114 | ℹ Getting video 7252226153828584731 115 | ✔ Got video 7252226153828584731 (2/2). File size: 1.7 Mb. [999ms] 116 | rstats_df2 117 | #> # A tibble: 2 × 19 118 | #> video_id video_url video_timestamp video_length video_title 119 | #> 120 | #> 1 711511441… https://… 2022-06-30 19:17:53 135 R for Begi… 121 | #> 2 725222615… https://… 2023-07-05 07:01:45 36 Wow!!! THI… 122 | #> # ℹ 14 more variables: video_locationcreated , 123 | #> # video_diggcount , video_sharecount , 124 | #> # video_commentcount , video_playcount , 125 | #> # author_username , author_nickname , 126 | #> # author_bio , download_url , html_status , 127 | #> # music , challenges , is_classified , 128 | #> # video_fn 129 | ``` 130 | 131 | Per default, the function waits between one and ten seconds (chosen at random) between making two calls, to not make it too obvious that data is scraped from TikTok. 132 | You can speed up the process (at your own risk), by changing the `sleep_pool` argument, which controls the minimum and maximum number of seconds to wait: 133 | 134 | 135 | ```r 136 | rstats_df3 <- tt_videos(rstats_df$video_url[3:4], save_video = TRUE, sleep_pool = 0.1) 137 | #> 138 | ℹ Getting video 7242068680484408581 139 | ⏲ waiting 0.1 seconds 140 | ℹ Getting video 7242068680484408581 141 | ✔ Got video 7242068680484408581 (1/2). File size: 1.8 Mb. [2.6s] 142 | #> 143 | ℹ Getting video 7213413598998056234 144 | ✔ Got video 7213413598998056234 (2/2). File size: 598.1 Kb. 
[1.7s] 145 | rstats_df3 146 | #> # A tibble: 2 × 19 147 | #> video_id video_url video_timestamp video_length video_title 148 | #> 149 | #> 1 724206868… https://… 2023-06-07 22:05:16 34 "R GRAPHIC… 150 | #> 2 721341359… https://… 2023-03-22 16:49:12 6 "R and me … 151 | #> # ℹ 14 more variables: video_locationcreated , 152 | #> # video_diggcount , video_sharecount , 153 | #> # video_commentcount , video_playcount , 154 | #> # author_username , author_nickname , 155 | #> # author_bio , download_url , html_status , 156 | #> # music , challenges , is_classified , 157 | #> # video_fn 158 | ``` 159 | 160 | When you are scraping a lot of URLs, the function might fail eventually, due to a poor connection or because TikTok is blocking your requests. 161 | It therefore usually makes sense to save your progress in a cache directory: 162 | 163 | 164 | ```r 165 | rstats_df3 <- tt_videos(rstats_df$video_url[5:6], cache_dir = "rstats") 166 | #> 167 | ℹ Getting video 7257689890245201153 168 | ⏲ waiting 1.7 seconds 169 | ℹ Getting video 7257689890245201153 170 | ✔ Got video 7257689890245201153 (1/2). File size: 1.7 Mb. [2.6s] 171 | #> 172 | ℹ Getting video 7299987059417042209 173 | ✔ Got video 7299987059417042209 (2/2). File size: 1.2 Mb. [1.8s] 174 | list.files("rstats") 175 | #> [1] "7257689890245201153.json" "7299987059417042209.json" 176 | ``` 177 | 178 | Note that the video files are downloaded into the `dir` directory (your working directory by default), independently from your cache directory. 179 | 180 | If there is information that you feel is missing from the `data.frame` that `tt_videos` returns, you can look at the raw, unparsed json data using: 181 | 182 | 183 | ```r 184 | rstats_list1 <- tt_request_hidden(rstats_df$video_url[1]) |> 185 | jsonlite::fromJSON() 186 | ``` 187 | 188 | Parsing the result into a list using `fromJSON` results in a rather complex nested list.
189 | You can look through this and see for yourself if the data you are interested in is there. 190 | 191 | ## Get followers and who a user is following 192 | 193 | Getting followers and who a user is following is (at the moment?) a little tricky to use, since TikTok blocks requests to a user's profile page with anti-scraping measures. 194 | To circumvent that, you can open a user's page in your browser and then right-click to show the source code:^[ 195 | You can of course also use tools like [Playwright](https://playwright.dev/), [Selenium](https://www.selenium.dev/), or [Puppeteer](https://pptr.dev/) to automate this process. 196 | But that is beyond the scope of this vignette and package. 197 | ] 198 | 199 | 200 | ![](source-code.png) 201 | 202 | You can then search for and copy the `authorSecId` value: 203 | 204 | ![](secuid.png) 205 | 206 | Once you have this `authorSecId` you can look up a maximum of 5,000 followers per account: 207 | 208 | 209 | ```r 210 | tt_get_follower(secuid = "MS4wLjABAAAAwiH32UMb5RenqEN7duyfLIeGQgSIx9WtgtOILt55q6ueUXgz4gHqZC5HFx4nabPi", 211 | verbose = FALSE) 212 | #> 213 | #> # A tibble: 1,116 × 27 214 | #> avatarLarger avatarMedium avatarThumb commentSetting 215 | #> 216 | #> 1 https://p16-sign-sg.tik… https://p16… https://p1… 0 217 | #> 2 https://p16-sign-va.tik… https://p16… https://p1… 0 218 | #> 3 https://p16-sign-va.tik… https://p16… https://p1… 0 219 | #> 4 https://p16-sign-va.tik… https://p16… https://p1… 0 220 | #> 5 https://p16-sign-va.tik… https://p16… https://p1… 0 221 | #> 6 https://p16-sign-va.tik… https://p16… https://p1… 0 222 | #> 7 https://p16-sign-va.tik… https://p16… https://p1… 0 223 | #> 8 https://p16-sign-va.tik… https://p16… https://p1… 0 224 | #> 9 https://p16-sign-va.tik… https://p16… https://p1… 0 225 | #> 10 https://p16-sign-va.tik… https://p16… https://p1… 0 226 | #> # ℹ 1,106 more rows 227 | #> # ℹ 23 more variables: downloadSetting , duetSetting , 228 | #> # ftc , id , isADVirtual , nickname , 229 | #> #
openFavorite , privateAccount , relation , 230 | #> # secUid , secret , signature , 231 | #> # stitchSetting , ttSeller , uniqueId , 232 | #> # verified , diggCount , followerCount , … 233 | ``` 234 | 235 | Likewise, you can also check who this account follows: 236 | 237 | 238 | ```r 239 | tt_get_following(secuid = "MS4wLjABAAAAwiH32UMb5RenqEN7duyfLIeGQgSIx9WtgtOILt55q6ueUXgz4gHqZC5HFx4nabPi", 240 | verbose = FALSE) 241 | #> 242 | #> # A tibble: 489 × 28 243 | #> avatarLarger avatarMedium avatarThumb commentSetting 244 | #> 245 | #> 1 https://p16-sign-va.tik… https://p16… https://p1… 0 246 | #> 2 https://p16-sign-va.tik… https://p16… https://p1… 0 247 | #> 3 https://p16-sign-va.tik… https://p16… https://p1… 0 248 | #> 4 https://p16-sign-va.tik… https://p16… https://p1… 0 249 | #> 5 https://p16-sign-va.tik… https://p16… https://p1… 0 250 | #> 6 https://p16-sign-va.tik… https://p16… https://p1… 0 251 | #> 7 https://p16-sign-va.tik… https://p16… https://p1… 0 252 | #> 8 https://p16-sign-va.tik… https://p16… https://p1… 0 253 | #> 9 https://p16-sign-va.tik… https://p16… https://p1… 0 254 | #> 10 https://p16-sign-va.tik… https://p16… https://p1… 0 255 | #> # ℹ 479 more rows 256 | #> # ℹ 24 more variables: downloadSetting , duetSetting , 257 | #> # ftc , id , isADVirtual , nickname , 258 | #> # openFavorite , privateAccount , relation , 259 | #> # secUid , secret , signature , 260 | #> # stitchSetting , ttSeller , uniqueId , 261 | #> # verified , diggCount , followerCount , … 262 | ``` 263 | 264 | 265 | 266 | ```r 267 | list.files(pattern = ".mp4") |> 268 | unlink() 269 | unlink("rstats", recursive = TRUE) 270 | ``` 271 | 272 | -------------------------------------------------------------------------------- /vignettes/unofficial-api.Rmd.orig: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Unofficial API" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{unofficial-api} 6 | 
%\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | ```{r, include = FALSE} 11 | knitr::opts_chunk$set( 12 | collapse = TRUE, 13 | comment = "#>", 14 | eval = TRUE 15 | ) 16 | ``` 17 | 18 | The unofficial or hidden API is essentially what the TikTok website uses to display content to you. 19 | Partly based on [Deen Freelon's `Pyktok` Python module](https://github.com/dfreelon/pyktok), `traktok` contains functions to simulate a browser accessing some of these API endpoints. 20 | How these endpoints work was discovered through reverse engineering and TikTok might change how these endpoints operate at any moment. 21 | As of writing this (2023-11-28), there are functions that can: 22 | 23 | - search videos using a search term 24 | - get video details and the video files from a given video URL 25 | - get who follows a user 26 | - get who a user is following 27 | 28 | To use these functions, you have to log into TikTok at <https://www.tiktok.com/> first and then give `R` the cookies the browser uses to identify itself. 29 | 30 | 31 | # Authentication 32 | 33 | The easiest way to get the cookies needed for authentication is to export the necessary cookies from your browser using a browser extension (after logging in at TikTok.com at least once). 34 | I can recommend ["Get cookies.txt"](https://chrome.google.com/webstore/detail/get-cookiestxt-locally/cclelndahbckbenkjhflpdbgdldlbecc) for Chromium based browsers or ["cookies.txt"](https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/) for Firefox (note that almost all browsers used today are based on one of these). 35 | 36 | `r knitr::include_graphics("cookies.png")` 37 | 38 | Save the cookies.txt file, which will look something like this: 39 | 40 | ``` 41 | # Netscape HTTP Cookie File 42 | # https://curl.haxx.se/rfc/cookie_spec.html 43 | # This is a generated file! Do not edit. 44 | 45 | .tiktok.com TRUE / TRUE 1728810805 cookie-consent {%22ga%22:true%2C%22af%...
46 | .tiktok.com TRUE / TRUE 1700471788 passport_csrf_token e07d3487c11ce5258a3... 47 | .tiktok.com TRUE / FALSE 1700471788 passport_csrf_token_default e07d3487c11... 48 | #HttpOnly_.tiktok.com TRUE / TRUE 1700493610 multi_sids 71573310862246389... 49 | #HttpOnly_.tiktok.com TRUE / TRUE 1700493610 cmpl_token AgQQAPORF-RO0rNtH... 50 | ... 51 | ``` 52 | 53 | **It does not matter if you download all cookies or just the ones specific to TikTok**, as we use the `cookiemonster` package to deal with that. 54 | To read the cookies into a specific encrypted file, simply use: 55 | 56 | ```{r eval=FALSE} 57 | cookiemonster::add_cookies("tiktok.com_cookies.txt") 58 | ``` 59 | 60 | And that's it! 61 | `traktok` will access these cookies whenever necessary. 62 | 63 | 64 | # Usage 65 | ## Search videos 66 | 67 | To search for videos, you can use either `tt_search` or `tt_search_hidden`, which do the same, as long as you do not have a token for the Research API. 68 | To get the first two pages of search results (one page has 12 videos), you can use this command: 69 | 70 | ```{r} 71 | rstats_df <- tt_search_hidden("#rstats", max_pages = 2) 72 | rstats_df 73 | ``` 74 | 75 | This already gives you pretty much all information you could want about the videos that were found. 76 | 77 | ## Get metadata and download videos 78 | 79 | However, you can obtain some more information, and importantly the video file, using `tt_videos`: 80 | 81 | ```{r} 82 | rstats_df2 <- tt_videos(rstats_df$video_url[1:2], save_video = TRUE) 83 | rstats_df2 84 | ``` 85 | 86 | Per default, the function waits between one and ten seconds (chosen at random) between making two calls, to not make it too obvious that data is scraped from TikTok. 
87 | You can speed up the process (at your own risk), by changing the `sleep_pool` argument, which controls the minimum and maximum number of seconds to wait: 88 | 89 | ```{r} 90 | rstats_df3 <- tt_videos(rstats_df$video_url[3:4], save_video = TRUE, sleep_pool = 0.1) 91 | rstats_df3 92 | ``` 93 | 94 | When you are scraping a lot of URLs, the function might fail eventually, due to a poor connection or because TikTok is blocking your requests. 95 | It therefore usually makes sense to save your progress in a cache directory: 96 | 97 | ```{r} 98 | rstats_df3 <- tt_videos(rstats_df$video_url[5:6], cache_dir = "rstats") 99 | list.files("rstats") 100 | ``` 101 | 102 | Note that the video files are downloaded into the `dir` directory (your working directory by default), independently from your cache directory. 103 | 104 | If there is information that you feel is missing from the `data.frame` that `tt_videos` returns, you can look at the raw, unparsed json data using: 105 | 106 | ```{r eval=FALSE} 107 | rstats_list1 <- tt_request_hidden(rstats_df$video_url[1]) |> 108 | jsonlite::fromJSON() 109 | ``` 110 | 111 | Parsing the result into a list using `fromJSON` results in a rather complex nested list. 112 | You can look through this and see for yourself if the data you are interested in is there. 113 | 114 | ## Get followers and who a user is following 115 | 116 | Getting followers and who a user is following is (at the moment?) a little tricky to use, since TikTok blocks requests to a user's profile page with anti-scraping measures. 117 | To circumvent that, you can open a user's page in your browser and then right-click to show the source code:^[ 118 | You can of course also use tools like [Playwright](https://playwright.dev/), [Selenium](https://www.selenium.dev/), or [Puppeteer](https://pptr.dev/) to automate this process. 119 | But that is beyond the scope of this vignette and package.
120 | ] 121 | 122 | 123 | ![](source-code.png) 124 | 125 | You can then search for and copy the `authorSecId` value: 126 | 127 | ![](secuid.png) 128 | 129 | Once you have this `authorSecId` you can look up a maximum of 5,000 followers per account: 130 | 131 | ```{r} 132 | tt_get_follower(secuid = "MS4wLjABAAAAwiH32UMb5RenqEN7duyfLIeGQgSIx9WtgtOILt55q6ueUXgz4gHqZC5HFx4nabPi", 133 | verbose = FALSE) 134 | ``` 135 | 136 | Likewise, you can also check who this account follows: 137 | 138 | ```{r} 139 | tt_get_following(secuid = "MS4wLjABAAAAwiH32UMb5RenqEN7duyfLIeGQgSIx9WtgtOILt55q6ueUXgz4gHqZC5HFx4nabPi", 140 | verbose = FALSE) 141 | ``` 142 | 143 | 144 | ```{r} 145 | list.files(pattern = ".mp4") |> 146 | unlink() 147 | unlink("rstats", recursive = TRUE) 148 | ``` 149 | 150 | --------------------------------------------------------------------------------