├── .Rbuildignore
├── .github
├── .gitignore
└── workflows
│ ├── R-CMD-check.yaml
│ ├── pkgdown.yaml
│ └── still-working.yaml
├── .gitignore
├── DESCRIPTION
├── NAMESPACE
├── NEWS.md
├── R
├── api_hidden.r
├── api_research.r
├── api_research_extended.r
├── auth_check.r
├── auth_hidden.r
├── auth_research.r
├── last_.r
├── parse_hidden.r
├── parse_research.r
├── query_research.r
├── shorthands.r
└── utils.R
├── README.Rmd
├── README.md
├── _pkgdown.yml
├── codecov.yml
├── inst
├── CITATION
└── WORDLIST
├── man
├── auth_check.Rd
├── auth_hidden.Rd
├── auth_research.Rd
├── figures
│ ├── api-both.svg
│ ├── api-research.svg
│ ├── api-unofficial.svg
│ └── logo.png
├── last_query.Rd
├── print.traktok_query.Rd
├── print.tt_results.Rd
├── query.Rd
├── tt_comments_api.Rd
├── tt_get_follower.Rd
├── tt_get_following_hidden.Rd
├── tt_json.Rd
├── tt_playlist_api.Rd
├── tt_request_hidden.Rd
├── tt_search.Rd
├── tt_search_api.Rd
├── tt_search_hidden.Rd
├── tt_user_follower_api.Rd
├── tt_user_info_api.Rd
├── tt_user_info_hidden.Rd
├── tt_user_liked_videos_api.Rd
├── tt_user_pinned_videos_api.Rd
├── tt_user_reposted_api.Rd
├── tt_user_videos.Rd
├── tt_user_videos_api.Rd
├── tt_user_videos_hidden.Rd
└── tt_videos_hidden.Rd
├── tests
├── spelling.R
├── testthat.R
└── testthat
│ ├── example_query.json
│ ├── example_request.json
│ ├── example_resp_comments.json
│ ├── example_resp_q_user.json
│ ├── example_resp_q_videos.json
│ ├── test-research_api.R
│ ├── test-research_auth.R
│ ├── test-tt_utils.R
│ └── test-tt_videos.R
├── traktok.Rproj
└── vignettes
├── .gitignore
├── cookies.png
├── function_overview.csv
├── make_vignettes.r
├── research-api.Rmd
├── research-api.Rmd.orig
├── secuid.png
├── source-code.png
├── unofficial-api.Rmd
└── unofficial-api.Rmd.orig
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | tiktok.com_cookies.txt
4 | ^README\.Rmd$
5 | ^codecov\.yml$
6 | ^\.github$
7 | ^doc$
8 | ^Meta$
9 | ^_pkgdown\.yml$
10 | ^docs$
11 | ^pkgdown$
12 |
--------------------------------------------------------------------------------
/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 |
--------------------------------------------------------------------------------
/.github/workflows/R-CMD-check.yaml:
--------------------------------------------------------------------------------
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
  push:
    branches: [main, master]
  pull_request:
    branches: [main, master]

name: R-CMD-check

jobs:
  R-CMD-check:
    runs-on: ${{ matrix.config.os }}

    name: ${{ matrix.config.os }} (${{ matrix.config.r }})

    strategy:
      fail-fast: false
      matrix:
        config:
          - {os: macOS-latest, r: 'release'}
          - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'}
          - {os: ubuntu-latest, r: 'release'}

    env:
      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
      R_KEEP_PKG_SOURCE: yes
      # cookies used by the tests that talk to the unofficial API
      TT_COOKIES: ${{ secrets.TT_COOKIES }}

    steps:
      # same major version as used in pkgdown.yaml
      - uses: actions/checkout@v4

      - uses: r-lib/actions/setup-pandoc@v2

      - uses: r-lib/actions/setup-r@v2
        with:
          r-version: ${{ matrix.config.r }}
          http-user-agent: ${{ matrix.config.http-user-agent }}
          use-public-rspm: true

      - uses: r-lib/actions/setup-r-dependencies@v2
        with:
          extra-packages: any::rcmdcheck
          needs: check

      - uses: r-lib/actions/check-r-package@v2
        with:
          upload-snapshots: true

      - name: Test coverage
        run: covr::codecov()
        shell: Rscript {0}

      # pinned to a released major version instead of the moving `main` ref
      - name: Upload check results
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: ${{ runner.os }}-r${{ matrix.config.r }}-results
          path: check
61 |
--------------------------------------------------------------------------------
/.github/workflows/pkgdown.yaml:
--------------------------------------------------------------------------------
1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
3 | on:
4 | push:
5 | branches: [main, master]
6 | pull_request:
7 | branches: [main, master]
8 | release:
9 | types: [published]
10 | workflow_dispatch:
11 |
12 | name: pkgdown
13 |
14 | jobs:
15 | pkgdown:
16 | runs-on: ubuntu-latest
17 | # Only restrict concurrency for non-PR jobs
18 | concurrency:
19 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
20 | env:
21 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
22 | permissions:
23 | contents: write
24 | steps:
25 | - uses: actions/checkout@v4
26 |
27 | - uses: r-lib/actions/setup-pandoc@v2
28 |
29 | - uses: r-lib/actions/setup-r@v2
30 | with:
31 | use-public-rspm: true
32 |
33 | - uses: r-lib/actions/setup-r-dependencies@v2
34 | with:
35 | extra-packages: any::pkgdown, local::.
36 | needs: website
37 |
38 | - name: Build site
39 | run: |
40 | options(rmarkdown.html_vignette.check_title = FALSE)
41 | pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
42 | shell: Rscript {0}
43 |
44 | - name: Deploy to GitHub pages 🚀
45 | if: github.event_name != 'pull_request'
46 | uses: JamesIves/github-pages-deploy-action@v4.5.0
47 | with:
48 | clean: false
49 | branch: gh-pages
50 | folder: docs
51 |
--------------------------------------------------------------------------------
/.github/workflows/still-working.yaml:
--------------------------------------------------------------------------------
# reduced workflow to test if the package still works or if TikTok has
# changed anything
on:
  schedule:
    # every day at 10:30 UTC
    - cron: '30 10 * * *'

name: Still-Working?

jobs:
  R-CMD-check:
    runs-on: ${{ matrix.config.os }}

    name: ${{ matrix.config.os }} (${{ matrix.config.r }})

    strategy:
      fail-fast: false
      matrix:
        config:
          - {os: ubuntu-latest, r: 'release'}

    env:
      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
      R_KEEP_PKG_SOURCE: yes
      # cookies used by the tests that talk to the unofficial API
      TT_COOKIES: ${{ secrets.TT_COOKIES }}

    steps:
      # same major version as used in pkgdown.yaml
      - uses: actions/checkout@v4

      - uses: r-lib/actions/setup-r@v2
        with:
          r-version: ${{ matrix.config.r }}
          http-user-agent: ${{ matrix.config.http-user-agent }}
          use-public-rspm: true

      - uses: r-lib/actions/setup-r-dependencies@v2
        with:
          extra-packages: any::rcmdcheck
          needs: check

      - uses: r-lib/actions/check-r-package@v2
        with:
          upload-snapshots: true
43 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | tiktok.com_cookies.txt
6 | .Rproj.user
7 | .Renviron
8 | inst/doc
9 | /doc/
10 | /Meta/
11 | www.tiktok.com_cookies.txt
12 | cookies.txt
13 | test.r
14 | docs
15 | pkgdown
16 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: traktok
2 | Type: Package
3 | Title: Collecting TikTok Data
4 | Version: 0.0.7.9000
5 | Authors@R:
6 | person(given = "Johannes B.",
7 | family = "Gruber",
8 | email = "JohannesB.Gruber@gmail.com",
9 | role = c("aut", "cre"),
10 | comment = c(ORCID = "0000-0001-9177-1772"))
11 | Description: Getting TikTok data (<https://www.tiktok.com/>) through the official
12 | and unofficial APIs—in other words, you can track TikTok. Originally a port
13 | of Deen Freelon's Pyktok (<https://github.com/dfreelon/pyktok>) Python module.
14 | License: GPL-3
15 | Depends:
16 | R (>= 4.1.0)
17 | Imports:
18 | askpass,
19 | cli,
20 | cookiemonster,
21 | curl,
22 | dplyr,
23 | glue,
24 | httr2,
25 | jsonlite,
26 | lobstr,
27 | methods,
28 | openssl,
29 | purrr,
30 | rlang,
31 | rvest,
32 | stats,
33 | tibble
34 | Suggests:
35 | covr,
36 | knitr,
37 | rmarkdown,
38 | spelling,
39 | testthat (>= 3.0.0)
40 | URL: https://github.com/JBGruber/traktok, https://jbgruber.github.io/traktok/
41 | BugReports: https://github.com/JBGruber/traktok/issues
42 | Encoding: UTF-8
43 | RoxygenNote: 7.3.2
44 | Language: en-GB
45 | Config/testthat/edition: 3
46 | Config/testthat/parallel: false
47 | VignetteBuilder: knitr
48 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | S3method(print,traktok_query)
4 | S3method(print,tt_results)
5 | export(auth_check)
6 | export(auth_hidden)
7 | export(auth_research)
8 | export(last_comments)
9 | export(last_query)
10 | export(query)
11 | export(query_and)
12 | export(query_not)
13 | export(query_or)
14 | export(tt_comments)
15 | export(tt_comments_api)
16 | export(tt_get_follower)
17 | export(tt_get_follower_hidden)
18 | export(tt_get_following)
19 | export(tt_get_following_hidden)
20 | export(tt_get_liked)
21 | export(tt_get_pinned)
22 | export(tt_get_reposted)
23 | export(tt_json)
24 | export(tt_playlist)
25 | export(tt_playlist_api)
26 | export(tt_query_videos)
27 | export(tt_request_hidden)
28 | export(tt_search)
29 | export(tt_search_api)
30 | export(tt_search_hidden)
31 | export(tt_user_follower_api)
32 | export(tt_user_following_api)
33 | export(tt_user_info)
34 | export(tt_user_info_api)
35 | export(tt_user_info_hidden)
36 | export(tt_user_liked_videos_api)
37 | export(tt_user_pinned_videos_api)
38 | export(tt_user_reposted_api)
39 | export(tt_user_videos)
40 | export(tt_user_videos_api)
41 | export(tt_user_videos_hidden)
42 | export(tt_videos)
43 | export(tt_videos_hidden)
44 | importFrom(rlang,.data)
45 |
--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
1 | # traktok 0.0.7.9000
2 |
3 | * overhauls tt_user_info_hidden (some breaking changes as names in the output have changed)
4 |
5 | # traktok 0.0.6.9000
6 |
7 | * adds access to additional Research API endpoints (tt_user_liked_videos_api, tt_user_pinned_videos_api, tt_user_follower_api, tt_user_following_api, tt_user_reposted_api, and tt_playlist_api)
8 | * tt_videos_hidden now supports Video IDs
9 | * adds tt_user_videos_api, a wrapper around tt_search_api to query user videos
10 |
11 | # traktok 0.0.5.9000
12 |
13 | * adds experimental tt_user_videos_hidden and tt_user_info_hidden that rely on chromote
14 |
--------------------------------------------------------------------------------
/R/api_hidden.r:
--------------------------------------------------------------------------------
#' Get video metadata and video files from URLs
#'
#' @description \ifelse{html}{\figure{api-unofficial.svg}{options: alt='[Works on:
#'   Unofficial API]'}}{\strong{[Works on: Unofficial API]}}
#'
#' @param video_urls vector of URLs or IDs to TikTok videos.
#' @param save_video logical. Should the videos be downloaded.
#' @param overwrite logical. If save_video=TRUE and the file already exists,
#'   should it be overwritten?
#' @param dir directory to save videos files to.
#' @param cache_dir if set to a path, one JSON file with the raw metadata will
#'   be written to disk for each video. This is useful if you have many videos
#'   and want to pick up where you left if something goes wrong.
#' @param sleep_pool a vector of numbers from which a waiting period is randomly
#'   drawn.
#' @param max_tries how often to retry if a request fails.
#' @param cookiefile path to your cookiefile. Usually not needed after running
#'   \link{auth_hidden} once. See \code{vignette("unofficial-api", package =
#'   "traktok")} for more information on authentication.
#' @param verbose should the function print status updates to the screen?
#' @param ... handed to \code{tt_videos_hidden} (for tt_videos) and (further) to
#'   \link{tt_request_hidden}.
#'
#' @details The function will wait between scraping two videos to make it less
#'   obvious that a scraper is accessing the site. The period is drawn randomly
#'   from the `sleep_pool` and multiplied by a random fraction.
#'
#'   Note that the video file has to be requested in the same session as
#'   the metadata. So while the URL to the video file is included in the
#'   metadata, this link will not work in most cases.
#'
#' @return a data.frame
#' @export
#'
#' @examples
#' \dontrun{
#' tt_videos("https://www.tiktok.com/@tiktok/video/7106594312292453675")
#' }
tt_videos_hidden <- function(video_urls,
                             save_video = TRUE,
                             overwrite = FALSE,
                             dir = ".",
                             cache_dir = NULL,
                             sleep_pool = 1:10,
                             max_tries = 5L,
                             cookiefile = NULL,
                             verbose = TRUE,
                             ...) {

  # de-duplicate again after id2url(): an ID and the matching URL would
  # otherwise both survive and be requested twice
  video_urls <- unique(id2url(unique(video_urls)))
  n_urls <- length(video_urls)

  if (verbose) cli::cli_alert_info("Getting {n_urls} unique link{?s}")
  if (!is.null(cookiefile)) cookiemonster::add_cookies(cookiefile)
  cookies <- cookiemonster::get_cookies("^(www.)*tiktok.com", as = "string")

  check_dir(dir, "dir")
  check_dir(cache_dir, "cache_dir")

  rows <- purrr::map(seq_along(video_urls), function(i) {
    u <- video_urls[[i]]
    video_id <- extract_regex(
      u,
      "(?<=/video/)(.+?)(?=\\?|$)|(?<=/photo/)(.+?)(?=\\?|$)|(?<=https://vm.tiktok.com/).+?(?=/|$)"
    )
    # done_msg is filled in later and interpolated when the step finishes
    done_msg <- ""
    if (verbose) cli::cli_progress_step(
      "Getting video {video_id}",
      msg_done = "Got video {video_id} ({i}/{n_urls}). {done_msg}"
    )

    # budget for re-requesting metadata that cannot be parsed (see get_video)
    the$retries <- 5L
    video_dat <- get_video(url = u,
                           video_id = video_id,
                           overwrite = overwrite,
                           cache_dir = cache_dir,
                           max_tries = max_tries,
                           cookies = cookies,
                           verbose = verbose)

    if (save_video && isTRUE(video_dat$video_status_code == 0L)) {
      if (isTRUE(video_dat$is_slides)) {
        # slides: download_url holds several image URLs separated by ", "
        download_urls <- unlist(
          strsplit(video_dat$download_url, ", ", fixed = TRUE)
        )
        video_fns <- file.path(dir, paste0(video_dat$author_username,
                                           "_video_",
                                           video_dat$video_id,
                                           "_",
                                           seq_along(download_urls),
                                           ".jpeg"))
        purrr::walk2(download_urls, video_fns, function(u, f) {
          curl::curl_download(url = u, destfile = f, quiet = TRUE)
        })
      } else {
        video_fn <- file.path(dir, paste0(video_dat$author_username, "_video_",
                                          video_dat$video_id, ".mp4"))

        f_name <- save_video(video_dat = video_dat,
                             video_fn = video_fn,
                             overwrite = overwrite,
                             max_tries = max_tries,
                             cookies = cookies)

        # f_size is NA when the download failed, which also triggers the warning
        f_size <- file.size(f_name)
        if (isTRUE(f_size > 1000)) {
          size_label <- format(structure(f_size, class = "object_size"),
                               units = "auto")
          done_msg <- glue::glue("File size: {size_label}.")
        } else {
          cli::cli_warn("Video {video_id} has a very small file size (less than 1kB) and is likely corrupt.")
        }
        video_dat$video_fn <- video_fn
      }
    }

    # no pause after the last video or when nothing was actually requested
    if (i != n_urls && !isTRUE(the$skipped)) {
      wait(sleep_pool, verbose)
    }
    the$skipped <- FALSE # reset skipped

    video_dat
  })

  dplyr::bind_rows(rows)
}
128 |
129 |
#' Request (or read from cache) and parse the metadata of one video
#'
#' The raw JSON is requested with tt_request_hidden() unless a cached copy
#' exists in `cache_dir` and `overwrite` is FALSE. If the JSON cannot be parsed,
#' the request is repeated (ignoring the cache) as long as `the$retries` is
#' larger than 0; the caller is responsible for setting that counter.
#'
#' @param url URL of the video.
#' @param video_id ID of the video, used for the cache file name.
#' @param overwrite if TRUE, an existing cache file is ignored and replaced.
#' @param cache_dir directory for cached JSON files or NULL for no caching.
#' @param max_tries handed to tt_request_hidden().
#' @param cookies,verbose currently unused; kept for a stable internal
#'   interface.
#'
#' @return the value of parse_video(); an error is raised when the data still
#'   cannot be parsed after all retries.
#' @noRd
get_video <- function(url,
                      video_id,
                      overwrite,
                      cache_dir,
                      max_tries,
                      cookies,
                      verbose) {

  use_cache <- !is.null(cache_dir)
  # file.exists("") is FALSE, so without a cache a request is always made
  json_fn <- if (use_cache) file.path(cache_dir, paste0(video_id, ".json")) else ""

  if (overwrite || !file.exists(json_fn)) {
    tt_json <- tt_request_hidden(url, max_tries = max_tries)
    if (use_cache) writeLines(tt_json, json_fn, useBytes = TRUE)
    # a real request was made (possibly after a corrupt cache file was found),
    # so the caller should wait before the next one
    the$skipped <- FALSE
  } else {
    tt_json <- readChar(json_fn, nchars = file.size(json_fn), useBytes = TRUE)
    # TODO: not ideal as not consistent with request
    attr(tt_json, "url_full") <- url
    attr(tt_json, "html_status") <- 200L
    the$skipped <- TRUE
  }

  # make sure json can be parsed, otherwise retry
  out <- try(parse_video(tt_json, video_id), silent = TRUE)
  if (!methods::is(out, "try-error")) {
    return(out)
  }

  if (isTRUE(the$retries > 0)) {
    the$retries <- the$retries - 1
    return(get_video(url,
                     video_id,
                     overwrite = TRUE, # most common reason for failure here is a malformed cached json
                     cache_dir,
                     max_tries,
                     cookies,
                     verbose))
  }

  # fail with a clear message instead of handing a try-error object to callers
  # that index the result with `$`
  cli::cli_abort(
    "Could not parse the data retrieved for video {video_id} from {url}.",
    parent = attr(out, "condition")
  )
}
167 |
168 |
#' Download the video file belonging to one row of video metadata
#'
#' @param video_dat metadata as returned by get_video(); `download_url`,
#'   `video_url` and `video_id` are used.
#' @param video_fn path the video is written to.
#' @param overwrite if FALSE, an existing file at `video_fn` is kept and no
#'   download is attempted.
#' @param max_tries maximum number of download attempts.
#' @param cookies cookie string sent with the download request.
#'
#' @return the path of the downloaded (or already existing) file; an object of
#'   class "try-error" if no download succeeded or no download URL is
#'   available.
#' @noRd
save_video <- function(video_dat,
                       video_fn,
                       overwrite,
                       max_tries,
                       cookies) {

  video_url <- video_dat$download_url
  # placeholder result that keeps the retry loop going until a download works
  f <- structure("", class = "try-error")

  if (is.null(video_url)) {
    cli::cli_warn("No valid video URL found for download.")
    return(f)
  }

  if (!overwrite && file.exists(video_fn)) {
    the$skipped <- TRUE
    return(video_fn)
  }

  while (methods::is(f, "try-error") && max_tries > 0) {
    the$skipped <- FALSE
    h <- curl::handle_setopt(
      curl::new_handle(),
      cookie = cookies,
      referer = "https://www.tiktok.com/"
    )
    f <- try(curl::curl_download(
      video_url, video_fn, quiet = TRUE, handle = h
    ), silent = TRUE)

    max_tries <- max_tries - 1

    # only prepare another attempt if one is actually left
    if (methods::is(f, "try-error") && max_tries > 0) {
      cli::cli_alert_warning(
        "Download failed, retrying after 10 seconds. {max_tries} left."
      )
      # if this fails, the download link has likely expired, so better get a
      # new one; keep the old link if refreshing does not work either
      new_url <- try(get_video(url = video_dat$video_url,
                               video_id = video_dat$video_id,
                               overwrite = TRUE,
                               cache_dir = NULL,
                               max_tries = 1,
                               cookies = NULL,
                               verbose = FALSE)$download_url,
                     silent = TRUE)
      if (!methods::is(new_url, "try-error") && !is.null(new_url)) {
        video_url <- new_url
      }
      Sys.sleep(10)
    }
  }

  return(f)
}
221 |
222 |
#' Get json string from a TikTok URL using the hidden API
#'
#' @description \ifelse{html}{\figure{api-unofficial.svg}{options:
#'   alt='[Works on: Unofficial API]'}}{\strong{[Works on: Unofficial API]}}
#'
#'   Use this function in case you want to check the full data for a given
#'   TikTok video or account. In tt_videos, only an opinionated selection of
#'   data is included in the final object. If you want some different
#'   information, you can use this function.
#'
#' @param url a URL to a TikTok video or account
#'
#' @inheritParams tt_videos_hidden
#'
#' @return a JSON string with the attributes `url_full` (the final URL of the
#'   response), `html_status` (the HTTP status code) and `set-cookies` (the
#'   `set-cookie` response header). For status codes of 400 and above, a
#'   warning is issued and a minimal JSON string containing the status is
#'   returned instead. An error is raised if no JSON is found on the page.
#' @export
tt_request_hidden <- function(url,
                              max_tries = 5L,
                              cookiefile = NULL) {

  if (!is.null(cookiefile)) cookiemonster::add_cookies(cookiefile)
  cookies <- cookiemonster::get_cookies("^(www.)*tiktok.com", as = "string")

  req <- httr2::request(url) |>
    httr2::req_headers(
      "Accept-Encoding" = "gzip, deflate, sdch",
      "Accept-Language" = "en-US,en;q=0.8",
      "Upgrade-Insecure-Requests" = "1",
      "User-Agent" = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36",
      "Accept" = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
      "Cache-Control" = "max-age=0",
      "Connection" = "keep-alive"
    ) |>
    httr2::req_options(cookie = cookies) |>
    httr2::req_retry(max_tries = max_tries) |>
    httr2::req_timeout(seconds = 60L) |>
    # HTTP errors are handled below instead of raising an R error
    httr2::req_error(is_error = function(x) FALSE)

  res <- httr2::req_perform(req)
  status <- httr2::resp_status(res)
  if (status >= 400) {
    cli::cli_warn("Retrieving {url} resulted in html status {status}, the row will contain NAs.")
    out <- paste0('{"__DEFAULT_SCOPE__":{"webapp.video-detail":{"statusCode":"', status, '","statusMsg":"html_error"}}}')
  } else {
    out <- res |>
      httr2::resp_body_html() |>
      rvest::html_node("#SIGI_STATE,#__UNIVERSAL_DATA_FOR_REHYDRATION__") |>
      rvest::html_text()
  }

  # html_text() yields NA when the node is missing and nchar(NA_character_) is
  # NA, so test for "not clearly long enough" rather than for FALSE
  if (!isTRUE(nchar(out) > 10)) stop("no json found")

  attr(out, "url_full") <- res$url
  attr(out, "html_status") <- status
  attr(out, "set-cookies") <- httr2::resp_headers(res)[["set-cookie"]]
  return(out)
}
278 |
279 |
#' Search videos
#'
#' @description \ifelse{html}{\figure{api-unofficial.svg}{options: alt='[Works on:
#'   Unofficial API]'}}{\strong{[Works on: Unofficial API]}}
#'
#'   This is the version of \link{tt_search} that explicitly uses the unofficial
#'   API. Use \link{tt_search_api} for the Research API version.
#'
#' @param query query as one string
#' @param offset how many videos to skip. For example, if you already have the
#'   first X of a search.
#' @param max_pages how many pages to get before stopping the search.
#'
#' @inheritParams tt_videos_hidden
#'
#' @details The function will wait between requesting two pages to make it less
#'   obvious that a scraper is accessing the site. The period is drawn randomly
#'   from the `sleep_pool` and multiplied by a random fraction.
#'
#'   If a request fails with an HTTP status of 400 or above, a warning is
#'   issued and the results collected up to that point are returned.
#'
#' @return a data.frame
#' @export
#'
#' @examples
#' \dontrun{
#' tt_search_hidden("#rstats", max_pages = 2)
#' }
tt_search_hidden <- function(query,
                             offset = 0,
                             max_pages = Inf,
                             sleep_pool = 1:10,
                             max_tries = 5L,
                             cookiefile = NULL,
                             verbose = TRUE) {

  if (!is.null(cookiefile)) cookiemonster::add_cookies(cookiefile)
  cookies <- cookiemonster::get_cookies("^(www.)*tiktok.com", as = "string")

  results <- list()
  page <- 1
  has_more <- TRUE
  done_msg <- ""
  search_id <- NULL

  while (page <= max_pages && has_more) {
    if (verbose) cli::cli_progress_step(
      "Getting page {page}",
      # the message is evaluated when the step ends, i.e., after page was
      # incremented
      msg_done = "Got page {page - 1}. {done_msg}"
    )

    req <- httr2::request("https://www.tiktok.com/api/search/general/full/") |>
      httr2::req_url_query(
        aid = "1988",
        "cookie_enabled" = "true",
        "from_page" = "search",
        "keyword" = query,
        "offset" = offset,
        search_id = search_id
      ) |>
      httr2::req_options(cookie = cookies) |>
      httr2::req_headers(
        authority = "www.tiktok.com",
        accept = "*/*",
        `accept-language` = "en-GB,en;q=0.9,de-DE;q=0.8,de;q=0.7,en-US;q=0.6",
        `sec-ch-ua` = "\"Chromium\";v=\"115\", \"Not/A)Brand\";v=\"99\"",
        `sec-ch-ua-mobile` = "?0",
        `sec-ch-ua-platform` = "\"Linux\"",
        `sec-fetch-dest` = "empty",
        `sec-fetch-mode` = "cors",
        `sec-fetch-site` = "same-origin",
        `user-agent` = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"
      ) |>
      httr2::req_retry(max_tries = max_tries) |>
      httr2::req_timeout(seconds = 60L) |>
      # HTTP errors are handled below instead of raising an R error
      httr2::req_error(is_error = function(x) FALSE)

    resp <- httr2::req_perform(req)
    status <- httr2::resp_status(resp)
    if (status >= 400L) {
      # nothing to parse: stop here and return what was collected so far
      if (verbose) cli::cli_progress_done(result = "failed")
      cli::cli_warn("Requesting page {page} resulted in html status {status}, returning the results collected so far.")
      break
    }

    page_results <- parse_search(resp)
    results[[page]] <- page_results
    offset <- attr(page_results, "cursor")
    search_id <- attr(page_results, "search_id")
    # a missing has_more attribute is treated as "no more results"
    has_more <- isTRUE(as.logical(attr(page_results, "has_more")))
    done_msg <- glue::glue("Found {nrow(page_results)} videos.")
    page <- page + 1
    if (!has_more) {
      if (verbose) {
        cli::cli_progress_done()
        cli::cli_alert_info("Reached end of results")
      }
      break
    }
    if (page <= max_pages) wait(sleep_pool, verbose)
  }

  out <- dplyr::bind_rows(results)
  # without any successfully parsed page there is no video_id column to filter
  if (!"video_id" %in% names(out)) return(out)
  dplyr::filter(out, !is.na(.data$video_id))
}
375 |
376 |
#' Get infos about a user from the hidden API
#'
#' @param username A single URL to a TikTok account or a username. Usernames
#'   are turned into \code{https://www.tiktok.com/@username}.
#' @param parse Whether to parse the data into a data.frame (set to FALSE to get
#'   the full list).
#'
#' @return A data.frame of user info (or, if \code{parse = FALSE}, the data
#'   embedded in the account page as a nested list). If no data can be found on
#'   the page, a warning is shown and an empty list is used instead.
#' @export
#'
#' @examples
#' \dontrun{
#' df <- tt_user_info_hidden("https://www.tiktok.com/@fpoe_at")
#' }
tt_user_info_hidden <- function(username,
                                parse = TRUE) {

  rlang::check_installed("rvest", reason = "to use this function", version = "1.0.4")

  if (!is.character(username) || length(username) != 1L) {
    cli::cli_abort("{.arg username} must be a single username or account URL.")
  }

  if (!grepl("^https?://", username)) {
    username <- paste0("https://www.tiktok.com/@", username)
  }

  # "(www\\.)?" is an optional "www." prefix; the former "[www.]*" was a
  # character class that matched any run of "w" and "."
  if (!grepl("^https?://(www\\.)?tiktok\\.com/@.+", username)) {
    cli::cli_abort("The provided username does not resolve to a TikTok account URL: {username}")
  }

  sess <- rvest::read_html_live(username)

  # the account data is embedded in the page as a JSON string
  json <- sess |>
    rvest::html_element("#__UNIVERSAL_DATA_FOR_REHYDRATION__") |>
    rvest::html_text()

  if (is.na(json)) {
    cli::cli_alert_warning("Could not retrieve data for user")
    user_data <- list()
  } else {
    user_data <- jsonlite::fromJSON(json)
  }

  if (!parse) {
    return(user_data)
  }
  parse_user(user_data)
}
424 |
425 |
#' @title Get followers and following of a user from the hidden API
#'
#' @description \ifelse{html}{\figure{api-unofficial.svg}{options: alt='[Works on:
#'   Unofficial API]'}}{\strong{[Works on: Unofficial API]}}
#'
#'   Get up to 5,000 accounts who follow a user or accounts a user follows.
#'
#' @param secuid The secuid of a user. You can get it with
#'   \link{tt_user_info_hidden} by querying an account (see example).
#' @inheritParams tt_search_hidden
#'
#' @details Results are requested in pages of 30 accounts. If the response to a
#'   request cannot be read, the same page is requested again; after
#'   `max_tries` unreadable responses in a row, a warning is issued and the
#'   accounts collected so far are returned.
#'
#' @return a data.frame of followers or followed accounts
#' @export
#'
#' @examples
#' \dontrun{
#' df <- tt_user_info_hidden("https://www.tiktok.com/@fpoe_at")
#' tt_get_follower_hidden(df$secUid)
#' tt_get_following_hidden(df$secUid)
#' }
tt_get_following_hidden <- function(secuid,
                                    sleep_pool = 1:10,
                                    max_tries = 5L,
                                    cookiefile = NULL,
                                    verbose = TRUE) {

  get_user_list_hidden(secuid = secuid,
                       scene = "21",
                       label = "followed accounts",
                       sleep_pool = sleep_pool,
                       max_tries = max_tries,
                       cookiefile = cookiefile,
                       verbose = verbose)
}

#' @rdname tt_get_following_hidden
#' @export
tt_get_follower_hidden <- function(secuid,
                                   sleep_pool = 1:10,
                                   max_tries = 5L,
                                   cookiefile = NULL,
                                   verbose = TRUE) {

  get_user_list_hidden(secuid = secuid,
                       scene = "67",
                       label = "followers",
                       sleep_pool = sleep_pool,
                       max_tries = max_tries,
                       cookiefile = cookiefile,
                       verbose = verbose)
}

# Page through the user list endpoint and parse the collected accounts.
# `scene` selects the list that is requested ("21" is used for accounts a user
# follows, "67" for a user's followers); `label` is only used in status
# messages.
get_user_list_hidden <- function(secuid,
                                 scene,
                                 label,
                                 sleep_pool,
                                 max_tries,
                                 cookiefile,
                                 verbose) {

  if (!is.null(cookiefile)) cookiemonster::add_cookies(cookiefile)
  cookies <- cookiemonster::get_cookies("^(www.)*tiktok.com", as = "string")

  user_data <- list()
  cursor <- 0
  has_more <- TRUE
  failures <- 0L

  while (has_more) {
    if (verbose) cli::cli_progress_step(
      msg = if (length(user_data) == 0L) "Getting {label}..." else "Getting more {label}...",
      msg_done = "Got {length(user_data)} {label}."
    )
    resp <- httr2::request("https://www.tiktok.com/api/user/list/") |>
      httr2::req_url_query(
        count = "30",
        minCursor = cursor,
        scene = scene,
        secUid = secuid
      ) |>
      httr2::req_options(cookie = cookies) |>
      httr2::req_retry(max_tries = max_tries) |>
      httr2::req_perform()

    new_data <- try(httr2::resp_body_json(resp), silent = TRUE)
    if (methods::is(new_data, "try-error")) {
      # request the same page again (the cursor is left untouched), but give
      # up after max_tries unreadable responses in a row instead of looping
      # forever
      failures <- failures + 1L
      if (failures >= max_tries) {
        cli::cli_warn("Could not read the response {failures} times in a row, returning the {label} collected so far.")
        break
      }
    } else {
      failures <- 0L
      user_data <- c(user_data, purrr::pluck(new_data, "userList", .default = list()))
      cursor <- new_data$minCursor
      has_more <- isTRUE(new_data$hasMore)
    }
    if (has_more) wait(sleep_pool, verbose)
  }
  if (verbose) cli::cli_progress_done()

  if (verbose) cli::cli_progress_step(
    msg = "Parsing results"
  )
  return(parse_followers(user_data))
}
544 |
545 |
#' Get videos from a TikTok user's profile
#'
#' @description \ifelse{html}{\figure{api-unofficial.svg}{options: alt='[Works on:
#'   Unofficial API]'}}{\strong{[Works on: Unofficial API]}}
#'
#'   Get all videos posted by a TikTok user. The function opens the user's
#'   profile in a live browser session via rvest, scrolls down as far as
#'   possible and collects the links that were discovered.
#'
#' @param username The username (or the URL of the profile) of the TikTok user
#'   whose videos you want to retrieve.
#' @param solve_captchas open browser to solve appearing captchas manually.
#' @param return_urls return video URLs instead of downloading the videos.
#' @param timeout time (in seconds) to wait between scrolling and solving
#'   captchas. The value is multiplied by a random factor between 1 and 3.
#' @param verbose should the function print status updates to the screen?
#' @param ... Additional arguments to be passed to the
#'   \code{\link{tt_videos_hidden}} function.
#'
#' @return The result of \code{\link{tt_videos_hidden}} for the discovered
#'   URLs, or a character vector of URLs if \code{return_urls = TRUE}.
#'
#' @examples
#' \dontrun{
#' # Get videos from the user "fpoe_at"
#' tt_user_videos_hidden("fpoe_at")
#' }
#' @export
tt_user_videos_hidden <- function(username,
                                  solve_captchas = FALSE,
                                  return_urls = FALSE,
                                  timeout = 5L,
                                  verbose = TRUE,
                                  ...) {

  rlang::check_installed("rvest", reason = "to use this function", version = "1.0.4")

  if (!grepl("^https?://", username)) {
    username <- paste0("https://www.tiktok.com/@", username)
  }

  # "(www\\.)?" is an optional "www." prefix; the former "[www.]*" was a
  # character class that matched any run of "w" and "."
  if (!grepl("^https?://(www\\.)?tiktok\\.com/@.+", username)) {
    cli::cli_abort("The provided username does not resolve to a TikTok account URL: {username}")
  }

  if (verbose) cli::cli_progress_step("Opening {username}")
  # reset captcha warning
  the$captcha <- NULL
  sess <- rvest::read_html_live(username)
  last_y <- -1
  # scroll as far as possible: stop once the position no longer changes
  if (verbose) cli::cli_progress_bar(format = "{cli::pb_spin} Scrolling down (y={last_y})")
  while (sess$get_scroll_position()$y > last_y) {
    solve_captcha(sess, solve = solve_captchas)
    last_y <- sess$get_scroll_position()$y
    sess$scroll_to(top = 10 ^ 5)
    if (verbose) cli::cli_progress_update()
    Sys.sleep(timeout * stats::runif(1, 1, 3))
  }

  if (verbose) cli::cli_progress_step("Collecting discovered URLs")
  urls <- sess |>
    rvest::html_elements("a") |>
    rvest::html_attr("href")
  # keep links below the profile URL; the URL is matched literally, not as a
  # regular expression
  urls <- unique(grep(username, x = urls, value = TRUE, fixed = TRUE))
  if (verbose) {
    cli::cli_progress_done()
    cli::cli_alert_success("{length(urls)} URLs discovered")
  }

  if (return_urls) return(urls)
  tt_videos_hidden(urls, ...)
}
613 |
614 |
# Check the live session for a captcha and, if requested, wait until it is gone.
#
# sess:  live browser session (as returned by rvest::read_html_live()).
# solve: if TRUE, a browser view is opened once and the function polls until
#        the captcha element has disappeared; if FALSE, the captcha is only
#        reported.
#
# Returns TRUE once no captcha is present and (invisibly) NULL if a captcha is
# present but solve is FALSE.
solve_captcha <- function(sess, solve) {
  # polling loop instead of unbounded recursion: recursing without a pause
  # quickly exhausts R's expression nesting limit while the user is still busy
  # solving the captcha
  repeat {
    captcha <- rvest::html_element(sess, "#captcha-verify-image,.captcha-verify-container")
    if (length(captcha) == 0L) {
      # no captcha (any more): reset the state kept between calls
      the$view <- NULL
      the$captcha <- NULL
      return(TRUE)
    }
    # display status once
    if (is.null(the$captcha)) {
      cli::cli_alert_info("Captcha discovered")
      the$captcha <- TRUE
    }
    if (!solve) return(invisible(NULL))
    # open the browser view only once per captcha
    if (is.null(the$view)) {
      the$view <- sess$view()
    }
    # short pause before checking again
    Sys.sleep(1)
  }
}
633 |
--------------------------------------------------------------------------------
/R/api_research.r:
--------------------------------------------------------------------------------
1 | #' Query TikTok videos using the research API
2 | #'
3 | #' @description \ifelse{html}{\figure{api-research.svg}{options: alt='[Works on:
4 | #' Research API]'}}{\strong{[Works on: Research API]}}
5 | #'
6 | #' This is the version of \link{tt_search} that explicitly uses Research API.
7 | #' Use \link{tt_search_hidden} for the unofficial API version.
8 | #'
9 | #' @param query A query string or object (see \link{query}).
10 | #' @param start_date,end_date A start and end date to narrow the search
11 | #' (required; can be a maximum of 30 days apart).
12 | #' @param fields The fields to be returned (defaults to all)
13 | #' @param start_cursor The starting cursor, i.e., how many results to skip (for
14 | #' picking up an old search).
15 | #' @param search_id The search id (for picking up an old search).
16 | #' @param is_random Whether the query is random (defaults to FALSE).
17 | #' @param max_pages results are returned in batches/pages with 100 videos. How
18 | #' many should be requested before the function stops?
19 | #' @param parse Should the results be parsed? Otherwise, the original JSON
20 | #' object is returned as a nested list.
21 | #' @param cache should progress be saved in the current session? It can then be
22 | #' retrieved with \code{last_query()} if an error occurs. But the function
23 | #' will use extra memory.
24 | #' @param verbose should the function print status updates to the screen?
25 | #' @param token The authentication token (usually supplied automatically after
26 | #' running \link{auth_research} once).
27 | #' @return A data.frame of parsed TikTok videos (or a nested list).
28 | #' @export
29 | #' @examples
30 | #' \dontrun{
31 | #' # look for a keyword or hashtag by default
32 | #' tt_search_api("rstats")
33 | #'
34 | #' # or build a more elaborate query
35 | #' query() |>
36 | #' query_and(field_name = "region_code",
37 | #' operation = "IN",
38 | #' field_values = c("JP", "US")) |>
39 | #' query_or(field_name = "hashtag_name",
40 | #' operation = "EQ", # rstats is the only hashtag
41 | #' field_values = "rstats") |>
42 | #' query_or(field_name = "keyword",
43 | #' operation = "IN", # rstats is one of the keywords
44 | #' field_values = "rstats") |>
45 | #' query_not(operation = "EQ",
46 | #' field_name = "video_length",
47 | #' field_values = "SHORT") |>
48 | #' tt_search_api()
49 | #'
50 | #' # when a search fails after a while, get the results and pick it back up
51 | #' # (only work with same parameters)
52 | #' last_pull <- last_query()
53 | #' query() |>
54 | #' query_and(field_name = "region_code",
55 | #' operation = "IN",
56 | #' field_values = c("JP", "US")) |>
57 | #' query_or(field_name = "hashtag_name",
58 | #' operation = "EQ", # rstats is the only hashtag
59 | #' field_values = "rstats") |>
60 | #' query_or(field_name = "keyword",
61 | #' operation = "IN", # rstats is one of the keywords
62 | #' field_values = "rstats") |>
63 | #' query_not(operation = "EQ",
64 | #' field_name = "video_length",
65 | #' field_values = "SHORT") |>
66 | #' tt_search_api(start_cursor = length(last_pull) + 1,
67 | #' search_id = attr(last_pull, "search_id"))
68 | #' }
tt_search_api <- function(query,
                          start_date = Sys.Date() - 1,
                          end_date = Sys.Date(),
                          fields = "all",
                          start_cursor = 0L,
                          search_id = NULL,
                          is_random = FALSE,
                          max_pages = 1,
                          parse = TRUE,
                          cache = TRUE,
                          verbose = TRUE,
                          token = NULL) {

  # a plain string is split at spaces and the resulting terms are searched as
  # hashtags OR keywords
  if (is.character(query)) {
    # strsplit() returns a list, so it is flattened before the terms are
    # cleaned; otherwise multi-word queries end up as one deparsed string
    terms <- unlist(strsplit(query, " ", fixed = TRUE))
    terms <- terms[nzchar(terms)]
    query <- query(or = list(
      list(
        field_name = "hashtag_name",
        operation = "IN",
        field_values = as.list(sub("#", "", terms, fixed = TRUE))
      ),
      list(
        field_name = "keyword",
        operation = "IN",
        field_values = as.list(terms)
      )
    ))
  }

  # identical() also works when several field names are supplied as a vector,
  # which are then collapsed into the comma-separated form used in the request
  if (identical(fields, "all")) {
    fields <- "id,video_description,create_time,region_code,share_count,view_count,like_count,comment_count,music_id,hashtag_names,username,effect_ids,playlist_id,voice_to_text"
  }
  fields <- paste0(fields, collapse = ",")

  if (is_datetime(start_date)) {
    start_date <- format(start_date, "%Y%m%d")
  } else if (!grepl("^\\d{8}$", start_date)) {
    cli::cli_abort("{.code start_date} needs to be a valid date or a string like, e.g., \"20210102\"")
  }

  # end_date gets the same treatment (and its own error message)
  if (is_datetime(end_date)) {
    end_date <- format(end_date, "%Y%m%d")
  } else if (!grepl("^\\d{8}$", end_date)) {
    cli::cli_abort("{.code end_date} needs to be a valid date or a string like, e.g., \"20210102\"")
  }

  if (verbose) {
    cli::cli_progress_step("Making initial request")
  }

  res <- tt_query_request(
    endpoint = "query/",
    query = query,
    start_date = start_date,
    end_date = end_date,
    fields = fields,
    cursor = start_cursor,
    search_id = search_id,
    is_random = is_random,
    token = token
  )
  videos <- purrr::pluck(res, "data", "videos")
  # state is kept in the package environment so it survives an error
  the$search_id <- spluck(res, "data", "search_id")
  the$cursor <- spluck(res, "data", "cursor")
  the$videos <- videos

  the$page <- 1

  if (verbose) cli::cli_progress_bar(
    format = "{cli::pb_spin} Got {page} page{?s} with {length(videos)} video{?s} {cli::col_silver('[', cli::pb_elapsed, ']')}",
    format_done = "{cli::col_green(cli::symbol$tick)} Got {page} page{?s} with {length(videos)} video{?s}",
    .envir = the
  )

  # request further pages while the API reports more results and the page
  # limit is not reached
  while (purrr::pluck(res, "data", "has_more", .default = FALSE) && the$page < max_pages) {
    the$page <- the$page + 1
    # NOTE(review): the stored cursor is the one sent with the upcoming
    # request; it is not refreshed from the final response, so the "cursor"
    # attribute of the result may lag one page behind -- confirm this is
    # intended for resuming searches
    the$cursor <- spluck(res, "data", "cursor")
    if (verbose) cli::cli_progress_update(force = TRUE, .envir = the)
    res <- tt_query_request(
      endpoint = "query/",
      query = query,
      start_date = start_date,
      end_date = end_date,
      fields = fields,
      cursor = the$cursor,
      search_id = the$search_id,
      is_random = is_random,
      token = token
    )
    videos <- c(videos, purrr::pluck(res, "data", "videos"))
    if (cache) {
      the$videos <- videos
    }
    if (verbose) cli::cli_progress_done()
  }

  if (parse) {
    if (verbose) {
      cli::cli_progress_done()
      cli::cli_progress_step("Parsing data")
    }
    videos <- parse_api_search(videos)
    if (verbose) cli::cli_progress_done()
  }
  class(videos) <- c("tt_results", class(videos))
  attr(videos, "search_id") <- the$search_id
  attr(videos, "cursor") <- the$cursor
  return(videos)
}
175 |
176 |
# alias: exposes tt_search_api under a second name and documents it on the
# same help page
#' @export
#' @rdname tt_search_api
tt_query_videos <- tt_search_api
180 |
181 |
# Performs a single POST request against an endpoint below
# https://open.tiktokapis.com/v2/research/video/ and returns the parsed JSON
# body. Arguments that are NULL are left out of the request body.
tt_query_request <- function(endpoint,
                             query = NULL,
                             video_id = NULL,
                             start_date = NULL,
                             end_date = NULL,
                             fields = NULL,
                             cursor = NULL,
                             search_id = NULL,
                             is_random = NULL,
                             token = NULL) {

  if (is.null(token)) {
    token <- get_token()
  }

  # a query is optional, but if present it has to be a query object
  if (!(is.null(query) || is_query(query))) {
    cli::cli_abort("query needs to be a query object (see {.code ?query})")
  }

  payload <- purrr::discard(
    list(
      query = unclass(query),
      video_id = video_id,
      start_date = start_date,
      end_date = end_date,
      max_count = 100L,
      cursor = cursor,
      search_id = search_id,
      is_random = is_random
    ),
    is.null
  )

  # don't retry when daily quota is reached (429)
  retry_status <- function(resp) {
    httr2::resp_status(resp) %in% c(301:399, 401:428, 430:599)
  }
  # increase backoff after each try
  cubic_backoff <- function(t) t ^ 3

  req <- httr2::request("https://open.tiktokapis.com/v2/research/video/")
  req <- httr2::req_url_path_append(req, endpoint)
  req <- httr2::req_method(req, "POST")
  req <- httr2::req_url_query(req, fields = fields)
  req <- httr2::req_headers(req, "Content-Type" = "application/json")
  req <- httr2::req_auth_bearer_token(req, token$access_token)
  req <- httr2::req_body_json(req, data = payload)
  req <- httr2::req_error(req, body = api_error_handler)
  req <- httr2::req_retry(
    req,
    max_tries = 5L,
    is_transient = retry_status,
    backoff = cubic_backoff
  )

  resp <- httr2::req_perform(req)
  httr2::resp_body_json(resp, bigint_as_char = TRUE)

}
228 |
229 |
#' Lookup which videos were liked by a user using the research API
#'
#' @description \ifelse{html}{\figure{api-research.svg}{options: alt='[Works on:
#'   Research API]'}}{\strong{[Works on: Research API]}}
#'
#' @param username name(s) of the user(s) to be queried
#' @param fields The fields to be returned (defaults to all). Either a single
#'   comma-separated string or a character vector of field names.
#' @inheritParams tt_search_api
#'
#' @return A data.frame of parsed TikTok videos the user has liked, with the
#'   queried user in the column \code{liked_by_user}.
#' @export
#'
#' @examples
#' \dontrun{
#' tt_get_liked("jbgruber")
#' # OR
#' tt_user_liked_videos_api("https://www.tiktok.com/@tiktok")
#' # OR
#' tt_user_liked_videos_api("https://www.tiktok.com/@tiktok")
#'
#' # note: none of these work because I could not find any account that
#' # has likes public!
#' }
tt_user_liked_videos_api <- function(username,
                                     fields = "all",
                                     max_pages = 1,
                                     cache = TRUE,
                                     verbose = TRUE,
                                     token = NULL) {

  purrr::map(username, function(u) {
    # if username is given as URL
    if (grepl("/", u)) {
      u <- extract_regex(
        u,
        "(?<=.com/@)(.+?)(?=\\?|$|/)"
      )
    }
    if (verbose) cli::cli_progress_step(msg = "Getting user {u}",
                                        msg_done = "Got user {u}")
    the$result <- TRUE
    if (is.null(token)) token <- get_token()

    # identical() is safe for vectors of field names; names are written
    # without padding blanks, as in the sibling functions
    if (identical(fields, "all")) {
      fields <- c(
        "id",
        "create_time",
        "username",
        "region_code",
        "video_description",
        "music_id",
        "like_count",
        "comment_count",
        "share_count",
        "view_count",
        "hashtag_names",
        "is_stem_verified",
        # "favourites_count",
        "video_duration"
      )
    }
    fields <- paste0(fields, collapse = ",")

    res <- list(data = list(has_more = TRUE, cursor = NULL))
    the$page <- 0L
    videos <- list()
    # iterate over pages
    while (purrr::pluck(res, "data", "has_more", .default = FALSE) && the$page < max_pages) {
      the$page <- the$page + 1
      the$cursor <- purrr::pluck(res, "data", "cursor")

      res <- tt_user_request(endpoint = "liked_videos/",
                             username = u,
                             fields = fields,
                             cursor = the$cursor,
                             token = token)

      videos <- c(videos, purrr::pluck(res, "data", "user_liked_videos"))
      if (cache) {
        the$videos <- videos
      }
    }

    # nothing was returned for this user: report it and contribute no rows
    if (length(videos) == 0) {
      if (verbose) cli::cli_progress_done(result = "failed")
      return(NULL)
    }

    videos <- videos |>
      purrr::map(as_tibble_onerow) |>
      dplyr::bind_rows() |>
      # somehow, the order changes between calls. So I fix it here; any_of()
      # skips columns that were not requested through `fields`
      dplyr::relocate(dplyr::any_of(c("id",
                                      "username",
                                      "create_time",
                                      "video_description",
                                      "region_code",
                                      "video_duration",
                                      "view_count",
                                      "like_count",
                                      "comment_count",
                                      "share_count",
                                      "music_id")))

    videos <- tibble::add_column(videos, liked_by_user = u)
    if (verbose) cli::cli_progress_done(result = "done")

    return(videos)
  }) |>
    dplyr::bind_rows()
}
340 |
341 |
#' Lookup which videos were reposted by a user using the research API
#'
#' @description \ifelse{html}{\figure{api-research.svg}{options: alt='[Works on:
#'   Research API]'}}{\strong{[Works on: Research API]}}
#'
#' @param username name(s) of the user(s) to be queried
#' @param fields The fields to be returned (defaults to all). Either a single
#'   comma-separated string or a character vector of field names.
#' @inheritParams tt_search_api
#'
#' @return A data.frame of parsed TikTok videos the user has reposted, with the
#'   queried user in the column \code{reposted_by_user}.
#' @export
#'
#' @examples
#' \dontrun{
#' tt_get_reposted("jbgruber")
#' # OR
#' tt_user_reposted_api("https://www.tiktok.com/@tiktok")
#' # OR
#' tt_user_reposted_api("https://www.tiktok.com/@tiktok")
#'
#' # note: none of these work because nobody has this enabled!
#' }
tt_user_reposted_api <- function(username,
                                 fields = "all",
                                 max_pages = 1,
                                 cache = TRUE,
                                 verbose = TRUE,
                                 token = NULL) {

  purrr::map(username, function(u) {
    # if username is given as URL
    if (grepl("/", u)) {
      u <- extract_regex(
        u,
        "(?<=.com/@)(.+?)(?=\\?|$|/)"
      )
    }
    if (verbose) cli::cli_progress_step(msg = "Getting user {u}",
                                        msg_done = "Got user {u}")
    the$result <- TRUE
    if (is.null(token)) token <- get_token()

    # identical() is safe for vectors of field names
    if (identical(fields, "all")) {
      fields <- c(
        "id",
        "create_time",
        "username",
        "region_code",
        "video_description",
        "music_id",
        "like_count",
        "comment_count",
        "share_count",
        "view_count",
        "hashtag_names",
        "is_stem_verified",
        "favourites_count",
        "video_duration"
      )
    }
    fields <- paste0(fields, collapse = ",")

    res <- list(data = list(has_more = TRUE, cursor = NULL))
    the$page <- 0L
    videos <- list()
    # iterate over pages
    while (purrr::pluck(res, "data", "has_more", .default = FALSE) && the$page < max_pages) {
      the$page <- the$page + 1
      the$cursor <- purrr::pluck(res, "data", "cursor")

      res <- tt_user_request(endpoint = "reposted_videos/",
                             username = u,
                             fields = fields,
                             cursor = the$cursor,
                             token = token)

      videos <- c(videos, purrr::pluck(res, "data", "reposted_videos"))
      if (cache) {
        the$videos <- videos
      }
    }

    # nothing was returned for this user: report it and contribute no rows
    # (there are no columns to reorder in that case)
    if (length(videos) == 0) {
      if (verbose) cli::cli_progress_done(result = "failed")
      return(NULL)
    }

    # the parsed tibble (not the raw list of API records) is what the user
    # column has to be added to
    videos <- videos |>
      purrr::map(as_tibble_onerow) |>
      dplyr::bind_rows() |>
      # somehow, the order changes between calls. So I fix it here; any_of()
      # skips columns that were not requested through `fields`
      dplyr::relocate(dplyr::any_of(c("id",
                                      "username",
                                      "create_time",
                                      "video_description",
                                      "region_code",
                                      "video_duration",
                                      "view_count",
                                      "like_count",
                                      "comment_count",
                                      "share_count",
                                      "music_id")))

    videos <- tibble::add_column(videos, reposted_by_user = u)
    if (verbose) cli::cli_progress_done(result = "done")

    return(videos)
  }) |>
    dplyr::bind_rows()
}
449 |
450 |
451 | #' Lookup which videos were pinned by a user using the research API
452 | #'
453 | #' @description \ifelse{html}{\figure{api-research.svg}{options: alt='[Works on:
454 | #' Research API]'}}{\strong{[Works on: Research API]}}
455 | #'
456 | #' @param username vector of user names (handles) or URLs to users' pages.
457 | #' @inheritParams tt_search_api
458 | #'
459 | #' @return A data.frame of parsed TikTok videos the user has posted
460 | #' @export
461 | #'
462 | #' @examples
463 | #' \dontrun{
464 | #' tt_get_pinned("jbgruber")
465 | #' # OR
466 | #' tt_user_pinned_videos_api("https://www.tiktok.com/@tiktok")
467 | #' # OR
468 | #' tt_user_pinned_videos_api("https://www.tiktok.com/@tiktok")
469 | #' }
tt_user_pinned_videos_api <- function(username,
                                      fields = "all",
                                      cache = TRUE,
                                      verbose = TRUE,
                                      token = NULL) {

  # one request per user; the per-user tibbles are stacked at the end
  purrr::map(username, function(u) {
    # if username is given as URL
    if (grepl("/", u)) {
      u <- extract_regex(
        u,
        "(?<=.com/@)(.+?)(?=\\?|$|/)"
      )
    }
    if (verbose) cli::cli_progress_step(msg = "Getting user {u}",
                                        msg_done = "Got user {u}")
    the$result <- TRUE
    if (is.null(token)) token <- get_token()

    # identical() also works when several field names are supplied as a
    # vector, which are then collapsed into one comma-separated string
    if (identical(fields, "all")) {
      fields <- c(
        "id",
        "create_time",
        "username",
        "region_code",
        "video_description",
        "music_id",
        "like_count",
        "comment_count",
        "share_count",
        "view_count",
        "hashtag_names",
        "is_stem_verified",
        # mentioned in docs, but does not work
        # "favourites_count",
        "video_duration"
      )
    }
    fields <- paste0(fields, collapse = ",")

    res <- tt_user_request(endpoint = "pinned_videos/",
                           username = u,
                           fields = fields,
                           cursor = NULL,
                           token = token)

    videos <- purrr::pluck(res, "data", "pinned_videos_list") |>
      purrr::map(as_tibble_onerow) |>
      dplyr::bind_rows() |>
      tibble::add_column(pinned_by_user = u)

    if (cache) {
      the$videos <- videos
    }

    # the step counts as failed when no pinned video came back
    if (verbose) cli::cli_progress_done(
      result = if (nrow(videos) > 0) "done" else "failed"
    )

    return(videos)
  }) |>
    dplyr::bind_rows()
}
533 |
534 |
535 | #' @title Get followers and following of users from the research API
536 | #'
537 | #' @description \ifelse{html}{\figure{api-research.svg}{options: alt='[Works on:
538 | #' Research API]'}}{\strong{[Works on: Research API]}}
539 | #'
540 | #' @param username name(s) of the user(s) to be queried
541 | #' @inheritParams tt_search_api
542 | #'
543 | #' @return A data.frame
544 | #' @export
545 | #'
546 | #' @examples
547 | #' \dontrun{
548 | #' tt_user_follower_api("jbgruber")
549 | #' # OR
550 | #' tt_user_following_api("https://www.tiktok.com/@tiktok")
551 | #' # OR
552 | #' tt_get_follower("https://www.tiktok.com/@tiktok")
553 | #' }
tt_user_follower_api <- function(username,
                                 max_pages = 1,
                                 cache = TRUE,
                                 verbose = TRUE,
                                 token = NULL) {

  # delegate to the shared helper, pointing it at the "followers/" endpoint
  tt_user_follow(
    "followers/",
    username,
    max_pages = max_pages,
    cache = cache,
    verbose = verbose,
    token = token
  )
}
567 |
568 |
569 | #' @rdname tt_user_follower_api
570 | #' @export
tt_user_following_api <- function(username,
                                  max_pages = 1,
                                  cache = TRUE,
                                  verbose = TRUE,
                                  token = NULL) {

  # delegate to the shared helper, pointing it at the "following/" endpoint
  tt_user_follow(
    "following/",
    username,
    max_pages = max_pages,
    cache = cache,
    verbose = verbose,
    token = token
  )
}
584 |
585 |
# Shared implementation behind tt_user_follower_api() and
# tt_user_following_api(): pages through the given user endpoint for every
# entry in `username` and stacks the results into one data.frame. The queried
# user is stored in the column `following_user`.
tt_user_follow <- function(endpoint,
                           username,
                           max_pages = 1,
                           cache = TRUE,
                           verbose = TRUE,
                           token = NULL) {

  fetch_one <- function(u) {
    # a URL is reduced to the handle it contains
    if (grepl("/", u)) {
      u <- extract_regex(u, "(?<=.com/@)(.+?)(?=\\?|$|/)")
    }
    if (verbose) {
      cli::cli_progress_step(msg = "Getting user {u}",
                             msg_done = "Got user {u}")
    }
    the$result <- TRUE
    if (is.null(token)) {
      token <- get_token()
    }

    # placeholder response so that the first pass of the loop is entered
    res <- list(data = list(has_more = TRUE, cursor = NULL))
    the$page <- 0L
    collected <- list()

    repeat {
      more <- purrr::pluck(res, "data", "has_more", .default = FALSE)
      if (!(more && the$page < max_pages)) break

      the$page <- the$page + 1
      the$cursor <- purrr::pluck(res, "data", "cursor")

      res <- tt_user_request(endpoint = endpoint,
                             username = u,
                             cursor = the$cursor,
                             token = token)

      # the name of the result element depends on the endpoint
      result_key <- ifelse(endpoint == "followers/",
                           "user_followers",
                           "user_following")
      collected <- c(collected, purrr::pluck(res, "data", result_key))
      if (cache) {
        the$videos <- collected
      }
    }

    collected <- dplyr::bind_rows(collected)
    collected <- tibble::add_column(collected, following_user = u)
    if (verbose) {
      # more than one column means data beyond the added user column exists
      status <- ifelse(length(collected) > 1, "done", "failed")
      cli::cli_progress_done(result = status)
    }

    return(collected)
  }

  per_user <- purrr::map(username, fetch_one)
  dplyr::bind_rows(per_user)
}
640 |
# Performs a single POST request against an endpoint below
# https://open.tiktokapis.com/v2/research/user/ and returns the parsed JSON
# body. `fields` is only added to the URL query when it was supplied.
tt_user_request <- function(endpoint,
                            username,
                            fields,
                            cursor,
                            token) {

  payload <- list(username = username,
                  max_count = 100L,
                  cursor = cursor)

  req <- httr2::request("https://open.tiktokapis.com/v2/research/user/")
  req <- httr2::req_url_path_append(req, endpoint)
  req <- httr2::req_method(req, "POST")
  req <- httr2::req_headers(req, "Content-Type" = "application/json")
  req <- httr2::req_auth_bearer_token(req, token$access_token)
  req <- httr2::req_body_json(req, data = payload)
  req <- httr2::req_error(req,
                          is_error = api_user_error_checker,
                          body = api_error_handler)
  req <- httr2::req_retry(req, max_tries = 5)

  if (!missing(fields)) {
    req <- httr2::req_url_query(req, fields = fields)
  }

  resp <- httr2::req_perform(req)
  httr2::resp_body_json(resp, bigint_as_char = TRUE)

}
670 |
671 |
672 | #' Lookup TikTok information about a user using the research API
673 | #'
674 | #' @description \ifelse{html}{\figure{api-research.svg}{options: alt='[Works on:
675 | #' Research API]'}}{\strong{[Works on: Research API]}}
676 | #'
677 | #' @inheritParams tt_user_liked_videos_api
678 | #'
679 | #' @return A data.frame of parsed TikTok videos the user has posted
680 | #' @export
681 | #'
682 | #' @examples
683 | #' \dontrun{
684 | #' tt_user_info_api("jbgruber")
685 | #' # OR
686 | #' tt_user_info_api("https://www.tiktok.com/@tiktok")
687 | #' # OR
688 | #' tt_user_info("https://www.tiktok.com/@tiktok")
689 | #' }
tt_user_info_api <- function(username,
                             fields = "all",
                             verbose = TRUE,
                             token = NULL) {

  # one request per user; the per-user tibbles are stacked at the end
  out <- purrr::map(username, function(u) {
    # if username is given as URL
    if (grepl("/", u)) {
      u <- extract_regex(
        u,
        "(?<=.com/@)(.+?)(?=\\?|$|/)"
      )
    }
    if (verbose) cli::cli_progress_step(msg = "Getting user {u}",
                                        msg_done = "Got user {u}")
    # flag is set to FALSE by api_user_error_checker when the API reports a
    # known, non-fatal problem for this user
    the$result <- TRUE
    if (is.null(token)) token <- get_token()

    # identical() also works when several field names are supplied as a
    # vector, which are then collapsed into one comma-separated string
    if (identical(fields, "all")) {
      fields <- c(
        "display_name",
        "bio_description",
        "avatar_url",
        "is_verified",
        "follower_count",
        "following_count",
        "likes_count",
        "video_count"
      )
    }
    fields <- paste0(fields, collapse = ",")

    # /tests/testthat/example_resp_q_user.json
    info <- httr2::request("https://open.tiktokapis.com/v2/research/user/info/") |>
      httr2::req_method("POST") |>
      httr2::req_url_query(fields = fields) |>
      httr2::req_headers("Content-Type" = "application/json") |>
      httr2::req_auth_bearer_token(token$access_token) |>
      httr2::req_body_json(data = list(username = u)) |>
      httr2::req_error(is_error = api_user_error_checker,
                       body = api_error_handler) |>
      httr2::req_retry(max_tries = 5,
                       backoff = function(t) t ^ 3) |>
      httr2::req_perform() |>
      httr2::resp_body_json(bigint_as_char = TRUE) |>
      purrr::pluck("data") |>
      tibble::as_tibble()
    # scalar condition, so the short-circuit operator is used
    if (verbose && !the$result) cli::cli_progress_done(result = "failed")
    return(info)
  }) |>
    dplyr::bind_rows()
  if (verbose) cli::cli_progress_done()
  return(out)
}
744 |
745 |
746 | #' Retrieve video comments
747 | #'
748 | #' @description
749 | #' \ifelse{html}{\figure{api-research.svg}{options: alt='[Works on: Research API]'}}{\strong{[Works on: Research API]}}
750 | #'
751 | #' @param video_id The id or URL of a video
752 | #' @inheritParams tt_search_api
753 | #'
754 | #' @return A data.frame of parsed comments
755 | #' @export
756 | #'
757 | #' @examples
758 | #' \dontrun{
759 | #' tt_comments("https://www.tiktok.com/@tiktok/video/7106594312292453675")
760 | #' # OR
761 | #' tt_comments("7106594312292453675")
762 | #' # OR
763 | #' tt_comments_api("7106594312292453675")
764 | #' }
tt_comments_api <- function(video_id,
                            fields = "all",
                            start_cursor = 0L,
                            max_pages = 1L,
                            cache = TRUE,
                            verbose = TRUE,
                            token = NULL) {

  # if video_id is given as URL (i.e., contains anything but digits)
  if (grepl("[^0-9]", video_id)) {
    video_id <- extract_regex(
      video_id,
      "(?<=/video/)(.+?)(?=\\?|$)|(?<=https://vm.tiktok.com/).+?(?=/|$)"
    )
  }

  # identical() also works when several field names are supplied as a vector,
  # which are then collapsed into the comma-separated form used in the request
  if (identical(fields, "all")) {
    fields <- "id,video_id,text,like_count,reply_count,parent_comment_id,create_time"
  }
  fields <- paste0(fields, collapse = ",")

  if (verbose) cli::cli_progress_step("Making initial request")

  res <- tt_query_request(
    endpoint = "comment/list/",
    video_id = video_id,
    fields = fields,
    cursor = start_cursor,
    token = token
  )
  comments <- purrr::pluck(res, "data", "comments")
  if (cache) the$comments <- comments
  the$page <- 1

  if (verbose) cli::cli_progress_bar(
    format = "{cli::pb_spin} Got {page} page{?s} with {length(the$comments)} comment{?s} {cli::col_silver('[', cli::pb_elapsed, ']')}",
    format_done = "{cli::col_green(cli::symbol$tick)} Got {page} page{?s} with {length(the$comments)} comment{?s}",
    .envir = the
  )

  # request further pages while the API reports more results and the page
  # limit is not reached; each request continues at the cursor of the
  # previous response
  while (purrr::pluck(res, "data", "has_more", .default = FALSE) && the$page < max_pages) {
    the$page <- the$page + 1
    if (verbose) cli::cli_progress_update(.envir = the)
    res <- tt_query_request(
      endpoint = "comment/list/",
      video_id = video_id,
      fields = fields,
      cursor = purrr::pluck(res, "data", "cursor", .default = NULL),
      token = token
    )
    comments <- c(comments, purrr::pluck(res, "data", "comments"))
    if (cache) the$comments <- comments
    if (verbose) cli::cli_progress_done()
  }

  if (verbose) {
    cli::cli_progress_done()
    cli::cli_progress_step("Parsing data")
  }
  out <- parse_api_comments(comments)

  return(out)
}
826 |
827 |
828 | #' Lookup TikTok playlist using the research API
829 | #'
830 | #' @description \ifelse{html}{\figure{api-research.svg}{options: alt='[Works on:
831 | #' Research API]'}}{\strong{[Works on: Research API]}}
832 | #'
833 | #' @param playlist_id playlist ID or URL to a playlist.
834 | #' @inheritParams tt_user_info_api
835 | #'
836 | #' @return A data.frame
837 | #' @export
tt_playlist_api <- function(playlist_id,
                            verbose = TRUE,
                            token = NULL) {

  # the docs mention a cursor, but it's not implemented as far as I can tell
  cursor <- NULL

  # a URL is reduced to the numeric ID that follows the last "-"
  if (grepl("/", playlist_id)) {
    playlist_id <- extract_regex(playlist_id, "(?<=-)([0-9]+?)(?=\\?|$|/)")
  }

  if (is.null(token)) {
    token <- get_token()
  }

  # API always seems to send 500, even when successful, so that status is
  # not treated as an error
  status_is_error <- function(resp) {
    !httr2::resp_status(resp) %in% c(100:399, 500)
  }

  payload <- list(playlist_id = playlist_id,
                  cursor = cursor)

  req <- httr2::request("https://open.tiktokapis.com/v2/research/playlist/info/")
  req <- httr2::req_method(req, "POST")
  req <- httr2::req_headers(req, "Content-Type" = "application/json")
  req <- httr2::req_auth_bearer_token(req, token$access_token)
  req <- httr2::req_body_json(req, data = payload)
  req <- httr2::req_error(req,
                          is_error = status_is_error,
                          body = api_error_handler)
  req <- httr2::req_retry(req, max_tries = 5)

  resp <- httr2::req_perform(req)
  parsed <- httr2::resp_body_json(resp, bigint_as_char = TRUE)
  out <- tibble::as_tibble(purrr::pluck(parsed, "data"))

  return(out)
}
872 |
873 |
# Builds the extra lines shown with an HTTP error (used as the `body` callback
# of httr2::req_error()). Before that, results cached in the package
# environment are written to a temporary file as a failsafe.
#
# resp: the httr2 response that triggered the error.
#
# Returns a character vector describing the error for JSON and HTML
# responses, NULL for any other content type.
api_error_handler <- function(resp) {

  # failsafe save already collected videos to disk
  if (purrr::pluck_exists(the, "videos")) {
    q <- the$videos
    attr(q, "search_id") <- the$search_id
    backup <- tempfile(fileext = ".rds")
    saveRDS(q, backup)
    # without the path, the backup could not be found again
    cli::cli_alert_info("Cached results were saved to {.file {backup}}")
  }

  # identical() keeps the checks from failing when the response carries no
  # content type (NA)
  content_type <- httr2::resp_content_type(resp)

  if (identical(content_type, "application/json")) {
    # parse the body once instead of once per field
    err <- httr2::resp_body_json(resp)$error
    return(
      c(
        paste("status:", err$code),
        paste("message:", err$message),
        paste("log_id:", err$log_id)
      )
    )
  }

  if (identical(content_type, "text/html")) {
    res <- httr2::resp_body_html(resp)
    return(
      c(
        paste("status:", rvest::html_text2(rvest::html_element(res, "title"))),
        paste("message:", rvest::html_text2(rvest::html_element(res, "body")))
      )
    )
  }
}
903 |
904 |
# Decides whether a response from the user endpoints should raise an error
# (used as the `is_error` callback of httr2::req_error()).
#
# resp: the httr2 response to check.
#
# Returns TRUE if the response should be treated as an error, FALSE otherwise.
# For a set of known messages (e.g., private or unknown users) a warning is
# shown, `the$result` is set to FALSE and no error is raised, so that loops
# over several users can continue.
api_user_error_checker <- function(resp) {
  status <- httr2::resp_status(resp)
  if (status < 400L) return(FALSE)
  if (status == 404L) return(TRUE)

  # parse the body once; a body that can't be read as JSON can't contain one
  # of the known messages and is handed on as a regular error
  body <- tryCatch(httr2::resp_body_json(resp), error = function(e) e)
  if (inherits(body, "error")) return(TRUE)

  # it looks like the API sometimes returns 500 falsely, but in these cases, no
  # error message is present
  if (status == 500L && !purrr::pluck_exists(body, "error", "message")) {
    return(FALSE)
  }

  msg <- body$error$message
  known_issues <- c(
    # if likes can't be accessed, which is true for many users, this should
    # not throw an error
    "information.cannot.be.returned",
    # if the user can't be found, this should not throw an error, which
    # would break the loop
    "cannot.find.the.user",
    # if account is private
    "is.private",
    "API.cannot.return.this.user's.information"
  )

  # any() of an empty result (no message present) is FALSE
  if (any(grepl(paste(known_issues, collapse = "|"), msg))) {
    # the message is inserted as a value, so braces in it are not interpreted
    # by cli
    cli::cli_alert_warning("{msg}")
    the$result <- FALSE
    return(FALSE)
  }
  return(TRUE)
}
936 |
--------------------------------------------------------------------------------
/R/api_research_extended.r:
--------------------------------------------------------------------------------
1 | #' Get videos from a TikTok user's profile
2 | #'
3 | #' @description \ifelse{html}{\figure{api-research.svg}{options: alt='[Works on:
4 | #' Research API]'}}{\strong{[Works on: Research API]}}
5 | #'
#'   Get all videos posted by a user or multiple users. This is a convenience
7 | #' wrapper around \code{\link{tt_search_api}} that takes care of moving time
8 | #' windows (search is limited to 30 days). This is the version of
9 | #' \link{tt_user_videos} that explicitly uses Research API. Use
10 | #' \link{tt_user_videos_hidden} for the unofficial API version.
11 | #'
12 | #' @param username The username or usernames whose videos you want to retrieve.
13 | #' @param since,to limits from/to when to go through the account in 30 day windows.
14 | #' @param ... Additional arguments to be passed to the
15 | #' \code{\link{tt_search_api}} function.
16 | #'
17 | #' @inheritParams tt_search_api
18 | #'
19 | #' @examples
20 | #' \dontrun{
21 | #' # Get videos from the user "fpoe_at" since October 2024
22 | #' tt_user_videos_api("fpoe_at", since = "2024-10-01")
23 | #'
24 | #' # often makes sense to combine this with the account creation time from the
25 | #' # hidden URL
26 | #' fpoe_at_info <- tt_user_info_hidden(username = "fpoe_at")
27 | #' tt_user_videos_api("fpoe_at", since = fpoe_at_info$create_time)
28 | #'
29 | #' }
30 | #' @export
tt_user_videos_api <- function(username,
                               since = "2020-01-01",
                               to = Sys.Date(),
                               verbose = TRUE,
                               ...) {

  # consecutive search windows: each starts 31 days after the previous one and
  # spans 30 days
  window_starts <- seq.Date(from = as.Date(since),
                            to = as.Date(to),
                            by = "31 day")
  window_ends <- window_starts + 30
  # the last window is cut off at `to`
  window_ends[length(window_ends)] <- as.Date(to)

  # progress bar settings for purrr (FALSE switches it off)
  progress <- if (verbose) {
    list(
      format = "{cli::pb_spin} searching time window {cli::pb_current} of {cli::pb_total} | {cli::pb_percent} done | ETA: {cli::pb_eta}"
    )
  } else {
    FALSE
  }

  # runs the username search for one window; windows without results
  # contribute nothing to the combined output
  search_window <- function(from, to) {
    user_query <- query_or(query(),
                           field_name = "username",
                           operation = "IN",
                           field_values = username)
    out <- tt_search_api(user_query,
                         start_date = from,
                         end_date = to,
                         verbose = FALSE,
                         ...)
    if (nrow(out) > 0) return(out)
  }

  per_window <- purrr::map2(window_starts, window_ends, search_window,
                            .progress = progress)
  dplyr::bind_rows(per_window)

}
65 |
--------------------------------------------------------------------------------
/R/auth_check.r:
--------------------------------------------------------------------------------
1 | #' Check whether you are authenticated
2 | #'
3 | #' @description \ifelse{html}{\figure{api-both.svg}{options:
4 | #'   alt='[Works on: Both]'}}{\strong{[Works on: Both]}}
5 | #'
6 | #'   Check if the necessary token or cookies are stored on your computer
7 | #'   already. By default, the function checks for the authentication of the
8 | #'   research and hidden API. To learn how you can authenticate, look at the
9 | #'   vignette for the research (\code{vignette("research-api", package =
10 | #'   "traktok")}) or hidden (\code{vignette("unofficial-api", package =
11 | #'   "traktok")}) API.
12 | #'
13 | #' @param research,hidden turn check on/off for the research or hidden API.
14 | #' @param silent only return if check(s) were successful, no status on the
15 | #'   screen
16 | #'
17 | #' @return named logical vector (invisible) with one \code{TRUE} entry per
18 | #'   successful check; failed or skipped checks are left out.
19 | #' @export
20 | #'
21 | #' @examples
22 | #' auth_check()
23 | auth_check <- function(research = TRUE, hidden = TRUE, silent = FALSE) {
24 |   auth <- vector()
25 |   if (research && !isFALSE(get_token(auth = FALSE))) {
26 |     auth <- c(research = TRUE)
27 |     if (!silent) cli::cli_alert_success("Research API authenticated")
28 |   }
29 |   if (hidden) {
30 |     # a failed cookie lookup means "not authenticated"; its error is only shown when not silent
31 |     cookies <- try(cookiemonster::get_cookies("^(www.)*tiktok.com"), silent = silent)
32 |     if (is.data.frame(cookies) && "tt_chain_token" %in% cookies$name) {
33 |       auth <- c(auth, hidden = TRUE)
34 |       if (!silent) cli::cli_alert_success("Hidden API authenticated")
35 |     }
36 |   }
37 |   invisible(auth)
38 | }
39 |
--------------------------------------------------------------------------------
/R/auth_hidden.r:
--------------------------------------------------------------------------------
1 | #' Authenticate for the hidden/unofficial API
2 | #'
3 | #' @description Guides you through authentication for the hidden/unofficial API
4 | #'
5 | #' @param cookiefile path to your cookiefile. Usually not needed after running
6 | #'   \link{auth_hidden} once. See \code{vignette("unofficial-api", package =
7 | #'   "traktok")} for more information on authentication.
8 | #' @param live opens Chromium browser to guide you through the auth process
9 | #'   (experimental).
10 | #'
11 | #' @return \code{TRUE} (invisible). Called to set up authentication
12 | #' @export
13 | #'
14 | #' @examples
15 | #' \dontrun{
16 | #' # to run through the steps of authentication
17 | #' auth_hidden()
18 | #' # or point to a cookie file directly
19 | #' auth_hidden("www.tiktok.com_cookies.txt")
20 | #' }
21 | auth_hidden <- function(cookiefile, live = interactive()) {
22 |   # a cookie file, if given, is stored directly and no browser is needed
23 |   if (!missing(cookiefile)) {
24 |     cookiemonster::add_cookies(cookiefile)
25 |     return(invisible(TRUE))
26 |   }
27 |
28 |   go_live <- live &&
29 |     isTRUE(utils::askYesNo("Do you want to try live authentication using Chrome? (experimental)"))
30 |   if (!go_live) {
31 |     cli::cli_abort(paste0(
32 |       "Supply either a cookiefile (see {.url https://jbgruber.github.io/traktok/",
33 |       "articles/unofficial-api.html#authentication})",
34 |       " or set {.code live = TRUE} to use interactive authentication"
35 |     ))
36 |   }
37 |   rlang::check_installed("rvest", reason = "to use this function", version = "1.0.4")
38 |   sess <- rvest::read_html_live("https://www.tiktok.com/")
39 |   # TODO: find way to click cookie banner
40 |   # sess$click(".tiktok-cookie-banner>button")
41 |   # sess$session$send_command('const button = document.querySelector("body > tiktok-cookie-banner").shadowRoot.querySelector("div > div.button-wrapper > button:nth-child(2)");')
42 |   login_button <- "#header-login-button"
43 |   if (check_element_exists(sess, login_button)) {
44 |     sess$click(login_button)
45 |     sess$view()
46 |   }
47 |   cli::cli_progress_bar(format = "{cli::pb_spin} Waiting for login",
48 |                         format_done = "Got cookies!")
49 |   Sys.sleep(5) # give time to load login
50 |   # poll until the login container is no longer reported on the page
51 |   while (check_element_exists(sess, "#loginContainer")) {
52 |     Sys.sleep(1 / 30)
53 |     cli::cli_progress_update()
54 |   }
55 |   cli::cli_progress_done()
56 |   cli::cli_alert_success("Got cookies!")
57 |   cookiemonster::add_cookies(session = sess)
58 |   invisible(TRUE)
59 | }
60 |
61 | # TRUE if `css` matches an element in `sess`; a lookup that errors also counts as TRUE
62 | check_element_exists <- function(sess, css) {
63 |   res <- try(rvest::html_element(sess, css), silent = TRUE)
64 |   if (inherits(res, "try-error")) return(TRUE)
65 |   length(res) > 0L # reuse the first lookup instead of querying the page again
66 | }
67 |
68 |
--------------------------------------------------------------------------------
/R/auth_research.r:
--------------------------------------------------------------------------------
1 | #' Authenticate for the official research API
2 | #'
3 | #' @description
4 | #' Guides you through authentication for the Research API
5 | #'
6 | #' @param client_key Client key for authentication; prompted for if missing
7 | #' @param client_secret Client secret for authentication; prompted for if missing
8 | #'
9 | #' @returns An authentication token (invisible)
10 | #'
11 | #' @details You need to apply for access to the API and get the key
12 | #'   and secret from TikTok. See
13 | #'   \url{https://developers.tiktok.com/products/research-api/} for more
14 | #'   information.
15 | #'
16 | #'   The token is stored in the package's user cache directory; the file name
17 | #'   can be changed with the environment variable \code{TIKTOK_TOKEN}.
18 | #' @export
19 | #'
20 | #' @examples
21 | #' \dontrun{
22 | #' auth_research(client_key, client_secret)
23 | #' }
24 | auth_research <- function(client_key, client_secret) {
25 |
26 |   if (missing(client_key)) {
27 |     cli::cli_alert_info(c("Head to {.url https://developers.tiktok.com/research}",
28 |                           " to get your credentials"))
29 |     client_key <- askpass::askpass("Please enter your client key")
30 |   }
31 |
32 |   if (missing(client_secret))
33 |     client_secret <- askpass::askpass("Please enter your client secret")
34 |
35 |   token <- req_token(client_key, client_secret)
36 |
37 |   token$access_token <- httr2::obfuscated(token$access_token)
38 |   token$access_token_expires <- Sys.time() + token$expires_in
39 |
40 |   # attach the (encrypted) credentials so an expired token can be refreshed
41 |   token$client_key <- enc(client_key)
42 |   token$client_secret <- enc(client_secret)
43 |
44 |   f <- Sys.getenv("TIKTOK_TOKEN", unset = "token.rds")
45 |   p <- tools::R_user_dir("traktok", "cache")
46 |   dir.create(p, showWarnings = FALSE, recursive = TRUE)
47 |   # keep the token in memory for this session ...
48 |   rlang::env_poke(env = the, nm = "tiktok_token", value = token, create = TRUE)
49 |   # ... and on disk for later sessions
50 |   httr2::secret_write_rds(x = token, path = file.path(p, f),
51 |                           key = I(rlang::hash("traktok")))
52 |
53 |   cli::cli_alert_success("Successfully authenticated!")
54 |   invisible(token)
55 | }
56 |
57 | # request a client access token from TikTok's OAuth endpoint; aborts if the API reports an error
58 | req_token <- function(client_key, client_secret) {
59 |   # credentials handed over as raw vectors are decrypted with dec() first
60 |   if (inherits(client_key, "raw")) client_key <- dec(client_key)
61 |   if (inherits(client_secret, "raw")) client_secret <- dec(client_secret)
62 |
63 |   # https://developers.tiktok.com/doc/client-access-token-management
64 |   req <- httr2::request("https://open.tiktokapis.com/v2/oauth/token/")
65 |   req <- httr2::req_method(req, "POST")
66 |   req <- httr2::req_headers(req,
67 |                             "Content-Type" = "application/x-www-form-urlencoded",
68 |                             "Cache-Control" = "no-cache")
69 |   req <- httr2::req_body_form(req,
70 |                               "client_key" = client_key,
71 |                               "client_secret" = client_secret,
72 |                               "grant_type" = "client_credentials")
73 |   # HTTP errors are not raised; failures are detected in the parsed body instead
74 |   req <- httr2::req_error(req, is_error = function(x) FALSE)
75 |   resp <- httr2::resp_body_json(httr2::req_perform(req))
76 |
77 |   if (!is.null(resp$error)) {
78 |     cli::cli_abort("Request failed with {.emph {resp$error}}: {.emph {resp$error_description}}")
79 |   }
80 |
81 |   invisible(resp)
82 | }
83 |
84 | # find the Research API token: session cache first, then disk, then (if `auth`) a new login
85 | get_token <- function(auth = TRUE) {
86 |
87 |   f <- file.path(tools::R_user_dir("traktok", "cache"),
88 |                  Sys.getenv("TIKTOK_TOKEN", unset = "token.rds"))
89 |   # env_get()'s third argument is a default value, not a key, so none is passed
90 |   if (rlang::env_has(the, nms = "tiktok_token")) {
91 |     token <- rlang::env_get(the, nm = "tiktok_token")
92 |   } else if (file.exists(f)) {
93 |     token <- httr2::secret_read_rds(f, I(rlang::hash("traktok")))
94 |   } else if (auth) {
95 |     token <- auth_research()
96 |   } else return(FALSE)
97 |
98 |   # refresh token if expired
99 |   if (token$access_token_expires <= Sys.time() + 5) {
100 |     token <- auth_research(client_key = dec(token$client_key),
101 |                            client_secret = dec(token$client_secret))
102 |   }
103 |
104 |   return(token)
105 | }
106 |
107 |
108 | #' encrypt a single string; without `key`, a hash of COOKIE_KEY (or a built-in fallback) is used
109 | #' @noRd
110 | enc <- function(x, key = NULL) {
111 |   secret <- if (is.null(key)) openssl::sha256(charToRaw(Sys.getenv("COOKIE_KEY", unset = "supergeheim"))) else key
112 |   openssl::aes_ctr_encrypt(data = charToRaw(x), key = secret)
113 | }
114 |
115 | #' decrypt a single element produced by enc(); `key` is derived the same way as in enc()
116 | #' @noRd
117 | dec <- function(x, key = NULL) {
118 |   secret <- if (is.null(key)) openssl::sha256(charToRaw(Sys.getenv("COOKIE_KEY", unset = "supergeheim"))) else key
119 |   rawToChar(openssl::aes_ctr_decrypt(data = x, key = secret))
120 | }
121 |
122 |
--------------------------------------------------------------------------------
/R/last_.r:
--------------------------------------------------------------------------------
1 | #' Retrieve most recent query
2 | #'
3 | #' If \code{tt_search_api} or \code{tt_comments_api} fail after already getting
4 | #' several pages, you can use this function to get all videos that have been
5 | #' retrieved so far from memory. Does not work when the session has crashed. In
6 | #' that case, look in \code{tempdir()} for an RDS file as a last resort.
7 | #'
8 | #' @return the parsed videos or, if parsing fails, the unparsed list of videos
9 | #'   with the attributes \code{search_id} and \code{cursor}
10 | #' @export
11 | last_query <- function() {
12 |   q <- the$videos
13 |   out <- try(parse_api_search(q), silent = TRUE)
14 |   if (!inherits(out, "try-error")) return(out)
15 |   # parsing failed: return the raw data and attach both attributes to it
16 |   attr(q, "search_id") <- the$search_id
17 |   attr(q, "cursor") <- the$cursor
18 |   q
19 | }
20 |
21 |
22 | #' @rdname last_query
23 | #' @export
24 | last_comments <- function() {
25 |   return(the[["comments"]]) # whatever is stored under `comments` in the package state
26 | }
27 |
28 |
--------------------------------------------------------------------------------
/R/parse_hidden.r:
--------------------------------------------------------------------------------
1 | #' @noRd
2 | parse_video <- function(json_string, video_id) {
3 |   # Parses the JSON found on a video page into a one-row tibble. Video data is
4 |   # looked up under "ItemModule" first, then under "__DEFAULT_SCOPE__"; if both
5 |   # are missing, a row of NAs is returned together with a warning.
6 |   tt_data <- jsonlite::fromJSON(json_string)
7 |
8 |   video_url <- attr(json_string, "url_full")
9 |   html_status <- attr(json_string, "html_status")
10 |   # the status fields are read from the same place in all three cases
11 |   status_msg <- spluck(tt_data, "__DEFAULT_SCOPE__", "webapp.video-detail", "statusMsg")
12 |   status_code <- spluck(tt_data, "__DEFAULT_SCOPE__", "webapp.video-detail", "statusCode")
13 |
14 |   video_data <- purrr::pluck(tt_data, "ItemModule")
15 |
16 |   if (!is.null(video_data)) {
17 |     video_timestamp <- purrr::pluck(video_data, video_id, "createTime",
18 |                                     .default = NA_character_) |>
19 |       as.integer() |>
20 |       as.POSIXct(tz = "UTC", origin = "1970-01-01")
21 |
22 |     return(tibble::tibble(
23 |       video_id = video_id,
24 |       video_url = video_url,
25 |       video_timestamp = video_timestamp,
26 |       video_length = spluck(video_data, video_id, "video", "duration"),
27 |       video_title = spluck(video_data, video_id, "desc"),
28 |       video_locationcreated = spluck(video_data, video_id, "locationCreated"),
29 |       video_diggcount = spluck(video_data, video_id, "stats", "diggCount"),
30 |       video_sharecount = spluck(video_data, video_id, "stats", "shareCount"),
31 |       video_commentcount = spluck(video_data, video_id, "stats", "commentCount"),
32 |       video_playcount = spluck(video_data, video_id, "stats", "playCount"),
33 |       author_username = spluck(video_data, video_id, "author"),
34 |       author_nickname = spluck(tt_data, "UserModule", "users", 1, "nickname"),
35 |       author_bio = spluck(tt_data, "UserModule", "users", 1, "signature"),
36 |       download_url = spluck(video_data, video_id, "video", "downloadAddr"),
37 |       html_status = html_status,
38 |       music = list(spluck(video_data, video_id, "music")),
39 |       challenges = list(spluck(video_data, video_id, "challenges")),
40 |       is_classified = isTRUE(spluck(video_data, video_id, "isContentClassified")),
41 |       video_status = status_msg,
42 |       video_status_code = status_code
43 |     ))
44 |   }
45 |
46 |   video_data <- purrr::pluck(tt_data, "__DEFAULT_SCOPE__", "webapp.video-detail", "itemInfo", "itemStruct")
47 |
48 |   if (!is.null(video_data)) {
49 |     video_timestamp <- purrr::pluck(video_data, "createTime",
50 |                                     .default = NA_character_) |>
51 |       as.integer() |>
52 |       as.POSIXct(tz = "UTC", origin = "1970-01-01")
53 |
54 |     out <- tibble::tibble(
55 |       video_id = video_id,
56 |       video_url = video_url,
57 |       video_timestamp = video_timestamp,
58 |       video_length = spluck(video_data, "video", "duration"),
59 |       video_title = spluck(video_data, "desc"),
60 |       video_locationcreated = spluck(video_data, "locationCreated"),
61 |       video_diggcount = spluck(video_data, "stats", "diggCount"),
62 |       video_sharecount = spluck(video_data, "stats", "shareCount"),
63 |       video_commentcount = spluck(video_data, "stats", "commentCount"),
64 |       video_playcount = spluck(video_data, "stats", "playCount"),
65 |       author_id = spluck(video_data, "author", "id"),
66 |       author_secuid = spluck(video_data, "author", "secUid"),
67 |       author_username = spluck(video_data, "author", "uniqueId"),
68 |       author_nickname = spluck(video_data, "author", "nickname"),
69 |       author_bio = spluck(video_data, "author", "signature"),
70 |       download_url = spluck(video_data, "video", "downloadAddr"),
71 |       html_status = html_status,
72 |       music = list(spluck(video_data, "music")),
73 |       challenges = list(spluck(video_data, "challenges")),
74 |       is_secret = isTRUE(spluck(video_data, "secret")),
75 |       is_for_friend = isTRUE(spluck(video_data, "forFriend")),
76 |       is_slides = FALSE,
77 |       video_status = status_msg,
78 |       video_status_code = status_code,
79 |       content_classified = purrr::pluck(video_data, "isContentClassified", .default = FALSE)
80 |     )
81 |
82 |     # no download address: fall back to the play address
83 |     if (identical(out$download_url, "")) {
84 |       out$download_url <- spluck(video_data, "video", "playAddr")
85 |     }
86 |
87 |     # still nothing: the post may be a slide show; it is only flagged as such
88 |     # if image URLs are actually present
89 |     if (identical(out$download_url, "")) {
90 |       images <- purrr::pluck(video_data, "imagePost", "images", "imageURL", "urlList")
91 |       if (length(images) > 0L) {
92 |         out$download_url <- toString(purrr::map_chr(images, 1L))
93 |         out$is_slides <- TRUE
94 |       }
95 |     }
96 |
97 |   } else {
98 |     # no video data at all: keep the identifying columns, everything else is NA
99 |     out <- tibble::tibble(
100 |       video_id = video_id, video_url = video_url,
101 |       video_timestamp = NA, video_length = NA, video_title = NA,
102 |       video_locationcreated = NA,
103 |       video_diggcount = NA, video_sharecount = NA,
104 |       video_commentcount = NA, video_playcount = NA,
105 |       author_id = NA, author_secuid = NA, author_username = NA,
106 |       author_nickname = NA, author_bio = NA,
107 |       download_url = NA,
108 |       html_status = html_status,
109 |       music = NA, challenges = NA,
110 |       is_secret = NA, is_for_friend = NA, is_slides = NA,
111 |       video_status = status_msg,
112 |       video_status_code = status_code
113 |     )
114 |     cli::cli_warn("No video data found")
115 |   }
116 |
117 |   return(out)
118 | }
119 |
120 |
121 | #' @noRd
122 | parse_search <- function(res) {
123 |   # turns a response of the hidden search endpoint into one row per video
124 |   if (length(purrr::pluck(res, "body")) == 0L)
125 |     cli::cli_abort("Unfortunately, the search endpoint has changed and returns empty results. See {.url https://github.com/JBGruber/traktok/issues/14}.")
126 |
127 |   tt_data <- httr2::resp_body_json(res)
128 |   tt_videos <- spluck(tt_data, "data")
129 |
130 |   author_name <- vpluck(tt_videos, "item", "author", "uniqueId")
131 |   video_id <- vpluck(tt_videos, "item", "id")
132 |   video_url <- glue::glue("https://www.tiktok.com/@{author_name}/video/{video_id}")
133 |   video_timestamp <- vpluck(tt_videos, "item", "createTime", val = "integer") |>
134 |     as.integer() |>
135 |     as.POSIXct(tz = "UTC", origin = "1970-01-01")
136 |
137 |   out <- tibble::tibble(
138 |     video_id = video_id,
139 |     video_timestamp = video_timestamp,
140 |     video_url = video_url,
141 |     video_length = vpluck(tt_videos, "item", "video", "duration", val = "integer"),
142 |     video_title = vpluck(tt_videos, "item", "desc"),
143 |     video_diggcount = vpluck(tt_videos, "item", "stats", "diggCount", val = "integer"),
144 |     video_sharecount = vpluck(tt_videos, "item", "stats", "shareCount", val = "integer"),
145 |     video_commentcount = vpluck(tt_videos, "item", "stats", "commentCount", val = "integer"),
146 |     video_playcount = vpluck(tt_videos, "item", "stats", "playCount", val = "integer"),
147 |     video_is_ad = vpluck(tt_videos, "item", "isAd", val = "logical"),
148 |     author_name = author_name,
149 |     author_nickname = vpluck(tt_videos, "item", "author", "nickname"),
150 |     author_followercount = vpluck(tt_videos, "item", "authorStats", "followerCount", val = "integer"),
151 |     author_followingcount = vpluck(tt_videos, "item", "authorStats", "followingCount", val = "integer"),
152 |     author_heartcount = vpluck(tt_videos, "item", "authorStats", "heartCount", val = "integer"),
153 |     author_videocount = vpluck(tt_videos, "item", "authorStats", "videoCount", val = "integer"),
154 |     author_diggcount = vpluck(tt_videos, "item", "authorStats", "diggCount", val = "integer"),
155 |     music = vpluck(tt_videos, "item", "music", val = "list"),
156 |     challenges = vpluck(tt_videos, "item", "challenges", val = "list"),
157 |     download_url = vpluck(tt_videos, "item", "video", "downloadAddr")
158 |   )
159 |
160 |   # paging information travels with the result as attributes
161 |   attr(out, "cursor") <- purrr::pluck(tt_data, "cursor", .default = NA)
162 |   attr(out, "search_id") <- purrr::pluck(tt_data, "log_pb", "impr_id", .default = NA)
163 |   attr(out, "has_more") <- as.logical(purrr::pluck(tt_data, "has_more", .default = FALSE))
164 |
165 |   return(out)
166 | }
167 |
168 |
169 | #' @noRd
170 | #' @importFrom rlang .data
171 | parse_user <- function(user_data) {
172 |
173 |   user_info <- spluck(user_data, "__DEFAULT_SCOPE__", "webapp.user-detail", "userInfo")
174 |
175 |   user_info |>
176 |     purrr::keep_at(c("user", "stats")) |>
177 |     purrr::list_flatten(name_spec = "{inner}") |>
178 |     purrr::list_flatten() |>
179 |     as_tibble_onerow(.name_repair = clean_names) |>
180 |     # for minimal backwards compatibility; columns are selected by name because
181 |     # `.data` is deprecated in tidyselect contexts such as rename()
182 |     dplyr::rename(user_name = "unique_id", secUid = "sec_uid") |>
183 |     dplyr::mutate(create_time = as_datetime(.data$create_time),
184 |                   nick_name_modify_time = as_datetime(.data$nick_name_modify_time))
185 |
186 | }
187 |
188 | #' @noRd
189 | parse_followers <- function(follower_data) {
190 |   # one row per entry: its `user` fields and `stats` fields side by side
191 |   rows <- purrr::map(follower_data, function(entry) {
192 |     dplyr::bind_cols(entry$user, entry$stats)
193 |   })
194 |   dplyr::bind_rows(rows)
195 |
196 | }
197 |
--------------------------------------------------------------------------------
/R/parse_research.r:
--------------------------------------------------------------------------------
1 | #' @noRd
2 | parse_api_search <- function(x) {
3 |   # one row per video in `x`; fields that a video lacks become NA
4 |   epoch_seconds <- vpluck(x, "create_time", val = "integer")
5 |   music <- purrr::map_chr(x, function(video) {
6 |     purrr::pluck(video, "music_id", .default = NA_character_)
7 |   })
8 |   res <- tibble::tibble(
9 |     video_id = vpluck(x, "video_id", val = "character"),
10 |     author_name = vpluck(x, "username", val = "character"),
11 |     view_count = vpluck(x, "view_count", val = "integer"),
12 |     comment_count = vpluck(x, "comment_count", val = "integer"),
13 |     share_count = vpluck(x, "share_count", val = "integer"),
14 |     like_count = vpluck(x, "like_count", val = "integer"),
15 |     region_code = vpluck(x, "region_code", val = "character"),
16 |     create_time = as.POSIXct(epoch_seconds, tz = "UTC", origin = "1970-01-01"),
17 |     effect_ids = vpluck(x, "effect_ids", val = "list"),
18 |     music_id = music,
19 |     video_description = vpluck(x, "video_description", val = "character"),
20 |     hashtag_names = vpluck(x, "hashtag_names", val = "list"),
21 |     voice_to_text = vpluck(x, "voice_to_text", val = "character")
22 |   )
23 |
24 |   # fall back to the "id" field where "video_id" is missing
25 |   res$video_id <- ifelse(is.na(res$video_id), vpluck(x, "id", val = "character"), res$video_id)
26 |   # a single row without any ID is treated as an empty result
27 |   if (nrow(res) == 1L && is.na(res$video_id)) res <- res[-1, ]
28 |   res
29 | }
30 |
31 |
32 | #' @noRd
33 | parse_api_comments <- function(x) {
34 |
35 |   # stack all elements of `x` into one data frame
36 |   comments <- dplyr::bind_rows(x)
37 |
38 |   # put "tt_results" in front of the existing classes
39 |   structure(comments, class = c("tt_results", class(comments)))
40 |
41 | }
42 |
43 | #' @title Print search result
44 | #' @description Print traktok search results
45 | #' @param x An object of class \code{tt_results}
46 | #' @param ... not used.
47 | #' @export
48 | print.tt_results <- function(x, ...) {
49 |   cli::cat_rule(paste("search id:", cli::col_red(attr(x, "search_id"))))
50 |   print(tibble::as_tibble(x))
51 |   invisible(x) # print methods conventionally return their input invisibly
52 | }
53 |
--------------------------------------------------------------------------------
/R/query_research.r:
--------------------------------------------------------------------------------
1 | #' Create a traktok query
2 | #'
3 | #' Create a traktok query from the given parameters.
4 | #'
5 | #' @param and,or,not A list of AND/OR/NOT conditions. Must contain one
6 | #' or multiple lists with \code{field_name}, \code{operation}, and
7 | #' \code{field_values} each (see example).
8 | #' @param q A traktok query created with \code{query}.
9 | #' @param field_name The field name to query against. One of:
10 | #' "create_date", "username", "region_code", "video_id",
11 | #' "hashtag_name", "keyword", "music_id", "effect_id",
12 | #' "video_length".
13 | #' @param operation One of: "EQ", "IN", "GT", "GTE", "LT", "LTE".
14 | #' @param field_values A vector of values to search for.
15 | #'
16 | #' @details TikTok's query consists of rather complicated lists
17 | #' dividing query elements into AND, OR and NOT:
18 | #'
19 | #' - **and**: The and conditions specify that all the conditions in the list must be met
20 | #' - **or**: The or conditions specify that at least one of the conditions in the list must be met
21 | #' - **not**: The not conditions specify that none of the conditions in the list must be met
22 | #'
23 | #' The query can be constructed by writing the list for each entry
24 | #' yourself, like in the first example. Alternatively, traktok
25 | #' provides convenience functions to build up a query using
26 | #' \code{query_and}, \code{query_or}, and \code{query_not}, which
27 | #' make building a query a little easier. You can learn more at
28 | #' <https://developers.tiktok.com/doc/research-api-specs-query-videos#query>.
29 | #'
30 | #' @return A traktok query.
31 | #'
32 | #' @export
33 | #'
34 | #' @examples
35 | #' \dontrun{
36 | #' # using query directly and supplying the list
37 | #' query(or = list(
38 | #' list(
39 | #' field_name = "hashtag_name",
40 | #' operation = "EQ",
41 | #' field_values = "rstats"
42 | #' ),
43 | #' list(
44 | #' field_name = "keyword",
45 | #' operation = "EQ",
46 | #' field_values = list("rstats", "API")
47 | #' )
48 | #' ))
49 | #' # starting an empty query and building it up using the query_* functions
50 | #' query() |>
51 | #' query_or(field_name = "hashtag_name",
52 | #' operation = "EQ",
53 | #' field_values = "rstats") |>
54 | #' query_or(field_name = "keyword",
55 | #' operation = "IN",
56 | #' field_values = c("rstats", "API"))
57 | #' }
58 | #'
59 | #' @md
60 | query <- function(and = NULL, or = NULL, not = NULL) {
61 |   conditions <- list(and = and, or = or, not = not)
62 |   # tag the list as a query, then let clean_query() drop invalid or empty entries
63 |   clean_query(structure(conditions, class = "traktok_query"))
64 | }
65 |
66 |
67 | #' @rdname query
68 | #' @export
69 | query_and <- function(q, field_name, operation, field_values) {
70 |   if (!is_query(q)) {
71 |     cli::cli_abort("{.fn query_and} needs a query as input")
72 |   }
73 |
74 |   condition <- list(field_name = field_name,
75 |                     operation = operation,
76 |                     field_values = as.list(field_values))
77 |   # wrapped in list() so that c() appends the condition as one element
78 |   q$and <- c(q$and, list(condition))
79 |   clean_query(q)
80 | }
80 |
81 |
82 | #' @rdname query
83 | #' @export
84 | query_or <- function(q, field_name, operation, field_values) {
85 |   if (!is_query(q)) {
86 |     cli::cli_abort("{.fn query_or} needs a query as input")
87 |   }
88 |   condition <- list(field_name = field_name, operation = operation,
89 |                     field_values = as.list(field_values))
90 |   # wrapped in list() so that c() appends the condition as one element
91 |   q$or <- c(q$or, list(condition))
92 |   clean_query(q)
93 | }
94 |
95 |
96 | #' @rdname query
97 | #' @export
98 | query_not <- function(q, field_name, operation, field_values) {
99 |   if (!is_query(q)) {
100 |     cli::cli_abort("{.fn query_not} needs a query as input")
101 |   }
102 |   condition <- list(field_name = field_name, operation = operation,
103 |                     field_values = as.list(field_values))
104 |   # wrapped in list() so that c() appends the condition as one element
105 |   q$not <- c(q$not, list(condition))
106 |   clean_query(q)
107 | }
108 |
109 | # TRUE if `q` carries the class "traktok_query"
110 | is_query <- function(q) {
111 |   inherits(q, what = "traktok_query")
112 | }
113 |
114 |
115 | # make sure query only consists of valid entries: conditions need exactly
116 | # three fields, and operators without conditions are removed
117 | clean_query <- function(q) {
118 |   for (op in names(q)) {
119 |     malformed <- lengths(q[[op]]) != 3
120 |     q[[op]][malformed] <- NULL
121 |     q[lengths(q) == 0] <- NULL
122 |   }
123 |   q
124 | }
125 |
126 |
127 | #' @title Print a traktok query
128 | #' @description Print the conditions of a traktok query as a tree
129 | #' @param x An object of class \code{traktok_query}
130 | #' @param ... Additional arguments passed to \code{lobstr::tree}
131 | #' @export
132 | #' @examples
133 | #' query() |>
134 | #' query_and(field_name = "hashtag_name",
135 | #' operation = "EQ",
136 | #' field_values = "rstats") |>
137 | #' print()
138 | print.traktok_query <- function(x, ...) {
139 | lobstr::tree(as.list(x), ...) # printing itself is delegated to lobstr::tree()
140 | }
141 |
--------------------------------------------------------------------------------
/R/shorthands.r:
--------------------------------------------------------------------------------
1 | #' Search videos
2 | #'
3 | #' @description \ifelse{html}{\figure{api-both.svg}{options:
4 | #'   alt='[Works on: Both]'}}{\strong{[Works on: Both]}}
5 | #'
6 | #'   Searches videos using either the Research API (if an authentication token
7 | #'   is present, see \link{auth_research}) or otherwise the unofficial hidden
8 | #'   API. See \link{tt_search_api} or \link{tt_search_hidden} respectively for
9 | #'   information about these functions.
10 | #'
11 | #' @param ... arguments passed to \link{tt_search_api} or
12 | #'   \link{tt_search_hidden}. A \code{token} is only handed to the Research
13 | #'   API function; \code{token = FALSE} selects the hidden API.
14 | #'
15 | #' @return a data.frame
16 | #' @export
17 | tt_search <- function(...) {
18 |   params <- list(...)
19 |   token <- params$token
20 |   # forward the arguments without `token`, so that it is never passed twice
21 |   params$token <- NULL
22 |   if (is.null(token)) token <- get_token(auth = FALSE)
23 |   if (isFALSE(token)) {
24 |     do.call(tt_search_hidden, params)
25 |   } else {
26 |     # passed by name: a positional token would be matched to some other argument
27 |     do.call(tt_search_api, c(params, list(token = token)))
28 |   }
29 | }
30 |
31 |
32 | #' Get videos from a TikTok user's profile
33 | #'
34 | #' @description \ifelse{html}{\figure{api-both.svg}{options:
35 | #'   alt='[Works on: Both]'}}{\strong{[Works on: Both]}}
36 | #'
37 | #'   Get all videos posted by a user (or multiple users for the Research API).
38 | #'   Searches videos using either the Research API (if an authentication token
39 | #'   is present, see \link{auth_research}) or otherwise the unofficial hidden
40 | #'   API. See \link{tt_user_videos_api} or \link{tt_user_videos_hidden} respectively for
41 | #'   information about these functions.
42 | #'
43 | #' @param username The username or usernames whose videos you want to retrieve.
44 | #' @param ... Additional arguments to be passed to the \code{\link{tt_user_videos_hidden}} or
45 | #'   \code{\link{tt_user_videos_api}} function.
46 | #'
47 | #' @examples
48 | #' \dontrun{
49 | #' # Get videos from the user "fpoe_at"
50 | #' tt_user_videos("fpoe_at")
51 | #' }
52 | #' @export
53 | tt_user_videos <- function(username, ...) {
54 |   params <- list(...)
55 |   token <- params$token
56 |   params$token <- NULL # `token` is stripped before forwarding to the hidden API
57 |   if (is.null(token)) token <- get_token(auth = FALSE)
58 |   if (isFALSE(token)) {
59 |     do.call(tt_user_videos_hidden, c(list(username), params))
60 |   } else {
61 |     tt_user_videos_api(username, ...)
62 |   }
63 | }
64 |
65 |
66 | #' @rdname tt_videos_hidden
67 | #' @export
68 | tt_videos <- function(...) {
69 | # mainly here in case the research API gains the ability to download videos
70 | tt_videos_hidden(...)
71 | }
72 |
73 | # Shorthand aliases: each name below is bound to the same function as its *_api counterpart
74 | #' @rdname tt_user_info_api
75 | #' @export
76 | tt_user_info <- tt_user_info_api
77 |
78 |
79 | #' @rdname tt_playlist_api
80 | #' @export
81 | tt_playlist <- tt_playlist_api
82 |
83 |
84 | #' @rdname tt_user_liked_videos_api
85 | #' @export
86 | tt_get_liked <- tt_user_liked_videos_api
87 |
88 |
89 | #' @rdname tt_user_reposted_api
90 | #' @export
91 | tt_get_reposted <- tt_user_reposted_api
92 |
93 |
94 | #' @rdname tt_user_pinned_videos_api
95 | #' @export
96 | tt_get_pinned <- tt_user_pinned_videos_api
97 |
98 |
99 | #' @rdname tt_comments_api
100 | #' @export
101 | tt_comments <- tt_comments_api
102 |
103 |
104 | #' Get followers and following of users
105 | #'
106 | #' @description \ifelse{html}{\figure{api-both.svg}{options:
107 | #'   alt='[Works on: Both]'}}{\strong{[Works on: Both]}}
108 | #'
109 | #'   Get usernames of users who follow a user (tt_get_follower) or get who a
110 | #'   user is following (tt_get_following).
111 | #'
112 | #' @param ... arguments passed to \link{tt_user_follower_api} or
113 | #'   \link{tt_get_follower_hidden}. A \code{token} is only handed to the
114 | #'   Research API function; \code{token = FALSE} selects the hidden API.
115 | #'
116 | #' @return a data.frame
117 | #' @export
118 | tt_get_follower <- function(...) {
119 |   params <- list(...)
120 |   token <- params$token
121 |   # forward the arguments without `token`, so that it is never passed twice
122 |   params$token <- NULL
123 |   if (is.null(token)) token <- get_token(auth = FALSE)
124 |   if (isFALSE(token)) {
125 |     do.call(tt_get_follower_hidden, params)
126 |   } else {
127 |     # passed by name: a positional token would be matched to some other argument
128 |     do.call(tt_user_follower_api, c(params, list(token = token)))
129 |   }
130 | }
131 |
132 |
133 | #' @rdname tt_get_follower
134 | #' @export
135 | tt_get_following <- function(...) {
136 |   params <- list(...)
137 |   token <- params$token
138 |   # forward the arguments without `token`, so that it is never passed twice
139 |   params$token <- NULL
140 |   if (is.null(token)) token <- get_token(auth = FALSE)
141 |   if (isFALSE(token)) {
142 |     do.call(tt_get_following_hidden, params)
143 |   } else {
144 |     # passed by name: a positional token would be matched to some other argument
145 |     do.call(tt_user_following_api, c(params, list(token = token)))
146 |   }
147 | }
148 |
149 |
150 | #' Get json file from a TikTok URL
151 | #'
152 | #' This function was replaced by \code{tt_request_hidden()}. It still works, but
153 | #' warns and then hands all arguments on to the replacement.
154 | #' @param ... arguments passed on to \code{tt_request_hidden()}.
155 | #' @export
156 | tt_json <- function(...) {
157 |   replaced_by <- "This function has been replaced by {.fn tt_request_hidden}"
158 |   cli::cli_warn(replaced_by)
159 |   tt_request_hidden(...)
160 | }
161 |
162 |
--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
1 | the <- new.env() # package-internal state shared between functions (e.g. the token and the last results)
2 |
3 | # return the first match of `pattern` (Perl regex) for each element that has one
4 | #' @noRd
5 | extract_regex <- function(str, pattern) {
6 |   first_match <- regexpr(pattern, text = str, perl = TRUE)
7 |   # regmatches() leaves out elements without a match
8 |   found <- regmatches(str, first_match)
9 |   found
10 | }
11 |
12 |
13 | # make sure `dir` exists: offer to create it, otherwise stop; NULL is ignored
14 | #' @noRd
15 | check_dir <- function(dir, name) {
16 |   if (is.null(dir) || dir.exists(dir)) {
17 |     return(invisible(NULL))
18 |   }
19 |   msg <- paste0("The selected `", name,
20 |                 "` directory does not exist.")
21 |   # askYesNo() returns NA when the prompt is cancelled; treat that like "no"
22 |   if (!isTRUE(utils::askYesNo(paste(msg, "Do you want to create it?")))) {
23 |     stop(msg)
24 |   }
25 |   # recursive = TRUE also creates missing parent directories
26 |   dir.create(dir, showWarnings = FALSE, recursive = TRUE)
27 | }
28 |
29 | #' @noRd
30 | wait <- function(sleep_pool, verbose = TRUE) {
31 |   # random fraction of one pool value; an index is drawn because sample(x, 1) picks from 1:x for a single x >= 1
32 |   sleep <- stats::runif(1) * sleep_pool[sample.int(length(sleep_pool), 1L)]
33 |   if (verbose) cli::cli_progress_message("\U23F2 waiting {round(sleep, 1)} seconds", current = FALSE)
34 |   Sys.sleep(sleep)
35 | }
36 |
37 |
38 | # vectorised safe pluck
39 | #
40 | # Extracts the element found at the path `...` from every entry of `x`.
41 | #   x    list to iterate over
42 | #   ...  accessors (names or positions) handed on to purrr::pluck()
43 | #   val  type of the result: "character", "integer", "double" or "logical" give
44 | #        an atomic vector (missing entries become NA of that type), "list"
45 | #        gives a list (missing entries become NULL)
46 | #' @noRd
47 | vpluck <- function(x, ..., val = "character") {
48 |   dots <- list(...)
49 |
50 |   # `!!!dots` is not evaluated here: it travels through `...` to pluck(), whose
51 |   # dynamic dots splice the accessors back in
52 |   if (identical(val, "list")) {
53 |     return(purrr::map(x, purrr::pluck, !!!dots))
54 |   }
55 |
56 |   # typed NA: default for missing entries and template for vapply()
57 |   def <- switch(
58 |     val,
59 |     "character" = NA_character_,
60 |     "integer" = NA_integer_,
61 |     "double" = NA_real_,
62 |     "logical" = NA,
63 |     cli::cli_abort(
64 |       "{.arg val} must be \"character\", \"integer\", \"double\", \"logical\" or \"list\", not {.val {val}}"
65 |     )
66 |   )
67 |
68 |   vapply(x, purrr::pluck, !!!dots, .default = def, FUN.VALUE = def)
69 | }
70 |
# safe pluck: like purrr::pluck(), but with NA as the default value
#' @noRd
spluck <- function(.x, ...) {
  plucked <- purrr::pluck(.x, ..., .default = NA)
  plucked
}
76 |
77 |
# makes sure list can be turned into tibble: every element whose length is
# not exactly one is wrapped in a list, so it ends up as a single cell of a
# list-column; `...` is passed on to tibble::as_tibble()
as_tibble_onerow <- function(l, ...) {
  box <- function(el) {
    if (length(el) == 1) el else list(el)
  }
  boxed <- purrr::map(l, box)
  tibble::as_tibble(boxed, ...)
}
88 |
89 |
# TRUE if `x` is a POSIXct, POSIXlt or Date object, FALSE otherwise
is_datetime <- function(x) {
  time_classes <- c("POSIXct", "POSIXlt", "Date")
  hits <- vapply(time_classes, function(cls) methods::is(x, cls), logical(1))
  any(hits)
}
95 |
# convert numeric timestamps (seconds since 1970-01-01) to POSIXct
#
# TikTok returns 0 for missing, so values that are NA or not positive become
# NA. The conversion is element-wise: one missing value in a vector no longer
# discards the valid timestamps next to it, and NA input no longer raises an
# error. The result is always POSIXct with the same length as `x`.
as_datetime <- function(x) {
  is_missing <- is.na(x) | x <= 0
  x[is_missing] <- NA
  as.POSIXct(x, origin = "1970-01-01")
}
104 |
# turn bare video IDs into video URLs
#
# x: character vector of video URLs and/or IDs. Entries that consist only of
#    digits are treated as IDs and prefixed with the TikTok video URL; all
#    other entries (URLs, NA, empty strings) are returned unchanged.
#
# Aborts if `x` is not a character vector.
id2url <- function(x) {
  if (!is.character(x)) {
    cli::cli_abort("You need to supply a character vector of video URLs or IDs")
  }
  # NA and "" contain no non-digit either, so exclude them explicitly;
  # otherwise they would become ".../video/NA" and ".../video/"
  is_id <- !is.na(x) & nzchar(x) & !grepl("\\D", x)
  x[is_id] <- paste0("https://www.tiktok.com/@/video/", x[is_id])
  x
}
112 |
113 |
# convert camelCase names to snake_case: every capital letter is replaced by
# an underscore followed by its lower-case form
clean_names <- function(x) {
  capital <- "([A-Z])"
  snake <- "_\\L\\1"
  gsub(capital, snake, x, perl = TRUE)
}
117 |
118 |
--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | output: github_document
3 | ---
4 |
5 |
6 |
7 | ```{r, include = FALSE}
8 | knitr::opts_chunk$set(
9 | collapse = TRUE,
10 | comment = "#>",
11 | fig.path = "man/figures/README-",
12 | out.width = "100%"
13 | )
14 | ```
15 |
16 | # traktok
17 |
18 |
19 | [](https://lifecycle.r-lib.org/articles/stages.html#stable)
20 | [](https://github.com/JBGruber/traktok/actions/workflows/R-CMD-check.yaml)
21 | [](https://codecov.io/gh/JBGruber/traktok?branch=main)
22 | [](https://saythanks.io/to/JBGruber)
23 |
24 |
25 | ## Feature overview
26 |
27 | | Description | Shorthand | Research API | Hidden API |
28 | |:----------------------------|:-----------------|:--------------------------|:------------------------|
29 | | search videos | tt_search | tt_search_api | tt_search_hidden[^1] |
30 | | get video detail (+file) | tt_videos | \- | tt_videos_hidden |
31 | | get user videos | tt_user_videos | tt_user_videos_api | tt_user_videos_hidden |
32 | | get user info | tt_user_info | tt_user_info_api | tt_user_info_hidden |
33 | | get comments under a video | tt_comments | tt_comments_api | \- |
34 | | get who follows a user | tt_get_follower | tt_user_follower_api | tt_get_follower_hidden |
35 | | get who a user is following | tt_get_following | tt_user_following_api | tt_get_following_hidden |
36 | | get videos a user liked | tt_get_liked | tt_user_liked_videos_api | \- |
37 | | get pinned videos of users | tt_get_pinned | tt_user_pinned_videos_api | \- |
38 | | get videos in a playlist | tt_playlist | tt_playlist_api | \- |
39 | | get raw post data | \- | \- | tt_request_hidden |
40 | | authenticate a session | \- | auth_research | auth_hidden |
41 |
42 | [^1]: Currently not working, see [#14](https://github.com/JBGruber/traktok/issues/14).
43 |
44 |
45 | The goal of traktok is to provide easy access to TikTok data. This package once started as an R port of Deen Freelon's [Pyktok](https://github.com/dfreelon/pyktok) Python module (though it is a complete rewrite without Python dependencies).
46 | It now covers functions from the secret hidden API that TikTok is using to show/search/play videos on their Website and the official [Research API](https://developers.tiktok.com/products/research-api/).
47 | Since the Research API misses some important features (and since not everyone has access to it) it can often make sense to still use the hidden API that mocks requests from a browser.
48 | However, an important disclaimer for the hidden API applies:
49 |
50 | > This program may stop working suddenly if TikTok changes how it stores its data ([see Freelon, 2018](https://osf.io/preprints/socarxiv/56f4q/)).
51 |
52 | However, the last few times this happened, it was fixed rather quickly (e.g., #12).
53 |
54 | ## Installation
55 |
56 | You can install the development version of traktok from [GitHub](https://github.com/) with:
57 |
58 | ``` r
59 | # install.packages("remotes")
60 | remotes::install_github("JBGruber/traktok")
61 | ```
62 |
63 | ## In Research
64 |
65 | The research papers and projects below have used traktok to gather their data:
66 |
67 | 1. Hohner, J., Kakavand, A., & Rothut, S. (2024). Analyzing Radical Visuals at Scale: How Far-Right Groups Mobilize on TikTok. Journal of Digital Social Research, 6(1), 10–30. https://doi.org/10.33621/jdsr.v6i1.200
68 | 1. Bach, P., Gitomer, A., Devries, M., Walker, C., Deyoe, D., Atienza-Bathelemy, J., Foucault Welles, B., Freelon, D., & Zulli, D. (2023, October). Stitching Politics and Identity on TikTok. Panel presented at AoIR2023: The 24th Annual Conference of the Association of Internet Researchers. Philadelphia, PA, USA: AoIR. Retrieved from http://spir.aoir.org
69 | 1. Wirz, D. S., Zai, F., Vogler, D., Urman, A., & Eisenegger, M. (2023). Die Qualität von Schweizer Medien auf Instagram und TikTok. https://doi.org/10.5167/UZH-238605
70 | 1. Giglietto, F. (2024). Dashboard: TikTok Coordinated Sharing Network. https://fabiogiglietto.github.io/tiktok_csbn/tt_viz.html
71 | 1. Widholm, A., Ekman, M., & Larsson, A. O. (2024). A Right-Wing Wave on TikTok? Ideological Orientations, Platform Features, and User Engagement During the Early 2022 Election Campaign in Sweden. Social Media + Society, 10(3).
72 |
73 | If you have used traktok in your research paper or project, please extend this list through a Pull Request or create an issue.
74 |
75 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | # traktok
5 |
6 |
7 |
8 | [](https://lifecycle.r-lib.org/articles/stages.html#stable)
10 | [](https://github.com/JBGruber/traktok/actions/workflows/R-CMD-check.yaml)
11 | [](https://codecov.io/gh/JBGruber/traktok?branch=main)
13 | [](https://saythanks.io/to/JBGruber)
14 |
15 |
16 | ## Feature overview
17 |
18 | | Description | Shorthand | Research API | Hidden API |
19 | |:---|:---|:---|:---|
20 | | search videos | tt_search | tt_search_api | tt_search_hidden[^1] |
21 | | get video detail (+file) | tt_videos | \- | tt_videos_hidden |
22 | | get user videos | tt_user_videos | tt_user_videos_api | tt_user_videos_hidden |
23 | | get user info | tt_user_info | tt_user_info_api | tt_user_info_hidden |
24 | | get comments under a video | tt_comments | tt_comments_api | \- |
25 | | get who follows a user | tt_get_follower | tt_user_follower_api | tt_get_follower_hidden |
26 | | get who a user is following | tt_get_following | tt_user_following_api | tt_get_following_hidden |
27 | | get videos a user liked | tt_get_liked | tt_user_liked_videos_api | \- |
28 | | get pinned videos of users | tt_get_pinned | tt_user_pinned_videos_api | \- |
29 | | get videos in a playlist | tt_playlist | tt_playlist_api | \- |
30 | | get raw post data | \- | \- | tt_request_hidden |
31 | | authenticate a session | \- | auth_research | auth_hidden |
32 |
33 | The goal of traktok is to provide easy access to TikTok data. This
34 | package once started as an R port of Deen Freelon’s
35 | [Pyktok](https://github.com/dfreelon/pyktok) Python module (though it is
36 | a complete rewrite without Python dependencies). It now covers functions
37 | from the secret hidden API that TikTok is using to show/search/play
38 | videos on their Website and the official [Research
39 | API](https://developers.tiktok.com/products/research-api/). Since the
40 | Research API misses some important features (and since not everyone has
41 | access to it) it can often make sense to still use the hidden API that
42 | mocks requests from a browser. However, an important disclaimer for the
43 | hidden API applies:
44 |
45 | > This program may stop working suddenly if TikTok changes how it stores
46 | > its data ([see Freelon,
47 | > 2018](https://osf.io/preprints/socarxiv/56f4q/)).
48 |
49 | However, the last few times this happened, it was fixed rather quickly (e.g., \#12).
50 |
51 | ## Installation
52 |
53 | You can install the development version of traktok from
54 | [GitHub](https://github.com/) with:
55 |
56 | ``` r
57 | # install.packages("remotes")
58 | remotes::install_github("JBGruber/traktok")
59 | ```
60 |
61 | ## In Research
62 |
63 | The research papers and projects below have used traktok to gather their
64 | data:
65 |
66 | 1. Hohner, J., Kakavand, A., & Rothut, S. (2024). Analyzing Radical
67 | Visuals at Scale: How Far-Right Groups Mobilize on TikTok. Journal
68 | of Digital Social Research, 6(1), 10–30.
69 | <https://doi.org/10.33621/jdsr.v6i1.200>
70 | 2. Bach, P., Gitomer, A., Devries, M., Walker, C., Deyoe, D.,
71 | Atienza-Bathelemy, J., Foucault Welles, B., Freelon, D., & Zulli, D.
72 | (2023, October). Stitching Politics and Identity on TikTok. Panel
73 | presented at AoIR2023: The 24th Annual Conference of the Association
74 | of Internet Researchers. Philadelphia, PA, USA: AoIR. Retrieved from
75 | <http://spir.aoir.org>
76 | 3. Wirz, D. S., Zai, F., Vogler, D., Urman, A., & Eisenegger, M.
77 | (2023). Die Qualität von Schweizer Medien auf Instagram und TikTok.
78 | <https://doi.org/10.5167/UZH-238605>
79 | 4. Giglietto, F. (2024). Dashboard: TikTok Coordinated Sharing Network.
80 | <https://fabiogiglietto.github.io/tiktok_csbn/tt_viz.html>
81 | 5. Widholm, A., Ekman, M., & Larsson, A. O. (2024). A Right-Wing Wave
82 | on TikTok? Ideological Orientations, Platform Features, and User
83 | Engagement During the Early 2022 Election Campaign in Sweden. Social
84 | Media + Society, 10(3).
85 |
86 | If you have used traktok in your research paper or project, please
87 | extend this list through a Pull Request or create an issue.
88 |
89 | [^1]: Currently not working, see
90 | [\#14](https://github.com/JBGruber/traktok/issues/14).
91 |
--------------------------------------------------------------------------------
/_pkgdown.yml:
--------------------------------------------------------------------------------
1 | url: https://jbgruber.github.io/traktok/
2 | template:
3 | bootstrap: 5
4 |
5 |
--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
1 | comment: false
2 |
3 | coverage:
4 | status:
5 | project:
6 | default:
7 | target: auto
8 | threshold: 1%
9 | informational: true
10 | patch:
11 | default:
12 | target: auto
13 | threshold: 1%
14 | informational: true
15 |
--------------------------------------------------------------------------------
/inst/CITATION:
--------------------------------------------------------------------------------
citHeader("To cite traktok in publications use:")

# bibentry() replaces the old-style citEntry(), which only forwarded to it
# (with `entry` as `bibtype`) and is reported by R CMD check in newer R
# versions; the citation itself is unchanged
bibentry(
  bibtype = "Manual",
  title = "traktok. Getting TikTok data through the official and unofficial APIs",
  author = "Johannes B. Gruber",
  year = 2023,
  url = "https://github.com/JBGruber/traktok",
  note = "R package version 0.0.4.9000",
  textVersion = "Gruber, Johannes B. (2023). traktok. An R package to scrape data from TikTok. R package version 0.0.4.9000. https://github.com/JBGruber/traktok."
)
14 |
--------------------------------------------------------------------------------
/inst/WORDLIST:
--------------------------------------------------------------------------------
1 | Analyzing
2 | AoIR
3 | Atienza
4 | Bathelemy
5 | CMD
6 | Codecov
7 | Deen
8 | Devries
9 | Deyoe
10 | Eisenegger
11 | Freelon
12 | Freelon's
13 | Freelon’s
14 | GTE
15 | Giglietto
16 | Gitomer
17 | Hohner
18 | Kakavand
19 | LTE
20 | Lifecycle
21 | Medien
22 | Pyktok
23 | Qualität
24 | Rothut
25 | TikTok
26 | TikTok's
27 | Urman
28 | Vogler
29 | Welles
30 | Wirz
31 | Zai
32 | Zulli
33 | api
34 | auf
35 | chromote
36 | com
37 | cookiefile
38 | etc
39 | json
40 | rstats
41 | rvest
42 | secuid
43 | th
44 | tidyverse
45 | tiktok
46 | tt
47 | ual
48 | und
49 | vidoes
50 |
--------------------------------------------------------------------------------
/man/auth_check.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/auth_check.r
3 | \name{auth_check}
4 | \alias{auth_check}
5 | \title{Check whether you are authenticated}
6 | \usage{
7 | auth_check(research = TRUE, hidden = TRUE, silent = FALSE)
8 | }
9 | \arguments{
10 | \item{research, hidden}{turn check on/off for the research or hidden API.}
11 |
12 | \item{silent}{only return if check(s) were successful, no status on the
13 | screen}
14 | }
15 | \value{
16 | logical vector (invisible)
17 | }
18 | \description{
19 | \ifelse{html}{\figure{api-both.svg}{options:
20 | alt='[Works on: Both]'}}{\strong{[Works on: Both]}}
21 |
22 | Check if the necessary token or cookies are stored on your computer
23 | already. By default, the function checks for the authentication of the
24 | research and hidden API. To learn how you can authenticate, look at the
25 | vignette for the research (\code{vignette("research-api", package =
26 | "traktok")}) or hidden (\code{vignette("unofficial-api", package =
27 | "traktok")}) API.
28 | }
29 | \examples{
30 | auth_check()
31 | }
32 |
--------------------------------------------------------------------------------
/man/auth_hidden.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/auth_hidden.r
3 | \name{auth_hidden}
4 | \alias{auth_hidden}
5 | \title{Authenticate for the hidden/unofficial API}
6 | \usage{
7 | auth_hidden(cookiefile, live = interactive())
8 | }
9 | \arguments{
10 | \item{cookiefile}{path to your cookiefile. Usually not needed after running
11 | \link{auth_hidden} once. See \code{vignette("unofficial-api", package =
12 | "traktok")} for more information on authentication.}
13 |
14 | \item{live}{opens Chromium browser to guide you through the auth process
15 | (experimental).}
16 | }
17 | \value{
18 | nothing. Called to set up authentication
19 | }
20 | \description{
21 | Guides you through authentication for the hidden/unofficial API
22 | }
23 | \examples{
24 | \dontrun{
25 | # to run through the steps of authentication
26 | auth_hidden()
27 | # or point to a cookie file directly
28 | auth_hidden("www.tiktok.com_cookies.txt")
29 | }
30 | }
31 |
--------------------------------------------------------------------------------
/man/auth_research.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/auth_research.r
3 | \name{auth_research}
4 | \alias{auth_research}
5 | \title{Authenticate for the official research API}
6 | \usage{
7 | auth_research(client_key, client_secret)
8 | }
9 | \arguments{
10 | \item{client_key}{Client key for authentication}
11 |
12 | \item{client_secret}{Client secret for authentication}
13 | }
14 | \value{
15 | An authentication token (invisible)
16 | }
17 | \description{
18 | Guides you through authentication for the Research API
19 | }
20 | \details{
21 | You need to apply for access to the API and get the key
22 | and secret from TikTok. See
23 | \url{https://developers.tiktok.com/products/research-api/} for more
24 | information.
25 | }
26 | \examples{
27 | \dontrun{
28 | auth_research(client_key, client_secret)
29 | }
30 | }
31 |
--------------------------------------------------------------------------------
/man/figures/api-both.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/man/figures/api-research.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/man/figures/api-unofficial.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/man/figures/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JBGruber/traktok/d97f4ea5fb4f728aba1f64f9f71f5740bd626442/man/figures/logo.png
--------------------------------------------------------------------------------
/man/last_query.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/last_.r
3 | \name{last_query}
4 | \alias{last_query}
5 | \alias{last_comments}
6 | \title{Retrieve most recent query}
7 | \usage{
8 | last_query()
9 |
10 | last_comments()
11 | }
12 | \value{
13 | a list of unparsed videos
14 | }
15 | \description{
16 | If \code{tt_search_api} or \code{tt_comments_api} fail after already getting
17 | several pages, you can use this function to get all videos that have been
18 | retrieved so far from memory. Does not work when the session has crashed. In
19 | that case, look in \code{tempdir()} for an RDS file as a last resort.
20 | }
21 |
--------------------------------------------------------------------------------
/man/print.traktok_query.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/query_research.r
3 | \name{print.traktok_query}
4 | \alias{print.traktok_query}
5 | \title{Print a traktok query}
6 | \usage{
7 | \method{print}{traktok_query}(x, ...)
8 | }
9 | \arguments{
10 | \item{x}{An object of class \code{traktok_query}}
11 |
12 | \item{...}{Additional arguments passed to \code{lobstr::tree}}
13 | }
14 | \description{
15 | Print a traktok query as a tree
16 | }
17 | \examples{
18 | query() |>
19 | query_and(field_name = "hashtag_name",
20 | operation = "EQ",
21 | field_values = "rstats") |>
22 | print()
23 | }
24 |
--------------------------------------------------------------------------------
/man/print.tt_results.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/parse_research.r
3 | \name{print.tt_results}
4 | \alias{print.tt_results}
5 | \title{Print search result}
6 | \usage{
7 | \method{print}{tt_results}(x, ...)
8 | }
9 | \arguments{
10 | \item{x}{An object of class \code{tt_results}}
11 |
12 | \item{...}{not used.}
13 | }
14 | \description{
15 | Print traktok search results
16 | }
17 |
--------------------------------------------------------------------------------
/man/query.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/query_research.r
3 | \name{query}
4 | \alias{query}
5 | \alias{query_and}
6 | \alias{query_or}
7 | \alias{query_not}
8 | \title{Create a traktok query}
9 | \usage{
10 | query(and = NULL, or = NULL, not = NULL)
11 |
12 | query_and(q, field_name, operation, field_values)
13 |
14 | query_or(q, field_name, operation, field_values)
15 |
16 | query_not(q, field_name, operation, field_values)
17 | }
18 | \arguments{
19 | \item{and, or, not}{A list of AND/OR/NOT conditions. Must contain one
20 | or multiple lists with \code{field_name}, \code{operation}, and
21 | \code{field_values} each (see example).}
22 |
23 | \item{q}{A traktok query created with \code{query}.}
24 |
25 | \item{field_name}{The field name to query against. One of:
26 | "create_date", "username", "region_code", "video_id",
27 | "hashtag_name", "keyword", "music_id", "effect_id",
28 | "video_length".}
29 |
30 | \item{operation}{One of: "EQ", "IN", "GT", "GTE", "LT", "LTE".}
31 |
32 | \item{field_values}{A vector of values to search for.}
33 | }
34 | \value{
35 | A traktok query.
36 | }
37 | \description{
38 | Create a traktok query from the given parameters.
39 | }
40 | \details{
41 | TikTok's query consists of rather complicated lists
42 | dividing query elements into AND, OR and NOT:
43 | \itemize{
44 | \item \strong{and}: The and conditions specify that all the conditions in the list must be met
45 | \item \strong{or}: The or conditions specify that at least one of the conditions in the list must be met
46 | \item \strong{not}: The not conditions specify that none of the conditions in the list must be met
47 | }
48 |
49 | The query can be constructed by writing the list for each entry
50 | yourself, like in the first example. Alternatively, traktok
51 | provides convenience functions to build up a query using
52 | \code{query_and}, \code{query_or}, and \code{query_not}, which
53 | make building a query a little easier. You can learn more at
54 | \url{https://developers.tiktok.com/doc/research-api-specs-query-videos#query}.
55 | }
56 | \examples{
57 | \dontrun{
58 | # using query directly and supplying the list
59 | query(or = list(
60 | list(
61 | field_name = "hashtag_name",
62 | operation = "EQ",
63 | field_values = "rstats"
64 | ),
65 | list(
66 | field_name = "keyword",
67 | operation = "EQ",
68 | field_values = list("rstats", "API")
69 | )
70 | ))
71 | # starting an empty query and building it up using the query_* functions
72 | query() |>
73 | query_or(field_name = "hashtag_name",
74 | operation = "EQ",
75 | field_values = "rstats") |>
76 | query_or(field_name = "keyword",
77 | operation = "IN",
78 | field_values = c("rstats", "API"))
79 | }
80 |
81 | }
82 |
--------------------------------------------------------------------------------
/man/tt_comments_api.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/api_research.r, R/shorthands.r
3 | \name{tt_comments_api}
4 | \alias{tt_comments_api}
5 | \alias{tt_comments}
6 | \title{Retrieve video comments}
7 | \usage{
8 | tt_comments_api(
9 | video_id,
10 | fields = "all",
11 | start_cursor = 0L,
12 | max_pages = 1L,
13 | cache = TRUE,
14 | verbose = TRUE,
15 | token = NULL
16 | )
17 |
18 | tt_comments(
19 | video_id,
20 | fields = "all",
21 | start_cursor = 0L,
22 | max_pages = 1L,
23 | cache = TRUE,
24 | verbose = TRUE,
25 | token = NULL
26 | )
27 | }
28 | \arguments{
29 | \item{video_id}{The id or URL of a video}
30 |
31 | \item{fields}{The fields to be returned (defaults to all)}
32 |
33 | \item{start_cursor}{The starting cursor, i.e., how many results to skip (for
34 | picking up an old search).}
35 |
36 | \item{max_pages}{results are returned in batches/pages with 100 videos. How
37 | many should be requested before the function stops?}
38 |
39 | \item{cache}{should progress be saved in the current session? It can then be
40 | retrieved with \code{last_query()} if an error occurs. But the function
41 | will use extra memory.}
42 |
43 | \item{verbose}{should the function print status updates to the screen?}
44 |
45 | \item{token}{The authentication token (usually supplied automatically after
46 | running \link{auth_research} once).}
47 | }
48 | \value{
49 | A data.frame of parsed comments
50 | }
51 | \description{
52 | \ifelse{html}{\figure{api-research.svg}{options: alt='[Works on: Research API]'}}{\strong{[Works on: Research API]}}
53 | }
54 | \examples{
55 | \dontrun{
56 | tt_comments("https://www.tiktok.com/@tiktok/video/7106594312292453675")
57 | # OR
58 | tt_comments("7106594312292453675")
59 | # OR
60 | tt_comments_api("7106594312292453675")
61 | }
62 | }
63 |
--------------------------------------------------------------------------------
/man/tt_get_follower.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/shorthands.r
3 | \name{tt_get_follower}
4 | \alias{tt_get_follower}
5 | \alias{tt_get_following}
6 | \title{Get followers and following of users}
7 | \usage{
8 | tt_get_follower(...)
9 |
10 | tt_get_following(...)
11 | }
12 | \arguments{
13 | \item{...}{arguments passed to \link{tt_user_follower_api} or
14 | \link{tt_get_follower_hidden}. To use the research API, include \code{token}
15 | (e.g., \code{token = NULL}).}
16 | }
17 | \value{
18 | a data.frame
19 | }
20 | \description{
21 | \ifelse{html}{\figure{api-both.svg}{options:
22 | alt='[Works on: Both]'}}{\strong{[Works on: Both]}}
23 |
24 | Get usernames of users who follow a user (tt_get_follower) or get who a
25 | user is following (tt_get_following).
26 | }
27 |
--------------------------------------------------------------------------------
/man/tt_get_following_hidden.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/api_hidden.r
3 | \name{tt_get_following_hidden}
4 | \alias{tt_get_following_hidden}
5 | \alias{tt_get_follower_hidden}
6 | \title{Get followers and following of a user from the hidden API}
7 | \usage{
8 | tt_get_following_hidden(
9 | secuid,
10 | sleep_pool = 1:10,
11 | max_tries = 5L,
12 | cookiefile = NULL,
13 | verbose = TRUE
14 | )
15 |
16 | tt_get_follower_hidden(
17 | secuid,
18 | sleep_pool = 1:10,
19 | max_tries = 5L,
20 | cookiefile = NULL,
21 | verbose = TRUE
22 | )
23 | }
24 | \arguments{
25 | \item{secuid}{The secuid of a user. You can get it with
26 | \link{tt_user_info_hidden} by querying an account (see example).}
27 |
28 | \item{sleep_pool}{a vector of numbers from which a waiting period is randomly
29 | drawn.}
30 |
31 | \item{max_tries}{how often to retry if a request fails.}
32 |
33 | \item{cookiefile}{path to your cookiefile. Usually not needed after running
34 | \link{auth_hidden} once. See \code{vignette("unofficial-api", package =
35 | "traktok")} for more information on authentication.}
36 |
37 | \item{verbose}{should the function print status updates to the screen?}
38 | }
39 | \value{
40 | a data.frame of followers
41 | }
42 | \description{
43 | \ifelse{html}{\figure{api-unofficial.svg}{options: alt='[Works on:
44 | Unofficial API]'}}{\strong{[Works on: Unofficial API]}}
45 |
46 | Get up to 5,000 accounts who follow a user or accounts a user follows.
47 | }
48 | \examples{
49 | \dontrun{
50 | df <- tt_user_info_hidden("https://www.tiktok.com/@fpoe_at")
51 | tt_get_follower_hidden(df$secUid)
52 | }
53 | }
54 |
--------------------------------------------------------------------------------
/man/tt_json.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/shorthands.r
3 | \name{tt_json}
4 | \alias{tt_json}
5 | \title{Get json file from a TikTok URL}
6 | \usage{
7 | tt_json(...)
8 | }
9 | \arguments{
10 | \item{...}{\code{tt_request_hidden()}.}
11 | }
12 | \description{
13 | This function was replaced by \code{tt_request_hidden()}.
14 | }
15 |
--------------------------------------------------------------------------------
/man/tt_playlist_api.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/api_research.r, R/shorthands.r
3 | \name{tt_playlist_api}
4 | \alias{tt_playlist_api}
5 | \alias{tt_playlist}
6 | \title{Lookup TikTok playlist using the research API}
7 | \usage{
8 | tt_playlist_api(playlist_id, verbose = TRUE, token = NULL)
9 |
10 | tt_playlist(playlist_id, verbose = TRUE, token = NULL)
11 | }
12 | \arguments{
13 | \item{playlist_id}{playlist ID or URL to a playlist.}
14 |
15 | \item{verbose}{should the function print status updates to the screen?}
16 |
17 | \item{token}{The authentication token (usually supplied automatically after
18 | running \link{auth_research} once).}
19 | }
20 | \value{
21 | A data.frame
22 | }
23 | \description{
24 | \ifelse{html}{\figure{api-research.svg}{options: alt='[Works on:
25 | Research API]'}}{\strong{[Works on: Research API]}}
26 | }
27 |
--------------------------------------------------------------------------------
/man/tt_request_hidden.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/api_hidden.r
3 | \name{tt_request_hidden}
4 | \alias{tt_request_hidden}
5 | \title{Get json string from a TikTok URL using the hidden API}
6 | \usage{
7 | tt_request_hidden(url, max_tries = 5L, cookiefile = NULL)
8 | }
9 | \arguments{
10 | \item{url}{a URL to a TikTok video or account}
11 |
12 | \item{max_tries}{how often to retry if a request fails.}
13 |
14 | \item{cookiefile}{path to your cookiefile. Usually not needed after running
15 | \link{auth_hidden} once. See \code{vignette("unofficial-api", package =
16 | "traktok")} for more information on authentication.}
17 | }
18 | \description{
19 | \ifelse{html}{\figure{api-unofficial.svg}{options:
20 | alt='[Works on: Unofficial API]'}}{\strong{[Works on: Unofficial API]}}
21 |
22 | Use this function in case you want to check the full data for a given
23 | TikTok video or account. In tt_videos, only an opinionated selection of
24 | data is included in the final object. If you want some different
25 | information, you can use this function.
26 | }
27 |
--------------------------------------------------------------------------------
/man/tt_search.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/shorthands.r
3 | \name{tt_search}
4 | \alias{tt_search}
5 | \title{Search videos}
6 | \usage{
7 | tt_search(...)
8 | }
9 | \arguments{
10 | \item{...}{arguments passed to \link{tt_search_api} or
11 | \link{tt_search_hidden}. To use the research API, include \code{token}
12 | (e.g., \code{token = NULL}).}
13 | }
14 | \value{
15 | a data.frame
16 | }
17 | \description{
18 | \ifelse{html}{\figure{api-both.svg}{options:
19 | alt='[Works on: Both]'}}{\strong{[Works on: Both]}}
20 |
21 | Searches videos using either the Research API (if an authentication token
22 | is present, see \link{auth_research}) or otherwise the unofficial hidden
23 | API. See \link{tt_search_api} or \link{tt_search_hidden} respectively for
24 | information about these functions.
25 | }
26 |
--------------------------------------------------------------------------------
/man/tt_search_api.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/api_research.r
3 | \name{tt_search_api}
4 | \alias{tt_search_api}
5 | \alias{tt_query_videos}
6 | \title{Query TikTok videos using the research API}
7 | \usage{
8 | tt_search_api(
9 | query,
10 | start_date = Sys.Date() - 1,
11 | end_date = Sys.Date(),
12 | fields = "all",
13 | start_cursor = 0L,
14 | search_id = NULL,
15 | is_random = FALSE,
16 | max_pages = 1,
17 | parse = TRUE,
18 | cache = TRUE,
19 | verbose = TRUE,
20 | token = NULL
21 | )
22 |
23 | tt_query_videos(
24 | query,
25 | start_date = Sys.Date() - 1,
26 | end_date = Sys.Date(),
27 | fields = "all",
28 | start_cursor = 0L,
29 | search_id = NULL,
30 | is_random = FALSE,
31 | max_pages = 1,
32 | parse = TRUE,
33 | cache = TRUE,
34 | verbose = TRUE,
35 | token = NULL
36 | )
37 | }
38 | \arguments{
39 | \item{query}{A query string or object (see \link{query}).}
40 |
41 | \item{start_date, end_date}{A start and end date to narrow the search
42 | (required; can be a maximum of 30 days apart).}
43 |
44 | \item{fields}{The fields to be returned (defaults to all)}
45 |
46 | \item{start_cursor}{The starting cursor, i.e., how many results to skip (for
47 | picking up an old search).}
48 |
49 | \item{search_id}{The search id (for picking up an old search).}
50 |
51 | \item{is_random}{Whether the query is random (defaults to FALSE).}
52 |
53 | \item{max_pages}{results are returned in batches/pages with 100 videos. How
54 | many should be requested before the function stops?}
55 |
56 | \item{parse}{Should the results be parsed? Otherwise, the original JSON
57 | object is returned as a nested list.}
58 |
59 | \item{cache}{should progress be saved in the current session? It can then be
60 | retrieved with \code{last_query()} if an error occurs. But the function
61 | will use extra memory.}
62 |
63 | \item{verbose}{should the function print status updates to the screen?}
64 |
65 | \item{token}{The authentication token (usually supplied automatically after
66 | running \link{auth_research} once).}
67 | }
68 | \value{
69 | A data.frame of parsed TikTok videos (or a nested list).
70 | }
71 | \description{
72 | \ifelse{html}{\figure{api-research.svg}{options: alt='[Works on:
73 | Research API]'}}{\strong{[Works on: Research API]}}
74 |
75 | This is the version of \link{tt_search} that explicitly uses the Research API.
76 | Use \link{tt_search_hidden} for the unofficial API version.
77 | }
78 | \examples{
79 | \dontrun{
80 | # look for a keyword or hashtag by default
81 | tt_search_api("rstats")
82 |
83 | # or build a more elaborate query
84 | query() |>
85 | query_and(field_name = "region_code",
86 | operation = "IN",
87 | field_values = c("JP", "US")) |>
88 | query_or(field_name = "hashtag_name",
89 | operation = "EQ", # rstats is the only hashtag
90 | field_values = "rstats") |>
91 | query_or(field_name = "keyword",
92 | operation = "IN", # rstats is one of the keywords
93 | field_values = "rstats") |>
94 | query_not(operation = "EQ",
95 | field_name = "video_length",
96 | field_values = "SHORT") |>
97 | tt_search_api()
98 |
99 | # when a search fails after a while, get the results and pick it back up
100 | # (only works with the same parameters)
101 | last_pull <- last_query()
102 | query() |>
103 | query_and(field_name = "region_code",
104 | operation = "IN",
105 | field_values = c("JP", "US")) |>
106 | query_or(field_name = "hashtag_name",
107 | operation = "EQ", # rstats is the only hashtag
108 | field_values = "rstats") |>
109 | query_or(field_name = "keyword",
110 | operation = "IN", # rstats is one of the keywords
111 | field_values = "rstats") |>
112 | query_not(operation = "EQ",
113 | field_name = "video_length",
114 | field_values = "SHORT") |>
115 | tt_search_api(start_cursor = length(last_pull) + 1,
116 | search_id = attr(last_pull, "search_id"))
117 | }
118 | }
119 |
--------------------------------------------------------------------------------
/man/tt_search_hidden.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/api_hidden.r
3 | \name{tt_search_hidden}
4 | \alias{tt_search_hidden}
5 | \title{Search videos}
6 | \usage{
7 | tt_search_hidden(
8 | query,
9 | offset = 0,
10 | max_pages = Inf,
11 | sleep_pool = 1:10,
12 | max_tries = 5L,
13 | cookiefile = NULL,
14 | verbose = TRUE
15 | )
16 | }
17 | \arguments{
18 | \item{query}{query as one string}
19 |
20 | \item{offset}{how many videos to skip. For example, if you already have the
21 | first X of a search.}
22 |
23 | \item{max_pages}{how many pages to get before stopping the search.}
24 |
25 | \item{sleep_pool}{a vector of numbers from which a waiting period is randomly
26 | drawn.}
27 |
28 | \item{max_tries}{how often to retry if a request fails.}
29 |
30 | \item{cookiefile}{path to your cookiefile. Usually not needed after running
31 | \link{auth_hidden} once. See \code{vignette("unofficial-api", package =
32 | "traktok")} for more information on authentication.}
33 |
34 | \item{verbose}{should the function print status updates to the screen?}
35 | }
36 | \value{
37 | a data.frame
38 | }
39 | \description{
40 | \ifelse{html}{\figure{api-unofficial.svg}{options: alt='[Works on:
41 | Unofficial API]'}}{\strong{[Works on: Unofficial API]}}
42 |
43 | This is the version of \link{tt_search} that explicitly uses the unofficial
44 | API. Use \link{tt_search_api} for the Research API version.
45 | }
46 | \details{
47 | The function will wait between scraping two videos to make it less
48 | obvious that a scraper is accessing the site. The period is drawn randomly
49 | from the \code{sleep_pool} and multiplied by a random fraction.
50 | }
51 | \examples{
52 | \dontrun{
53 | tt_search_hidden("#rstats", max_pages = 2)
54 | }
55 | }
56 |
--------------------------------------------------------------------------------
/man/tt_user_follower_api.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/api_research.r
3 | \name{tt_user_follower_api}
4 | \alias{tt_user_follower_api}
5 | \alias{tt_user_following_api}
6 | \title{Get followers and following of users from the research API}
7 | \usage{
8 | tt_user_follower_api(
9 | username,
10 | max_pages = 1,
11 | cache = TRUE,
12 | verbose = TRUE,
13 | token = NULL
14 | )
15 |
16 | tt_user_following_api(
17 | username,
18 | max_pages = 1,
19 | cache = TRUE,
20 | verbose = TRUE,
21 | token = NULL
22 | )
23 | }
24 | \arguments{
25 | \item{username}{name(s) of the user(s) to be queried}
26 |
27 | \item{max_pages}{results are returned in batches/pages with 100 videos. How
28 | many should be requested before the function stops?}
29 |
30 | \item{cache}{should progress be saved in the current session? It can then be
31 | retrieved with \code{last_query()} if an error occurs. But the function
32 | will use extra memory.}
33 |
34 | \item{verbose}{should the function print status updates to the screen?}
35 |
36 | \item{token}{The authentication token (usually supplied automatically after
37 | running \link{auth_research} once).}
38 | }
39 | \value{
40 | A data.frame
41 | }
42 | \description{
43 | \ifelse{html}{\figure{api-research.svg}{options: alt='[Works on:
44 | Research API]'}}{\strong{[Works on: Research API]}}
45 | }
46 | \examples{
47 | \dontrun{
48 | tt_user_follower_api("jbgruber")
49 | # OR
50 | tt_user_following_api("https://www.tiktok.com/@tiktok")
51 | # OR
52 | tt_get_follower("https://www.tiktok.com/@tiktok")
53 | }
54 | }
55 |
--------------------------------------------------------------------------------
/man/tt_user_info_api.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/api_research.r, R/shorthands.r
3 | \name{tt_user_info_api}
4 | \alias{tt_user_info_api}
5 | \alias{tt_user_info}
6 | \title{Lookup TikTok information about a user using the research API}
7 | \usage{
8 | tt_user_info_api(username, fields = "all", verbose = TRUE, token = NULL)
9 |
10 | tt_user_info(username, fields = "all", verbose = TRUE, token = NULL)
11 | }
12 | \arguments{
13 | \item{username}{name(s) of the user(s) to be queried}
14 |
15 | \item{fields}{The fields to be returned (defaults to all)}
16 |
17 | \item{verbose}{should the function print status updates to the screen?}
18 |
19 | \item{token}{The authentication token (usually supplied automatically after
20 | running \link{auth_research} once).}
21 | }
22 | \value{
23 | A data.frame of information about the queried user(s)
24 | }
25 | \description{
26 | \ifelse{html}{\figure{api-research.svg}{options: alt='[Works on:
27 | Research API]'}}{\strong{[Works on: Research API]}}
28 | }
29 | \examples{
30 | \dontrun{
31 | tt_user_info_api("jbgruber")
32 | # OR
33 | tt_user_info_api("https://www.tiktok.com/@tiktok")
34 | # OR
35 | tt_user_info("https://www.tiktok.com/@tiktok")
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/man/tt_user_info_hidden.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/api_hidden.r
3 | \name{tt_user_info_hidden}
4 | \alias{tt_user_info_hidden}
5 | \title{Get information about a user from the hidden API}
6 | \usage{
7 | tt_user_info_hidden(username, parse = TRUE)
8 | }
9 | \arguments{
10 | \item{username}{A URL to a video or username.}
11 |
12 | \item{parse}{Whether to parse the data into a data.frame (set to FALSE to get
13 | the full list).}
14 | }
15 | \value{
16 | A data.frame of user info.
17 | }
18 | \description{
19 | Get information about a user from the hidden API
20 | }
21 | \examples{
22 | \dontrun{
23 | df <- tt_user_info_hidden("https://www.tiktok.com/@fpoe_at")
24 | }
25 | }
26 |
--------------------------------------------------------------------------------
/man/tt_user_liked_videos_api.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/api_research.r, R/shorthands.r
3 | \name{tt_user_liked_videos_api}
4 | \alias{tt_user_liked_videos_api}
5 | \alias{tt_get_liked}
6 | \title{Lookup which videos were liked by a user using the research API}
7 | \usage{
8 | tt_user_liked_videos_api(
9 | username,
10 | fields = "all",
11 | max_pages = 1,
12 | cache = TRUE,
13 | verbose = TRUE,
14 | token = NULL
15 | )
16 |
17 | tt_get_liked(
18 | username,
19 | fields = "all",
20 | max_pages = 1,
21 | cache = TRUE,
22 | verbose = TRUE,
23 | token = NULL
24 | )
25 | }
26 | \arguments{
27 | \item{username}{name(s) of the user(s) to be queried}
28 |
29 | \item{fields}{The fields to be returned (defaults to all)}
30 |
31 | \item{max_pages}{results are returned in batches/pages with 100 videos. How
32 | many should be requested before the function stops?}
33 |
34 | \item{cache}{should progress be saved in the current session? It can then be
35 | retrieved with \code{last_query()} if an error occurs. But the function
36 | will use extra memory.}
37 |
38 | \item{verbose}{should the function print status updates to the screen?}
39 |
40 | \item{token}{The authentication token (usually supplied automatically after
41 | running \link{auth_research} once).}
42 | }
43 | \value{
44 | A data.frame of parsed TikTok videos the user has liked
45 | }
46 | \description{
47 | \ifelse{html}{\figure{api-research.svg}{options: alt='[Works on:
48 | Research API]'}}{\strong{[Works on: Research API]}}
49 | }
50 | \examples{
51 | \dontrun{
52 | tt_get_liked("jbgruber")
53 | # OR
54 | tt_user_liked_videos_api("https://www.tiktok.com/@tiktok")
55 | # OR
56 | tt_user_liked_videos_api("https://www.tiktok.com/@tiktok")
57 |
58 | # note: none of these work because I could not find any account that
59 | # has likes public!
60 | }
61 | }
62 |
--------------------------------------------------------------------------------
/man/tt_user_pinned_videos_api.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/api_research.r, R/shorthands.r
3 | \name{tt_user_pinned_videos_api}
4 | \alias{tt_user_pinned_videos_api}
5 | \alias{tt_get_pinned}
6 | \title{Lookup which videos were pinned by a user using the research API}
7 | \usage{
8 | tt_user_pinned_videos_api(
9 | username,
10 | fields = "all",
11 | cache = TRUE,
12 | verbose = TRUE,
13 | token = NULL
14 | )
15 |
16 | tt_get_pinned(
17 | username,
18 | fields = "all",
19 | cache = TRUE,
20 | verbose = TRUE,
21 | token = NULL
22 | )
23 | }
24 | \arguments{
25 | \item{username}{vector of user names (handles) or URLs to users' pages.}
26 |
27 | \item{fields}{The fields to be returned (defaults to all)}
28 |
29 | \item{cache}{should progress be saved in the current session? It can then be
30 | retrieved with \code{last_query()} if an error occurs. But the function
31 | will use extra memory.}
32 |
33 | \item{verbose}{should the function print status updates to the screen?}
34 |
35 | \item{token}{The authentication token (usually supplied automatically after
36 | running \link{auth_research} once).}
37 | }
38 | \value{
39 | A data.frame of parsed TikTok videos the user has pinned
40 | }
41 | \description{
42 | \ifelse{html}{\figure{api-research.svg}{options: alt='[Works on:
43 | Research API]'}}{\strong{[Works on: Research API]}}
44 | }
45 | \examples{
46 | \dontrun{
47 | tt_get_pinned("jbgruber")
48 | # OR
49 | tt_user_pinned_videos_api("https://www.tiktok.com/@tiktok")
50 | # OR
51 | tt_user_pinned_videos_api("https://www.tiktok.com/@tiktok")
52 | }
53 | }
54 |
--------------------------------------------------------------------------------
/man/tt_user_reposted_api.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/api_research.r, R/shorthands.r
3 | \name{tt_user_reposted_api}
4 | \alias{tt_user_reposted_api}
5 | \alias{tt_get_reposted}
6 | \title{Lookup which videos were reposted by a user using the research API}
7 | \usage{
8 | tt_user_reposted_api(
9 | username,
10 | fields = "all",
11 | max_pages = 1,
12 | cache = TRUE,
13 | verbose = TRUE,
14 | token = NULL
15 | )
16 |
17 | tt_get_reposted(
18 | username,
19 | fields = "all",
20 | max_pages = 1,
21 | cache = TRUE,
22 | verbose = TRUE,
23 | token = NULL
24 | )
25 | }
26 | \arguments{
27 | \item{username}{name(s) of the user(s) to be queried}
28 |
29 | \item{fields}{The fields to be returned (defaults to all)}
30 |
31 | \item{max_pages}{results are returned in batches/pages with 100 videos. How
32 | many should be requested before the function stops?}
33 |
34 | \item{cache}{should progress be saved in the current session? It can then be
35 | retrieved with \code{last_query()} if an error occurs. But the function
36 | will use extra memory.}
37 |
38 | \item{verbose}{should the function print status updates to the screen?}
39 |
40 | \item{token}{The authentication token (usually supplied automatically after
41 | running \link{auth_research} once).}
42 | }
43 | \value{
44 | A data.frame of parsed TikTok videos the user has reposted
45 | }
46 | \description{
47 | \ifelse{html}{\figure{api-research.svg}{options: alt='[Works on:
48 | Research API]'}}{\strong{[Works on: Research API]}}
49 | }
50 | \examples{
51 | \dontrun{
52 | tt_get_reposted("jbgruber")
53 | # OR
54 | tt_user_reposted_api("https://www.tiktok.com/@tiktok")
55 | # OR
56 | tt_user_reposted_api("https://www.tiktok.com/@tiktok")
57 |
58 | # note: none of these work because nobody has this enabled!
59 | }
60 | }
61 |
--------------------------------------------------------------------------------
/man/tt_user_videos.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/shorthands.r
3 | \name{tt_user_videos}
4 | \alias{tt_user_videos}
5 | \title{Get videos from a TikTok user's profile}
6 | \usage{
7 | tt_user_videos(username, ...)
8 | }
9 | \arguments{
10 | \item{username}{The username or usernames whose videos you want to retrieve.}
11 |
12 | \item{...}{Additional arguments to be passed to the \code{\link{tt_user_videos_hidden}} or
13 | \code{\link{tt_user_videos_api}} function.}
14 | }
15 | \description{
16 | \ifelse{html}{\figure{api-both.svg}{options:
17 | alt='[Works on: Both]'}}{\strong{[Works on: Both]}}
18 |
19 | Get all videos posted by a user (or multiple users for the Research API).
20 | Searches videos using either the Research API (if an authentication token
21 | is present, see \link{auth_research}) or otherwise the unofficial hidden
22 | API. See \link{tt_user_videos_api} or \link{tt_user_videos_hidden} respectively for
23 | information about these functions.
24 | }
25 | \examples{
26 | \dontrun{
27 | # Get hidden videos from the user "fpoe_at"
28 | tt_user_videos("fpoe_at")
29 | }
30 | }
31 |
--------------------------------------------------------------------------------
/man/tt_user_videos_api.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/api_research_extended.r
3 | \name{tt_user_videos_api}
4 | \alias{tt_user_videos_api}
5 | \title{Get videos from a TikTok user's profile}
6 | \usage{
7 | tt_user_videos_api(
8 | username,
9 | since = "2020-01-01",
10 | to = Sys.Date(),
11 | verbose = TRUE,
12 | ...
13 | )
14 | }
15 | \arguments{
16 | \item{username}{The username or usernames whose videos you want to retrieve.}
17 |
18 | \item{since, to}{limits from/to when to go through the account in 30 day windows.}
19 |
20 | \item{verbose}{should the function print status updates to the screen?}
21 |
22 | \item{...}{Additional arguments to be passed to the
23 | \code{\link{tt_search_api}} function.}
24 | }
25 | \description{
26 | \ifelse{html}{\figure{api-research.svg}{options: alt='[Works on:
27 | Research API]'}}{\strong{[Works on: Research API]}}
28 |
29 | Get all videos posted by a user or multiple users. This is a convenience
30 | wrapper around \code{\link{tt_search_api}} that takes care of moving time
31 | windows (search is limited to 30 days). This is the version of
32 | \link{tt_user_videos} that explicitly uses the Research API. Use
33 | \link{tt_user_videos_hidden} for the unofficial API version.
34 | }
35 | \examples{
36 | \dontrun{
37 | # Get videos from the user "fpoe_at" since October 2024
38 | tt_user_videos_api("fpoe_at", since = "2024-10-01")
39 |
40 | # often makes sense to combine this with the account creation time from the
41 | # hidden URL
42 | fpoe_at_info <- tt_user_info_hidden(username = "fpoe_at")
43 | tt_user_videos_api("fpoe_at", since = fpoe_at_info$create_time)
44 |
45 | }
46 | }
47 |
--------------------------------------------------------------------------------
/man/tt_user_videos_hidden.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/api_hidden.r
3 | \name{tt_user_videos_hidden}
4 | \alias{tt_user_videos_hidden}
5 | \title{Get videos from a TikTok user's profile}
6 | \usage{
7 | tt_user_videos_hidden(
8 | username,
9 | solve_captchas = FALSE,
10 | return_urls = FALSE,
11 | timeout = 5L,
12 | verbose = TRUE,
13 | ...
14 | )
15 | }
16 | \arguments{
17 | \item{username}{The username of the TikTok user whose hidden videos you want to retrieve.}
18 |
19 | \item{solve_captchas}{open browser to solve appearing captchas manually.}
20 |
21 | \item{return_urls}{return video URLs instead of downloading the videos.}
22 |
23 | \item{timeout}{time (in seconds) to wait between scrolling and solving captchas.}
24 |
25 | \item{verbose}{should the function print status updates to the screen?}
26 |
27 | \item{...}{Additional arguments to be passed to the \code{\link{tt_videos_hidden}} function.}
28 | }
29 | \value{
30 | A list of video data or URLs, depending on the value of \code{return_urls}.
31 | }
32 | \description{
33 | \ifelse{html}{\figure{api-unofficial.svg}{options: alt='[Works on:
34 | Unofficial API]'}}{\strong{[Works on: Unofficial API]}}
35 |
36 | Get all videos posted by a TikTok user.
37 | }
38 | \details{
39 | This function uses rvest to scrape a TikTok user's profile and retrieve any hidden videos.
40 | }
41 | \examples{
42 | \dontrun{
43 | # Get hidden videos from the user "fpoe_at"
44 | tt_user_videos_hidden("fpoe_at")
45 | }
46 | }
47 |
--------------------------------------------------------------------------------
/man/tt_videos_hidden.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/api_hidden.r, R/shorthands.r
3 | \name{tt_videos_hidden}
4 | \alias{tt_videos_hidden}
5 | \alias{tt_videos}
6 | \title{Get video metadata and video files from URLs}
7 | \usage{
8 | tt_videos_hidden(
9 | video_urls,
10 | save_video = TRUE,
11 | overwrite = FALSE,
12 | dir = ".",
13 | cache_dir = NULL,
14 | sleep_pool = 1:10,
15 | max_tries = 5L,
16 | cookiefile = NULL,
17 | verbose = TRUE,
18 | ...
19 | )
20 |
21 | tt_videos(...)
22 | }
23 | \arguments{
24 | \item{video_urls}{vector of URLs or IDs to TikTok videos.}
25 |
26 | \item{save_video}{logical. Should the videos be downloaded.}
27 |
28 | \item{overwrite}{logical. If save_video=TRUE and the file already exists,
29 | should it be overwritten?}
30 |
31 | \item{dir}{directory to save video files to.}
32 |
33 | \item{cache_dir}{if set to a path, one RDS file with metadata will be written
34 | to disk for each video. This is useful if you have many videos and want to
35 | pick up where you left off if something goes wrong.}
36 |
37 | \item{sleep_pool}{a vector of numbers from which a waiting period is randomly
38 | drawn.}
39 |
40 | \item{max_tries}{how often to retry if a request fails.}
41 |
42 | \item{cookiefile}{path to your cookiefile. Usually not needed after running
43 | \link{auth_hidden} once. See \code{vignette("unofficial-api", package =
44 | "traktok")} for more information on authentication.}
45 |
46 | \item{verbose}{should the function print status updates to the screen?}
47 |
48 | \item{...}{handed to \code{tt_videos_hidden} (for tt_videos) and (further) to
49 | \link{tt_request_hidden}.}
50 | }
51 | \value{
52 | a data.frame
53 | }
54 | \description{
55 | \ifelse{html}{\figure{api-unofficial.svg}{options: alt='[Works on:
56 | Unofficial API]'}}{\strong{[Works on: Unofficial API]}}
57 | }
58 | \details{
59 | The function will wait between scraping two videos to make it less
60 | obvious that a scraper is accessing the site. The period is drawn randomly
61 | from the \code{sleep_pool} and multiplied by a random fraction.
62 |
63 | Note that the video file has to be requested in the same session as
64 | the metadata. So while the URL to the video file is included in the
65 | metadata, this link will not work in most cases.
66 | }
67 | \examples{
68 | \dontrun{
69 | tt_videos("https://www.tiktok.com/@tiktok/video/7106594312292453675")
70 | }
71 | }
72 |
--------------------------------------------------------------------------------
/tests/spelling.R:
--------------------------------------------------------------------------------
1 | # Spell-check documentation and vignettes; skipped entirely when the
2 | # optional 'spelling' package is not installed.
3 | if (requireNamespace("spelling", quietly = TRUE)) {
4 |   spelling::spell_check_test(vignettes = TRUE,
5 |                              error = TRUE,
6 |                              skip_on_cran = TRUE)
7 | }
8 |
--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | # This file is part of the standard setup for testthat.
2 | # It is recommended that you do not modify it.
3 | #
4 | # Where should you do additional test configuration?
5 | # Learn more about the roles of various files in:
6 | # * https://r-pkgs.org/tests.html
7 | # * https://testthat.r-lib.org/reference/test_package.html#special-files
8 |
9 | library(testthat)
10 | library(traktok)
11 |
12 | test_check("traktok") # run the testthat suite of the traktok package
13 |
--------------------------------------------------------------------------------
/tests/testthat/example_query.json:
--------------------------------------------------------------------------------
1 | {
2 | "and": [
3 | {
4 | "field_name": "region_code",
5 | "operation": "IN",
6 | "field_values": ["JP", "US"]
7 | },
8 | {
9 | "field_name":"hashtag_name",
10 | "operation":"EQ",
11 | "field_values":["animal"]
12 | }
13 | ],
14 | "not": [
15 | {
16 | "field_name": "video_length",
17 | "operation": "EQ",
18 | "field_values": ["SHORT"]
19 | }
20 | ]
21 | }
22 |
--------------------------------------------------------------------------------
/tests/testthat/example_request.json:
--------------------------------------------------------------------------------
1 | {
2 | "query": {
3 | "and": [
4 | {
5 | "field_name": "region_code",
6 | "operation": "IN",
7 | "field_values": ["JP", "US"]
8 | },
9 | {
10 | "field_name":"hashtag_name",
11 | "operation":"EQ",
12 | "field_values":["animal"]
13 | }
14 | ],
15 | "not": [
16 | {
17 | "field_name": "video_length",
18 | "operation": "EQ",
19 | "field_values": ["SHORT"]
20 | }
21 | ]
22 | },
23 | "max_count": 100,
24 | "cursor": 0,
25 | "start_date": "20230101",
26 | "end_date": "20230115"
27 | }
28 |
--------------------------------------------------------------------------------
/tests/testthat/example_resp_comments.json:
--------------------------------------------------------------------------------
1 | {
2 | "data": {
3 | "comments": [
4 | {
5 | "text": "AWEEEEEE 🥰🥰🥰",
6 | "video_id": 1234563451201523412,
7 | "create_time": 1671491598,
8 | "id": 12345616934634134,
9 | "like_count": 50,
10 | "parent_comment_id": 1234561201524010,
11 | "reply_count": 10
12 | }
13 | ],
14 | "has_more": true,
15 | "cursor": 300
16 | },
17 | "error": {
18 | "code": "ok",
19 | "message": "",
20 | "log_id": "202207280326050102231031430C7E754E"
21 | }
22 | }
23 |
--------------------------------------------------------------------------------
/tests/testthat/example_resp_q_user.json:
--------------------------------------------------------------------------------
1 | {
2 | "data": {
3 | "bio_description": "my_bio",
4 | "is_verified": false,
5 | "likes_count": 27155089,
6 | "video_count": 44,
7 | "avatar_url": "https://some_cdn.com/my_avatar",
8 | "follower_count": 232,
9 | "following_count": 45,
10 | "display_name": "my nick name"
11 | },
12 | "error": {
13 | "code": "ok",
14 | "message": "",
15 | "log_id": "202207280326050102231031430C7E754E"
16 | }
17 | }
18 |
--------------------------------------------------------------------------------
/tests/testthat/example_resp_q_videos.json:
--------------------------------------------------------------------------------
1 | {
2 | "data": {
3 | "videos": [
4 | {
5 | "hashtag_names": [
6 | "avengers",
7 | "pov"
8 | ],
9 | "region_code": "CA",
10 | "create_time": 1633823999,
11 | "effect_ids": [
12 | "0"
13 | ],
14 | "video_id": 702874395068494965,
15 | "music_id": 703847506349838790,
16 | "video_description": "lol #pov #avengers",
17 | "view_count": 1050,
18 | "comment_count": 2
19 | },
20 | {
21 | "hashtag_names": [
22 | "avengers",
23 | "pov"
24 | ],
25 | "region_code": "CA",
26 | "create_time": 1633823999,
27 | "effect_ids": [
28 | "0"
29 | ],
30 | "video_id": 702874395068494965,
31 | "music_id": 703847506349838790,
32 | "video_description": "lol #pov #avengers",
33 | "view_count": 1050,
34 | "comment_count": 2
35 | }
36 | ],
37 | "cursor": 100,
38 | "search_id": "7201388525814961198",
39 | "has_more": true
40 | },
41 | "error": {
42 | "code": "ok",
43 | "message": "",
44 | "log_id": "20230113024658F0D7C5D6CA3A9B79C5B9"
45 | }
46 | }
47 |
--------------------------------------------------------------------------------
/tests/testthat/test-research_api.R:
--------------------------------------------------------------------------------
1 | # Tests for the Research API helpers: building queries, sending requests
2 | # (against mocked HTTP responses) and parsing example API payloads.
3 |
4 | test_that("query", {
5 |   # the query builder must reproduce the structure stored in the JSON example
6 |   built <- query() |>
7 |     query_and(field_name = "region_code",
8 |               operation = "IN",
9 |               field_values = c("JP", "US")) |>
10 |     query_and(field_name = "hashtag_name",
11 |               operation = "EQ",
12 |               field_values = "animal") |>
13 |     query_not(operation = "EQ",
14 |               field_name = "video_length",
15 |               field_values = "SHORT") |>
16 |     unclass()
17 |   expect_equal(built, jsonlite::read_json("example_query.json"))
18 | })
19 |
20 | test_that("request", {
21 |   # Most recent request seen by the mock. It is declared here so that the
22 |   # `<<-` in mock_success() assigns into this test's environment instead of
23 |   # creating a stray object in the global environment.
24 |   captured_req <- NULL
25 |   mock_success <- function(req) {
26 |     captured_req <<- req # inspected by the expectations below
27 |     httr2::response(status_code = 200,
28 |                     headers = "Content-Type: application/json",
29 |                     body = charToRaw(
30 |                       paste0(readLines("example_resp_q_videos.json"), collapse = "")
31 |                     ))
32 |   }
33 |   # mock a query to check against example
34 |   q <- query() |>
35 |     query_and(field_name = "region_code",
36 |               operation = "IN",
37 |               field_values = c("JP", "US")) |>
38 |     query_and(field_name = "hashtag_name",
39 |               operation = "EQ",
40 |               field_values = "animal") |>
41 |     query_not(field_name = "video_length",
42 |               operation = "EQ",
43 |               field_values = "SHORT")
44 |
45 |   httr2::with_mocked_responses(
46 |     mock_success,
47 |     tt_search_api(q,
48 |                   start_date = "20230101",
49 |                   end_date = "20230115",
50 |                   is_random = NULL,
51 |                   token = list(access_token = "test"))
52 |   )
53 |
54 |   # compare the body that was sent with the example request
55 |   ex <- jsonlite::read_json("example_request.json")
56 |   sent <- captured_req$body$data
57 |
58 |   expect_equal(sort(names(sent)), sort(names(ex)))
59 |   expect_equal(sent$query, ex$query)
60 |   expect_equal(sent$start_date, ex$start_date)
61 |   expect_equal(sent$end_date, ex$end_date)
62 |
63 |   # the mocked response holds two videos and always sets has_more, so
64 |   # requesting 20 pages is expected to give 40 rows
65 |   df <- httr2::with_mocked_responses(
66 |     mock_success,
67 |     tt_search_api(q,
68 |                   start_date = "20230101",
69 |                   end_date = "20230115",
70 |                   is_random = NULL,
71 |                   max_pages = 20,
72 |                   verbose = FALSE,
73 |                   token = list(access_token = "test"))
74 |   )
75 |   expect_equal(nrow(df), 40)
76 | })
77 |
78 |
79 | test_that("parsing", {
80 |   videos <- jsonlite::read_json("example_resp_q_videos.json", bigint_as_char = TRUE) |>
81 |     purrr::pluck("data", "videos") |>
82 |     parse_api_search()
83 |   expect_equal(
84 |     c(videos$video_id, nrow(videos), ncol(videos)),
85 |     c("702874395068494965", "702874395068494965", "2", "13")
86 |   )
87 |
88 |   # apparently, sometimes the video_id is just called id
89 |   mixed_ids <- parse_api_search(list(list(id = "1"), list(video_id = "2")))
90 |   expect_equal(mixed_ids$video_id, c("1", "2"))
91 |
92 |   user <- jsonlite::read_json("example_resp_q_user.json", bigint_as_char = TRUE) |>
93 |     purrr::pluck("data") |>
94 |     tibble::as_tibble()
95 |   expect_equal(c(nrow(user), ncol(user)), c(1, 8))
96 |
97 |   comments <- jsonlite::read_json("example_resp_comments.json", bigint_as_char = TRUE) |>
98 |     purrr::pluck("data", "comments") |>
99 |     parse_api_comments()
100 |   expect_equal(
101 |     c(comments$video_id, nrow(comments), ncol(comments)),
102 |     c("1234563451201523412", "1", "7")
103 |   )
104 | })
105 |
--------------------------------------------------------------------------------
/tests/testthat/test-research_auth.R:
--------------------------------------------------------------------------------
1 | # Tests for Research API authentication: requesting a token, caching it on
2 | # disk and reading it back, plus the error path.
3 |
4 | test_that("authentication works", {
5 |   mock_success <- function(req) {
6 |     httr2::response(
7 |       status_code = 200,
8 |       headers = "Content-Type: application/json",
9 |       body = charToRaw("{
10 | \"access_token\": \"clt.example12345Example12345Example\",
11 | \"expires_in\": 7200,
12 | \"token_type\": \"Bearer\"
13 | }"))
14 |   }
15 |
16 |   # TIKTOK_TOKEN is set to a throwaway file name for this test; the
17 |   # expectations below look for the cached token under that name. Cleanup is
18 |   # registered right away so the file is removed and the variable restored
19 |   # even if a later step errors.
20 |   token_file <- file.path(tools::R_user_dir("traktok", "cache"), "test.rds")
21 |   previous <- Sys.getenv("TIKTOK_TOKEN", unset = NA)
22 |   Sys.setenv("TIKTOK_TOKEN" = "test.rds")
23 |   on.exit({
24 |     if (file.exists(token_file)) file.remove(token_file)
25 |     if (is.na(previous)) {
26 |       Sys.unsetenv("TIKTOK_TOKEN")
27 |     } else {
28 |       Sys.setenv("TIKTOK_TOKEN" = previous)
29 |     }
30 |   }, add = TRUE)
31 |
32 |   expect_equal(
33 |     httr2::with_mocked_responses(
34 |       mock_success,
35 |       req_token(client_key = "test", client_secret = "test")
36 |     ),
37 |     list(access_token = "clt.example12345Example12345Example",
38 |          expires_in = 7200L,
39 |          token_type = "Bearer")
40 |   )
41 |
42 |   expect_equal(
43 |     httr2::with_mocked_responses(
44 |       mock_success,
45 |       auth_research(client_key = "test", client_secret = "test")$token_type
46 |     ),
47 |     "Bearer"
48 |   )
49 |
50 |   expect_true(file.exists(token_file))
51 |
52 |   expect_equal(get_token()$access_token, httr2::obfuscated("clt.example12345Example12345Example"))
53 | })
54 |
55 |
56 | test_that("auth error", {
57 |   mock_error <- function(req) {
58 |     httr2::response(
59 |       status_code = 500,
60 |       headers = "Content-Type: application/json",
61 |       body = charToRaw("{
62 | \"error\": \"invalid_request\",
63 | \"error_description\": \"Client secret is missed in request.\",
64 | \"log_id\": \"202206221854370101130062072500FFA2\"
65 | }"))
66 |   }
67 |   # a 500 response from the token endpoint must surface as an R error
68 |   expect_error(
69 |     httr2::with_mocked_responses(
70 |       mock_error,
71 |       req_token(client_key = "test", client_secret = "test")
72 |     ),
73 |     "Request failed with"
74 |   )
75 | })
76 |
--------------------------------------------------------------------------------
/tests/testthat/test-tt_utils.R:
--------------------------------------------------------------------------------
1 | # will be rewritten soon anyway
2 | # test_that("1. cookies as string options", {
3 | # options(tt_cookiefile = "tt_csrf_token=test;")
4 | # expect_equal(auth_hidden(save = FALSE), list(tt_csrf_token = "test;"))
5 | # unlink(list.files(tools::R_user_dir("traktok", "config"), full.names = TRUE))
6 | # })
7 | #
8 | # test_that("2. default cookie file", {
9 | # tmp <- tempfile()
10 | # options(tt_cookiefile = tmp)
11 | # writeLines("\t\t\t\t\ttt_csrf_token\ttest;", tmp)
12 | # expect_equal(auth_hidden(save = FALSE),
13 | # list(tt_csrf_token = "test;"))
14 | # })
15 | #
16 | # test_that("3. default directory", {
17 | # options(tt_cookiefile = NULL)
18 | # tmp <- file.path(tools::R_user_dir("traktok", "config"), "aaa")
19 | # writeLines("\t\t\t\t\ttt_csrf_token\ttest;", tmp)
20 | # expect_equal(auth_hidden(save = FALSE),
21 | # list(tt_csrf_token = "test;"))
22 | # unlink(list.files(tools::R_user_dir("traktok", "config"), full.names = TRUE))
23 | # })
24 | #
25 | # test_that("4. no/invalid cookies", {
26 | # options(tt_cookiefile = NULL)
27 | # expect_error(auth_hidden(save = FALSE),
28 | # "No cookies provided or found")
29 | # expect_error(auth_hidden(x = "test"),
30 | # "No cookies provided or found")
31 | # })
32 | #
33 | # test_that("5. invalid cookie string/file", {
34 | # expect_error(auth_hidden(x = "test=test;"),
35 | # " does not contain valid TikTok cookies")
36 | #
37 | # expect_error(auth_hidden(x = list()),
38 | # " does not contain valid TikTok cookies")
39 | #
40 | # tmp <- tempfile()
41 | # writeLines("\t\t\t\t\ttest\ttest;", tmp)
42 | # expect_error(auth_hidden(x = tmp),
43 | # " does not contain valid TikTok cookies")
44 | # })
45 | #
46 | #
47 | # test_that("vpluck", {
48 | # expect_equal(
49 | # vpluck(list(list(c("A", NA)), list(NULL)), 1, 1),
50 | # c("A", NA_character_)
51 | # )
52 | # expect_equal(
53 | # vpluck(list(list(c("A", NA)), list(NULL)), 1, 2),
54 | # c(NA_character_, NA_character_)
55 | # )
56 | # expect_equal(
57 | # vpluck(list(list(c(1L, NA)), list(NULL)), 1, 1, val = "integer"),
58 | # c(1L, NA_integer_)
59 | # )
60 | # expect_equal(
61 | # vpluck(list(list(c(TRUE, NA)), list(NULL)), 1, 1, val = "logical"),
62 | # c(TRUE, NA)
63 | # )
64 | # })
65 |
--------------------------------------------------------------------------------
/tests/testthat/test-tt_videos.R:
--------------------------------------------------------------------------------
1 | test_that("get meta and download", {
2 |   skip("need to rewrite after refactor")
3 |   # nothing below runs until the skip() above is removed
4 |   options(tt_cookiefile = Sys.getenv("TT_COOKIES"))
5 |   video_url <- "https://www.tiktok.com/@tiktok/video/6584647400055377158?is_copy_url=1&is_from_webapp=v1"
6 |   # the same URL is requested twice, so two rows are expected
7 |   meta <- tt_videos(video_urls = rep(video_url, 2L),
8 |                     cache_dir = tempdir(), save_video = FALSE, dir = tempdir())
9 |   expect_equal(nrow(meta), 2L)
10 |   expect_equal(ncol(meta), 16L)
11 |   # expect_equal(file.exists(meta[["video_fn"]][1]), TRUE)
12 |   expect_true(file.exists(paste0(tempdir(), "/video_meta_6584647400055377158.rds")))
13 |   expect_lte(sum(is.na(meta)), 2L)
14 |   expect_warning(tt_videos("https://www.tiktok.com/"),
15 |                  "https://www.tiktok.com/ can't be reached.")
16 |   expect_warning(tt_videos("https://www.tiktok.com/@test/video/6"),
17 |                  "html status 404, the row will contain NAs")
18 | })
19 |
20 |
21 | test_that("parse", {
22 |   # JSON without a recognised container should only raise a warning
23 |   expect_warning(parse_video('{"test":1}', video_id = 1L), "No video data found")
24 |
25 |   # payload keyed by "ItemModule": one row, 18 columns
26 |   item_module_json <- '{"ItemModule":{"test":1}}'
27 |   expect_equal(dim(parse_video(item_module_json, video_id = 1L)), c(1L, 18L))
28 |
29 |   # payload keyed by "__DEFAULT_SCOPE__": one row, 22 columns
30 |   default_scope_json <- '{"__DEFAULT_SCOPE__":{"webapp.video-detail":{"itemInfo":{"itemStruct":{"test":1}}}}}'
31 |   expect_equal(dim(parse_video(default_scope_json, video_id = 1L)), c(1L, 22L))
32 | })
33 |
--------------------------------------------------------------------------------
/traktok.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: Default
4 | SaveWorkspace: No
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 |
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 |
--------------------------------------------------------------------------------
/vignettes/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | *.R
3 |
--------------------------------------------------------------------------------
/vignettes/cookies.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JBGruber/traktok/d97f4ea5fb4f728aba1f64f9f71f5740bd626442/vignettes/cookies.png
--------------------------------------------------------------------------------
/vignettes/function_overview.csv:
--------------------------------------------------------------------------------
1 | Description,Shorthand,Research API,Hidden API
2 | search videos,tt_search,tt_search_api,tt_search_hidden
3 | get video detail (+file),tt_videos,-,tt_videos_hidden
4 | get user infos,tt_user_info,tt_user_info_api,tt_user_info_hidden
5 | get comments under a video,tt_comments,tt_comments_api,-
6 | get who follows a user,tt_get_follower,tt_user_follower_api,tt_get_follower_hidden
7 | get who a user is following,tt_get_following,tt_user_following_api,tt_get_following_hidden
8 | get raw video data,-,-,tt_request_hidden
9 | authenticate a session,-,auth_research,auth_hidden
10 |
--------------------------------------------------------------------------------
/vignettes/make_vignettes.r:
--------------------------------------------------------------------------------
1 | knitr::knit("vignettes/unofficial-api.Rmd.orig", output = "vignettes/unofficial-api.Rmd") # knit the .Rmd.orig source into the .Rmd vignette; paths are relative, so the working directory must contain vignettes/
2 | knitr::knit("vignettes/research-api.Rmd.orig", output = "vignettes/research-api.Rmd") # same for the Research API vignette
3 |
--------------------------------------------------------------------------------
/vignettes/research-api.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Research API"
3 | output: rmarkdown::html_vignette
4 | vignette: >
5 | %\VignetteIndexEntry{research-api}
6 | %\VignetteEngine{knitr::rmarkdown}
7 | %\VignetteEncoding{UTF-8}
8 | ---
9 |
10 |
11 |
12 | TikTok's [Research API](https://developers.tiktok.com/products/research-api/), which was made available to researchers in the US and Europe in 2023, offers several endpoints, which are wrapped in the following `traktok` functions:
13 |
14 | 1. You can [search videos](https://developers.tiktok.com/doc/research-api-specs-query-videos) with `tt_search_api` or `tt_search`
15 | 2. You can [get basic user information](https://developers.tiktok.com/doc/research-api-specs-query-user-info) with `tt_user_info_api` or `tt_user_info`
16 | 3. You can [obtain all comments of a video](https://developers.tiktok.com/doc/research-api-specs-query-video-comments) with `tt_comments_api` or `tt_comments`
17 | 4. You can [get the videos a user has liked](https://developers.tiktok.com/doc/research-api-specs-query-user-liked-videos) with `tt_user_liked_videos_api` or `tt_get_liked`
18 | 5. You can [get the videos a user has reposted](https://developers.tiktok.com/doc/research-api-specs-query-user-reposted-videos) with `tt_user_reposted_api` or `tt_get_reposted`
19 | 6. You can [get the videos a user has pinned](https://developers.tiktok.com/doc/research-api-specs-query-user-pinned-videos) with `tt_user_pinned_videos_api` or `tt_get_pinned`
20 | 7. You can [get who a user is following](https://developers.tiktok.com/doc/research-api-specs-query-user-following) or [who follows a user](https://developers.tiktok.com/doc/research-api-specs-query-user-followers) with `tt_get_following`/`tt_get_follower` or `tt_user_following_api`/`tt_user_follower_api`
21 |
22 |
23 | # Authentication
24 |
25 | To get access to the Research API, you need to:
26 |
27 | 1. [be eligible](https://developers.tiktok.com/products/research-api);
28 | 2. [create a developer account](https://developers.tiktok.com/signup);
29 | 3. and then [apply for access to the Research API](https://developers.tiktok.com/products/research-api).
30 |
31 | Once you have been approved and have your client key and client secret, you can authenticate with:
32 |
33 |
34 | ``` r
35 | library(traktok)
36 | auth_research()
37 | ```
38 |
39 |
40 |
41 | It is recommended that you run this function only once without arguments, so that your key and secret can be entered through the pop up mask and do not remain unencrypted in your R history or a script.
42 | The function then runs through authentication for you and saves the resulting token encrypted on your hard drive.
43 | Just run it again in case your credentials change.
44 |
45 |
46 | # Usage
47 | ## Search Videos
48 |
49 | TikTok uses a fine-grained, yet complicated [query syntax](https://developers.tiktok.com/doc/research-api-specs-query-videos#query).
50 | For convenience, a query is constructed internally when you search with a key phrase directly:
51 |
52 |
53 | ``` r
54 | tt_query_videos("#rstats", max_pages = 2L)
55 | #>
56 | ℹ Making initial request
57 |
58 | ✔ Making initial request [774ms]
59 | #>
60 | ℹ Parsing data
61 |
62 | ✔ Parsing data [177ms]
63 | #> ── search id: NA ───────────────────────────────────────
64 | #> # A tibble: 0 × 13
65 | #> # ℹ 13 variables: video_id , author_name ,
66 | #> # view_count , comment_count ,
67 | #> # share_count , like_count ,
68 | #> # region_code , create_time ,
69 | #> # effect_ids , music_id ,
70 | #> # video_description , hashtag_names ,
71 | #> # voice_to_text
72 | ```
73 |
74 | This will match your keyword or phrase against keywords and hashtags and return up to 200 results (each page has 100 results and 2 pages are requested by default) from today and yesterday.
75 | Every whitespace is treated as an AND operator.
76 | To extend the date range, you can set a start and end date (which can be a maximum of 30 days apart, but there is no limit on how far back you can go):
77 |
78 |
79 | ``` r
80 | tt_query_videos("#rstats",
81 | max_pages = 2L,
82 | start_date = as.Date("2023-11-01"),
83 | end_date = as.Date("2023-11-29"))
84 | #>
85 | ℹ Making initial request
86 |
87 | ✔ Making initial request [2s]
88 | #>
89 | ℹ Parsing data
90 |
91 | ✔ Parsing data [63ms]
92 | #> ── search id: 7423432753447932974 ──────────────────────
93 | #> # A tibble: 19 × 13
94 | #> video_id author_name view_count comment_count
95 | #>
96 | #> 1 730689385329705… statistics… 909 4
97 | #> 2 730630774458222… learningca… 1104 11
98 | #> 3 730501447636800… picanumeros 4645 8
99 | #> 4 730297066790799… smooth.lea… 98717 17
100 | #> 5 730247037950160… statistics… 508 0
101 | #> 6 730097749816510… statistics… 27387 1
102 | #> 7 730093147605973… rigochando 2603 4
103 | #> 8 730092229522312… elartedeld… 765 0
104 | #> 9 729998705941704… statistics… 1110 1
105 | #> 10 729965751681473… rigochando 905 4
106 | #> 11 729934294487885… rigochando 555 0
107 | #> 12 729896668413454… rigochando 1312 1
108 | #> 13 729691148659145… biofreelan… 19758 7
109 | #> 14 729691148625178… biofreelan… 5763 1
110 | #> 15 729691147878174… biofreelan… 1019 3
111 | #> 16 729668885660947… mrpecners 657 2
112 | #> 17 729651863537426… l_a_kelly 514 5
113 | #> 18 729649864535081… mrpecners 373 0
114 | #> 19 729628884337898… casaresfel… 274 0
115 | #> # ℹ 9 more variables: share_count ,
116 | #> # like_count , region_code ,
117 | #> # create_time , effect_ids ,
118 | #> # music_id , video_description ,
119 | #> # hashtag_names , voice_to_text
120 | ```
121 |
122 | As said, the query syntax that TikTok uses is a little complicated, as you can use AND, OR and NOT boolean operators on a number of fields (`"create_date"`, `"username"`, `"region_code"`, `"video_id"`, `"hashtag_name"`, `"keyword"`, `"music_id"`, `"effect_id"`, and `"video_length"`):
123 |
124 | | Operator | Results are returned if... |
125 | | -------- | ------------------------------------------ |
126 | | AND | ...all specified conditions are met |
127 | | OR | ...any of the specified conditions are met |
128 | | NOT | ...the not conditions are not met |
129 |
130 | To make this easier to use, `traktok` uses a tidyverse style approach to building queries.
131 | For example, to get to the same query that matches #rstats against keywords and hashtags, you need to build the query like this:
132 |
133 |
134 | ``` r
135 | query() |> # start by using query()
136 | query_or(field_name = "hashtag_name", # add an OR condition on the hashtag field
137 | operation = "IN", # the value should be IN the list of hashtags
138 | field_values = "rstats") |> # the hashtag field does not accept the #-symbol
139 | query_or(field_name = "keyword", # add another OR condition
140 | operation = "IN",
141 | field_values = "#rstats")
142 | #> S3
143 | #> └─or:
144 | #> ├─
145 | #> │ ├─field_name: "hashtag_name"
146 | #> │ ├─operation: "IN"
147 | #> │ └─field_values:
148 | #> │ └─"rstats"
149 | #> └─
150 | #> ├─field_name: "keyword"
151 | #> ├─operation: "IN"
152 | #> └─field_values:
153 | #> └─"#rstats"
154 | ```
155 |
156 | If #rstats is found in either the hashtag or keywords of a video, that video is then returned.
157 | Besides checking for `EQ`ual, you can also use one of the other operations:
158 |
159 | | Operation | Results are returned if field_values are... |
160 | | --------- | ----------------------------------------------- |
161 | | EQ | equal to the value in the field |
162 | | IN | equal to a value in the field |
163 | | GT | greater than the value in the field |
164 | | GTE | greater than or equal to the value in the field |
165 | | LT | lower than the value in the field |
166 | | LTE | lower than or equal to the value in the field |
167 |
168 |
169 | This makes building queries relatively complex, but allows for fine-grained searches in the TikTok data:
170 |
171 |
172 | ``` r
173 | search_df <- query() |>
174 | query_and(field_name = "region_code",
175 | operation = "IN",
176 | field_values = c("JP", "US")) |>
177 | query_or(field_name = "hashtag_name",
178 | operation = "EQ", # rstats is the only hashtag
179 | field_values = "rstats") |>
180 | query_or(field_name = "keyword",
181 | operation = "IN", # rstats is one of the keywords
182 | field_values = "rstats") |>
183 | query_not(operation = "EQ",
184 | field_name = "video_length",
185 | field_values = "SHORT") |>
186 | tt_search_api(start_date = as.Date("2023-11-01"),
187 | end_date = as.Date("2023-11-29"))
188 | #>
189 | ℹ Making initial request
190 |
191 | ✔ Making initial request [1.1s]
192 | #>
193 | ℹ Parsing data
194 |
195 | ✔ Parsing data [59ms]
196 | search_df
197 | #> ── search id: 7423432753447965742 ──────────────────────
198 | #> # A tibble: 2 × 13
199 | #> video_id author_name view_count comment_count
200 | #>
201 | #> 1 7296688856609475… mrpecners 657 2
202 | #> 2 7296498645350812… mrpecners 373 0
203 | #> # ℹ 9 more variables: share_count ,
204 | #> # like_count , region_code ,
205 | #> # create_time , effect_ids ,
206 | #> # music_id , video_description ,
207 | #> # hashtag_names , voice_to_text
208 | ```
209 |
210 | This will return videos posted in the US or Japan, that have rstats as the only hashtag or as one of the keywords and have a length of `"MID"`, `"LONG"`, or `"EXTRA_LONG"`.^[
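211 | See the [Query Videos specification](https://developers.tiktok.com/doc/research-api-specs-query-videos) for possible values of each field.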
212 | ]
213 |
214 | ## Get User Information
215 |
216 | There is not really much to getting basic user info, but this is how you can do it:
217 |
218 |
219 | ``` r
220 | tt_user_info_api(username = c("tiktok", "https://www.tiktok.com/@statisticsglobe"))
221 | #>
222 | ℹ Getting user tiktok
223 |
224 | ✔ Got user tiktok [508ms]
225 | #>
226 | ℹ Getting user statisticsglobe
227 |
228 | ✔ Got user statisticsglobe [518ms]
229 | #> # A tibble: 2 × 8
230 | #> is_verified likes_count video_count avatar_url
231 | #>
232 | #> 1 TRUE 330919903 1073 https://p16-pu-si…
233 | #> 2 FALSE 1660 92 https://p16-sign-…
234 | #> # ℹ 4 more variables: bio_description ,
235 | #> # display_name , follower_count ,
236 | #> # following_count
237 | ```
238 |
239 | If you wish to return the videos of a user, you can use the search again:
240 |
241 |
242 | ``` r
243 | query() |>
244 | query_and(field_name = "username",
245 | operation = "EQ",
246 | field_values = "statisticsglobe") |>
247 | tt_search_api(start_date = as.Date("2023-11-01"),
248 | end_date = as.Date("2023-11-29"))
249 | #>
250 | ℹ Making initial request
251 |
252 | ✔ Making initial request [872ms]
253 | #>
254 | ℹ Parsing data
255 |
256 | ✔ Parsing data [65ms]
257 | #> ── search id: 7423432753448064046 ──────────────────────
258 | #> # A tibble: 5 × 13
259 | #> video_id author_name view_count comment_count
260 | #>
261 | #> 1 7306893853297052… statistics… 909 4
262 | #> 2 7302470379501604… statistics… 508 0
263 | #> 3 7300977498165103… statistics… 27387 1
264 | #> 4 7299987059417042… statistics… 1110 1
265 | #> 5 7297389484524506… statistics… 538 2
266 | #> # ℹ 9 more variables: share_count ,
267 | #> # like_count , region_code ,
268 | #> # create_time , effect_ids ,
269 | #> # music_id , video_description ,
270 | #> # hashtag_names , voice_to_text
271 | ```
272 |
273 | You can also find the videos a user has pinned to the top of their page:
274 |
275 |
276 | ``` r
277 | tt_user_pinned_videos_api(c("tiktok", "https://www.tiktok.com/@smooth.learning.c"))
278 | #>
279 | ℹ Getting user tiktok
280 |
281 | ✖ Getting user tiktok [367ms]
282 | #>
283 | ℹ Getting user smooth.learning.c
284 |
285 | ✔ Got user smooth.learning.c [571ms]
286 | #> # A tibble: 1 × 14
287 | #> pinned_by_user create_time id is_stem_verified
288 | #>
289 | #> 1 smooth.learning.c 1690255097 725959… FALSE
290 | #> # ℹ 10 more variables: region_code ,
291 | #> # video_duration , view_count ,
292 | #> # video_description , comment_count ,
293 | #> # hashtag_names , like_count ,
294 | #> # music_id , share_count , username
295 | ```
296 |
297 |
298 | To find out what a user has liked, you can use:
299 |
300 |
301 | ``` r
302 | tt_get_liked("jbgruber")
303 | #>
304 | ℹ Getting user jbgruber
305 |
306 | ✔ Got user jbgruber [1.5s]
307 | #> # A tibble: 98 × 14
308 | #> id username create_time video_description
309 | #>
310 | #> 1 7355902326877… america… 1712679503 "Stitch with @Mr…
311 | #> 2 7268078476102… carterp… 1692231398 "Are you going t…
312 | #> 3 7419692903460… okbrune… 1727531892 "Die ganze Wahrh…
313 | #> 4 7405633113835… funny_s… 1724258332 "#fyp #fypシ #fu…
314 | #> 5 7398532172048… lib0160… 1722605019 "Me and ChatGPT …
315 | #> 6 7364763547038… vquasch… 1714742648 "Einige Medien u…
316 | #> 7 7346577913858… ct_3003 1710508473 "Diese Platine f…
317 | #> 8 7379856141972… lizthed… 1718256663 "Replying to @Ar…
318 | #> 9 7415189182865… felixba… 1726483284 "Es geht wieder …
319 | #> 10 7422673042553… grueneb… 1728225752 "Was Söder uns e…
320 | #> # ℹ 88 more rows
321 | #> # ℹ 10 more variables: region_code ,
322 | #> # video_duration , view_count ,
323 | #> # like_count , comment_count ,
324 | #> # share_count , music_id ,
325 | #> # hashtag_names , is_stem_verified ,
326 | #> # liked_by_user
327 | ```
328 |
329 | Note that making likes public is an opt-in feature of TikTok and almost nobody has this enabled, so it will give you a lot of warnings.
330 |
331 | What we can usually get is the information who a user follows:
332 |
333 |
334 | ``` r
335 | tt_user_following_api(username = "jbgruber")
336 | #>
337 | ℹ Getting user jbgruber
338 |
339 | ✔ Got user jbgruber [296ms]
340 | #> # A tibble: 19 × 3
341 | #> display_name username following_user
342 | #>
343 | #> 1 SohoBrody rudeboybrody jbgruber
344 | #> 2 Last Week Tonight lastweektonight… jbgruber
345 | #> 3 schlantologie schlantologie jbgruber
346 | #> 4 Alex Falcone alex_falcone jbgruber
347 | #> 5 dadNRG dadnrg jbgruber
348 | #> 6 Einfach Genial Tictok user22690086508… jbgruber
349 | #> 7 noir_concrete_studio noir_concrete_s… jbgruber
350 | #> 8 fatDumbledore fatdumbledore13… jbgruber
351 | #> 9 fragdenstaat.de fragdenstaat.de jbgruber
352 | #> 10 Erikadbka erikadbka jbgruber
353 | #> 11 BÜNDNIS 90/DIE GRÜNEN diegruenen jbgruber
354 | #> 12 lagedernationclips lagedernationcl… jbgruber
355 | #> 13 Alexandra Ils kitty.fantastico jbgruber
356 | #> 14 future infinitive ☸️ lizthedeveloper jbgruber
357 | #> 15 Tim Achtermeyer achtermeyer jbgruber
358 | #> 16 Jay Foreman jayforeman jbgruber
359 | #> 17 Cosmo whereiswanda jbgruber
360 | #> 18 Tim Walz timwalz jbgruber
361 | #> 19 Shahak Shapira shahakshapira jbgruber
362 | ```
363 |
364 | And who they are followed by:
365 |
366 |
367 | ``` r
368 | tt_user_follower_api("https://www.tiktok.com/@tiktok")
369 | #>
370 | ℹ Getting user tiktok
371 |
372 | ✔ Got user tiktok [442ms]
373 | #> # A tibble: 90 × 3
374 | #> username display_name following_user
375 | #>
376 | #> 1 galbruwt reeyyp tiktok
377 | #> 2 user5235623178011 👑কিং রানা 🥀 tiktok
378 | #> 3 rokyevay07 👑Rokye Vay👑 tiktok
379 | #> 4 babyylious08 babyylious08 tiktok
380 | #> 5 user8283823357 hd❤️🩹jaan❤️🩹hi❤️🩹❤️ tiktok
381 | #> 6 user45628309141722 سامي tiktok
382 | #> 7 nu.th085 Nâu Thị tiktok
383 | #> 8 halimeysll halimeysll tiktok
384 | #> 9 taru.tristiyanto Taru Tristiyanto tiktok
385 | #> 10 vng.lan.hng09 Vương Lan Hường tiktok
386 | #> # ℹ 80 more rows
387 | ```
388 |
389 |
390 | ## Obtain all Comments of a Video
391 |
392 | There is, again, not much to talk about when it comes to the comments API.
393 | You need to supply a video ID, which you either have already:
394 |
395 |
396 | ``` r
397 | tt_comments_api(video_id = "7302470379501604128")
398 | #>
399 | ℹ Making initial request
400 |
401 | ✔ Making initial request [4.9s]
402 | #>
403 | ℹ Parsing data
404 |
405 | ✔ Parsing data [68ms]
406 | #> ── search id: ─────────────────────────────────────────
407 | #> # A tibble: 1 × 7
408 | #> create_time id like_count parent_comment_id
409 | #>
410 | #> 1 1700243424 730248974199… 0 7302470379501604…
411 | #> # ℹ 3 more variables: reply_count , text ,
412 | #> # video_id
413 | ```
414 |
415 | Or you got it from a search:
416 |
417 |
418 | ``` r
419 | tt_comments_api(video_id = search_df$video_id[1])
420 | #>
421 | ℹ Making initial request
422 |
423 | ✔ Making initial request [4.8s]
424 | #>
425 | ℹ Parsing data
426 |
427 | ✔ Parsing data [61ms]
428 | #> ── search id: ─────────────────────────────────────────
429 | #> # A tibble: 2 × 7
430 | #> create_time id like_count parent_comment_id
431 | #>
432 | #> 1 1698893206 729669068138… 1 7296688856609475…
433 | #> 2 1698893251 729669083429… 0 7296690681388204…
434 | #> # ℹ 3 more variables: reply_count , text ,
435 | #> # video_id
436 | ```
437 |
438 | Or you let the function extract it from a URL to a video:
439 |
440 |
441 | ``` r
442 | tt_comments_api(video_id = "https://www.tiktok.com/@nicksinghtech/video/7195762648716152107?q=%23rstats")
443 | #>
444 | ℹ Making initial request
445 |
446 | ✔ Making initial request [5.9s]
447 | #>
448 | ℹ Parsing data
449 |
450 | ✔ Parsing data [58ms]
451 | #> ── search id: ─────────────────────────────────────────
452 | #> # A tibble: 96 × 7
453 | #> text video_id create_time id like_count
454 | #>
455 | #> 1 You gotta know… 7195762… 1675394834 7195… 314
456 | #> 2 R is the goat … 7195762… 1675457114 7196… 232
457 | #> 3 Ppl who like E… 7195762… 1675458796 7196… 177
458 | #> 4 Fair but doesn… 7195762… 1675395061 7195… 166
459 | #> 5 babe RStudio i… 7195762… 1675624739 7196… 71
460 | #> 6 Excel is the b… 7195762… 1675465779 7196… 71
461 | #> 7 NOT THE SAS SL… 7195762… 1675494738 7196… 27
462 | #> 8 I won't take t… 7195762… 1675691471 7197… 17
463 | #> 9 No love for ST… 7195762… 1675656122 7196… 16
464 | #> 10 I use SAS 🫡 7195762… 1675440749 7195… 16
465 | #> # ℹ 86 more rows
466 | #> # ℹ 2 more variables: parent_comment_id ,
467 | #> # reply_count
468 | ```
469 |
470 | And that is essentially it.
471 | Note, that if you find the functionality of the Research API lacking, there is nothing that keeps you from using the unofficial API functions.
472 |
473 |
474 | # Dealing with rate limits and continuing old searches
475 |
476 | At the time of writing this vignette, TikTok rate limits the Research API as follows:
477 |
478 | > Currently, the daily limit is set at 1000 requests per day, allowing you to obtain up to 100,000 records per day across our APIs. (Video and Comments API can return 100 records per request). The daily quota gets reset at 12 AM UTC. [[Source](https://developers.tiktok.com/doc/research-api-faq?enter_method=left_navigation)]
479 |
480 | Depending on what you would like to do, this might not be enough for you.
481 | In this case, you can actually save a search and pick it back up after the reset.
482 | To facilitate this, search result objects contain two extra pieces of information in the attributes:
483 |
484 |
485 | ``` r
486 | search_df <- query() |>
487 | query_and(field_name = "region_code",
488 | operation = "IN",
489 | field_values = c("JP", "US")) |>
490 | tt_search_api(start_date = as.Date("2023-11-01"),
491 | end_date = as.Date("2023-11-29"),
492 | max_pages = 1)
493 | #>
494 | ℹ Making initial request
495 |
496 | ✔ Making initial request [2.4s]
497 | #>
498 | ℹ Parsing data
499 |
500 | ✔ Parsing data [71ms]
501 |
502 | attr(search_df, "search_id")
503 | #> [1] "7423432753448096814"
504 | attr(search_df, "cursor")
505 | #> [1] 100
506 | ```
507 |
508 | When you want to continue this search, whether because of the rate limit or because you decided you want more results, you can do so by providing `search_id` and `cursor` to `tt_search_api`.
509 | If your search was cut short by the rate limit or another issue, you can retrieve the results already received with `search_df <- last_query()`.
510 | `search_df` will in both cases contain the relevant `search_id` and `cursor` in the attributes:
511 |
512 |
513 | ``` r
514 | search_df2 <- query() |>
515 | query_and(field_name = "region_code",
516 | operation = "IN",
517 | field_values = c("JP", "US")) |>
518 | tt_search_api(start_date = as.Date("2023-11-01"),
519 | end_date = as.Date("2023-11-29"),
520 |
521 | # this part is new
522 | start_cursor = attr(search_df, "cursor"),
523 | search_id = attr(search_df, "search_id"),
524 | ####
525 | max_pages = 1)
526 | #>
527 | ℹ Making initial request
528 |
529 | ✔ Making initial request [5.1s]
530 | #>
531 | ℹ Parsing data
532 |
533 | ✔ Parsing data [21ms]
534 | attr(search_df2, "search_id")
535 | #> [1] "7336340473470063662"
536 | attr(search_df2, "cursor")
537 | #> [1] 200
538 | ```
539 |
540 | Note that the cursor is not equal to how many videos you got before, as the API also counts videos that are "deleted/marked as private by users etc." [See `max_count` in [Query Videos](https://developers.tiktok.com/doc/research-api-specs-query-videos)].
541 |
542 |
--------------------------------------------------------------------------------
/vignettes/research-api.Rmd.orig:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Research API"
3 | output: rmarkdown::html_vignette
4 | vignette: >
5 | %\VignetteIndexEntry{research-api}
6 | %\VignetteEngine{knitr::rmarkdown}
7 | %\VignetteEncoding{UTF-8}
8 | ---
9 |
10 | ```{r, include = FALSE}
11 | knitr::opts_chunk$set(
12 | collapse = TRUE,
13 | comment = "#>",
14 | eval = TRUE
15 | )
16 | ```
17 |
18 | TikTok's [Research API](https://developers.tiktok.com/products/research-api/), which was made available to researchers in the US and Europe in 2023, offers several endpoints, which are wrapped in the following `traktok` functions:
19 |
20 | 1. You can [search videos](https://developers.tiktok.com/doc/research-api-specs-query-videos) with `tt_search_api` or `tt_search`
21 | 2. You can [get basic user information](https://developers.tiktok.com/doc/research-api-specs-query-user-info) with `tt_user_info_api` or `tt_user_info`
22 | 3. You can [obtain all comments of a video](https://developers.tiktok.com/doc/research-api-specs-query-video-comments) with `tt_comments_api` or `tt_comments`
23 | 4. You can [get the videos a user has liked](https://developers.tiktok.com/doc/research-api-specs-query-user-liked-videos) with `tt_user_liked_videos_api` or `tt_get_liked`
24 | 5. You can [get the videos a user has reposted](https://developers.tiktok.com/doc/research-api-specs-query-user-reposted-videos) with `tt_user_reposted_api` or `tt_get_reposted`
25 | 6. You can [get the videos a user has pinned](https://developers.tiktok.com/doc/research-api-specs-query-user-pinned-videos) with `tt_user_pinned_videos_api` or `tt_get_pinned`
26 | 7. You can [get who a user is following](https://developers.tiktok.com/doc/research-api-specs-query-user-following) or [who follows a user](https://developers.tiktok.com/doc/research-api-specs-query-user-followers) with `tt_get_following`/`tt_get_follower` or `tt_user_following_api`/`tt_user_follower_api`
27 |
28 |
29 | # Authentication
30 |
31 | To get access to the Research API, you need to:
32 |
33 | 1. [be eligible](https://developers.tiktok.com/products/research-api);
34 | 2. [create a developer account](https://developers.tiktok.com/signup);
35 | 3. and then [apply for access to the Research API](https://developers.tiktok.com/products/research-api).
36 |
37 | Once you have been approved and have your client key and client secret, you can authenticate with:
38 |
39 | ```{r eval=FALSE}
40 | library(traktok)
41 | auth_research()
42 | ```
43 |
44 | ```{r echo=FALSE}
45 | library(traktok)
46 | ```
47 |
48 | It is recommended that you run this function only once without arguments, so that your key and secret can be entered through the pop up mask and do not remain unencrypted in your R history or a script.
49 | The function then runs through authentication for you and saves the resulting token encrypted on your hard drive.
50 | Just run it again in case your credentials change.
51 |
52 |
53 | # Usage
54 | ## Search Videos
55 |
56 | TikTok uses a fine-grained, yet complicated [query syntax](https://developers.tiktok.com/doc/research-api-specs-query-videos#query).
57 | For convenience, a query is constructed internally when you search with a key phrase directly:
58 |
59 | ```{r}
60 | tt_query_videos("#rstats", max_pages = 2L)
61 | ```
62 |
63 | This will match your keyword or phrase against keywords and hashtags and return up to 200 results (each page has 100 results and 2 pages are requested by default) from today and yesterday.
64 | Every whitespace is treated as an AND operator.
65 | To extend the date range, you can set a start and end date (which can be a maximum of 30 days apart, but there is no limit on how far back you can go):
66 |
67 | ```{r}
68 | tt_query_videos("#rstats",
69 | max_pages = 2L,
70 | start_date = as.Date("2023-11-01"),
71 | end_date = as.Date("2023-11-29"))
72 | ```
73 |
74 | As said, the query syntax that TikTok uses is a little complicated, as you can use AND, OR and NOT boolean operators on a number of fields (`"create_date"`, `"username"`, `"region_code"`, `"video_id"`, `"hashtag_name"`, `"keyword"`, `"music_id"`, `"effect_id"`, and `"video_length"`):
75 |
76 | | Operator | Results are returned if... |
77 | | -------- | ------------------------------------------ |
78 | | AND | ...all specified conditions are met |
79 | | OR | ...any of the specified conditions are met |
80 | | NOT | ...the not conditions are not met |
81 |
82 | To make this easier to use, `traktok` uses a tidyverse style approach to building queries.
83 | For example, to get to the same query that matches #rstats against keywords and hashtags, you need to build the query like this:
84 |
85 | ```{r}
86 | query() |> # start by using query()
87 | query_or(field_name = "hashtag_name", # add an OR condition on the hashtag field
88 | operation = "IN", # the value should be IN the list of hashtags
89 | field_values = "rstats") |> # the hashtag field does not accept the #-symbol
90 | query_or(field_name = "keyword", # add another OR condition
91 | operation = "IN",
92 | field_values = "#rstats")
93 | ```
94 |
95 | If #rstats is found in either the hashtag or keywords of a video, that video is then returned.
96 | Besides checking for `EQ`ual, you can also use one of the other operations:
97 |
98 | | Operation | Results are returned if field_values are... |
99 | | --------- | ----------------------------------------------- |
100 | | EQ | equal to the value in the field |
101 | | IN | equal to a value in the field |
102 | | GT | greater than the value in the field |
103 | | GTE | greater than or equal to the value in the field |
104 | | LT | lower than the value in the field |
105 | | LTE | lower than or equal to the value in the field |
106 |
107 |
108 | This makes building queries relatively complex, but allows for fine-grained searches in the TikTok data:
109 |
110 | ```{r}
111 | search_df <- query() |>
112 | query_and(field_name = "region_code",
113 | operation = "IN",
114 | field_values = c("JP", "US")) |>
115 | query_or(field_name = "hashtag_name",
116 | operation = "EQ", # rstats is the only hashtag
117 | field_values = "rstats") |>
118 | query_or(field_name = "keyword",
119 | operation = "IN", # rstats is one of the keywords
120 | field_values = "rstats") |>
121 | query_not(operation = "EQ",
122 | field_name = "video_length",
123 | field_values = "SHORT") |>
124 | tt_search_api(start_date = as.Date("2023-11-01"),
125 | end_date = as.Date("2023-11-29"))
126 | search_df
127 | ```
128 |
129 | This will return videos posted in the US or Japan, that have rstats as the only hashtag or as one of the keywords and have a length of `"MID"`, `"LONG"`, or `"EXTRA_LONG"`.^[
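130 | See the [Query Videos documentation](https://developers.tiktok.com/doc/research-api-specs-query-videos) for possible values of each field.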
131 | ]
132 |
133 | ## Get User Information
134 |
135 | There is not really much to getting basic user info, but this is how you can do it:
136 |
137 | ```{r}
138 | tt_user_info_api(username = c("tiktok", "https://www.tiktok.com/@statisticsglobe"))
139 | ```
140 |
141 | If you wish to return the videos of a user, you can use the search again:
142 |
143 | ```{r}
144 | query() |>
145 | query_and(field_name = "username",
146 | operation = "EQ",
147 | field_values = "statisticsglobe") |>
148 | tt_search_api(start_date = as.Date("2023-11-01"),
149 | end_date = as.Date("2023-11-29"))
150 | ```
151 |
152 | You can also find the videos a user has pinned to the top of their page:
153 |
154 | ```{r}
155 | tt_user_pinned_videos_api(c("tiktok", "https://www.tiktok.com/@smooth.learning.c"))
156 | ```
157 |
158 |
159 | To find out what a user has liked, you can use:
160 |
161 | ```{r}
162 | tt_get_liked("jbgruber")
163 | ```
164 |
165 | Note that making likes public is an opt-in feature of TikTok and almost nobody has this enabled, so it will give you a lot of warnings...
166 |
167 | What we can usually get is information about who a user follows:
168 |
169 | ```{r}
170 | tt_user_following_api(username = "jbgruber")
171 | ```
172 |
173 | And who they are followed by:
174 |
175 | ```{r}
176 | tt_user_follower_api("https://www.tiktok.com/@tiktok")
177 | ```
178 |
179 |
180 | ## Obtain all Comments of a Video
181 |
182 | There is, again, not much to talk about when it comes to the comments API.
183 | You need to supply a video ID, which you either have already:
184 |
185 | ```{r}
186 | tt_comments_api(video_id = "7302470379501604128")
187 | ```
188 |
189 | Or you got it from a search:
190 |
191 | ```{r}
192 | tt_comments_api(video_id = search_df$video_id[1])
193 | ```
194 |
195 | Or you let the function extract it from a URL to a video:
196 |
197 | ```{r}
198 | tt_comments_api(video_id = "https://www.tiktok.com/@nicksinghtech/video/7195762648716152107?q=%23rstats")
199 | ```
200 |
201 | And that is essentially it.
202 | Note, that if you find the functionality of the Research API lacking, there is nothing that keeps you from using the unofficial API functions.
203 |
204 |
205 | # Dealing with rate limits and continuing old searches
206 |
207 | At the time of writing this vignette, TikTok rate limits the Research API as follows:
208 |
209 | > Currently, the daily limit is set at 1000 requests per day, allowing you to obtain up to 100,000 records per day across our APIs. (Video and Comments API can return 100 records per request). The daily quota gets reset at 12 AM UTC. [[Source](https://developers.tiktok.com/doc/research-api-faq?enter_method=left_navigation)]
210 |
211 | Depending on what you would like to do, this might not be enough for you.
212 | In this case, you can actually save a search and pick it back up after the reset.
213 | To facilitate this, search result objects contain two extra pieces of information in the attributes:
214 |
215 | ```{r}
216 | search_df <- query() |>
217 | query_and(field_name = "region_code",
218 | operation = "IN",
219 | field_values = c("JP", "US")) |>
220 | tt_search_api(start_date = as.Date("2023-11-01"),
221 | end_date = as.Date("2023-11-29"),
222 | max_pages = 1)
223 |
224 | attr(search_df, "search_id")
225 | attr(search_df, "cursor")
226 | ```
227 |
228 | When you want to continue this search, whether because of a rate limit or because you decided you want more results, you can do so by providing `search_id` and `cursor` to `tt_search_api`.
229 | If your search was cut short by the rate limit or another issue, you can retrieve the results already received with `search_df <- last_query()`.
230 | `search_df` will in both cases contain the relevant `search_id` and `cursor` in the attributes:
231 |
232 | ```{r}
233 | search_df2 <- query() |>
234 | query_and(field_name = "region_code",
235 | operation = "IN",
236 | field_values = c("JP", "US")) |>
237 | tt_search_api(start_date = as.Date("2023-11-01"),
238 | end_date = as.Date("2023-11-29"),
239 |
240 | # this part is new
241 | start_cursor = attr(search_df, "cursor"),
242 | search_id = attr(search_df, "search_id"),
243 | ####
244 | max_pages = 1)
245 | attr(search_df2, "search_id")
246 | attr(search_df2, "cursor")
247 | ```
248 |
249 | Note that the cursor is not equal to how many videos you got before, as the API also counts videos that are "deleted/marked as private by users etc." [See `max_count` in [Query Videos](https://developers.tiktok.com/doc/research-api-specs-query-videos)].
250 |
251 |
--------------------------------------------------------------------------------
/vignettes/secuid.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JBGruber/traktok/d97f4ea5fb4f728aba1f64f9f71f5740bd626442/vignettes/secuid.png
--------------------------------------------------------------------------------
/vignettes/source-code.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JBGruber/traktok/d97f4ea5fb4f728aba1f64f9f71f5740bd626442/vignettes/source-code.png
--------------------------------------------------------------------------------
/vignettes/unofficial-api.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Unofficial API"
3 | output: rmarkdown::html_vignette
4 | vignette: >
5 | %\VignetteIndexEntry{unofficial-api}
6 | %\VignetteEngine{knitr::rmarkdown}
7 | %\VignetteEncoding{UTF-8}
8 | ---
9 |
10 |
11 |
12 | The unofficial or hidden API is essentially what the TikTok website uses to display content to you.
13 | Partly based on [Deen Freelon's `Pyktok` Python module](https://github.com/dfreelon/pyktok), `traktok` contains functions to simulate a browser accessing some of these API endpoints.
14 | How these endpoints work was discovered through reverse engineering and TikTok might change how these endpoints operate at any moment.
15 | As of writing this (2023-11-28), there are functions that can:
16 |
17 | - search videos using a search term
18 | - get video details and the video files from a given video URL
19 | - get who follows a user
20 | - get who a user is following
21 |
22 | To use these functions, you have to log into [TikTok](https://www.tiktok.com/) first and then give `R` the cookies the browser uses to identify itself.
23 |
24 |
25 | # Authentication
26 |
27 | The easiest way to get the cookies needed for authentication is to export the necessary cookies from your browser using a browser extension (after logging in at TikTok.com at least once).
28 | I can recommend ["Get cookies.txt"](https://chrome.google.com/webstore/detail/get-cookiestxt-locally/cclelndahbckbenkjhflpdbgdldlbecc) for Chromium based browsers or ["cookies.txt"](https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/) for Firefox (note that almost all browsers used today are based on one of these).
29 |
30 |
31 |
32 | Save the cookies.txt file, which will look something like this:
33 |
34 | ```
35 | # Netscape HTTP Cookie File
36 | # https://curl.haxx.se/rfc/cookie_spec.html
37 | # This is a generated file! Do not edit.
38 |
39 | .tiktok.com TRUE / TRUE 1728810805 cookie-consent {%22ga%22:true%2C%22af%...
40 | .tiktok.com TRUE / TRUE 1700471788 passport_csrf_token e07d3487c11ce5258a3...
41 | .tiktok.com TRUE / FALSE 1700471788 passport_csrf_token_default e07d3487c11...
42 | #HttpOnly_.tiktok.com TRUE / TRUE 1700493610 multi_sids 71573310862246389...
43 | #HttpOnly_.tiktok.com TRUE / TRUE 1700493610 cmpl_token AgQQAPORF-RO0rNtH...
44 | ...
45 | ```
46 |
47 | **It does not matter if you download all cookies or just the ones specific to TikTok**, as we use the `cookiemonster` package to deal with that.
48 | To read the cookies into a specific encrypted file, simply use:
49 |
50 |
51 | ```r
52 | cookiemonster::add_cookies("tiktok.com_cookies.txt")
53 | ```
54 |
55 | And that's it!
56 | `traktok` will access these cookies whenever necessary.
57 |
58 |
59 | # Usage
60 | ## Search videos
61 |
62 | To search for videos, you can use either `tt_search` or `tt_search_hidden`, which do the same, as long as you do not have a token for the Research API.
63 | To get the first two pages of search results (one page has 12 videos), you can use this command:
64 |
65 |
66 | ```r
67 | rstats_df <- tt_search_hidden("#rstats", max_pages = 2)
68 | #>
69 | ℹ Getting page 1
70 | ⏲ waiting 0.5 seconds
71 | ℹ Getting page 1
72 | ✔ Got page 1. Found 12 videos. [1.9s]
73 | #>
74 | ℹ Getting page 2
75 | ✔ Got page 2. Found 12 videos. [690ms]
76 | rstats_df
77 | #> # A tibble: 24 × 20
78 | #> video_id video_timestamp video_url video_length video_title
79 | #>
80 | #> 1 71151144… 2022-06-30 19:17:53 https://… 135 "R for Beg…
81 | #> 2 72522261… 2023-07-05 07:01:45 https://… 36 "Wow!!! TH…
82 | #> 3 72420686… 2023-06-07 22:05:16 https://… 34 "R GRAPHIC…
83 | #> 4 72134135… 2023-03-22 16:49:12 https://… 6 "R and me …
84 | #> 5 72576898… 2023-07-20 00:23:40 https://… 56 "Pie chart…
85 | #> 6 72999870… 2023-11-10 23:58:21 https://… 51 "Quick R Q…
86 | #> 7 72783048… 2023-09-13 13:40:21 https://… 36 "Quick R Q…
87 | #> 8 73029706… 2023-11-19 00:56:09 https://… 163 "What is c…
88 | #> 9 71670108… 2022-11-17 15:42:56 https://… 58 "Here’s an…
89 | #> 10 72933174… 2023-10-24 00:36:48 https://… 9 "#CapCut #…
90 | #> # ℹ 14 more rows
91 | #> # ℹ 15 more variables: video_diggcount ,
92 | #> # video_sharecount , video_commentcount ,
93 | #> # video_playcount , video_is_ad , author_name ,
94 | #> # author_nickname , author_followercount ,
95 | #> # author_followingcount , author_heartcount ,
96 | #> # author_videocount , author_diggcount , …
97 | ```
98 |
99 | This already gives you pretty much all information you could want about the videos that were found.
100 |
101 | ## Get metadata and download videos
102 |
103 | However, you can obtain some more information, and importantly the video file, using `tt_videos`:
104 |
105 |
106 | ```r
107 | rstats_df2 <- tt_videos(rstats_df$video_url[1:2], save_video = TRUE)
108 | #>
109 | ℹ Getting video 7115114419314560298
110 | ⏲ waiting 0.2 seconds
111 | ℹ Getting video 7115114419314560298
112 | ✔ Got video 7115114419314560298 (1/2). File size: 2.5 Mb. [2.5s]
113 | #>
114 | ℹ Getting video 7252226153828584731
115 | ✔ Got video 7252226153828584731 (2/2). File size: 1.7 Mb. [999ms]
116 | rstats_df2
117 | #> # A tibble: 2 × 19
118 | #> video_id video_url video_timestamp video_length video_title
119 | #>
120 | #> 1 711511441… https://… 2022-06-30 19:17:53 135 R for Begi…
121 | #> 2 725222615… https://… 2023-07-05 07:01:45 36 Wow!!! THI…
122 | #> # ℹ 14 more variables: video_locationcreated ,
123 | #> # video_diggcount , video_sharecount ,
124 | #> # video_commentcount , video_playcount ,
125 | #> # author_username , author_nickname ,
126 | #> # author_bio , download_url , html_status ,
127 | #> # music , challenges , is_classified ,
128 | #> # video_fn
129 | ```
130 |
131 | By default, the function waits between one and ten seconds (chosen at random) between making two calls, to not make it too obvious that data is scraped from TikTok.
132 | You can speed up the process (at your own risk), by changing the `sleep_pool` argument, which controls the minimum and maximum number of seconds to wait:
133 |
134 |
135 | ```r
136 | rstats_df3 <- tt_videos(rstats_df$video_url[3:4], save_video = TRUE, sleep_pool = 0.1)
137 | #>
138 | ℹ Getting video 7242068680484408581
139 | ⏲ waiting 0.1 seconds
140 | ℹ Getting video 7242068680484408581
141 | ✔ Got video 7242068680484408581 (1/2). File size: 1.8 Mb. [2.6s]
142 | #>
143 | ℹ Getting video 7213413598998056234
144 | ✔ Got video 7213413598998056234 (2/2). File size: 598.1 Kb. [1.7s]
145 | rstats_df3
146 | #> # A tibble: 2 × 19
147 | #> video_id video_url video_timestamp video_length video_title
148 | #>
149 | #> 1 724206868… https://… 2023-06-07 22:05:16 34 "R GRAPHIC…
150 | #> 2 721341359… https://… 2023-03-22 16:49:12 6 "R and me …
151 | #> # ℹ 14 more variables: video_locationcreated ,
152 | #> # video_diggcount , video_sharecount ,
153 | #> # video_commentcount , video_playcount ,
154 | #> # author_username , author_nickname ,
155 | #> # author_bio , download_url , html_status ,
156 | #> # music , challenges , is_classified ,
157 | #> # video_fn
158 | ```
159 |
160 | When you are scraping a lot of URLs, the function might fail eventually, due to a poor connection or because TikTok is blocking your requests.
161 | It therefore usually makes sense to save your progress in a cache directory:
162 |
163 |
164 | ```r
165 | rstats_df3 <- tt_videos(rstats_df$video_url[5:6], cache_dir = "rstats")
166 | #>
167 | ℹ Getting video 7257689890245201153
168 | ⏲ waiting 1.7 seconds
169 | ℹ Getting video 7257689890245201153
170 | ✔ Got video 7257689890245201153 (1/2). File size: 1.7 Mb. [2.6s]
171 | #>
172 | ℹ Getting video 7299987059417042209
173 | ✔ Got video 7299987059417042209 (2/2). File size: 1.2 Mb. [1.8s]
174 | list.files("rstats")
175 | #> [1] "7257689890245201153.json" "7299987059417042209.json"
176 | ```
177 |
178 | Note that the video files are downloaded into the `dir` directory (your working directory by default), independently from your cache directory.
179 |
180 | If there is information that you feel is missing from the `data.frame` `tt_videos` returns, you can look at the raw, unparsed json data using:
181 |
182 |
183 | ```r
184 | rstats_list1 <- tt_request_hidden(rstats_df$video_url[1]) |>
185 | jsonlite::fromJSON()
186 | ```
187 |
188 | Parsing the result into a list using `fromJSON`, results in a rather complex nested list.
189 | You can look through this and see for yourself if the data you are interested in is there.
190 |
191 | ## Get followers and who a user is following
192 |
193 | Getting followers and who a user is following is (at the moment?) a little tricky to use, since TikTok blocks requests to a user's profile page with anti-scraping measures.
194 | To circumvent that, you can open a user's page in your browser and then right-click to show the source code:^[
195 | You can of course also use tools like [Playwright](https://playwright.dev/), [Selenium](https://www.selenium.dev/), or [Puppeteer](https://pptr.dev/) to automate this process.
196 | But that is beyond the scope of this vignette and package.
197 | ]
198 |
199 |
200 | ![](source-code.png)
201 |
202 | You can then search for and copy the `authorSecId` value:
203 |
204 | ![](secuid.png)
205 |
206 | Once you have this `authorSecId` you can look up a maximum of 5,000 followers per account:
207 |
208 |
209 | ```r
210 | tt_get_follower(secuid = "MS4wLjABAAAAwiH32UMb5RenqEN7duyfLIeGQgSIx9WtgtOILt55q6ueUXgz4gHqZC5HFx4nabPi",
211 | verbose = FALSE)
212 | #>
213 | #> # A tibble: 1,116 × 27
214 | #> avatarLarger avatarMedium avatarThumb commentSetting
215 | #>
216 | #> 1 https://p16-sign-sg.tik… https://p16… https://p1… 0
217 | #> 2 https://p16-sign-va.tik… https://p16… https://p1… 0
218 | #> 3 https://p16-sign-va.tik… https://p16… https://p1… 0
219 | #> 4 https://p16-sign-va.tik… https://p16… https://p1… 0
220 | #> 5 https://p16-sign-va.tik… https://p16… https://p1… 0
221 | #> 6 https://p16-sign-va.tik… https://p16… https://p1… 0
222 | #> 7 https://p16-sign-va.tik… https://p16… https://p1… 0
223 | #> 8 https://p16-sign-va.tik… https://p16… https://p1… 0
224 | #> 9 https://p16-sign-va.tik… https://p16… https://p1… 0
225 | #> 10 https://p16-sign-va.tik… https://p16… https://p1… 0
226 | #> # ℹ 1,106 more rows
227 | #> # ℹ 23 more variables: downloadSetting , duetSetting ,
228 | #> # ftc , id , isADVirtual , nickname ,
229 | #> # openFavorite , privateAccount , relation ,
230 | #> # secUid , secret , signature ,
231 | #> # stitchSetting , ttSeller , uniqueId ,
232 | #> # verified , diggCount , followerCount , …
233 | ```
234 |
235 | Likewise, you can also check who this account follows:
236 |
237 |
238 | ```r
239 | tt_get_following(secuid = "MS4wLjABAAAAwiH32UMb5RenqEN7duyfLIeGQgSIx9WtgtOILt55q6ueUXgz4gHqZC5HFx4nabPi",
240 | verbose = FALSE)
241 | #>
242 | #> # A tibble: 489 × 28
243 | #> avatarLarger avatarMedium avatarThumb commentSetting
244 | #>
245 | #> 1 https://p16-sign-va.tik… https://p16… https://p1… 0
246 | #> 2 https://p16-sign-va.tik… https://p16… https://p1… 0
247 | #> 3 https://p16-sign-va.tik… https://p16… https://p1… 0
248 | #> 4 https://p16-sign-va.tik… https://p16… https://p1… 0
249 | #> 5 https://p16-sign-va.tik… https://p16… https://p1… 0
250 | #> 6 https://p16-sign-va.tik… https://p16… https://p1… 0
251 | #> 7 https://p16-sign-va.tik… https://p16… https://p1… 0
252 | #> 8 https://p16-sign-va.tik… https://p16… https://p1… 0
253 | #> 9 https://p16-sign-va.tik… https://p16… https://p1… 0
254 | #> 10 https://p16-sign-va.tik… https://p16… https://p1… 0
255 | #> # ℹ 479 more rows
256 | #> # ℹ 24 more variables: downloadSetting , duetSetting ,
257 | #> # ftc , id , isADVirtual , nickname ,
258 | #> # openFavorite , privateAccount , relation ,
259 | #> # secUid , secret , signature ,
260 | #> # stitchSetting , ttSeller , uniqueId ,
261 | #> # verified , diggCount , followerCount , …
262 | ```
263 |
264 |
265 |
266 | ```r
267 | list.files(pattern = ".mp4") |>
268 | unlink()
269 | unlink("rstats", recursive = TRUE)
270 | ```
271 |
272 |
--------------------------------------------------------------------------------
/vignettes/unofficial-api.Rmd.orig:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Unofficial API"
3 | output: rmarkdown::html_vignette
4 | vignette: >
5 | %\VignetteIndexEntry{unofficial-api}
6 | %\VignetteEngine{knitr::rmarkdown}
7 | %\VignetteEncoding{UTF-8}
8 | ---
9 |
10 | ```{r, include = FALSE}
11 | knitr::opts_chunk$set(
12 | collapse = TRUE,
13 | comment = "#>",
14 | eval = TRUE
15 | )
16 | ```
17 |
18 | The unofficial or hidden API is essentially what the TikTok website uses to display content to you.
19 | Partly based on [Deen Freelon's `Pyktok` Python module](https://github.com/dfreelon/pyktok), `traktok` contains functions to simulate a browser accessing some of these API endpoints.
20 | How these endpoints work was discovered through reverse engineering and TikTok might change how these endpoints operate at any moment.
21 | As of writing this (2023-11-28), there are functions that can:
22 |
23 | - search videos using a search term
24 | - get video details and the video files from a given video URL
25 | - get who follows a user
26 | - get who a user is following
27 |
28 | To use these functions, you have to log into [TikTok](https://www.tiktok.com/) first and then give `R` the cookies the browser uses to identify itself.
29 |
30 |
31 | # Authentication
32 |
33 | The easiest way to get the cookies needed for authentication is to export the necessary cookies from your browser using a browser extension (after logging in at TikTok.com at least once).
34 | I can recommend ["Get cookies.txt"](https://chrome.google.com/webstore/detail/get-cookiestxt-locally/cclelndahbckbenkjhflpdbgdldlbecc) for Chromium based browsers or ["cookies.txt"](https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/) for Firefox (note that almost all browsers used today are based on one of these).
35 |
36 | `r knitr::include_graphics("cookies.png")`
37 |
38 | Save the cookies.txt file, which will look something like this:
39 |
40 | ```
41 | # Netscape HTTP Cookie File
42 | # https://curl.haxx.se/rfc/cookie_spec.html
43 | # This is a generated file! Do not edit.
44 |
45 | .tiktok.com TRUE / TRUE 1728810805 cookie-consent {%22ga%22:true%2C%22af%...
46 | .tiktok.com TRUE / TRUE 1700471788 passport_csrf_token e07d3487c11ce5258a3...
47 | .tiktok.com TRUE / FALSE 1700471788 passport_csrf_token_default e07d3487c11...
48 | #HttpOnly_.tiktok.com TRUE / TRUE 1700493610 multi_sids 71573310862246389...
49 | #HttpOnly_.tiktok.com TRUE / TRUE 1700493610 cmpl_token AgQQAPORF-RO0rNtH...
50 | ...
51 | ```
52 |
53 | **It does not matter if you download all cookies or just the ones specific to TikTok**, as we use the `cookiemonster` package to deal with that.
54 | To read the cookies into a specific encrypted file, simply use:
55 |
56 | ```{r eval=FALSE}
57 | cookiemonster::add_cookies("tiktok.com_cookies.txt")
58 | ```
59 |
60 | And that's it!
61 | `traktok` will access these cookies whenever necessary.
62 |
63 |
64 | # Usage
65 | ## Search videos
66 |
67 | To search for videos, you can use either `tt_search` or `tt_search_hidden`, which do the same, as long as you do not have a token for the Research API.
68 | To get the first two pages of search results (one page has 12 videos), you can use this command:
69 |
70 | ```{r}
71 | rstats_df <- tt_search_hidden("#rstats", max_pages = 2)
72 | rstats_df
73 | ```
74 |
75 | This already gives you pretty much all information you could want about the videos that were found.
76 |
77 | ## Get metadata and download videos
78 |
79 | However, you can obtain some more information, and importantly the video file, using `tt_videos`:
80 |
81 | ```{r}
82 | rstats_df2 <- tt_videos(rstats_df$video_url[1:2], save_video = TRUE)
83 | rstats_df2
84 | ```
85 |
86 | By default, the function waits between one and ten seconds (chosen at random) between making two calls, to not make it too obvious that data is scraped from TikTok.
87 | You can speed up the process (at your own risk), by changing the `sleep_pool` argument, which controls the minimum and maximum number of seconds to wait:
88 |
89 | ```{r}
90 | rstats_df3 <- tt_videos(rstats_df$video_url[3:4], save_video = TRUE, sleep_pool = 0.1)
91 | rstats_df3
92 | ```
93 |
94 | When you are scraping a lot of URLs, the function might fail eventually, due to a poor connection or because TikTok is blocking your requests.
95 | It therefore usually makes sense to save your progress in a cache directory:
96 |
97 | ```{r}
98 | rstats_df3 <- tt_videos(rstats_df$video_url[5:6], cache_dir = "rstats")
99 | list.files("rstats")
100 | ```
101 |
102 | Note that the video files are downloaded into the `dir` directory (your working directory by default), independently from your cache directory.
103 |
104 | If there is information that you feel is missing from the `data.frame` `tt_videos` returns, you can look at the raw, unparsed json data using:
105 |
106 | ```{r eval=FALSE}
107 | rstats_list1 <- tt_request_hidden(rstats_df$video_url[1]) |>
108 | jsonlite::fromJSON()
109 | ```
110 |
111 | Parsing the result into a list using `fromJSON`, results in a rather complex nested list.
112 | You can look through this and see for yourself if the data you are interested in is there.
113 |
114 | ## Get followers and who a user is following
115 |
116 | Getting followers and who a user is following is (at the moment?) a little tricky to use, since TikTok blocks requests to a user's profile page with anti-scraping measures.
117 | To circumvent that, you can open a user's page in your browser and then right-click to show the source code:^[
118 | You can of course also use tools like [Playwright](https://playwright.dev/), [Selenium](https://www.selenium.dev/), or [Puppeteer](https://pptr.dev/) to automate this process.
119 | But that is beyond the scope of this vignette and package.
120 | ]
121 |
122 |
123 | ![](source-code.png)
124 |
125 | You can then search for and copy the `authorSecId` value:
126 |
127 | ![](secuid.png)
128 |
129 | Once you have this `authorSecId` you can look up a maximum of 5,000 followers per account:
130 |
131 | ```{r}
132 | tt_get_follower(secuid = "MS4wLjABAAAAwiH32UMb5RenqEN7duyfLIeGQgSIx9WtgtOILt55q6ueUXgz4gHqZC5HFx4nabPi",
133 | verbose = FALSE)
134 | ```
135 |
136 | Likewise, you can also check who this account follows:
137 |
138 | ```{r}
139 | tt_get_following(secuid = "MS4wLjABAAAAwiH32UMb5RenqEN7duyfLIeGQgSIx9WtgtOILt55q6ueUXgz4gHqZC5HFx4nabPi",
140 | verbose = FALSE)
141 | ```
142 |
143 |
144 | ```{r}
145 | list.files(pattern = ".mp4") |>
146 | unlink()
147 | unlink("rstats", recursive = TRUE)
148 | ```
149 |
150 |
--------------------------------------------------------------------------------