├── .Rbuildignore
├── .gitignore
├── DESCRIPTION
├── LICENSE
├── NAMESPACE
├── R
├── dbplyr.R
├── s3_register.R
├── tidyduck.R
└── zzz.R
├── README.md
├── _pkgdown.yml
├── dependencies.R
├── man
├── backend-duckdb.Rd
└── tidyduck-package.Rd
├── tests
├── testthat.R
└── testthat
│ ├── _snaps
│ └── dbplyr.md
│ ├── data
│ ├── binary_string.parquet
│ └── userdata1.parquet
│ ├── test_dbplyr.R
│ └── test_tbl__duckdb_connection.R
└── tidyduck.Rproj
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | .*\.sh
2 | .*\.tar.gz
3 | .*\.tgz
4 | .*\.zip
5 | git
6 | configure
7 | dependencies.R
8 | src/Makevars.in
9 | ^.*\.Rproj$
10 | ^\.Rproj\.user$
11 | ^build_win$
12 | ^tic\.R$
13 | ^_pkgdown\.yml$
14 | ^docs$
15 | deploy-*
16 | CMakeLists.txt
17 | NEWS.md
18 | tests/regression
19 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | src/duckdb/
2 | *.tar.gz
3 | *.tgz
4 | src/Makevars
5 | src/Makevars.win
6 | # Created by https://www.toptal.com/developers/gitignore/api/r
7 | # Edit at https://www.toptal.com/developers/gitignore?templates=r
8 | ### R ###
9 | # History files
10 | .Rhistory
11 | .Rapp.history
12 | # Session Data files
13 | .RData
14 | # User-specific files
15 | .Ruserdata
16 | # Example code in package build process
17 | *-Ex.R
18 | # Output files from R CMD build
19 | /*.tar.gz
20 | # Output files from R CMD check
21 | /*.Rcheck/
22 | # RStudio files
23 | .Rproj.user/
24 | # produced vignettes
25 | vignettes/*.html
26 | vignettes/*.pdf
27 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
28 | .httr-oauth
29 | # knitr and R markdown default cache directories
30 | *_cache/
31 | /cache/
32 | # Temporary files created by R markdown
33 | *.utf8.md
34 | *.knit.md
35 | # R Environment Variables
36 | .Renviron
37 | ### R.Bookdown Stack ###
38 | # R package: bookdown caching files
39 | /*_files/
40 | # End of https://www.toptal.com/developers/gitignore/api/r
41 | docs/
42 | # Created by https://www.toptal.com/developers/gitignore/api/visualstudiocode
43 | # Edit at https://www.toptal.com/developers/gitignore?templates=visualstudiocode
44 | ### VisualStudioCode ###
45 | .vscode/*
46 | !.vscode/settings.json
47 | !.vscode/tasks.json
48 | !.vscode/launch.json
49 | !.vscode/extensions.json
50 | *.code-workspace
51 | ### VisualStudioCode Patch ###
52 | # Ignore all local history of files
53 | .history
54 | # End of https://www.toptal.com/developers/gitignore/api/visualstudiocode
55 | # Created by https://www.toptal.com/developers/gitignore/api/macos
56 | # Edit at https://www.toptal.com/developers/gitignore?templates=macos
57 | ### macOS ###
58 | # General
59 | .DS_Store
60 | .AppleDouble
61 | .LSOverride
62 | # Icon must end with two \r
63 | Icon
64 | # Thumbnails
65 | ._*
66 | # Files that might appear in the root of a volume
67 | .DocumentRevisions-V100
68 | .fseventsd
69 | .Spotlight-V100
70 | .TemporaryItems
71 | .Trashes
72 | .VolumeIcon.icns
73 | .com.apple.timemachine.donotpresent
74 | # Directories potentially created on remote AFP share
75 | .AppleDB
76 | .AppleDesktop
77 | Network Trash Folder
78 | Temporary Items
79 | .apdisk
80 | # End of https://www.toptal.com/developers/gitignore/api/macos
81 | # Created by https://www.toptal.com/developers/gitignore/api/windows
82 | # Edit at https://www.toptal.com/developers/gitignore?templates=windows
83 | ### Windows ###
84 | # Windows thumbnail cache files
85 | Thumbs.db
86 | Thumbs.db:encryptable
87 | ehthumbs.db
88 | ehthumbs_vista.db
89 | # Dump file
90 | *.stackdump
91 | # Folder config file
92 | [Dd]esktop.ini
93 | # Recycle Bin used on file shares
94 | $RECYCLE.BIN/
95 | # Windows Installer files
96 | *.cab
97 | *.msi
98 | *.msix
99 | *.msm
100 | *.msp
101 | # Windows shortcuts
102 | *.lnk
103 | # End of https://www.toptal.com/developers/gitignore/api/windows
104 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: tidyduck
2 | Title: High-level bindings for the DuckDB Database Management System
3 | Version: 0.5.0
4 | Authors@R:
5 | c(person(given = "Kirill",
6 | family = "M\u00fcller",
7 | role = c("aut", "cre"),
8 | email = "krlmlr+r@mailbox.org",
9 | comment = c(ORCID = "0000-0002-1416-3412")),
10 | person("Stichting DuckDB Foundation", role = "cph"))
11 | Description: The DuckDB project is an embedded analytical data
12 | management system with support for the Structured Query Language (SQL).
13 | This package includes optional bindings to DuckDB for the tidyverse
14 | and other ecosystems.
15 | License: MIT + file LICENSE
16 | URL: https://duckdb.org/, https://github.com/duckdb/tidyduck
17 | BugReports: https://github.com/duckdb/tidyduck/issues
18 | Depends:
19 | R (>= 3.6.0)
20 | Imports:
21 | arrow,
22 | bit64,
23 | callr,
24 | dbplyr,
25 | DBI,
26 | dplyr,
27 | duckdb (>= 0.5.0),
28 | methods,
29 | tibble,
30 | utils,
31 | vctrs,
32 | withr
33 | Suggests:
34 | testthat
35 | Encoding: UTF-8
36 | Roxygen: list(markdown = TRUE)
37 | RoxygenNote: 7.2.1
38 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2018
2 | COPYRIGHT HOLDER: Stichting DuckDB Foundation
3 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | export(simulate_duckdb)
4 | export(translate_duckdb)
5 | import(dbplyr, except = c(sql, ident))
6 | import(dplyr)
7 |
--------------------------------------------------------------------------------
/R/dbplyr.R:
--------------------------------------------------------------------------------
1 | #' DuckDB SQL backend for dbplyr
2 | #'
3 | #' @description
4 | #' This is a SQL backend for dbplyr tailored to take into account DuckDB's
5 | #' possibilities. This mainly follows the backend for PostgreSQL, but
6 | #' contains more mapped functions.
7 | #'
8 | #' @name backend-duckdb
9 | #' @aliases NULL
10 | #' @examples
11 | #' library(dplyr, warn.conflicts = FALSE)
12 | #' con <- DBI::dbConnect(duckdb::duckdb(), path = ":memory:")
13 | #'
14 | #' dbiris <- copy_to(con, iris, overwrite = TRUE)
15 | #'
16 | #' dbiris %>% select(Petal.Length, Petal.Width) %>% filter(Petal.Length > 1.5) %>% head(5)
17 | #'
18 | #' DBI::dbDisconnect(con, shutdown = TRUE)
19 | NULL
20 |
#' Simulated connection object for generating SQL without an actual database.
#' With this connection dbplyr overrides the database specific identifier and string quotes
#' @param ... Any parameters to be forwarded
#' @export
#' @rdname backend-duckdb
simulate_duckdb <- function(...) {
  # "TestConnection" is the extra class that distinguishes this object from
  # the one built by translate_duckdb().
  sim_classes <- c("duckdb_connection", "TestConnection", "DBIConnection")
  structure(list(), ..., class = sim_classes)
}
29 |
#' Simulated connection object for generating SQL without an actual database.
#' This version keeps the database specific identifier and string quotes, i.e.
#' it allows translating to the DuckDB SQL dialect.
#' @param ... Any parameters to be forwarded
#' @export
#' @rdname backend-duckdb
translate_duckdb <- function(...) {
  # Same as simulate_duckdb() but without the "TestConnection" class.
  dialect_classes <- c("duckdb_connection", "DBIConnection")
  structure(list(), ..., class = dialect_classes)
}
39 |
# Declare which edition of the dbplyr API this backend is written against.
# @param con A \code{\link{dbConnect}} object, as returned by \code{dbConnect()}
# @name dbplyr_edition
# @return
# The integer edition number (always 2L); `con` is not inspected.
dbplyr_edition.duckdb_connection <- function(con) {
  edition <- 2L
  edition
}
46 |
# One-line description of the database connection
# @param con A \code{\link{dbConnect}} object, as returned by \code{dbConnect()}
# @name db_connection_describe
# @return
# String of the form
# "DuckDB <db version> [<login>@<sysname> <release>:R <major>.<minor>/<dbname>]"
db_connection_describe.duckdb_connection <- function(con) {
  db_info <- DBI::dbGetInfo(con)
  host <- Sys.info()

  os_label <- paste(host[c("sysname", "release")], collapse = " ")
  r_label <- paste0("R ", R.version$major, ".", R.version$minor)

  paste0(
    "DuckDB ", db_info$db.version,
    " [", host["login"], "@", os_label, ":", r_label, "/", db_info$dbname, "]"
  )
}
60 |
# Translation of base::grepl() to DuckDB's REGEXP_MATCHES().
# @param pattern Regular expression to look for
# @param x Expression (e.g. a column) that is searched
# @param ignore.case If TRUE, the pattern is prefixed with the "(?i)" flag
# @param perl,fixed,useBytes Not supported; setting any of them is an error
# @return The SQL expression built by dbplyr's sql_expr()
duckdb_grepl <- function(pattern, x, ignore.case = FALSE, perl = FALSE, fixed = FALSE, useBytes = FALSE) {
  # https://duckdb.org/docs/sql/functions/patternmatching
  unsupported <- c(perl, fixed, useBytes)
  if (any(unsupported)) {
    stop("Parameters `perl`, `fixed` and `useBytes` in grepl are not currently supported in DuckDB backend", call. = FALSE)
  }

  sql_expr <- pkg_method("sql_expr", "dbplyr")

  # Case-insensitive matching is requested through an inline regex flag.
  regex <- if (ignore.case) paste0("(?i)", pattern) else pattern
  sql_expr(REGEXP_MATCHES((!!x), (!!regex)))
}
76 |
77 |
# Customized translation functions for DuckDB SQL
#
# Builds the dbplyr SQL variant for DuckDB out of three translators: scalar
# functions, aggregate functions and window functions. Each translator
# inherits from the corresponding dbplyr base translator and adds or
# overrides the mappings listed below. All dbplyr helpers are looked up at
# call time through pkg_method().
#
# @param con A \code{\link{dbConnect}} object, as returned by \code{dbConnect()}
# @name sql_translation
# @return
# The object returned by dbplyr's sql_variant()
sql_translation.duckdb_connection <- function(con) {
  sql_variant <- pkg_method("sql_variant", "dbplyr")
  sql_translator <- pkg_method("sql_translator", "dbplyr")
  sql <- pkg_method("sql", "dbplyr")
  build_sql <- pkg_method("build_sql", "dbplyr")
  sql_expr <- pkg_method("sql_expr", "dbplyr")
  sql_prefix <- pkg_method("sql_prefix", "dbplyr")
  sql_cast <- pkg_method("sql_cast", "dbplyr")
  sql_paste <- pkg_method("sql_paste", "dbplyr")
  sql_aggregate <- pkg_method("sql_aggregate", "dbplyr")
  sql_aggregate_2 <- pkg_method("sql_aggregate_2", "dbplyr")
  win_aggregate <- pkg_method("win_aggregate", "dbplyr")
  win_aggregate_2 <- pkg_method("win_aggregate_2", "dbplyr")
  win_over <- pkg_method("win_over", "dbplyr")
  win_current_order <- pkg_method("win_current_order", "dbplyr")
  # Must be the *group* accessor: it feeds the PARTITION BY of the window
  # version of str_flatten below.
  win_current_group <- pkg_method("win_current_group", "dbplyr")

  base_scalar <- pkg_method("base_scalar", "dbplyr")
  base_agg <- pkg_method("base_agg", "dbplyr")
  base_win <- pkg_method("base_win", "dbplyr")

  sql_variant(
    # Scalar functions
    sql_translator(
      .parent = base_scalar,
      as.raw = sql_cast("VARBINARY"),
      `%%` = function(a, b) sql_expr(FMOD(!!a, !!b)),
      `%/%` = function(a, b) sql_expr(FDIV(!!a, !!b)),
      `^` = sql_prefix("POW", 2),
      bitwOr = function(a, b) sql_expr((CAST((!!a) %AS% INTEGER)) | (CAST((!!b) %AS% INTEGER))),
      bitwAnd = function(a, b) sql_expr((CAST((!!a) %AS% INTEGER)) & (CAST((!!b) %AS% INTEGER))),
      bitwXor = function(a, b) sql_expr(XOR((CAST((!!a) %AS% INTEGER)), (CAST((!!b) %AS% INTEGER)))),
      bitwNot = function(a) sql_expr(~ (CAST((!!a) %AS% INTEGER))),
      bitwShiftL = function(a, b) sql_expr((CAST((!!a) %AS% INTEGER)) %<<% (CAST((!!b) %AS% INTEGER))),
      bitwShiftR = function(a, b) sql_expr((CAST((!!a) %AS% INTEGER)) %>>% (CAST((!!b) %AS% INTEGER))),
      log = function(x, base = exp(1)) {
        if (isTRUE(all.equal(base, exp(1)))) {
          sql_expr(LN(!!x))
        } else if (base == 10) {
          sql_expr(LOG10(!!x))
        } else if (base == 2) {
          sql_expr(LOG2(!!x))
        } else {
          sql_expr(LOG(!!x) / LOG(!!base))
        }
      },
      log10 = sql_prefix("LOG10", 1),
      log2 = sql_prefix("LOG2", 1),

      # See https://github.com/duckdb/duckdb/issues/530 about NaN, infinites and NULL in DuckDB
      # The following is how R functions for detecting those should behave:
      # Function        Inf   -Inf  NaN   NA
      # is.finite()     FALSE FALSE FALSE FALSE
      # is.infinite()   TRUE  TRUE  FALSE FALSE
      # is.nan()        FALSE FALSE TRUE  FALSE
      # is.na()         FALSE FALSE TRUE  TRUE
      # https://github.com/duckdb/duckdb/issues/3019
      # is.na = function(a) build_sql("(", a, " IS NULL OR PRINTF('%f', ", a, ") = 'nan')"),
      is.nan = function(a) build_sql("(", a, " IS NOT NULL AND PRINTF('%f', ", a, ") = 'nan')"),
      is.infinite = function(a) build_sql("(", a, " IS NOT NULL AND REGEXP_MATCHES(PRINTF('%f', ", a, "), 'inf'))"),
      is.finite = function(a) build_sql("(NOT (", a, " IS NULL OR REGEXP_MATCHES(PRINTF('%f', ", a, "), 'inf|nan')))"),
      grepl = duckdb_grepl,

      # Return index where the first match starts,-1 if no match
      regexpr = function(p, x) {
        build_sql("(CASE WHEN REGEXP_MATCHES(", x, ", ", p, ") THEN (LENGTH(LIST_EXTRACT(STRING_SPLIT_REGEX(", x, ", ", p, "), 0))+1) ELSE -1 END)")
      },
      # `digits` defaults to 0 like base::round(), so that round(x) translates
      # instead of failing on a missing argument.
      round = function(x, digits = 0L) sql_expr(ROUND(!!x, CAST(ROUND((!!digits), 0L) %AS% INTEGER))),
      as.Date = sql_cast("DATE"),
      as.POSIXct = sql_cast("TIMESTAMP"),

      # lubridate functions

      month = function(x, label = FALSE, abbr = TRUE) {
        if (!label) {
          sql_expr(EXTRACT(MONTH %FROM% !!x))
        } else {
          if (abbr) {
            sql_expr(STRFTIME(!!x, "%b"))
          } else {
            sql_expr(STRFTIME(!!x, "%B"))
          }
        }
      },
      quarter = function(x, type = "quarter", fiscal_start = 1, with_year = identical(type, "year.quarter")) {
        if (fiscal_start != 1) {
          stop("`fiscal_start` is not yet supported in DuckDB translation. Must be 1.", call. = FALSE)
        }
        # A logical `type` is mapped onto the corresponding string value.
        if (is.logical(type)) {
          type <- if (type) {
            "year.quarter"
          } else {
            "quarter"
          }
        }
        if (with_year) {
          type <- "year.quarter"
        }
        switch(type,
          quarter = {
            sql_expr(EXTRACT(QUARTER %FROM% !!x))
          },
          year.quarter = {
            sql_expr((EXTRACT(YEAR %FROM% !!x) || "." || EXTRACT(QUARTER %FROM% !!x)))
          },
          date_first = {
            sql_expr((CAST(DATE_TRUNC("QUARTER", !!x) %AS% DATE)))
          },
          date_last = {
            sql_expr((CAST((DATE_TRUNC("QUARTER", !!x) + !!sql("INTERVAL '1 QUARTER'") - !!sql("INTERVAL '1 DAY'")) %AS% DATE)))
          },
          stop(paste("Unsupported type", type), call. = FALSE)
        )
      },
      qday = function(x) {
        build_sql("DATE_DIFF('DAYS', DATE_TRUNC('QUARTER', CAST((", x, ") AS DATE)), (CAST((", x, ") AS DATE) + INTERVAL '1 DAY'))")
      },
      wday = function(x, label = FALSE, abbr = TRUE, week_start = NULL) {
        if (!label) {
          week_start <- if (!is.null(week_start)) week_start else getOption("lubridate.week.start", 7)
          offset <- as.integer(7 - week_start)
          sql_expr(EXTRACT("dow" %FROM% CAST((!!x) %AS% DATE) + !!offset) + 1L)
        } else if (label && !abbr) {
          sql_expr(STRFTIME(!!x, "%A"))
        } else if (label && abbr) {
          sql_expr(STRFTIME(!!x, "%a"))
        } else {
          stop("Unrecognized arguments to `wday`", call. = FALSE)
        }
      },
      yday = function(x) sql_expr(EXTRACT(DOY %FROM% !!x)),

      # These work fine internally, but getting INTERVAL-type data out of DuckDB
      # seems problematic until there is a fix for the issue #1920 / #2900
      # (https://github.com/duckdb/duckdb/issues/1920)
      seconds = function(x) {
        sql_expr(TO_SECONDS(CAST((!!x) %AS% BIGINT)))
      },
      minutes = function(x) {
        sql_expr(TO_MINUTES(CAST((!!x) %AS% BIGINT)))
      },
      hours = function(x) {
        sql_expr(TO_HOURS(CAST((!!x) %AS% BIGINT)))
      },
      days = function(x) {
        sql_expr(TO_DAYS(CAST((!!x) %AS% INTEGER)))
      },
      weeks = function(x) {
        sql_expr(TO_DAYS(7L * CAST((!!x) %AS% INTEGER)))
      },
      months = function(x) {
        sql_expr(TO_MONTHS(CAST((!!x) %AS% INTEGER)))
      },
      years = function(x) {
        sql_expr(TO_YEARS(CAST((!!x) %AS% INTEGER)))
      },

      # Week_start algorithm: https://github.com/tidyverse/lubridate/issues/509#issuecomment-287030620
      floor_date = function(x, unit = "seconds", week_start = NULL) {
        if (unit %in% c("week", "weeks")) {
          week_start <- if (!is.null(week_start)) week_start else getOption("lubridate.week.start", 7)
          if (week_start == 1) {
            sql_expr(DATE_TRUNC(!!unit, !!x))
          } else {
            offset <- as.integer(7 - week_start)
            sql_expr(CAST((!!x) %AS% DATE) - CAST(EXTRACT("dow" %FROM% CAST((!!x) %AS% DATE) + !!offset) %AS% INTEGER))
          }
        } else {
          sql_expr(DATE_TRUNC(!!unit, !!x))
        }
      },
      paste = sql_paste(" "),
      paste0 = sql_paste(""),

      # stringr functions
      str_c = sql_paste(""),
      str_detect = function(string, pattern, negate = FALSE) {
        if (negate) {
          sql_expr((NOT(REGEXP_MATCHES(!!string, !!pattern))))
        } else {
          sql_expr(REGEXP_MATCHES(!!string, !!pattern))
        }
      },
      str_replace = function(string, pattern, replacement) {
        sql_expr(REGEXP_REPLACE(!!string, !!pattern, !!replacement))
      },
      str_replace_all = function(string, pattern, replacement) {
        sql_expr(REGEXP_REPLACE(!!string, !!pattern, !!replacement, "g"))
      },
      str_squish = function(string) {
        sql_expr(TRIM(REGEXP_REPLACE(!!string, "\\s+", " ", "g")))
      },
      str_remove = function(string, pattern) {
        sql_expr(REGEXP_REPLACE(!!string, !!pattern, ""))
      },
      str_remove_all = function(string, pattern) {
        sql_expr(REGEXP_REPLACE(!!string, !!pattern, "", "g"))
      },
      # str_to_title = function(string) {
      #  sql_expr(INITCAP(!!string))
      # },
      str_to_sentence = function(string) {
        build_sql("(UPPER(", string, "[0]) || ", string, "[1:NULL])")
      },
      # Respect OR (|) operator: https://github.com/tidyverse/stringr/pull/340
      # The pattern is wrapped in a non-capturing group before anchoring, so
      # that an alternation such as 'a|b' is anchored as a whole. Both SQL
      # fragments must yield balanced parentheses:
      #   REGEXP_MATCHES(<string>, '^(?:' || <pattern> || ')')
      #   REGEXP_MATCHES(<string>, '(?:' || <pattern> || ')$')
      str_starts = function(string, pattern) {
        build_sql("REGEXP_MATCHES(", string, ", '^(?:' || ", pattern, " || ')')")
      },
      str_ends = function(string, pattern) {
        build_sql("REGEXP_MATCHES(", string, ", '(?:' || ", pattern, " || ')$')")
      },
      # NOTE: GREATEST needed because DuckDB PAD-functions truncate the string if width < length of string
      str_pad = function(string, width, side = "left", pad = " ", use_length = FALSE) {
        if (side %in% c("left")) {
          sql_expr(LPAD(!!string, CAST(GREATEST(!!as.integer(width), LENGTH(!!string)) %AS% INTEGER), !!pad))
        } else if (side %in% c("right")) {
          sql_expr(RPAD(!!string, CAST(GREATEST(!!as.integer(width), LENGTH(!!string)) %AS% INTEGER), !!pad))
        } else if (side %in% c("both")) {
          sql_expr(RPAD(REPEAT(!!pad, (!!as.integer(width) - LENGTH(!!string)) / 2L) %||% !!string, CAST(GREATEST(!!as.integer(width), LENGTH(!!string)) %AS% INTEGER), !!pad))
        } else {
          stop('Argument \'side\' should be "left", "right" or "both"', call. = FALSE)
        }
      }
    ),
    # Aggregate functions
    sql_translator(
      .parent = base_agg,
      cor = sql_aggregate_2("CORR"),
      cov = sql_aggregate_2("COVAR_SAMP"),
      sd = sql_aggregate("STDDEV", "sd"),
      var = sql_aggregate("VARIANCE", "var"),
      all = sql_aggregate("BOOL_AND", "all"),
      any = sql_aggregate("BOOL_OR", "any"),
      str_flatten = function(x, collapse) sql_expr(STRING_AGG(!!x, !!collapse)),
      first = sql_prefix("FIRST", 1),
      last = sql_prefix("LAST", 1)
    ),
    # Window functions
    sql_translator(
      .parent = base_win,
      cor = win_aggregate_2("CORR"),
      cov = win_aggregate_2("COVAR_SAMP"),
      sd = win_aggregate("STDDEV"),
      var = win_aggregate("VARIANCE"),
      all = win_aggregate("BOOL_AND"),
      any = win_aggregate("BOOL_OR"),
      str_flatten = function(x, collapse) {
        win_over(
          sql_expr(STRING_AGG(!!x, !!collapse)),
          partition = win_current_group(),
          order = win_current_order()
        )
      }
    )
  )
}
337 |
338 |
# Customized translation for comparing two objects in DuckDB SQL
# @param con A \code{\link{dbConnect}} object, as returned by \code{dbConnect()}
# @param x First object to be compared
# @param y Second object to be compared
# @name sql_expr_matches
# @return
# The SQL fragment "<x> IS NOT DISTINCT FROM <y>" built by dbplyr's build_sql()
sql_expr_matches.duckdb_connection <- function(con, x, y) {
  # https://duckdb.org/docs/sql/expressions/comparison_operators
  sql_builder <- pkg_method("build_sql", "dbplyr")
  sql_builder(x, " IS NOT DISTINCT FROM ", y, con = con)
}
349 |
# Customized escape translation for date objects: the value is quoted by
# DBI's dbQuoteLiteral() for the given connection.
# @param con A \code{\link{dbConnect}} object, as returned by \code{dbConnect()}
# @param x Date object to be escaped
# @name sql_escape_date
sql_escape_date.duckdb_connection <- function(con, x) {
  # https://github.com/tidyverse/dbplyr/issues/727
  quote_literal <- pkg_method("dbQuoteLiteral", "DBI")
  quote_literal(con, x)
}
359 |
# Customized escape translation for datetime objects: the value is quoted by
# DBI's dbQuoteLiteral() for the given connection.
# @param con A \code{\link{dbConnect}} object, as returned by \code{dbConnect()}
# @param x Datetime object to be escaped
# @name sql_escape_datetime
sql_escape_datetime.duckdb_connection <- function(con, x) {
  quote_literal <- pkg_method("dbQuoteLiteral", "DBI")
  quote_literal(con, x)
}
368 |
# Customized translation for fill function; delegates to dbplyr's
# implementation for SQLite connections.
# @param .con A \code{\link{dbConnect}} object, as returned by \code{dbConnect()}
# @param .data Data frame
# @param cols_to_fill Which columns to be altered
# @param order_by_cols Defined order of variables
# @param .direction Direction in which to fill missing values.
# @name dbplyr_fill0
dbplyr_fill0.duckdb_connection <- function(.con, .data, cols_to_fill, order_by_cols, .direction) {
  sqlite_fill0 <- pkg_method("dbplyr_fill0.SQLiteConnection", "dbplyr")

  # Required because of the bug in dbplyr (con is not passed to "translate_sql(cumsum..." call)
  # See https://github.com/tidyverse/dbplyr/pull/753
  set_current_con <- pkg_method("set_current_con", "dbplyr")
  set_current_con(.con)

  sqlite_fill0(.con, .data, cols_to_fill, order_by_cols, .direction)
}
386 |
# Customized handling for tbl() to allow the use of replacement scans
#
# A `from` that is not an existing table is wrapped with dbplyr's ident_q(),
# so that it is passed on unquoted (e.g. a parquet/csv file name). SQL
# objects created with sql() are left untouched: they are character vectors
# too, so they would otherwise fail the table lookup and be wrapped as if
# they were a table name.
#
# @param src A \code{\link{dbConnect}} object, as returned by \code{dbConnect()}
# @param from Table or parquet/csv -files to be registered
# @param cache Enable object cache for parquet files
# @param ... Not used by this method
tbl.duckdb_connection <- function(src, from, cache = FALSE, ...) {
  ident_q <- pkg_method("ident_q", "dbplyr")
  if (!inherits(from, "sql") && !DBI::dbExistsTable(src, from)) {
    from <- ident_q(from)
  }
  if (cache) {
    DBI::dbExecute(src, "PRAGMA enable_object_cache")
  }
  NextMethod("tbl")
}
397 |
# Needed to suppress the R CHECK notes (due to the use of sql_expr).
# The call is namespace-qualified so that it does not depend on the utils
# package being attached when the package code is evaluated.
utils::globalVariables(c(
  "REGEXP_MATCHES", "CAST", "%AS%", "INTEGER", "XOR", "%<<%", "%>>%", "LN",
  "LOG", "ROUND", "EXTRACT", "%FROM%", "MONTH", "STRFTIME", "QUARTER", "YEAR",
  "DATE_TRUNC", "DATE", "DOY", "TO_SECONDS", "BIGINT", "TO_MINUTES",
  "TO_HOURS", "TO_DAYS", "TO_WEEKS", "TO_MONTHS", "TO_YEARS", "STRPOS", "NOT",
  "REGEXP_REPLACE", "TRIM", "LPAD", "RPAD", "%||%", "REPEAT", "LENGTH",
  "STRING_AGG", "GREATEST", "LIST_EXTRACT", "LOG10", "LOG2",
  "STRING_SPLIT_REGEX", "FLOOR", "FMOD", "FDIV"
))
400 |
--------------------------------------------------------------------------------
/R/s3_register.R:
--------------------------------------------------------------------------------
1 | # nocov start
2 |
# From: https://github.com/r-lib/rlang/blob/d5df93251d055721abb4a576433fb867ca40d527/R/compat-s3-register.R#L53-L122
#
# Register an S3 method for a generic that lives in another package.
# @param generic Generic in the form "package::generic"
# @param class Class the method is registered for
# @param method Method implementation; if NULL, a function named
#   "<generic>.<class>" is looked up in the caller's namespace (or in the
#   calling frame when not called from a namespace)
# @return NULL, invisibly
s3_register <- function(generic, class, method = NULL) {
  stopifnot(is.character(generic), length(generic) == 1)
  stopifnot(is.character(class), length(class) == 1)

  pieces <- strsplit(generic, "::")[[1]]
  stopifnot(length(pieces) == 2)
  package <- pieces[[1]]
  generic <- pieces[[2]]

  caller <- parent.frame()

  # Environment in which the default method implementation is searched
  get_method_env <- function() {
    top_env <- topenv(caller)
    if (!isNamespace(top_env)) {
      return(caller)
    }
    asNamespace(environmentName(top_env))
  }

  # Explicitly supplied method, or "<generic>.<class>" from the method env
  get_method <- function(method) {
    if (!is.null(method)) {
      return(method)
    }
    get(paste0(generic, ".", class), envir = get_method_env())
  }

  register <- function(...) {
    target_ns <- asNamespace(package)

    # Refresh the method each time, it might have been updated by
    # `devtools::load_all()`
    method_fn <- get_method(method)
    stopifnot(is.function(method_fn))

    # Only register if generic can be accessed
    if (exists(generic, target_ns)) {
      registerS3method(generic, class, method_fn, envir = target_ns)
    } else if (identical(Sys.getenv("NOT_CRAN"), "true")) {
      warning(sprintf(
        "Can't find generic `%s` in package %s to register S3 method.",
        generic,
        package
      ))
    }
  }

  # Always register hook in case package is later unloaded & reloaded
  on_load_hook <- function(...) {
    register()
  }
  setHook(packageEvent(package, "onLoad"), on_load_hook)

  # Avoid registration failures during loading (pkgload or regular)
  already_usable <- isNamespaceLoaded(package) && environmentIsLocked(asNamespace(package))
  if (already_usable) {
    register()
  }

  invisible()
}
64 |
# From: https://github.com/DyfanJones/noctua/blob/b82113098df6b3a7981cf8ca0c1ae9f2ff408756/R/utils.R#L168-L175
# Fetch an object (exported or not) from the namespace of another package.
# @param fun Name of the object to fetch
# @param pkg Name of the package that owns it
# @return The object found by utils::getFromNamespace(); an error is raised
#   when the package cannot be loaded
pkg_method <- function(fun, pkg) {
  pkg_available <- requireNamespace(pkg, quietly = TRUE)
  if (!pkg_available) {
    stop(
      fun, " requires the ", pkg, " package, please install it first and try again",
      call. = FALSE
    )
  }
  utils::getFromNamespace(fun, pkg)
}
76 |
77 | # nocov end
78 |
--------------------------------------------------------------------------------
/R/tidyduck.R:
--------------------------------------------------------------------------------
1 | #' DuckDB high-level bindings for R
2 | #'
3 | #' R high-level bindings for DuckDB: an embeddable SQL OLAP Database Management System.
4 | #'
5 | #' @seealso
6 | #' [duckdb::duckdb()] for connection instructions.
7 | #'
8 | #' <https://duckdb.org/> for the project website.
9 | #'
10 | ## usethis namespace: start
11 | #' @import dplyr
12 | #' @rawNamespace import(dbplyr, except = c(sql, ident))
13 | ## usethis namespace: end
14 | #' @docType package
15 | #' @name tidyduck-package
16 | NULL
17 |
--------------------------------------------------------------------------------
/R/zzz.R:
--------------------------------------------------------------------------------
# Register the duckdb_connection S3 methods with the dbplyr and dplyr
# generics when the package namespace is loaded.
.onLoad <- function(...) {
  dbplyr_generics <- c(
    "dbplyr_edition",
    "db_connection_describe",
    "sql_translation",
    "dbplyr_fill0",
    "sql_expr_matches",
    "sql_escape_date",
    "sql_escape_datetime"
  )
  for (generic in paste0("dbplyr::", dbplyr_generics)) {
    s3_register(generic, "duckdb_connection")
  }
  s3_register("dplyr::tbl", "duckdb_connection")

  invisible()
}
13 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # duckdb R package
4 |
5 | ## Installation from CRAN
6 |
7 | ```r
8 | install.packages("duckdb")
9 | ```
10 |
11 | ## Building
12 |
13 | The default build compiles a release version from an amalgamation.
14 |
15 | ```sh
16 | cd tools/rpkg
17 | R CMD INSTALL .
18 | ```
19 |
20 | Optional extensions can be enabled by passing them (comma-separated, if there is more than one) to the environment variable `DUCKDB_R_EXTENSIONS`:
21 |
22 | ```sh
23 | DUCKDB_R_EXTENSIONS=tpch R CMD INSTALL .
24 | ```
25 |
26 | ## Development
27 |
28 | For development, setting the `DUCKDB_R_DEBUG` environment variable enables incremental debugging builds for the R package.
29 |
30 | ```sh
31 | cd tools/rpkg
32 | DUCKDB_R_DEBUG=1 R CMD INSTALL .
33 | ```
34 |
35 | This also works for devtools:
36 |
37 | ```r
38 | Sys.setenv(DUCKDB_R_DEBUG = "1")
39 | pkgload::load_all()
40 | ```
41 |
42 | Add the following to your `.Renviron` to make this the default:
43 |
44 | ```
45 | DUCKDB_R_DEBUG=1
46 | ```
47 |
48 | If you do this, remember to use `--vanilla` for building release builds.
49 |
--------------------------------------------------------------------------------
/_pkgdown.yml:
--------------------------------------------------------------------------------
# Reference index; every entry must match a topic documented in man/.
reference:
  - title: dbplyr backend
    contents:
      - backend-duckdb
  - title: tidyduck Package
    contents:
      - tidyduck-package
14 |
--------------------------------------------------------------------------------
/dependencies.R:
--------------------------------------------------------------------------------
# Installs the packages needed for development and CI.
local({
  # unique() guards against accidental duplicates in the list.
  pkg <- unique(c(
    "DBI", "callr", "DBItest", "dbplyr", "nycflights13", "testthat", "bit64",
    "cpp11", "arrow", "covr", "pkgbuild", "remotes"
  ))

  if (.Platform$OS.type == "unix") {
    # The user agent reports the version of the running R session instead of
    # a hard-coded one.
    # NOTE(review): presumably the package manager uses this header to pick
    # matching binary packages; confirm against its documentation.
    r_version <- as.character(getRversion())
    options(HTTPUserAgent = sprintf(
      "R/%s R (%s)",
      r_version,
      paste(r_version, R.version$platform, R.version$arch, R.version$os)
    ))
    install.packages(pkg, repos = "https://packagemanager.rstudio.com/all/__linux__/focal/latest")
    # https://github.com/r-lib/covr/pull/499
    remotes::install_github("r-lib/covr")
  } else {
    install.packages(pkg, repos = "https://cloud.r-project.org", pkgType = "binary")
  }
})
13 |
--------------------------------------------------------------------------------
/man/backend-duckdb.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dbplyr.R
3 | \name{backend-duckdb}
4 | \alias{simulate_duckdb}
5 | \alias{translate_duckdb}
6 | \title{DuckDB SQL backend for dbplyr}
7 | \usage{
8 | simulate_duckdb(...)
9 |
10 | translate_duckdb(...)
11 | }
12 | \arguments{
13 | \item{...}{Any parameters to be forwarded}
14 | }
15 | \description{
16 | This is a SQL backend for dbplyr tailored to take into account DuckDB's
17 | possibilities. This mainly follows the backend for PostgreSQL, but
18 | contains more mapped functions.
19 | }
20 | \examples{
21 | library(dplyr, warn.conflicts = FALSE)
22 | con <- DBI::dbConnect(duckdb::duckdb(), path = ":memory:")
23 |
24 | dbiris <- copy_to(con, iris, overwrite = TRUE)
25 |
26 | dbiris \%>\% select(Petal.Length, Petal.Width) \%>\% filter(Petal.Length > 1.5) \%>\% head(5)
27 |
28 | DBI::dbDisconnect(con, shutdown = TRUE)
29 | }
30 |
--------------------------------------------------------------------------------
/man/tidyduck-package.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/tidyduck.R
3 | \docType{package}
4 | \name{tidyduck-package}
5 | \alias{tidyduck-package}
6 | \title{DuckDB high-level bindings for R}
7 | \description{
8 | R high-level bindings for DuckDB: an embeddable SQL OLAP Database Management System.
9 | }
10 | \seealso{
11 | \code{\link[duckdb:duckdb]{duckdb::duckdb()}} for connection instructions.
12 |
13 | \url{https://duckdb.org/} for the project website.
14 | }
15 |
--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
# Test entry point run by R CMD check: runs the testthat suite of the
# tidyduck package (the package under test must be named in test_check()).
library("testthat")
library("DBI")
library("tidyduck")

test_check("tidyduck")
5 |
--------------------------------------------------------------------------------
/tests/testthat/_snaps/dbplyr.md:
--------------------------------------------------------------------------------
1 | # snapshots of dbplyr generic scalar translation
2 |
3 | Code
4 | translate(as.character(1))
5 | Output
6 | CAST(1.0 AS TEXT)
7 | Code
8 | translate(as.character(1L))
9 | Output
10 | CAST(1 AS TEXT)
11 | Code
12 | translate(as.numeric(1))
13 | Output
14 | CAST(1.0 AS NUMERIC)
15 | Code
16 | translate(as.double(1.2))
17 | Output
18 | CAST(1.2 AS NUMERIC)
19 | Code
20 | translate(as.integer(1.2))
21 | Output
22 | CAST(1.2 AS INTEGER)
23 | Code
24 | translate(as.integer64(1.2))
25 | Output
26 | CAST(1.2 AS BIGINT)
27 | Code
28 | translate(as.logical("TRUE"))
29 | Output
30 | CAST('TRUE' AS BOOLEAN)
31 | Code
32 | translate(tolower("HELLO"))
33 | Output
34 | LOWER('HELLO')
35 | Code
36 | translate(toupper("hello"))
37 | Output
38 | UPPER('hello')
39 | Code
40 | translate(pmax(1, 2, na.rm = TRUE))
41 | Output
42 | GREATEST(1.0, 2.0)
43 | Code
44 | translate(pmin(1, 2, na.rm = TRUE))
45 | Output
46 | LEAST(1.0, 2.0)
47 | Code
48 | translate(as.character("2020-01-01"))
49 | Output
50 | CAST('2020-01-01' AS TEXT)
51 | Code
52 | translate(c("2020-01-01", "2020-13-02"))
53 | Output
54 | ('2020-01-01', '2020-13-02')
55 | Code
56 | translate(iris[["sepal_length"]])
57 | Output
58 | iris.sepal_length
59 | Code
60 | translate(iris[[1]])
61 | Output
62 | iris[1]
63 | Code
64 | translate(cot(x))
65 | Output
66 | COT(x)
67 | Code
68 | translate(substr("test", 2, 3))
69 | Output
70 | SUBSTR('test', 2, 2)
71 |
72 | # snapshots of duckdb custom scalars translations
73 |
74 | Code
75 | translate(as.raw(10))
76 | Output
77 | CAST(10.0 AS VARBINARY)
78 | Code
79 | translate(13 %% 5)
80 | Output
81 | FMOD(13.0, 5.0)
82 | Code
83 | translate(35.8 %/% 4)
84 | Output
85 | FDIV(35.8, 4.0)
86 | Code
87 | translate(35.8^2.51)
88 | Output
89 | POW(35.8, 2.51)
90 | Code
91 | translate(bitwOr(x, 128L))
92 | Output
93 | (CAST(x AS INTEGER)) | (CAST(128 AS INTEGER))
94 | Code
95 | translate(bitwAnd(x, 128))
96 | Output
97 | (CAST(x AS INTEGER)) & (CAST(128.0 AS INTEGER))
98 | Code
99 | translate(bitwXor(x, 128L))
100 | Output
101 | XOR((CAST(x AS INTEGER)), (CAST(128 AS INTEGER)))
102 | Code
103 | translate(bitwNot(x))
104 | Output
105 | ~(CAST(x AS INTEGER))
106 | Code
107 | translate(bitwShiftL(x, 5L))
108 | Output
109 | (CAST(x AS INTEGER)) << (CAST(5 AS INTEGER))
110 | Code
111 | translate(bitwShiftR(x, 4L))
112 | Output
113 | (CAST(x AS INTEGER)) >> (CAST(4 AS INTEGER))
114 | Code
115 | translate(log(x))
116 | Output
117 | LN(x)
118 | Code
119 | translate(log(x, base = 5))
120 | Output
121 | LOG(x) / LOG(5.0)
122 | Code
123 | translate(log(x, base = 10))
124 | Output
125 | LOG10(x)
126 | Code
127 | translate(log(x, base = 2))
128 | Output
129 | LOG2(x)
130 | Code
131 | translate(log10(x))
132 | Output
133 | LOG10(x)
134 | Code
135 | translate(log2(x))
136 | Output
137 | LOG2(x)
138 | Code
139 | translate(is.nan(var1))
140 | Output
141 | (var1 IS NOT NULL AND PRINTF('%f', var1) = 'nan')
142 | Code
143 | translate(is.infinite(var1))
144 | Output
145 | (var1 IS NOT NULL AND REGEXP_MATCHES(PRINTF('%f', var1), 'inf'))
146 | Code
147 | translate(is.finite(var1))
148 | Output
149 | (NOT (var1 IS NULL OR REGEXP_MATCHES(PRINTF('%f', var1), 'inf|nan')))
150 | Code
151 | translate(grepl("pattern", text))
152 | Output
153 | REGEXP_MATCHES("text", 'pattern')
154 | Code
155 | translate(grepl("pattern", text, ignore.case = TRUE))
156 | Output
157 | REGEXP_MATCHES("text", '(?i)pattern')
158 | Code
159 | translate(regexpr("pattern", text))
160 | Output
161 | (CASE WHEN REGEXP_MATCHES("text", 'pattern') THEN (LENGTH(LIST_EXTRACT(STRING_SPLIT_REGEX("text", 'pattern'), 0))+1) ELSE -1 END)
162 | Code
163 | translate(round(x, digits = 1.1))
164 | Output
165 | ROUND(x, CAST(ROUND(1.1, 0) AS INTEGER))
166 | Code
167 | translate(as.Date("2019-01-01"))
168 | Output
169 | CAST('2019-01-01' AS DATE)
170 | Code
171 | translate(as.POSIXct("2019-01-01 01:01:01"))
172 | Output
173 | CAST('2019-01-01 01:01:01' AS TIMESTAMP)
174 |
175 | # snapshot tests for pasting translate
176 |
177 | Code
178 | translate(paste("hi", "bye"))
179 | Output
180 | CONCAT_WS(' ', 'hi', 'bye')
181 | Code
182 | translate(paste("hi", "bye", sep = "-"))
183 | Output
184 | CONCAT_WS('-', 'hi', 'bye')
185 | Code
186 | translate(paste0("hi", "bye"))
187 | Output
188 | CONCAT_WS('', 'hi', 'bye')
189 | Code
190 | translate(paste(x, y), window = FALSE)
191 | Output
192 | CONCAT_WS(' ', x, y)
193 | Code
194 | translate(paste0(x, y), window = FALSE)
195 | Output
196 | CONCAT_WS('', x, y)
197 |
198 | # snapshots for custom lubridate functions translated correctly
199 |
200 | Code
201 | translate(yday(x))
202 | Output
203 | EXTRACT(DOY FROM x)
204 | Code
205 | translate(quarter(x))
206 | Output
207 | EXTRACT(QUARTER FROM x)
208 | Code
209 | translate(quarter(x))
210 | Output
211 | EXTRACT(QUARTER FROM x)
212 | Code
213 | translate(quarter(x, type = "year.quarter"))
214 | Output
215 | (EXTRACT(YEAR FROM x) || '.' || EXTRACT(QUARTER FROM x))
216 | Code
217 | translate(quarter(x, type = "quarter"))
218 | Output
219 | EXTRACT(QUARTER FROM x)
220 | Code
221 | translate(quarter(x, type = TRUE))
222 | Output
223 | (EXTRACT(YEAR FROM x) || '.' || EXTRACT(QUARTER FROM x))
224 | Code
225 | translate(quarter(x, type = FALSE))
226 | Output
227 | EXTRACT(QUARTER FROM x)
228 | Code
229 | translate(quarter(x, type = "date_first"))
230 | Output
231 | (CAST(DATE_TRUNC('QUARTER', x) AS DATE))
232 | Code
233 | translate(quarter(x, type = "date_last"))
234 | Output
235 | (CAST((DATE_TRUNC('QUARTER', x) + INTERVAL '1 QUARTER' - INTERVAL '1 DAY') AS DATE))
236 | Code
237 | translate(month(x, label = FALSE))
238 | Output
239 | EXTRACT(MONTH FROM x)
240 | Code
241 | translate(month(x, label = TRUE))
242 | Output
243 | STRFTIME(x, '%b')
244 | Code
245 | translate(month(x, label = TRUE, abbr = FALSE))
246 | Output
247 | STRFTIME(x, '%B')
248 | Code
249 | translate(qday(x))
250 | Output
251 | DATE_DIFF('DAYS', DATE_TRUNC('QUARTER', CAST((x) AS DATE)), (CAST((x) AS DATE) + INTERVAL '1 DAY'))
252 | Code
253 | translate(wday(x))
254 | Output
255 | EXTRACT('dow' FROM CAST(x AS DATE) + 0) + 1
256 | Code
257 | translate(wday(x, week_start = 4))
258 | Output
259 | EXTRACT('dow' FROM CAST(x AS DATE) + 3) + 1
260 | Code
261 | translate(wday(x, label = TRUE))
262 | Output
263 | STRFTIME(x, '%a')
264 | Code
265 | translate(wday(x, label = TRUE, abbr = FALSE))
266 | Output
267 | STRFTIME(x, '%A')
268 | Code
269 | translate(seconds(x))
270 | Output
271 | TO_SECONDS(CAST(x AS BIGINT))
272 | Code
273 | translate(minutes(x))
274 | Output
275 | TO_MINUTES(CAST(x AS BIGINT))
276 | Code
277 | translate(hours(x))
278 | Output
279 | TO_HOURS(CAST(x AS BIGINT))
280 | Code
281 | translate(days(x))
282 | Output
283 | TO_DAYS(CAST(x AS INTEGER))
284 | Code
285 | translate(weeks(x))
286 | Output
287 | TO_DAYS(7 * CAST(x AS INTEGER))
288 | Code
289 | translate(months(x))
290 | Output
291 | TO_MONTHS(CAST(x AS INTEGER))
292 | Code
293 | translate(years(x))
294 | Output
295 | TO_YEARS(CAST(x AS INTEGER))
296 | Code
297 | translate(floor_date(x, "month"))
298 | Output
299 | DATE_TRUNC('month', x)
300 | Code
301 | translate(floor_date(x, "week"))
302 | Output
303 | CAST(x AS DATE) - CAST(EXTRACT('dow' FROM CAST(x AS DATE) + 0) AS INTEGER)
304 | Code
305 | translate(floor_date(x, "week", week_start = 1))
306 | Output
307 | DATE_TRUNC('week', x)
308 | Code
309 | translate(floor_date(x, "week", week_start = 4))
310 | Output
311 | CAST(x AS DATE) - CAST(EXTRACT('dow' FROM CAST(x AS DATE) + 3) AS INTEGER)
312 |
313 | # snapshots for custom stringr functions translated correctly
314 |
315 | Code
316 | translate(str_c(x, y))
317 | Output
318 | CONCAT_WS('', x, y)
319 | Code
320 | translate(str_detect(x, y))
321 | Output
322 | REGEXP_MATCHES(x, y)
323 | Code
324 | translate(str_detect(x, y, negate = TRUE))
325 | Output
326 | (NOT(REGEXP_MATCHES(x, y)))
327 | Code
328 | translate(str_replace(x, y, z))
329 | Output
330 | REGEXP_REPLACE(x, y, z)
331 | Code
332 | translate(str_replace_all(x, y, z))
333 | Output
334 | REGEXP_REPLACE(x, y, z, 'g')
335 | Code
336 | translate(str_squish(x))
337 | Output
338 | TRIM(REGEXP_REPLACE(x, '\s+', ' ', 'g'))
339 | Code
340 | translate(str_remove(x, y))
341 | Output
342 | REGEXP_REPLACE(x, y, '')
343 | Code
344 | translate(str_remove_all(x, y))
345 | Output
346 | REGEXP_REPLACE(x, y, '', 'g')
347 | Code
348 | translate(str_to_sentence(x))
349 | Output
350 | (UPPER(x[0]) || x[1:NULL])
351 | Code
352 | translate(str_starts(x, y))
353 | Output
354 | REGEXP_MATCHES(x,'^(?:'||y))
355 | Code
356 | translate(str_ends(x, y))
357 | Output
358 | REGEXP_MATCHES((?:x,y||')$')
359 | Code
360 | translate(str_pad(x, width = 10))
361 | Output
362 | LPAD(x, CAST(GREATEST(10, LENGTH(x)) AS INTEGER), ' ')
363 | Code
364 | translate(str_pad(x, width = 10, side = "right"))
365 | Output
366 | RPAD(x, CAST(GREATEST(10, LENGTH(x)) AS INTEGER), ' ')
367 | Code
368 | translate(str_pad(x, width = 10, side = "both", pad = "<"))
369 | Output
370 | RPAD(REPEAT('<', (10 - LENGTH(x)) / 2) || x, CAST(GREATEST(10, LENGTH(x)) AS INTEGER), '<')
371 |
372 | # snapshots datetime escaping working as in DBI
373 |
374 | Code
375 | test_date <- as.Date("2020-01-01")
376 | escape(test_date, con = con)
377 | Output
378 | '2020-01-01'::date
379 | Code
380 | escape("2020-01-01", con = con)
381 | Output
382 | '2020-01-01'
383 | Code
384 | test_datetime <- as.POSIXct("2020-01-01 01:23:45 UTC", tz = "UTC")
385 | escape(test_datetime, con = con)
386 | Output
387 | '2020-01-01 01:23:45'::timestamp
388 | Code
389 | escape("2020-01-01 01:23:45 UTC", con = con)
390 | Output
391 | '2020-01-01 01:23:45 UTC'
392 | Code
393 | test_datetime_tz <- as.POSIXct("2020-01-01 18:23:45 UTC", tz = "America/Los_Angeles")
394 | escape(test_datetime_tz, con = con)
395 | Output
396 | '2020-01-02 02:23:45'::timestamp
397 | Code
398 | escape("2020-01-01 18:23:45 PST", con = con)
399 | Output
400 | '2020-01-01 18:23:45 PST'
401 |
402 | # two variable aggregates are translated correctly
403 |
404 | Code
405 | translate(cor(x, y), window = FALSE)
406 | Output
407 | CORR(x, y)
408 | Code
409 | translate(cor(x, y), window = TRUE)
410 | Output
411 | CORR(x, y) OVER ()
412 |
413 | # these should give errors
414 |
415 | Code
416 | translate(grepl("dummy", txt, perl = TRUE))
417 | Error
418 | Parameters `perl`, `fixed` and `useBytes` in grepl are not currently supported in DuckDB backend
419 | Code
420 | translate(quarter(x, type = "other"))
421 | Error
422 | Unsupported type other
423 | Code
424 | translate(quarter(x, fiscal_start = 2))
425 | Error
426 | `fiscal_start` is not yet supported in DuckDB translation. Must be 1.
427 | Code
428 | translate(str_pad(x, width = 10, side = "other"))
429 | Error
430 | Argument 'side' should be "left", "right" or "both"
431 |
432 |
--------------------------------------------------------------------------------
/tests/testthat/data/binary_string.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/krlmlr/tidyduck/1d8c2972412c5706519d87f906c92823505da9f3/tests/testthat/data/binary_string.parquet
--------------------------------------------------------------------------------
/tests/testthat/data/userdata1.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/krlmlr/tidyduck/1d8c2972412c5706519d87f906c92823505da9f3/tests/testthat/data/userdata1.parquet
--------------------------------------------------------------------------------
/tests/testthat/test_dbplyr.R:
--------------------------------------------------------------------------------
1 | skip_if_no_R4 <- function() {  # Skip the calling test when the running R is older than 4.0.0; takes no arguments.
2 |   if (getRversion() < "4.0.0") {  # compare version objects: R.Version()$major is a character string, so `< 4` compared text ("10" < "4" is TRUE)
3 |     skip("R 4.0.0 or newer not available for testing")
4 |   }
5 | }
6 |
7 | test_that("dbplyr generic scalars translated correctly", {  # Pins the SQL text produced for base-R scalar calls: casts, case conversion, pmax/pmin, c(), [[ and substr.
8 | skip_if_no_R4()  # the expectations below are written with r"{...}" raw strings
9 | skip_if_not_installed("dbplyr")
10 | translate <- function(...) dbplyr::translate_sql(..., con = duckdb::translate_duckdb())  # helper: render an R expression as SQL, passing duckdb::translate_duckdb() as the connection
11 | sql <- function(...) dbplyr::sql(...)  # helper: mark the expected text as SQL via dbplyr::sql()
12 |
13 | expect_equal(translate(as.character(1)), sql(r"{CAST(1.0 AS TEXT)}"))
14 | expect_equal(translate(as.character(1L)), sql(r"{CAST(1 AS TEXT)}"))
15 | expect_equal(translate(as.numeric(1)), sql(r"{CAST(1.0 AS NUMERIC)}"))
16 | expect_equal(translate(as.double(1.2)), sql(r"{CAST(1.2 AS NUMERIC)}"))
17 | expect_equal(translate(as.integer(1.2)), sql(r"{CAST(1.2 AS INTEGER)}"))
18 | expect_equal(translate(as.integer64(1.2)), sql(r"{CAST(1.2 AS BIGINT)}"))
19 | expect_equal(translate(as.logical("TRUE")), sql(r"{CAST('TRUE' AS BOOLEAN)}"))
20 | expect_equal(translate(tolower("HELLO")), sql(r"{LOWER('HELLO')}"))
21 | expect_equal(translate(toupper("hello")), sql(r"{UPPER('hello')}"))
22 | expect_equal(translate(pmax(1, 2, na.rm = TRUE)), sql(r"{GREATEST(1.0, 2.0)}"))
23 | expect_equal(translate(pmin(1, 2, na.rm = TRUE)), sql(r"{LEAST(1.0, 2.0)}"))
24 | expect_equal(translate(as.character("2020-01-01")), sql(r"{CAST('2020-01-01' AS TEXT)}"))
25 | expect_equal(translate(c("2020-01-01", "2020-13-02")), sql(r"{('2020-01-01', '2020-13-02')}"))
26 | expect_equal(translate(iris[["sepal_length"]]), sql(r"{iris.sepal_length}"))
27 | expect_equal(translate(iris[[1]]), sql(r"{iris[1]}"))
28 | expect_equal(translate(cot(x)), sql(r"{COT(x)}"))
29 | expect_equal(translate(substr("test", 2, 3)), sql(r"{SUBSTR('test', 2, 2)}"))  # R passes start/stop (2, 3); the expected SQL carries start 2 and the value 2 (= stop - start + 1)
30 | })
31 |
32 | test_that("duckdb custom scalars translated correctly", {  # Pins the SQL text for arithmetic, bitwise, log, NaN/Inf checks, regex, round and date/time casts.
33 | skip_if_no_R4()  # the expectations below are written with r"{...}" raw strings
34 | skip_if_not_installed("dbplyr")
35 | translate <- function(...) dbplyr::translate_sql(..., con = duckdb::translate_duckdb())  # helper: render an R expression as SQL, passing duckdb::translate_duckdb() as the connection
36 | sql <- function(...) dbplyr::sql(...)  # helper: mark the expected text as SQL via dbplyr::sql()
37 |
38 | # expect_equal(translate(as(1,"CHARACTER")), sql(r"{CAST(1.0 AS TEXT}")) # Not implemented
39 | expect_equal(translate(as.raw(10)), sql(r"{CAST(10.0 AS VARBINARY)}"))
40 | expect_equal(translate(13 %% 5), sql(r"{FMOD(13.0, 5.0)}"))
41 | expect_equal(translate(35.8 %/% 4), sql(r"{FDIV(35.8, 4.0)}"))
42 | expect_equal(translate(35.8^2.51), sql(r"{POW(35.8, 2.51)}"))
43 | expect_equal(translate(bitwOr(x, 128L)), sql(r"{(CAST(x AS INTEGER)) | (CAST(128 AS INTEGER))}"))
44 | expect_equal(translate(bitwAnd(x, 128)), sql(r"{(CAST(x AS INTEGER)) & (CAST(128.0 AS INTEGER))}"))
45 | expect_equal(translate(bitwXor(x, 128L)), sql(r"{XOR((CAST(x AS INTEGER)), (CAST(128 AS INTEGER)))}"))
46 | expect_equal(translate(bitwNot(x)), sql(r"{~(CAST(x AS INTEGER))}"))
47 | expect_equal(translate(bitwShiftL(x, 5L)), sql(r"{(CAST(x AS INTEGER)) << (CAST(5 AS INTEGER))}"))
48 | expect_equal(translate(bitwShiftR(x, 4L)), sql(r"{(CAST(x AS INTEGER)) >> (CAST(4 AS INTEGER))}"))
49 | expect_equal(translate(log(x)), sql(r"{LN(x)}"))
50 | expect_equal(translate(log(x, base = 5)), sql(r"{LOG(x) / LOG(5.0)}"))  # arbitrary base is expected as a quotient of two LOG calls
51 | expect_equal(translate(log(x, base = 10)), sql(r"{LOG10(x)}"))  # bases 10 and 2 are expected to map to dedicated functions
52 | expect_equal(translate(log(x, base = 2)), sql(r"{LOG2(x)}"))
53 | expect_equal(translate(log10(x)), sql(r"{LOG10(x)}"))
54 | expect_equal(translate(log2(x)), sql(r"{LOG2(x)}"))
55 | expect_equal(translate(is.nan(var1)), sql(r"{(var1 IS NOT NULL AND PRINTF('%f', var1) = 'nan')}"))
56 | expect_equal(translate(is.infinite(var1)), sql(r"{(var1 IS NOT NULL AND REGEXP_MATCHES(PRINTF('%f', var1), 'inf'))}"))
57 | expect_equal(translate(is.finite(var1)), sql(r"{(NOT (var1 IS NULL OR REGEXP_MATCHES(PRINTF('%f', var1), 'inf|nan')))}"))
58 | expect_equal(translate(grepl("pattern", text)), sql(r"{REGEXP_MATCHES("text", 'pattern')}"))
59 | expect_equal(translate(grepl("pattern", text, ignore.case = TRUE)), sql(r"{REGEXP_MATCHES("text", '(?i)pattern')}"))  # ignore.case is expected as an inline (?i) flag in the pattern
60 | expect_error(translate(grepl("dummy", txt, perl = TRUE)))  # only checks that an error is raised; the message text is pinned by the error snapshot test in this file
61 | expect_equal(translate(regexpr("pattern", text)), sql(r"{(CASE WHEN REGEXP_MATCHES("text", 'pattern') THEN (LENGTH(LIST_EXTRACT(STRING_SPLIT_REGEX("text", 'pattern'), 0))+1) ELSE -1 END)}"))
62 | expect_equal(translate(round(x, digits = 1.1)), sql(r"{ROUND(x, CAST(ROUND(1.1, 0) AS INTEGER))}"))
63 | expect_equal(translate(as.Date("2019-01-01")), sql(r"{CAST('2019-01-01' AS DATE)}"))
64 | expect_equal(translate(as.POSIXct("2019-01-01 01:01:01")), sql(r"{CAST('2019-01-01 01:01:01' AS TIMESTAMP)}"))
65 | })
66 |
67 |
68 |
69 | test_that("pasting translated correctly", {  # Pins the SQL text for paste()/paste0(): every case is expected as CONCAT_WS with the separator as first argument.
70 | skip_if_no_R4()  # the expectations below are written with r"{...}" raw strings
71 | skip_if_not_installed("dbplyr")
72 | translate <- function(...) dbplyr::translate_sql(..., con = duckdb::translate_duckdb())  # helper: render an R expression as SQL, passing duckdb::translate_duckdb() as the connection
73 | sql <- function(...) dbplyr::sql(...)  # helper: mark the expected text as SQL via dbplyr::sql()
74 |
75 | expect_equal(translate(paste("hi", "bye")), sql(r"{CONCAT_WS(' ', 'hi', 'bye')}"))
76 | expect_equal(translate(paste("hi", "bye", sep = "-")), sql(r"{CONCAT_WS('-', 'hi', 'bye')}"))
77 | expect_equal(translate(paste0("hi", "bye")), sql(r"{CONCAT_WS('', 'hi', 'bye')}"))
78 |
79 | expect_equal(translate(paste(x, y), window = FALSE), sql(r"{CONCAT_WS(' ', x, y)}"))
80 | expect_equal(translate(paste0(x, y), window = FALSE), sql(r"{CONCAT_WS('', x, y)}"))
81 |
82 | # expect_error(translate(paste0(x, collapse = ""), window = FALSE), "`collapse` not supported")
83 | })
84 |
85 |
86 | # lubridate functions
87 |
88 | test_that("custom lubridate functions translated correctly", {  # Pins the SQL text for lubridate-style date parts, labels, period constructors and floor_date().
89 | skip_if_no_R4()  # the expectations below are written with r"{...}" raw strings
90 | skip_if_not_installed("dbplyr")
91 | translate <- function(...) dbplyr::translate_sql(..., con = duckdb::translate_duckdb())  # helper: render an R expression as SQL, passing duckdb::translate_duckdb() as the connection
92 | sql <- function(...) dbplyr::sql(...)  # helper: mark the expected text as SQL via dbplyr::sql()
93 |
94 | expect_equal(translate(yday(x)), sql(r"{EXTRACT(DOY FROM x)}"))
95 | expect_equal(translate(quarter(x)), sql(r"{EXTRACT(QUARTER FROM x)}"))
96 | expect_equal(translate(quarter(x, with_year = TRUE)), sql(r"{(EXTRACT(YEAR FROM x) || '.' || EXTRACT(QUARTER FROM x))}"))  # with_year = TRUE is expected to give the same SQL as type = "year.quarter" below
97 | expect_equal(translate(quarter(x, type = "year.quarter")), sql(r"{(EXTRACT(YEAR FROM x) || '.' || EXTRACT(QUARTER FROM x))}"))
98 | expect_equal(translate(quarter(x, type = "quarter")), sql(r"{EXTRACT(QUARTER FROM x)}"))
99 | expect_equal(translate(quarter(x, type = TRUE)), sql(r"{(EXTRACT(YEAR FROM x) || '.' || EXTRACT(QUARTER FROM x))}"))
100 | expect_equal(translate(quarter(x, type = FALSE)), sql(r"{EXTRACT(QUARTER FROM x)}"))
101 | expect_equal(translate(quarter(x, type = "date_first")), sql(r"{(CAST(DATE_TRUNC('QUARTER', x) AS DATE))}"))
102 | expect_equal(translate(quarter(x, type = "date_last")), sql(r"{(CAST((DATE_TRUNC('QUARTER', x) + INTERVAL '1 QUARTER' - INTERVAL '1 DAY') AS DATE))}"))
103 | expect_error(translate(quarter(x, type = "other")))  # only checks that an error is raised; messages are pinned by the error snapshot test in this file
104 | expect_error(translate(quarter(x, fiscal_start = 2)))
105 | expect_equal(translate(month(x, label = FALSE)), sql(r"{EXTRACT(MONTH FROM x)}"))
106 | expect_equal(translate(month(x, label = TRUE)), sql(r"{STRFTIME(x, '%b')}"))
107 | expect_equal(translate(month(x, label = TRUE, abbr = FALSE)), sql(r"{STRFTIME(x, '%B')}"))
108 | expect_equal(translate(qday(x)), sql(r"{DATE_DIFF('DAYS', DATE_TRUNC('QUARTER', CAST((x) AS DATE)), (CAST((x) AS DATE) + INTERVAL '1 DAY'))}"))
109 | expect_equal(translate(wday(x)), sql(r"{EXTRACT('dow' FROM CAST(x AS DATE) + 0) + 1}"))
110 | expect_equal(translate(wday(x, week_start = 4)), sql(r"{EXTRACT('dow' FROM CAST(x AS DATE) + 3) + 1}"))  # week_start = 4 appears as a date offset of 3 in the expected SQL
111 | expect_equal(translate(wday(x, label = TRUE)), sql(r"{STRFTIME(x, '%a')}"))
112 | expect_equal(translate(wday(x, label = TRUE, abbr = FALSE)), sql(r"{STRFTIME(x, '%A')}"))
113 | expect_equal(translate(seconds(x)), sql(r"{TO_SECONDS(CAST(x AS BIGINT))}"))
114 | expect_equal(translate(minutes(x)), sql(r"{TO_MINUTES(CAST(x AS BIGINT))}"))
115 | expect_equal(translate(hours(x)), sql(r"{TO_HOURS(CAST(x AS BIGINT))}"))
116 | expect_equal(translate(days(x)), sql(r"{TO_DAYS(CAST(x AS INTEGER))}"))
117 | expect_equal(translate(weeks(x)), sql(r"{TO_DAYS(7 * CAST(x AS INTEGER))}"))  # weeks are expected as 7 * days rather than a dedicated function
118 | expect_equal(translate(months(x)), sql(r"{TO_MONTHS(CAST(x AS INTEGER))}"))
119 | expect_equal(translate(years(x)), sql(r"{TO_YEARS(CAST(x AS INTEGER))}"))
120 | expect_equal(translate(floor_date(x, "month")), sql(r"{DATE_TRUNC('month', x)}"))
121 | expect_equal(translate(floor_date(x, "week")), sql(r"{CAST(x AS DATE) - CAST(EXTRACT('dow' FROM CAST(x AS DATE) + 0) AS INTEGER)}"))
122 | expect_equal(translate(floor_date(x, "week", week_start = 1)), sql(r"{DATE_TRUNC('week', x)}"))  # only week_start = 1 is expected to use DATE_TRUNC('week', ...)
123 | expect_equal(translate(floor_date(x, "week", week_start = 4)), sql(r"{CAST(x AS DATE) - CAST(EXTRACT('dow' FROM CAST(x AS DATE) + 3) AS INTEGER)}"))
124 | })
125 |
126 | # stringr functions
127 |
128 | test_that("custom stringr functions translated correctly", {  # Pins the SQL text for stringr-style calls: concatenation, regex detect/replace/remove, sentence case, starts/ends and padding.
129 | skip_if_no_R4()  # the expectations below are written with r"{...}" raw strings
130 | skip_if_not_installed("dbplyr")
131 | translate <- function(...) dbplyr::translate_sql(..., con = duckdb::translate_duckdb())  # helper: render an R expression as SQL, passing duckdb::translate_duckdb() as the connection
132 | sql <- function(...) dbplyr::sql(...)  # helper: mark the expected text as SQL via dbplyr::sql()
133 |
134 | expect_equal(translate(str_c(x, y)), sql(r"{CONCAT_WS('', x, y)}"))
135 | # expect_error(translate(str_c(x, collapse = "")), "`collapse` not supported")
136 | expect_equal(translate(str_detect(x, y)), sql(r"{REGEXP_MATCHES(x, y)}"))
137 | expect_equal(translate(str_detect(x, y, negate = TRUE)), sql(r"{(NOT(REGEXP_MATCHES(x, y)))}"))
138 | expect_equal(translate(str_replace(x, y, z)), sql(r"{REGEXP_REPLACE(x, y, z)}"))
139 | expect_equal(translate(str_replace_all(x, y, z)), sql(r"{REGEXP_REPLACE(x, y, z, 'g')}"))  # the *_all variants differ from the single-match ones only by the trailing 'g' argument
140 | expect_equal(translate(str_squish(x)), sql(r"{TRIM(REGEXP_REPLACE(x, '\s+', ' ', 'g'))}"))
141 | expect_equal(translate(str_remove(x, y)), sql(r"{REGEXP_REPLACE(x, y, '')}"))
142 | expect_equal(translate(str_remove_all(x, y)), sql(r"{REGEXP_REPLACE(x, y, '', 'g')}"))
143 | expect_equal(translate(str_to_sentence(x)), sql(r"{(UPPER(x[0]) || x[1:NULL])}"))
144 | expect_equal(translate(str_starts(x, y)), sql(r"{REGEXP_MATCHES(x,'^(?:'||y))}"))  # NOTE(review): the expected pattern opens '(?:' and never closes it - this pins current output; confirm the translation is intended
145 | expect_equal(translate(str_ends(x, y)), sql(r"{REGEXP_MATCHES((?:x,y||')$')}"))  # NOTE(review): '(?:' sits outside any string literal here, so the expected SQL looks malformed - confirm against the translation
146 | expect_equal(translate(str_pad(x, width = 10)), sql(r"{LPAD(x, CAST(GREATEST(10, LENGTH(x)) AS INTEGER), ' ')}"))  # default side is expected to pad on the left (LPAD)
147 | expect_equal(translate(str_pad(x, width = 10, side = "right")), sql(r"{RPAD(x, CAST(GREATEST(10, LENGTH(x)) AS INTEGER), ' ')}"))
148 | expect_equal(translate(str_pad(x, width = 10, side = "both", pad = "<")), sql(r"{RPAD(REPEAT('<', (10 - LENGTH(x)) / 2) || x, CAST(GREATEST(10, LENGTH(x)) AS INTEGER), '<')}"))
149 | expect_error(translate(str_pad(x, width = 10, side = "other")))  # only checks that an error is raised; the message is pinned by the error snapshot test in this file
150 | })
151 |
152 | test_that("datetime escaping working as in DBI", {  # Pins how dbplyr::escape() renders Date and POSIXct values versus plain strings for this connection.
153 | skip_if_no_R4()  # the expectations below are written with r"{...}" raw strings
154 | skip_if_not_installed("dbplyr")
155 | con <- duckdb::translate_duckdb()  # connection object handed to escape() in every call below
156 | escape <- function(...) dbplyr::escape(...)
157 | sql <- function(...) dbplyr::sql(...)  # helper: mark the expected text as SQL via dbplyr::sql()
158 |
159 | test_date <- as.Date("2020-01-01")
160 | expect_equal(escape(test_date, con = con), sql(r"{'2020-01-01'::date}"))  # Date objects are expected with a ::date cast
161 | expect_equal(escape("2020-01-01", con = con), sql(r"{'2020-01-01'}"))  # the same text as a character value is expected as a plain quoted literal
162 |
163 | test_datetime <- as.POSIXct("2020-01-01 01:23:45 UTC", tz = "UTC")
164 | expect_equal(escape(test_datetime, con = con), sql(r"{'2020-01-01 01:23:45'::timestamp}"))
165 | expect_equal(escape("2020-01-01 01:23:45 UTC", con = con), sql(r"{'2020-01-01 01:23:45 UTC'}"))
166 |
167 | test_datetime_tz <- as.POSIXct("2020-01-01 18:23:45 UTC", tz = "America/Los_Angeles")
168 | expect_equal(escape(test_datetime_tz, con = con), sql(r"{'2020-01-02 02:23:45'::timestamp}"))  # expected literal is 8 hours later than the local 18:23:45, i.e. the instant expressed in UTC
169 | expect_equal(escape("2020-01-01 18:23:45 PST", con = con), sql(r"{'2020-01-01 18:23:45 PST'}"))
170 | })
171 |
172 | test_that("two variable aggregates are translated correctly", {  # Pins the SQL text for cor(x, y) with and without window translation.
173 | skip_if_no_R4()  # the expectations below are written with r"{...}" raw strings
174 | skip_if_not_installed("dbplyr")
175 | translate <- function(...) dbplyr::translate_sql(..., con = duckdb::translate_duckdb())  # helper: render an R expression as SQL, passing duckdb::translate_duckdb() as the connection
176 | sql <- function(...) dbplyr::sql(...)  # helper: mark the expected text as SQL via dbplyr::sql()
177 |
178 | expect_equal(translate(cor(x, y), window = FALSE), sql(r"{CORR(x, y)}"))
179 | expect_equal(translate(cor(x, y), window = TRUE), sql(r"{CORR(x, y) OVER ()}"))  # window = TRUE is expected to append an empty OVER () clause
180 | })
181 |
182 |
183 |
184 |
185 | # Snapshot tests
186 |
187 | test_that("snapshots of dbplyr generic scalar translation", {  # Snapshot variant of the generic scalar checks; output is recorded under this description in tests/testthat/_snaps/dbplyr.md.
188 | skip_on_cran()
189 | skip_if_not_installed("dbplyr")
190 | local_edition(3)  # expect_snapshot() is a testthat 3rd-edition feature
191 | translate <- function(...) dbplyr::translate_sql(..., con = duckdb::translate_duckdb())  # helper: render an R expression as SQL, passing duckdb::translate_duckdb() as the connection
192 |
193 | expect_snapshot({
194 | translate(as.character(1))
195 | translate(as.character(1L))
196 | translate(as.numeric(1))
197 | translate(as.double(1.2))
198 | translate(as.integer(1.2))
199 | translate(as.integer64(1.2))
200 | translate(as.logical("TRUE"))
201 | translate(tolower("HELLO"))
202 | translate(toupper("hello"))
203 | translate(pmax(1, 2, na.rm = TRUE))
204 | translate(pmin(1, 2, na.rm = TRUE))
205 | translate(as.character("2020-01-01"))
206 | translate(c("2020-01-01", "2020-13-02"))
207 | translate(iris[["sepal_length"]])
208 | translate(iris[[1]])
209 | translate(cot(x))
210 | translate(substr("test", 2, 3))
211 | })
212 | })
213 |
214 |
215 | test_that("snapshots of duckdb custom scalars translations", {  # Snapshot variant of the custom scalar checks; output is recorded under this description in tests/testthat/_snaps/dbplyr.md.
216 | skip_on_cran()
217 | skip_if_not_installed("dbplyr")
218 | local_edition(3)  # expect_snapshot() is a testthat 3rd-edition feature
219 | translate <- function(...) dbplyr::translate_sql(..., con = duckdb::translate_duckdb())  # helper: render an R expression as SQL, passing duckdb::translate_duckdb() as the connection
220 |
221 | expect_snapshot({
222 | # translate(as(1,"CHARACTER")) # Not implemented
223 | translate(as.raw(10))
224 | translate(13 %% 5)
225 | translate(35.8 %/% 4)
226 | translate(35.8^2.51)
227 | translate(bitwOr(x, 128L))
228 | translate(bitwAnd(x, 128))
229 | translate(bitwXor(x, 128L))
230 | translate(bitwNot(x))
231 | translate(bitwShiftL(x, 5L))
232 | translate(bitwShiftR(x, 4L))
233 | translate(log(x))
234 | translate(log(x, base = 5))
235 | translate(log(x, base = 10))
236 | translate(log(x, base = 2))
237 | translate(log10(x))
238 | translate(log2(x))
239 | translate(is.nan(var1))
240 | translate(is.infinite(var1))
241 | translate(is.finite(var1))
242 | translate(grepl("pattern", text))
243 | translate(grepl("pattern", text, ignore.case = TRUE))
244 | # translate(grepl("dummy", txt, perl = TRUE)) # Error tests later
245 | translate(regexpr("pattern", text))
246 | translate(round(x, digits = 1.1))
247 | translate(as.Date("2019-01-01"))
248 | translate(as.POSIXct("2019-01-01 01:01:01"))
249 | })
250 | })
251 |
252 |
253 |
254 | test_that("snapshot tests for pasting translate", {  # Snapshot variant of the paste()/paste0() checks; output is recorded under this description in tests/testthat/_snaps/dbplyr.md.
255 | skip_on_cran()
256 | skip_if_not_installed("dbplyr")
257 | local_edition(3)  # expect_snapshot() is a testthat 3rd-edition feature
258 | translate <- function(...) dbplyr::translate_sql(..., con = duckdb::translate_duckdb())  # helper: render an R expression as SQL, passing duckdb::translate_duckdb() as the connection
259 |
260 | expect_snapshot({
261 | translate(paste("hi", "bye"))
262 | translate(paste("hi", "bye", sep = "-"))
263 | translate(paste0("hi", "bye"))
264 |
265 | translate(paste(x, y), window = FALSE)
266 | translate(paste0(x, y), window = FALSE)
267 |
268 | # translate(paste0(x, collapse = ""), window = FALSE) # Expected error
269 | })
270 | })
271 |
272 |
273 | # lubridate functions
274 |
275 | test_that("snapshots for custom lubridate functions translated correctly", {  # Snapshot variant of the lubridate checks; output is recorded under this description in tests/testthat/_snaps/dbplyr.md.
276 | skip_on_cran()
277 | skip_if_not_installed("dbplyr")
278 | local_edition(3)  # expect_snapshot() is a testthat 3rd-edition feature
279 | translate <- function(...) dbplyr::translate_sql(..., con = duckdb::translate_duckdb())  # helper: render an R expression as SQL, passing duckdb::translate_duckdb() as the connection
280 |
281 | expect_snapshot({
282 | translate(yday(x))
283 | translate(quarter(x))
284 | translate(quarter(x))  # NOTE(review): repeats the previous call; the expect_equal test has quarter(x, with_year = TRUE) at this position - confirm the duplicate is intended
285 | translate(quarter(x, type = "year.quarter"))
286 | translate(quarter(x, type = "quarter"))
287 | translate(quarter(x, type = TRUE))
288 | translate(quarter(x, type = FALSE))
289 | translate(quarter(x, type = "date_first"))
290 | translate(quarter(x, type = "date_last"))
291 | # translate(quarter(x, type = "other")) # Not supported - error
292 | # translate(quarter(x, fiscal_start = 2)) # Not supported - error
293 | translate(month(x, label = FALSE))
294 | translate(month(x, label = TRUE))
295 | translate(month(x, label = TRUE, abbr = FALSE))
296 | translate(qday(x))
297 | translate(wday(x))
298 | translate(wday(x, week_start = 4))
299 | translate(wday(x, label = TRUE))
300 | translate(wday(x, label = TRUE, abbr = FALSE))
301 | translate(seconds(x))
302 | translate(minutes(x))
303 | translate(hours(x))
304 | translate(days(x))
305 | translate(weeks(x))
306 | translate(months(x))
307 | translate(years(x))
308 | translate(floor_date(x, "month"))
309 | translate(floor_date(x, "week"))
310 | translate(floor_date(x, "week", week_start = 1))
311 | translate(floor_date(x, "week", week_start = 4))
312 | })
313 | })
314 |
315 | # stringr functions
316 |
317 | test_that("snapshots for custom stringr functions translated correctly", {  # Snapshot variant of the stringr checks; output is recorded under this description in tests/testthat/_snaps/dbplyr.md.
318 | skip_on_cran()
319 | skip_if_not_installed("dbplyr")
320 | local_edition(3)  # expect_snapshot() is a testthat 3rd-edition feature
321 | translate <- function(...) dbplyr::translate_sql(..., con = duckdb::translate_duckdb())  # helper: render an R expression as SQL, passing duckdb::translate_duckdb() as the connection
322 |
323 | expect_snapshot({
324 | translate(str_c(x, y))
325 | # translate(str_c(x, collapse = "")) # Error
326 | translate(str_detect(x, y))
327 | translate(str_detect(x, y, negate = TRUE))
328 | translate(str_replace(x, y, z))
329 | translate(str_replace_all(x, y, z))
330 | translate(str_squish(x))
331 | translate(str_remove(x, y))
332 | translate(str_remove_all(x, y))
333 | translate(str_to_sentence(x))
334 | translate(str_starts(x, y))
335 | translate(str_ends(x, y))
336 | translate(str_pad(x, width = 10))
337 | translate(str_pad(x, width = 10, side = "right"))
338 | translate(str_pad(x, width = 10, side = "both", pad = "<"))
339 | # translate(str_pad(x, width = 10, side = "other")) # Error
340 | })
341 | })
342 |
343 | test_that("snapshots datetime escaping working as in DBI", {  # Snapshot variant of the escape() checks; output is recorded under this description in tests/testthat/_snaps/dbplyr.md.
344 | skip_on_cran()
345 | skip_if_not_installed("dbplyr")
346 | local_edition(3)  # expect_snapshot() is a testthat 3rd-edition feature
347 | con <- duckdb::translate_duckdb()  # connection object handed to escape() in every call below
348 | escape <- function(...) dbplyr::escape(...)
349 |
350 | expect_snapshot({
351 | test_date <- as.Date("2020-01-01")
352 | escape(test_date, con = con)
353 | escape("2020-01-01", con = con)
354 |
355 | test_datetime <- as.POSIXct("2020-01-01 01:23:45 UTC", tz = "UTC")
356 | escape(test_datetime, con = con)
357 | escape("2020-01-01 01:23:45 UTC", con = con)
358 |
359 | test_datetime_tz <- as.POSIXct("2020-01-01 18:23:45 UTC", tz = "America/Los_Angeles")
360 | escape(test_datetime_tz, con = con)
361 | escape("2020-01-01 18:23:45 PST", con = con)
362 | })
363 | })
364 |
365 | test_that("two variable aggregates are translated correctly", {  # Snapshot variant of the cor() checks. NOTE(review): same description as the expect_equal test earlier in this file, and the heading in _snaps/dbplyr.md is this description - rename both together if disambiguating.
366 | skip_on_cran()
367 | skip_if_not_installed("dbplyr")
368 | local_edition(3)  # expect_snapshot() is a testthat 3rd-edition feature
369 | translate <- function(...) dbplyr::translate_sql(..., con = duckdb::translate_duckdb())  # helper: render an R expression as SQL, passing duckdb::translate_duckdb() as the connection
370 |
371 | expect_snapshot({
372 | translate(cor(x, y), window = FALSE)
373 | translate(cor(x, y), window = TRUE)
374 | })
375 | })
376 |
377 | test_that("these should give errors", {  # Records the error messages of unsupported translations under this description in tests/testthat/_snaps/dbplyr.md.
378 | skip_on_cran()
379 | skip_if_not_installed("dbplyr")
380 | local_edition(3)  # expect_snapshot() is a testthat 3rd-edition feature
381 | translate <- function(...) dbplyr::translate_sql(..., con = duckdb::translate_duckdb())  # helper: render an R expression as SQL, passing duckdb::translate_duckdb() as the connection
382 |
383 | expect_snapshot(error = TRUE, {
384 | translate(grepl("dummy", txt, perl = TRUE)) # Expected error
385 | # translate(paste0(x, collapse = ""), window = FALSE) # Skip because of changing rlang_error (sql_paste())
386 | translate(quarter(x, type = "other")) # Not supported - error
387 | translate(quarter(x, fiscal_start = 2)) # Not supported - error
388 | # translate(str_c(x, collapse = "")) # Skip because of changing rlang_error (sql_paste())
389 | translate(str_pad(x, width = 10, side = "other")) # Error
390 | })
391 | })
392 |
--------------------------------------------------------------------------------
/tests/testthat/test_tbl__duckdb_connection.R:
--------------------------------------------------------------------------------
1 | skip_on_cran()  # top-level skip, outside any test_that() block - presumably meant to skip this whole file on CRAN
2 | `%>%` <- dplyr::`%>%`  # file-local alias for the dplyr pipe; removed again by rm() on the last line of this file
3 |
4 | test_that("Parquet files can be registered with dplyr::tbl()", {  # Opens the same parquet file through four source spellings: bare path, read_parquet(), quoted path and parquet_scan().
5 | con <- DBI::dbConnect(duckdb::duckdb())
6 | on.exit(DBI::dbDisconnect(con, shutdown = TRUE))  # close the connection when the test body exits, pass or fail
7 |
8 | tab0 <- dplyr::tbl(con, "data/userdata1.parquet")
9 | expect_true(inherits(tab0, "tbl_duckdb_connection"))
10 | expect_true(tab0 %>% dplyr::count() %>% dplyr::collect() == 1000)  # the collected row count is compared with the expected 1000 rows
11 |
12 | tab1 <- dplyr::tbl(con, "read_parquet(['data/userdata1.parquet'])")
13 | expect_true(inherits(tab1, "tbl_duckdb_connection"))
14 | expect_true(tab1 %>% dplyr::count() %>% dplyr::collect() == 1000)
15 |
16 | tab2 <- dplyr::tbl(con, "'data/userdata1.parquet'")
17 | expect_true(inherits(tab2, "tbl_duckdb_connection"))
18 | expect_true(tab2 %>% dplyr::count() %>% dplyr::collect() == 1000)
19 |
20 | tab3 <- dplyr::tbl(con, "parquet_scan(['data/userdata1.parquet'])")
21 | expect_true(inherits(tab3, "tbl_duckdb_connection"))
22 | expect_true(tab3 %>% dplyr::count() %>% dplyr::collect() == 1000)
23 | })
24 |
25 |
26 | test_that("Object cache can be enabled for parquet files with dplyr::tbl()", {  # Checks that the cache argument of dplyr::tbl() is reflected in the enable_object_cache setting.
27 | con <- DBI::dbConnect(duckdb::duckdb())
28 | on.exit(DBI::dbDisconnect(con, shutdown = TRUE))  # close the connection when the test body exits, pass or fail
29 |
30 | DBI::dbExecute(con, "SET enable_object_cache=False;")  # start from a known state so the check below reflects the tbl() call
31 | tab1 <- dplyr::tbl(con, "data/userdata1.parquet", cache = TRUE)
32 | expect_true(DBI::dbGetQuery(con, "SELECT value FROM duckdb_settings() WHERE name='enable_object_cache';") == "True")  # the setting value is compared as text
33 |
34 | DBI::dbExecute(con, "SET enable_object_cache=False;")  # reset again before the cache = FALSE case
35 | tab2 <- dplyr::tbl(con, "'data/userdata1.parquet'", cache = FALSE)
36 | expect_true(DBI::dbGetQuery(con, "SELECT value FROM duckdb_settings() WHERE name='enable_object_cache';") == "False")
37 | })
38 |
39 |
40 | test_that("CSV files can be registered with dplyr::tbl()", {
41 |   path <- file.path(tempdir(), "duckdbtest.csv")
42 |   write.csv(iris, file = path)
43 |   on.exit(unlink(path), add = TRUE)
44 |
45 |   con <- DBI::dbConnect(duckdb::duckdb())
46 |   on.exit(DBI::dbDisconnect(con, shutdown = TRUE), add = TRUE)
47 |   # Bare file path; the row count must match the data frame written above.
48 |   tab1 <- dplyr::tbl(con, path)
49 |   expect_s3_class(tab1, "tbl_duckdb_connection")
50 |   expect_equal(tab1 %>% dplyr::count() %>% dplyr::pull("n"), nrow(iris))
51 |   # Same file through an explicit read_csv_auto() call.
52 |   tab2 <- dplyr::tbl(con, paste0("read_csv_auto('", path, "')"))
53 |   expect_s3_class(tab2, "tbl_duckdb_connection")
54 |   expect_equal(tab2 %>% dplyr::count() %>% dplyr::pull("n"), nrow(iris))
55 | })
56 |
57 | test_that("Other replacement scans or functions can be registered with dplyr::tbl()", {
58 |   con <- DBI::dbConnect(duckdb::duckdb())
59 |   on.exit(DBI::dbDisconnect(con, shutdown = TRUE), add = TRUE)
60 |   keywords <- dplyr::tbl(con, "duckdb_keywords()") # table function, not a file path
61 |   expect_s3_class(keywords, "tbl_duckdb_connection")
62 |   n_all <- keywords %>% dplyr::filter(keyword_name == "all") %>% dplyr::count()
63 |   expect_equal(dplyr::pull(n_all, "n"), 1) # exactly one keyword row named "all"
64 | })
65 |
66 | rm(`%>%`) # drop the pipe alias that was defined at the top of this file
--------------------------------------------------------------------------------
/tidyduck.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: No
4 | SaveWorkspace: No
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 |
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source --no-byte-compile
21 | PackageRoxygenize: rd,collate,namespace
22 |
--------------------------------------------------------------------------------