├── .Rbuildignore ├── .gitignore ├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── R ├── dbplyr.R ├── s3_register.R ├── tidyduck.R └── zzz.R ├── README.md ├── _pkgdown.yml ├── dependencies.R ├── man ├── backend-duckdb.Rd └── tidyduck-package.Rd ├── tests ├── testthat.R └── testthat │ ├── _snaps │ └── dbplyr.md │ ├── data │ ├── binary_string.parquet │ └── userdata1.parquet │ ├── test_dbplyr.R │ └── test_tbl__duckdb_connection.R └── tidyduck.Rproj /.Rbuildignore: -------------------------------------------------------------------------------- 1 | .*\.sh 2 | .*\.tar.gz 3 | .*\.tgz 4 | .*\.zip 5 | git 6 | configure 7 | dependencies.R 8 | src/Makevars.in 9 | ^.*\.Rproj$ 10 | ^\.Rproj\.user$ 11 | ^build_win$ 12 | ^tic\.R$ 13 | ^_pkgdown\.yml$ 14 | ^docs$ 15 | deploy-* 16 | CMakeLists.txt 17 | NEWS.md 18 | tests/regression 19 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | src/duckdb/ 2 | *.tar.gz 3 | *.tgz 4 | src/Makevars 5 | src/Makevars.win 6 | # Created by https://www.toptal.com/developers/gitignore/api/r 7 | # Edit at https://www.toptal.com/developers/gitignore?templates=r 8 | ### R ### 9 | # History files 10 | .Rhistory 11 | .Rapp.history 12 | # Session Data files 13 | .RData 14 | # User-specific files 15 | .Ruserdata 16 | # Example code in package build process 17 | *-Ex.R 18 | # Output files from R CMD build 19 | /*.tar.gz 20 | # Output files from R CMD check 21 | /*.Rcheck/ 22 | # RStudio files 23 | .Rproj.user/ 24 | # produced vignettes 25 | vignettes/*.html 26 | vignettes/*.pdf 27 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 28 | .httr-oauth 29 | # knitr and R markdown default cache directories 30 | *_cache/ 31 | /cache/ 32 | # Temporary files created by R markdown 33 | *.utf8.md 34 | *.knit.md 35 | # R Environment Variables 36 | .Renviron 37 | ### R.Bookdown Stack ### 38 | # R package: bookdown caching 
files 39 | /*_files/ 40 | # End of https://www.toptal.com/developers/gitignore/api/r 41 | docs/ 42 | # Created by https://www.toptal.com/developers/gitignore/api/visualstudiocode 43 | # Edit at https://www.toptal.com/developers/gitignore?templates=visualstudiocode 44 | ### VisualStudioCode ### 45 | .vscode/* 46 | !.vscode/settings.json 47 | !.vscode/tasks.json 48 | !.vscode/launch.json 49 | !.vscode/extensions.json 50 | *.code-workspace 51 | ### VisualStudioCode Patch ### 52 | # Ignore all local history of files 53 | .history 54 | # End of https://www.toptal.com/developers/gitignore/api/visualstudiocode 55 | # Created by https://www.toptal.com/developers/gitignore/api/macos 56 | # Edit at https://www.toptal.com/developers/gitignore?templates=macos 57 | ### macOS ### 58 | # General 59 | .DS_Store 60 | .AppleDouble 61 | .LSOverride 62 | # Icon must end with two \r 63 | Icon 64 | # Thumbnails 65 | ._* 66 | # Files that might appear in the root of a volume 67 | .DocumentRevisions-V100 68 | .fseventsd 69 | .Spotlight-V100 70 | .TemporaryItems 71 | .Trashes 72 | .VolumeIcon.icns 73 | .com.apple.timemachine.donotpresent 74 | # Directories potentially created on remote AFP share 75 | .AppleDB 76 | .AppleDesktop 77 | Network Trash Folder 78 | Temporary Items 79 | .apdisk 80 | # End of https://www.toptal.com/developers/gitignore/api/macos 81 | # Created by https://www.toptal.com/developers/gitignore/api/windows 82 | # Edit at https://www.toptal.com/developers/gitignore?templates=windows 83 | ### Windows ### 84 | # Windows thumbnail cache files 85 | Thumbs.db 86 | Thumbs.db:encryptable 87 | ehthumbs.db 88 | ehthumbs_vista.db 89 | # Dump file 90 | *.stackdump 91 | # Folder config file 92 | [Dd]esktop.ini 93 | # Recycle Bin used on file shares 94 | $RECYCLE.BIN/ 95 | # Windows Installer files 96 | *.cab 97 | *.msi 98 | *.msix 99 | *.msm 100 | *.msp 101 | # Windows shortcuts 102 | *.lnk 103 | # End of https://www.toptal.com/developers/gitignore/api/windows 104 | 
-------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: tidyduck 2 | Title: High-level bindings for the DuckDB Database Management System 3 | Version: 0.5.0 4 | Authors@R: 5 | c(person(given = "Kirill", 6 | family = "M\u00fcller", 7 | role = c("aut", "cre"), 8 | email = "krlmlr+r@mailbox.org", 9 | comment = c(ORCID = "0000-0002-1416-3412")), 10 | person("Stichting DuckDB Foundation", role = "cph")) 11 | Description: The DuckDB project is an embedded analytical data 12 | management system with support for the Structured Query Language (SQL). 13 | This package includes optional bindings to DuckDB for the tidyverse 14 | and other ecosystems. 15 | License: MIT + file LICENSE 16 | URL: https://duckdb.org/, https://github.com/duckdb/tidyduck 17 | BugReports: https://github.com/duckdb/tidyduck/issues 18 | Depends: 19 | R (>= 3.6.0) 20 | Imports: 21 | arrow, 22 | bit64, 23 | callr, 24 | dbplyr, 25 | DBI, 26 | dplyr, 27 | duckdb (>= 0.5.0), 28 | methods, 29 | tibble, 30 | utils, 31 | vctrs, 32 | withr 33 | Suggests: 34 | testthat 35 | Encoding: UTF-8 36 | Roxygen: list(markdown = TRUE) 37 | RoxygenNote: 7.2.1 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2018 2 | COPYRIGHT HOLDER: Stichting DuckDB Foundation 3 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(simulate_duckdb) 4 | export(translate_duckdb) 5 | import(dbplyr, except = c(sql, ident)) 6 | import(dplyr) 7 | -------------------------------------------------------------------------------- /R/dbplyr.R: 
-------------------------------------------------------------------------------- 1 | #' DuckDB SQL backend for dbplyr 2 | #' 3 | #' @description 4 | #' This is a SQL backend for dbplyr tailored to take into account DuckDB's 5 | #' possibilities. This mainly follows the backend for PostgreSQL, but 6 | #' contains more mapped functions. 7 | #' 8 | #' @name backend-duckdb 9 | #' @aliases NULL 10 | #' @examples 11 | #' library(dplyr, warn.conflicts = FALSE) 12 | #' con <- DBI::dbConnect(duckdb::duckdb(), path = ":memory:") 13 | #' 14 | #' dbiris <- copy_to(con, iris, overwrite = TRUE) 15 | #' 16 | #' dbiris %>% select(Petal.Length, Petal.Width) %>% filter(Petal.Length > 1.5) %>% head(5) 17 | #' 18 | #' DBI::dbDisconnect(con, shutdown = TRUE) 19 | NULL 20 | 21 | #' Connection object for simulation of the SQL generation without actual database. 22 | #' dbplyr overrides database specific identifier and string quotes 23 | #' @param ... Any parameters to be forwarded 24 | #' @export 25 | #' @rdname backend-duckdb 26 | simulate_duckdb <- function(...) { 27 | structure(list(), ..., class = c("duckdb_connection", "TestConnection", "DBIConnection")) 28 | } 29 | 30 | #' Connection object for simulation of the SQL generation without actual database. 31 | #' This version keeps the database specific identifier and string quotes, i.e. 32 | #' allows to translate to DuckDB SQL dialect. 33 | #' @param ... Any parameters to be forwarded 34 | #' @export 35 | #' @rdname backend-duckdb 36 | translate_duckdb <- function(...) { 37 | structure(list(), ..., class = c("duckdb_connection", "DBIConnection")) 38 | } 39 | 40 | # Declare which version of dbplyr API is being called. 
41 | # @param con A \code{\link{dbConnect}} object, as returned by \code{dbConnect()} 42 | # @name dbplyr_edition 43 | dbplyr_edition.duckdb_connection <- function(con) { 44 | 2L 45 | } 46 | 47 | # Description of the database connection 48 | # @param con A \code{\link{dbConnect}} object, as returned by \code{dbConnect()} 49 | # @name db_connection_describe 50 | # @return 51 | # String consisting of DuckDB version, user login name, operating system, R version and the name of database 52 | db_connection_describe.duckdb_connection <- function(con) { 53 | info <- DBI::dbGetInfo(con) 54 | paste0( 55 | "DuckDB ", info$db.version, " [", Sys.info()["login"], "@", 56 | paste(Sys.info()[c("sysname", "release")], collapse = " "), ":", 57 | "R ", R.version$major, ".", R.version$minor, "/", info$dbname, "]" 58 | ) 59 | } 60 | 61 | duckdb_grepl <- function(pattern, x, ignore.case = FALSE, perl = FALSE, fixed = FALSE, useBytes = FALSE) { 62 | # https://duckdb.org/docs/sql/functions/patternmatching 63 | if (any(c(perl, fixed, useBytes))) { 64 | stop("Parameters `perl`, `fixed` and `useBytes` in grepl are not currently supported in DuckDB backend", call. 
= FALSE) 65 | } 66 | 67 | sql_expr <- pkg_method("sql_expr", "dbplyr") 68 | 69 | if (ignore.case) { 70 | icpattern <- paste0("(?i)", pattern) 71 | sql_expr(REGEXP_MATCHES((!!x), (!!icpattern))) 72 | } else { 73 | sql_expr(REGEXP_MATCHES((!!x), (!!pattern))) 74 | } 75 | } 76 | 77 | 78 | # Customized translation functions for DuckDB SQL 79 | # @param con A \code{\link{dbConnect}} object, as returned by \code{dbConnect()} 80 | # @name sql_translation 81 | sql_translation.duckdb_connection <- function(con) { 82 | sql_variant <- pkg_method("sql_variant", "dbplyr") 83 | sql_translator <- pkg_method("sql_translator", "dbplyr") 84 | sql <- pkg_method("sql", "dbplyr") 85 | build_sql <- pkg_method("build_sql", "dbplyr") 86 | sql_expr <- pkg_method("sql_expr", "dbplyr") 87 | sql_prefix <- pkg_method("sql_prefix", "dbplyr") 88 | sql_cast <- pkg_method("sql_cast", "dbplyr") 89 | sql_paste <- pkg_method("sql_paste", "dbplyr") 90 | sql_aggregate <- pkg_method("sql_aggregate", "dbplyr") 91 | sql_aggregate_2 <- pkg_method("sql_aggregate_2", "dbplyr") 92 | win_aggregate <- pkg_method("win_aggregate", "dbplyr") 93 | win_aggregate_2 <- pkg_method("win_aggregate_2", "dbplyr") 94 | win_over <- pkg_method("win_over", "dbplyr") 95 | win_current_order <- pkg_method("win_current_order", "dbplyr") 96 | win_current_group <- pkg_method("win_current_group", "dbplyr") 97 | 98 | 99 | base_scalar <- pkg_method("base_scalar", "dbplyr") 100 | base_agg <- pkg_method("base_agg", "dbplyr") 101 | base_win <- pkg_method("base_win", "dbplyr") 102 | 103 | sql_variant( 104 | sql_translator( 105 | .parent = base_scalar, 106 | as.raw = sql_cast("VARBINARY"), 107 | `%%` = function(a, b) sql_expr(FMOD(!!a, !!b)), 108 | `%/%` = function(a, b) sql_expr(FDIV(!!a, !!b)), 109 | `^` = sql_prefix("POW", 2), 110 | bitwOr = function(a, b) sql_expr((CAST((!!a) %AS% INTEGER)) | (CAST((!!b) %AS% INTEGER))), 111 | bitwAnd = function(a, b) sql_expr((CAST((!!a) %AS% INTEGER)) & (CAST((!!b) %AS% INTEGER))), 112 | bitwXor = 
function(a, b) sql_expr(XOR((CAST((!!a) %AS% INTEGER)), (CAST((!!b) %AS% INTEGER)))), 113 | bitwNot = function(a) sql_expr(~ (CAST((!!a) %AS% INTEGER))), 114 | bitwShiftL = function(a, b) sql_expr((CAST((!!a) %AS% INTEGER)) %<<% (CAST((!!b) %AS% INTEGER))), 115 | bitwShiftR = function(a, b) sql_expr((CAST((!!a) %AS% INTEGER)) %>>% (CAST((!!b) %AS% INTEGER))), 116 | log = function(x, base = exp(1)) { 117 | if (isTRUE(all.equal(base, exp(1)))) { 118 | sql_expr(LN(!!x)) 119 | } else 120 | if (base == 10) { 121 | sql_expr(LOG10(!!x)) 122 | } else 123 | if (base == 2) { 124 | sql_expr(LOG2(!!x)) 125 | } else { 126 | sql_expr(LOG(!!x) / LOG(!!base)) 127 | } 128 | }, 129 | log10 = sql_prefix("LOG10", 1), 130 | log2 = sql_prefix("LOG2", 1), 131 | 132 | # See https://github.com/duckdb/duckdb/issues/530 about NaN, infinites and NULL in DuckDB 133 | # The following is how R functions for detecting those should behave: 134 | # Function Inf –Inf NaN NA 135 | # is.finite() FALSE FALSE FALSE FALSE 136 | # is.infinite() TRUE TRUE FALSE FALSE 137 | # is.nan() FALSE FALSE TRUE FALSE 138 | # is.na() FALSE FALSE TRUE TRUE 139 | # https://github.com/duckdb/duckdb/issues/3019 140 | # is.na = function(a) build_sql("(", a, " IS NULL OR PRINTF('%f', ", a, ") = 'nan')"), 141 | is.nan = function(a) build_sql("(", a, " IS NOT NULL AND PRINTF('%f', ", a, ") = 'nan')"), 142 | is.infinite = function(a) build_sql("(", a, " IS NOT NULL AND REGEXP_MATCHES(PRINTF('%f', ", a, "), 'inf'))"), 143 | is.finite = function(a) build_sql("(NOT (", a, " IS NULL OR REGEXP_MATCHES(PRINTF('%f', ", a, "), 'inf|nan')))"), 144 | grepl = duckdb_grepl, 145 | 146 | # Return index where the first match starts,-1 if no match 147 | regexpr = function(p, x) { 148 | build_sql("(CASE WHEN REGEXP_MATCHES(", x, ", ", p, ") THEN (LENGTH(LIST_EXTRACT(STRING_SPLIT_REGEX(", x, ", ", p, "), 0))+1) ELSE -1 END)") 149 | }, 150 | round = function(x, digits = 0L) sql_expr(ROUND(!!x, CAST(ROUND((!!digits), 0L) %AS% INTEGER))), 151 | as.Date 
= sql_cast("DATE"), 152 | as.POSIXct = sql_cast("TIMESTAMP"), 153 | 154 | # lubridate functions 155 | 156 | month = function(x, label = FALSE, abbr = TRUE) { 157 | if (!label) { 158 | sql_expr(EXTRACT(MONTH %FROM% !!x)) 159 | } else { 160 | if (abbr) { 161 | sql_expr(STRFTIME(!!x, "%b")) 162 | } else { 163 | sql_expr(STRFTIME(!!x, "%B")) 164 | } 165 | } 166 | }, 167 | quarter = function(x, type = "quarter", fiscal_start = 1, with_year = identical(type, "year.quarter")) { 168 | if (fiscal_start != 1) { 169 | stop("`fiscal_start` is not yet supported in DuckDB translation. Must be 1.", call. = FALSE) 170 | } 171 | if (is.logical(type)) { 172 | type <- if (type) { 173 | "year.quarter" 174 | } else { 175 | "quarter" 176 | } 177 | } 178 | if (with_year) { 179 | type <- "year.quarter" 180 | } 181 | switch(type, 182 | quarter = { 183 | sql_expr(EXTRACT(QUARTER %FROM% !!x)) 184 | }, 185 | year.quarter = { 186 | sql_expr((EXTRACT(YEAR %FROM% !!x) || "." || EXTRACT(QUARTER %FROM% !!x))) 187 | }, 188 | date_first = { 189 | sql_expr((CAST(DATE_TRUNC("QUARTER", !!x) %AS% DATE))) 190 | }, 191 | date_last = { 192 | sql_expr((CAST((DATE_TRUNC("QUARTER", !!x) + !!sql("INTERVAL '1 QUARTER'") - !!sql("INTERVAL '1 DAY'")) %AS% DATE))) 193 | }, 194 | stop(paste("Unsupported type", type), call. 
= FALSE) 195 | ) 196 | }, 197 | qday = function(x) { 198 | build_sql("DATE_DIFF('DAYS', DATE_TRUNC('QUARTER', CAST((", x, ") AS DATE)), (CAST((", x, ") AS DATE) + INTERVAL '1 DAY'))") 199 | }, 200 | wday = function(x, label = FALSE, abbr = TRUE, week_start = NULL) { 201 | if (!label) { 202 | week_start <- if (!is.null(week_start)) week_start else getOption("lubridate.week.start", 7) 203 | offset <- as.integer(7 - week_start) 204 | sql_expr(EXTRACT("dow" %FROM% CAST((!!x) %AS% DATE) + !!offset) + 1L) 205 | } else if (label && !abbr) { 206 | sql_expr(STRFTIME(!!x, "%A")) 207 | } else if (label && abbr) { 208 | sql_expr(STRFTIME(!!x, "%a")) 209 | } else { 210 | stop("Unrecognized arguments to `wday`", call. = FALSE) 211 | } 212 | }, 213 | yday = function(x) sql_expr(EXTRACT(DOY %FROM% !!x)), 214 | 215 | # These work fine internally, but getting INTERVAL-type data out of DuckDB 216 | # seems problematic until there is a fix for the issue #1920 / #2900 217 | # (https://github.com/duckdb/duckdb/issues/1920) 218 | seconds = function(x) { 219 | sql_expr(TO_SECONDS(CAST((!!x) %AS% BIGINT))) 220 | }, 221 | minutes = function(x) { 222 | sql_expr(TO_MINUTES(CAST((!!x) %AS% BIGINT))) 223 | }, 224 | hours = function(x) { 225 | sql_expr(TO_HOURS(CAST((!!x) %AS% BIGINT))) 226 | }, 227 | days = function(x) { 228 | sql_expr(TO_DAYS(CAST((!!x) %AS% INTEGER))) 229 | }, 230 | weeks = function(x) { 231 | sql_expr(TO_DAYS(7L * CAST((!!x) %AS% INTEGER))) 232 | }, 233 | months = function(x) { 234 | sql_expr(TO_MONTHS(CAST((!!x) %AS% INTEGER))) 235 | }, 236 | years = function(x) { 237 | sql_expr(TO_YEARS(CAST((!!x) %AS% INTEGER))) 238 | }, 239 | 240 | # Week_start algorithm: https://github.com/tidyverse/lubridate/issues/509#issuecomment-287030620 241 | floor_date = function(x, unit = "seconds", week_start = NULL) { 242 | if (unit %in% c("week", "weeks")) { 243 | week_start <- if (!is.null(week_start)) week_start else getOption("lubridate.week.start", 7) 244 | if (week_start == 1) { 245 | 
sql_expr(DATE_TRUNC(!!unit, !!x)) 246 | } else { 247 | offset <- as.integer(7 - week_start) 248 | sql_expr(CAST((!!x) %AS% DATE) - CAST(EXTRACT("dow" %FROM% CAST((!!x) %AS% DATE) + !!offset) %AS% INTEGER)) 249 | } 250 | } else { 251 | sql_expr(DATE_TRUNC(!!unit, !!x)) 252 | } 253 | }, 254 | paste = sql_paste(" "), 255 | paste0 = sql_paste(""), 256 | 257 | # stringr functions 258 | str_c = sql_paste(""), 259 | str_detect = function(string, pattern, negate = FALSE) { 260 | if (negate) { 261 | sql_expr((NOT(REGEXP_MATCHES(!!string, !!pattern)))) 262 | } else { 263 | sql_expr(REGEXP_MATCHES(!!string, !!pattern)) 264 | } 265 | }, 266 | str_replace = function(string, pattern, replacement) { 267 | sql_expr(REGEXP_REPLACE(!!string, !!pattern, !!replacement)) 268 | }, 269 | str_replace_all = function(string, pattern, replacement) { 270 | sql_expr(REGEXP_REPLACE(!!string, !!pattern, !!replacement, "g")) 271 | }, 272 | str_squish = function(string) { 273 | sql_expr(TRIM(REGEXP_REPLACE(!!string, "\\s+", " ", "g"))) 274 | }, 275 | str_remove = function(string, pattern) { 276 | sql_expr(REGEXP_REPLACE(!!string, !!pattern, "")) 277 | }, 278 | str_remove_all = function(string, pattern) { 279 | sql_expr(REGEXP_REPLACE(!!string, !!pattern, "", "g")) 280 | }, 281 | # str_to_title = function(string) { 282 | # sql_expr(INITCAP(!!string)) 283 | # }, 284 | str_to_sentence = function(string) { 285 | build_sql("(UPPER(", string, "[0]) || ", string, "[1:NULL])") 286 | }, 287 | # Respect OR (|) operator: https://github.com/tidyverse/stringr/pull/340 288 | str_starts = function(string, pattern) { 289 | build_sql("REGEXP_MATCHES(", string, ",'^(?:'||", pattern, "||')')") 290 | }, 291 | str_ends = function(string, pattern) { 292 | build_sql("REGEXP_MATCHES(", string, ",'(?:'||", pattern, "||')$')") 293 | }, 294 | # NOTE: GREATEST needed because DuckDB PAD-functions truncate the string if width < length of string 295 | str_pad = function(string, width, side = "left", pad = " ", use_length = FALSE) { 296 
| if (side %in% c("left")) { 297 | sql_expr(LPAD(!!string, CAST(GREATEST(!!as.integer(width), LENGTH(!!string)) %AS% INTEGER), !!pad)) 298 | } else if (side %in% c("right")) { 299 | sql_expr(RPAD(!!string, CAST(GREATEST(!!as.integer(width), LENGTH(!!string)) %AS% INTEGER), !!pad)) 300 | } else if (side %in% c("both")) { 301 | sql_expr(RPAD(REPEAT(!!pad, (!!as.integer(width) - LENGTH(!!string)) / 2L) %||% !!string, CAST(GREATEST(!!as.integer(width), LENGTH(!!string)) %AS% INTEGER), !!pad)) 302 | } else { 303 | stop('Argument \'side\' should be "left", "right" or "both"', call. = FALSE) 304 | } 305 | } 306 | ), 307 | sql_translator( 308 | .parent = base_agg, 309 | cor = sql_aggregate_2("CORR"), 310 | cov = sql_aggregate_2("COVAR_SAMP"), 311 | sd = sql_aggregate("STDDEV", "sd"), 312 | var = sql_aggregate("VARIANCE", "var"), 313 | all = sql_aggregate("BOOL_AND", "all"), 314 | any = sql_aggregate("BOOL_OR", "any"), 315 | str_flatten = function(x, collapse) sql_expr(STRING_AGG(!!x, !!collapse)), 316 | first = sql_prefix("FIRST", 1), 317 | last = sql_prefix("LAST", 1) 318 | ), 319 | sql_translator( 320 | .parent = base_win, 321 | cor = win_aggregate_2("CORR"), 322 | cov = win_aggregate_2("COVAR_SAMP"), 323 | sd = win_aggregate("STDDEV"), 324 | var = win_aggregate("VARIANCE"), 325 | all = win_aggregate("BOOL_AND"), 326 | any = win_aggregate("BOOL_OR"), 327 | str_flatten = function(x, collapse) { 328 | win_over( 329 | sql_expr(STRING_AGG(!!x, !!collapse)), 330 | partition = win_current_group(), 331 | order = win_current_order() 332 | ) 333 | } 334 | ) 335 | ) 336 | } 337 | 338 | 339 | # Customized translation for comparing to objects in DuckDB SQL 340 | # @param con A \code{\link{dbConnect}} object, as returned by \code{dbConnect()} 341 | # @param x First object to be compared 342 | # @param y Second object to be compared 343 | # @name sql_expr_matches 344 | sql_expr_matches.duckdb_connection <- function(con, x, y) { 345 | build_sql <- pkg_method("build_sql", "dbplyr") 346 
| # https://duckdb.org/docs/sql/expressions/comparison_operators 347 | build_sql(x, " IS NOT DISTINCT FROM ", y, con = con) 348 | } 349 | 350 | # Customized escape translation for date objects 351 | # @param con A \code{\link{dbConnect}} object, as returned by \code{dbConnect()} 352 | # @param x Date object to be escaped 353 | # @name sql_escape_date 354 | sql_escape_date.duckdb_connection <- function(con, x) { 355 | # https://github.com/tidyverse/dbplyr/issues/727 356 | dbQLit <- pkg_method("dbQuoteLiteral", "DBI") 357 | dbQLit(con, x) 358 | } 359 | 360 | # Customized escape translation for datetime objects 361 | # @param con A \code{\link{dbConnect}} object, as returned by \code{dbConnect()} 362 | # @param x Datetime object to be escaped 363 | # @name sql_escape_datetime 364 | sql_escape_datetime.duckdb_connection <- function(con, x) { 365 | dbQLit <- pkg_method("dbQuoteLiteral", "DBI") 366 | dbQLit(con, x) 367 | } 368 | 369 | # Customized translation for fill function 370 | # @param .con A \code{\link{dbConnect}} object, as returned by \code{dbConnect()} 371 | # @param .data Data frame 372 | # @param cols_to_fill Which colums to be altered 373 | # @param order_by_cols Defined order of variables 374 | # @param .direction Direction in which to fill missing values. 375 | # @name dbplyr_fill0 376 | dbplyr_fill0.duckdb_connection <- function(.con, .data, cols_to_fill, order_by_cols, .direction) { 377 | dbplyr_fill0 <- pkg_method("dbplyr_fill0.SQLiteConnection", "dbplyr") 378 | 379 | # Required because of the bug in dbplyr (con is not passed to "translate_sql(cumsum..." 
call) 380 | # See https://github.com/tidyverse/dbplyr/pull/753 381 | setcon <- pkg_method("set_current_con", "dbplyr") 382 | setcon(.con) 383 | 384 | dbplyr_fill0(.con, .data, cols_to_fill, order_by_cols, .direction) 385 | } 386 | 387 | # Customized handling for tbl() to allow the use of replacement scans 388 | # @param src .con A \code{\link{dbConnect}} object, as returned by \code{dbConnect()} 389 | # @param from Table or parquet/csv -files to be registered 390 | # @param cache Enable object cache for parquet files 391 | tbl.duckdb_connection <- function(src, from, cache = FALSE, ...) { 392 | ident_q <- pkg_method("ident_q", "dbplyr") 393 | if (!DBI::dbExistsTable(src, from)) from <- ident_q(from) 394 | if (cache) DBI::dbExecute(src, "PRAGMA enable_object_cache") 395 | NextMethod("tbl") 396 | } 397 | 398 | # Needed to suppress the R CHECK notes (due to the use of sql_expr) 399 | globalVariables(c("REGEXP_MATCHES", "CAST", "%AS%", "INTEGER", "XOR", "%<<%", "%>>%", "LN", "LOG", "ROUND", "EXTRACT", "%FROM%", "MONTH", "STRFTIME", "QUARTER", "YEAR", "DATE_TRUNC", "DATE", "DOY", "TO_SECONDS", "BIGINT", "TO_MINUTES", "TO_HOURS", "TO_DAYS", "TO_WEEKS", "TO_MONTHS", "TO_YEARS", "STRPOS", "NOT", "REGEXP_REPLACE", "TRIM", "LPAD", "RPAD", "%||%", "REPEAT", "LENGTH", "STRING_AGG", "GREATEST", "LIST_EXTRACT", "LOG10", "LOG2", "STRING_SPLIT_REGEX", "FLOOR", "FMOD", "FDIV")) 400 | -------------------------------------------------------------------------------- /R/s3_register.R: -------------------------------------------------------------------------------- 1 | # nocov start 2 | 3 | # From: https://github.com/r-lib/rlang/blob/d5df93251d055721abb4a576433fb867ca40d527/R/compat-s3-register.R#L53-L122 4 | s3_register <- function(generic, class, method = NULL) { 5 | stopifnot(is.character(generic), length(generic) == 1) 6 | stopifnot(is.character(class), length(class) == 1) 7 | 8 | pieces <- strsplit(generic, "::")[[1]] 9 | stopifnot(length(pieces) == 2) 10 | package <- pieces[[1]] 
11 | generic <- pieces[[2]] 12 | 13 | caller <- parent.frame() 14 | 15 | get_method_env <- function() { 16 | top <- topenv(caller) 17 | if (isNamespace(top)) { 18 | asNamespace(environmentName(top)) 19 | } else { 20 | caller 21 | } 22 | } 23 | get_method <- function(method) { 24 | if (is.null(method)) { 25 | get(paste0(generic, ".", class), envir = get_method_env()) 26 | } else { 27 | method 28 | } 29 | } 30 | 31 | register <- function(...) { 32 | envir <- asNamespace(package) 33 | 34 | # Refresh the method each time, it might have been updated by 35 | # `devtools::load_all()` 36 | method_fn <- get_method(method) 37 | stopifnot(is.function(method_fn)) 38 | 39 | 40 | # Only register if generic can be accessed 41 | if (exists(generic, envir)) { 42 | registerS3method(generic, class, method_fn, envir = envir) 43 | } else if (identical(Sys.getenv("NOT_CRAN"), "true")) { 44 | warning(sprintf( 45 | "Can't find generic `%s` in package %s to register S3 method.", 46 | generic, 47 | package 48 | )) 49 | } 50 | } 51 | 52 | # Always register hook in case package is later unloaded & reloaded 53 | setHook(packageEvent(package, "onLoad"), function(...) { 54 | register() 55 | }) 56 | 57 | # Avoid registration failures during loading (pkgload or regular) 58 | if (isNamespaceLoaded(package) && environmentIsLocked(asNamespace(package))) { 59 | register() 60 | } 61 | 62 | invisible() 63 | } 64 | 65 | # From: https://github.com/DyfanJones/noctua/blob/b82113098df6b3a7981cf8ca0c1ae9f2ff408756/R/utils.R#L168-L175 66 | # get parent pkg function and method 67 | pkg_method <- function(fun, pkg) { 68 | if (!requireNamespace(pkg, quietly = TRUE)) { 69 | stop(fun, " requires the ", pkg, " package, please install it first and try again", 70 | call. 
= FALSE 71 | ) 72 | } 73 | fun_name <- utils::getFromNamespace(fun, pkg) 74 | return(fun_name) 75 | } 76 | 77 | # nocov end 78 | -------------------------------------------------------------------------------- /R/tidyduck.R: -------------------------------------------------------------------------------- 1 | #' DuckDB high-level bindings for R 2 | #' 3 | #' R high-level bindings for DuckDB: an embeddable SQL OLAP Database Management System. 4 | #' 5 | #' @seealso 6 | #' [duckdb::duckdb()] for connection instructions. 7 | #' 8 | #' <https://duckdb.org/> for the project website. 9 | #' 10 | ## usethis namespace: start 11 | #' @import dplyr 12 | #' @rawNamespace import(dbplyr, except = c(sql, ident)) 13 | ## usethis namespace: end 14 | #' @docType package 15 | #' @name tidyduck-package 16 | NULL 17 | -------------------------------------------------------------------------------- /R/zzz.R: -------------------------------------------------------------------------------- 1 | .onLoad <- function(...) { 2 | s3_register("dbplyr::dbplyr_edition", "duckdb_connection") 3 | s3_register("dbplyr::db_connection_describe", "duckdb_connection") 4 | s3_register("dbplyr::sql_translation", "duckdb_connection") 5 | s3_register("dbplyr::dbplyr_fill0", "duckdb_connection") 6 | s3_register("dbplyr::sql_expr_matches", "duckdb_connection") 7 | s3_register("dbplyr::sql_escape_date", "duckdb_connection") 8 | s3_register("dbplyr::sql_escape_datetime", "duckdb_connection") 9 | s3_register("dplyr::tbl", "duckdb_connection") 10 | 11 | invisible() 12 | } 13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # duckdb R package 4 | 5 | ## Installation from CRAN 6 | 7 | ```r 8 | install.packages("duckdb") 9 | ``` 10 | 11 | ## Building 12 | 13 | The default build compiles a release version from an amalgamation. 14 | 15 | ```sh 16 | cd tools/rpkg 17 | R CMD INSTALL . 
18 | ``` 19 | 20 | Optional extensions can be enabled by passing them (comma-separated, if there is more than one) to the environment variable `DUCKDB_R_EXTENSIONS`: 21 | 22 | ```sh 23 | DUCKDB_R_EXTENSIONS=tpch R CMD INSTALL . 24 | ``` 25 | 26 | ## Development 27 | 28 | For development, setting the `DUCKDB_R_DEBUG` environment variable enables incremental debugging builds for the R package. 29 | 30 | ```sh 31 | cd tools/rpkg 32 | DUCKDB_R_DEBUG=1 R CMD INSTALL . 33 | ``` 34 | 35 | This also works for devtools: 36 | 37 | ```r 38 | Sys.setenv(DUCKDB_R_DEBUG = "1") 39 | pkgload::load_all() 40 | ``` 41 | 42 | Add the following to your `.Renviron` to make this the default: 43 | 44 | ``` 45 | DUCKDB_R_DEBUG=1 46 | ``` 47 | 48 | If you do this, remember to use `--vanilla` for building release builds. 49 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | reference: 2 | - title: Driver 3 | contents: 4 | - duckdb_driver 5 | - title: Connection 6 | contents: 7 | - duckdb_connection 8 | - title: Result 9 | contents: 10 | - duckdb_result 11 | - title: duckdb Package 12 | contents: 13 | - duckdb-package 14 | -------------------------------------------------------------------------------- /dependencies.R: -------------------------------------------------------------------------------- 1 | local({ 2 | pkg <- c("DBI", "callr", "DBItest", "dbplyr", "nycflights13", "testthat", "bit64", "cpp11", "arrow", "covr", "pkgbuild", "remotes") 3 | 4 | if (.Platform$OS.type == "unix") { 5 | options(HTTPUserAgent = sprintf("R/4.1.0 R (4.1.0 %s)", paste(R.version$platform, R.version$arch, R.version$os))) 6 | install.packages(pkg, repos = "https://packagemanager.rstudio.com/all/__linux__/focal/latest") 7 | # https://github.com/r-lib/covr/pull/499 8 | remotes::install_github("r-lib/covr") 9 | } else { 10 | install.packages(pkg, repos = 
"https://cloud.r-project.org", pkgType = "binary") 11 | } 12 | }) 13 | -------------------------------------------------------------------------------- /man/backend-duckdb.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dbplyr.R 3 | \name{backend-duckdb} 4 | \alias{simulate_duckdb} 5 | \alias{translate_duckdb} 6 | \title{DuckDB SQL backend for dbplyr} 7 | \usage{ 8 | simulate_duckdb(...) 9 | 10 | translate_duckdb(...) 11 | } 12 | \arguments{ 13 | \item{...}{Any parameters to be forwarded} 14 | } 15 | \description{ 16 | This is a SQL backend for dbplyr tailored to take into account DuckDB's 17 | possibilities. This mainly follows the backend for PostgreSQL, but 18 | contains more mapped functions. 19 | } 20 | \examples{ 21 | library(dplyr, warn.conflicts = FALSE) 22 | con <- DBI::dbConnect(duckdb::duckdb(), path = ":memory:") 23 | 24 | dbiris <- copy_to(con, iris, overwrite = TRUE) 25 | 26 | dbiris \%>\% select(Petal.Length, Petal.Width) \%>\% filter(Petal.Length > 1.5) \%>\% head(5) 27 | 28 | DBI::dbDisconnect(con, shutdown = TRUE) 29 | } 30 | -------------------------------------------------------------------------------- /man/tidyduck-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tidyduck.R 3 | \docType{package} 4 | \name{tidyduck-package} 5 | \alias{tidyduck-package} 6 | \title{DuckDB high-level bindings for R} 7 | \description{ 8 | R high-level bindings for DuckDB: an embeddable SQL OLAP Database Management System. 9 | } 10 | \seealso{ 11 | \code{\link[duckdb:duckdb]{duckdb::duckdb()}} for connection instructions. 12 | 13 | \url{https://duckdb.org/} for the project website. 
14 | } 15 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library("testthat") 2 | library("DBI") 3 | 4 | test_check("tidyduck") 5 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/dbplyr.md: -------------------------------------------------------------------------------- 1 | # snapshots of dbplyr generic scalar translation 2 | 3 | Code 4 | translate(as.character(1)) 5 | Output 6 | CAST(1.0 AS TEXT) 7 | Code 8 | translate(as.character(1L)) 9 | Output 10 | CAST(1 AS TEXT) 11 | Code 12 | translate(as.numeric(1)) 13 | Output 14 | CAST(1.0 AS NUMERIC) 15 | Code 16 | translate(as.double(1.2)) 17 | Output 18 | CAST(1.2 AS NUMERIC) 19 | Code 20 | translate(as.integer(1.2)) 21 | Output 22 | CAST(1.2 AS INTEGER) 23 | Code 24 | translate(as.integer64(1.2)) 25 | Output 26 | CAST(1.2 AS BIGINT) 27 | Code 28 | translate(as.logical("TRUE")) 29 | Output 30 | CAST('TRUE' AS BOOLEAN) 31 | Code 32 | translate(tolower("HELLO")) 33 | Output 34 | LOWER('HELLO') 35 | Code 36 | translate(toupper("hello")) 37 | Output 38 | UPPER('hello') 39 | Code 40 | translate(pmax(1, 2, na.rm = TRUE)) 41 | Output 42 | GREATEST(1.0, 2.0) 43 | Code 44 | translate(pmin(1, 2, na.rm = TRUE)) 45 | Output 46 | LEAST(1.0, 2.0) 47 | Code 48 | translate(as.character("2020-01-01")) 49 | Output 50 | CAST('2020-01-01' AS TEXT) 51 | Code 52 | translate(c("2020-01-01", "2020-13-02")) 53 | Output 54 | ('2020-01-01', '2020-13-02') 55 | Code 56 | translate(iris[["sepal_length"]]) 57 | Output 58 | iris.sepal_length 59 | Code 60 | translate(iris[[1]]) 61 | Output 62 | iris[1] 63 | Code 64 | translate(cot(x)) 65 | Output 66 | COT(x) 67 | Code 68 | translate(substr("test", 2, 3)) 69 | Output 70 | SUBSTR('test', 2, 2) 71 | 72 | # snapshots of duckdb custom scalars translations 73 | 74 | Code 75 | translate(as.raw(10)) 76 | Output 77 | 
CAST(10.0 AS VARBINARY) 78 | Code 79 | translate(13 %% 5) 80 | Output 81 | FMOD(13.0, 5.0) 82 | Code 83 | translate(35.8 %/% 4) 84 | Output 85 | FDIV(35.8, 4.0) 86 | Code 87 | translate(35.8^2.51) 88 | Output 89 | POW(35.8, 2.51) 90 | Code 91 | translate(bitwOr(x, 128L)) 92 | Output 93 | (CAST(x AS INTEGER)) | (CAST(128 AS INTEGER)) 94 | Code 95 | translate(bitwAnd(x, 128)) 96 | Output 97 | (CAST(x AS INTEGER)) & (CAST(128.0 AS INTEGER)) 98 | Code 99 | translate(bitwXor(x, 128L)) 100 | Output 101 | XOR((CAST(x AS INTEGER)), (CAST(128 AS INTEGER))) 102 | Code 103 | translate(bitwNot(x)) 104 | Output 105 | ~(CAST(x AS INTEGER)) 106 | Code 107 | translate(bitwShiftL(x, 5L)) 108 | Output 109 | (CAST(x AS INTEGER)) << (CAST(5 AS INTEGER)) 110 | Code 111 | translate(bitwShiftR(x, 4L)) 112 | Output 113 | (CAST(x AS INTEGER)) >> (CAST(4 AS INTEGER)) 114 | Code 115 | translate(log(x)) 116 | Output 117 | LN(x) 118 | Code 119 | translate(log(x, base = 5)) 120 | Output 121 | LOG(x) / LOG(5.0) 122 | Code 123 | translate(log(x, base = 10)) 124 | Output 125 | LOG10(x) 126 | Code 127 | translate(log(x, base = 2)) 128 | Output 129 | LOG2(x) 130 | Code 131 | translate(log10(x)) 132 | Output 133 | LOG10(x) 134 | Code 135 | translate(log2(x)) 136 | Output 137 | LOG2(x) 138 | Code 139 | translate(is.nan(var1)) 140 | Output 141 | (var1 IS NOT NULL AND PRINTF('%f', var1) = 'nan') 142 | Code 143 | translate(is.infinite(var1)) 144 | Output 145 | (var1 IS NOT NULL AND REGEXP_MATCHES(PRINTF('%f', var1), 'inf')) 146 | Code 147 | translate(is.finite(var1)) 148 | Output 149 | (NOT (var1 IS NULL OR REGEXP_MATCHES(PRINTF('%f', var1), 'inf|nan'))) 150 | Code 151 | translate(grepl("pattern", text)) 152 | Output 153 | REGEXP_MATCHES("text", 'pattern') 154 | Code 155 | translate(grepl("pattern", text, ignore.case = TRUE)) 156 | Output 157 | REGEXP_MATCHES("text", '(?i)pattern') 158 | Code 159 | translate(regexpr("pattern", text)) 160 | Output 161 | (CASE WHEN REGEXP_MATCHES("text", 'pattern') THEN 
(LENGTH(LIST_EXTRACT(STRING_SPLIT_REGEX("text", 'pattern'), 0))+1) ELSE -1 END) 162 | Code 163 | translate(round(x, digits = 1.1)) 164 | Output 165 | ROUND(x, CAST(ROUND(1.1, 0) AS INTEGER)) 166 | Code 167 | translate(as.Date("2019-01-01")) 168 | Output 169 | CAST('2019-01-01' AS DATE) 170 | Code 171 | translate(as.POSIXct("2019-01-01 01:01:01")) 172 | Output 173 | CAST('2019-01-01 01:01:01' AS TIMESTAMP) 174 | 175 | # snapshot tests for pasting translate 176 | 177 | Code 178 | translate(paste("hi", "bye")) 179 | Output 180 | CONCAT_WS(' ', 'hi', 'bye') 181 | Code 182 | translate(paste("hi", "bye", sep = "-")) 183 | Output 184 | CONCAT_WS('-', 'hi', 'bye') 185 | Code 186 | translate(paste0("hi", "bye")) 187 | Output 188 | CONCAT_WS('', 'hi', 'bye') 189 | Code 190 | translate(paste(x, y), window = FALSE) 191 | Output 192 | CONCAT_WS(' ', x, y) 193 | Code 194 | translate(paste0(x, y), window = FALSE) 195 | Output 196 | CONCAT_WS('', x, y) 197 | 198 | # snapshots for custom lubridate functions translated correctly 199 | 200 | Code 201 | translate(yday(x)) 202 | Output 203 | EXTRACT(DOY FROM x) 204 | Code 205 | translate(quarter(x)) 206 | Output 207 | EXTRACT(QUARTER FROM x) 208 | Code 209 | translate(quarter(x)) 210 | Output 211 | EXTRACT(QUARTER FROM x) 212 | Code 213 | translate(quarter(x, type = "year.quarter")) 214 | Output 215 | (EXTRACT(YEAR FROM x) || '.' || EXTRACT(QUARTER FROM x)) 216 | Code 217 | translate(quarter(x, type = "quarter")) 218 | Output 219 | EXTRACT(QUARTER FROM x) 220 | Code 221 | translate(quarter(x, type = TRUE)) 222 | Output 223 | (EXTRACT(YEAR FROM x) || '.' 
|| EXTRACT(QUARTER FROM x)) 224 | Code 225 | translate(quarter(x, type = FALSE)) 226 | Output 227 | EXTRACT(QUARTER FROM x) 228 | Code 229 | translate(quarter(x, type = "date_first")) 230 | Output 231 | (CAST(DATE_TRUNC('QUARTER', x) AS DATE)) 232 | Code 233 | translate(quarter(x, type = "date_last")) 234 | Output 235 | (CAST((DATE_TRUNC('QUARTER', x) + INTERVAL '1 QUARTER' - INTERVAL '1 DAY') AS DATE)) 236 | Code 237 | translate(month(x, label = FALSE)) 238 | Output 239 | EXTRACT(MONTH FROM x) 240 | Code 241 | translate(month(x, label = TRUE)) 242 | Output 243 | STRFTIME(x, '%b') 244 | Code 245 | translate(month(x, label = TRUE, abbr = FALSE)) 246 | Output 247 | STRFTIME(x, '%B') 248 | Code 249 | translate(qday(x)) 250 | Output 251 | DATE_DIFF('DAYS', DATE_TRUNC('QUARTER', CAST((x) AS DATE)), (CAST((x) AS DATE) + INTERVAL '1 DAY')) 252 | Code 253 | translate(wday(x)) 254 | Output 255 | EXTRACT('dow' FROM CAST(x AS DATE) + 0) + 1 256 | Code 257 | translate(wday(x, week_start = 4)) 258 | Output 259 | EXTRACT('dow' FROM CAST(x AS DATE) + 3) + 1 260 | Code 261 | translate(wday(x, label = TRUE)) 262 | Output 263 | STRFTIME(x, '%a') 264 | Code 265 | translate(wday(x, label = TRUE, abbr = FALSE)) 266 | Output 267 | STRFTIME(x, '%A') 268 | Code 269 | translate(seconds(x)) 270 | Output 271 | TO_SECONDS(CAST(x AS BIGINT)) 272 | Code 273 | translate(minutes(x)) 274 | Output 275 | TO_MINUTES(CAST(x AS BIGINT)) 276 | Code 277 | translate(hours(x)) 278 | Output 279 | TO_HOURS(CAST(x AS BIGINT)) 280 | Code 281 | translate(days(x)) 282 | Output 283 | TO_DAYS(CAST(x AS INTEGER)) 284 | Code 285 | translate(weeks(x)) 286 | Output 287 | TO_DAYS(7 * CAST(x AS INTEGER)) 288 | Code 289 | translate(months(x)) 290 | Output 291 | TO_MONTHS(CAST(x AS INTEGER)) 292 | Code 293 | translate(years(x)) 294 | Output 295 | TO_YEARS(CAST(x AS INTEGER)) 296 | Code 297 | translate(floor_date(x, "month")) 298 | Output 299 | DATE_TRUNC('month', x) 300 | Code 301 | translate(floor_date(x, "week")) 302 | 
Output 303 | CAST(x AS DATE) - CAST(EXTRACT('dow' FROM CAST(x AS DATE) + 0) AS INTEGER) 304 | Code 305 | translate(floor_date(x, "week", week_start = 1)) 306 | Output 307 | DATE_TRUNC('week', x) 308 | Code 309 | translate(floor_date(x, "week", week_start = 4)) 310 | Output 311 | CAST(x AS DATE) - CAST(EXTRACT('dow' FROM CAST(x AS DATE) + 3) AS INTEGER) 312 | 313 | # snapshots for custom stringr functions translated correctly 314 | 315 | Code 316 | translate(str_c(x, y)) 317 | Output 318 | CONCAT_WS('', x, y) 319 | Code 320 | translate(str_detect(x, y)) 321 | Output 322 | REGEXP_MATCHES(x, y) 323 | Code 324 | translate(str_detect(x, y, negate = TRUE)) 325 | Output 326 | (NOT(REGEXP_MATCHES(x, y))) 327 | Code 328 | translate(str_replace(x, y, z)) 329 | Output 330 | REGEXP_REPLACE(x, y, z) 331 | Code 332 | translate(str_replace_all(x, y, z)) 333 | Output 334 | REGEXP_REPLACE(x, y, z, 'g') 335 | Code 336 | translate(str_squish(x)) 337 | Output 338 | TRIM(REGEXP_REPLACE(x, '\s+', ' ', 'g')) 339 | Code 340 | translate(str_remove(x, y)) 341 | Output 342 | REGEXP_REPLACE(x, y, '') 343 | Code 344 | translate(str_remove_all(x, y)) 345 | Output 346 | REGEXP_REPLACE(x, y, '', 'g') 347 | Code 348 | translate(str_to_sentence(x)) 349 | Output 350 | (UPPER(x[0]) || x[1:NULL]) 351 | Code 352 | translate(str_starts(x, y)) 353 | Output 354 | REGEXP_MATCHES(x,'^(?:'||y)) 355 | Code 356 | translate(str_ends(x, y)) 357 | Output 358 | REGEXP_MATCHES((?:x,y||')$') 359 | Code 360 | translate(str_pad(x, width = 10)) 361 | Output 362 | LPAD(x, CAST(GREATEST(10, LENGTH(x)) AS INTEGER), ' ') 363 | Code 364 | translate(str_pad(x, width = 10, side = "right")) 365 | Output 366 | RPAD(x, CAST(GREATEST(10, LENGTH(x)) AS INTEGER), ' ') 367 | Code 368 | translate(str_pad(x, width = 10, side = "both", pad = "<")) 369 | Output 370 | RPAD(REPEAT('<', (10 - LENGTH(x)) / 2) || x, CAST(GREATEST(10, LENGTH(x)) AS INTEGER), '<') 371 | 372 | # snapshots datetime escaping working as in DBI 373 | 374 | Code 375 
| test_date <- as.Date("2020-01-01") 376 | escape(test_date, con = con) 377 | Output 378 | '2020-01-01'::date 379 | Code 380 | escape("2020-01-01", con = con) 381 | Output 382 | '2020-01-01' 383 | Code 384 | test_datetime <- as.POSIXct("2020-01-01 01:23:45 UTC", tz = "UTC") 385 | escape(test_datetime, con = con) 386 | Output 387 | '2020-01-01 01:23:45'::timestamp 388 | Code 389 | escape("2020-01-01 01:23:45 UTC", con = con) 390 | Output 391 | '2020-01-01 01:23:45 UTC' 392 | Code 393 | test_datetime_tz <- as.POSIXct("2020-01-01 18:23:45 UTC", tz = "America/Los_Angeles") 394 | escape(test_datetime_tz, con = con) 395 | Output 396 | '2020-01-02 02:23:45'::timestamp 397 | Code 398 | escape("2020-01-01 18:23:45 PST", con = con) 399 | Output 400 | '2020-01-01 18:23:45 PST' 401 | 402 | # two variable aggregates are translated correctly 403 | 404 | Code 405 | translate(cor(x, y), window = FALSE) 406 | Output 407 | CORR(x, y) 408 | Code 409 | translate(cor(x, y), window = TRUE) 410 | Output 411 | CORR(x, y) OVER () 412 | 413 | # these should give errors 414 | 415 | Code 416 | translate(grepl("dummy", txt, perl = TRUE)) 417 | Error 418 | Parameters `perl`, `fixed` and `useBytes` in grepl are not currently supported in DuckDB backend 419 | Code 420 | translate(quarter(x, type = "other")) 421 | Error 422 | Unsupported type other 423 | Code 424 | translate(quarter(x, fiscal_start = 2)) 425 | Error 426 | `fiscal_start` is not yet supported in DuckDB translation. Must be 1. 
427 | Code 428 | translate(str_pad(x, width = 10, side = "other")) 429 | Error 430 | Argument 'side' should be "left", "right" or "both" 431 | 432 | -------------------------------------------------------------------------------- /tests/testthat/data/binary_string.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krlmlr/tidyduck/1d8c2972412c5706519d87f906c92823505da9f3/tests/testthat/data/binary_string.parquet -------------------------------------------------------------------------------- /tests/testthat/data/userdata1.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krlmlr/tidyduck/1d8c2972412c5706519d87f906c92823505da9f3/tests/testthat/data/userdata1.parquet -------------------------------------------------------------------------------- /tests/testthat/test_dbplyr.R: -------------------------------------------------------------------------------- 1 | skip_if_no_R4 <- function() { # Skip the calling test on R < 4.0.0 (the tests use raw string literals). 2 | if (getRversion() < "4.0.0") { # compare full versions; R.Version()$major is a character string 3 | skip("R 4.0.0 or newer not available for testing") 4 | } 5 | } 6 | 7 | test_that("dbplyr generic scalars translated correctly", { 8 | skip_if_no_R4() 9 | skip_if_not_installed("dbplyr") 10 | translate <- function(...) dbplyr::translate_sql(..., con = duckdb::translate_duckdb()) 11 | sql <- function(...) dbplyr::sql(...) 
12 | 13 | expect_equal(translate(as.character(1)), sql(r"{CAST(1.0 AS TEXT)}")) 14 | expect_equal(translate(as.character(1L)), sql(r"{CAST(1 AS TEXT)}")) 15 | expect_equal(translate(as.numeric(1)), sql(r"{CAST(1.0 AS NUMERIC)}")) 16 | expect_equal(translate(as.double(1.2)), sql(r"{CAST(1.2 AS NUMERIC)}")) 17 | expect_equal(translate(as.integer(1.2)), sql(r"{CAST(1.2 AS INTEGER)}")) 18 | expect_equal(translate(as.integer64(1.2)), sql(r"{CAST(1.2 AS BIGINT)}")) 19 | expect_equal(translate(as.logical("TRUE")), sql(r"{CAST('TRUE' AS BOOLEAN)}")) 20 | expect_equal(translate(tolower("HELLO")), sql(r"{LOWER('HELLO')}")) 21 | expect_equal(translate(toupper("hello")), sql(r"{UPPER('hello')}")) 22 | expect_equal(translate(pmax(1, 2, na.rm = TRUE)), sql(r"{GREATEST(1.0, 2.0)}")) 23 | expect_equal(translate(pmin(1, 2, na.rm = TRUE)), sql(r"{LEAST(1.0, 2.0)}")) 24 | expect_equal(translate(as.character("2020-01-01")), sql(r"{CAST('2020-01-01' AS TEXT)}")) 25 | expect_equal(translate(c("2020-01-01", "2020-13-02")), sql(r"{('2020-01-01', '2020-13-02')}")) 26 | expect_equal(translate(iris[["sepal_length"]]), sql(r"{iris.sepal_length}")) 27 | expect_equal(translate(iris[[1]]), sql(r"{iris[1]}")) 28 | expect_equal(translate(cot(x)), sql(r"{COT(x)}")) 29 | expect_equal(translate(substr("test", 2, 3)), sql(r"{SUBSTR('test', 2, 2)}")) 30 | }) 31 | 32 | test_that("duckdb custom scalars translated correctly", { 33 | skip_if_no_R4() 34 | skip_if_not_installed("dbplyr") 35 | translate <- function(...) dbplyr::translate_sql(..., con = duckdb::translate_duckdb()) 36 | sql <- function(...) dbplyr::sql(...) 
37 | 38 | # expect_equal(translate(as(1,"CHARACTER")), sql(r"{CAST(1.0 AS TEXT}")) # Not implemented 39 | expect_equal(translate(as.raw(10)), sql(r"{CAST(10.0 AS VARBINARY)}")) 40 | expect_equal(translate(13 %% 5), sql(r"{FMOD(13.0, 5.0)}")) 41 | expect_equal(translate(35.8 %/% 4), sql(r"{FDIV(35.8, 4.0)}")) 42 | expect_equal(translate(35.8^2.51), sql(r"{POW(35.8, 2.51)}")) 43 | expect_equal(translate(bitwOr(x, 128L)), sql(r"{(CAST(x AS INTEGER)) | (CAST(128 AS INTEGER))}")) 44 | expect_equal(translate(bitwAnd(x, 128)), sql(r"{(CAST(x AS INTEGER)) & (CAST(128.0 AS INTEGER))}")) 45 | expect_equal(translate(bitwXor(x, 128L)), sql(r"{XOR((CAST(x AS INTEGER)), (CAST(128 AS INTEGER)))}")) 46 | expect_equal(translate(bitwNot(x)), sql(r"{~(CAST(x AS INTEGER))}")) 47 | expect_equal(translate(bitwShiftL(x, 5L)), sql(r"{(CAST(x AS INTEGER)) << (CAST(5 AS INTEGER))}")) 48 | expect_equal(translate(bitwShiftR(x, 4L)), sql(r"{(CAST(x AS INTEGER)) >> (CAST(4 AS INTEGER))}")) 49 | expect_equal(translate(log(x)), sql(r"{LN(x)}")) 50 | expect_equal(translate(log(x, base = 5)), sql(r"{LOG(x) / LOG(5.0)}")) 51 | expect_equal(translate(log(x, base = 10)), sql(r"{LOG10(x)}")) 52 | expect_equal(translate(log(x, base = 2)), sql(r"{LOG2(x)}")) 53 | expect_equal(translate(log10(x)), sql(r"{LOG10(x)}")) 54 | expect_equal(translate(log2(x)), sql(r"{LOG2(x)}")) 55 | expect_equal(translate(is.nan(var1)), sql(r"{(var1 IS NOT NULL AND PRINTF('%f', var1) = 'nan')}")) 56 | expect_equal(translate(is.infinite(var1)), sql(r"{(var1 IS NOT NULL AND REGEXP_MATCHES(PRINTF('%f', var1), 'inf'))}")) 57 | expect_equal(translate(is.finite(var1)), sql(r"{(NOT (var1 IS NULL OR REGEXP_MATCHES(PRINTF('%f', var1), 'inf|nan')))}")) 58 | expect_equal(translate(grepl("pattern", text)), sql(r"{REGEXP_MATCHES("text", 'pattern')}")) 59 | expect_equal(translate(grepl("pattern", text, ignore.case = TRUE)), sql(r"{REGEXP_MATCHES("text", '(?i)pattern')}")) 60 | expect_error(translate(grepl("dummy", txt, perl = TRUE))) 61 | 
expect_equal(translate(regexpr("pattern", text)), sql(r"{(CASE WHEN REGEXP_MATCHES("text", 'pattern') THEN (LENGTH(LIST_EXTRACT(STRING_SPLIT_REGEX("text", 'pattern'), 0))+1) ELSE -1 END)}")) 62 | expect_equal(translate(round(x, digits = 1.1)), sql(r"{ROUND(x, CAST(ROUND(1.1, 0) AS INTEGER))}")) 63 | expect_equal(translate(as.Date("2019-01-01")), sql(r"{CAST('2019-01-01' AS DATE)}")) 64 | expect_equal(translate(as.POSIXct("2019-01-01 01:01:01")), sql(r"{CAST('2019-01-01 01:01:01' AS TIMESTAMP)}")) 65 | }) 66 | 67 | 68 | 69 | test_that("pasting translated correctly", { 70 | skip_if_no_R4() 71 | skip_if_not_installed("dbplyr") 72 | translate <- function(...) dbplyr::translate_sql(..., con = duckdb::translate_duckdb()) 73 | sql <- function(...) dbplyr::sql(...) 74 | 75 | expect_equal(translate(paste("hi", "bye")), sql(r"{CONCAT_WS(' ', 'hi', 'bye')}")) 76 | expect_equal(translate(paste("hi", "bye", sep = "-")), sql(r"{CONCAT_WS('-', 'hi', 'bye')}")) 77 | expect_equal(translate(paste0("hi", "bye")), sql(r"{CONCAT_WS('', 'hi', 'bye')}")) 78 | 79 | expect_equal(translate(paste(x, y), window = FALSE), sql(r"{CONCAT_WS(' ', x, y)}")) 80 | expect_equal(translate(paste0(x, y), window = FALSE), sql(r"{CONCAT_WS('', x, y)}")) 81 | 82 | # expect_error(translate(paste0(x, collapse = ""), window = FALSE), "`collapse` not supported") 83 | }) 84 | 85 | 86 | # lubridate functions 87 | 88 | test_that("custom lubridate functions translated correctly", { 89 | skip_if_no_R4() 90 | skip_if_not_installed("dbplyr") 91 | translate <- function(...) dbplyr::translate_sql(..., con = duckdb::translate_duckdb()) 92 | sql <- function(...) dbplyr::sql(...) 93 | 94 | expect_equal(translate(yday(x)), sql(r"{EXTRACT(DOY FROM x)}")) 95 | expect_equal(translate(quarter(x)), sql(r"{EXTRACT(QUARTER FROM x)}")) 96 | expect_equal(translate(quarter(x, with_year = TRUE)), sql(r"{(EXTRACT(YEAR FROM x) || '.' 
|| EXTRACT(QUARTER FROM x))}")) 97 | expect_equal(translate(quarter(x, type = "year.quarter")), sql(r"{(EXTRACT(YEAR FROM x) || '.' || EXTRACT(QUARTER FROM x))}")) 98 | expect_equal(translate(quarter(x, type = "quarter")), sql(r"{EXTRACT(QUARTER FROM x)}")) 99 | expect_equal(translate(quarter(x, type = TRUE)), sql(r"{(EXTRACT(YEAR FROM x) || '.' || EXTRACT(QUARTER FROM x))}")) 100 | expect_equal(translate(quarter(x, type = FALSE)), sql(r"{EXTRACT(QUARTER FROM x)}")) 101 | expect_equal(translate(quarter(x, type = "date_first")), sql(r"{(CAST(DATE_TRUNC('QUARTER', x) AS DATE))}")) 102 | expect_equal(translate(quarter(x, type = "date_last")), sql(r"{(CAST((DATE_TRUNC('QUARTER', x) + INTERVAL '1 QUARTER' - INTERVAL '1 DAY') AS DATE))}")) 103 | expect_error(translate(quarter(x, type = "other"))) 104 | expect_error(translate(quarter(x, fiscal_start = 2))) 105 | expect_equal(translate(month(x, label = FALSE)), sql(r"{EXTRACT(MONTH FROM x)}")) 106 | expect_equal(translate(month(x, label = TRUE)), sql(r"{STRFTIME(x, '%b')}")) 107 | expect_equal(translate(month(x, label = TRUE, abbr = FALSE)), sql(r"{STRFTIME(x, '%B')}")) 108 | expect_equal(translate(qday(x)), sql(r"{DATE_DIFF('DAYS', DATE_TRUNC('QUARTER', CAST((x) AS DATE)), (CAST((x) AS DATE) + INTERVAL '1 DAY'))}")) 109 | expect_equal(translate(wday(x)), sql(r"{EXTRACT('dow' FROM CAST(x AS DATE) + 0) + 1}")) 110 | expect_equal(translate(wday(x, week_start = 4)), sql(r"{EXTRACT('dow' FROM CAST(x AS DATE) + 3) + 1}")) 111 | expect_equal(translate(wday(x, label = TRUE)), sql(r"{STRFTIME(x, '%a')}")) 112 | expect_equal(translate(wday(x, label = TRUE, abbr = FALSE)), sql(r"{STRFTIME(x, '%A')}")) 113 | expect_equal(translate(seconds(x)), sql(r"{TO_SECONDS(CAST(x AS BIGINT))}")) 114 | expect_equal(translate(minutes(x)), sql(r"{TO_MINUTES(CAST(x AS BIGINT))}")) 115 | expect_equal(translate(hours(x)), sql(r"{TO_HOURS(CAST(x AS BIGINT))}")) 116 | expect_equal(translate(days(x)), sql(r"{TO_DAYS(CAST(x AS INTEGER))}")) 117 | 
expect_equal(translate(weeks(x)), sql(r"{TO_DAYS(7 * CAST(x AS INTEGER))}")) 118 | expect_equal(translate(months(x)), sql(r"{TO_MONTHS(CAST(x AS INTEGER))}")) 119 | expect_equal(translate(years(x)), sql(r"{TO_YEARS(CAST(x AS INTEGER))}")) 120 | expect_equal(translate(floor_date(x, "month")), sql(r"{DATE_TRUNC('month', x)}")) 121 | expect_equal(translate(floor_date(x, "week")), sql(r"{CAST(x AS DATE) - CAST(EXTRACT('dow' FROM CAST(x AS DATE) + 0) AS INTEGER)}")) 122 | expect_equal(translate(floor_date(x, "week", week_start = 1)), sql(r"{DATE_TRUNC('week', x)}")) 123 | expect_equal(translate(floor_date(x, "week", week_start = 4)), sql(r"{CAST(x AS DATE) - CAST(EXTRACT('dow' FROM CAST(x AS DATE) + 3) AS INTEGER)}")) 124 | }) 125 | 126 | # stringr functions 127 | 128 | test_that("custom stringr functions translated correctly", { 129 | skip_if_no_R4() 130 | skip_if_not_installed("dbplyr") 131 | translate <- function(...) dbplyr::translate_sql(..., con = duckdb::translate_duckdb()) 132 | sql <- function(...) dbplyr::sql(...) 
133 | 134 | expect_equal(translate(str_c(x, y)), sql(r"{CONCAT_WS('', x, y)}")) 135 | # expect_error(translate(str_c(x, collapse = "")), "`collapse` not supported") 136 | expect_equal(translate(str_detect(x, y)), sql(r"{REGEXP_MATCHES(x, y)}")) 137 | expect_equal(translate(str_detect(x, y, negate = TRUE)), sql(r"{(NOT(REGEXP_MATCHES(x, y)))}")) 138 | expect_equal(translate(str_replace(x, y, z)), sql(r"{REGEXP_REPLACE(x, y, z)}")) 139 | expect_equal(translate(str_replace_all(x, y, z)), sql(r"{REGEXP_REPLACE(x, y, z, 'g')}")) 140 | expect_equal(translate(str_squish(x)), sql(r"{TRIM(REGEXP_REPLACE(x, '\s+', ' ', 'g'))}")) 141 | expect_equal(translate(str_remove(x, y)), sql(r"{REGEXP_REPLACE(x, y, '')}")) 142 | expect_equal(translate(str_remove_all(x, y)), sql(r"{REGEXP_REPLACE(x, y, '', 'g')}")) 143 | expect_equal(translate(str_to_sentence(x)), sql(r"{(UPPER(x[0]) || x[1:NULL])}")) 144 | expect_equal(translate(str_starts(x, y)), sql(r"{REGEXP_MATCHES(x,'^(?:'||y))}")) 145 | expect_equal(translate(str_ends(x, y)), sql(r"{REGEXP_MATCHES((?:x,y||')$')}")) 146 | expect_equal(translate(str_pad(x, width = 10)), sql(r"{LPAD(x, CAST(GREATEST(10, LENGTH(x)) AS INTEGER), ' ')}")) 147 | expect_equal(translate(str_pad(x, width = 10, side = "right")), sql(r"{RPAD(x, CAST(GREATEST(10, LENGTH(x)) AS INTEGER), ' ')}")) 148 | expect_equal(translate(str_pad(x, width = 10, side = "both", pad = "<")), sql(r"{RPAD(REPEAT('<', (10 - LENGTH(x)) / 2) || x, CAST(GREATEST(10, LENGTH(x)) AS INTEGER), '<')}")) 149 | expect_error(translate(str_pad(x, width = 10, side = "other"))) 150 | }) 151 | 152 | test_that("datetime escaping working as in DBI", { 153 | skip_if_no_R4() 154 | skip_if_not_installed("dbplyr") 155 | con <- duckdb::translate_duckdb() 156 | escape <- function(...) dbplyr::escape(...) 157 | sql <- function(...) dbplyr::sql(...) 
158 | 159 | test_date <- as.Date("2020-01-01") 160 | expect_equal(escape(test_date, con = con), sql(r"{'2020-01-01'::date}")) 161 | expect_equal(escape("2020-01-01", con = con), sql(r"{'2020-01-01'}")) 162 | 163 | test_datetime <- as.POSIXct("2020-01-01 01:23:45 UTC", tz = "UTC") 164 | expect_equal(escape(test_datetime, con = con), sql(r"{'2020-01-01 01:23:45'::timestamp}")) 165 | expect_equal(escape("2020-01-01 01:23:45 UTC", con = con), sql(r"{'2020-01-01 01:23:45 UTC'}")) 166 | 167 | test_datetime_tz <- as.POSIXct("2020-01-01 18:23:45 UTC", tz = "America/Los_Angeles") 168 | expect_equal(escape(test_datetime_tz, con = con), sql(r"{'2020-01-02 02:23:45'::timestamp}")) 169 | expect_equal(escape("2020-01-01 18:23:45 PST", con = con), sql(r"{'2020-01-01 18:23:45 PST'}")) 170 | }) 171 | 172 | test_that("two variable aggregates are translated correctly", { 173 | skip_if_no_R4() 174 | skip_if_not_installed("dbplyr") 175 | translate <- function(...) dbplyr::translate_sql(..., con = duckdb::translate_duckdb()) 176 | sql <- function(...) dbplyr::sql(...) 177 | 178 | expect_equal(translate(cor(x, y), window = FALSE), sql(r"{CORR(x, y)}")) 179 | expect_equal(translate(cor(x, y), window = TRUE), sql(r"{CORR(x, y) OVER ()}")) 180 | }) 181 | 182 | 183 | 184 | 185 | # Snapshot tests 186 | 187 | test_that("snapshots of dbplyr generic scalar translation", { 188 | skip_on_cran() 189 | skip_if_not_installed("dbplyr") 190 | local_edition(3) 191 | translate <- function(...) 
dbplyr::translate_sql(..., con = duckdb::translate_duckdb()) 192 | 193 | expect_snapshot({ 194 | translate(as.character(1)) 195 | translate(as.character(1L)) 196 | translate(as.numeric(1)) 197 | translate(as.double(1.2)) 198 | translate(as.integer(1.2)) 199 | translate(as.integer64(1.2)) 200 | translate(as.logical("TRUE")) 201 | translate(tolower("HELLO")) 202 | translate(toupper("hello")) 203 | translate(pmax(1, 2, na.rm = TRUE)) 204 | translate(pmin(1, 2, na.rm = TRUE)) 205 | translate(as.character("2020-01-01")) 206 | translate(c("2020-01-01", "2020-13-02")) 207 | translate(iris[["sepal_length"]]) 208 | translate(iris[[1]]) 209 | translate(cot(x)) 210 | translate(substr("test", 2, 3)) 211 | }) 212 | }) 213 | 214 | 215 | test_that("snapshots of duckdb custom scalars translations", { 216 | skip_on_cran() 217 | skip_if_not_installed("dbplyr") 218 | local_edition(3) 219 | translate <- function(...) dbplyr::translate_sql(..., con = duckdb::translate_duckdb()) 220 | 221 | expect_snapshot({ 222 | # translate(as(1,"CHARACTER")) # Not implemented 223 | translate(as.raw(10)) 224 | translate(13 %% 5) 225 | translate(35.8 %/% 4) 226 | translate(35.8^2.51) 227 | translate(bitwOr(x, 128L)) 228 | translate(bitwAnd(x, 128)) 229 | translate(bitwXor(x, 128L)) 230 | translate(bitwNot(x)) 231 | translate(bitwShiftL(x, 5L)) 232 | translate(bitwShiftR(x, 4L)) 233 | translate(log(x)) 234 | translate(log(x, base = 5)) 235 | translate(log(x, base = 10)) 236 | translate(log(x, base = 2)) 237 | translate(log10(x)) 238 | translate(log2(x)) 239 | translate(is.nan(var1)) 240 | translate(is.infinite(var1)) 241 | translate(is.finite(var1)) 242 | translate(grepl("pattern", text)) 243 | translate(grepl("pattern", text, ignore.case = TRUE)) 244 | # translate(grepl("dummy", txt, perl = TRUE)) # Error tests later 245 | translate(regexpr("pattern", text)) 246 | translate(round(x, digits = 1.1)) 247 | translate(as.Date("2019-01-01")) 248 | translate(as.POSIXct("2019-01-01 01:01:01")) 249 | }) 250 | 
}) 251 | 252 | 253 | 254 | test_that("snapshot tests for pasting translate", { 255 | skip_on_cran() 256 | skip_if_not_installed("dbplyr") 257 | local_edition(3) 258 | translate <- function(...) dbplyr::translate_sql(..., con = duckdb::translate_duckdb()) 259 | 260 | expect_snapshot({ 261 | translate(paste("hi", "bye")) 262 | translate(paste("hi", "bye", sep = "-")) 263 | translate(paste0("hi", "bye")) 264 | 265 | translate(paste(x, y), window = FALSE) 266 | translate(paste0(x, y), window = FALSE) 267 | 268 | # translate(paste0(x, collapse = ""), window = FALSE) # Expected error 269 | }) 270 | }) 271 | 272 | 273 | # lubridate functions 274 | 275 | test_that("snapshots for custom lubridate functions translated correctly", { 276 | skip_on_cran() 277 | skip_if_not_installed("dbplyr") 278 | local_edition(3) 279 | translate <- function(...) dbplyr::translate_sql(..., con = duckdb::translate_duckdb()) 280 | 281 | expect_snapshot({ 282 | translate(yday(x)) 283 | translate(quarter(x)) 284 | translate(quarter(x)) 285 | translate(quarter(x, type = "year.quarter")) 286 | translate(quarter(x, type = "quarter")) 287 | translate(quarter(x, type = TRUE)) 288 | translate(quarter(x, type = FALSE)) 289 | translate(quarter(x, type = "date_first")) 290 | translate(quarter(x, type = "date_last")) 291 | # translate(quarter(x, type = "other")) # Not supported - error 292 | # translate(quarter(x, fiscal_start = 2)) # Not supported - error 293 | translate(month(x, label = FALSE)) 294 | translate(month(x, label = TRUE)) 295 | translate(month(x, label = TRUE, abbr = FALSE)) 296 | translate(qday(x)) 297 | translate(wday(x)) 298 | translate(wday(x, week_start = 4)) 299 | translate(wday(x, label = TRUE)) 300 | translate(wday(x, label = TRUE, abbr = FALSE)) 301 | translate(seconds(x)) 302 | translate(minutes(x)) 303 | translate(hours(x)) 304 | translate(days(x)) 305 | translate(weeks(x)) 306 | translate(months(x)) 307 | translate(years(x)) 308 | translate(floor_date(x, "month")) 309 | 
translate(floor_date(x, "week")) 310 | translate(floor_date(x, "week", week_start = 1)) 311 | translate(floor_date(x, "week", week_start = 4)) 312 | }) 313 | }) 314 | 315 | # stringr functions 316 | 317 | test_that("snapshots for custom stringr functions translated correctly", { 318 | skip_on_cran() 319 | skip_if_not_installed("dbplyr") 320 | local_edition(3) 321 | translate <- function(...) dbplyr::translate_sql(..., con = duckdb::translate_duckdb()) 322 | 323 | expect_snapshot({ 324 | translate(str_c(x, y)) 325 | # translate(str_c(x, collapse = "")) # Error 326 | translate(str_detect(x, y)) 327 | translate(str_detect(x, y, negate = TRUE)) 328 | translate(str_replace(x, y, z)) 329 | translate(str_replace_all(x, y, z)) 330 | translate(str_squish(x)) 331 | translate(str_remove(x, y)) 332 | translate(str_remove_all(x, y)) 333 | translate(str_to_sentence(x)) 334 | translate(str_starts(x, y)) 335 | translate(str_ends(x, y)) 336 | translate(str_pad(x, width = 10)) 337 | translate(str_pad(x, width = 10, side = "right")) 338 | translate(str_pad(x, width = 10, side = "both", pad = "<")) 339 | # translate(str_pad(x, width = 10, side = "other")) # Error 340 | }) 341 | }) 342 | 343 | test_that("snapshots datetime escaping working as in DBI", { 344 | skip_on_cran() 345 | skip_if_not_installed("dbplyr") 346 | local_edition(3) 347 | con <- duckdb::translate_duckdb() 348 | escape <- function(...) dbplyr::escape(...) 
349 | 350 | expect_snapshot({ 351 | test_date <- as.Date("2020-01-01") 352 | escape(test_date, con = con) 353 | escape("2020-01-01", con = con) 354 | 355 | test_datetime <- as.POSIXct("2020-01-01 01:23:45 UTC", tz = "UTC") 356 | escape(test_datetime, con = con) 357 | escape("2020-01-01 01:23:45 UTC", con = con) 358 | 359 | test_datetime_tz <- as.POSIXct("2020-01-01 18:23:45 UTC", tz = "America/Los_Angeles") 360 | escape(test_datetime_tz, con = con) 361 | escape("2020-01-01 18:23:45 PST", con = con) 362 | }) 363 | }) 364 | 365 | test_that("two variable aggregates are translated correctly", { 366 | skip_on_cran() 367 | skip_if_not_installed("dbplyr") 368 | local_edition(3) 369 | translate <- function(...) dbplyr::translate_sql(..., con = duckdb::translate_duckdb()) 370 | 371 | expect_snapshot({ 372 | translate(cor(x, y), window = FALSE) 373 | translate(cor(x, y), window = TRUE) 374 | }) 375 | }) 376 | 377 | test_that("these should give errors", { 378 | skip_on_cran() 379 | skip_if_not_installed("dbplyr") 380 | local_edition(3) 381 | translate <- function(...) 
dbplyr::translate_sql(..., con = duckdb::translate_duckdb()) 382 | 383 | expect_snapshot(error = TRUE, { 384 | translate(grepl("dummy", txt, perl = TRUE)) # Expected error 385 | # translate(paste0(x, collapse = ""), window = FALSE) # Skip because of changing rlang_error (sql_paste()) 386 | translate(quarter(x, type = "other")) # Not supported - error 387 | translate(quarter(x, fiscal_start = 2)) # Not supported - error 388 | # translate(str_c(x, collapse = "")) # Skip because of changing rlang_error (sql_paste()) 389 | translate(str_pad(x, width = 10, side = "other")) # Error 390 | }) 391 | }) 392 | -------------------------------------------------------------------------------- /tests/testthat/test_tbl__duckdb_connection.R: -------------------------------------------------------------------------------- 1 | skip_on_cran() 2 | `%>%` <- dplyr::`%>%` 3 | 4 | test_that("Parquet files can be registered with dplyr::tbl()", { 5 | con <- DBI::dbConnect(duckdb::duckdb()) 6 | on.exit(DBI::dbDisconnect(con, shutdown = TRUE)) 7 | 8 | tab0 <- dplyr::tbl(con, "data/userdata1.parquet") 9 | expect_true(inherits(tab0, "tbl_duckdb_connection")) 10 | expect_true(tab0 %>% dplyr::count() %>% dplyr::collect() == 1000) 11 | 12 | tab1 <- dplyr::tbl(con, "read_parquet(['data/userdata1.parquet'])") 13 | expect_true(inherits(tab1, "tbl_duckdb_connection")) 14 | expect_true(tab1 %>% dplyr::count() %>% dplyr::collect() == 1000) 15 | 16 | tab2 <- dplyr::tbl(con, "'data/userdata1.parquet'") 17 | expect_true(inherits(tab2, "tbl_duckdb_connection")) 18 | expect_true(tab2 %>% dplyr::count() %>% dplyr::collect() == 1000) 19 | 20 | tab3 <- dplyr::tbl(con, "parquet_scan(['data/userdata1.parquet'])") 21 | expect_true(inherits(tab3, "tbl_duckdb_connection")) 22 | expect_true(tab3 %>% dplyr::count() %>% dplyr::collect() == 1000) 23 | }) 24 | 25 | 26 | test_that("Object cache can be enabled for parquet files with dplyr::tbl()", { 27 | con <- DBI::dbConnect(duckdb::duckdb()) 28 | 
on.exit(DBI::dbDisconnect(con, shutdown = TRUE)) 29 | 30 | DBI::dbExecute(con, "SET enable_object_cache=False;") 31 | tab1 <- dplyr::tbl(con, "data/userdata1.parquet", cache = TRUE) 32 | expect_true(DBI::dbGetQuery(con, "SELECT value FROM duckdb_settings() WHERE name='enable_object_cache';") == "True") 33 | 34 | DBI::dbExecute(con, "SET enable_object_cache=False;") 35 | tab2 <- dplyr::tbl(con, "'data/userdata1.parquet'", cache = FALSE) 36 | expect_true(DBI::dbGetQuery(con, "SELECT value FROM duckdb_settings() WHERE name='enable_object_cache';") == "False") 37 | }) 38 | 39 | 40 | test_that("CSV files can be registered with dplyr::tbl()", { 41 | path <- file.path(tempdir(), "duckdbtest.csv") 42 | write.csv(iris, file = path) 43 | on.exit(unlink(path)) 44 | 45 | con <- DBI::dbConnect(duckdb::duckdb()) 46 | on.exit(DBI::dbDisconnect(con, shutdown = TRUE), add = TRUE) 47 | 48 | tab1 <- dplyr::tbl(con, path) 49 | expect_true(inherits(tab1, "tbl_duckdb_connection")) 50 | expect_true(tab1 %>% dplyr::count() %>% dplyr::collect() == 150) 51 | 52 | tab2 <- dplyr::tbl(con, paste0("read_csv_auto('", path, "')")) 53 | expect_true(inherits(tab2, "tbl_duckdb_connection")) 54 | expect_true(tab2 %>% dplyr::count() %>% dplyr::collect() == 150) 55 | }) 56 | 57 | test_that("Other replacement scans or functions can be registered with dplyr::tbl()", { 58 | con <- DBI::dbConnect(duckdb::duckdb()) 59 | on.exit(DBI::dbDisconnect(con, shutdown = TRUE)) 60 | 61 | obj <- dplyr::tbl(con, "duckdb_keywords()") 62 | expect_true(inherits(obj, "tbl_duckdb_connection")) 63 | expect_true(obj %>% dplyr::filter(keyword_name == "all") %>% dplyr::count() %>% dplyr::collect() == 1) 64 | }) 65 | 66 | rm(`%>%`) -------------------------------------------------------------------------------- /tidyduck.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | 
EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source --no-byte-compile 21 | PackageRoxygenize: rd,collate,namespace 22 | --------------------------------------------------------------------------------