├── LICENSE
├── tests
    ├── testthat.R
    └── testthat
    │   └── test-translate-teradata.R
├── .gitignore
├── .Rbuildignore
├── .travis.yml
├── man
    ├── todbc.Rd
    ├── n_if.Rd
    ├── like.Rd
    ├── count_if.Rd
    ├── mod.Rd
    ├── bool_to_int.Rd
    ├── db_list_tables_with_pattern.Rd
    ├── to_timestamp.Rd
    ├── blob_to_string.Rd
    ├── TeradataOdbcDriver.Rd
    └── dbConnect-TeradataOdbcDriver-method.Rd
├── dplyr.teradata.Rproj
├── R
    ├── zzz.R
    ├── utils.R
    ├── db-odbc-teradata.R
    ├── tbl-teradata.R
    ├── translate-sql-base-teradata.R
    └── Driver.R
├── NAMESPACE
├── DESCRIPTION
├── NEWS.md
├── vignettes
    ├── dplyr.teradata.R
    ├── dplyr.teradata.Rmd
    └── dplyr.teradata.html
├── README.Rmd
└── README.md


/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2017-2020
2 | COPYRIGHT HOLDER: Koji MAKIYAMA
3 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(dplyr.teradata)
3 | 
4 | test_check("dplyr.teradata")
5 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | .Rprofile
6 | tests/testthat/test-my_*
7 | README\.html
8 | inst/doc
9 | 


--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | ^\.Rprofile$
4 | README\.Rmd
5 | README\.html
6 | ^\.travis\.yml$
7 | tests/testthat/test-my_*
8 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | # R for travis: see documentation at https://docs.travis-ci.com/user/languages/r
 2 | 
 3 | language: R
 4 | sudo: false
 5 | cache: packages
 6 | 
 7 | r:
 8 |   - oldrel
 9 |   - release
10 |   - devel
11 | 
12 | r_github_packages:
13 |   - jimhester/covr
14 | 
15 | after_success:
16 |   - Rscript -e 'covr::coveralls()'
17 | 


--------------------------------------------------------------------------------
/man/todbc.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/Driver.R
 3 | \name{todbc}
 4 | \alias{todbc}
 5 | \title{Teradata Odbc driver}
 6 | \usage{
 7 | todbc()
 8 | }
 9 | \description{
10 | Driver for a Teradata ODBC database.
11 | }
12 | \examples{
13 | \dontrun{
14 | todbc()
15 | }
16 | }
17 | 


--------------------------------------------------------------------------------
/man/n_if.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/translate-sql-base-teradata.R
 3 | \name{n_if}
 4 | \alias{n_if}
 5 | \title{Translatable function to count rows that satisfy a condition}
 6 | \usage{
 7 | n_if(cond)
 8 | }
 9 | \arguments{
10 | \item{cond}{condition}
11 | }
12 | \description{
13 | Translatable function to count rows that satisfy a condition
14 | }
15 | 


--------------------------------------------------------------------------------
/man/like.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/translate-sql-base-teradata.R
 3 | \name{like}
 4 | \alias{like}
 5 | \title{Translatable function for 'LIKE' operator}
 6 | \usage{
 7 | like(x, pattern)
 8 | }
 9 | \arguments{
10 | \item{x}{column name}
11 | 
12 | \item{pattern}{LIKE pattern}
13 | }
14 | \description{
15 | Translatable function for 'LIKE' operator
16 | }
17 | 


--------------------------------------------------------------------------------
/man/count_if.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/translate-sql-base-teradata.R
 3 | \name{count_if}
 4 | \alias{count_if}
 5 | \title{Translatable function to count rows that satisfy a condition}
 6 | \usage{
 7 | count_if(cond)
 8 | }
 9 | \arguments{
10 | \item{cond}{condition}
11 | }
12 | \description{
13 | Translatable function to count rows that satisfy a condition
14 | }
15 | 


--------------------------------------------------------------------------------
/man/mod.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/translate-sql-base-teradata.R
 3 | \name{mod}
 4 | \alias{mod}
 5 | \title{Translatable Function for '\%\%' operator}
 6 | \usage{
 7 | mod(x, divisor)
 8 | }
 9 | \arguments{
10 | \item{x}{dividend}
11 | 
12 | \item{divisor}{divisor}
13 | }
14 | \value{
15 | modulo
16 | }
17 | \description{
18 | Translatable Function for '\%\%' operator
19 | }
20 | 


--------------------------------------------------------------------------------
/man/bool_to_int.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/translate-sql-base-teradata.R
 3 | \name{bool_to_int}
 4 | \alias{bool_to_int}
 5 | \title{Translatable function to convert boolean to integer}
 6 | \usage{
 7 | bool_to_int(cond)
 8 | }
 9 | \arguments{
10 | \item{cond}{condition}
11 | }
12 | \value{
13 | integer vector: 1 where cond is TRUE, 0 where cond is FALSE
14 | }
15 | \description{
16 | Translatable function to convert boolean to integer
17 | }
18 | 


--------------------------------------------------------------------------------
/dplyr.teradata.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | PackageRoxygenize: rd,collate,namespace,vignette
22 | 


--------------------------------------------------------------------------------
/man/db_list_tables_with_pattern.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{db_list_tables_with_pattern}
 4 | \alias{db_list_tables_with_pattern}
 5 | \title{List tables with specified pattern}
 6 | \usage{
 7 | db_list_tables_with_pattern(con, pattern)
 8 | }
 9 | \arguments{
10 | \item{con}{Teradata connection.}
11 | 
12 | \item{pattern}{character string containing a regular expression.}
13 | }
14 | \description{
15 | List tables with specified pattern
16 | }
17 | 


--------------------------------------------------------------------------------
/man/to_timestamp.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/translate-sql-base-teradata.R
 3 | \name{to_timestamp}
 4 | \alias{to_timestamp}
 5 | \title{Translatable function to convert UNIX time to time-stamp}
 6 | \usage{
 7 | to_timestamp(x)
 8 | }
 9 | \arguments{
10 | \item{x}{name of a column that stores UNIX time (e.g. 1609459200)}
11 | }
12 | \value{
13 | time-stamp (e.g. "2021-01-01 00:00:00")
14 | }
15 | \description{
16 | Translatable function to convert UNIX time to time-stamp
17 | }
18 | 


--------------------------------------------------------------------------------
/R/zzz.R:
--------------------------------------------------------------------------------
 1 | .onAttach <- function(libname, pkgname) {
 2 |   # Register the Teradata translators in dbplyr's ODBC scalar environment.
 3 |   translators <- list(
 4 |     "cut" = sql_cut, "like" = sql_like, "to_timestamp" = sql_to_timestamp,
 5 |     "%%" = sql_mod, "count_if" = sql_count_if, "n_if" = sql_count_if,
 6 |     "bool_to_int" = sql_bool_to_int)
 7 |   for (fn_name in names(translators))
 8 |     assign(fn_name, translators[[fn_name]], envir = dbplyr::base_odbc_scalar)
 9 | }
10 | 


--------------------------------------------------------------------------------
/man/blob_to_string.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{blob_to_string}
 4 | \alias{blob_to_string}
 5 | \title{Convert blob to character.}
 6 | \usage{
 7 | blob_to_string(blob)
 8 | }
 9 | \arguments{
10 | \item{blob}{blob vector.}
11 | }
12 | \description{
13 | Convert blob to character.
14 | }
15 | \examples{
16 | (x <- blob::as_blob("Good morning"))
17 | #> [1] blob[12 B]
18 | x[[1]]
19 | #> [1] 47 6f 6f 64 20 6d 6f 72 6e 69 6e 67
20 | blob_to_string(x)
21 | #> [1] "476f6f64206d6f726e696e67"
22 | 
23 | }
24 | 


--------------------------------------------------------------------------------
/man/TeradataOdbcDriver.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/Driver.R
 3 | \docType{class}
 4 | \name{TeradataOdbcDriver}
 5 | \alias{TeradataOdbcDriver}
 6 | \alias{TeradataOdbcDriver-class}
 7 | \alias{show,TeradataOdbcDriver-method}
 8 | \title{Teradata Odbc Driver Methods}
 9 | \usage{
10 | \S4method{show}{TeradataOdbcDriver}(object)
11 | }
12 | \arguments{
13 | \item{object}{Any R object}
14 | }
15 | \description{
16 | Implementations of pure virtual functions defined in the \code{DBI} package
17 | for \code{TeradataOdbcDriver} objects.
18 | }
19 | 


--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
 1 | #' List tables with specified pattern
 2 | #'
 3 | #' @param con Teradata connection.
 4 | #' @param pattern character string containing a regular expression.
 5 | #' @return the names from `db_list_tables(con)` that match `pattern`.
 6 | #' @importFrom dplyr db_list_tables
 7 | #' @export
 8 | db_list_tables_with_pattern <- function(con, pattern) {
 9 |   all_tables <- db_list_tables(con)
10 |   all_tables[grepl(pattern, all_tables)]
11 | }
12 | 
13 | #' Convert blob to character.
14 | #'
15 | #' @param blob blob vector.
16 | #' @return character vector holding the hexadecimal digits of each element.
17 | #' @examples
18 | #' (x <- blob::as_blob("Good morning"))
19 | #' #> [1] blob[12 B]
20 | #' x[[1]]
21 | #' #> [1] 47 6f 6f 64 20 6d 6f 72 6e 69 6e 67
22 | #' blob_to_string(x)
23 | #' #> [1] "476f6f64206d6f726e696e67"
24 | #' @export
25 | blob_to_string <- function(blob) {
26 |   raw_to_hex <- function(raw_bytes) paste0(as.character(raw_bytes), collapse = "")
27 |   vapply(blob, raw_to_hex, FUN.VALUE = character(1L))
28 | }
29 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | S3method(db_create_table,Teradata)
 4 | S3method(db_has_table,Teradata)
 5 | S3method(db_list_tables,Teradata)
 6 | S3method(sample_frac,tbl_Teradata)
 7 | S3method(sample_n,tbl_Teradata)
 8 | S3method(slice_sample,tbl_Teradata)
 9 | S3method(sql_build,op_sample)
10 | export(blob_to_string)
11 | export(bool_to_int)
12 | export(count_if)
13 | export(db_list_tables_with_pattern)
14 | export(like)
15 | export(mod)
16 | export(n_if)
17 | export(to_timestamp)
18 | export(todbc)
19 | exportClasses(TeradataOdbcDriver)
20 | exportMethods(dbConnect)
21 | exportMethods(show)
22 | import(DBI)
23 | import(bit64)
24 | import(dbplyr)
25 | import(methods)
26 | import(odbc)
27 | importFrom(dplyr,db_create_table)
28 | importFrom(dplyr,db_has_table)
29 | importFrom(dplyr,db_list_tables)
30 | importFrom(dplyr,sample_frac)
31 | importFrom(dplyr,sample_n)
32 | importFrom(dplyr,slice_sample)
33 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: dplyr.teradata
 2 | Type: Package
 3 | Title: A 'Teradata' Backend for 'dplyr'
 4 | Version: 0.4.1.9001
 5 | Description: A 'Teradata' backend for 'dplyr'. It makes it possible to operate
 6 |   'Teradata' database <https://www.teradata.com/products-and-services/teradata-database/>
 7 |   in the same way as manipulating data frames with 'dplyr'.
 8 | Authors@R: c(
 9 |     person("Koji", "Makiyama", email = "hoxo.smile@gmail.com", role = c("cre", "aut")),
10 |     person("Jim", "Hester", role = "ctb"))
11 | URL: https://github.com/hoxo-m/dplyr.teradata
12 | BugReports: https://github.com/hoxo-m/dplyr.teradata/issues
13 | License: MIT + file LICENSE
14 | Encoding: UTF-8
15 | LazyData: true
16 | Depends:
17 |     dplyr (>= 1.0.0),
18 |     dbplyr (>= 2.0.0),
19 |     odbc (>= 1.3.0)
20 | Imports:
21 |     bit64,
22 |     DBI (>= 1.1.0),
23 |     methods
24 | Suggests:
25 |     blob,
26 |     covr,
27 |     knitr,
28 |     rmarkdown,
29 |     testthat
30 | Roxygen: list(markdown = TRUE)
31 | RoxygenNote: 7.1.1
32 | VignetteBuilder: knitr
33 | 


--------------------------------------------------------------------------------
/R/db-odbc-teradata.R:
--------------------------------------------------------------------------------
 1 | # DBI methods ------------------------------------------------------------------
 2 | 
 3 | #' @importFrom dplyr db_list_tables
 4 | #' @export
 5 | db_list_tables.Teradata <- function(con) {
 6 |   # Fetch every database/table name pair listed in DBC.TABLES.
 7 |   all_tables <- dbGetQuery(con, "SELECT DATABASENAME, TABLENAME FROM DBC.TABLES")
 8 |   current_db <- tolower(con@info$dbname)
 9 |   if (!nzchar(current_db)) {
10 |     # The connection names no database: return qualified "database.table" names.
11 |     return(trimws(sprintf("%s.%s", all_tables$DatabaseName, all_tables$TableName)))
12 |   }
13 |   # Keep only the tables of the connection's database (case-insensitive match).
14 |   in_current_db <- tolower(trimws(all_tables$DatabaseName)) == current_db
15 |   trimws(all_tables[in_current_db, ]$TableName)
16 | }
17 | 
18 | #' @importFrom dplyr db_has_table db_list_tables
19 | #' @export
20 | db_has_table.Teradata <- function(con, table, ...) {
21 |   # Both sides are lower-cased so the lookup ignores case.
22 |   existing_tables <- tolower(db_list_tables(con))
23 |   tolower(table) %in% existing_tables
24 | }
25 | 
26 | #' @importFrom dplyr db_create_table
27 | #' @export
28 | db_create_table.Teradata <- function(con, table, types, temporary = TRUE, ...) {
29 |   # Generate the CREATE TABLE statement and run it on the connection.
30 |   dbExecute(con, sqlCreateTable(con, table, types, temporary = temporary))
31 | }
32 | 


--------------------------------------------------------------------------------
/tests/testthat/test-translate-teradata.R:
--------------------------------------------------------------------------------
 1 | context("translate-teradata")
 2 | 
 3 | test_that("custom scalar translated correctly", {
 4 |   # Helper: translate an R expression against a simulated Teradata connection.
 5 |   trans <- function(x) {
 6 |     translate_sql(!!enquo(x), con = simulate_teradata())
 7 |   }
 8 |   # Each expectation pins the exact SQL text produced for one translation.
 9 |   expect_equal(trans(case_when(x == 1L ~ 1L, x == 2L ~ 2L, TRUE ~ 3L)),
10 |                sql('CASE\nWHEN (`x` = 1) THEN (1)\nWHEN (`x` = 2) THEN (2)\nELSE (3)\nEND'))
11 |   expect_equal(trans(cut(x, 1:3)),
12 |                sql("CASE\n WHEN x > 1 AND x <= 2 THEN '(1,2]'\n WHEN x > 2 AND x <= 3 THEN '(2,3]'\n ELSE NULL\nEND"))
13 |   expect_equal(trans(like(x, "%pattern_")), sql("`x` LIKE '%pattern_'"))
14 |   expect_equal(trans(to_timestamp(x)),
15 |                sql("CAST(DATE '1970-01-01' + (`x` / 86400) AS TIMESTAMP(0)) + (`x` MOD 86400) * (INTERVAL '00:00:01' HOUR TO SECOND)"))
16 |   expect_equal(trans(x %% 5L), sql("`x` MOD 5"))
17 |   expect_equal(trans(count_if(x > 0L)), sql("SUM(CASE WHEN (`x` > 0) THEN 1 WHEN NOT(`x` > 0) THEN 0 END)"))
18 |   expect_equal(trans(n_if(x > 0L)), sql("SUM(CASE WHEN (`x` > 0) THEN 1 WHEN NOT(`x` > 0) THEN 0 END)"))
19 |   expect_equal(trans(bool_to_int(x > 0L)), sql("CASE WHEN (`x` > 0) THEN 1 WHEN NOT(`x` > 0) THEN 0 END"))
20 | })
21 | 


--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
 1 | # dplyr.teradata 0.4.1.9001
 2 | 
 3 | ## New features
 4 | 
 5 | - Add `sample_frac()` and `slice_sample()`.
 6 | 
 7 | # dplyr.teradata 0.4.1
 8 | 
 9 | - Now supports **dplyr** 1.0.0 and **dbplyr** 2.0.0 (#26)
10 | 
11 | ## Changes
12 | 
13 | - Delegate Teradata Odbc Connection Methods to **odbc** (#27)
14 | - Deprecate `todbc()` (#28). Use `odbc::odbc()` instead. If you use Teradata ODBC Driver 15.20 or earlier, you may need to set `dbms.name` explicitly, e.g. `dbConnect(odbc(), ..., dbms.name = 'Teradata')`.
15 | 
16 | ## Improvements
17 | 
18 | - Export dummy functions (`like`, `to_timestamp`, `mod`, `count_if`, `n_if`, and `bool_to_int`) to make code completion work (#29). These functions can only be used inside the functions to build queries (e.g. `mutate`).
19 | 
20 | # dplyr.teradata 0.3.2
21 | 
22 | ## Changes
23 | 
24 | - Delegate SQL translations of `case_when()` and **lubridate**-family (e.g. `year()`, `month()`, `day()`) to **dbplyr**. See https://github.com/tidyverse/dbplyr/blob/master/NEWS.md#sql-translations.
25 | 
26 | # dplyr.teradata 0.3.1
27 | 
28 | ## Changes
29 | 
30 | - Add the dots arguments to `sample_n()` for **dplyr** 0.8. See https://github.com/tidyverse/dplyr/blob/master/NEWS.md#breaking-changes.
31 | 
32 | # dplyr.teradata 0.3.0
33 | 
34 | ## New features
35 | 
36 | - Add `sample_n()` (#17)
37 | 
38 | ## Changes
39 | 
40 | - Remove dependency on **tidyverse**
41 | - Revert loading messages
42 | - Remove dbUnQuoteIdentifier, as it was removed in favor of
43 |   DBI::dbUnquoteIdentifier in odbc 1.1.6 (#14, @jimhester)
44 |   
45 | ## Improvements
46 | 
47 | - Activate `charset` argument on `dbConnect()`
48 | 
49 | # dplyr.teradata 0.2.0
50 | 
51 | ## New features
52 | 
53 | - Add `count_if()` and `n_if()` (#6)
54 | - Add `bool_to_int()`
55 | 
56 | ## Improvements
57 | 
58 | - Change loading messages
59 | 


--------------------------------------------------------------------------------
/R/tbl-teradata.R:
--------------------------------------------------------------------------------
 1 | #' @import odbc dbplyr
 2 | NULL
 3 | 
 4 | #' @importFrom dplyr sample_n
 5 | #' @export
 6 | sample_n.tbl_Teradata <- function(tbl, size, replace = FALSE, weight = NULL,
 7 |                                   .env = NULL, randomized_allocation = TRUE, ...) {
 8 |   # The sample size is an integer row count of at least 1.
 9 |   size <- as.integer(size)
10 |   stopifnot(size >= 1L)
11 |   # `weight` and `.env` are not translated; warn when either one is supplied.
12 |   ignored_args_given <- !(is.null(weight) && is.null(.env))
13 |   if (ignored_args_given) warning("sample_n() has not implemented for arguments weight and .env")
14 |   sample_impl(tbl, size, replace, randomized_allocation)
15 | }
16 | 
17 | #' @importFrom dplyr sample_frac
18 | #' @export
19 | sample_frac.tbl_Teradata <- function(tbl, size = 1, replace = FALSE, weight = NULL,
20 |                                      .env = NULL, randomized_allocation = TRUE, ...) {
21 |   # NOTE(review): the default `size = 1` fails the `size < 1.0` check below,
22 |   # so callers must always pass a fraction strictly between 0 and 1.
23 |   size <- as.double(size)
24 |   stopifnot(0.0 < size, size < 1.0)
25 |   ignored_args_given <- !(is.null(weight) && is.null(.env))
26 |   if (ignored_args_given) warning("sample_frac() has not implemented for arguments weight and .env")
27 |   sample_impl(tbl, size, replace, randomized_allocation)
28 | }
29 | 
30 | #' @importFrom dplyr slice_sample
31 | #' @export
32 | slice_sample.tbl_Teradata <- function(.data, ..., randomized_allocation = TRUE,
33 |                                       n, prop, weight_by = NULL, replace = FALSE) {
34 |   # With neither `n` nor `prop` supplied, sample a single row.
35 |   if (missing(n) && missing(prop)) n <- 1L
36 |   # `weight_by` is not translated; warn when it is supplied.
37 |   if (!is.null(weight_by)) {
38 |     warning("slice_sample() has not implemented for the argument 'weight_by'.")
39 |   }
40 |   # `prop` takes precedence whenever it is supplied, even alongside `n`.
41 |   if (!missing(prop)) {
42 |     # Fraction branch: the value must lie strictly between 0 and 1.
43 |     prop <- as.double(prop)
44 |     stopifnot(0.0 < prop, prop < 1.0)
45 |     return(sample_impl(.data, prop, replace, randomized_allocation))
46 |   }
47 |   # Row-count branch: coerce to integer and require at least one row.
48 |   n <- as.integer(n)
49 |   stopifnot(n >= 1L)
50 |   sample_impl(.data, n, replace, randomized_allocation)
51 | }
52 | 
53 | sample_impl <- function(tbl, size, replace, randomized_allocation) {
54 |   # A tbl that is already sampled just keeps the smaller of the two sizes.
55 |   if (inherits(tbl$ops, "op_sample")) {
56 |     tbl$ops$args$size <- min(tbl$ops$args$size, size)
57 |     return(tbl)
58 |   }
59 |   sample_args <- list(size = size, replace = replace, randomized_allocation = randomized_allocation)
60 |   tbl$ops <- op_single("sample", x = tbl$ops, dots = tbl$ops$dots, args = sample_args)
61 |   tbl
62 | }
63 | 
64 | #' @export
65 | sql_build.op_sample <- function(op, con, ...) {
66 |   # Render the wrapped operation, then append the SAMPLE clause to it.
67 |   inner_sql <- sql_render(sql_build(op$x, con), con)
68 | 
69 |   sample_opts <- op$args
70 |   # NULL pieces vanish in paste0(), so each keyword shows up only when enabled.
71 |   clause <- paste0(
72 |     "\nSAMPLE ",
73 |     if (sample_opts$replace) "WITH REPLACEMENT ",
74 |     if (sample_opts$randomized_allocation) "RANDOMIZED ALLOCATION "
75 |   )
76 |   build_sql(inner_sql, sql(clause), sample_opts$size, con = con)
77 | }
78 | 


--------------------------------------------------------------------------------
/man/dbConnect-TeradataOdbcDriver-method.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/Driver.R
 3 | \name{dbConnect,TeradataOdbcDriver-method}
 4 | \alias{dbConnect,TeradataOdbcDriver-method}
 5 | \alias{dbConnect}
 6 | \title{Connect to a Teradata ODBC compatible database}
 7 | \usage{
 8 | \S4method{dbConnect}{TeradataOdbcDriver}(
 9 |   drv,
10 |   dsn = NULL,
11 |   ...,
12 |   timezone = "UTC",
13 |   encoding = "UTF-8",
14 |   bigint = c("integer64", "integer", "numeric", "character"),
15 |   driver = NULL,
16 |   server = NULL,
17 |   DBCName = NULL,
18 |   database = "",
19 |   uid = NULL,
20 |   pwd = NULL,
21 |   charset = "ASCII",
22 |   tmode = c("ANSI", "TERA"),
23 |   dbms.name = NULL,
24 |   .connection_string = NULL
25 | )
26 | }
27 | \arguments{
28 | \item{drv}{an object that inherits from \linkS4class{DBIDriver},
29 | or an existing \linkS4class{DBIConnection}
30 | object (in order to clone an existing connection).}
31 | 
32 | \item{dsn}{The Data Source Name.}
33 | 
34 | \item{...}{Additional ODBC keywords, these will be joined with the other
35 | arguments to form the final connection string.}
36 | 
37 | \item{timezone}{The Server time zone. Useful if the database has an internal
38 | timezone that is \emph{not} 'UTC'. If the database is in your local timezone set
39 | to \code{Sys.timezone()}. See \code{\link[=OlsonNames]{OlsonNames()}} for a complete list of available
40 | timezones on your system.}
41 | 
42 | \item{encoding}{Alias of \code{charset}.}
43 | 
44 | \item{bigint}{The R type that \code{SQL_BIGINT} types should be mapped to,
45 | default is \link[bit64:bit64-package]{bit64::integer64}, which allows the full range of 64 bit
46 | integers.}
47 | 
48 | \item{driver}{The ODBC driver name.}
49 | 
50 | \item{server}{Alias of \code{DBCName}.}
51 | 
52 | \item{DBCName}{The server hostname.}
53 | 
54 | \item{database}{The database on the server.}
55 | 
56 | \item{uid}{The user identifier.}
57 | 
58 | \item{pwd}{The password to use.}
59 | 
60 | \item{charset}{Character Set. \code{"ASCII"}(default), \code{"UTF8"} or \code{"UTF16"}.}
61 | 
62 | \item{tmode}{TMODE. \code{"ANSI"}(default) or \code{"TERA"}.}
63 | 
64 | \item{dbms.name}{The database management system name. This should normally
65 | be queried automatically by the ODBC driver. This name is used as the class
66 | name for the OdbcConnect object returned from  \code{dbConnect()}. However if the
67 | driver does not return a valid value it can be set manually with this
68 | parameter.}
69 | 
70 | \item{.connection_string}{A complete connection string, useful if you are
71 | copy pasting it from another source. If this argument is used any additional
72 | arguments will be appended to this string.}
73 | }
74 | \description{
75 | Connect to a Teradata ODBC compatible database
76 | }
77 | \details{
78 | The connection string keywords are driver dependent. The parameters
79 | documented here are common, but some drivers may not accept them. Please see
80 | the specific driver documentation for allowed parameters,
81 | \url{https://www.connectionstrings.com} is also a useful resource of example
82 | connection strings for a variety of databases.
83 | }
84 | 


--------------------------------------------------------------------------------
/vignettes/dplyr.teradata.R:
--------------------------------------------------------------------------------
  1 | ## ----setup, include = FALSE---------------------------------------------------
  2 | knitr::opts_chunk$set(
  3 |   collapse = TRUE,
  4 |   comment = "#>",
  5 |   fig.path = "README-",
  6 |   eval = FALSE,
  7 |   message = FALSE
  8 | )
  9 | 
 10 | ## ----eval=FALSE---------------------------------------------------------------
 11 | #  library(dplyr.teradata)
 12 | #  
 13 | #  # Establish a connection to Teradata
 14 | #  con <- dbConnect(todbc(),
 15 | #                   driver = "{Teradata Driver}", DBCName = "host_name_or_IP_address",
 16 | #                   uid = "user_name", pwd = "*****")
 17 | #  my_table <- tbl(con, "my_table_name")
 18 | #  
 19 | #  # Build a query
 20 | #  q <- my_table %>%
 21 | #    filter(between(date, "2017-01-01", "2017-01-03")) %>%
 22 | #    group_by(date) %>%
 23 | #    summarise(n = n()) %>%
 24 | #    arrange(date)
 25 | #  
 26 | #  show_query(q)
 27 | #  #> <SQL>
 28 | #  #> SELECT "date", count(*) AS "n"
 29 | #  #> FROM "my_table_name"
 30 | #  #> WHERE ("date" BETWEEN '2017-01-01' AND '2017-01-03')
 31 | #  #> GROUP BY "date"
 32 | #  #> ORDER BY "date"
 33 | #  
 34 | #  # Send the query and get its result on R
 35 | #  df <- q %>% collect
 36 | #  df
 37 | #  #> # A tibble: 3 x 2
 38 | #  #>          date        n
 39 | #  #>        <date>    <int>
 40 | #  #>  1 2017-01-01   123456
 41 | #  #>  2 2017-01-02  7891011
 42 | #  #>  3 2017-01-03 12131415
 43 | 
 44 | ## ----eval=FALSE---------------------------------------------------------------
 45 | #  install.packages("dplyr.teradata")
 46 | 
 47 | ## ----eval=FALSE---------------------------------------------------------------
 48 | #  install.packages("devtools") # if you have not installed "devtools" package
 49 | #  devtools::install_github("hoxo-m/dplyr.teradata")
 50 | 
 51 | ## -----------------------------------------------------------------------------
 52 | #  # Establish a connection to Teradata
 53 | #  con <- dbConnect(odbc(),
 54 | #                   driver = "{Teradata Driver}", DBCName = "host_name_or_IP_address",
 55 | #                   uid = "user_name", pwd = "*****")
 56 | 
 57 | ## -----------------------------------------------------------------------------
 58 | #  # Getting table
 59 | #  my_table <- tbl(con, "my_table_name")
 60 | #  
 61 | #  # Getting table in schema
 62 | #  my_table <- tbl(con, in_schema("my_schema", "my_table_name"))
 63 | 
 64 | ## -----------------------------------------------------------------------------
 65 | #  # Build a query
 66 | #  q <- my_table %>%
 67 | #    filter(between(date, "2017-01-01", "2017-01-03")) %>%
 68 | #    group_by(date) %>%
 69 | #    summarise(n = n()) %>%
 70 | #    arrange(date)
 71 | 
 72 | ## -----------------------------------------------------------------------------
 73 | #  show_query(q)
 74 | #  #> <SQL>
 75 | #  #> SELECT "date", count(*) AS "n"
 76 | #  #> FROM "my_table_name"
 77 | #  #> WHERE ("date" BETWEEN '2017-01-01' AND '2017-01-03')
 78 | #  #> GROUP BY "date"
 79 | #  #> ORDER BY "date"
 80 | 
 81 | ## -----------------------------------------------------------------------------
 82 | #  # Send the query and get its result on R
 83 | #  df <- q %>% collect
 84 | #  df
 85 | #  #> # A tibble: 3 x 2
 86 | #  #>          date        n
 87 | #  #>        <date>    <int>
 88 | #  #>  1 2017-01-01   123456
 89 | #  #>  2 2017-01-02  7891011
 90 | #  #>  3 2017-01-03 12131415
 91 | 
 92 | ## ----eval=TRUE, echo=FALSE----------------------------------------------------
 93 | library(dplyr.teradata)
 94 | trans <- function(x) {
 95 |   translate_sql(!!enquo(x), con = simulate_teradata())
 96 | }
 97 | 
 98 | ## -----------------------------------------------------------------------------
 99 | #  mutate(is_positive = bool_to_int(x > 0L))
100 | 
101 | ## ----echo=FALSE, eval=TRUE----------------------------------------------------
102 | trans(bool_to_int(x > 0L))
103 | 
104 | ## -----------------------------------------------------------------------------
105 | #  summarize(n = count_if(x > 0L))
106 | 
107 | ## ----echo=FALSE, eval=TRUE----------------------------------------------------
108 | trans(count_if(x > 0L))
109 | 
110 | ## -----------------------------------------------------------------------------
111 | #  mutate(ts = to_timestamp(unixtime_column))
112 | 
113 | ## ----echo=FALSE, eval=TRUE----------------------------------------------------
114 | trans(to_timestamp(unixtime_column))
115 | 
116 | ## ----eval=TRUE----------------------------------------------------------------
117 | x <- 1:6
118 | breaks <- c(0, 2, 4, 6)
119 | cut(x, breaks)
120 | 
121 | ## -----------------------------------------------------------------------------
122 | #  breaks = c(0, 2, 4, 6)
123 | #  mutate(y = cut(x, breaks))
124 | 
125 | ## ----echo=FALSE, eval=TRUE----------------------------------------------------
126 | trans(cut(x, c(0, 2, 4, 6)))
127 | 
128 | ## -----------------------------------------------------------------------------
129 | #  breaks = c(0, 2, 4, 6)
130 | #  mutate(y = cut(x, breaks, labels = "-", include.lowest = TRUE))
131 | 
132 | ## ----echo=FALSE, eval=TRUE----------------------------------------------------
133 | trans(cut(x, c(0, 2, 4, 6), labels = "-", include.lowest = TRUE))
134 | 
135 | ## ----eval=TRUE----------------------------------------------------------------
136 | x <- blob::as_blob("Good morning")
137 | x
138 | 
139 | # print raw data in blob
140 | x[[1]]
141 | 
142 | blob_to_string(x)
143 | 
144 | 


--------------------------------------------------------------------------------
/R/translate-sql-base-teradata.R:
--------------------------------------------------------------------------------
  1 | #' @import dbplyr
  2 | NULL
  3 | 
  4 | # cut ---------------------------------------------------------------------
  5 | sql_cut <- function(x, breaks = NULL, labels = NULL,
  6 |                     include.lowest = FALSE, right = TRUE, dig.lab = 3,
  7 |                     ...) {
  8 |   # Translate `cut()` into a CASE expression with one WHEN per interval.
  9 |   #   x       expression to bin; its R source text is pasted into the SQL.
 10 |   #   breaks  cut points; -Inf / Inf at either end leave that side unbounded.
 11 |   #   labels  NULL (base R interval labels), one label per interval, or a
 12 |   #           single string used as center mark by generate_range_labels().
 13 |   #   include.lowest, right, dig.lab  same meaning as in base::cut().
 14 |   # Returns the CASE expression as SQL; values outside every interval map
 15 |   # to NULL.
 16 | 
 17 |   # Prepare Arguments -------------------------------------------------------
 18 |   # deparse() splits long expressions into several strings; collapse them so
 19 |   # that each sprintf() below yields exactly one condition.
 20 |   x <- paste(deparse(substitute(x), width.cutoff = 500L), collapse = " ")
 21 |   if(is.null(breaks)) stop("cut() needs breaks argument")
 22 |   if(is.null(labels)) {
 23 |     labels <- levels(cut(0, breaks = breaks, include.lowest = include.lowest,
 24 |                          right = right, dig.lab = dig.lab))
 25 |   }
 26 |   if(length(labels) == 1 && is.character(labels)) {
 27 |     # label is the center mark
 28 |     labels <- generate_range_labels(breaks, include.lowest = include.lowest,
 29 |                                     right = right, center = labels)
 30 |   }
 31 |   n <- length(labels)
 32 |   # A label count that differs from the interval count would index past `breaks`.
 33 |   if(n != length(breaks) - 1) stop("cut() needs one label per interval")
 34 | 
 35 |   # Build SQL ---------------------------------------------------------------
 36 |   sql <- build_sql("CASE\n")
 37 |   for (i in seq_len(n)) {
 38 |     # Only the outermost interval may be closed on both sides (include.lowest).
 39 |     lower_op <- if(!right || (i == 1 && include.lowest)) ">=" else ">"
 40 |     higher_op <- if(right || (i == n && include.lowest)) "<=" else "<"
 41 |     # An infinite bound is dropped: "Inf" is not a valid SQL literal.
 42 |     conds <- c(
 43 |       if(breaks[i] != -Inf) sprintf("%s %s %s", x, lower_op, breaks[i]),
 44 |       if(breaks[i+1] != Inf) sprintf("%s %s %s", x, higher_op, breaks[i+1]))
 45 |     if(length(conds) == 0) conds <- sprintf("%s IS NOT NULL", x)
 46 |     cond <- sql(paste(conds, collapse = " AND "))
 47 |     sql <- build_sql(sql, " WHEN ", cond, " THEN ", labels[i], "\n")
 48 |   }
 49 |   sql <- build_sql(sql, " ELSE NULL\nEND")
 50 |   sql
 51 | }
 52 | 
 53 | is_integer_or_infinaite <- function(values) {  # TRUE when every finite entry is a whole number
 54 |   if(any(ok <- is.finite(values))) all((values %% 1 == 0)[ok]) else TRUE
 55 | }
 56 | 
 57 | generate_range_labels <- function(breaks, include.lowest = FALSE, right = TRUE,
 58 |                                   center = "-", left_char = "", right_char = "") {
 59 |   # Build one "<low><center><high>" label per interval of integer `breaks`,
 60 |   # e.g. breaks c(0, 2, 4) with the defaults gives "1-2" and "3-4".
 61 |   #   breaks                 integer-valued or infinite break points
 62 |   #   include.lowest, right  as in base::cut(); decide which end is open
 63 |   #   center                 separator placed between the two bounds
 64 |   #   left_char, right_char  text put before / after every label
 65 |   # The open end of each interval is shifted by one so the label shows
 66 |   # inclusive bounds; an infinite side is left blank and a one-value interval
 67 |   # shows just that value. Returns a character vector with one label per
 68 |   # interval; stops when `breaks` holds a finite non-integer value.
 69 |   if(!is_integer_or_infinaite(breaks)) {
 70 |     stop("breaks are not integer or infinite")
 71 |   }
 72 |   # Plain digits: "%s"/as.character() would render 100000 as "1e+05".
 73 |   fmt <- function(v) format(v, scientific = FALSE, trim = TRUE)
 74 |   len <- length(breaks) - 1
 75 |   labels <- character(len)
 76 |   for(i in seq_len(len)) {
 77 |     p <- breaks[i]
 78 |     n <- breaks[i+1]
 79 |     # Move the open end inward; with include.lowest = TRUE the outermost
 80 |     # closed end (first lower / last upper bound) stays as given.
 81 |     if(right) {
 82 |       if(i != 1 || !include.lowest) p <- p + 1
 83 |     } else {
 84 |       if(i != len || !include.lowest) n <- n - 1
 85 |     }
 86 |     if(p == -Inf) {
 87 |       body <- paste0(center, fmt(n))
 88 |     } else if(n == Inf) {
 89 |       body <- paste0(fmt(p), center)
 90 |     } else if(p == n) {
 91 |       body <- fmt(p)
 92 |     } else {
 93 |       body <- paste0(fmt(p), center, fmt(n))
 94 |     }
 95 |     labels[i] <- paste0(left_char, body, right_char)
 96 |   }
 97 |   labels
 98 | }
 99 | 
100 | # like --------------------------------------------------------------------
101 | sql_like <- function(x, pattern) {  # builds the SQL fragment `<x> LIKE <pattern>`
102 |   build_sql(x, " LIKE ", pattern)
103 | }
104 | 
105 | # to_timestamp ------------------------------------------------------------
106 | sql_to_timestamp <- function(x) {  # builds a TIMESTAMP(0) expression from `x` counted from 1970-01-01
107 |   build_sql("CAST(DATE '1970-01-01' + (", x ,  # date part: x / 86400 added to the epoch date
108 |             " / 86400) AS TIMESTAMP(0)) + (", x,
109 |             " MOD 86400) * (INTERVAL '00:00:01' HOUR TO SECOND)")  # time part: x MOD 86400 one-second intervals
110 | }
111 | 
112 | # mod ---------------------------------------------------------------------
113 | sql_mod <- function(x, divisor) {  # builds `<x> MOD <divisor>`; the operands are not parenthesized
114 |   build_sql(x, " MOD ", divisor)
115 | }
116 | 
117 | # count_if ----------------------------------------------------------------
118 | sql_count_if <- function(cond) {  # SUM over a CASE giving 1 when cond, 0 when NOT(cond); no ELSE branch
119 |   build_sql("SUM(CASE WHEN (", cond, ") THEN 1 WHEN NOT(", cond, ") THEN 0 END)")
120 | }
121 | 
122 | # bool_to_int -------------------------------------------------------------
123 | sql_bool_to_int <- function(cond) {  # CASE giving 1 when cond, 0 when NOT(cond); no ELSE branch
124 |   build_sql("CASE WHEN (", cond, ") THEN 1 WHEN NOT(", cond, ") THEN 0 END")
125 | }
126 | 
127 | # dummy functions ---------------------------------------------------------
128 | # Export dummy functions to make code completion work.
129 | # Note: cut has the original function base::cut
130 | 
131 | #' Translatable function for the 'LIKE' operator
132 | #' @param x column name
133 | #' @param pattern LIKE pattern
134 | #' @export
135 | like <- function(x, pattern) stop("unimplemented function")  # dummy: always errors when called directly
136 | 
137 | #' Translatable function to convert UNIX time to a time-stamp
138 | #' @param x name of a column that stores UNIX time (e.g. 1609459200)
139 | #' @return time-stamp (e.g. "2021-01-01 00:00:00")
140 | #' @export
141 | to_timestamp <- function(x) stop("unimplemented function")  # dummy: always errors when called directly
142 | 
143 | #' Translatable function for the '%%' operator
144 | #' @param x dividend
145 | #' @param divisor divisor
146 | #' @return modulo
147 | #' @export
148 | mod <- function(x, divisor) stop("unimplemented function")  # dummy: always errors when called directly
149 | 
150 | #' Translatable function to count rows satisfying a condition
151 | #' @param cond condition
152 | #' @export
153 | count_if <- function(cond) stop("unimplemented function")  # dummy: always errors when called directly
154 | 
155 | #' Translatable function to count rows satisfying a condition
156 | #' @param cond condition
157 | #' @export
158 | n_if <- function(cond) stop("unimplemented function")  # dummy: always errors when called directly
159 | 
160 | #' Translatable function to convert a boolean to an integer
161 | #' @param cond condition
162 | #' @return vector. 1 where cond is TRUE, 0 where it is FALSE
163 | #' @export
164 | bool_to_int <- function(cond) stop("unimplemented function")  # dummy: always errors when called directly
165 | 


--------------------------------------------------------------------------------
/R/Driver.R:
--------------------------------------------------------------------------------
  1 | # Refer to https://github.com/rstats-db/odbc/blob/master/R/Driver.R
  2 | 
  3 | #' Teradata Odbc Driver Methods
  4 | #'
  5 | #' Implementations of pure virtual functions defined in the `DBI` package
  6 | #' for `TeradataOdbcDriver` objects.
  7 | #' @name TeradataOdbcDriver
  8 | NULL
  9 | 
 10 | #' Teradata ODBC driver (deprecated)
 11 | #'
 12 | #' Creates a driver object for a Teradata ODBC database; use `odbc::odbc()` instead.
 13 | #'
 14 | #' @export
 15 | #' @import methods odbc DBI bit64
 16 | #' @examples
 17 | #' \dontrun{
 18 | #' todbc()
 19 | #' }
 20 | todbc <- function() {
 21 |   # Two deprecation warnings: the standard one naming the replacement, then a hint.
 22 |   .Deprecated("odbc::odbc")
 23 |   hint <- paste(
 24 |     "Hint: for versions of Teradata ODBC Driver 15.10 and earlier,",
 25 |     "you may need to set like dbConnect(odbc(), ..., dbms.name = 'Teradata')")
 26 |   .Deprecated(msg = hint)
 27 |   new("TeradataOdbcDriver")
 28 | }
 29 | 
 30 | #' @rdname TeradataOdbcDriver
 31 | #' @export
 32 | setClass("TeradataOdbcDriver", contains = "OdbcDriver")  # S4 class extending OdbcDriver; declares no slots of its own
 33 | 
 34 | #' @rdname TeradataOdbcDriver
 35 | #' @inheritParams methods::show
 36 | #' @export
 37 | setMethod(
 38 |   "show", "TeradataOdbcDriver",
 39 |   function(object) {
 40 |     cat("<TeradataOdbcDriver>\n")  # prints a fixed tag; `object` itself is not inspected
 41 |     # TODO: Print more details
 42 |   })
 43 | 
 44 | #' Connect to a Teradata ODBC compatible database
 45 | #'
 46 | #' @inheritParams DBI::dbConnect
 47 | #' @param dsn The Data Source Name.
 48 | #' @param timezone The Server time zone. Useful if the database has an internal
 49 | #' timezone that is _not_ 'UTC'. If the database is in your local timezone set
 50 | #' to `Sys.timezone()`. See [OlsonNames()] for a complete list of available
 51 | #' timezones on your system.
 52 | #' @param encoding Alias of `charset`.
 53 | #' @param driver The ODBC driver name.
 54 | #' @param server Alias of `DBCName`.
 55 | #' @param DBCName The server hostname.
 56 | #' @param database The database on the server.
 57 | #' @param uid The user identifier.
 58 | #' @param pwd The password to use.
 59 | #' @param charset Character Set. `"ASCII"`(default), `"UTF8"` or `"UTF16"`.
 60 | #' @param tmode TMODE. `"ANSI"`(default) or `"TERA"`.
 61 | #' @param dbms.name The database management system name. This should normally
 62 | #' be queried automatically by the ODBC driver. This name is used as the class
 63 | #' name for the OdbcConnect object returned from  `dbConnect()`. However if the
 64 | #' driver does not return a valid value it can be set manually with this
 65 | #' parameter.
 66 | #' @param ... Additional ODBC keywords, these will be joined with the other
 67 | #' arguments to form the final connection string.
 68 | #' @param .connection_string A complete connection string, useful if you are
 69 | #' copy pasting it from another source. If this argument is used any additional
 70 | #' arguments will be appended to this string.
 71 | #' @param bigint The R type that `SQL_BIGINT` types should be mapped to,
 72 | #' default is [bit64::integer64], which allows the full range of 64 bit
 73 | #' integers.
 74 | #' @details
 75 | #' The connection string keywords are driver dependent. The parameters
 76 | #' documented here are common, but some drivers may not accept them. Please see
 77 | #' the specific driver documentation for allowed parameters,
 78 | #' \url{https://www.connectionstrings.com} is also a useful resource of example
 79 | #' connection strings for a variety of databases.
 80 | #' @aliases dbConnect
 81 | #' @export
 82 | setMethod(  # dbConnect() for TeradataOdbcDriver: connects through the OdbcDriver method, then patches info/quote
 83 |   "dbConnect", "TeradataOdbcDriver",
 84 |   function(drv,
 85 |            dsn = NULL,
 86 |            ...,
 87 |            timezone = "UTC",
 88 |            encoding = "UTF-8",
 89 |            bigint = c("integer64", "integer", "numeric", "character"),
 90 |            driver = NULL,
 91 |            server = NULL,
 92 |            DBCName = NULL,
 93 |            database = "",
 94 |            uid = NULL,
 95 |            pwd = NULL,
 96 |            charset = "ASCII",
 97 |            tmode = c("ANSI", "TERA"),
 98 |            dbms.name = NULL,
 99 |            .connection_string = NULL) {
100 | 
101 |     # Preprocessing -----------------------------------------------------------
102 |     bigint <- match.arg(bigint)
103 |     tmode <- match.arg(tmode)
104 |     if (!is.null(server)) {  # `server` wins over `DBCName` when both are given
105 |       DBCName <- server
106 |     }
107 |     if (!is.null(dsn)) {  # any non-NULL DSN is rejected
108 |       stop("DSN is unsupported yet.")
109 |     }
110 |     port <- 1025  # default, replaced below when `port` is passed through `...`
111 |     if (!is.null(list(...)$port)) {
112 |       port <- list(...)$port
113 |     }
114 | 
115 |     # DB Connection -----------------------------------------------------------
116 |     dbms.name <- "Teradata"  # NOTE(review): overwrites whatever was passed as `dbms.name` - confirm this is intended
117 |     dbConnectODBC <- getMethod("dbConnect", c("OdbcDriver"))  # the OdbcDriver method performs the actual connect
118 |     observer <- getOption("connectionObserver")  # saved so it can be restored in `finally`
119 |     options(connectionObserver = NULL)  # observer option is cleared while the underlying connect runs
120 |     tryCatch({
121 |       con <- dbConnectODBC(
122 |         drv, timezone = timezone, encoding = encoding, bigint = bigint,
123 |         driver = driver, DBCName = DBCName, database = database,
124 |         uid = uid, pwd = pwd, charset = charset, tmode = tmode, port = port,
125 |         dbms.name = dbms.name,
126 |         .connection_string = .connection_string, ...=...)
127 |       info <- generate_connection_info(
128 |         dbname = database, dbms.name = dbms.name, uid = uid, DBCName = DBCName,
129 |         port = port, driver = driver, info = con@info)
130 |       con@info <- info  # replace the reported info with the record assembled above
131 |       con@quote <- '"'
132 |     }, finally = options(connectionObserver = observer))
133 | 
134 |     if (!is.null(getOption("connectionObserver"))) {  # an observer is registered: notify it from a task callback
135 |       addTaskCallback(function(expr, ...) {
136 |         tryCatch({
137 |           if (is.call(expr) && identical(expr[[1]], as.symbol("<-"))) {  # only when the top-level task was an assignment
138 |             connection <- eval(expr[[2]])  # evaluates the left-hand side of that assignment
139 |             observer <- getOption("connectionObserver")
140 |             observer$connectionOpened(
141 |               type = info$dbms.name,
142 |               displayName = sprintf("%s - %s@%s", info$dbname, info$username, info$servername),
143 |               host = odbc:::computeHostName(connection),
144 |               connectCode = paste(c("library(dplyr.teradata)", deparse(expr)), collapse = "\n"),
145 |               disconnect = function() odbc::dbDisconnect(connection),
146 |               listObjectTypes = function () odbc::odbcListObjectTypes(connection),
147 |               listObjects = function(...) odbc::odbcListObjects(connection, ...),
148 |               listColumns = function(...) odbc::odbcListColumns(connection, ...),
149 |               previewObject = function(rowLimit, ...) odbcPreviewObject(connection, rowLimit, ...),
150 |               actions = odbc::odbcConnectionActions(connection),
151 |               connectionObject = connection
152 |             )
153 |           }
154 |         }, error = function(e) {  # a failed notification becomes a warning, not an error
155 |           warning("Could not notify connection observer. ", e$message, call. = FALSE)
156 |         })
157 | 
158 |         # always return false so the task callback is run at most once
159 |         FALSE
160 |       })
161 |     } # nocov end
162 | 
163 |     con
164 |   }
165 | )
166 | 
167 | generate_connection_info <- function(dbname, dbms.name, uid, DBCName, port, driver, info) {
168 |   # Assemble the connection-info record. Caller-supplied values fill most
169 |   # fields (DBCName is used for both host and servername), odbc.version and
170 |   # supports.transactions are copied from `info`, and the version/source
171 |   # fields are left as empty strings.
172 |   structure(
173 |     list(
174 |       dbname = dbname, dbms.name = dbms.name, db.version = "",
175 |       username = uid, host = DBCName, port = port,
176 |       sourcename = "", servername = DBCName, drivername = driver,
177 |       odbc.version = info$odbc.version,
178 |       driver.version = "", odbcdriver.version = "",
179 |       supports.transactions = info$supports.transactions
180 |     ),
181 |     # S3 class vector: the DBMS name, then "driver_info", then "list"
182 |     class = c(dbms.name, "driver_info", "list")
183 |   )
184 | }
185 | 


--------------------------------------------------------------------------------
/vignettes/dplyr.teradata.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "A Teradata Backend for dplyr"
  3 | author: "Koji Makiyama (@hoxo-m)"
  4 | date: "`r Sys.Date()`"
  5 | output: rmarkdown::html_vignette
  6 | vignette: >
  7 |   %\VignetteIndexEntry{A Teradata Backend for dplyr}
  8 |   %\VignetteEngine{knitr::rmarkdown}
  9 |   %\VignetteEncoding{UTF-8}
 10 | ---
 11 | 
 12 | ```{r setup, include = FALSE}
 13 | knitr::opts_chunk$set(
 14 |   collapse = TRUE,
 15 |   comment = "#>",
 16 |   fig.path = "README-",
 17 |   eval = FALSE,
 18 |   message = FALSE
 19 | )
 20 | ```
 21 | 
 22 | ## 1. Overview
 23 | 
 24 | The package provides a Teradata backend for **dplyr**. 
 25 | 
 26 | It makes it possible to operate [Teradata Database](https://www.teradata.com/products-and-services/teradata-database/) in the same way as manipulating data frames with **dplyr**.
 27 | 
 28 | ```{r eval=FALSE}
 29 | library(dplyr.teradata)
 30 | 
 31 | # Establish a connection to Teradata
 32 | con <- dbConnect(todbc(), 
 33 |                  driver = "{Teradata Driver}", DBCName = "host_name_or_IP_address",
 34 |                  uid = "user_name", pwd = "*****")
 35 | my_table <- tbl(con, "my_table_name")
 36 | 
 37 | # Build a query
 38 | q <- my_table %>% 
 39 |   filter(between(date, "2017-01-01", "2017-01-03")) %>% 
 40 |   group_by(date) %>%
 41 |   summarise(n = n()) %>%
 42 |   arrange(date)
 43 | 
 44 | show_query(q)
 45 | #> <SQL>
 46 | #> SELECT "date", count(*) AS "n"
 47 | #> FROM "my_table_name"
 48 | #> WHERE ("date" BETWEEN '2017-01-01' AND '2017-01-03')
 49 | #> GROUP BY "date"
 50 | #> ORDER BY "date"
 51 | 
 52 | # Send the query and get its result on R
 53 | df <- q %>% collect
 54 | df
 55 | #> # A tibble: 3 x 2
 56 | #>          date        n
 57 | #>        <date>    <int>
 58 | #>  1 2017-01-01   123456
 59 | #>  2 2017-01-02  7891011
 60 | #>  3 2017-01-03 12131415
 61 | ```
 62 | 
 63 | ## 2. Installation
 64 | 
 65 | You can install the **dplyr.teradata** package from CRAN.
 66 | 
 67 | ```{r eval=FALSE}
 68 | install.packages("dplyr.teradata")
 69 | ```
 70 | 
 71 | You can also install the development version of the package from GitHub.
 72 | 
 73 | ```{r eval=FALSE}
 74 | install.packages("devtools") # if you have not installed "devtools" package
 75 | devtools::install_github("hoxo-m/dplyr.teradata")
 76 | ```
 77 | 
 78 | The source code for **dplyr.teradata** package is available on GitHub at
 79 | 
 80 | - https://github.com/hoxo-m/dplyr.teradata.
 81 | 
 82 | ## 3. Motivation
 83 | 
 84 | The package provides a Teradata backend for **dplyr**. 
 85 | It makes it possible to build SQL for [Teradata Database](https://www.teradata.com/products-and-services/teradata-database/) in the same way as manipulating data frames with the **dplyr** package.
 86 | It can also send queries and receive their results in R.
 87 | 
 88 | Therefore, you can complete data analysis with Teradata only on R.
 89 | It means that you are freed from troublesome switching of tools and switching thoughts that cause mistakes.
 90 | 
 91 | ## 4. Usage
 92 | 
 93 | The package uses the **odbc** package to connect to the database and the **dbplyr** package to build SQL.
 94 | 
 95 | First, you need to establish an ODBC connection to Teradata. See: 
 96 | 
 97 | - [README - **odbc** package](https://CRAN.R-project.org/package=odbc/readme/README.html).
 98 | 
 99 | ```{r}
100 | # Establish a connection to Teradata
101 | con <- dbConnect(odbc(), 
102 |                  driver = "{Teradata Driver}", DBCName = "host_name_or_IP_address",
103 |                  uid = "user_name", pwd = "*****")
104 | ```
105 | 
106 | Second, you need to specify a table to build SQL. See:
107 | 
108 | - [Introduction to dbplyr • dbplyr](https://dbplyr.tidyverse.org/articles/dbplyr.html).
109 | 
110 | To specify a table, you can use `tbl()`:
111 | 
112 | ```{r}
113 | # Getting table
114 | my_table <- tbl(con, "my_table_name")
115 | 
116 | # Getting table in schema
117 | my_table <- tbl(con, in_schema("my_schema", "my_table_name"))
118 | ```
119 | 
120 | Third, you build queries. This can be done in the same way as manipulating data frames with **dplyr**:
121 | 
122 | - [A Grammar of Data Manipulation • dplyr](https://dplyr.tidyverse.org/).
123 | 
124 | For example, you can use the following verbs:
125 | 
126 | - `mutate()` adds new *columns* that are functions of existing *columns*.
127 | - `select()` picks *columns* based on their names.
128 | - `filter()` picks *rows* based on their values.
129 | - `summarise()` reduces multiple values down to a single summary.
130 | - `arrange()` changes the ordering of the rows.
131 | 
132 | ```{r}
133 | # Build a query
134 | q <- my_table %>% 
135 |   filter(between(date, "2017-01-01", "2017-01-03")) %>% 
136 |   group_by(date) %>%
137 |   summarise(n = n()) %>%
138 |   arrange(date)
139 | ```
140 | 
141 | `n()` is a function in **dplyr** to return the number of rows in the current group but here it will be translated to `count(*)` as a SQL function.
142 | 
143 | If you want to show built queries, use `show_query()`:
144 | 
145 | ```{r}
146 | show_query(q)
147 | #> <SQL>
148 | #> SELECT "date", count(*) AS "n"
149 | #> FROM "my_table_name"
150 | #> WHERE ("date" BETWEEN '2017-01-01' AND '2017-01-03')
151 | #> GROUP BY "date"
152 | #> ORDER BY "date"
153 | ```
154 | 
155 | Finally, you send the built queries and get their results in R using `collect()`.
156 | 
157 | ```{r}
158 | # Send the query and get its result on R
159 | df <- q %>% collect
160 | df
161 | #> # A tibble: 3 x 2
162 | #>          date        n
163 | #>        <date>    <int>
164 | #>  1 2017-01-01   123456
165 | #>  2 2017-01-02  7891011
166 | #>  3 2017-01-03 12131415
167 | ```
168 | 
169 | ## 5. Translatable functions
170 | 
171 | The package mainly uses **dbplyr** to translate manipulations into queries.
172 | 
173 | *Translatable functions* are the functions available in manipulations that can be translated into SQL functions.
174 | 
175 | For instance, `n()` is translated to `count(*)` in the above example.
176 | 
177 | To see the translatable functions for Teradata, refer to the following:
178 | 
179 | - [Adds Teradata translation](https://github.com/tidyverse/dbplyr/pull/43)
180 | 
181 | Here, we introduce the special translatable functions that become available with **dplyr.teradata**.
182 | 
183 | ```{r eval=TRUE, echo=FALSE}
184 | library(dplyr.teradata)
185 | trans <- function(x) {  # translates the expression `x` to SQL against a simulated Teradata connection
186 |   translate_sql(!!enquo(x), con = simulate_teradata())
187 | }
188 | ```
189 | 
190 | ### 5.1. Treat Boolean
191 | 
192 | Teradata does not have the boolean data type.
193 | So when you use boolean, you need to write some complex statements.
194 | The package has several functions to handle this concisely.
195 | 
196 | `bool_to_int` transforms boolean to integer.
197 | 
198 | ```{r}
199 | mutate(is_positive = bool_to_int(x > 0L))
200 | ```
201 | 
202 | ```{r echo=FALSE, eval=TRUE}
203 | trans(bool_to_int(x > 0L))
204 | ```
205 | 
206 | `count_if()` or `n_if()` counts the number of rows satisfying a condition.
207 | 
208 | ```{r}
209 | summarize(n = count_if(x > 0L))
210 | ```
211 | 
212 | ```{r echo=FALSE, eval=TRUE}
213 | trans(count_if(x > 0L))
214 | ```
215 | 
216 | ### 5.2. `to_timestamp()`
217 | 
218 | When your table has columns that store UNIX time and you want to convert them to timestamps, you need to write complex SQL.
219 | 
220 | `to_timestamp()` is a translatable function that makes it easy.
221 | 
222 | ```{r}
223 | mutate(ts = to_timestamp(unixtime_column))
224 | ```
225 | 
226 | The above manipulation is translated into SQL like the following:
227 | 
228 | ```{r echo=FALSE, eval=TRUE}
229 | trans(to_timestamp(unixtime_column))
230 | ```
231 | 
232 | ### 5.3. `cut()`
233 | 
234 | `cut()` is a very useful function that you can use in base R.
235 | 
236 | For example, you want to cut values of `x` into three parts of ranges by break points 2 and 4:
237 | 
238 | ```{r eval=TRUE}
239 | x <- 1:6
240 | breaks <- c(0, 2, 4, 6)
241 | cut(x, breaks)
242 | ```
243 | 
244 | **dplyr.teradata** has a translatable function similar to this:
245 | 
246 | ```{r}
247 | breaks = c(0, 2, 4, 6)
248 | mutate(y = cut(x, breaks))
249 | ```
250 | 
251 | As a result, it is translated to a `CASE WHEN` statement as follows:
252 | 
253 | ```{r echo=FALSE, eval=TRUE}
254 | trans(cut(x, c(0, 2, 4, 6)))
255 | ```
256 | 
257 | Arguments of base `cut()` are also available:
258 | 
259 | ```{r}
260 | breaks = c(0, 2, 4, 6)
261 | mutate(y = cut(x, breaks, labels = "-", include.lowest = TRUE))
262 | ```
263 | 
264 | ```{r echo=FALSE, eval=TRUE}
265 | trans(cut(x, c(0, 2, 4, 6), labels = "-", include.lowest = TRUE))
266 | ```
267 | 
268 | ## 6. Other useful functions
269 | 
270 | ### 6.1. `blob_to_string()`
271 | 
272 | The `blob` object from databases sometimes prevents manipulations with **dplyr**.
273 | 
274 | You might want to convert them to string.
275 | 
276 | `blob_to_string()` is a function to make it easy:
277 | 
278 | ```{r eval=TRUE}
279 | x <- blob::as_blob("Good morning")
280 | x
281 | 
282 | # print raw data in blob
283 | x[[1]]
284 | 
285 | blob_to_string(x)
286 | ```
287 | 
288 | ## 7. Related work
289 | 
290 | - [A 'dplyr' Backend for Databases • dbplyr](https://dbplyr.tidyverse.org/)
291 | 


--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: A Teradata Backend for dplyr
  3 | output: github_document
  4 | ---
  5 | 
  6 | <!-- README.md is generated from README.Rmd. Please edit that file -->
  7 | 
  8 | #### *Koji Makiyama (@hoxo-m)*
  9 | 
 10 | ```{r setup, include=FALSE}
 11 | knitr::opts_chunk$set(
 12 |   collapse = TRUE,
 13 |   comment = "#>",
 14 |   fig.path = "man/figures/README-",
 15 |   message = FALSE,
 16 |   eval = FALSE
 17 | )
 18 | ```
 19 | 
 20 | <!-- badges: start -->
 21 | [![Travis-CI Build Status](https://travis-ci.org/hoxo-m/dplyr.teradata.svg?branch=master)](https://travis-ci.org/hoxo-m/dplyr.teradata)
 22 | [![CRAN Version](https://www.r-pkg.org/badges/version-ago/dplyr.teradata)](https://cran.r-project.org/package=dplyr.teradata)
 23 | [![](https://cranlogs.r-pkg.org/badges/dplyr.teradata)](https://cran.r-project.org/package=dplyr.teradata)
 24 | [![Coverage Status](https://img.shields.io/coveralls/hoxo-m/dplyr.teradata.svg)](https://coveralls.io/github/hoxo-m/dplyr.teradata)
 25 | <!-- badges: end -->
 26 | 
 27 | ## 1. Overview
 28 | 
 29 | The package provides a Teradata backend for **dplyr**. 
 30 | 
 31 | It makes it possible to operate [Teradata Database](https://www.teradata.com/products-and-services/teradata-database/) in the same way as manipulating data frames with **dplyr**.
 32 | 
 33 | ```{r eval=FALSE}
 34 | library(dplyr.teradata)
 35 | 
 36 | # Establish a connection to Teradata
 37 | con <- dbConnect(odbc(), 
 38 |                  driver = "{Teradata Driver}", DBCName = "host_name_or_IP_address",
 39 |                  uid = "user_name", pwd = "*****")
 40 | my_table <- tbl(con, "my_table_name")
 41 | 
 42 | # Build a query
 43 | q <- my_table %>% 
 44 |   filter(between(date, "2017-01-01", "2017-01-03")) %>% 
 45 |   group_by(date) %>%
 46 |   summarise(n = n()) %>%
 47 |   arrange(date)
 48 | 
 49 | show_query(q)
 50 | #> <SQL>
 51 | #> SELECT "date", count(*) AS "n"
 52 | #> FROM "my_table_name"
 53 | #> WHERE ("date" BETWEEN '2017-01-01' AND '2017-01-03')
 54 | #> GROUP BY "date"
 55 | #> ORDER BY "date"
 56 | 
 57 | # Send the query and get its result on R
 58 | df <- q %>% collect
 59 | df
 60 | #> # A tibble: 3 x 2
 61 | #>          date        n
 62 | #>        <date>    <int>
 63 | #>  1 2017-01-01   123456
 64 | #>  2 2017-01-02  7891011
 65 | #>  3 2017-01-03 12131415
 66 | ```
 67 | 
 68 | ## 2. Installation
 69 | 
 70 | You can install the **dplyr.teradata** package from CRAN.
 71 | 
 72 | ```{r eval=FALSE}
 73 | install.packages("dplyr.teradata")
 74 | ```
 75 | 
 76 | You can also install the development version of the package from GitHub.
 77 | 
 78 | ```{r eval=FALSE}
 79 | install.packages("remotes") # if you have not installed "remotes" package
 80 | remotes::install_github("hoxo-m/dplyr.teradata")
 81 | ```
 82 | 
 83 | The source code for **dplyr.teradata** package is available on GitHub at
 84 | 
 85 | - https://github.com/hoxo-m/dplyr.teradata.
 86 | 
 87 | ## 3. Motivation
 88 | 
 89 | The package provides a Teradata backend for **dplyr**. 
 90 | It makes it possible to build SQL for [Teradata Database](https://www.teradata.com/products-and-services/teradata-database/) in the same way as manipulating data frames with the **dplyr** package.
 91 | It can also send queries and receive their results in R.
 92 | 
 93 | Therefore, you can complete data analysis with Teradata only on R.
 94 | It means that you are freed from troublesome switching of tools and switching thoughts that cause mistakes.
 95 | 
 96 | ## 4. Usage
 97 | 
 98 | The package uses the **odbc** package to connect to the database and the **dbplyr** package to build SQL.
 99 | 
100 | First, you need to establish an ODBC connection to Teradata. See: 
101 | 
102 | - [README - **odbc** package](https://CRAN.R-project.org/package=odbc/readme/README.html).
103 | 
104 | ```{r}
105 | # Establish a connection to Teradata
106 | con <- dbConnect(odbc(), 
107 |                  driver = "{Teradata Driver}", DBCName = "host_name_or_IP_address",
108 |                  uid = "user_name", pwd = "*****")
109 | ```
110 | 
111 | Second, you need to specify a table to build SQL. See:
112 | 
113 | - [Introduction to dbplyr • dbplyr](https://dbplyr.tidyverse.org/articles/dbplyr.html).
114 | 
115 | To specify a table, you can use `tbl()`:
116 | 
117 | ```{r}
118 | # Getting table
119 | my_table <- tbl(con, "my_table_name")
120 | 
121 | # Getting table in schema
122 | my_table <- tbl(con, in_schema("my_schema", "my_table_name"))
123 | ```
124 | 
125 | Third, you build queries. This can be done in the same way as manipulating data frames with **dplyr**:
126 | 
127 | - [A Grammar of Data Manipulation • dplyr](https://dplyr.tidyverse.org/).
128 | 
129 | For example, you can use the following verbs:
130 | 
131 | - `mutate()` adds new *columns* that are functions of existing *columns*.
132 | - `select()` picks *columns* based on their names.
133 | - `filter()` picks *rows* based on their values.
134 | - `summarise()` reduces multiple values down to a single summary.
135 | - `arrange()` changes the ordering of the rows.
136 | 
137 | ```{r}
138 | # Build a query
139 | q <- my_table %>% 
140 |   filter(between(date, "2017-01-01", "2017-01-03")) %>% 
141 |   group_by(date) %>%
142 |   summarise(n = n()) %>%
143 |   arrange(date)
144 | ```
145 | 
146 | `n()` is a function in **dplyr** to return the number of rows in the current group but here it will be translated to `count(*)` as a SQL function.
147 | 
148 | If you want to show built queries, use `show_query()`:
149 | 
150 | ```{r}
151 | show_query(q)
152 | #> <SQL>
153 | #> SELECT "date", count(*) AS "n"
154 | #> FROM "my_table_name"
155 | #> WHERE ("date" BETWEEN '2017-01-01' AND '2017-01-03')
156 | #> GROUP BY "date"
157 | #> ORDER BY "date"
158 | ```
159 | 
160 | Finally, you send the built queries and get their results in R using `collect()`.
161 | 
162 | ```{r}
163 | # Send the query and get its result on R
164 | df <- q %>% collect
165 | df
166 | #> # A tibble: 3 x 2
167 | #>          date        n
168 | #>        <date>    <int>
169 | #>  1 2017-01-01   123456
170 | #>  2 2017-01-02  7891011
171 | #>  3 2017-01-03 12131415
172 | ```
173 | 
174 | ## 5. Translatable functions
175 | 
176 | The package mainly uses **dbplyr** to translate manipulations into queries.
177 | 
178 | *Translatable functions* are the functions available in manipulations that can be translated into SQL functions.
179 | 
180 | For instance, `n()` is translated to `count(*)` in the above example.
181 | 
182 | To see the translatable functions for Teradata, refer to the following:
183 | 
184 | - [Adds Teradata translation](https://github.com/tidyverse/dbplyr/pull/43)
185 | 
186 | Here, we introduce the special translatable functions that become available with **dplyr.teradata**.
187 | 
188 | ```{r eval=TRUE, echo=FALSE}
189 | library(dplyr.teradata)
190 | trans <- function(x) {  # translates the expression `x` to SQL against a simulated Teradata connection
191 |   translate_sql(!!enquo(x), con = simulate_teradata())
192 | }
193 | ```
194 | 
195 | ### 5.1. Treat Boolean
196 | 
197 | Teradata does not have the boolean data type.
198 | So when you use boolean, you need to write some complex statements.
199 | The package has several functions to handle this concisely.
200 | 
201 | `bool_to_int` transforms boolean to integer.
202 | 
203 | ```{r}
204 | mutate(is_positive = bool_to_int(x > 0L))
205 | ```
206 | 
207 | ```{r echo=FALSE, eval=TRUE}
208 | trans(bool_to_int(x > 0L))
209 | ```
210 | 
211 | `count_if()` or `n_if()` counts the number of rows satisfying a condition.
212 | 
213 | ```{r}
214 | summarize(n = count_if(x > 0L))
215 | ```
216 | 
217 | ```{r echo=FALSE, eval=TRUE}
218 | trans(count_if(x > 0L))
219 | ```
220 | 
221 | ### 5.2. `to_timestamp()`
222 | 
223 | When your table has columns that store UNIX time and you want to convert them to timestamps, you need to write complex SQL.
224 | 
225 | `to_timestamp()` is a translatable function that makes it easy.
226 | 
227 | ```{r}
228 | mutate(ts = to_timestamp(unixtime_column))
229 | ```
230 | 
231 | The above manipulation is translated into SQL like the following:
232 | 
233 | ```{r echo=FALSE, eval=TRUE}
234 | trans(to_timestamp(unixtime_column))
235 | ```
236 | 
237 | ### 5.3. `cut()`
238 | 
239 | `cut()` is a very useful function that you can use in base R.
240 | 
241 | For example, you want to cut values of `x` into three parts of ranges by break points 2 and 4:
242 | 
243 | ```{r eval=TRUE}
244 | x <- 1:6
245 | breaks <- c(0, 2, 4, 6)
246 | cut(x, breaks)
247 | ```
248 | 
249 | **dplyr.teradata** has a translatable function similar to this:
250 | 
251 | ```{r}
252 | breaks = c(0, 2, 4, 6)
253 | mutate(y = cut(x, breaks))
254 | ```
255 | 
256 | As a result, it is translated to a `CASE WHEN` statement as follows:
257 | 
258 | ```{r echo=FALSE, eval=TRUE}
259 | trans(cut(x, c(0, 2, 4, 6)))
260 | ```
261 | 
262 | Arguments of base `cut()` are also available:
263 | 
264 | ```{r}
265 | breaks = c(0, 2, 4, 6)
266 | mutate(y = cut(x, breaks, labels = "-", include.lowest = TRUE))
267 | ```
268 | 
269 | ```{r echo=FALSE, eval=TRUE}
270 | trans(cut(x, c(0, 2, 4, 6), labels = "-", include.lowest = TRUE))
271 | ```
272 | 
273 | ## 6. Miscellaneous
274 | 
275 | ### 6.1. Sampling Data
276 | 
277 | Teradata supports sampling rows from tables:
278 | 
279 | - [SAMPLE Clause](https://docs.teradata.com/r/b8dd8xEYJnxfsq4uFRrHQQ/hyfVBxhz8aKKK1HS77zXnA),
280 | 
281 | and **dplyr** has a verb for the same purpose, `slice_sample()`.
282 | The package makes them work together.
283 | 
284 | For example, by the number of rows:
285 | 
286 | ```{r}
287 | q <- my_table %>% slice_sample(n = 100L)
288 | 
289 | show_query(q)
290 | #> <SQL>
291 | #> SELECT *
292 | #> FROM "my_table_name"
293 | #> SAMPLE RANDOMIZED ALLOCATION 100
294 | ```
295 | 
296 | or by the proportion of rows:
297 | 
298 | ```{r}
299 | q <- my_table %>% slice_sample(prop = 0.1)
300 | 
301 | show_query(q)
302 | #> <SQL>
303 | #> SELECT *
304 | #> FROM "my_table_name"
305 | #> SAMPLE RANDOMIZED ALLOCATION 0.1
306 | ```
307 | 
308 | It also supports sampling with replacement:
309 | 
310 | ```{r}
311 | q <- my_table %>% slice_sample(n = 100L, replace = TRUE)
312 | 
313 | show_query(q)
314 | #> <SQL>
315 | #> SELECT *
316 | #> FROM "my_table_name"
317 | #> SAMPLE WITH REPLACEMENT RANDOMIZED ALLOCATION 100
318 | ```
319 | 
320 | and supports a random sample stratified by AMPs (it is much faster, especially for very large samples):
321 | 
322 | ```{r}
323 | q <- my_table %>% slice_sample(n = 100L, randomized_allocation = FALSE)
324 | 
325 | show_query(q)
326 | #> <SQL>
327 | #> SELECT *
328 | #> FROM "my_table_name"
329 | #> SAMPLE 100
330 | ```
331 | 
332 | The package currently also supports the older versions of these verbs, `sample_n()` and `sample_frac()`:
333 | 
334 | ```{r}
335 | # By the number of rows
336 | q <- my_table %>% sample_n(100L)
337 | # By the proportion of rows
338 | q <- my_table %>% sample_frac(0.1)
339 | ```
340 | 
341 | ### 6.2. `blob_to_string()`
342 | 
343 | `blob` objects from databases sometimes prevent manipulations with **dplyr**.
344 | 
345 | You might want to convert them to string.
346 | 
347 | `blob_to_string()` is a function to make it easy:
348 | 
349 | ```{r eval=TRUE}
350 | x <- blob::as_blob("Good morning")
351 | x
352 | 
353 | # print raw data in blob
354 | x[[1]]
355 | 
356 | blob_to_string(x)
357 | ```
358 | 
359 | ## 7. Related work
360 | 
361 | - [A 'dplyr' Backend for Databases • dbplyr](https://dbplyr.tidyverse.org/)
362 | - [Teradata Vantage™ SQL Data Manipulation Language](https://docs.teradata.com/r/b8dd8xEYJnxfsq4uFRrHQQ/7CvqIbRdnlAZNyaVKhlZTg)
363 | - [tdplyr - Teradata R Package](https://github.com/Teradata/tdplyr)
364 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | A Teradata Backend for dplyr
  2 | ================
  3 | 
  4 | <!-- README.md is generated from README.Rmd. Please edit that file -->
  5 | 
  6 | #### *Koji Makiyama (@hoxo-m)*
  7 | 
  8 | <!-- badges: start -->
  9 | 
 10 | [![Travis-CI Build
 11 | Status](https://travis-ci.org/hoxo-m/dplyr.teradata.svg?branch=master)](https://travis-ci.org/hoxo-m/dplyr.teradata)
 12 | [![CRAN
 13 | Version](https://www.r-pkg.org/badges/version-ago/dplyr.teradata)](https://cran.r-project.org/package=dplyr.teradata)
 14 | [![](https://cranlogs.r-pkg.org/badges/dplyr.teradata)](https://cran.r-project.org/package=dplyr.teradata)
 15 | [![Coverage
 16 | Status](https://img.shields.io/coveralls/hoxo-m/dplyr.teradata.svg)](https://coveralls.io/github/hoxo-m/dplyr.teradata)
 17 | <!-- badges: end -->
 18 | 
 19 | ## 1. Overview
 20 | 
 21 | The package provides a Teradata backend for **dplyr**.
 22 | 
 23 | It makes it possible to operate [Teradata
 24 | Database](https://www.teradata.com/products-and-services/teradata-database/)
 25 | in the same way as manipulating data frames with **dplyr**.
 26 | 
 27 | ``` r
 28 | library(dplyr.teradata)
 29 | 
 30 | # Establish a connection to Teradata
 31 | con <- dbConnect(odbc(), 
 32 |                  driver = "{Teradata Driver}", DBCName = "host_name_or_IP_address",
 33 |                  uid = "user_name", pwd = "*****")
 34 | my_table <- tbl(con, "my_table_name")
 35 | 
 36 | # Build a query
 37 | q <- my_table %>% 
 38 |   filter(between(date, "2017-01-01", "2017-01-03")) %>% 
 39 |   group_by(date) %>%
 40 |   summarise(n = n()) %>%
 41 |   arrange(date)
 42 | 
 43 | show_query(q)
 44 | #> <SQL>
 45 | #> SELECT "date", count(*) AS "n"
 46 | #> FROM "my_table_name"
 47 | #> WHERE ("date" BETWEEN '2017-01-01' AND '2017-01-03')
 48 | #> GROUP BY "date"
 49 | #> ORDER BY "date"
 50 | 
 51 | # Send the query and get its result on R
 52 | df <- q %>% collect
 53 | df
 54 | #> # A tibble: 3 x 2
 55 | #>          date        n
 56 | #>        <date>    <int>
 57 | #>  1 2017-01-01   123456
 58 | #>  2 2017-01-02  7891011
 59 | #>  3 2017-01-03 12131415
 60 | ```
 61 | 
 62 | ## 2. Installation
 63 | 
 64 | You can install the **dplyr.teradata** package from CRAN.
 65 | 
 66 | ``` r
 67 | install.packages("dplyr.teradata")
 68 | ```
 69 | 
 70 | You can also install the development version of the package from GitHub.
 71 | 
 72 | ``` r
 73 | install.packages("remotes") # if you have not installed "remotes" package
 74 | remotes::install_github("hoxo-m/dplyr.teradata")
 75 | ```
 76 | 
 77 | The source code for **dplyr.teradata** package is available on GitHub at
 78 | 
 79 | -   <https://github.com/hoxo-m/dplyr.teradata>.
 80 | 
 81 | ## 3. Motivation
 82 | 
 83 | The package provides a Teradata backend for **dplyr**. It makes it
 84 | possible to build SQL for [Teradata
 85 | Database](https://www.teradata.com/products-and-services/teradata-database/)
 86 | in the same way as manipulating data frames with the **dplyr** package.
 87 | It can also send the queries and then receive their results in R.
 88 | 
 89 | Therefore, you can complete data analysis with Teradata using only R. It
 90 | means that you are freed from the troublesome switching between tools and
 91 | the context switching that causes mistakes.
 92 | 
 93 | ## 4. Usage
 94 | 
 95 | The package uses the **odbc** package to connect to the database and the
 96 | **dbplyr** package to build SQL.
 97 | 
 98 | First, you need to establish an ODBC connection to Teradata. See:
 99 | 
100 | -   [README - **odbc**
101 |     package](https://CRAN.R-project.org/package=odbc/readme/README.html).
102 | 
103 | ``` r
104 | # Establish a connection to Teradata
105 | con <- dbConnect(odbc(), 
106 |                  driver = "{Teradata Driver}", DBCName = "host_name_or_IP_address",
107 |                  uid = "user_name", pwd = "*****")
108 | ```
109 | 
110 | Second, you need to specify a table to build SQL. See:
111 | 
112 | -   [Introduction to dbplyr •
113 |     dbplyr](https://dbplyr.tidyverse.org/articles/dbplyr.html).
114 | 
115 | To specify a table, you can use `tbl()`:
116 | 
117 | ``` r
118 | # Getting table
119 | my_table <- tbl(con, "my_table_name")
120 | 
121 | # Getting table in schema
122 | my_table <- tbl(con, in_schema("my_schema", "my_table_name"))
123 | ```
124 | 
125 | Third, you build queries. This can be done in the same way as manipulating data
126 | frames with **dplyr**:
127 | 
128 | -   [A Grammar of Data Manipulation •
129 |     dplyr](https://dplyr.tidyverse.org/).
130 | 
131 | For example, you can use the following:
132 | 
133 | -   `mutate()` adds new *columns* that are functions of existing
134 |     *columns*.
135 | -   `select()` picks *columns* based on their names.
136 | -   `filter()` picks *rows* based on their values.
137 | -   `summarise()` reduces multiple values down to a single summary.
138 | -   `arrange()` changes the ordering of the rows.
139 | 
140 | ``` r
141 | # Build a query
142 | q <- my_table %>% 
143 |   filter(between(date, "2017-01-01", "2017-01-03")) %>% 
144 |   group_by(date) %>%
145 |   summarise(n = n()) %>%
146 |   arrange(date)
147 | ```
148 | 
149 | `n()` is a function in **dplyr** to return the number of rows in the
150 | current group but here it will be translated to `count(*)` as a SQL
151 | function.
152 | 
153 | If you want to show built queries, use `show_query()`:
154 | 
155 | ``` r
156 | show_query(q)
157 | #> <SQL>
158 | #> SELECT "date", count(*) AS "n"
159 | #> FROM "my_table_name"
160 | #> WHERE ("date" BETWEEN '2017-01-01' AND '2017-01-03')
161 | #> GROUP BY "date"
162 | #> ORDER BY "date"
163 | ```
164 | 
165 | Finally, you send the built query and get its results in R using
166 | `collect()`.
167 | 
168 | ``` r
169 | # Send the query and get its result on R
170 | df <- q %>% collect
171 | df
172 | #> # A tibble: 3 x 2
173 | #>          date        n
174 | #>        <date>    <int>
175 | #>  1 2017-01-01   123456
176 | #>  2 2017-01-02  7891011
177 | #>  3 2017-01-03 12131415
178 | ```
179 | 
180 | ## 5. Translatable functions
181 | 
182 | The package mainly uses **dbplyr** to translate manipulations into
183 | queries.
184 | 
185 | *Translatable functions* are the functions available in manipulations
186 | that can be translated into SQL functions.
187 | 
188 | For instance, `n()` is translated to `count(*)` in the above example.
189 | 
190 | To learn about the translatable functions for Teradata, refer to the following:
191 | 
192 | -   [Adds Teradata
193 |     translation](https://github.com/tidyverse/dbplyr/pull/43)
194 | 
195 | Here, we introduce the special translatable functions that become
196 | available with **dplyr.teradata**.
197 | 
198 | ### 5.1. Treat Boolean
199 | 
200 | Teradata does not have a boolean data type. So when you work with boolean
201 | values, you need to write somewhat complex statements. The package provides
202 | several functions to handle them concisely.
203 | 
204 | `bool_to_int()` converts a boolean to an integer.
205 | 
206 | ``` r
207 | mutate(is_positive = bool_to_int(x > 0L))
208 | ```
209 | 
210 |     #> <SQL> CASE WHEN (`x` > 0) THEN 1 WHEN NOT(`x` > 0) THEN 0 END
211 | 
212 | `count_if()` or `n_if()` counts the number of rows satisfying a condition.
213 | 
214 | ``` r
215 | summarize(n = count_if(x > 0L))
216 | ```
217 | 
218 |     #> <SQL> SUM(CASE WHEN (`x` > 0) THEN 1 WHEN NOT(`x` > 0) THEN 0 END)
219 | 
220 | ### 5.2. `to_timestamp()`
221 | 
222 | When your table has columns that store UNIX time and you want to
223 | convert them to timestamps, you need to write complex SQL.
224 | 
225 | `to_timestamp()` is a translatable function that makes it easy.
226 | 
227 | ``` r
228 | mutate(ts = to_timestamp(unixtime_column))
229 | ```
230 | 
231 | The above manipulation is translated into SQL like the following:
232 | 
233 |     #> <SQL> CAST(DATE '1970-01-01' + (`unixtime_column` / 86400) AS TIMESTAMP(0)) + (`unixtime_column` MOD 86400) * (INTERVAL '00:00:01' HOUR TO SECOND)
234 | 
235 | ### 5.3. `cut()`
236 | 
237 | `cut()` is a very useful function available in base R.
238 | 
239 | For example, suppose you want to cut the values of `x` into three ranges at
240 | the break points 2 and 4:
241 | 
242 | ``` r
243 | x <- 1:6
244 | breaks <- c(0, 2, 4, 6)
245 | cut(x, breaks)
246 | #> [1] (0,2] (0,2] (2,4] (2,4] (4,6] (4,6]
247 | #> Levels: (0,2] (2,4] (4,6]
248 | ```
249 | 
250 | **dplyr.teradata** has a translatable function similar to this:
251 | 
252 | ``` r
253 | breaks = c(0, 2, 4, 6)
254 | mutate(y = cut(x, breaks))
255 | ```
256 | 
257 | As a result, it is translated into a `CASE WHEN` statement as follows:
258 | 
259 |     #> <SQL> CASE
260 |     #>  WHEN x > 0 AND x <= 2 THEN '(0,2]'
261 |     #>  WHEN x > 2 AND x <= 4 THEN '(2,4]'
262 |     #>  WHEN x > 4 AND x <= 6 THEN '(4,6]'
263 |     #>  ELSE NULL
264 |     #> END
265 | 
266 | Arguments of base `cut()` are also available:
267 | 
268 | ``` r
269 | breaks = c(0, 2, 4, 6)
270 | mutate(y = cut(x, breaks, labels = "-", include.lowest = TRUE))
271 | ```
272 | 
273 |     #> <SQL> CASE
274 |     #>  WHEN x >= 0 AND x <= 2 THEN '0-2'
275 |     #>  WHEN x > 2 AND x <= 4 THEN '3-4'
276 |     #>  WHEN x > 4 AND x <= 6 THEN '5-6'
277 |     #>  ELSE NULL
278 |     #> END
279 | 
280 | ## 6. Miscellaneous
281 | 
282 | ### 6.1. Sampling Data
283 | 
284 | Teradata supports sampling rows from tables:
285 | 
286 | -   [SAMPLE
287 |     Clause](https://docs.teradata.com/r/b8dd8xEYJnxfsq4uFRrHQQ/hyfVBxhz8aKKK1HS77zXnA),
288 | 
289 | and **dplyr** has a verb for the same purpose, `slice_sample()`. The package
290 | makes them work together.
291 | 
292 | For example, by the number of rows:
293 | 
294 | ``` r
295 | q <- my_table %>% slice_sample(n = 100L)
296 | 
297 | show_query(q)
298 | #> <SQL>
299 | #> SELECT *
300 | #> FROM "my_table_name"
301 | #> SAMPLE RANDOMIZED ALLOCATION 100
302 | ```
303 | 
304 | or by the proportion of rows:
305 | 
306 | ``` r
307 | q <- my_table %>% slice_sample(prop = 0.1)
308 | 
309 | show_query(q)
310 | #> <SQL>
311 | #> SELECT *
312 | #> FROM "my_table_name"
313 | #> SAMPLE RANDOMIZED ALLOCATION 0.1
314 | ```
315 | 
316 | It also supports sampling with replacement:
317 | 
318 | ``` r
319 | q <- my_table %>% slice_sample(n = 100L, replace = TRUE)
320 | 
321 | show_query(q)
322 | #> <SQL>
323 | #> SELECT *
324 | #> FROM "my_table_name"
325 | #> SAMPLE WITH REPLACEMENT RANDOMIZED ALLOCATION 100
326 | ```
327 | 
328 | and supports a random sample stratified by AMPs (it is much faster,
329 | especially for very large samples):
330 | 
331 | ``` r
332 | q <- my_table %>% slice_sample(n = 100L, randomized_allocation = FALSE)
333 | 
334 | show_query(q)
335 | #> <SQL>
336 | #> SELECT *
337 | #> FROM "my_table_name"
338 | #> SAMPLE 100
339 | ```
340 | 
341 | The package currently also supports the older versions of these verbs, `sample_n()` and `sample_frac()`:
342 | 
343 | ``` r
344 | # By the number of rows
345 | q <- my_table %>% sample_n(100L)
346 | # By the proportion of rows
347 | q <- my_table %>% sample_frac(0.1)
348 | ```
349 | 
350 | ### 6.2. `blob_to_string()`
351 | 
352 | `blob` objects from databases sometimes prevent manipulations with
353 | **dplyr**.
354 | 
355 | You might want to convert them to string.
356 | 
357 | `blob_to_string()` is a function to make it easy:
358 | 
359 | ``` r
360 | x <- blob::as_blob("Good morning")
361 | x
362 | #> <blob[1]>
363 | #> [1] blob[12 B]
364 | 
365 | # print raw data in blob
366 | x[[1]]
367 | #>  [1] 47 6f 6f 64 20 6d 6f 72 6e 69 6e 67
368 | 
369 | blob_to_string(x)
370 | #> [1] "476f6f64206d6f726e696e67"
371 | ```
372 | 
373 | ## 7. Related work
374 | 
375 | -   [A ‘dplyr’ Backend for Databases •
376 |     dbplyr](https://dbplyr.tidyverse.org/)
377 | -   [Teradata Vantage™ SQL Data Manipulation
378 |     Language](https://docs.teradata.com/r/b8dd8xEYJnxfsq4uFRrHQQ/7CvqIbRdnlAZNyaVKhlZTg)
379 | -   [tdplyr - Teradata R Package](https://github.com/Teradata/tdplyr)
380 | 


--------------------------------------------------------------------------------
/vignettes/dplyr.teradata.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | 
  3 | <html>
  4 | 
  5 | <head>
  6 | 
  7 | <meta charset="utf-8" />
  8 | <meta name="generator" content="pandoc" />
  9 | <meta http-equiv="X-UA-Compatible" content="IE=EDGE" />
 10 | 
 11 | <meta name="viewport" content="width=device-width, initial-scale=1" />
 12 | 
 13 | <meta name="author" content="Koji Makiyama (@hoxo-m)" />
 14 | 
 15 | <meta name="date" content="2020-11-12" />
 16 | 
 17 | <title>A Teradata Backend for dplyr</title>
 18 | 
 19 | <script>// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
 20 | // be compatible with the behavior of Pandoc < 2.8).
 21 | document.addEventListener('DOMContentLoaded', function(e) {
 22 |   var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
 23 |   var i, h, a;
 24 |   for (i = 0; i < hs.length; i++) {
 25 |     h = hs[i];
 26 |     if (!/^h[1-6]$/i.test(h.tagName)) continue;  // it should be a header h1-h6
 27 |     a = h.attributes;
 28 |     while (a.length > 0) h.removeAttribute(a[0].name);
 29 |   }
 30 | });
 31 | </script>
 32 | <style type="text/css">
 33 | a.anchor-section {margin-left: 10px; visibility: hidden; color: inherit;}
 34 | a.anchor-section::before {content: '#';}
 35 | .hasAnchor:hover a.anchor-section {visibility: visible;}
 36 | </style>
 37 | <script>// Anchor sections v1.0 written by Atsushi Yasumoto on Oct 3rd, 2020.
 38 | document.addEventListener('DOMContentLoaded', function() {
 39 |   // Do nothing if AnchorJS is used
 40 |   if (typeof window.anchors === 'object' && anchors.hasOwnProperty('hasAnchorJSLink')) {
 41 |     return;
 42 |   }
 43 | 
 44 |   const h = document.querySelectorAll('h1, h2, h3, h4, h5, h6');
 45 | 
 46 |   // Do nothing if sections are already anchored
 47 |   if (Array.from(h).some(x => x.classList.contains('hasAnchor'))) {
 48 |     return null;
 49 |   }
 50 | 
 51 |   // Use section id when pandoc runs with --section-divs
 52 |   const section_id = function(x) {
 53 |     return ((x.classList.contains('section') || (x.tagName === 'SECTION'))
 54 |             ? x.id : '');
 55 |   };
 56 | 
 57 |   // Add anchors
 58 |   h.forEach(function(x) {
 59 |     const id = x.id || section_id(x.parentElement);
 60 |     if (id === '') {
 61 |       return null;
 62 |     }
 63 |     let anchor = document.createElement('a');
 64 |     anchor.href = '#' + id;
 65 |     anchor.classList = ['anchor-section'];
 66 |     x.classList.add('hasAnchor');
 67 |     x.appendChild(anchor);
 68 |   });
 69 | });
 70 | </script>
 71 | 
 72 | <style type="text/css">
 73 |   code{white-space: pre-wrap;}
 74 |   span.smallcaps{font-variant: small-caps;}
 75 |   span.underline{text-decoration: underline;}
 76 |   div.column{display: inline-block; vertical-align: top; width: 50%;}
 77 |   div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
 78 |   ul.task-list{list-style: none;}
 79 |     </style>
 80 | 
 81 | 
 82 | <style type="text/css">code{white-space: pre;}</style>
 83 | <style type="text/css" data-origin="pandoc">
 84 | pre > code.sourceCode { white-space: pre; position: relative; }
 85 | pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
 86 | pre > code.sourceCode > span:empty { height: 1.2em; }
 87 | code.sourceCode > span { color: inherit; text-decoration: inherit; }
 88 | div.sourceCode { margin: 1em 0; }
 89 | pre.sourceCode { margin: 0; }
 90 | @media screen {
 91 | div.sourceCode { overflow: auto; }
 92 | }
 93 | @media print {
 94 | pre > code.sourceCode { white-space: pre-wrap; }
 95 | pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
 96 | }
 97 | pre.numberSource code
 98 |   { counter-reset: source-line 0; }
 99 | pre.numberSource code > span
100 |   { position: relative; left: -4em; counter-increment: source-line; }
101 | pre.numberSource code > span > a:first-child::before
102 |   { content: counter(source-line);
103 |     position: relative; left: -1em; text-align: right; vertical-align: baseline;
104 |     border: none; display: inline-block;
105 |     -webkit-touch-callout: none; -webkit-user-select: none;
106 |     -khtml-user-select: none; -moz-user-select: none;
107 |     -ms-user-select: none; user-select: none;
108 |     padding: 0 4px; width: 4em;
109 |     color: #aaaaaa;
110 |   }
111 | pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa;  padding-left: 4px; }
112 | div.sourceCode
113 |   {   }
114 | @media screen {
115 | pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
116 | }
117 | code span.al { color: #ff0000; font-weight: bold; } /* Alert */
118 | code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
119 | code span.at { color: #7d9029; } /* Attribute */
120 | code span.bn { color: #40a070; } /* BaseN */
121 | code span.bu { } /* BuiltIn */
122 | code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
123 | code span.ch { color: #4070a0; } /* Char */
124 | code span.cn { color: #880000; } /* Constant */
125 | code span.co { color: #60a0b0; font-style: italic; } /* Comment */
126 | code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
127 | code span.do { color: #ba2121; font-style: italic; } /* Documentation */
128 | code span.dt { color: #902000; } /* DataType */
129 | code span.dv { color: #40a070; } /* DecVal */
130 | code span.er { color: #ff0000; font-weight: bold; } /* Error */
131 | code span.ex { } /* Extension */
132 | code span.fl { color: #40a070; } /* Float */
133 | code span.fu { color: #06287e; } /* Function */
134 | code span.im { } /* Import */
135 | code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
136 | code span.kw { color: #007020; font-weight: bold; } /* Keyword */
137 | code span.op { color: #666666; } /* Operator */
138 | code span.ot { color: #007020; } /* Other */
139 | code span.pp { color: #bc7a00; } /* Preprocessor */
140 | code span.sc { color: #4070a0; } /* SpecialChar */
141 | code span.ss { color: #bb6688; } /* SpecialString */
142 | code span.st { color: #4070a0; } /* String */
143 | code span.va { color: #19177c; } /* Variable */
144 | code span.vs { color: #4070a0; } /* VerbatimString */
145 | code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
146 | 
147 | </style>
148 | <script>
149 | // apply pandoc div.sourceCode style to pre.sourceCode instead
150 | (function() {
151 |   var sheets = document.styleSheets;
152 |   for (var i = 0; i < sheets.length; i++) {
153 |     if (sheets[i].ownerNode.dataset["origin"] !== "pandoc") continue;
154 |     try { var rules = sheets[i].cssRules; } catch (e) { continue; }
155 |     for (var j = 0; j < rules.length; j++) {
156 |       var rule = rules[j];
157 |       // check if there is a div.sourceCode rule
158 |       if (rule.type !== rule.STYLE_RULE || rule.selectorText !== "div.sourceCode") continue;
159 |       var style = rule.style.cssText;
160 |       // check if color or background-color is set
161 |       if (rule.style.color === '' && rule.style.backgroundColor === '') continue;
162 |       // replace div.sourceCode by a pre.sourceCode rule
163 |       sheets[i].deleteRule(j);
164 |       sheets[i].insertRule('pre.sourceCode{' + style + '}', j);
165 |     }
166 |   }
167 | })();
168 | </script>
169 | 
170 | 
171 | 
172 | <style type="text/css">body {
173 | background-color: #fff;
174 | margin: 1em auto;
175 | max-width: 700px;
176 | overflow: visible;
177 | padding-left: 2em;
178 | padding-right: 2em;
179 | font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
180 | font-size: 14px;
181 | line-height: 1.35;
182 | }
183 | #TOC {
184 | clear: both;
185 | margin: 0 0 10px 10px;
186 | padding: 4px;
187 | width: 400px;
188 | border: 1px solid #CCCCCC;
189 | border-radius: 5px;
190 | background-color: #f6f6f6;
191 | font-size: 13px;
192 | line-height: 1.3;
193 | }
194 | #TOC .toctitle {
195 | font-weight: bold;
196 | font-size: 15px;
197 | margin-left: 5px;
198 | }
199 | #TOC ul {
200 | padding-left: 40px;
201 | margin-left: -1.5em;
202 | margin-top: 5px;
203 | margin-bottom: 5px;
204 | }
205 | #TOC ul ul {
206 | margin-left: -2em;
207 | }
208 | #TOC li {
209 | line-height: 16px;
210 | }
211 | table {
212 | margin: 1em auto;
213 | border-width: 1px;
214 | border-color: #DDDDDD;
215 | border-style: outset;
216 | border-collapse: collapse;
217 | }
218 | table th {
219 | border-width: 2px;
220 | padding: 5px;
221 | border-style: inset;
222 | }
223 | table td {
224 | border-width: 1px;
225 | border-style: inset;
226 | line-height: 18px;
227 | padding: 5px 5px;
228 | }
229 | table, table th, table td {
230 | border-left-style: none;
231 | border-right-style: none;
232 | }
233 | table thead, table tr.even {
234 | background-color: #f7f7f7;
235 | }
236 | p {
237 | margin: 0.5em 0;
238 | }
239 | blockquote {
240 | background-color: #f6f6f6;
241 | padding: 0.25em 0.75em;
242 | }
243 | hr {
244 | border-style: solid;
245 | border: none;
246 | border-top: 1px solid #777;
247 | margin: 28px 0;
248 | }
249 | dl {
250 | margin-left: 0;
251 | }
252 | dl dd {
253 | margin-bottom: 13px;
254 | margin-left: 13px;
255 | }
256 | dl dt {
257 | font-weight: bold;
258 | }
259 | ul {
260 | margin-top: 0;
261 | }
262 | ul li {
263 | list-style: circle outside;
264 | }
265 | ul ul {
266 | margin-bottom: 0;
267 | }
268 | pre, code {
269 | background-color: #f7f7f7;
270 | border-radius: 3px;
271 | color: #333;
272 | white-space: pre-wrap; 
273 | }
274 | pre {
275 | border-radius: 3px;
276 | margin: 5px 0px 10px 0px;
277 | padding: 10px;
278 | }
279 | pre:not([class]) {
280 | background-color: #f7f7f7;
281 | }
282 | code {
283 | font-family: Consolas, Monaco, 'Courier New', monospace;
284 | font-size: 85%;
285 | }
286 | p > code, li > code {
287 | padding: 2px 0px;
288 | }
289 | div.figure {
290 | text-align: center;
291 | }
292 | img {
293 | background-color: #FFFFFF;
294 | padding: 2px;
295 | border: 1px solid #DDDDDD;
296 | border-radius: 3px;
297 | border: 1px solid #CCCCCC;
298 | margin: 0 5px;
299 | }
300 | h1 {
301 | margin-top: 0;
302 | font-size: 35px;
303 | line-height: 40px;
304 | }
305 | h2 {
306 | border-bottom: 4px solid #f7f7f7;
307 | padding-top: 10px;
308 | padding-bottom: 2px;
309 | font-size: 145%;
310 | }
311 | h3 {
312 | border-bottom: 2px solid #f7f7f7;
313 | padding-top: 10px;
314 | font-size: 120%;
315 | }
316 | h4 {
317 | border-bottom: 1px solid #f7f7f7;
318 | margin-left: 8px;
319 | font-size: 105%;
320 | }
321 | h5, h6 {
322 | border-bottom: 1px solid #ccc;
323 | font-size: 105%;
324 | }
325 | a {
326 | color: #0033dd;
327 | text-decoration: none;
328 | }
329 | a:hover {
330 | color: #6666ff; }
331 | a:visited {
332 | color: #800080; }
333 | a:visited:hover {
334 | color: #BB00BB; }
335 | a[href^="http:"] {
336 | text-decoration: underline; }
337 | a[href^="https:"] {
338 | text-decoration: underline; }
339 | 
340 | code > span.kw { color: #555; font-weight: bold; } 
341 | code > span.dt { color: #902000; } 
342 | code > span.dv { color: #40a070; } 
343 | code > span.bn { color: #d14; } 
344 | code > span.fl { color: #d14; } 
345 | code > span.ch { color: #d14; } 
346 | code > span.st { color: #d14; } 
347 | code > span.co { color: #888888; font-style: italic; } 
348 | code > span.ot { color: #007020; } 
349 | code > span.al { color: #ff0000; font-weight: bold; } 
350 | code > span.fu { color: #900; font-weight: bold; } 
351 | code > span.er { color: #a61717; background-color: #e3d2d2; } 
352 | </style>
353 | 
354 | 
355 | 
356 | 
357 | </head>
358 | 
359 | <body>
360 | 
361 | 
362 | 
363 | 
364 | <h1 class="title toc-ignore">A Teradata Backend for dplyr</h1>
365 | <h4 class="author">Koji Makiyama (<span class="citation">@hoxo-m</span>)</h4>
366 | <h4 class="date">2020-11-12</h4>
367 | 
368 | 
369 | 
370 | <div id="overview" class="section level2">
371 | <h2>1. Overview</h2>
372 | <p>The package provides a Teradata backend for <strong>dplyr</strong>.</p>
373 | <p>It makes it possible to operate <a href="https://www.teradata.com/products-and-services/teradata-database/">Teradata Database</a> in the same way as manipulating data frames with <strong>dplyr</strong>.</p>
374 | <div class="sourceCode" id="cb1"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(dplyr.teradata)</span>
375 | <span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a></span>
376 | <span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a><span class="co"># Establish a connection to Teradata</span></span>
377 | <span id="cb1-4"><a href="#cb1-4" aria-hidden="true" tabindex="-1"></a>con <span class="ot">&lt;-</span> <span class="fu">dbConnect</span>(<span class="fu">todbc</span>(), </span>
378 | <span id="cb1-5"><a href="#cb1-5" aria-hidden="true" tabindex="-1"></a>                 <span class="at">driver =</span> <span class="st">&quot;{Teradata Driver}&quot;</span>, <span class="at">DBCName =</span> <span class="st">&quot;host_name_or_IP_address&quot;</span>,</span>
379 | <span id="cb1-6"><a href="#cb1-6" aria-hidden="true" tabindex="-1"></a>                 <span class="at">uid =</span> <span class="st">&quot;user_name&quot;</span>, <span class="at">pwd =</span> <span class="st">&quot;*****&quot;</span>)</span>
380 | <span id="cb1-7"><a href="#cb1-7" aria-hidden="true" tabindex="-1"></a>my_table <span class="ot">&lt;-</span> <span class="fu">tbl</span>(con, <span class="st">&quot;my_table_name&quot;</span>)</span>
381 | <span id="cb1-8"><a href="#cb1-8" aria-hidden="true" tabindex="-1"></a></span>
382 | <span id="cb1-9"><a href="#cb1-9" aria-hidden="true" tabindex="-1"></a><span class="co"># Build a query</span></span>
383 | <span id="cb1-10"><a href="#cb1-10" aria-hidden="true" tabindex="-1"></a>q <span class="ot">&lt;-</span> my_table <span class="sc">%&gt;%</span> </span>
384 | <span id="cb1-11"><a href="#cb1-11" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(<span class="fu">between</span>(date, <span class="st">&quot;2017-01-01&quot;</span>, <span class="st">&quot;2017-01-03&quot;</span>)) <span class="sc">%&gt;%</span> </span>
385 | <span id="cb1-12"><a href="#cb1-12" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(date) <span class="sc">%&gt;%</span></span>
386 | <span id="cb1-13"><a href="#cb1-13" aria-hidden="true" tabindex="-1"></a>  <span class="fu">summarise</span>(<span class="at">n =</span> <span class="fu">n</span>()) <span class="sc">%&gt;%</span></span>
387 | <span id="cb1-14"><a href="#cb1-14" aria-hidden="true" tabindex="-1"></a>  <span class="fu">arrange</span>(date)</span>
388 | <span id="cb1-15"><a href="#cb1-15" aria-hidden="true" tabindex="-1"></a></span>
389 | <span id="cb1-16"><a href="#cb1-16" aria-hidden="true" tabindex="-1"></a><span class="fu">show_query</span>(q)</span>
390 | <span id="cb1-17"><a href="#cb1-17" aria-hidden="true" tabindex="-1"></a><span class="co">#&gt; &lt;SQL&gt;</span></span>
391 | <span id="cb1-18"><a href="#cb1-18" aria-hidden="true" tabindex="-1"></a><span class="co">#&gt; SELECT &quot;date&quot;, count(*) AS &quot;n&quot;</span></span>
392 | <span id="cb1-19"><a href="#cb1-19" aria-hidden="true" tabindex="-1"></a><span class="co">#&gt; FROM &quot;my_table_name&quot;</span></span>
393 | <span id="cb1-20"><a href="#cb1-20" aria-hidden="true" tabindex="-1"></a><span class="co">#&gt; WHERE (&quot;date&quot; BETWEEN &#39;2017-01-01&#39; AND &#39;2017-01-03&#39;)</span></span>
394 | <span id="cb1-21"><a href="#cb1-21" aria-hidden="true" tabindex="-1"></a><span class="co">#&gt; GROUP BY &quot;date&quot;</span></span>
395 | <span id="cb1-22"><a href="#cb1-22" aria-hidden="true" tabindex="-1"></a><span class="co">#&gt; ORDER BY &quot;date&quot;</span></span>
396 | <span id="cb1-23"><a href="#cb1-23" aria-hidden="true" tabindex="-1"></a></span>
397 | <span id="cb1-24"><a href="#cb1-24" aria-hidden="true" tabindex="-1"></a><span class="co"># Send the query and get its result on R</span></span>
398 | <span id="cb1-25"><a href="#cb1-25" aria-hidden="true" tabindex="-1"></a>df <span class="ot">&lt;-</span> q <span class="sc">%&gt;%</span> collect</span>
399 | <span id="cb1-26"><a href="#cb1-26" aria-hidden="true" tabindex="-1"></a>df</span>
400 | <span id="cb1-27"><a href="#cb1-27" aria-hidden="true" tabindex="-1"></a><span class="co">#&gt; # A tibble: 3 x 2</span></span>
401 | <span id="cb1-28"><a href="#cb1-28" aria-hidden="true" tabindex="-1"></a><span class="co">#&gt;          date        n</span></span>
402 | <span id="cb1-29"><a href="#cb1-29" aria-hidden="true" tabindex="-1"></a><span class="co">#&gt;        &lt;date&gt;    &lt;int&gt;</span></span>
403 | <span id="cb1-30"><a href="#cb1-30" aria-hidden="true" tabindex="-1"></a><span class="co">#&gt;  1 2017-01-01   123456</span></span>
404 | <span id="cb1-31"><a href="#cb1-31" aria-hidden="true" tabindex="-1"></a><span class="co">#&gt;  2 2017-01-02  7891011</span></span>
405 | <span id="cb1-32"><a href="#cb1-32" aria-hidden="true" tabindex="-1"></a><span class="co">#&gt;  3 2017-01-03 12131415</span></span></code></pre></div>
406 | </div>
407 | <div id="installation" class="section level2">
408 | <h2>2. Installation</h2>
409 | <p>You can install the <strong>dplyr.teradata</strong> package from CRAN.</p>
410 | <div class="sourceCode" id="cb2"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="fu">install.packages</span>(<span class="st">&quot;dplyr.teradata&quot;</span>)</span></code></pre></div>
411 | <p>You can also install the development version of the package from GitHub.</p>
412 | <div class="sourceCode" id="cb3"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="fu">install.packages</span>(<span class="st">&quot;devtools&quot;</span>) <span class="co"># if you have not installed &quot;devtools&quot; package</span></span>
413 | <span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a>devtools<span class="sc">::</span><span class="fu">install_github</span>(<span class="st">&quot;hoxo-m/dplyr.teradata&quot;</span>)</span></code></pre></div>
414 | <p>The source code for <strong>dplyr.teradata</strong> package is available on GitHub at</p>
415 | <ul>
416 | <li><a href="https://github.com/hoxo-m/dplyr.teradata" class="uri">https://github.com/hoxo-m/dplyr.teradata</a>.</li>
417 | </ul>
418 | </div>
419 | <div id="motivation" class="section level2">
420 | <h2>3. Motivation</h2>
421 | <p>The package provides a Teradata backend for <strong>dplyr</strong>. It makes it possible to build SQL for <a href="https://www.teradata.com/products-and-services/teradata-database/">Teradata Database</a> in the same way as manipulating data frames with the <strong>dplyr</strong> package. It can also send the queries and then receive their results in R.</p>
422 | <p>Therefore, you can complete data analysis with Teradata entirely within R. This frees you from the troublesome switching between tools and mental contexts that causes mistakes.</p>
423 | </div>
424 | <div id="usage" class="section level2">
425 | <h2>4. Usage</h2>
426 | <p>The package uses the <strong>odbc</strong> package to connect to the database and the <strong>dbplyr</strong> package to build SQL.</p>
427 | <p>First, you need to establish an ODBC connection to Teradata. See:</p>
428 | <ul>
429 | <li><a href="https://CRAN.R-project.org/package=odbc/readme/README.html">README - <strong>odbc</strong> package</a>.</li>
430 | </ul>
431 | <div class="sourceCode" id="cb4"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Establish a connection to Teradata</span></span>
432 | <span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a>con <span class="ot">&lt;-</span> <span class="fu">dbConnect</span>(<span class="fu">odbc</span>(), </span>
433 | <span id="cb4-3"><a href="#cb4-3" aria-hidden="true" tabindex="-1"></a>                 <span class="at">driver =</span> <span class="st">&quot;{Teradata Driver}&quot;</span>, <span class="at">DBCName =</span> <span class="st">&quot;host_name_or_IP_address&quot;</span>,</span>
434 | <span id="cb4-4"><a href="#cb4-4" aria-hidden="true" tabindex="-1"></a>                 <span class="at">uid =</span> <span class="st">&quot;user_name&quot;</span>, <span class="at">pwd =</span> <span class="st">&quot;*****&quot;</span>)</span></code></pre></div>
435 | <p>Second, you need to specify a table to build SQL. See:</p>
436 | <ul>
437 | <li><a href="https://dbplyr.tidyverse.org/articles/dbplyr.html">Introduction to dbplyr • dbplyr</a>.</li>
438 | </ul>
439 | <p>To specify a table, you can use <code>tbl()</code>:</p>
440 | <div class="sourceCode" id="cb5"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Getting table</span></span>
441 | <span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a>my_table <span class="ot">&lt;-</span> <span class="fu">tbl</span>(con, <span class="st">&quot;my_table_name&quot;</span>)</span>
442 | <span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a></span>
443 | <span id="cb5-4"><a href="#cb5-4" aria-hidden="true" tabindex="-1"></a><span class="co"># Getting table in schema</span></span>
444 | <span id="cb5-5"><a href="#cb5-5" aria-hidden="true" tabindex="-1"></a>my_table <span class="ot">&lt;-</span> <span class="fu">tbl</span>(con, <span class="fu">in_schema</span>(<span class="st">&quot;my_schema&quot;</span>, <span class="st">&quot;my_table_name&quot;</span>))</span></code></pre></div>
445 | <p>Third, you build queries. This can be done in the same way as manipulating data frames with <strong>dplyr</strong>:</p>
446 | <ul>
447 | <li><a href="https://dplyr.tidyverse.org/">A Grammar of Data Manipulation • dplyr</a>.</li>
448 | </ul>
449 | <p>For example, you can use the following:</p>
450 | <ul>
451 | <li><code>mutate()</code> adds new <em>columns</em> that are functions of existing <em>columns</em>.</li>
452 | <li><code>select()</code> picks <em>columns</em> based on their names.</li>
453 | <li><code>filter()</code> picks <em>rows</em> based on their values.</li>
454 | <li><code>summarise()</code> reduces multiple values down to a single summary.</li>
455 | <li><code>arrange()</code> changes the ordering of the rows.</li>
456 | </ul>
457 | <div class="sourceCode" id="cb6"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Build a query</span></span>
458 | <span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a>q <span class="ot">&lt;-</span> my_table <span class="sc">%&gt;%</span> </span>
459 | <span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(<span class="fu">between</span>(date, <span class="st">&quot;2017-01-01&quot;</span>, <span class="st">&quot;2017-01-03&quot;</span>)) <span class="sc">%&gt;%</span> </span>
460 | <span id="cb6-4"><a href="#cb6-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(date) <span class="sc">%&gt;%</span></span>
461 | <span id="cb6-5"><a href="#cb6-5" aria-hidden="true" tabindex="-1"></a>  <span class="fu">summarise</span>(<span class="at">n =</span> <span class="fu">n</span>()) <span class="sc">%&gt;%</span></span>
462 | <span id="cb6-6"><a href="#cb6-6" aria-hidden="true" tabindex="-1"></a>  <span class="fu">arrange</span>(date)</span></code></pre></div>
463 | <p><code>n()</code> is a function in <strong>dplyr</strong> to return the number of rows in the current group but here it will be translated to <code>count(*)</code> as a SQL function.</p>
464 | <p>If you want to show built queries, use <code>show_query()</code>:</p>
465 | <div class="sourceCode" id="cb7"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a><span class="fu">show_query</span>(q)</span>
466 | <span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a><span class="co">#&gt; &lt;SQL&gt;</span></span>
467 | <span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a><span class="co">#&gt; SELECT &quot;date&quot;, count(*) AS &quot;n&quot;</span></span>
468 | <span id="cb7-4"><a href="#cb7-4" aria-hidden="true" tabindex="-1"></a><span class="co">#&gt; FROM &quot;my_table_name&quot;</span></span>
469 | <span id="cb7-5"><a href="#cb7-5" aria-hidden="true" tabindex="-1"></a><span class="co">#&gt; WHERE (&quot;date&quot; BETWEEN &#39;2017-01-01&#39; AND &#39;2017-01-03&#39;)</span></span>
470 | <span id="cb7-6"><a href="#cb7-6" aria-hidden="true" tabindex="-1"></a><span class="co">#&gt; GROUP BY &quot;date&quot;</span></span>
471 | <span id="cb7-7"><a href="#cb7-7" aria-hidden="true" tabindex="-1"></a><span class="co">#&gt; ORDER BY &quot;date&quot;</span></span></code></pre></div>
472 | <p>Finally, you send the built queries and get their results in R using <code>collect()</code>.</p>
473 | <div class="sourceCode" id="cb8"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Send the query and get its result on R</span></span>
474 | <span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a>df <span class="ot">&lt;-</span> q <span class="sc">%&gt;%</span> collect</span>
475 | <span id="cb8-3"><a href="#cb8-3" aria-hidden="true" tabindex="-1"></a>df</span>
476 | <span id="cb8-4"><a href="#cb8-4" aria-hidden="true" tabindex="-1"></a><span class="co">#&gt; # A tibble: 3 x 2</span></span>
477 | <span id="cb8-5"><a href="#cb8-5" aria-hidden="true" tabindex="-1"></a><span class="co">#&gt;          date        n</span></span>
478 | <span id="cb8-6"><a href="#cb8-6" aria-hidden="true" tabindex="-1"></a><span class="co">#&gt;        &lt;date&gt;    &lt;int&gt;</span></span>
479 | <span id="cb8-7"><a href="#cb8-7" aria-hidden="true" tabindex="-1"></a><span class="co">#&gt;  1 2017-01-01   123456</span></span>
480 | <span id="cb8-8"><a href="#cb8-8" aria-hidden="true" tabindex="-1"></a><span class="co">#&gt;  2 2017-01-02  7891011</span></span>
481 | <span id="cb8-9"><a href="#cb8-9" aria-hidden="true" tabindex="-1"></a><span class="co">#&gt;  3 2017-01-03 12131415</span></span></code></pre></div>
482 | </div>
483 | <div id="translatable-functions" class="section level2">
484 | <h2>5. Translatable functions</h2>
485 | <p>The package mainly uses <strong>dbplyr</strong> to translate manipulations into queries.</p>
486 | <p><em>Translatable functions</em> are the functions available in manipulations that the package can translate into SQL functions.</p>
487 | <p>For instance, <code>n()</code> is translated to <code>count(*)</code> in the above example.</p>
488 | <p>To learn which functions are translatable for Teradata, refer to the following:</p>
489 | <ul>
490 | <li><a href="https://github.com/tidyverse/dbplyr/pull/43">Adds Teradata translation</a></li>
491 | </ul>
492 | <p>Here, we introduce the special translatable functions that become available with <strong>dplyr.teradata</strong>.</p>
493 | <div id="treat-boolean" class="section level3">
494 | <h3>5.1. Treat Boolean</h3>
495 | <p>Teradata does not have a boolean data type, so when you work with boolean values you need to write fairly complex statements. The package provides several functions that let you handle them concisely.</p>
496 | <p><code>bool_to_int</code> transforms boolean to integer.</p>
497 | <div class="sourceCode" id="cb9"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a><span class="fu">mutate</span>(<span class="at">is_positive =</span> <span class="fu">bool_to_int</span>(x <span class="sc">&gt;</span> 0L))</span></code></pre></div>
498 | <pre><code>#&gt; &lt;SQL&gt; CASE WHEN (`x` &gt; 0) THEN 1 WHEN NOT(`x` &gt; 0) THEN 0 END</code></pre>
499 | <p><code>count_if()</code> or <code>n_if()</code> counts the number of rows satisfying a condition.</p>
500 | <div class="sourceCode" id="cb11"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a><span class="fu">summarize</span>(<span class="at">n =</span> <span class="fu">count_if</span>(x <span class="sc">&gt;</span> 0L))</span></code></pre></div>
501 | <pre><code>#&gt; &lt;SQL&gt; SUM(CASE WHEN (`x` &gt; 0) THEN 1 WHEN NOT(`x` &gt; 0) THEN 0 END)</code></pre>
502 | </div>
503 | <div id="to_timestamp" class="section level3">
504 | <h3>5.2. <code>to_timestamp()</code></h3>
505 | <p>When your tables have columns that store UNIX time and you want to convert them to timestamps, you need to write complex SQL.</p>
506 | <p><code>to_timestamp()</code> is a translatable function that makes it easy.</p>
507 | <div class="sourceCode" id="cb13"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb13-1"><a href="#cb13-1" aria-hidden="true" tabindex="-1"></a><span class="fu">mutate</span>(<span class="at">ts =</span> <span class="fu">to_timestamp</span>(unixtime_column))</span></code></pre></div>
508 | <p>A manipulation such as the one above is translated into SQL like the following:</p>
509 | <pre><code>#&gt; &lt;SQL&gt; CAST(DATE &#39;1970-01-01&#39; + (`unixtime_column` / 86400) AS TIMESTAMP(0)) + (`unixtime_column` MOD 86400) * (INTERVAL &#39;00:00:01&#39; HOUR TO SECOND)</code></pre>
510 | </div>
511 | <div id="cut" class="section level3">
512 | <h3>5.3. <code>cut()</code></h3>
513 | <p><code>cut()</code> is a very useful function that you can use in base R.</p>
514 | <p>For example, you want to cut values of <code>x</code> into three parts of ranges by break points 2 and 4:</p>
515 | <div class="sourceCode" id="cb15"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb15-1"><a href="#cb15-1" aria-hidden="true" tabindex="-1"></a>x <span class="ot">&lt;-</span> <span class="dv">1</span><span class="sc">:</span><span class="dv">6</span></span>
516 | <span id="cb15-2"><a href="#cb15-2" aria-hidden="true" tabindex="-1"></a>breaks <span class="ot">&lt;-</span> <span class="fu">c</span>(<span class="dv">0</span>, <span class="dv">2</span>, <span class="dv">4</span>, <span class="dv">6</span>)</span>
517 | <span id="cb15-3"><a href="#cb15-3" aria-hidden="true" tabindex="-1"></a><span class="fu">cut</span>(x, breaks)</span>
518 | <span id="cb15-4"><a href="#cb15-4" aria-hidden="true" tabindex="-1"></a><span class="co">#&gt; [1] (0,2] (0,2] (2,4] (2,4] (4,6] (4,6]</span></span>
519 | <span id="cb15-5"><a href="#cb15-5" aria-hidden="true" tabindex="-1"></a><span class="co">#&gt; Levels: (0,2] (2,4] (4,6]</span></span></code></pre></div>
520 | <p><strong>dplyr.teradata</strong> has a translatable function similar to this:</p>
521 | <div class="sourceCode" id="cb16"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb16-1"><a href="#cb16-1" aria-hidden="true" tabindex="-1"></a>breaks <span class="ot">=</span> <span class="fu">c</span>(<span class="dv">0</span>, <span class="dv">2</span>, <span class="dv">4</span>, <span class="dv">6</span>)</span>
522 | <span id="cb16-2"><a href="#cb16-2" aria-hidden="true" tabindex="-1"></a><span class="fu">mutate</span>(<span class="at">y =</span> <span class="fu">cut</span>(x, breaks))</span></code></pre></div>
523 | <p>In the result, it is translated to a <code>CASE WHEN</code> statement as follows:</p>
524 | <pre><code>#&gt; &lt;SQL&gt; CASE
525 | #&gt;  WHEN x &gt; 0 AND x &lt;= 2 THEN &#39;(0,2]&#39;
526 | #&gt;  WHEN x &gt; 2 AND x &lt;= 4 THEN &#39;(2,4]&#39;
527 | #&gt;  WHEN x &gt; 4 AND x &lt;= 6 THEN &#39;(4,6]&#39;
528 | #&gt;  ELSE NULL
529 | #&gt; END</code></pre>
530 | <p>Arguments of base <code>cut()</code> are also available:</p>
531 | <div class="sourceCode" id="cb18"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb18-1"><a href="#cb18-1" aria-hidden="true" tabindex="-1"></a>breaks <span class="ot">=</span> <span class="fu">c</span>(<span class="dv">0</span>, <span class="dv">2</span>, <span class="dv">4</span>, <span class="dv">6</span>)</span>
532 | <span id="cb18-2"><a href="#cb18-2" aria-hidden="true" tabindex="-1"></a><span class="fu">mutate</span>(<span class="at">y =</span> <span class="fu">cut</span>(x, breaks, <span class="at">labels =</span> <span class="st">&quot;-&quot;</span>, <span class="at">include.lowest =</span> <span class="cn">TRUE</span>))</span></code></pre></div>
533 | <pre><code>#&gt; &lt;SQL&gt; CASE
534 | #&gt;  WHEN x &gt;= 0 AND x &lt;= 2 THEN &#39;0-2&#39;
535 | #&gt;  WHEN x &gt; 2 AND x &lt;= 4 THEN &#39;3-4&#39;
536 | #&gt;  WHEN x &gt; 4 AND x &lt;= 6 THEN &#39;5-6&#39;
537 | #&gt;  ELSE NULL
538 | #&gt; END</code></pre>
539 | </div>
540 | </div>
541 | <div id="other-useful-functions" class="section level2">
542 | <h2>6. Other useful functions</h2>
543 | <div id="blob_to_string" class="section level3">
544 | <h3>6.1. <code>blob_to_string()</code></h3>
545 | <p>The <code>blob</code> object from databases sometimes prevents manipulations with <strong>dplyr</strong>.</p>
546 | <p>You might want to convert them to string.</p>
547 | <p><code>blob_to_string()</code> is a function to make it easy:</p>
548 | <div class="sourceCode" id="cb20"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb20-1"><a href="#cb20-1" aria-hidden="true" tabindex="-1"></a>x <span class="ot">&lt;-</span> blob<span class="sc">::</span><span class="fu">as_blob</span>(<span class="st">&quot;Good morning&quot;</span>)</span>
549 | <span id="cb20-2"><a href="#cb20-2" aria-hidden="true" tabindex="-1"></a>x</span>
550 | <span id="cb20-3"><a href="#cb20-3" aria-hidden="true" tabindex="-1"></a><span class="co">#&gt; &lt;blob[1]&gt;</span></span>
551 | <span id="cb20-4"><a href="#cb20-4" aria-hidden="true" tabindex="-1"></a><span class="co">#&gt; [1] blob[12 B]</span></span>
552 | <span id="cb20-5"><a href="#cb20-5" aria-hidden="true" tabindex="-1"></a></span>
553 | <span id="cb20-6"><a href="#cb20-6" aria-hidden="true" tabindex="-1"></a><span class="co"># print raw data in blob</span></span>
554 | <span id="cb20-7"><a href="#cb20-7" aria-hidden="true" tabindex="-1"></a>x[[<span class="dv">1</span>]]</span>
555 | <span id="cb20-8"><a href="#cb20-8" aria-hidden="true" tabindex="-1"></a><span class="co">#&gt;  [1] 47 6f 6f 64 20 6d 6f 72 6e 69 6e 67</span></span>
556 | <span id="cb20-9"><a href="#cb20-9" aria-hidden="true" tabindex="-1"></a></span>
557 | <span id="cb20-10"><a href="#cb20-10" aria-hidden="true" tabindex="-1"></a><span class="fu">blob_to_string</span>(x)</span>
558 | <span id="cb20-11"><a href="#cb20-11" aria-hidden="true" tabindex="-1"></a><span class="co">#&gt; [1] &quot;476f6f64206d6f726e696e67&quot;</span></span></code></pre></div>
559 | </div>
560 | </div>
561 | <div id="related-work" class="section level2">
562 | <h2>7. Related work</h2>
563 | <ul>
564 | <li><a href="https://dbplyr.tidyverse.org/">A ‘dplyr’ Backend for Databases • dbplyr</a></li>
565 | </ul>
566 | </div>
567 | 
568 | 
569 | 
570 | <!-- code folding -->
571 | 
572 | 
573 | <!-- dynamically load mathjax for compatibility with self-contained -->
574 | <script>
575 |   (function () {
576 |     var script = document.createElement("script");
577 |     script.type = "text/javascript";
578 |     script.src  = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
579 |     document.getElementsByTagName("head")[0].appendChild(script);
580 |   })();
581 | </script>
582 | 
583 | </body>
584 | </html>
585 | 


--------------------------------------------------------------------------------